fullstackgtm 0.15.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/market.ts ADDED
@@ -0,0 +1,559 @@
1
+ import { createHash } from "node:crypto";
2
+ import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
3
+ import { join } from "node:path";
4
+ import { credentialsDir } from "./credentials.ts";
5
+ import type { GtmEvidence } from "./types.ts";
6
+
7
+ /**
8
+ * The Market Map: a live model of the competitive category a company sells
9
+ * into. Vendors publish claims constantly (pricing pages, feature pages,
10
+ * hero copy); each (vendor × claim) cell gets a messaging-intensity reading,
11
+ * and each claim row gets a derived front state. Observations are
12
+ * append-only — history is the product; "what changed since last run" is a
13
+ * first-class question.
14
+ *
15
+ * Division of labor mirrors call intelligence: intensity readings are
16
+ * *proposals* (LLM or human, always with verbatim quoted evidence), while
17
+ * everything downstream — front states, drift, the report — is deterministic
18
+ * over the stored observations. Same stored observations, same map.
19
+ *
20
+ * The claim taxonomy and vendor registry live in a reviewable config file
21
+ * (git-friendly, analyst-edited); captures and observations live under the
22
+ * profile home so one client's category intel never bleeds into another's.
23
+ */
24
+
25
/**
 * Messaging-intensity reading for one (vendor × claim) cell.
 * "unobservable" is distinct from "absent": it marks a cell that could not be
 * read at all (see the surface rule in the starter config).
 */
export type ClaimIntensity =
  | "loud"
  | "quiet"
  | "absent"
  | "unobservable";

/** How much trust the extractor places in a single reading. */
export type ObservationConfidence =
  | "high"
  | "medium"
  | "low";

/** Derived state of one claim row; computed from observations, never stored. */
export type FrontState =
  | "open"
  | "contested"
  | "owned"
  | "saturated"
  | "vacant";
30
+
31
/** One row of the claim taxonomy: a capability vendors may or may not message. */
export type MarketClaim = {
  id: string;
  /** What is being claimed — specific enough that loud/quiet/absent can be judged. */
  capability: string;
  /** ICP this claim cell speaks to; the vocabulary is category-specific. */
  icp: string;
  /** Pricing structure this claim cell implies; also category-specific. */
  pricingStructure: string;
  /** Operational rule a reader applies to decide LOUD vs QUIET vs ABSENT. */
  definition: string;
};

/** One column of the map: a vendor and the public pages captured for it. */
export type MarketVendor = {
  id: string;
  name: string;
  urls: {
    home: string;
    /** A null here is a finding in its own right: the vendor has no public pricing page. */
    pricing: string | null;
    product: string[];
  };
  notes?: string;
};

/** The reviewable config file: the category, its vendor registry and claim taxonomy. */
export type MarketConfig = {
  category: string;
  /** When set, parseMarketConfig requires it to be the id of an entry in `vendors`. */
  anchorVendor?: string;
  vendors: MarketVendor[];
  claims: MarketClaim[];
  /** Human-readable statement of the LOUD/QUIET/ABSENT/UNOBSERVABLE judging rule. */
  surfaceRule?: string;
};
63
+
64
/** A single intensity reading for one (vendor × claim) cell in one run. */
export type MarketObservation = {
  /** Deterministic id: a stable hash of (category, runLabel, vendorId, claimId). */
  id: string;
  vendorId: string;
  claimId: string;
  observedAt: string;
  intensity: ClaimIntensity;
  confidence: ObservationConfidence;
  /** Explanation for reviewers of why this reading was given. */
  reason: string;
  /**
   * Verbatim quoted spans that ground any non-absent reading
   * (sourceSystem "web", metadata.url + metadata.captureHash).
   */
  evidence: GtmEvidence[];
};

/** All observations produced by one run over one category. */
export type ObservationSet = {
  id: string;
  category: string;
  runLabel: string;
  runAt: string;
  /** Origin of the readings: "manual" or "llm:<provider>:<model>". */
  extractor: string;
  observations: MarketObservation[];
};
90
+
91
/** One manifest row: the outcome of fetching one vendor URL during one run. */
export type CaptureEntry = {
  runLabel: string;
  vendorId: string;
  /** Which slot of the vendor's `urls` this capture came from. */
  kind: "home" | "pricing" | "product";
  url: string;
  fetchedAt: string;
  /** HTTP status of the fetch; null when the fetch itself threw. */
  httpStatus: number | null;
  /** sha256 of the extracted text, or null if the fetch failed or yielded no text. */
  captureHash: string | null;
  textChars: number;
};
102
+
103
// Strength ordering of readings, weakest to strongest. Used to keep the
// stronger reading when a set carries more than one observation for a cell.
const INTENSITY_RANK: Record<ClaimIntensity, number> = {
  unobservable: 0,
  absent: 1,
  quiet: 2,
  loud: 3,
};
109
+
110
// 32-bit FNV-1a over the string's UTF-16 code units, rendered as exactly eight
// lowercase hex digits. Per the original note this mirrors stableHash in
// rules.ts; it is duplicated here so market.ts can be imported without
// pulling in the audit engine.
function fnv1a(value: string): string {
  const OFFSET_BASIS = 0x811c9dc5;
  const PRIME = 0x01000193;

  let state = OFFSET_BASIS;
  let index = 0;
  while (index < value.length) {
    // XOR the next code unit in, then multiply with 32-bit wraparound.
    state = Math.imul(state ^ value.charCodeAt(index), PRIME);
    index += 1;
  }

  // `>>> 0` reinterprets the signed 32-bit state as unsigned before formatting.
  const unsigned = state >>> 0;
  return unsigned.toString(16).padStart(8, "0");
}
120
+
121
/**
 * Deterministic id for one observation cell: the four coordinates are joined
 * with "|" and hashed, so the same (category, run, vendor, claim) always
 * yields the same `obs_…` id.
 */
export function observationId(category: string, runLabel: string, vendorId: string, claimId: string): string {
  const cellKey = `${category}|${runLabel}|${vendorId}|${claimId}`;
  const digest = fnv1a(cellKey);
  return `obs_${digest}`;
}
124
+
125
+ // ---------------------------------------------------------------------------
126
+ // Config
127
+
128
/**
 * Parse and sanity-check a market config from its JSON text.
 *
 * Checks performed: the root is a JSON object; `category` is a non-empty
 * string (it is later used as a directory name); `vendors` and `claims` are
 * non-empty arrays whose entries are objects with unique, non-empty ids; and
 * `anchorVendor`, when present, names one of the vendors. Field shapes beyond
 * that (for example vendor `urls`) are NOT validated here.
 *
 * @param raw JSON text of the config file.
 * @returns The parsed config.
 * @throws SyntaxError when `raw` is not valid JSON.
 * @throws Error with a "market config: …" message when a check fails.
 */
export function parseMarketConfig(raw: string): MarketConfig {
  const parsed: unknown = JSON.parse(raw);
  // `null`, arrays and primitives are valid JSON but not a config; reject them
  // with a config error instead of letting a property access blow up.
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    throw new Error("market config: expected a JSON object");
  }
  const config = parsed as MarketConfig;

  if (typeof config.category !== "string" || !config.category) {
    throw new Error("market config: missing category");
  }
  if (!Array.isArray(config.vendors) || config.vendors.length === 0) {
    throw new Error("market config: at least one vendor is required");
  }
  if (!Array.isArray(config.claims) || config.claims.length === 0) {
    throw new Error("market config: at least one claim is required");
  }

  for (const [label, items] of [
    ["vendor", config.vendors],
    ["claim", config.claims],
  ] as const) {
    const seen = new Set<string>();
    for (const item of items) {
      // Hand-edited JSON can contain null or scalar entries; treat them the
      // same as an entry without an id.
      if (item == null || typeof item !== "object" || !item.id) {
        throw new Error(`market config: ${label} missing id`);
      }
      if (seen.has(item.id)) throw new Error(`market config: duplicate ${label} id "${item.id}"`);
      seen.add(item.id);
    }
  }

  if (config.anchorVendor && !config.vendors.some((v) => v.id === config.anchorVendor)) {
    throw new Error(`market config: anchorVendor "${config.anchorVendor}" is not in vendors`);
  }
  return config;
}
153
+
154
/**
 * Read a market config file as UTF-8 and run it through parseMarketConfig.
 * File-system errors and parse/validation errors propagate to the caller.
 */
export function loadMarketConfig(path: string): MarketConfig {
  const raw = readFileSync(path, "utf8");
  return parseMarketConfig(raw);
}
157
+
158
/**
 * Build a placeholder config for a new category: one stand-in vendor (also
 * the anchor), one example claim, and the default surface rule. Every value
 * except `category` is fixed text meant to be replaced by the analyst.
 */
export function starterMarketConfig(category: string): MarketConfig {
  const placeholderVendorId = "your-company";

  const placeholderVendor: MarketVendor = {
    id: placeholderVendorId,
    name: "Your Company",
    urls: { home: "https://example.com/", pricing: null, product: [] },
    notes: "Replace with the real vendor set (≤10 works well). pricing: null records 'no public pricing page'.",
  };

  const exampleClaim: MarketClaim = {
    id: "example-claim",
    capability: "Example capability: what is being claimed, stated precisely",
    icp: "who-buys-it",
    pricingStructure: "how-it-is-priced",
    definition:
      "LOUD if the claim is hero copy or a top-nav named product with a dedicated page; QUIET if it appears only on pages below that; ABSENT if nowhere. Write the definition so a human could judge any vendor's page against it.",
  };

  const surfaceRule =
    "LOUD = hero copy OR top-level-nav named product with dedicated page; QUIET = present on any indexed page below that; ABSENT = nowhere observed (explicit disavowals score ABSENT with the disavowal quoted in reason); UNOBSERVABLE = capture empty/failed — never score ABSENT from a failed capture.";

  // Key order is kept stable so a serialized starter file reads the same way.
  return {
    category,
    anchorVendor: placeholderVendorId,
    vendors: [placeholderVendor],
    claims: [exampleClaim],
    surfaceRule,
  };
}
184
+
185
+ // ---------------------------------------------------------------------------
186
+ // Profile-scoped market home: captures and observations live with credentials
187
+ // so --profile isolation covers category intel too.
188
+
189
/**
 * Directory holding one category's market data: `<base>/market/<category>`.
 * The base is `baseDir` when given, otherwise whatever credentialsDir() returns.
 */
export function marketHome(category: string, baseDir?: string): string {
  const root = baseDir != null ? baseDir : credentialsDir();
  return join(root, "market", category);
}
192
+
193
+ // ---------------------------------------------------------------------------
194
+ // Capture: fetch vendor pages, strip to readable text, store content-addressed.
195
+ // The hash cache is the change detector (unchanged page = same hash = no new
196
+ // classification needed), the replay buffer (re-judge a revised taxonomy
197
+ // without re-scraping), and the evidence chain (quoted spans stay resolvable).
198
+
199
// Whole elements whose content is never reader-visible text.
const STRIP_BLOCKS = /<(script|style|noscript|svg|head)\b[\s\S]*?<\/\1\s*>/gi;

// Named entities decoded explicitly; lookups are done on the lowercased entity.
const ENTITIES: Record<string, string> = {
  "&amp;": "&",
  "&lt;": "<",
  "&gt;": ">",
  "&quot;": '"',
  "&#39;": "'",
  "&apos;": "'",
  "&nbsp;": " ",
  "&mdash;": "—",
  "&ndash;": "–",
};

// Decode one `&…;` token: known named entities first, then decimal/hex numeric
// references; anything else (unknown names, invalid code points) becomes a space.
function decodeEntity(entity: string): string {
  const named = ENTITIES[entity.toLowerCase()];
  if (named !== undefined) return named;

  const numeric = /^&#(x[0-9a-f]+|[0-9]+);$/i.exec(entity);
  if (!numeric) return " ";

  const digits = numeric[1] ?? "";
  const isHex = digits.charAt(0).toLowerCase() === "x";
  const codePoint = isHex ? Number.parseInt(digits.slice(1), 16) : Number.parseInt(digits, 10);

  const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
  if (!Number.isFinite(codePoint) || codePoint <= 0 || codePoint > 0x10ffff || isSurrogate) return " ";
  // Keep numeric no-break space consistent with the `&nbsp;` mapping above.
  if (codePoint === 0xa0) return " ";
  return String.fromCodePoint(codePoint);
}

/**
 * Reduce an HTML document to readable plain text.
 *
 * Steps, in order: drop script/style/noscript/svg/head elements; turn closing
 * p/div/li/h1–h6 tags and `<br>` into line breaks; replace every other tag
 * with a space; decode entities (named ones from ENTITIES plus numeric
 * `&#NNN;` / `&#xHH;` references — unknown entities become a space); collapse
 * runs of spaces/tabs; then trim each line and drop empty ones.
 *
 * Numeric references are decoded rather than blanked so that text such as
 * "don&#8217;t" is captured as the page displays it, which is what verbatim
 * quotes are later matched against.
 *
 * @param html Raw HTML.
 * @returns Non-empty trimmed lines joined with "\n" ("" when nothing remains).
 */
export function extractReadableText(html: string): string {
  const withoutBlocks = html.replace(STRIP_BLOCKS, " ");
  const withBreaks = withoutBlocks.replace(/<(\/p|\/div|\/li|\/h[1-6]|br\s*\/?)>/gi, "\n");
  const withoutTags = withBreaks.replace(/<[^>]+>/g, " ");
  const decoded = withoutTags
    .replace(/&[a-z#0-9]+;/gi, decodeEntity)
    .replace(/[ \t]+/g, " ");
  return decoded
    .split("\n")
    .map((line) => line.trim())
    .filter(Boolean)
    .join("\n");
}
225
+
226
/** Minimal page fetcher contract: resolve with the HTTP status and the body text. */
export type FetchPage = (url: string) => Promise<{ status: number; body: string }>;

// Fetcher used when the caller does not inject one: a GET via global fetch
// that follows redirects and identifies itself with a fixed User-Agent.
const defaultFetchPage: FetchPage = async (url) => {
  const requestHeaders = {
    "User-Agent": "fullstackgtm-market/0 (+https://github.com/fullstackgtm/core)",
    "Accept-Language": "en-US",
  };
  const response = await fetch(url, { headers: requestHeaders, redirect: "follow" });
  const body = await response.text();
  return { status: response.status, body };
};
238
+
239
/** Optional knobs for captureMarket; every field has a default. */
export type CaptureOptions = {
  /** Where captures are written; falls back to <marketHome>/captures. */
  dir?: string;
  runLabel?: string;
  /** Replaceable fetcher (useful in tests); falls back to global fetch. */
  fetchPage?: FetchPage;
  now?: () => Date;
};

/** What captureMarket hands back: this call's entries and the manifest location. */
export type CaptureResult = {
  entries: CaptureEntry[];
  manifestPath: string;
};
252
+
253
/**
 * Fetch every configured vendor page, reduce it to readable text, and store
 * the text content-addressed in the capture directory.
 *
 * Targets per vendor, in order: the home URL, the pricing URL (when it is
 * non-null/non-empty), then each product URL. Pages are fetched one at a
 * time. Text is only extracted when the fetch resolves with HTTP 200; a
 * thrown fetch is recorded with `httpStatus: null`. Non-empty text is written
 * to `<sha256>.txt`, so an unchanged page maps to the same file. One manifest
 * entry is recorded per target, and `manifest.json` (prior entries plus this
 * call's) is rewritten once after all targets are processed.
 *
 * @param config  Market config supplying the category and vendor URLs.
 * @param options Capture directory, run label (default "run-1"), fetcher and
 *                clock overrides.
 * @returns The entries recorded by this call and the manifest path.
 * @throws Error when an existing manifest.json does not contain a JSON array;
 *         JSON and file-system errors propagate unchanged.
 */
export async function captureMarket(config: MarketConfig, options: CaptureOptions = {}): Promise<CaptureResult> {
  const dir = options.dir ?? join(marketHome(config.category), "captures");
  const runLabel = options.runLabel ?? "run-1";
  const fetchPage = options.fetchPage ?? defaultFetchPage;
  // One timestamp for the whole run, taken before any fetch starts.
  const fetchedAt = (options.now ?? (() => new Date()))().toISOString();
  mkdirSync(dir, { recursive: true });

  const manifestPath = join(dir, "manifest.json");
  let manifest: CaptureEntry[] = [];
  if (existsSync(manifestPath)) {
    const prior: unknown = JSON.parse(readFileSync(manifestPath, "utf8"));
    // Fail before any network work rather than at the first push.
    if (!Array.isArray(prior)) {
      throw new Error(`capture manifest at ${manifestPath} is not a JSON array`);
    }
    manifest = prior as CaptureEntry[];
  }

  const entries: CaptureEntry[] = [];
  for (const vendor of config.vendors) {
    const targets: Array<{ kind: CaptureEntry["kind"]; url: string }> = [
      { kind: "home", url: vendor.urls.home },
    ];
    if (vendor.urls.pricing) targets.push({ kind: "pricing", url: vendor.urls.pricing });
    // The config is hand-edited JSON and parsing does not validate `urls`;
    // a vendor without a product list simply has no product pages.
    for (const url of vendor.urls.product ?? []) targets.push({ kind: "product", url });

    for (const target of targets) {
      let status: number | null = null;
      let text = "";
      try {
        const page = await fetchPage(target.url);
        status = page.status;
        if (page.status === 200) text = extractReadableText(page.body);
      } catch {
        // A failed fetch is recorded (status null, no hash), not fatal.
        status = null;
      }
      let captureHash: string | null = null;
      if (text) {
        captureHash = createHash("sha256").update(text).digest("hex");
        // Content-addressed: an unchanged page dedupes to the same file.
        writeFileSync(join(dir, `${captureHash}.txt`), text);
      }
      const entry: CaptureEntry = {
        runLabel,
        vendorId: vendor.id,
        kind: target.kind,
        url: target.url,
        fetchedAt,
        httpStatus: status,
        captureHash,
        textChars: text.length,
      };
      manifest.push(entry);
      entries.push(entry);
    }
  }
  writeFileSync(manifestPath, `${JSON.stringify(manifest, null, 2)}\n`);
  return { entries, manifestPath };
}
306
+
307
+ // ---------------------------------------------------------------------------
308
+ // Observation store: append-only sets, one JSON file per run. Like the plan
309
+ // store, this file layout and the hosted backend are two implementations of
310
+ // the same contract.
311
+
312
/** Storage contract for observation sets, keyed by run label. */
export interface ObservationStore {
  /** Persist a new set and resolve with it. */
  append(set: ObservationSet): Promise<ObservationSet>;
  /** Resolve with the set stored under `runLabel`, or null. */
  get(runLabel: string): Promise<ObservationSet | null>;
  /** Summaries (label, timestamp, observation count) of the stored sets. */
  list(): Promise<Array<{ runLabel: string; runAt: string; observations: number }>>;
  /** Resolve with the most recent stored set, or null when there is none. */
  latest(): Promise<ObservationSet | null>;
}
318
+
319
/**
 * File-backed ObservationStore: one `<runLabel>.json` file per run inside
 * `directory` (default `<marketHome(category)>/observations`).
 *
 * - `append` rejects a set whose category differs from the store's, rejects
 *   run labels that are not made of word characters, dots or dashes, and
 *   creates the file exclusively so an existing run file — readable or not —
 *   is never overwritten.
 * - `get` resolves to null for a missing, unreadable, unparsable or
 *   invalidly-labelled run.
 * - `list` / `latest` read every `*.json` file that parses, ordered by `runAt`.
 *
 * @param category  Category this store belongs to.
 * @param directory Optional override for the storage directory.
 */
export function createFileObservationStore(category: string, directory?: string): ObservationStore {
  const dir = directory ?? join(marketHome(category), "observations");

  // Map a run label to its file, refusing labels that could escape `dir`.
  function fileFor(runLabel: string) {
    if (!/^[\w.-]+$/.test(runLabel)) throw new Error(`Invalid run label: ${runLabel}`);
    return join(dir, `${runLabel}.json`);
  }

  // Best-effort read: any failure (bad label, missing file, bad JSON) is null.
  function read(runLabel: string): ObservationSet | null {
    try {
      return JSON.parse(readFileSync(fileFor(runLabel), "utf8")) as ObservationSet;
    } catch {
      return null;
    }
  }

  // Every readable set in the directory, oldest runAt first.
  function listSets(): ObservationSet[] {
    let names: string[] = [];
    try {
      names = readdirSync(dir).filter((name) => name.endsWith(".json"));
    } catch {
      return [];
    }
    return names
      .map((name) => read(name.replace(/\.json$/, "")))
      .filter((set): set is ObservationSet => set !== null)
      .sort((a, b) => a.runAt.localeCompare(b.runAt));
  }

  return {
    async append(set) {
      if (set.category !== category) {
        throw new Error(`Observation set category "${set.category}" does not match store "${category}"`);
      }
      // Validate the label before touching the disk.
      const target = fileFor(set.runLabel);
      mkdirSync(dir, { recursive: true });
      try {
        // "wx" = create exclusively. Checking via read() would treat a corrupt
        // or unreadable run file as absent and silently replace it.
        writeFileSync(target, `${JSON.stringify(set, null, 2)}\n`, { flag: "wx" });
      } catch (error) {
        if ((error as { code?: unknown } | null)?.code === "EEXIST") {
          throw new Error(`Run "${set.runLabel}" already exists — observations are append-only; use a new run label`);
        }
        throw error;
      }
      return set;
    },
    async get(runLabel) {
      return read(runLabel);
    },
    async list() {
      return listSets().map((set) => ({
        runLabel: set.runLabel,
        runAt: set.runAt,
        observations: set.observations.length,
      }));
    },
    async latest() {
      const sets = listSets();
      return sets.length ? sets[sets.length - 1] : null;
    },
  };
}
376
+
377
/**
 * Check a proposed observation set against the config before it is stored.
 *
 * Reported problems: observations that are not an array; unknown vendor or
 * claim ids; more than one observation for the same (vendor × claim) cell;
 * an intensity that is not one of the legal readings; a loud or quiet reading
 * with no evidence items; and any configured (vendor × claim) cell with no
 * observation at all.
 *
 * The function reports rather than throws on malformed proposals (for
 * example a missing `evidence` array), since proposals may come from an LLM.
 *
 * @param config Market config defining the legal vendors and claims.
 * @param set    Proposed observation set.
 * @returns Human-readable problems; an empty array means the set is acceptable.
 */
export function validateObservationSet(config: MarketConfig, set: ObservationSet): string[] {
  const problems: string[] = [];
  const vendorIds = new Set(config.vendors.map((v) => v.id));
  const claimIds = new Set(config.claims.map((c) => c.id));
  const seen = new Set<string>();

  const hasObservationArray = Array.isArray(set.observations);
  if (!hasObservationArray) problems.push("observations must be an array");
  const observations: MarketObservation[] = hasObservationArray ? set.observations : [];

  for (const obs of observations) {
    const cell = `${obs.vendorId} × ${obs.claimId}`;
    if (!vendorIds.has(obs.vendorId)) problems.push(`unknown vendor "${obs.vendorId}"`);
    if (!claimIds.has(obs.claimId)) problems.push(`unknown claim "${obs.claimId}"`);
    if (seen.has(cell)) problems.push(`duplicate observation for ${cell}`);
    seen.add(cell);

    // Own-property check: a plain index lookup would also accept inherited
    // keys such as "constructor" or "toString" as legal intensities.
    if (!Object.prototype.hasOwnProperty.call(INTENSITY_RANK, obs.intensity)) {
      problems.push(`${cell}: invalid intensity "${obs.intensity}"`);
    }

    // A missing or non-array evidence field counts as "no evidence".
    const evidenceCount = Array.isArray(obs.evidence) ? obs.evidence.length : 0;
    if ((obs.intensity === "loud" || obs.intensity === "quiet") && evidenceCount === 0) {
      problems.push(`${cell}: ${obs.intensity} reading with no quoted evidence`);
    }
  }

  // Coverage: every configured cell needs a reading.
  for (const vendor of config.vendors) {
    for (const claim of config.claims) {
      if (!seen.has(`${vendor.id} × ${claim.id}`)) {
        problems.push(`missing observation for ${vendor.id} × ${claim.id}`);
      }
    }
  }
  return problems;
}
410
+
411
+ // ---------------------------------------------------------------------------
412
+ // Evidence span verification — the deterministic gate that makes the
413
+ // verbatim-quote rule mechanical instead of a prompt instruction. Because the
414
+ // source documents are *stored* (unlike call transcripts, which pass through),
415
+ // every quoted span can be checked against the capture it cites before the
416
+ // observation is accepted. Comparison is whitespace-normalized only: case and
417
+ // wording must match the page exactly.
418
+
419
/**
 * Load the capture manifest and the stored text of every capture it cites.
 *
 * Reads `manifest.json` from `directory` (default
 * `<marketHome(category)>/captures`); a missing manifest yields no entries.
 * For each distinct non-null `captureHash`, `<hash>.txt` is read into the
 * returned map. A capture file that cannot be read is simply left out of the
 * map, so anything citing it fails verification later.
 *
 * @returns The manifest entries and a hash → text map.
 */
export function loadCaptureTexts(
  category: string,
  directory?: string,
): { entries: CaptureEntry[]; textByHash: Map<string, string> } {
  const captureDir = directory ?? join(marketHome(category), "captures");
  const manifestFile = join(captureDir, "manifest.json");

  let entries: CaptureEntry[] = [];
  if (existsSync(manifestFile)) {
    entries = JSON.parse(readFileSync(manifestFile, "utf8")) as CaptureEntry[];
  }

  const textByHash = new Map<string, string>();
  for (const entry of entries) {
    const hash = entry.captureHash;
    // Skip failed captures and hashes already loaded.
    if (!hash || textByHash.has(hash)) continue;

    let text: string;
    try {
      text = readFileSync(join(captureDir, `${hash}.txt`), "utf8");
    } catch {
      // Missing capture file: verification of anything citing it will fail loudly.
      continue;
    }
    textByHash.set(hash, text);
  }

  return { entries, textByHash };
}
440
+
441
/**
 * Normalize text for verbatim span matching.
 *
 * Only whitespace is touched: whitespace directly before `. , ; : ! ?` is
 * removed, every remaining whitespace run becomes a single space, and the
 * ends are trimmed. Dropping whitespace before punctuation compensates for an
 * extraction artifact — the HTML-to-text step can leave a line break between
 * a word and the punctuation that followed an inline tag
 * ("placements\n. Districts"). Words, casing and characters are unchanged.
 */
export function normalizeForMatch(value: string): string {
  const punctuationTightened = value.replace(/\s+([.,;:!?])/g, "$1");
  const singleSpaced = punctuationTightened.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
454
+
455
/** One evidence item that failed verification, with the reason. */
export type SpanVerificationFailure = {
  vendorId: string;
  claimId: string;
  quote: string;
  problem: string;
};

/**
 * Check every quoted evidence span against the stored capture it cites.
 *
 * An evidence item fails when, in this order: it carries no
 * `metadata.captureHash`; the cited capture is not in `textByHash`; its quote
 * is empty or whitespace-only; or the normalized quote does not occur in the
 * normalized capture text. The empty-quote check matters because an empty
 * string is a substring of every capture and would otherwise always pass.
 *
 * @param observations Observations whose evidence is to be verified; an
 *                     observation without an evidence array contributes nothing.
 * @param textByHash   Capture text keyed by capture hash.
 * @returns One failure per offending evidence item; empty when all verify.
 */
export function verifyEvidenceSpans(
  observations: MarketObservation[],
  textByHash: Map<string, string>,
): SpanVerificationFailure[] {
  const failures: SpanVerificationFailure[] = [];
  // Each capture is normalized at most once, however many quotes cite it.
  const normalizedByHash = new Map<string, string>();

  for (const obs of observations) {
    for (const evidence of obs.evidence ?? []) {
      const quote = evidence.text ?? "";
      const hash = String(evidence.metadata?.captureHash ?? "");
      if (!hash) {
        failures.push({
          vendorId: obs.vendorId,
          claimId: obs.claimId,
          quote,
          problem: "evidence has no captureHash — spans must cite a stored capture",
        });
        continue;
      }
      const captureText = textByHash.get(hash);
      if (captureText === undefined) {
        failures.push({
          vendorId: obs.vendorId,
          claimId: obs.claimId,
          quote,
          problem: `capture ${hash.slice(0, 12)} not found — evidence must stay resolvable`,
        });
        continue;
      }
      if (quote.trim() === "") {
        failures.push({
          vendorId: obs.vendorId,
          claimId: obs.claimId,
          quote,
          problem: "evidence quote is empty — spans must quote the capture verbatim",
        });
        continue;
      }
      let normalizedCapture = normalizedByHash.get(hash);
      if (normalizedCapture === undefined) {
        normalizedCapture = normalizeForMatch(captureText);
        normalizedByHash.set(hash, normalizedCapture);
      }
      if (!normalizedCapture.includes(normalizeForMatch(quote))) {
        failures.push({
          vendorId: obs.vendorId,
          claimId: obs.claimId,
          quote,
          problem: `quote not found verbatim in capture ${hash.slice(0, 12)}`,
        });
      }
    }
  }
  return failures;
}
502
+
503
+ // ---------------------------------------------------------------------------
504
+ // Front states — deterministic, recomputed every time, never stored.
505
+
506
/** Derived front for one claim: its state plus which vendors are loud or quiet on it. */
export type ClaimFront = {
  claimId: string;
  state: FrontState;
  loudVendorIds: string[];
  quietVendorIds: string[];
};

/**
 * Derive the front state of every configured claim from one observation set.
 *
 * Front rule v1, by number of loud vendors: none → "open" when at least one
 * vendor is quiet, otherwise "vacant"; one → "owned"; two or three →
 * "contested"; four or more → "saturated". Only loud and quiet readings are
 * counted, so absent and unobservable cells never add to either list. When a
 * set holds several observations for the same cell, the highest-ranked
 * intensity is used.
 *
 * @returns One ClaimFront per claim, in config order; vendor ids within each
 *          list follow config vendor order.
 */
export function computeFrontStates(config: MarketConfig, set: ObservationSet): ClaimFront[] {
  // Strongest observation per "vendorId|claimId" cell.
  const strongestByCell = new Map<string, MarketObservation>();
  for (const candidate of set.observations) {
    const cellKey = `${candidate.vendorId}|${candidate.claimId}`;
    const current = strongestByCell.get(cellKey);
    const outranksCurrent =
      current !== undefined && INTENSITY_RANK[candidate.intensity] > INTENSITY_RANK[current.intensity];
    if (!current || outranksCurrent) {
      strongestByCell.set(cellKey, candidate);
    }
  }

  const classify = (loudCount: number, quietCount: number): FrontState => {
    if (loudCount === 0) return quietCount >= 1 ? "open" : "vacant";
    if (loudCount === 1) return "owned";
    if (loudCount <= 3) return "contested";
    return "saturated";
  };

  return config.claims.map((claim) => {
    const readings = config.vendors.map((vendor) => ({
      vendorId: vendor.id,
      intensity: strongestByCell.get(`${vendor.id}|${claim.id}`)?.intensity,
    }));
    const loudVendorIds = readings.filter((r) => r.intensity === "loud").map((r) => r.vendorId);
    const quietVendorIds = readings.filter((r) => r.intensity === "quiet").map((r) => r.vendorId);
    return {
      claimId: claim.id,
      state: classify(loudVendorIds.length, quietVendorIds.length),
      loudVendorIds,
      quietVendorIds,
    };
  });
}
543
+
544
/** A claim whose front state differs between two runs. */
export type FrontDrift = {
  claimId: string;
  before: FrontState;
  after: FrontState;
};

/**
 * List the claims whose front state changed between two runs.
 *
 * Only claims present in both inputs are compared: a claim that appears in
 * `after` but not in `before` is not reported, and claims that only appear in
 * `before` are ignored. Results follow the order of `after`.
 */
export function diffFrontStates(before: ClaimFront[], after: ClaimFront[]): FrontDrift[] {
  const priorStateByClaim = new Map<string, FrontState>();
  for (const { claimId, state } of before) {
    priorStateByClaim.set(claimId, state);
  }

  return after.reduce<FrontDrift[]>((drift, { claimId, state }) => {
    const was = priorStateByClaim.get(claimId);
    if (was && was !== state) {
      drift.push({ claimId, before: was, after: state });
    }
    return drift;
  }, []);
}