fullstackgtm 0.14.1 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +70 -0
- package/README.md +14 -0
- package/dist/cli.js +169 -0
- package/dist/connectors/hubspot.js +62 -7
- package/dist/diff.js +1 -1
- package/dist/index.d.ts +4 -1
- package/dist/index.js +4 -1
- package/dist/market.d.ts +147 -0
- package/dist/market.js +319 -0
- package/dist/marketReport.d.ts +3 -0
- package/dist/marketReport.js +233 -0
- package/dist/mcp.js +20 -0
- package/dist/merge.js +1 -1
- package/dist/resolve.d.ts +37 -0
- package/dist/resolve.js +126 -0
- package/dist/rules.d.ts +12 -0
- package/dist/rules.js +25 -3
- package/dist/types.d.ts +17 -1
- package/docs/crm-health-lifecycle.md +11 -11
- package/llms.txt +4 -0
- package/package.json +1 -1
- package/src/cli.ts +183 -0
- package/src/connectors/hubspot.ts +68 -10
- package/src/diff.ts +1 -1
- package/src/index.ts +29 -0
- package/src/market.ts +467 -0
- package/src/marketReport.ts +272 -0
- package/src/mcp.ts +26 -0
- package/src/merge.ts +1 -1
- package/src/resolve.ts +177 -0
- package/src/rules.ts +24 -3
- package/src/types.ts +18 -0
|
@@ -10,6 +10,7 @@ import type {
|
|
|
10
10
|
CanonicalAccount,
|
|
11
11
|
CanonicalContact,
|
|
12
12
|
CanonicalDeal,
|
|
13
|
+
RecordProvenance,
|
|
13
14
|
CanonicalGtmSnapshot,
|
|
14
15
|
CanonicalUser,
|
|
15
16
|
GtmConnector,
|
|
@@ -125,11 +126,14 @@ export function createHubspotConnector(options: HubspotConnectorOptions): Requir
|
|
|
125
126
|
active: owner.archived !== true,
|
|
126
127
|
}));
|
|
127
128
|
|
|
128
|
-
|
|
129
|
+
// Read-only record-source fields power duplicate-finding attribution
|
|
130
|
+
// ("all five created by integration X") — see RecordProvenance.
|
|
131
|
+
const PROVENANCE_PROPERTIES = "hs_object_source,hs_object_source_label,hs_object_source_id";
|
|
132
|
+
const companyProperties = `${mappedFields(
|
|
129
133
|
mappings,
|
|
130
134
|
"accounts",
|
|
131
135
|
HUBSPOT_DEFAULT_FIELD_MAPPINGS.accounts,
|
|
132
|
-
).join(",")
|
|
136
|
+
).join(",")},${PROVENANCE_PROPERTIES}`;
|
|
133
137
|
const companies = await fetchObjects("companies", companyProperties, false);
|
|
134
138
|
const accounts: CanonicalAccount[] = companies
|
|
135
139
|
.filter((company) => company.id)
|
|
@@ -155,16 +159,17 @@ export function createHubspotConnector(options: HubspotConnectorOptions): Requir
|
|
|
155
159
|
ownerId: stringOrUndefined(
|
|
156
160
|
readMapped(props, "accounts", "ownerId", "hubspot_owner_id"),
|
|
157
161
|
),
|
|
162
|
+
provenance: provenanceFrom(props),
|
|
158
163
|
lastSyncAt: stringOrUndefined(company.updatedAt),
|
|
159
164
|
raw: company,
|
|
160
165
|
};
|
|
161
166
|
});
|
|
162
167
|
|
|
163
|
-
const contactProperties = mappedFields(
|
|
168
|
+
const contactProperties = `${mappedFields(
|
|
164
169
|
mappings,
|
|
165
170
|
"contacts",
|
|
166
171
|
HUBSPOT_DEFAULT_FIELD_MAPPINGS.contacts,
|
|
167
|
-
).join(",")
|
|
172
|
+
).join(",")},${PROVENANCE_PROPERTIES}`;
|
|
168
173
|
const hubspotContacts = await fetchObjects("contacts", contactProperties, true);
|
|
169
174
|
const contacts: CanonicalContact[] = hubspotContacts
|
|
170
175
|
.filter((contact) => contact.id)
|
|
@@ -185,16 +190,17 @@ export function createHubspotConnector(options: HubspotConnectorOptions): Requir
|
|
|
185
190
|
ownerId: stringOrUndefined(
|
|
186
191
|
readMapped(props, "contacts", "ownerId", "hubspot_owner_id"),
|
|
187
192
|
),
|
|
193
|
+
provenance: provenanceFrom(props),
|
|
188
194
|
lastSyncAt: stringOrUndefined(contact.updatedAt),
|
|
189
195
|
raw: contact,
|
|
190
196
|
};
|
|
191
197
|
});
|
|
192
198
|
|
|
193
|
-
const dealProperties = mappedFields(
|
|
199
|
+
const dealProperties = `${mappedFields(
|
|
194
200
|
mappings,
|
|
195
201
|
"deals",
|
|
196
202
|
HUBSPOT_DEFAULT_FIELD_MAPPINGS.deals,
|
|
197
|
-
).join(",")
|
|
203
|
+
).join(",")},${PROVENANCE_PROPERTIES}`;
|
|
198
204
|
const hubspotDeals = await fetchObjects("deals", dealProperties, true);
|
|
199
205
|
const deals: CanonicalDeal[] = hubspotDeals
|
|
200
206
|
.filter((deal) => deal.id)
|
|
@@ -220,6 +226,7 @@ export function createHubspotConnector(options: HubspotConnectorOptions): Requir
|
|
|
220
226
|
identities: [{ provider: "hubspot", externalId: String(deal.id) }],
|
|
221
227
|
accountId: companyId ? String(companyId) : undefined,
|
|
222
228
|
ownerId: stringOrUndefined(readMapped(props, "deals", "ownerId", "hubspot_owner_id")),
|
|
229
|
+
provenance: provenanceFrom(props),
|
|
223
230
|
name: stringOrFallback(readMapped(props, "deals", "name", "dealname"), "Untitled Deal"),
|
|
224
231
|
amount: numberOrUndefined(readMapped(props, "deals", "amount", "amount")),
|
|
225
232
|
stage,
|
|
@@ -419,10 +426,22 @@ export function createHubspotConnector(options: HubspotConnectorOptions): Requir
|
|
|
419
426
|
resolvedExisting = true;
|
|
420
427
|
createdCompaniesByName.set(nameKey, companyId);
|
|
421
428
|
} else {
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
429
|
+
let created;
|
|
430
|
+
try {
|
|
431
|
+
created = await request(`/crm/v3/objects/companies`, {
|
|
432
|
+
method: "POST",
|
|
433
|
+
body: JSON.stringify({
|
|
434
|
+
properties: { name, hs_object_source_detail_2: `fullstackgtm create: (${operation.id})` },
|
|
435
|
+
}),
|
|
436
|
+
});
|
|
437
|
+
} catch {
|
|
438
|
+
// Some portals reject writes to source-detail properties — the
|
|
439
|
+
// provenance stamp is best-effort, the create is not.
|
|
440
|
+
created = await request(`/crm/v3/objects/companies`, {
|
|
441
|
+
method: "POST",
|
|
442
|
+
body: JSON.stringify({ properties: { name } }),
|
|
443
|
+
});
|
|
444
|
+
}
|
|
426
445
|
companyId = String(created.id);
|
|
427
446
|
createdCompanyName = name;
|
|
428
447
|
createdCompaniesByName.set(nameKey, companyId);
|
|
@@ -494,6 +513,37 @@ export function createHubspotConnector(options: HubspotConnectorOptions): Requir
|
|
|
494
513
|
} catch {
|
|
495
514
|
// fall through to create
|
|
496
515
|
}
|
|
516
|
+
// A live CRM often already carries a human-created follow-up for the same
|
|
517
|
+
// record (a previous partial run, or a rep's own task). Creating another
|
|
518
|
+
// on top is duplicate noise — skip when the object already has an open
|
|
519
|
+
// task, regardless of who created it. Fail-open: a lookup hiccup must
|
|
520
|
+
// not block the apply.
|
|
521
|
+
try {
|
|
522
|
+
const objectPath = OBJECT_PATHS[operation.objectType];
|
|
523
|
+
const assoc = await request(
|
|
524
|
+
`/crm/v4/objects/${objectPath}/${encodeURIComponent(operation.objectId)}/associations/tasks?limit=20`,
|
|
525
|
+
);
|
|
526
|
+
const taskIds = ((assoc?.results ?? []) as Array<{ toObjectId?: number | string }>)
|
|
527
|
+
.map((row) => String(row.toObjectId ?? ""))
|
|
528
|
+
.filter(Boolean)
|
|
529
|
+
.slice(0, 10);
|
|
530
|
+
for (const taskId of taskIds) {
|
|
531
|
+
const existingTask = await request(
|
|
532
|
+
`/crm/v3/objects/tasks/${encodeURIComponent(taskId)}?properties=hs_task_status`,
|
|
533
|
+
);
|
|
534
|
+
const status = String(existingTask?.properties?.hs_task_status ?? "");
|
|
535
|
+
if (status !== "COMPLETED" && status !== "DELETED") {
|
|
536
|
+
return {
|
|
537
|
+
operationId: operation.id,
|
|
538
|
+
status: "skipped",
|
|
539
|
+
detail: `An open task (task ${taskId}) already exists on ${operation.objectType}/${operation.objectId}; not creating a duplicate follow-up.`,
|
|
540
|
+
providerData: { id: taskId, existing: true },
|
|
541
|
+
};
|
|
542
|
+
}
|
|
543
|
+
}
|
|
544
|
+
} catch {
|
|
545
|
+
// fall through to create
|
|
546
|
+
}
|
|
497
547
|
const body = `${String(operation.afterValue ?? operation.reason ?? "")}\n\n[${token}]`;
|
|
498
548
|
const response = await request(`/crm/v3/objects/tasks`, {
|
|
499
549
|
method: "POST",
|
|
@@ -686,6 +736,14 @@ export function createHubspotConnector(options: HubspotConnectorOptions): Requir
|
|
|
686
736
|
};
|
|
687
737
|
}
|
|
688
738
|
|
|
739
|
+
/**
 * Collect HubSpot's record-source properties (`hs_object_source`,
 * `hs_object_source_label`, `hs_object_source_id`) into a RecordProvenance.
 *
 * @param props Raw property bag of a HubSpot record.
 * @returns The provenance triple, or undefined when none of the three
 *   properties yields a value.
 */
function provenanceFrom(props: Record<string, unknown>): RecordProvenance | undefined {
  const provenance = {
    source: stringOrUndefined(props.hs_object_source),
    sourceLabel: stringOrUndefined(props.hs_object_source_label),
    sourceId: stringOrUndefined(props.hs_object_source_id),
  };
  // A record with no source information at all carries no provenance object.
  const hasAnySource = Boolean(provenance.source || provenance.sourceLabel || provenance.sourceId);
  return hasAnySource ? provenance : undefined;
}
|
|
746
|
+
|
|
689
747
|
function stringOrUndefined(value: unknown): string | undefined {
|
|
690
748
|
if (value === undefined || value === null || value === "") return undefined;
|
|
691
749
|
return String(value);
|
package/src/diff.ts
CHANGED
|
@@ -7,7 +7,7 @@ import type { AuditFinding, CanonicalGtmSnapshot, PatchPlan } from "./types.ts";
|
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
9
|
// Fields that change on every sync without semantic meaning.
|
|
10
|
-
const IGNORED_FIELDS = new Set(["raw", "lastSyncAt", "identities"]);
|
|
10
|
+
const IGNORED_FIELDS = new Set(["raw", "lastSyncAt", "identities", "provenance"]);
|
|
11
11
|
|
|
12
12
|
export type FieldChange = { field: string; before: unknown; after: unknown };
|
|
13
13
|
|
package/src/index.ts
CHANGED
|
@@ -95,6 +95,7 @@ export {
|
|
|
95
95
|
orphanAccountRule,
|
|
96
96
|
pastCloseDateRule,
|
|
97
97
|
patchOperationId,
|
|
98
|
+
provenanceSummary,
|
|
98
99
|
requiresHumanInput,
|
|
99
100
|
staleDealRule,
|
|
100
101
|
} from "./rules.ts";
|
|
@@ -128,6 +129,34 @@ export {
|
|
|
128
129
|
type Rubric,
|
|
129
130
|
type ScoredDimension,
|
|
130
131
|
} from "./llm.ts";
|
|
132
|
+
export { resolveRecord, type ResolveCandidate, type ResolveMatch, type ResolveResult } from "./resolve.ts";
|
|
133
|
+
export {
|
|
134
|
+
captureMarket,
|
|
135
|
+
computeFrontStates,
|
|
136
|
+
createFileObservationStore,
|
|
137
|
+
diffFrontStates,
|
|
138
|
+
extractReadableText,
|
|
139
|
+
loadMarketConfig,
|
|
140
|
+
marketHome,
|
|
141
|
+
observationId,
|
|
142
|
+
parseMarketConfig,
|
|
143
|
+
starterMarketConfig,
|
|
144
|
+
validateObservationSet,
|
|
145
|
+
type CaptureEntry,
|
|
146
|
+
type CaptureOptions,
|
|
147
|
+
type ClaimFront,
|
|
148
|
+
type ClaimIntensity,
|
|
149
|
+
type FrontDrift,
|
|
150
|
+
type FrontState,
|
|
151
|
+
type MarketClaim,
|
|
152
|
+
type MarketConfig,
|
|
153
|
+
type MarketObservation,
|
|
154
|
+
type MarketVendor,
|
|
155
|
+
type ObservationConfidence,
|
|
156
|
+
type ObservationSet,
|
|
157
|
+
type ObservationStore,
|
|
158
|
+
} from "./market.ts";
|
|
159
|
+
export { marketMapToHtml, marketMapToMarkdown } from "./marketReport.ts";
|
|
131
160
|
export { suggestValues, type SuggestionConfidence, type ValueSuggestion } from "./suggest.ts";
|
|
132
161
|
export type {
|
|
133
162
|
ApprovalStatus,
|
package/src/market.ts
ADDED
|
@@ -0,0 +1,467 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { credentialsDir } from "./credentials.ts";
|
|
5
|
+
import type { GtmEvidence } from "./types.ts";
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* The Market Map: a live model of the competitive category a company sells
|
|
9
|
+
* into. Vendors publish claims constantly (pricing pages, feature pages,
|
|
10
|
+
* hero copy); each (vendor × claim) cell gets a messaging-intensity reading,
|
|
11
|
+
* and each claim row gets a derived front state. Observations are
|
|
12
|
+
* append-only — history is the product; "what changed since last run" is a
|
|
13
|
+
* first-class question.
|
|
14
|
+
*
|
|
15
|
+
* Division of labor mirrors call intelligence: intensity readings are
|
|
16
|
+
* *proposals* (LLM or human, always with verbatim quoted evidence), while
|
|
17
|
+
* everything downstream — front states, drift, the report — is deterministic
|
|
18
|
+
* over the stored observations. Same stored observations, same map.
|
|
19
|
+
*
|
|
20
|
+
* The claim taxonomy and vendor registry live in a reviewable config file
|
|
21
|
+
* (git-friendly, analyst-edited); captures and observations live under the
|
|
22
|
+
* profile home so one client's category intel never bleeds into another's.
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
/**
 * Messaging-intensity reading for one (vendor × claim) cell. Front-state
 * computation counts only "loud" and "quiet" cells; "unobservable" marks a
 * cell that could not be read and must not be confused with "absent".
 */
export type ClaimIntensity = "loud" | "quiet" | "absent" | "unobservable";

/** Confidence attached to a single observation's reading. */
export type ObservationConfidence = "high" | "medium" | "low";

/**
 * Derived state of a claim row, based on how many vendors are loud on it:
 * none → "open" (at least one quiet vendor) or "vacant"; one → "owned";
 * two or three → "contested"; four or more → "saturated".
 */
export type FrontState = "open" | "contested" | "owned" | "saturated" | "vacant";
|
|
30
|
+
|
|
31
|
+
/** One row of the claim taxonomy held in the market config. */
export type MarketClaim = {
  /** Identifier of the claim; parseMarketConfig rejects missing or duplicate ids. */
  id: string;
  /** The capability being claimed, precise enough to judge loud/quiet/absent. */
  capability: string;
  /** Which ICP the claim cell addresses (category-specific vocabulary). */
  icp: string;
  /** Which pricing structure the claim cell implies (category-specific). */
  pricingStructure: string;
  /** Operational definition: how a reader judges LOUD vs QUIET vs ABSENT. */
  definition: string;
};
|
|
42
|
+
|
|
43
|
+
/** One entry of the vendor registry held in the market config. */
export type MarketVendor = {
  /** Identifier of the vendor; parseMarketConfig rejects missing or duplicate ids. */
  id: string;
  name: string;
  /** Pages captureMarket fetches for this vendor: home, pricing (when set), and each product URL. */
  urls: {
    home: string;
    /** null is itself an observation: no public pricing surface. */
    pricing: string | null;
    product: string[];
  };
  notes?: string;
};
|
|
54
|
+
|
|
55
|
+
/** The reviewable config file: category name, vendor registry, and claim taxonomy. */
export type MarketConfig = {
  /** Category name; also used as a directory segment by marketHome. */
  category: string;
  /** When set, must equal the id of one of `vendors` (enforced by parseMarketConfig). */
  anchorVendor?: string;
  vendors: MarketVendor[];
  claims: MarketClaim[];
  /** The LOUD/QUIET/ABSENT/UNOBSERVABLE judging rule, stated for reviewers. */
  surfaceRule?: string;
};
|
|
63
|
+
|
|
64
|
+
/** A single intensity reading for one (vendor × claim) cell within a run. */
export type MarketObservation = {
  /** stableHash(category, runLabel, vendorId, claimId) — deterministic. */
  id: string;
  vendorId: string;
  claimId: string;
  observedAt: string;
  intensity: ClaimIntensity;
  confidence: ObservationConfidence;
  /** Reviewer-facing: why the reading is what it is. */
  reason: string;
  /**
   * Verbatim quoted spans grounding any non-absent reading
   * (sourceSystem "web", metadata.url + metadata.captureHash).
   * validateObservationSet rejects loud/quiet readings that carry none.
   */
  evidence: GtmEvidence[];
};
|
|
80
|
+
|
|
81
|
+
/** All observations produced by one run; the unit the observation store appends. */
export type ObservationSet = {
  id: string;
  /** Must match the category of the store the set is appended to. */
  category: string;
  /** Names the run; the file store uses it as the file name. */
  runLabel: string;
  /** The file store orders runs by this value when listing and picking the latest. */
  runAt: string;
  /** What produced the readings: "manual" or "llm:<provider>:<model>". */
  extractor: string;
  observations: MarketObservation[];
};
|
|
90
|
+
|
|
91
|
+
/** One fetch attempt recorded in the capture manifest by captureMarket. */
export type CaptureEntry = {
  runLabel: string;
  vendorId: string;
  /** Which of the vendor's configured URLs this entry came from. */
  kind: "home" | "pricing" | "product";
  url: string;
  /** ISO timestamp; captureMarket reads the clock once, so a run's entries share it. */
  fetchedAt: string;
  /** HTTP status of the response; null when the fetch itself threw. */
  httpStatus: number | null;
  /** sha256 of the extracted text; null when the fetch failed or was empty. */
  captureHash: string | null;
  /** Length of the extracted text; 0 when nothing was captured. */
  textChars: number;
};
|
|
102
|
+
|
|
103
|
+
/**
 * Strength ordering of intensities (higher is stronger). computeFrontStates
 * uses it to keep the strongest reading when a set holds several observations
 * for one cell; validateObservationSet consults it to recognise legal values.
 */
const INTENSITY_RANK: Record<ClaimIntensity, number> = {
  loud: 3,
  quiet: 2,
  absent: 1,
  unobservable: 0,
};
|
|
109
|
+
|
|
110
|
+
// Mirrors stableHash in rules.ts (FNV-1a); duplicated to keep market.ts
|
|
111
|
+
// importable without pulling the audit engine.
|
|
112
|
+
/**
 * 32-bit FNV-1a over the UTF-16 code units of `value`.
 *
 * @returns The hash as eight lowercase hex digits.
 */
function fnv1a(value: string): string {
  const OFFSET_BASIS = 0x811c9dc5;
  const PRIME = 0x01000193;
  // split("") yields UTF-16 code units (not code points), matching charCodeAt indexing.
  const folded = value
    .split("")
    .reduce((acc, unit) => Math.imul(acc ^ unit.charCodeAt(0), PRIME), OFFSET_BASIS);
  const unsigned = folded >>> 0;
  return unsigned.toString(16).padStart(8, "0");
}

/**
 * Deterministic id for the observation of one cell in one run: the same
 * (category, runLabel, vendorId, claimId) always yields the same id.
 */
export function observationId(category: string, runLabel: string, vendorId: string, claimId: string): string {
  const cellKey = [category, runLabel, vendorId, claimId].join("|");
  return `obs_${fnv1a(cellKey)}`;
}
|
|
124
|
+
|
|
125
|
+
// ---------------------------------------------------------------------------
|
|
126
|
+
// Config
|
|
127
|
+
|
|
128
|
+
/**
 * Parse and sanity-check a market config from its JSON text.
 *
 * Checks performed: the root is a JSON object, `category` is present,
 * `vendors` and `claims` are non-empty arrays, every vendor and claim has a
 * unique `id`, and `anchorVendor` (when set) names one of the vendors.
 * Other fields are not validated here.
 *
 * @param raw JSON text of the config file.
 * @returns The parsed config.
 * @throws SyntaxError when `raw` is not valid JSON.
 * @throws Error with a "market config: …" message when a check fails.
 */
export function parseMarketConfig(raw: string): MarketConfig {
  const parsed: unknown = JSON.parse(raw);
  // `null`, arrays and primitives are valid JSON but can never be a config;
  // reject them up front instead of crashing on a property read.
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    throw new Error("market config: expected a JSON object");
  }
  const config = parsed as MarketConfig;
  if (!config.category) throw new Error("market config: missing category");
  if (!Array.isArray(config.vendors) || config.vendors.length === 0) {
    throw new Error("market config: at least one vendor is required");
  }
  if (!Array.isArray(config.claims) || config.claims.length === 0) {
    throw new Error("market config: at least one claim is required");
  }

  const groups: Array<[string, readonly unknown[]]> = [
    ["vendor", config.vendors],
    ["claim", config.claims],
  ];
  for (const [label, items] of groups) {
    const seen = new Set<unknown>();
    for (const item of items) {
      // Entries come from hand-edited JSON, so an item may be null or a primitive.
      const id = typeof item === "object" && item !== null ? (item as { id?: unknown }).id : undefined;
      if (!id) throw new Error(`market config: ${label} missing id`);
      if (seen.has(id)) throw new Error(`market config: duplicate ${label} id "${String(id)}"`);
      seen.add(id);
    }
  }

  if (config.anchorVendor && !config.vendors.some((v) => v.id === config.anchorVendor)) {
    throw new Error(`market config: anchorVendor "${config.anchorVendor}" is not in vendors`);
  }
  return config;
}
|
|
153
|
+
|
|
154
|
+
/**
 * Read a market config file from disk (as UTF-8) and run it through
 * parseMarketConfig. Read and parse errors propagate to the caller.
 */
export function loadMarketConfig(path: string): MarketConfig {
  const fileText = readFileSync(path, "utf8");
  return parseMarketConfig(fileText);
}
|
|
157
|
+
|
|
158
|
+
/**
 * Build a placeholder config for a new category: one stand-in vendor (also
 * set as the anchor), one example claim, and a default surface rule. Every
 * value except `category` is boilerplate meant to be replaced by the analyst.
 */
export function starterMarketConfig(category: string): MarketConfig {
  const placeholderVendor: MarketVendor = {
    id: "your-company",
    name: "Your Company",
    urls: { home: "https://example.com/", pricing: null, product: [] },
    notes: "Replace with the real vendor set (≤10 works well). pricing: null records 'no public pricing page'.",
  };

  const exampleClaim: MarketClaim = {
    id: "example-claim",
    capability: "Example capability: what is being claimed, stated precisely",
    icp: "who-buys-it",
    pricingStructure: "how-it-is-priced",
    definition:
      "LOUD if the claim is hero copy or a top-nav named product with a dedicated page; QUIET if it appears only on pages below that; ABSENT if nowhere. Write the definition so a human could judge any vendor's page against it.",
  };

  const surfaceRule =
    "LOUD = hero copy OR top-level-nav named product with dedicated page; QUIET = present on any indexed page below that; ABSENT = nowhere observed (explicit disavowals score ABSENT with the disavowal quoted in reason); UNOBSERVABLE = capture empty/failed — never score ABSENT from a failed capture.";

  return {
    category,
    anchorVendor: placeholderVendor.id,
    vendors: [placeholderVendor],
    claims: [exampleClaim],
    surfaceRule,
  };
}
|
|
184
|
+
|
|
185
|
+
// ---------------------------------------------------------------------------
|
|
186
|
+
// Profile-scoped market home: captures and observations live with credentials
|
|
187
|
+
// so --profile isolation covers category intel too.
|
|
188
|
+
|
|
189
|
+
/**
 * Directory holding one category's market data: `<base>/market/<category>`.
 *
 * @param category Category name, used verbatim as the last path segment.
 * @param baseDir Root to use instead of the credentials directory.
 */
export function marketHome(category: string, baseDir?: string): string {
  const root = baseDir ?? credentialsDir();
  return join(root, "market", category);
}
|
|
192
|
+
|
|
193
|
+
// ---------------------------------------------------------------------------
|
|
194
|
+
// Capture: fetch vendor pages, strip to readable text, store content-addressed.
|
|
195
|
+
// The hash cache is the change detector (unchanged page = same hash = no new
|
|
196
|
+
// classification needed), the replay buffer (re-judge a revised taxonomy
|
|
197
|
+
// without re-scraping), and the evidence chain (quoted spans stay resolvable).
|
|
198
|
+
|
|
199
|
+
/** Matches whole non-content elements (script/style/noscript/svg/head), bodies included. */
const STRIP_BLOCKS = /<(script|style|noscript|svg|head)\b[\s\S]*?<\/\1\s*>/gi;

/**
 * HTML entities that extractReadableText decodes, keyed by the lowercased
 * entity exactly as it appears in markup. Any entity not listed here is
 * replaced with a single space.
 */
const ENTITIES: Record<string, string> = {
  "&amp;": "&",
  "&lt;": "<",
  "&gt;": ">",
  "&quot;": '"',
  "&#39;": "'",
  "&apos;": "'",
  "&nbsp;": " ",
  "&mdash;": "—",
  "&ndash;": "–",
};

/**
 * Reduce an HTML document to plain readable text.
 *
 * Steps, in order: drop script/style/noscript/svg/head blocks, turn the end
 * of block-level elements and `<br>` into newlines, strip all remaining tags,
 * decode the entities listed in ENTITIES (others become a space), collapse
 * runs of spaces/tabs, then trim every line and drop empty ones.
 *
 * @param html Raw page markup.
 * @returns Newline-separated text lines; "" when nothing readable remains.
 */
export function extractReadableText(html: string): string {
  const withoutBlocks = html.replace(STRIP_BLOCKS, " ");
  const withBreaks = withoutBlocks.replace(/<(\/p|\/div|\/li|\/h[1-6]|br\s*\/?)>/gi, "\n");
  const withoutTags = withBreaks.replace(/<[^>]+>/g, " ");
  // Entities are decoded only after tags are gone, so a decoded "<" is never
  // mistaken for markup.
  const decoded = withoutTags
    .replace(/&[a-z#0-9]+;/gi, (entity) => ENTITIES[entity.toLowerCase()] ?? " ")
    .replace(/[ \t]+/g, " ");
  return decoded
    .split("\n")
    .map((line) => line.trim())
    .filter(Boolean)
    .join("\n");
}
|
|
225
|
+
|
|
226
|
+
/** Fetches one URL and resolves with the HTTP status and the response body as text. */
export type FetchPage = (url: string) => Promise<{ status: number; body: string }>;

/**
 * FetchPage backed by the global `fetch`: identifies itself with a
 * fullstackgtm User-Agent, asks for en-US content, and follows redirects.
 */
const defaultFetchPage: FetchPage = async (url) => {
  const requestHeaders = {
    "User-Agent": "fullstackgtm-market/0 (+https://github.com/fullstackgtm/core)",
    "Accept-Language": "en-US",
  };
  const reply = await fetch(url, { headers: requestHeaders, redirect: "follow" });
  const body = await reply.text();
  return { status: reply.status, body };
};
|
|
238
|
+
|
|
239
|
+
export type CaptureOptions = {
  /** Directory for captures; defaults to <marketHome>/captures. */
  dir?: string;
  /** Label stamped on every manifest entry of this run; defaults to "run-1". */
  runLabel?: string;
  /** Injectable for tests; defaults to global fetch. */
  fetchPage?: FetchPage;
  /** Clock override; read once per run to stamp `fetchedAt`. */
  now?: () => Date;
};

export type CaptureResult = {
  /** Entries produced by this call only; the manifest file also keeps earlier ones. */
  entries: CaptureEntry[];
  manifestPath: string;
};

/**
 * Fetch every configured vendor page, store its readable text
 * content-addressed (`<sha256>.txt`), and append one entry per URL to
 * `manifest.json` in the capture directory.
 *
 * A fetch that throws is recorded with `httpStatus: null`; a non-200 response
 * is recorded with its status. In both cases no text is stored and
 * `captureHash` is null. Pages are fetched one at a time, in config order.
 *
 * @param config Market config naming the vendors and their URLs.
 * @param options Capture directory, run label, and test seams.
 * @returns The entries created by this run and the manifest path.
 * @throws Error when an existing manifest file does not hold a JSON array.
 */
export async function captureMarket(config: MarketConfig, options: CaptureOptions = {}): Promise<CaptureResult> {
  const dir = options.dir ?? join(marketHome(config.category), "captures");
  const runLabel = options.runLabel ?? "run-1";
  const fetchPage = options.fetchPage ?? defaultFetchPage;
  const fetchedAt = (options.now ?? (() => new Date()))().toISOString();
  mkdirSync(dir, { recursive: true });

  const manifestPath = join(dir, "manifest.json");
  const manifest: unknown = existsSync(manifestPath) ? JSON.parse(readFileSync(manifestPath, "utf8")) : [];
  // Fail before any network traffic rather than at the first push.
  if (!Array.isArray(manifest)) {
    throw new Error(`capture manifest at ${manifestPath} is not a JSON array`);
  }

  const entries: CaptureEntry[] = [];
  for (const vendor of config.vendors) {
    const targets: Array<{ kind: CaptureEntry["kind"]; url: string }> = [
      { kind: "home", url: vendor.urls.home },
    ];
    if (vendor.urls.pricing) targets.push({ kind: "pricing", url: vendor.urls.pricing });
    // The config is hand-edited JSON; tolerate a vendor that omits `product`.
    for (const url of vendor.urls.product ?? []) targets.push({ kind: "product", url });

    for (const target of targets) {
      let status: number | null = null;
      let text = "";
      try {
        const page = await fetchPage(target.url);
        status = page.status;
        if (page.status === 200) text = extractReadableText(page.body);
      } catch {
        // Network-level failure: recorded as "no status", never as absence.
        status = null;
      }

      let captureHash: string | null = null;
      if (text) {
        captureHash = createHash("sha256").update(text).digest("hex");
        // Content-addressed: an unchanged page dedupes to the same file.
        writeFileSync(join(dir, `${captureHash}.txt`), text);
      }

      const entry: CaptureEntry = {
        runLabel,
        vendorId: vendor.id,
        kind: target.kind,
        url: target.url,
        fetchedAt,
        httpStatus: status,
        captureHash,
        textChars: text.length,
      };
      manifest.push(entry);
      entries.push(entry);
    }
  }

  writeFileSync(manifestPath, `${JSON.stringify(manifest, null, 2)}\n`);
  return { entries, manifestPath };
}
|
|
306
|
+
|
|
307
|
+
// ---------------------------------------------------------------------------
|
|
308
|
+
// Observation store: append-only sets, one JSON file per run. Like the plan
|
|
309
|
+
// store, this file layout and the hosted backend are two implementations of
|
|
310
|
+
// the same contract.
|
|
311
|
+
|
|
312
|
+
/** Append-only storage contract for observation sets, keyed by run label. */
export interface ObservationStore {
  /** Store a new run; rejects when the run label is already taken. */
  append(set: ObservationSet): Promise<ObservationSet>;
  /** The set stored under `runLabel`, or null when it cannot be read. */
  get(runLabel: string): Promise<ObservationSet | null>;
  /** Summary of every readable run, ordered by `runAt` ascending. */
  list(): Promise<Array<{ runLabel: string; runAt: string; observations: number }>>;
  /** The readable run with the greatest `runAt`, or null when there is none. */
  latest(): Promise<ObservationSet | null>;
}

/**
 * File-backed ObservationStore: one `<runLabel>.json` file per run.
 *
 * @param category Category the store belongs to; appended sets must match it.
 * @param directory Directory for the run files; defaults to
 *   `<marketHome(category)>/observations`.
 */
export function createFileObservationStore(category: string, directory?: string): ObservationStore {
  const dir = directory ?? join(marketHome(category), "observations");

  /** Path of a run's file; throws on labels that are not plain word/dot/dash names. */
  function fileFor(runLabel: string): string {
    if (!/^[\w.-]+$/.test(runLabel)) throw new Error(`Invalid run label: ${runLabel}`);
    return join(dir, `${runLabel}.json`);
  }

  /** Best-effort read: a missing, unparsable or invalidly-named run reads as null. */
  function read(runLabel: string): ObservationSet | null {
    try {
      return JSON.parse(readFileSync(fileFor(runLabel), "utf8")) as ObservationSet;
    } catch {
      return null;
    }
  }

  /** Every readable run in the directory, oldest `runAt` first. */
  function listSets(): ObservationSet[] {
    let names: string[] = [];
    try {
      names = readdirSync(dir).filter((name) => name.endsWith(".json"));
    } catch {
      // Directory not created yet: nothing has been appended.
      return [];
    }
    return names
      .map((name) => read(name.replace(/\.json$/, "")))
      .filter((set): set is ObservationSet => set !== null)
      .sort((a, b) => a.runAt.localeCompare(b.runAt));
  }

  return {
    async append(set) {
      if (set.category !== category) {
        throw new Error(`Observation set category "${set.category}" does not match store "${category}"`);
      }
      // Validate the label before touching the disk.
      const target = fileFor(set.runLabel);
      // Append-only is decided by the file's existence, not by whether it
      // parses: a corrupt run file must never be silently overwritten.
      if (existsSync(target)) {
        throw new Error(`Run "${set.runLabel}" already exists — observations are append-only; use a new run label`);
      }
      mkdirSync(dir, { recursive: true });
      // "wx" makes the write fail if another writer created the file in between.
      writeFileSync(target, `${JSON.stringify(set, null, 2)}\n`, { flag: "wx" });
      return set;
    },
    async get(runLabel) {
      return read(runLabel);
    },
    async list() {
      return listSets().map((set) => ({
        runLabel: set.runLabel,
        runAt: set.runAt,
        observations: set.observations.length,
      }));
    },
    async latest() {
      const sets = listSets();
      return sets.length ? sets[sets.length - 1] : null;
    },
  };
}
|
|
376
|
+
|
|
377
|
+
/**
 * Validate a proposed observation set against the config before it enters
 * the store: known vendors/claims, full coverage, legal readings, and the
 * verbatim-evidence rule (loud and quiet readings must quote something).
 *
 * @param config Config whose vendors and claims define the expected cells.
 * @param set Proposed observation set, typically parsed from untrusted JSON.
 * @returns Human-readable problems; an empty array means accept.
 */
export function validateObservationSet(config: MarketConfig, set: ObservationSet): string[] {
  const problems: string[] = [];
  const vendorIds = new Set(config.vendors.map((v) => v.id));
  const claimIds = new Set(config.claims.map((c) => c.id));
  const seen = new Set<string>();

  for (const obs of set.observations) {
    const cell = `${obs.vendorId} × ${obs.claimId}`;
    if (!vendorIds.has(obs.vendorId)) problems.push(`unknown vendor "${obs.vendorId}"`);
    if (!claimIds.has(obs.claimId)) problems.push(`unknown claim "${obs.claimId}"`);
    if (seen.has(cell)) problems.push(`duplicate observation for ${cell}`);
    seen.add(cell);

    // Own-property check: a plain lookup would accept inherited keys such as
    // "constructor" or "toString" as legal intensities.
    if (!Object.prototype.hasOwnProperty.call(INTENSITY_RANK, obs.intensity)) {
      problems.push(`${cell}: invalid intensity "${obs.intensity}"`);
    }

    // A missing or malformed evidence field counts as "no evidence" instead
    // of crashing the validator.
    const quotesEvidence = Array.isArray(obs.evidence) && obs.evidence.length > 0;
    if ((obs.intensity === "loud" || obs.intensity === "quiet") && !quotesEvidence) {
      problems.push(`${cell}: ${obs.intensity} reading with no quoted evidence`);
    }
  }

  // Coverage: every (vendor × claim) cell of the config needs a reading.
  for (const vendor of config.vendors) {
    for (const claim of config.claims) {
      if (!seen.has(`${vendor.id} × ${claim.id}`)) {
        problems.push(`missing observation for ${vendor.id} × ${claim.id}`);
      }
    }
  }
  return problems;
}
|
|
410
|
+
|
|
411
|
+
// ---------------------------------------------------------------------------
|
|
412
|
+
// Front states — deterministic, recomputed every time, never stored.
|
|
413
|
+
|
|
414
|
+
/** Derived front for one claim: its state plus the vendors that are loud or quiet on it. */
export type ClaimFront = {
  claimId: string;
  state: FrontState;
  loudVendorIds: string[];
  quietVendorIds: string[];
};

/**
 * Derive the front state of every configured claim from one observation set.
 *
 * Rule v1, by number of loud vendors: none → "open" when at least one vendor
 * is quiet, otherwise "vacant"; one → "owned"; two or three → "contested";
 * four or more → "saturated". Only loud and quiet cells are counted, so an
 * unobservable cell (failed capture) never reads as absence. When a set
 * holds several observations for the same cell, the strongest reading is
 * used (the earliest one on ties).
 *
 * @returns One ClaimFront per config claim, in config order; vendor id lists
 *   follow config vendor order.
 */
export function computeFrontStates(config: MarketConfig, set: ObservationSet): ClaimFront[] {
  const strongestByCell = new Map<string, MarketObservation>();
  for (const reading of set.observations) {
    const cellKey = `${reading.vendorId}|${reading.claimId}`;
    const current = strongestByCell.get(cellKey);
    const outranks = !current || INTENSITY_RANK[reading.intensity] > INTENSITY_RANK[current.intensity];
    if (outranks) strongestByCell.set(cellKey, reading);
  }

  const vendorsReading = (claimId: string, intensity: ClaimIntensity): string[] =>
    config.vendors
      .map((vendor) => vendor.id)
      .filter((vendorId) => strongestByCell.get(`${vendorId}|${claimId}`)?.intensity === intensity);

  return config.claims.map((claim) => {
    const loudVendorIds = vendorsReading(claim.id, "loud");
    const quietVendorIds = vendorsReading(claim.id, "quiet");
    const loudCount = loudVendorIds.length;

    let state: FrontState;
    if (loudCount >= 4) {
      state = "saturated";
    } else if (loudCount >= 2) {
      state = "contested";
    } else if (loudCount === 1) {
      state = "owned";
    } else {
      state = quietVendorIds.length > 0 ? "open" : "vacant";
    }
    return { claimId: claim.id, state, loudVendorIds, quietVendorIds };
  });
}
|
|
451
|
+
|
|
452
|
+
/** A claim whose front state differs between two runs. */
export type FrontDrift = {
  claimId: string;
  before: FrontState;
  after: FrontState;
};

/**
 * What changed in the category between two runs — the refresh's whole point.
 *
 * Reports every claim present in both inputs whose state differs, in the
 * order the claims appear in `after`. Claims that exist in only one of the
 * two inputs are not reported.
 */
export function diffFrontStates(before: ClaimFront[], after: ClaimFront[]): FrontDrift[] {
  const earlierStateByClaim = new Map<string, FrontState>();
  for (const front of before) earlierStateByClaim.set(front.claimId, front.state);

  const changes: FrontDrift[] = [];
  for (const { claimId, state: current } of after) {
    const previous = earlierStateByClaim.get(claimId);
    if (!previous || previous === current) continue;
    changes.push({ claimId, before: previous, after: current });
  }
  return changes;
}
|