fullstackgtm 0.25.2 → 0.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/connector.js CHANGED
@@ -1,4 +1,69 @@
1
+ import { dedupeKey } from "./dedupe.js";
1
2
  import { requiresHumanInput } from "./rules.js";
3
/** Operations that cannot be undone once applied; they are guarded by a fresh-snapshot check. */
const IRREVERSIBLE_OPERATIONS = new Set(["merge_records", "archive_record"]);
/** Field used as the duplicate-identity key per object type (types not listed have none). */
const IDENTITY_KEY_BY_TYPE = {
    account: "domain",
    contact: "email",
};
/** snapshot collection for an object type */
function collectionFor(objectType) {
    if (objectType === "account")
        return "accounts";
    if (objectType === "contact")
        return "contacts";
    if (objectType === "deal")
        return "deals";
    return null;
}
/**
 * Drift/safety check for the two IRREVERSIBLE operations against a fresh
 * snapshot. Returns a conflict detail string, or null if the op is safe to
 * apply. These operations get NO field compare-and-set (there is no single
 * field to compare), so this snapshot check is their only guard.
 *
 * @param operation the patch operation (reads operation, objectType, objectId,
 *   beforeValue and forceArchiveDuplicate)
 * @param snapshot fresh snapshot holding the `accounts` / `contacts` / `deals` arrays
 * @returns {string | null} human-readable conflict detail, or null when safe
 */
function checkIrreversibleOp(operation, snapshot) {
    const collection = collectionFor(operation.objectType);
    if (!collection)
        return null;
    // A snapshot without this collection cannot vouch for any record: treat it
    // as empty so the op conflicts (fails closed) instead of throwing.
    const records = Array.isArray(snapshot?.[collection]) ? snapshot[collection] : [];
    // Record ids and group ids are compared as strings everywhere below, so
    // normalize the operation's own id the same way (a numeric id must match).
    const objectId = String(operation.objectId);
    const byId = (id) => records.find((record) => String(record.id) === id);
    if (operation.operation === "archive_record") {
        const target = byId(objectId);
        if (!target) {
            return `Record ${operation.objectType}/${operation.objectId} no longer exists (already archived or merged). Re-plan against current data.`;
        }
        // Archiving a duplicate discards data a merge would keep — refuse unless the
        // human explicitly forced it. This catches every archive_record path (agent,
        // hand-edited plan, audit), not just `bulk-update --archive`.
        if (operation.forceArchiveDuplicate)
            return null;
        const keyName = IDENTITY_KEY_BY_TYPE[operation.objectType];
        if (!keyName)
            return null;
        const key = dedupeKey(target, keyName);
        if (!key)
            return null;
        const sharers = records.filter((record) => String(record.id) !== objectId && dedupeKey(record, keyName) === key);
        if (sharers.length === 0)
            return null;
        return (`Refusing to archive ${operation.objectType}/${operation.objectId}: it shares ${keyName} "${key}" with ` +
            `${sharers.length} other record(s) — that's a duplicate, and archiving discards its data where merging keeps it. ` +
            `Merge with \`fullstackgtm dedupe ${operation.objectType} --key ${keyName}\` instead, or rebuild the op with --force-archive-duplicates.`);
    }
    if (operation.operation === "merge_records") {
        if (!byId(objectId)) {
            return `Merge survivor ${operation.objectType}/${operation.objectId} no longer exists (archived or merged away since the plan was built). Re-plan — merges are irreversible.`;
        }
        // beforeValue carries the ids of the whole duplicate group, survivor included.
        const groupIds = Array.isArray(operation.beforeValue) ? operation.beforeValue.map(String) : [];
        const losersStillPresent = groupIds.filter((id) => id !== objectId && byId(id));
        if (groupIds.length > 0 && losersStillPresent.length === 0) {
            return `Every record to merge into ${operation.objectType}/${operation.objectId} is already gone (merge already applied?). Nothing to do — re-plan if duplicates remain.`;
        }
        return null;
    }
    return null;
}
2
67
  const FIELD_WRITE_OPERATIONS = new Set(["set_field", "clear_field", "link_record"]);
3
68
  function normalizeForComparison(value) {
4
69
  if (value === undefined || value === null || value === "")
@@ -35,9 +100,16 @@ export async function applyPatchPlan(connector, plan, options) {
35
100
  // closed — but it can be shrunk: re-run the snapshot checks after the
36
101
  // first write and every `recheckEvery` writes, conflicting out any
37
102
  // operation whose record went stale mid-run.
38
- const needsSnapshot = ((plan.guards && plan.guards.length > 0) || plan.filter) && connector.fetchSnapshot;
103
+ // Irreversible ops (merge/archive) need a fresh snapshot too — it is their
104
+ // only drift/safety guard (no field to compare-and-set). Respect a caller's
105
+ // explicit checkConflicts:false opt-out (a stub/known-stale snapshot).
106
+ const hasIrreversibleApproved = checkConflicts &&
107
+ plan.operations.some((operation) => approved.has(operation.id) && IRREVERSIBLE_OPERATIONS.has(operation.operation));
108
+ const needsSnapshot = ((plan.guards && plan.guards.length > 0) || plan.filter || hasIrreversibleApproved) &&
109
+ connector.fetchSnapshot;
39
110
  const recheckEvery = Math.max(1, options.recheckEvery ?? 25);
40
111
  const staleIds = new Set();
112
+ const irreversibleStale = new Map();
41
113
  let guardFailure = null;
42
114
  const refreshSnapshotChecks = async () => {
43
115
  if (!needsSnapshot)
@@ -52,6 +124,16 @@ export async function applyPatchPlan(connector, plan, options) {
52
124
  staleIds.add(operation.objectId);
53
125
  }
54
126
  }
127
+ irreversibleStale.clear();
128
+ if (checkConflicts) {
129
+ for (const operation of plan.operations) {
130
+ if (!approved.has(operation.id) || !IRREVERSIBLE_OPERATIONS.has(operation.operation))
131
+ continue;
132
+ const detail = checkIrreversibleOp(operation, liveSnapshot);
133
+ if (detail)
134
+ irreversibleStale.set(operation.id, detail);
135
+ }
136
+ }
55
137
  for (const guard of plan.guards ?? []) {
56
138
  const failure = evaluateGuard(liveSnapshot, guard);
57
139
  if (failure) {
@@ -182,6 +264,13 @@ export async function applyPatchPlan(connector, plan, options) {
182
264
  poisonedGroups.add(operation.groupId);
183
265
  continue;
184
266
  }
267
+ const irreversibleConflict = irreversibleStale.get(operation.id);
268
+ if (irreversibleConflict) {
269
+ results.push({ operationId: operation.id, status: "conflict", detail: irreversibleConflict });
270
+ if (operation.groupId)
271
+ poisonedGroups.add(operation.groupId);
272
+ continue;
273
+ }
185
274
  if (operation.groupId && poisonedGroups.has(operation.groupId)) {
186
275
  results.push({
187
276
  operationId: operation.id,
package/dist/dedupe.d.ts CHANGED
@@ -9,6 +9,12 @@ export type DedupeOptions = {
9
9
  /** refuse to build plans larger than this (default 500 operations) */
10
10
  maxOperations?: number;
11
11
  };
12
+ /**
13
+ * The subset of a record worth keeping as a merge-recovery artifact: its id (to
14
+ * reference) plus every populated data field, dropping bulky/plumbing fields
15
+ * (raw, identities, provenance) that aren't needed to recreate it by hand.
16
+ */
17
+ export declare function recoverableFields(record: Record<string, unknown>): Record<string, unknown>;
12
18
  /** Normalize a record's identity key; undefined when the field is empty. */
13
19
  export declare function dedupeKey(record: Record<string, unknown>, key: DedupeOptions["key"]): string | undefined;
14
20
  export declare function buildDedupePlan(snapshot: CanonicalGtmSnapshot, options: DedupeOptions): PatchPlan;
package/dist/dedupe.js CHANGED
@@ -40,6 +40,22 @@ const NON_DATA_FIELDS = new Set(["id", "provider", "crmId", "identities", "raw",
40
40
  function populatedDataFields(record) {
41
41
  return Object.entries(record).filter(([field, value]) => !NON_DATA_FIELDS.has(field) && value !== undefined && value !== null && value !== "").length;
42
42
  }
43
/**
 * Build the merge-recovery view of a record: its id (stringified, so it can be
 * referenced) followed by every populated data field. Fields listed in
 * NON_DATA_FIELDS and fields that are undefined, null or "" are left out.
 */
export function recoverableFields(record) {
    const seed = { id: String(record.id) };
    return Object.entries(record).reduce((kept, [field, value]) => {
        const populated = value !== undefined && value !== null && value !== "";
        if (populated && !NON_DATA_FIELDS.has(field)) {
            kept[field] = value;
        }
        return kept;
    }, seed);
}
43
59
  /** True when id `a` sorts before id `b` — numeric when both ids are numeric. */
44
60
  function idBefore(a, b) {
45
61
  const numericA = Number(a);
@@ -102,6 +118,12 @@ export function buildDedupePlan(snapshot, options) {
102
118
  const groupIds = members
103
119
  .map((member) => String(member.id))
104
120
  .sort((a, b) => (idBefore(a, b) ? -1 : 1));
121
+ // Recovery artifact: the records that will be merged away (everyone but the
122
+ // survivor), captured with their field values so a human can recreate one by
123
+ // hand if the merge was wrong. Merges are irreversible — the plan is the backup.
124
+ const recoverySnapshot = members
125
+ .filter((member) => String(member.id) !== String(survivor.id))
126
+ .map((member) => recoverableFields(member));
105
127
  const survivorName = typeof survivor.name === "string" && survivor.name
106
128
  ? survivor.name
107
129
  : typeof survivor.email === "string" && survivor.email
@@ -124,7 +146,8 @@ export function buildDedupePlan(snapshot, options) {
124
146
  approvalRequired: true,
125
147
  sourceRuleOrPolicy: "dedupe",
126
148
  groupId: `grp_${options.objectType}_${String(survivor.id)}`,
127
- rollback: "IRREVERSIBLE: provider merges cannot be unmerged. The pre-apply snapshot retains every record's field values; recreate a record manually from it if a merge was wrong.",
149
+ recoverySnapshot,
150
+ rollback: "IRREVERSIBLE: provider merges cannot be unmerged. recoverySnapshot on this operation retains every merged-away record's field values; recreate a record manually from it if a merge was wrong.",
128
151
  });
129
152
  }
130
153
  return {
package/dist/index.d.ts CHANGED
@@ -16,6 +16,8 @@ export { apolloPullKeysForAppend, apolloPullKeysForRefresh, createApolloClient,
16
16
  export { diffFindings, diffSnapshots, diffToMarkdown, type CollectionDiff, type FieldChange, type FindingsDrift, type RecordChange, type SnapshotDiff, } from "./diff.ts";
17
17
  export { mergeSnapshots, type MergeConflict, type MergeMatch, type MergeReport, type MergeSuggestion, } from "./merge.ts";
18
18
  export { createFilePlanStore, type PlanStore, type StoredPlan } from "./planStore.ts";
19
+ export { computeApprovalDigests, loadOrCreateSigningKey, loadSigningKey, signApproval, verifyApprovalDigests, type ApprovalVerification, } from "./integrity.ts";
20
+ export { buildAuditLog, verifyAuditLog, type AuditLogEntry, type AuditLogExport, type AuditLogVerification, } from "./auditLog.ts";
19
21
  export { formatPatchPlanRun, patchPlanToMarkdown } from "./format.ts";
20
22
  export { auditReportToHtml, auditReportToMarkdown, type ReportOptions } from "./report.ts";
21
23
  export { HUBSPOT_DEFAULT_FIELD_MAPPINGS, SALESFORCE_DEFAULT_FIELD_MAPPINGS, mappedField, mappedFields, normalizeFieldMappings, readMappedValue, type CrmObjectType, type FieldMappings, } from "./mappings.ts";
package/dist/index.js CHANGED
@@ -16,6 +16,8 @@ export { apolloPullKeysForAppend, apolloPullKeysForRefresh, createApolloClient,
16
16
  export { diffFindings, diffSnapshots, diffToMarkdown, } from "./diff.js";
17
17
  export { mergeSnapshots, } from "./merge.js";
18
18
  export { createFilePlanStore } from "./planStore.js";
19
+ export { computeApprovalDigests, loadOrCreateSigningKey, loadSigningKey, signApproval, verifyApprovalDigests, } from "./integrity.js";
20
+ export { buildAuditLog, verifyAuditLog, } from "./auditLog.js";
19
21
  export { formatPatchPlanRun, patchPlanToMarkdown } from "./format.js";
20
22
  export { auditReportToHtml, auditReportToMarkdown } from "./report.js";
21
23
  export { HUBSPOT_DEFAULT_FIELD_MAPPINGS, SALESFORCE_DEFAULT_FIELD_MAPPINGS, mappedField, mappedFields, normalizeFieldMappings, readMappedValue, } from "./mappings.js";
@@ -0,0 +1,30 @@
1
+ import type { PatchOperation } from "./types.ts";
2
+ /** Read the signing key, or null if it has not been created yet. */
3
+ export declare function loadSigningKey(): Buffer | null;
4
+ /** Read the signing key, creating a fresh 32-byte one (0600) on first use. */
5
+ export declare function loadOrCreateSigningKey(): Buffer;
6
+ /** HMAC-SHA256 signature of one operation's approved content. */
7
+ export declare function signApproval(operation: PatchOperation, override: unknown, key: Buffer): string;
8
+ /**
9
+ * Compute the approval signature map for a set of approved operation ids,
10
+ * resolving each op from the plan and its (approved) value override.
11
+ */
12
+ export declare function computeApprovalDigests(operations: PatchOperation[], approvedOperationIds: string[], valueOverrides: Record<string, unknown>, key: Buffer): Record<string, string>;
13
+ export type ApprovalVerification = {
14
+ ok: true;
15
+ } | {
16
+ ok: false;
17
+ reason: "no_key";
18
+ tampered: string[];
19
+ } | {
20
+ ok: false;
21
+ reason: "mismatch";
22
+ tampered: string[];
23
+ };
24
+ /**
25
+ * Verify that every approved operation still matches what was signed. Returns
26
+ * ok:true when there are no stored digests (a pre-integrity plan — nothing to
27
+ * verify), when all match, or fails with the list of operation ids whose
28
+ * content changed since approval.
29
+ */
30
+ export declare function verifyApprovalDigests(operations: PatchOperation[], approvedOperationIds: string[], valueOverrides: Record<string, unknown>, storedDigests: Record<string, string> | undefined): ApprovalVerification;
@@ -0,0 +1,128 @@
1
+ import { createHmac, randomBytes } from "node:crypto";
2
+ import { existsSync, readFileSync } from "node:fs";
3
+ import { join } from "node:path";
4
+ import { credentialsDir, ensureSecureHomeDir, writeSecureFile } from "./credentials.js";
5
+ /**
6
+ * Approval integrity.
7
+ *
8
+ * The plan store records WHICH operation ids a human approved, but the apply
9
+ * path re-reads the operation BODIES fresh from the (user-editable) plan file.
10
+ * Nothing bound the approval to the content: an approved op's afterValue or
11
+ * objectId could be changed on disk between `plans approve` and `apply` — by a
12
+ * compromised dependency, a co-tenant, or a plan file synced/edited on another
13
+ * machine — and the changed value would be written under the prior approval.
14
+ *
15
+ * Fix: at approval time, HMAC-sign each approved operation's security-relevant
16
+ * content (including the approved value override) with a per-install secret key
17
+ * stored 0600 alongside the credentials. At apply time, recompute and verify.
18
+ * Any post-approval edit to the operations or the approved overrides changes the
19
+ * signature; a tamper must now also forge an HMAC it cannot compute without the
20
+ * key. The key never leaves the machine, so a plan approved here and applied
21
+ * elsewhere fails closed ("re-approve on this machine") rather than open.
22
+ *
23
+ * This raises the bar from "trust the plan JSON" to "trust the plan JSON only
24
+ * insofar as it still matches what was signed with the local key." It is not a
25
+ * defense against an attacker who already holds the signing key (same-dir, same
26
+ * permissions as the credential store) — that is the documented boundary.
27
+ */
28
/** File name of the approval-signing key inside the credentials directory. */
const SIGNING_KEY_FILE = ".plan-signing-key";
/** Path of the signing key: SIGNING_KEY_FILE joined onto credentialsDir(). */
function signingKeyPath() {
    const directory = credentialsDir();
    return join(directory, SIGNING_KEY_FILE);
}
32
/**
 * Load the signing key from disk. The file holds the key hex-encoded; the
 * result is null when the file is absent or reading it fails.
 */
export function loadSigningKey() {
    const keyFile = signingKeyPath();
    if (existsSync(keyFile)) {
        try {
            const hex = readFileSync(keyFile, "utf8").trim();
            return Buffer.from(hex, "hex");
        }
        catch {
            // An unreadable key file is treated the same as a missing one.
        }
    }
    return null;
}
44
/**
 * Return the stored signing key when it is at least 32 bytes long; otherwise
 * generate a fresh random 32-byte key, write it hex-encoded (with a trailing
 * newline) via writeSecureFile, and return the new key.
 */
export function loadOrCreateSigningKey() {
    const stored = loadSigningKey();
    if (!stored || stored.length < 32) {
        ensureSecureHomeDir();
        const fresh = randomBytes(32);
        writeSecureFile(signingKeyPath(), `${fresh.toString("hex")}\n`);
        return fresh;
    }
    return stored;
}
54
/**
 * Serialize the parts of an operation that an approval is bound to, as one
 * JSON array in a fixed positional order. Absent (undefined/null) fields
 * serialize as null, an absent forceArchiveDuplicate as false, and the value
 * override — when one is given — as a tagged ["__override__", value] pair so a
 * stored override is covered by the signature too.
 */
function canonicalApprovalContent(operation, override) {
    const orNull = (value) => value ?? null;
    // What the operation is and which record it targets.
    const identity = [operation.id, operation.operation, operation.objectType, operation.objectId];
    // What gets written, and the group the operation belongs to.
    const written = [
        orNull(operation.field),
        orNull(operation.beforeValue),
        orNull(operation.afterValue),
        orNull(operation.groupId),
    ];
    // Fields that steer apply BEHAVIOR rather than the written value: the
    // preconditions, the archive-of-duplicate force flag, and the reason text.
    const safety = [
        orNull(operation.preconditions),
        operation.forceArchiveDuplicate ?? false,
        orNull(operation.reason),
    ];
    const overridePart = override === undefined ? null : ["__override__", override];
    return JSON.stringify([...identity, ...written, ...safety, overridePart]);
}
84
/**
 * Sign one operation's approved content: HMAC-SHA256 (keyed with `key`) over
 * its canonical approval string, returned hex-encoded.
 */
export function signApproval(operation, override, key) {
    const mac = createHmac("sha256", key);
    const payload = canonicalApprovalContent(operation, override);
    mac.update(payload);
    return mac.digest("hex");
}
88
/**
 * Sign every approved operation. Each approved id is looked up among
 * `operations` and signed together with its entry in `valueOverrides`; ids
 * with no matching operation are skipped. Returns an id → signature map.
 */
export function computeApprovalDigests(operations, approvedOperationIds, valueOverrides, key) {
    const operationsById = new Map();
    for (const operation of operations) {
        operationsById.set(operation.id, operation);
    }
    const signatures = {};
    for (const id of approvedOperationIds) {
        const operation = operationsById.get(id);
        if (operation) {
            signatures[id] = signApproval(operation, valueOverrides[id], key);
        }
    }
    return signatures;
}
103
/**
 * Verify that every approved operation still matches what was signed.
 *
 * Returns ok:true when there are no stored digests (a pre-integrity plan —
 * nothing to verify) or when every approved operation's recomputed signature
 * equals its stored digest. Otherwise returns ok:false with:
 *  - reason "no_key" when no usable signing key can be loaded (missing,
 *    unreadable, or shorter than 32 bytes); `tampered` is a copy of all
 *    approved ids, since none of them can be verified;
 *  - reason "mismatch" with the ids whose operation is missing, whose digest
 *    is missing, or whose content changed since approval.
 */
export function verifyApprovalDigests(operations, approvedOperationIds, valueOverrides, storedDigests) {
    if (!storedDigests || Object.keys(storedDigests).length === 0)
        return { ok: true };
    const key = loadSigningKey();
    // loadOrCreateSigningKey replaces any key shorter than 32 bytes before
    // signing, so a shorter key (e.g. a truncated or corrupt key file, which
    // decodes to a short or empty Buffer) cannot have produced the stored
    // digests. Report that as a key problem, not as plan tampering.
    if (!key || key.length < 32)
        return { ok: false, reason: "no_key", tampered: [...approvedOperationIds] };
    const byId = new Map(operations.map((operation) => [operation.id, operation]));
    const tampered = [];
    for (const id of approvedOperationIds) {
        const operation = byId.get(id);
        const expected = storedDigests[id];
        // An approved id with no operation or no digest cannot be verified.
        if (!operation || !expected) {
            tampered.push(id);
            continue;
        }
        if (signApproval(operation, valueOverrides[id], key) !== expected)
            tampered.push(id);
    }
    return tampered.length === 0 ? { ok: true } : { ok: false, reason: "mismatch", tampered };
}
package/dist/llm.js CHANGED
@@ -70,8 +70,23 @@ export async function extractInsightsLlm(transcript, options) {
70
70
  const text = truncateTranscript(transcript);
71
71
  const prompt = `${EXTRACT_INSTRUCTIONS}\n\n${options.title ? `Call: ${options.title}\n` : ""}Transcript:\n${text}`;
72
72
  const result = (await forcedToolCall(prompt, "extract_call_insights", EXTRACT_SCHEMA, model, options));
73
+ const normalizedTranscript = normalizeSpan(text);
73
74
  const insights = (result.insights ?? [])
74
75
  .filter((insight) => INSIGHT_TYPES.includes(insight.type))
76
+ // Mechanical verbatim gate (mirrors market classify): the prompt asks for a
77
+ // verbatim quote, but a prompt-injected or hallucinated transcript could
78
+ // fabricate a grounded-looking insight that drives a governed writeback.
79
+ // (1) The evidence quote must be a non-trivial verbatim span of the transcript.
80
+ .filter((insight) => {
81
+ const quote = normalizeSpan(insight.evidence ?? "");
82
+ return quote.length >= 12 && normalizedTranscript.includes(quote);
83
+ })
84
+ // (2) For next_step — the only insight type whose `text` is WRITTEN to the CRM
85
+ // (set_field nextStep / create_task body) — the written action must itself be
86
+ // grounded in the verified quote, not just accompanied by an innocuous one.
87
+ // This closes the decoupling attack: a prompt-injected transcript that emits a
88
+ // malicious `text` while quoting an unrelated real span no longer survives.
89
+ .filter((insight) => insight.type !== "next_step" || actionGroundedInEvidence(insight.text, insight.evidence ?? ""))
75
90
  .map((insight) => ({
76
91
  ...insight,
77
92
  title: insight.type.replace(/_/g, " "),
@@ -81,6 +96,39 @@ export async function extractInsightsLlm(transcript, options) {
81
96
  .sort((a, b) => b.importance - a.importance || b.confidence - a.confidence);
82
97
  return { insights, model };
83
98
  }
99
/**
 * Whitespace/punctuation-spacing-normalized match (same rule as market spans):
 * drops whitespace before . , ; : ! ?, collapses whitespace runs to one space,
 * trims, and lower-cases. A non-string value (model output is untrusted)
 * normalizes to "" instead of throwing.
 */
function normalizeSpan(value) {
    if (typeof value !== "string")
        return "";
    return value
        .replace(/\s+([.,;:!?])/g, "$1")
        .replace(/\s+/g, " ")
        .trim()
        .toLowerCase();
}
/**
 * Is the written next-step action grounded in its (already transcript-verified)
 * evidence quote? A legitimate next step paraphrases the quote, so it reuses the
 * quote's salient terms; a prompt-injected action ("wire $50,000 to account
 * 1234") quoting an unrelated innocuous span does not. Two checks:
 *  1. every number/amount in the action must appear in the evidence as a whole
 *     number token — not as a substring of a longer number, and without
 *     counting sentence punctuation that merely follows the digits;
 *  2. at least 40% of the action's distinctive (≥4-char) words must appear in
 *     the evidence.
 *
 * @param text the action that would be written
 * @param evidence the quote offered in support of it
 * @returns {boolean} true when the action is considered grounded
 */
function actionGroundedInEvidence(text, evidence) {
    // Compare amounts by value, not by formatting: drop thousands separators
    // and, for decimals, trailing zeros ("50,000.00" and "50000" are the same).
    const canonicalNumber = (token) => {
        const plain = token.replace(/,/g, "");
        return plain.includes(".") ? plain.replace(/0+$/, "").replace(/\.$/, "") : plain;
    };
    // Digits with INTERNAL separators only, so "the 15." yields "15", not "15.".
    const numberTokens = (span) => (span.match(/\d+(?:[.,]\d+)*/g) ?? []).map(canonicalNumber);
    const action = normalizeSpan(text);
    const quote = normalizeSpan(evidence);
    if (!action)
        return false;
    const evidenceNumbers = new Set(numberTokens(quote));
    for (const n of numberTokens(action)) {
        if (!evidenceNumbers.has(n))
            return false; // an ungrounded amount/account/id is a red flag
    }
    const distinctive = [...new Set(action.split(/[^a-z0-9$]+/).filter((token) => token.length >= 4))];
    if (distinctive.length === 0)
        return true; // nothing distinctive to ground (a short generic step)
    const grounded = distinctive.filter((token) => quote.includes(token)).length;
    return grounded / distinctive.length >= 0.4;
}
84
132
  export const DEFAULT_RUBRIC = {
85
133
  scale: 5,
86
134
  dimensions: [
@@ -0,0 +1,41 @@
1
+ import { type LlmCallOptions } from "./llm.ts";
2
+ import { type FetchPage, type MarketConfig } from "./market.ts";
3
+ /**
4
+ * Cold-start taxonomy bootstrap. `market init` writes a stub for a human
5
+ * analyst to fill in; the self-serve hosted map has no analyst in the loop, so
6
+ * this proposes the claim taxonomy automatically from the seed vendors' own
7
+ * pages.
8
+ *
9
+ * Posture matches the rest of the market layer: the LLM is a *proposal* layer
10
+ * grounded in captured evidence (it only sees text we actually fetched), and
11
+ * everything downstream — capture, classify with verbatim-span verification,
12
+ * front states, the report — stays deterministic over the stored observations.
13
+ * The taxonomy it emits is a normal `market.config.json` a human can still edit.
14
+ */
15
+ export type SeedVendor = {
16
+ url: string;
17
+ /** Display name; derived from the host when omitted. */
18
+ name?: string;
19
+ /** Marks the user's own company as the anchor vendor. */
20
+ anchor?: boolean;
21
+ };
22
+ export type SuggestTaxonomyOptions = {
23
+ category: string;
24
+ vendors: SeedVendor[];
25
+ llm: LlmCallOptions;
26
+ /** Upper bound on proposed claims, to keep classification bounded. */
27
+ maxClaims?: number;
28
+ /** Per-vendor captured-text budget fed to the proposer (chars). */
29
+ perVendorChars?: number;
30
+ /** Test injectables. */
31
+ fetchPage?: FetchPage;
32
+ capturesDir?: string;
33
+ now?: () => Date;
34
+ };
35
+ export type SuggestTaxonomyResult = {
36
+ config: MarketConfig;
37
+ /** Vendors whose homepage capture was empty/failed (excluded from grounding). */
38
+ unreadableVendorIds: string[];
39
+ model: string;
40
+ };
41
+ export declare function suggestMarketConfig(options: SuggestTaxonomyOptions): Promise<SuggestTaxonomyResult>;
@@ -0,0 +1,193 @@
1
+ import { DEFAULT_MODELS, forcedToolCall, } from "./llm.js";
2
+ import { captureMarket, loadCaptureTexts, } from "./market.js";
3
+ const DEFAULT_MAX_CLAIMS = 16;
4
+ const DEFAULT_PER_VENDOR_CHARS = 6_000;
5
/**
 * Stable, human-readable id from a string (claim capability or host):
 * lower-cased, every run of non-alphanumerics becomes one "-", and only the
 * first `maxWords` words are kept. Returns "item" when nothing usable remains.
 */
function slugify(value, maxWords = 6) {
    const slug = value
        .toLowerCase()
        .replace(/[^a-z0-9]+/g, "-")
        .replace(/^-+|-+$/g, "")
        .split("-")
        .filter(Boolean)
        .slice(0, maxWords)
        .join("-");
    return slug || "item";
}
/**
 * Labels that act as a registry-level second label under a two-letter country
 * TLD (co.uk, com.au, org.nz, …). A heuristic, not the full Public Suffix List.
 */
const REGISTRY_SECOND_LABELS = new Set(["co", "com", "org", "net", "ac", "gov", "edu"]);
/**
 * Registrable name of the host as a vendor id seed:
 * https://www.stripe.com/ -> stripe, https://www.acme.co.uk/ -> acme.
 * Falls back to slugifying the raw input when it does not parse as a URL.
 */
function vendorIdFromUrl(url) {
    let host;
    try {
        host = new URL(url).hostname;
    }
    catch {
        return slugify(url);
    }
    const labels = host.replace(/^www\./, "").split(".");
    let index = labels.length >= 2 ? labels.length - 2 : 0;
    const tld = labels[labels.length - 1];
    // For hosts like acme.co.uk the second-to-last label ("co") belongs to the
    // suffix, so the vendor's own label sits one position further left.
    if (labels.length >= 3 && tld.length === 2 && REGISTRY_SECOND_LABELS.has(labels[index])) {
        index -= 1;
    }
    return slugify(labels[index] || host);
}
30
/**
 * Reserve an id that is not yet in `taken`: `base` itself when free, otherwise
 * the first free `base-2`, `base-3`, … The chosen id is added to `taken`.
 */
function uniqueId(base, taken) {
    let candidate = base;
    let suffix = 1;
    while (taken.has(candidate)) {
        suffix += 1;
        candidate = `${base}-${suffix}`;
    }
    taken.add(candidate);
    return candidate;
}
44
/**
 * Turn seed vendors into provisional vendor entries: a unique id derived from
 * the seed URL, a display name (the seed's trimmed name, else the URL's host
 * without a leading "www.", else the raw URL when it does not parse), and a
 * urls record holding only the home page.
 */
function provisionalVendors(seeds) {
    const usedIds = new Set();
    const displayHost = (url) => {
        try {
            return new URL(url).hostname.replace(/^www\./, "");
        }
        catch {
            return url;
        }
    };
    return seeds.map((seed) => {
        const id = uniqueId(vendorIdFromUrl(seed.url), usedIds);
        const name = seed.name?.trim() || displayHost(seed.url);
        const urls = { home: seed.url, pricing: null, product: [] };
        return { id, name, urls };
    });
}
63
+ const TAXONOMY_SCHEMA = {
64
+ type: "object",
65
+ required: ["claims"],
66
+ properties: {
67
+ surfaceRule: {
68
+ type: "string",
69
+ description: "One sentence stating how a reader judges LOUD vs QUIET vs ABSENT for this category (e.g. hero/top-nav = LOUD, deeper pages = QUIET, nowhere = ABSENT).",
70
+ },
71
+ claims: {
72
+ type: "array",
73
+ description: "The distinct capability positions vendors in this category compete on. 8-16 of them. Only include claims you can actually see evidence for on the supplied pages.",
74
+ items: {
75
+ type: "object",
76
+ required: ["capability", "icp", "pricingStructure", "definition"],
77
+ properties: {
78
+ capability: {
79
+ type: "string",
80
+ description: "What is being claimed, precise enough to judge loud/quiet/absent. Max ~10 words.",
81
+ },
82
+ icp: { type: "string", description: "Which buyer/ICP this claim cell addresses (category vocabulary)." },
83
+ pricingStructure: {
84
+ type: "string",
85
+ description: "Which pricing structure the claim implies (e.g. per-seat, usage-based, flat, free-tier).",
86
+ },
87
+ definition: {
88
+ type: "string",
89
+ description: "Operational definition a human (or classifier) uses to score any vendor's page LOUD/QUIET/ABSENT on this claim.",
90
+ },
91
+ terms: {
92
+ type: "array",
93
+ items: { type: "string" },
94
+ description: "Exact buyer phrasings for this claim, for deterministic mention matching. 2-5 terms.",
95
+ },
96
+ },
97
+ },
98
+ },
99
+ vendors: {
100
+ type: "array",
101
+ description: "Optional refinements: a clean display name per seed URL, and a pricing-page URL if one is clearly linked.",
102
+ items: {
103
+ type: "object",
104
+ required: ["seedUrl"],
105
+ properties: {
106
+ seedUrl: { type: "string" },
107
+ name: { type: "string" },
108
+ pricingUrl: { type: ["string", "null"] },
109
+ },
110
+ },
111
+ },
112
+ },
113
+ };
114
/**
 * Assemble the text shown to the taxonomy proposer. For each vendor, take the
 * first capture entry that has a hash and look up its text; a vendor with no
 * such entry or with blank text is reported in `unreadable`. Readable vendors
 * contribute a "### name (home url)" heading followed by at most
 * `perVendorChars` characters of text; blocks are joined by a blank line.
 */
function buildDossier(vendors, capture, perVendorChars) {
    const { entries, textByHash } = capture;
    const sections = [];
    const unreadable = [];
    vendors.forEach((vendor) => {
        const entry = entries.find((e) => e.vendorId === vendor.id && e.captureHash);
        const hash = entry?.captureHash ?? null;
        const text = hash ? textByHash.get(hash) ?? "" : "";
        if (text.trim()) {
            sections.push(`### ${vendor.name} (${vendor.urls.home})\n${text.slice(0, perVendorChars)}`);
        }
        else {
            unreadable.push(vendor.id);
        }
    });
    return { dossier: sections.join("\n\n"), unreadable };
}
129
+ const INSTRUCTIONS = `You are seeding a competitive "market map" for a category. A market map breaks the category into CLAIMS — the distinct capability positions vendors compete on — so each (vendor x claim) cell can later be scored LOUD / QUIET / ABSENT from that vendor's pages.
130
+
131
+ Propose the claim taxonomy for this category from the competitor homepages below. Rules:
132
+ - Ground every claim in what is actually visible on the supplied pages. Do not invent positions no vendor mentions.
133
+ - Each claim is a cell: a precise capability, the ICP it targets, and the pricing structure it implies.
134
+ - Write each definition so a reader could judge ANY vendor's page LOUD/QUIET/ABSENT against it.
135
+ - Aim for the 8-16 claims that genuinely differentiate vendors. Prefer specific, contested positions over generic table stakes.
136
+ - Provide 2-5 verbatim buyer terms per claim for later mention matching.
137
+ - Optionally return a cleaned display name and a pricing-page URL per seed vendor when evident.`;
138
/** Trimmed copy of `value` when it is a string, otherwise "" (model output is untrusted). */
function trimmedString(value) {
    return typeof value === "string" ? value.trim() : "";
}
/**
 * Propose a market config (vendors + claim taxonomy) for a category from the
 * seed vendors' captured pages.
 *
 * Steps: derive provisional vendors from the seeds, capture them with
 * captureMarket, build a text dossier from the captures, ask the model for a
 * taxonomy through a forced tool call, then validate and normalize the answer.
 * The model's answer is treated as untrusted: fields of the wrong type are
 * ignored rather than dereferenced, and a claim is kept only when both its
 * capability and its definition are non-empty after trimming.
 *
 * @param options see SuggestTaxonomyOptions (category, vendors, llm, optional
 *   maxClaims / perVendorChars and the test injectables)
 * @returns {Promise<{config, unreadableVendorIds, model}>}
 * @throws {Error} when no seed vendors are given, when no seed page yields
 *   readable text, or when the model proposes no usable claims
 */
export async function suggestMarketConfig(options) {
    const { category } = options;
    if (options.vendors.length === 0)
        throw new Error("suggestMarketConfig requires at least one seed vendor");
    const maxClaims = options.maxClaims ?? DEFAULT_MAX_CLAIMS;
    const perVendorChars = options.perVendorChars ?? DEFAULT_PER_VENDOR_CHARS;
    const model = options.llm.model ?? DEFAULT_MODELS[options.llm.provider];
    const vendors = provisionalVendors(options.vendors);
    // provisionalVendors maps seeds one-to-one, so the anchor keeps its index.
    const anchorIndex = options.vendors.findIndex((seed) => seed.anchor);
    const anchorId = anchorIndex >= 0 ? vendors[anchorIndex]?.id : undefined;
    // Capture the seed homepages so the proposer only sees text we actually
    // fetched (the SSRF guard in captureMarket applies to these user-supplied URLs).
    await captureMarket({ category, vendors, claims: [] }, { dir: options.capturesDir, runLabel: "bootstrap", fetchPage: options.fetchPage, now: options.now });
    const capture = loadCaptureTexts(category, options.capturesDir);
    const { dossier, unreadable } = buildDossier(vendors, capture, perVendorChars);
    if (!dossier.trim()) {
        throw new Error(`market init --auto: none of the ${vendors.length} seed pages returned readable text — check the URLs are public homepages.`);
    }
    const prompt = `${INSTRUCTIONS}\n\nCategory: ${category}\n\nCompetitor homepages:\n${dossier}`;
    const result = (await forcedToolCall(prompt, "propose_market_taxonomy", TAXONOMY_SCHEMA, model, options.llm)) ?? {};
    const takenClaimIds = new Set();
    const claims = (Array.isArray(result.claims) ? result.claims : [])
        // Normalize first, so whitespace-only or non-string fields count as missing.
        .map((claim) => ({
        capability: trimmedString(claim?.capability),
        definition: trimmedString(claim?.definition),
        icp: trimmedString(claim?.icp),
        pricingStructure: trimmedString(claim?.pricingStructure),
        terms: Array.isArray(claim?.terms) ? claim.terms.map(trimmedString).filter(Boolean) : [],
    }))
        .filter((claim) => claim.capability && claim.definition)
        .slice(0, maxClaims)
        .map((claim) => ({
        id: uniqueId(slugify(claim.capability), takenClaimIds),
        capability: claim.capability,
        icp: claim.icp || "general",
        pricingStructure: claim.pricingStructure || "unspecified",
        definition: claim.definition,
        ...(claim.terms.length > 0 ? { terms: claim.terms } : {}),
    }));
    if (claims.length === 0) {
        throw new Error("market init --auto: the model proposed no usable claims — try again or seed the taxonomy by hand.");
    }
    // Apply optional vendor refinements (display name + pricing URL), matched by seed URL.
    const refinementByUrl = new Map((Array.isArray(result.vendors) ? result.vendors : [])
        .filter((v) => typeof v?.seedUrl === "string")
        .map((v) => [v.seedUrl, v]));
    const refinedVendors = vendors.map((vendor) => {
        const refinement = refinementByUrl.get(vendor.urls.home);
        // Only an absolute http(s) URL is accepted as a proposed pricing page.
        const proposedPricing = trimmedString(refinement?.pricingUrl);
        const pricing = /^https?:\/\//i.test(proposedPricing) ? proposedPricing : vendor.urls.pricing;
        return {
            ...vendor,
            name: trimmedString(refinement?.name) || vendor.name,
            urls: { ...vendor.urls, pricing },
        };
    });
    const config = {
        category,
        ...(anchorId ? { anchorVendor: anchorId } : {}),
        vendors: refinedVendors,
        claims,
        surfaceRule: trimmedString(result.surfaceRule) ||
            "LOUD = hero copy OR top-level-nav named product with a dedicated page; QUIET = present on any indexed page below that; ABSENT = nowhere observed; UNOBSERVABLE = capture empty/failed — never score ABSENT from a failed capture.",
    };
    return { config, unreadableVendorIds: unreadable, model };
}