fullstackgtm 0.25.2 → 0.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +98 -0
- package/DATA-FLOWS.md +52 -0
- package/NOTICE +5 -0
- package/README.md +3 -1
- package/SECURITY.md +69 -0
- package/dist/auditLog.d.ts +58 -0
- package/dist/auditLog.js +112 -0
- package/dist/bulkUpdate.js +6 -1
- package/dist/cli.d.ts +1 -1
- package/dist/cli.js +102 -0
- package/dist/connector.js +90 -1
- package/dist/dedupe.d.ts +6 -0
- package/dist/dedupe.js +24 -1
- package/dist/index.d.ts +2 -0
- package/dist/index.js +2 -0
- package/dist/integrity.d.ts +30 -0
- package/dist/integrity.js +128 -0
- package/dist/llm.js +48 -0
- package/dist/marketTaxonomy.d.ts +41 -0
- package/dist/marketTaxonomy.js +193 -0
- package/dist/planStore.d.ts +6 -0
- package/dist/planStore.js +10 -2
- package/dist/schedule.js +4 -0
- package/dist/types.d.ts +16 -0
- package/package.json +6 -3
- package/src/auditLog.ts +173 -0
- package/src/bulkUpdate.ts +6 -1
- package/src/cli.ts +112 -0
- package/src/connector.ts +96 -1
- package/src/dedupe.ts +23 -1
- package/src/index.ts +15 -0
- package/src/integrity.ts +146 -0
- package/src/llm.ts +47 -0
- package/src/marketTaxonomy.ts +288 -0
- package/src/planStore.ts +23 -4
- package/src/schedule.ts +6 -0
- package/src/types.ts +16 -0
package/dist/connector.js
CHANGED
|
@@ -1,4 +1,69 @@
|
|
|
1
|
+
import { dedupeKey } from "./dedupe.js";
|
|
1
2
|
import { requiresHumanInput } from "./rules.js";
|
|
3
|
+
/** Operation kinds treated as irreversible by the apply path. */
const IRREVERSIBLE_OPERATIONS = new Set([
    "merge_records",
    "archive_record",
]);
/** Identity field compared to detect duplicates, per object type (types not listed have none). */
const IDENTITY_KEY_BY_TYPE = { account: "domain", contact: "email" };
|
|
8
|
+
/**
 * Name of the snapshot collection that holds records of the given object type.
 * Returns null for any type other than account, contact or deal.
 */
function collectionFor(objectType) {
    switch (objectType) {
        case "account":
            return "accounts";
        case "contact":
            return "contacts";
        case "deal":
            return "deals";
        default:
            return null;
    }
}
|
|
18
|
+
/**
 * Drift/safety check for the two IRREVERSIBLE operations (archive_record and
 * merge_records) against a fresh snapshot. These operations get NO field
 * compare-and-set (there is no single field to compare), so this snapshot
 * check is their only guard.
 *
 * @param operation plan operation; reads `operation`, `objectType`, `objectId`,
 *   `beforeValue` (the merge group ids) and `forceArchiveDuplicate`.
 * @param snapshot fresh snapshot, keyed by collection name.
 * @returns a conflict detail string, or null if the op is safe to apply (or is
 *   not an irreversible op / has no known collection).
 */
function checkIrreversibleOp(operation, snapshot) {
    const collection = collectionFor(operation.objectType);
    if (!collection)
        return null;
    // A snapshot that lacks this collection is treated as "no records": the op
    // then conflicts out below instead of aborting the apply with a TypeError.
    const rawRecords = snapshot?.[collection];
    const records = Array.isArray(rawRecords) ? rawRecords : [];
    // Record ids are stringified for comparison, so the operation's id must be
    // too — otherwise a numeric objectId can never match and every such op is
    // misreported as "no longer exists".
    const objectId = operation.objectId == null ? null : String(operation.objectId);
    const byId = (id) => records.find((record) => String(record.id) === id);
    const target = objectId === null ? undefined : byId(objectId);
    if (operation.operation === "archive_record") {
        if (!target) {
            return `Record ${operation.objectType}/${operation.objectId} no longer exists (already archived or merged). Re-plan against current data.`;
        }
        // Archiving a duplicate discards data a merge would keep — refuse unless the
        // human explicitly forced it. This catches every archive_record path (agent,
        // hand-edited plan, audit), not just `bulk-update --archive`.
        if (operation.forceArchiveDuplicate)
            return null;
        const keyName = IDENTITY_KEY_BY_TYPE[operation.objectType];
        if (!keyName)
            return null;
        const key = dedupeKey(target, keyName);
        if (!key)
            return null;
        const sharers = records.filter((record) => String(record.id) !== objectId && dedupeKey(record, keyName) === key);
        if (sharers.length === 0)
            return null;
        return (`Refusing to archive ${operation.objectType}/${operation.objectId}: it shares ${keyName} "${key}" with ` +
            `${sharers.length} other record(s) — that's a duplicate, and archiving discards its data where merging keeps it. ` +
            `Merge with \`fullstackgtm dedupe ${operation.objectType} --key ${keyName}\` instead, or rebuild the op with --force-archive-duplicates.`);
    }
    if (operation.operation === "merge_records") {
        if (!target) {
            return `Merge survivor ${operation.objectType}/${operation.objectId} no longer exists (archived or merged away since the plan was built). Re-plan — merges are irreversible.`;
        }
        // beforeValue carries the ids of the whole merge group (survivor included).
        const groupIds = Array.isArray(operation.beforeValue) ? operation.beforeValue.map(String) : [];
        const losersStillPresent = groupIds.filter((id) => id !== objectId && byId(id));
        if (groupIds.length > 0 && losersStillPresent.length === 0) {
            return `Every record to merge into ${operation.objectType}/${operation.objectId} is already gone (merge already applied?). Nothing to do — re-plan if duplicates remain.`;
        }
        return null;
    }
    return null;
}
|
|
2
67
|
const FIELD_WRITE_OPERATIONS = new Set(["set_field", "clear_field", "link_record"]);
|
|
3
68
|
function normalizeForComparison(value) {
|
|
4
69
|
if (value === undefined || value === null || value === "")
|
|
@@ -35,9 +100,16 @@ export async function applyPatchPlan(connector, plan, options) {
|
|
|
35
100
|
// closed — but it can be shrunk: re-run the snapshot checks after the
|
|
36
101
|
// first write and every `recheckEvery` writes, conflicting out any
|
|
37
102
|
// operation whose record went stale mid-run.
|
|
38
|
-
|
|
103
|
+
// Irreversible ops (merge/archive) need a fresh snapshot too — it is their
|
|
104
|
+
// only drift/safety guard (no field to compare-and-set). Respect a caller's
|
|
105
|
+
// explicit checkConflicts:false opt-out (a stub/known-stale snapshot).
|
|
106
|
+
const hasIrreversibleApproved = checkConflicts &&
|
|
107
|
+
plan.operations.some((operation) => approved.has(operation.id) && IRREVERSIBLE_OPERATIONS.has(operation.operation));
|
|
108
|
+
const needsSnapshot = ((plan.guards && plan.guards.length > 0) || plan.filter || hasIrreversibleApproved) &&
|
|
109
|
+
connector.fetchSnapshot;
|
|
39
110
|
const recheckEvery = Math.max(1, options.recheckEvery ?? 25);
|
|
40
111
|
const staleIds = new Set();
|
|
112
|
+
const irreversibleStale = new Map();
|
|
41
113
|
let guardFailure = null;
|
|
42
114
|
const refreshSnapshotChecks = async () => {
|
|
43
115
|
if (!needsSnapshot)
|
|
@@ -52,6 +124,16 @@ export async function applyPatchPlan(connector, plan, options) {
|
|
|
52
124
|
staleIds.add(operation.objectId);
|
|
53
125
|
}
|
|
54
126
|
}
|
|
127
|
+
irreversibleStale.clear();
|
|
128
|
+
if (checkConflicts) {
|
|
129
|
+
for (const operation of plan.operations) {
|
|
130
|
+
if (!approved.has(operation.id) || !IRREVERSIBLE_OPERATIONS.has(operation.operation))
|
|
131
|
+
continue;
|
|
132
|
+
const detail = checkIrreversibleOp(operation, liveSnapshot);
|
|
133
|
+
if (detail)
|
|
134
|
+
irreversibleStale.set(operation.id, detail);
|
|
135
|
+
}
|
|
136
|
+
}
|
|
55
137
|
for (const guard of plan.guards ?? []) {
|
|
56
138
|
const failure = evaluateGuard(liveSnapshot, guard);
|
|
57
139
|
if (failure) {
|
|
@@ -182,6 +264,13 @@ export async function applyPatchPlan(connector, plan, options) {
|
|
|
182
264
|
poisonedGroups.add(operation.groupId);
|
|
183
265
|
continue;
|
|
184
266
|
}
|
|
267
|
+
const irreversibleConflict = irreversibleStale.get(operation.id);
|
|
268
|
+
if (irreversibleConflict) {
|
|
269
|
+
results.push({ operationId: operation.id, status: "conflict", detail: irreversibleConflict });
|
|
270
|
+
if (operation.groupId)
|
|
271
|
+
poisonedGroups.add(operation.groupId);
|
|
272
|
+
continue;
|
|
273
|
+
}
|
|
185
274
|
if (operation.groupId && poisonedGroups.has(operation.groupId)) {
|
|
186
275
|
results.push({
|
|
187
276
|
operationId: operation.id,
|
package/dist/dedupe.d.ts
CHANGED
|
@@ -9,6 +9,12 @@ export type DedupeOptions = {
|
|
|
9
9
|
/** refuse to build plans larger than this (default 500 operations) */
|
|
10
10
|
maxOperations?: number;
|
|
11
11
|
};
|
|
12
|
+
/**
|
|
13
|
+
* The subset of a record worth keeping as a merge-recovery artifact: its id (to
|
|
14
|
+
* reference) plus every populated data field, dropping bulky/plumbing fields
|
|
15
|
+
* (raw, identities, provenance) that aren't needed to recreate it by hand.
|
|
16
|
+
*/
|
|
17
|
+
export declare function recoverableFields(record: Record<string, unknown>): Record<string, unknown>;
|
|
12
18
|
/** Normalize a record's identity key; undefined when the field is empty. */
|
|
13
19
|
export declare function dedupeKey(record: Record<string, unknown>, key: DedupeOptions["key"]): string | undefined;
|
|
14
20
|
export declare function buildDedupePlan(snapshot: CanonicalGtmSnapshot, options: DedupeOptions): PatchPlan;
|
package/dist/dedupe.js
CHANGED
|
@@ -40,6 +40,22 @@ const NON_DATA_FIELDS = new Set(["id", "provider", "crmId", "identities", "raw",
|
|
|
40
40
|
function populatedDataFields(record) {
|
|
41
41
|
return Object.entries(record).filter(([field, value]) => !NON_DATA_FIELDS.has(field) && value !== undefined && value !== null && value !== "").length;
|
|
42
42
|
}
|
|
43
|
+
/**
 * Build the merge-recovery view of a record: its id (stringified, to reference
 * it) plus every populated data field. Fields listed in NON_DATA_FIELDS and
 * fields whose value is undefined, null or the empty string are left out.
 */
export function recoverableFields(record) {
    const kept = { id: String(record.id) };
    for (const field of Object.keys(record)) {
        const value = record[field];
        const isEmpty = value === undefined || value === null || value === "";
        if (!NON_DATA_FIELDS.has(field) && !isEmpty) {
            kept[field] = value;
        }
    }
    return kept;
}
|
|
43
59
|
/** True when id `a` sorts before id `b` — numeric when both ids are numeric. */
|
|
44
60
|
function idBefore(a, b) {
|
|
45
61
|
const numericA = Number(a);
|
|
@@ -102,6 +118,12 @@ export function buildDedupePlan(snapshot, options) {
|
|
|
102
118
|
const groupIds = members
|
|
103
119
|
.map((member) => String(member.id))
|
|
104
120
|
.sort((a, b) => (idBefore(a, b) ? -1 : 1));
|
|
121
|
+
// Recovery artifact: the records that will be merged away (everyone but the
|
|
122
|
+
// survivor), captured with their field values so a human can recreate one by
|
|
123
|
+
// hand if the merge was wrong. Merges are irreversible — the plan is the backup.
|
|
124
|
+
const recoverySnapshot = members
|
|
125
|
+
.filter((member) => String(member.id) !== String(survivor.id))
|
|
126
|
+
.map((member) => recoverableFields(member));
|
|
105
127
|
const survivorName = typeof survivor.name === "string" && survivor.name
|
|
106
128
|
? survivor.name
|
|
107
129
|
: typeof survivor.email === "string" && survivor.email
|
|
@@ -124,7 +146,8 @@ export function buildDedupePlan(snapshot, options) {
|
|
|
124
146
|
approvalRequired: true,
|
|
125
147
|
sourceRuleOrPolicy: "dedupe",
|
|
126
148
|
groupId: `grp_${options.objectType}_${String(survivor.id)}`,
|
|
127
|
-
|
|
149
|
+
recoverySnapshot,
|
|
150
|
+
rollback: "IRREVERSIBLE: provider merges cannot be unmerged. recoverySnapshot on this operation retains every merged-away record's field values; recreate a record manually from it if a merge was wrong.",
|
|
128
151
|
});
|
|
129
152
|
}
|
|
130
153
|
return {
|
package/dist/index.d.ts
CHANGED
|
@@ -16,6 +16,8 @@ export { apolloPullKeysForAppend, apolloPullKeysForRefresh, createApolloClient,
|
|
|
16
16
|
export { diffFindings, diffSnapshots, diffToMarkdown, type CollectionDiff, type FieldChange, type FindingsDrift, type RecordChange, type SnapshotDiff, } from "./diff.ts";
|
|
17
17
|
export { mergeSnapshots, type MergeConflict, type MergeMatch, type MergeReport, type MergeSuggestion, } from "./merge.ts";
|
|
18
18
|
export { createFilePlanStore, type PlanStore, type StoredPlan } from "./planStore.ts";
|
|
19
|
+
export { computeApprovalDigests, loadOrCreateSigningKey, loadSigningKey, signApproval, verifyApprovalDigests, type ApprovalVerification, } from "./integrity.ts";
|
|
20
|
+
export { buildAuditLog, verifyAuditLog, type AuditLogEntry, type AuditLogExport, type AuditLogVerification, } from "./auditLog.ts";
|
|
19
21
|
export { formatPatchPlanRun, patchPlanToMarkdown } from "./format.ts";
|
|
20
22
|
export { auditReportToHtml, auditReportToMarkdown, type ReportOptions } from "./report.ts";
|
|
21
23
|
export { HUBSPOT_DEFAULT_FIELD_MAPPINGS, SALESFORCE_DEFAULT_FIELD_MAPPINGS, mappedField, mappedFields, normalizeFieldMappings, readMappedValue, type CrmObjectType, type FieldMappings, } from "./mappings.ts";
|
package/dist/index.js
CHANGED
|
@@ -16,6 +16,8 @@ export { apolloPullKeysForAppend, apolloPullKeysForRefresh, createApolloClient,
|
|
|
16
16
|
export { diffFindings, diffSnapshots, diffToMarkdown, } from "./diff.js";
|
|
17
17
|
export { mergeSnapshots, } from "./merge.js";
|
|
18
18
|
export { createFilePlanStore } from "./planStore.js";
|
|
19
|
+
export { computeApprovalDigests, loadOrCreateSigningKey, loadSigningKey, signApproval, verifyApprovalDigests, } from "./integrity.js";
|
|
20
|
+
export { buildAuditLog, verifyAuditLog, } from "./auditLog.js";
|
|
19
21
|
export { formatPatchPlanRun, patchPlanToMarkdown } from "./format.js";
|
|
20
22
|
export { auditReportToHtml, auditReportToMarkdown } from "./report.js";
|
|
21
23
|
export { HUBSPOT_DEFAULT_FIELD_MAPPINGS, SALESFORCE_DEFAULT_FIELD_MAPPINGS, mappedField, mappedFields, normalizeFieldMappings, readMappedValue, } from "./mappings.js";
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import type { PatchOperation } from "./types.ts";
|
|
2
|
+
/** Read the signing key, or null if it has not been created yet. */
|
|
3
|
+
export declare function loadSigningKey(): Buffer | null;
|
|
4
|
+
/** Read the signing key, creating a fresh 32-byte one (0600) on first use. */
|
|
5
|
+
export declare function loadOrCreateSigningKey(): Buffer;
|
|
6
|
+
/** HMAC-SHA256 signature of one operation's approved content. */
|
|
7
|
+
export declare function signApproval(operation: PatchOperation, override: unknown, key: Buffer): string;
|
|
8
|
+
/**
|
|
9
|
+
* Compute the approval signature map for a set of approved operation ids,
|
|
10
|
+
* resolving each op from the plan and its (approved) value override.
|
|
11
|
+
*/
|
|
12
|
+
export declare function computeApprovalDigests(operations: PatchOperation[], approvedOperationIds: string[], valueOverrides: Record<string, unknown>, key: Buffer): Record<string, string>;
|
|
13
|
+
export type ApprovalVerification = {
|
|
14
|
+
ok: true;
|
|
15
|
+
} | {
|
|
16
|
+
ok: false;
|
|
17
|
+
reason: "no_key";
|
|
18
|
+
tampered: string[];
|
|
19
|
+
} | {
|
|
20
|
+
ok: false;
|
|
21
|
+
reason: "mismatch";
|
|
22
|
+
tampered: string[];
|
|
23
|
+
};
|
|
24
|
+
/**
|
|
25
|
+
* Verify that every approved operation still matches what was signed. Returns
|
|
26
|
+
* ok:true when there are no stored digests (a pre-integrity plan — nothing to
|
|
27
|
+
* verify), when all match, or fails with the list of operation ids whose
|
|
28
|
+
* content changed since approval.
|
|
29
|
+
*/
|
|
30
|
+
export declare function verifyApprovalDigests(operations: PatchOperation[], approvedOperationIds: string[], valueOverrides: Record<string, unknown>, storedDigests: Record<string, string> | undefined): ApprovalVerification;
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
import { createHmac, randomBytes } from "node:crypto";
|
|
2
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { credentialsDir, ensureSecureHomeDir, writeSecureFile } from "./credentials.js";
|
|
5
|
+
/**
|
|
6
|
+
* Approval integrity.
|
|
7
|
+
*
|
|
8
|
+
* The plan store records WHICH operation ids a human approved, but the apply
|
|
9
|
+
* path re-reads the operation BODIES fresh from the (user-editable) plan file.
|
|
10
|
+
* Nothing bound the approval to the content: an approved op's afterValue or
|
|
11
|
+
* objectId could be changed on disk between `plans approve` and `apply` — by a
|
|
12
|
+
* compromised dependency, a co-tenant, or a plan file synced/edited on another
|
|
13
|
+
* machine — and the changed value would be written under the prior approval.
|
|
14
|
+
*
|
|
15
|
+
* Fix: at approval time, HMAC-sign each approved operation's security-relevant
|
|
16
|
+
* content (including the approved value override) with a per-install secret key
|
|
17
|
+
* stored 0600 alongside the credentials. At apply time, recompute and verify.
|
|
18
|
+
* Any post-approval edit to the operations or the approved overrides changes the
|
|
19
|
+
* signature; a tamper must now also forge an HMAC it cannot compute without the
|
|
20
|
+
* key. The key never leaves the machine, so a plan approved here and applied
|
|
21
|
+
* elsewhere fails closed ("re-approve on this machine") rather than open.
|
|
22
|
+
*
|
|
23
|
+
* This raises the bar from "trust the plan JSON" to "trust the plan JSON only
|
|
24
|
+
* insofar as it still matches what was signed with the local key." It is not a
|
|
25
|
+
* defense against an attacker who already holds the signing key (same-dir, same
|
|
26
|
+
* permissions as the credential store) — that is the documented boundary.
|
|
27
|
+
*/
|
|
28
|
+
/** File name of the plan-signing key. */
const SIGNING_KEY_FILE = ".plan-signing-key";
/** Path of the signing key file inside the credentials directory. */
function signingKeyPath() {
    const directory = credentialsDir();
    return join(directory, SIGNING_KEY_FILE);
}
|
|
32
|
+
/**
 * Read the signing key from disk.
 *
 * @returns the decoded key, or null when the key file does not exist, cannot
 *   be read, or does not contain a well-formed hex key of at least 32 bytes.
 */
export function loadSigningKey() {
    const path = signingKeyPath();
    if (!existsSync(path))
        return null;
    let encoded;
    try {
        encoded = readFileSync(path, "utf8").trim();
    }
    catch {
        return null;
    }
    // Buffer.from(text, "hex") never throws: it silently stops at the first
    // invalid character. Without this check an empty or corrupted key file
    // would decode to a short (even zero-length) key that HMAC accepts.
    if (encoded.length % 2 !== 0 || !/^[0-9a-f]+$/i.test(encoded))
        return null;
    const key = Buffer.from(encoded, "hex");
    // Same 32-byte minimum loadOrCreateSigningKey requires before reusing a key.
    return key.length >= 32 ? key : null;
}
|
|
44
|
+
/**
 * Return the stored signing key when one of at least 32 bytes exists;
 * otherwise generate a fresh random 32-byte key, write it hex-encoded (with a
 * trailing newline) via writeSecureFile, and return it.
 */
export function loadOrCreateSigningKey() {
    const current = loadSigningKey();
    if (!current || !(current.length >= 32)) {
        ensureSecureHomeDir();
        const fresh = randomBytes(32);
        const encoded = `${fresh.toString("hex")}\n`;
        writeSecureFile(signingKeyPath(), encoded);
        return fresh;
    }
    return current;
}
|
|
54
|
+
/**
 * Serialize the parts of an operation that an approval binds to into one
 * stable JSON string (a fixed-order array). Changing any listed field, or the
 * approved value `override`, changes the string and therefore the signature.
 */
function canonicalApprovalContent(operation, override) {
    const { id, operation: kind, objectType, objectId, field, beforeValue, afterValue, groupId, preconditions, forceArchiveDuplicate, reason, } = operation;
    // An absent override serializes as null; a present one is tagged so that an
    // explicit null override stays distinguishable from "no override".
    const overrideEntry = override === undefined ? null : ["__override__", override];
    const content = [
        // What gets written, and where.
        id,
        kind,
        objectType,
        objectId,
        field ?? null,
        beforeValue ?? null,
        afterValue ?? null,
        groupId ?? null,
        // Safety-relevant fields: an edited precondition could relax a drift
        // guard, and a forged forceArchiveDuplicate could suppress the
        // archive-of-duplicate refusal. `reason` is human-reviewed and, per the
        // original note, written verbatim into create_task bodies when
        // afterValue is null, so it must be pinned as well.
        preconditions ?? null,
        forceArchiveDuplicate ?? false,
        reason ?? null,
        overrideEntry,
    ];
    return JSON.stringify(content);
}
|
|
84
|
+
/**
 * Sign one operation's approved content: hex-encoded HMAC-SHA256, keyed with
 * `key`, over the canonical string for `operation` and its `override`.
 */
export function signApproval(operation, override, key) {
    const hmac = createHmac("sha256", key);
    const content = canonicalApprovalContent(operation, override);
    hmac.update(content);
    return hmac.digest("hex");
}
|
|
88
|
+
/**
 * Build the id → signature map for the approved operation ids. Each id is
 * resolved against `operations` and signed together with its entry in
 * `valueOverrides`; ids with no matching operation are skipped.
 */
export function computeApprovalDigests(operations, approvedOperationIds, valueOverrides, key) {
    const operationsById = new Map(operations.map((operation) => [operation.id, operation]));
    const digests = {};
    for (const approvedId of approvedOperationIds) {
        const match = operationsById.get(approvedId);
        if (match) {
            digests[approvedId] = signApproval(match, valueOverrides[approvedId], key);
        }
    }
    return digests;
}
|
|
103
|
+
/**
 * Check that every approved operation still matches its stored signature.
 *
 * Returns `{ ok: true }` when there are no stored digests (nothing to verify)
 * or when every approved id verifies. Returns `reason: "no_key"` with all
 * approved ids when no signing key can be loaded, and `reason: "mismatch"`
 * with the ids that are missing from the plan, have no stored digest, or whose
 * recomputed signature differs.
 */
export function verifyApprovalDigests(operations, approvedOperationIds, valueOverrides, storedDigests) {
    const hasDigests = Boolean(storedDigests) && Object.keys(storedDigests).length > 0;
    if (!hasDigests) {
        return { ok: true };
    }
    const key = loadSigningKey();
    if (!key) {
        return { ok: false, reason: "no_key", tampered: approvedOperationIds };
    }
    const operationsById = new Map(operations.map((operation) => [operation.id, operation]));
    const tampered = approvedOperationIds.filter((id) => {
        const operation = operationsById.get(id);
        const expected = storedDigests[id];
        if (!operation || !expected) {
            return true;
        }
        return signApproval(operation, valueOverrides[id], key) !== expected;
    });
    if (tampered.length > 0) {
        return { ok: false, reason: "mismatch", tampered };
    }
    return { ok: true };
}
|
package/dist/llm.js
CHANGED
|
@@ -70,8 +70,23 @@ export async function extractInsightsLlm(transcript, options) {
|
|
|
70
70
|
const text = truncateTranscript(transcript);
|
|
71
71
|
const prompt = `${EXTRACT_INSTRUCTIONS}\n\n${options.title ? `Call: ${options.title}\n` : ""}Transcript:\n${text}`;
|
|
72
72
|
const result = (await forcedToolCall(prompt, "extract_call_insights", EXTRACT_SCHEMA, model, options));
|
|
73
|
+
const normalizedTranscript = normalizeSpan(text);
|
|
73
74
|
const insights = (result.insights ?? [])
|
|
74
75
|
.filter((insight) => INSIGHT_TYPES.includes(insight.type))
|
|
76
|
+
// Mechanical verbatim gate (mirrors market classify): the prompt asks for a
|
|
77
|
+
// verbatim quote, but a prompt-injected or hallucinated transcript could
|
|
78
|
+
// fabricate a grounded-looking insight that drives a governed writeback.
|
|
79
|
+
// (1) The evidence quote must be a non-trivial verbatim span of the transcript.
|
|
80
|
+
.filter((insight) => {
|
|
81
|
+
const quote = normalizeSpan(insight.evidence ?? "");
|
|
82
|
+
return quote.length >= 12 && normalizedTranscript.includes(quote);
|
|
83
|
+
})
|
|
84
|
+
// (2) For next_step — the only insight type whose `text` is WRITTEN to the CRM
|
|
85
|
+
// (set_field nextStep / create_task body) — the written action must itself be
|
|
86
|
+
// grounded in the verified quote, not just accompanied by an innocuous one.
|
|
87
|
+
// This closes the decoupling attack: a prompt-injected transcript that emits a
|
|
88
|
+
// malicious `text` while quoting an unrelated real span no longer survives.
|
|
89
|
+
.filter((insight) => insight.type !== "next_step" || actionGroundedInEvidence(insight.text, insight.evidence ?? ""))
|
|
75
90
|
.map((insight) => ({
|
|
76
91
|
...insight,
|
|
77
92
|
title: insight.type.replace(/_/g, " "),
|
|
@@ -81,6 +96,39 @@ export async function extractInsightsLlm(transcript, options) {
|
|
|
81
96
|
.sort((a, b) => b.importance - a.importance || b.confidence - a.confidence);
|
|
82
97
|
return { insights, model };
|
|
83
98
|
}
|
|
99
|
+
/**
 * Normalize text for whitespace/punctuation-spacing-insensitive matching:
 * drop whitespace before `. , ; : ! ?`, collapse whitespace runs to a single
 * space, trim, and lowercase.
 *
 * @param value text to normalize; anything that is not a string yields "".
 * @returns the normalized string.
 */
function normalizeSpan(value) {
    // Callers pass fields taken from LLM tool output. A missing or non-string
    // field must fail the verbatim gate (empty span), not throw and abort the
    // whole extraction.
    if (typeof value !== "string")
        return "";
    return value
        .replace(/\s+([.,;:!?])/g, "$1")
        .replace(/\s+/g, " ")
        .trim()
        .toLowerCase();
}
|
|
107
|
+
/**
 * Is the written next-step action grounded in its evidence quote? A legitimate
 * next step paraphrases the quote, so it reuses the quote's salient terms; a
 * prompt-injected action ("wire $50,000 to account 1234") quoting an unrelated
 * innocuous span does not.
 *
 * Two checks, both on normalized text:
 *  1. every number in the action must appear in the evidence;
 *  2. at least 40% of the action's distinctive (>= 4 character) tokens must
 *     appear in the evidence.
 *
 * @param text the action text that would be written.
 * @param evidence the evidence quote accompanying it.
 * @returns true when the action passes both checks; false for an empty action.
 */
function actionGroundedInEvidence(text, evidence) {
    const action = normalizeSpan(text);
    const quote = normalizeSpan(evidence);
    if (!action)
        return false;
    // Match digits with *interior* separators only, so sentence punctuation
    // after a number ("by the 12." / "50,000, then") is not treated as part of
    // it and a grounded number is not rejected over a trailing dot or comma.
    const numbers = action.match(/\d+(?:[.,]\d+)*/g) ?? [];
    for (const n of numbers) {
        if (!quote.includes(n))
            return false; // an ungrounded amount/account/id is a red flag
    }
    const distinctive = [...new Set(action.split(/[^a-z0-9$]+/).filter((token) => token.length >= 4))];
    if (distinctive.length === 0)
        return true; // nothing distinctive to ground (a short generic step)
    const grounded = distinctive.filter((token) => quote.includes(token)).length;
    return grounded / distinctive.length >= 0.4;
}
|
|
84
132
|
export const DEFAULT_RUBRIC = {
|
|
85
133
|
scale: 5,
|
|
86
134
|
dimensions: [
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { type LlmCallOptions } from "./llm.ts";
|
|
2
|
+
import { type FetchPage, type MarketConfig } from "./market.ts";
|
|
3
|
+
/**
|
|
4
|
+
* Cold-start taxonomy bootstrap. `market init` writes a stub for a human
|
|
5
|
+
* analyst to fill in; the self-serve hosted map has no analyst in the loop, so
|
|
6
|
+
* this proposes the claim taxonomy automatically from the seed vendors' own
|
|
7
|
+
* pages.
|
|
8
|
+
*
|
|
9
|
+
* Posture matches the rest of the market layer: the LLM is a *proposal* layer
|
|
10
|
+
* grounded in captured evidence (it only sees text we actually fetched), and
|
|
11
|
+
* everything downstream — capture, classify with verbatim-span verification,
|
|
12
|
+
* front states, the report — stays deterministic over the stored observations.
|
|
13
|
+
* The taxonomy it emits is a normal `market.config.json` a human can still edit.
|
|
14
|
+
*/
|
|
15
|
+
export type SeedVendor = {
|
|
16
|
+
url: string;
|
|
17
|
+
/** Display name; derived from the host when omitted. */
|
|
18
|
+
name?: string;
|
|
19
|
+
/** Marks the user's own company as the anchor vendor. */
|
|
20
|
+
anchor?: boolean;
|
|
21
|
+
};
|
|
22
|
+
export type SuggestTaxonomyOptions = {
|
|
23
|
+
category: string;
|
|
24
|
+
vendors: SeedVendor[];
|
|
25
|
+
llm: LlmCallOptions;
|
|
26
|
+
/** Upper bound on proposed claims, to keep classification bounded. */
|
|
27
|
+
maxClaims?: number;
|
|
28
|
+
/** Per-vendor captured-text budget fed to the proposer (chars). */
|
|
29
|
+
perVendorChars?: number;
|
|
30
|
+
/** Test injectables. */
|
|
31
|
+
fetchPage?: FetchPage;
|
|
32
|
+
capturesDir?: string;
|
|
33
|
+
now?: () => Date;
|
|
34
|
+
};
|
|
35
|
+
export type SuggestTaxonomyResult = {
|
|
36
|
+
config: MarketConfig;
|
|
37
|
+
/** Vendors whose homepage capture was empty/failed (excluded from grounding). */
|
|
38
|
+
unreadableVendorIds: string[];
|
|
39
|
+
model: string;
|
|
40
|
+
};
|
|
41
|
+
export declare function suggestMarketConfig(options: SuggestTaxonomyOptions): Promise<SuggestTaxonomyResult>;
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
import { DEFAULT_MODELS, forcedToolCall, } from "./llm.js";
|
|
2
|
+
import { captureMarket, loadCaptureTexts, } from "./market.js";
|
|
3
|
+
// NOTE(review): presumably the fallbacks for the `maxClaims` and
// `perVendorChars` options — their use is outside this excerpt; confirm.
const DEFAULT_MAX_CLAIMS = 16;
const DEFAULT_PER_VENDOR_CHARS = 6000;
|
|
5
|
+
/**
 * Turn a string into a lowercase, dash-joined id made of its first `maxWords`
 * runs of ASCII letters/digits. Falls back to "item" when nothing remains.
 */
function slugify(value, maxWords = 6) {
    const words = value.toLowerCase().match(/[a-z0-9]+/g) ?? [];
    const slug = words.slice(0, maxWords).join("-");
    return slug === "" ? "item" : slug;
}
|
|
17
|
+
/**
 * Vendor id seed from a URL: the registrable name of its host, slugified.
 * https://www.stripe.com/ -> stripe, https://www.example.co.uk/ -> example.
 *
 * A string that does not parse as a URL is slugified as-is.
 *
 * @param url seed URL.
 * @returns slug derived from the host (or from the raw input when unparseable).
 */
function vendorIdFromUrl(url) {
    let host;
    try {
        host = new URL(url).hostname;
    }
    catch {
        return slugify(url);
    }
    const labels = host.replace(/^www\./, "").split(".");
    let index = labels.length >= 2 ? labels.length - 2 : 0;
    // Heuristic for two-part public suffixes (co.uk, com.au, org.nz, ...):
    // under a two-letter country TLD, a generic registry label in the
    // second-level slot is not the vendor name, so step one label further left.
    // Without this, every such vendor gets the id "co" / "com". This is not a
    // full public-suffix lookup; unusual suffixes keep the plain behavior.
    const registryLabels = new Set(["co", "com", "org", "net", "ac", "gov", "edu"]);
    const tld = labels[labels.length - 1];
    if (labels.length >= 3 && tld.length === 2 && registryLabels.has(labels[index])) {
        index -= 1;
    }
    const sld = labels[index];
    return slugify(sld || host);
}
|
|
30
|
+
/**
 * Reserve an id in `taken`: `base` itself when free, otherwise the first free
 * `base-2`, `base-3`, ... The chosen id is added to `taken` and returned.
 */
function uniqueId(base, taken) {
    let candidate = base;
    let suffix = 1;
    while (taken.has(candidate)) {
        suffix += 1;
        candidate = `${base}-${suffix}`;
    }
    taken.add(candidate);
    return candidate;
}
|
|
44
|
+
/**
 * Turn seed vendors into provisional vendor entries: a unique id derived from
 * the URL, a display name (the trimmed seed name, else the host without a
 * leading "www.", else the raw URL), and a `urls` object holding only the
 * home page.
 */
function provisionalVendors(seeds) {
    const usedIds = new Set();
    // Host without a leading "www."; an unparseable URL is returned unchanged.
    const hostOf = (url) => {
        try {
            return new URL(url).hostname.replace(/^www\./, "");
        }
        catch {
            return url;
        }
    };
    return seeds.map(({ url, name }) => ({
        id: uniqueId(vendorIdFromUrl(url), usedIds),
        name: name?.trim() || hostOf(url),
        urls: { home: url, pricing: null, product: [] },
    }));
}
|
|
63
|
+
/**
 * JSON schema handed to the forced tool call in suggestMarketConfig. It asks
 * for a required `claims` array plus an optional `surfaceRule` sentence and
 * optional per-seed `vendors` refinements.
 *
 * Built inside an IIFE so the sub-schemas can be named without adding
 * module-level bindings.
 */
const TAXONOMY_SCHEMA = (() => {
    // One proposed claim cell.
    const claimItem = {
        type: "object",
        required: ["capability", "icp", "pricingStructure", "definition"],
        properties: {
            capability: {
                type: "string",
                description: "What is being claimed, precise enough to judge loud/quiet/absent. Max ~10 words.",
            },
            icp: {
                type: "string",
                description: "Which buyer/ICP this claim cell addresses (category vocabulary).",
            },
            pricingStructure: {
                type: "string",
                description: "Which pricing structure the claim implies (e.g. per-seat, usage-based, flat, free-tier).",
            },
            definition: {
                type: "string",
                description: "Operational definition a human (or classifier) uses to score any vendor's page LOUD/QUIET/ABSENT on this claim.",
            },
            terms: {
                type: "array",
                items: { type: "string" },
                description: "Exact buyer phrasings for this claim, for deterministic mention matching. 2-5 terms.",
            },
        },
    };
    // One optional refinement, keyed by the seed URL it applies to.
    const vendorItem = {
        type: "object",
        required: ["seedUrl"],
        properties: {
            seedUrl: { type: "string" },
            name: { type: "string" },
            pricingUrl: { type: ["string", "null"] },
        },
    };
    return {
        type: "object",
        required: ["claims"],
        properties: {
            surfaceRule: {
                type: "string",
                description: "One sentence stating how a reader judges LOUD vs QUIET vs ABSENT for this category (e.g. hero/top-nav = LOUD, deeper pages = QUIET, nowhere = ABSENT).",
            },
            claims: {
                type: "array",
                description: "The distinct capability positions vendors in this category compete on. 8-16 of them. Only include claims you can actually see evidence for on the supplied pages.",
                items: claimItem,
            },
            vendors: {
                type: "array",
                description: "Optional refinements: a clean display name per seed URL, and a pricing-page URL if one is clearly linked.",
                items: vendorItem,
            },
        },
    };
})();
|
|
114
|
+
/**
 * Assemble the per-vendor text dossier that is embedded in the proposer prompt.
 *
 * For each vendor the first capture entry carrying a `captureHash` is looked
 * up in `capture.textByHash`. Vendors with no such entry, or whose text is
 * empty or whitespace-only, are reported in `unreadable` and get no block.
 *
 * @param {Array<{id: string, name: string, urls: {home: string}}>} vendors
 * @param {{entries: Array<{vendorId: string, captureHash?: string|null}>, textByHash: Map<string, string>}} capture
 * @param {number} perVendorChars - Maximum UTF-16 code units of text per vendor.
 * @returns {{dossier: string, unreadable: string[]}} Blocks joined by a blank
 *   line, plus the ids of vendors that contributed no text.
 */
function buildDossier(vendors, capture, perVendorChars) {
    const { entries, textByHash } = capture;
    // Index the first usable hash per vendor once instead of scanning
    // `entries` again for every vendor.
    const firstHashByVendor = new Map();
    for (const entry of entries) {
        if (entry.captureHash && !firstHashByVendor.has(entry.vendorId)) {
            firstHashByVendor.set(entry.vendorId, entry.captureHash);
        }
    }
    const unreadable = [];
    const blocks = [];
    for (const vendor of vendors) {
        const hash = firstHashByVendor.get(vendor.id) ?? null;
        const text = hash ? textByHash.get(hash) ?? "" : "";
        if (!text.trim()) {
            unreadable.push(vendor.id);
            continue;
        }
        let excerpt = text.slice(0, perVendorChars);
        // A cut that lands inside a surrogate pair would leave a lone high
        // surrogate (malformed text) at the end of the prompt block.
        const lastUnit = excerpt.charCodeAt(excerpt.length - 1);
        if (excerpt.length < text.length && lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
            excerpt = excerpt.slice(0, -1);
        }
        blocks.push(`### ${vendor.name} (${vendor.urls.home})\n${excerpt}`);
    }
    return { dossier: blocks.join("\n\n"), unreadable };
}
|
|
129
|
+
/**
 * Fixed preamble of the proposer prompt. suggestMarketConfig appends the
 * category and the vendor dossier after it. Written one line per entry;
 * the empty entry is the blank line between the intro and the rules.
 */
const INSTRUCTIONS = [
    `You are seeding a competitive "market map" for a category. A market map breaks the category into CLAIMS — the distinct capability positions vendors compete on — so each (vendor x claim) cell can later be scored LOUD / QUIET / ABSENT from that vendor's pages.`,
    ``,
    `Propose the claim taxonomy for this category from the competitor homepages below. Rules:`,
    `- Ground every claim in what is actually visible on the supplied pages. Do not invent positions no vendor mentions.`,
    `- Each claim is a cell: a precise capability, the ICP it targets, and the pricing structure it implies.`,
    `- Write each definition so a reader could judge ANY vendor's page LOUD/QUIET/ABSENT against it.`,
    `- Aim for the 8-16 claims that genuinely differentiate vendors. Prefer specific, contested positions over generic table stakes.`,
    `- Provide 2-5 verbatim buyer terms per claim for later mention matching.`,
    `- Optionally return a cleaned display name and a pricing-page URL per seed vendor when evident.`,
].join("\n");
|
|
138
|
+
/**
 * Bootstrap a market config for `options.category` from seed vendor homepages.
 *
 * Captures the seed pages, builds a text dossier from what was fetched, asks
 * the model (forced tool call against TAXONOMY_SCHEMA) for a claim taxonomy,
 * and turns the answer into a config. The model's answer is treated as
 * untrusted: wrongly-typed or blank fields are ignored rather than throwing.
 *
 * @param {object} options
 * @param {string} options.category - Category name, also passed to the capture helpers.
 * @param {Array<{url: string, name?: string, anchor?: boolean}>} options.vendors -
 *   Seed vendors; the first one with a truthy `anchor` becomes `anchorVendor`.
 * @param {{provider: string, model?: string}} options.llm - Model selection,
 *   forwarded to the tool call; `model` defaults per provider.
 * @param {number} [options.maxClaims] - Cap on claims kept (default DEFAULT_MAX_CLAIMS).
 * @param {number} [options.perVendorChars] - Per-vendor text budget (default DEFAULT_PER_VENDOR_CHARS).
 * @param {*} [options.capturesDir] - Forwarded to captureMarket / loadCaptureTexts.
 * @param {*} [options.fetchPage] - Forwarded to captureMarket.
 * @param {*} [options.now] - Forwarded to captureMarket.
 * @returns {Promise<{config: object, unreadableVendorIds: string[], model: string}>}
 * @throws {Error} When no seed vendor is given, when no seed page yielded
 *   readable text, or when the model proposed no usable claims.
 */
export async function suggestMarketConfig(options) {
    const { category } = options;
    if (options.vendors.length === 0)
        throw new Error("suggestMarketConfig requires at least one seed vendor");
    const maxClaims = options.maxClaims ?? DEFAULT_MAX_CLAIMS;
    const perVendorChars = options.perVendorChars ?? DEFAULT_PER_VENDOR_CHARS;
    const model = options.llm.model ?? DEFAULT_MODELS[options.llm.provider];
    const vendors = provisionalVendors(options.vendors);
    // provisionalVendors maps seeds 1:1, so the anchor's index carries over.
    const anchorIndex = options.vendors.findIndex((seed) => seed.anchor);
    const anchorId = anchorIndex >= 0 ? vendors[anchorIndex]?.id : undefined;
    // Capture the seed homepages so the proposer only sees text we actually
    // fetched (the SSRF guard in captureMarket applies to these user-supplied URLs).
    await captureMarket({ category, vendors, claims: [] }, { dir: options.capturesDir, runLabel: "bootstrap", fetchPage: options.fetchPage, now: options.now });
    const capture = loadCaptureTexts(category, options.capturesDir);
    const { dossier, unreadable } = buildDossier(vendors, capture, perVendorChars);
    if (!dossier.trim()) {
        throw new Error(`market init --auto: none of the ${vendors.length} seed pages returned readable text — check the URLs are public homepages.`);
    }
    const prompt = `${INSTRUCTIONS}\n\nCategory: ${category}\n\nCompetitor homepages:\n${dossier}`;
    const result = (await forcedToolCall(prompt, "propose_market_taxonomy", TAXONOMY_SCHEMA, model, options.llm));
    // Trimmed string, or "" for anything that is not a string.
    const cleanText = (value) => (typeof value === "string" ? value.trim() : "");
    // Canonical form of a seed URL, so "https://x.com" and "https://X.com/"
    // refer to the same vendor; unparseable input falls back to trimmed text.
    const seedUrlKey = (url) => {
        const raw = String(url).trim();
        try {
            const parsed = new URL(raw);
            return `${parsed.origin}${parsed.pathname.replace(/\/+$/, "")}${parsed.search}`;
        }
        catch {
            return raw;
        }
    };
    const proposedClaims = Array.isArray(result?.claims) ? result.claims : [];
    const takenClaimIds = new Set();
    const claims = proposedClaims
        // Normalise first so blank or wrongly-typed fields are filtered out
        // before they count against maxClaims.
        .map((claim) => ({
        capability: cleanText(claim?.capability),
        icp: cleanText(claim?.icp),
        pricingStructure: cleanText(claim?.pricingStructure),
        definition: cleanText(claim?.definition),
        terms: Array.isArray(claim?.terms) ? claim.terms.map((term) => cleanText(term)).filter(Boolean) : [],
    }))
        .filter((claim) => claim.capability && claim.definition)
        .slice(0, maxClaims)
        .map(({ capability, icp, pricingStructure, definition, terms }) => ({
        id: uniqueId(slugify(capability), takenClaimIds),
        capability,
        icp: icp || "general",
        pricingStructure: pricingStructure || "unspecified",
        definition,
        ...(terms.length > 0 ? { terms } : {}),
    }));
    if (claims.length === 0) {
        throw new Error("market init --auto: the model proposed no usable claims — try again or seed the taxonomy by hand.");
    }
    // Apply optional vendor refinements (display name + pricing URL), matched by seed URL.
    const proposedVendors = Array.isArray(result?.vendors) ? result.vendors : [];
    const refinementByUrl = new Map();
    for (const refinement of proposedVendors) {
        if (typeof refinement?.seedUrl === "string") {
            // Later entries for the same seed URL win.
            refinementByUrl.set(seedUrlKey(refinement.seedUrl), refinement);
        }
    }
    const refinedVendors = vendors.map((vendor) => {
        const refinement = refinementByUrl.get(seedUrlKey(vendor.urls.home));
        const proposedPricing = cleanText(refinement?.pricingUrl);
        // Only an absolute http(s) URL may replace the provisional pricing value.
        const pricing = /^https?:\/\//i.test(proposedPricing) ? proposedPricing : vendor.urls.pricing;
        return {
            ...vendor,
            name: cleanText(refinement?.name) || vendor.name,
            urls: { ...vendor.urls, pricing },
        };
    });
    const config = {
        category,
        ...(anchorId ? { anchorVendor: anchorId } : {}),
        vendors: refinedVendors,
        claims,
        surfaceRule: cleanText(result?.surfaceRule) ||
            "LOUD = hero copy OR top-level-nav named product with a dedicated page; QUIET = present on any indexed page below that; ABSENT = nowhere observed; UNOBSERVABLE = capture empty/failed — never score ABSENT from a failed capture.",
    };
    return { config, unreadableVendorIds: unreadable, model };
}
|