fullstackgtm 0.25.1 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/CHANGELOG.md +97 -0
  2. package/dist/bulkUpdate.js +6 -1
  3. package/dist/cli.js +67 -2
  4. package/dist/connector.js +90 -1
  5. package/dist/connectors/hubspot.js +5 -2
  6. package/dist/connectors/salesforce.js +4 -2
  7. package/dist/connectors/stripe.js +4 -2
  8. package/dist/credentials.js +22 -1
  9. package/dist/dedupe.d.ts +6 -0
  10. package/dist/dedupe.js +24 -1
  11. package/dist/enrich.js +24 -2
  12. package/dist/enrichApollo.js +5 -2
  13. package/dist/index.d.ts +1 -0
  14. package/dist/index.js +1 -0
  15. package/dist/integrity.d.ts +30 -0
  16. package/dist/integrity.js +128 -0
  17. package/dist/market.d.ts +1 -0
  18. package/dist/market.js +144 -8
  19. package/dist/marketReport.d.ts +9 -0
  20. package/dist/marketReport.js +29 -4
  21. package/dist/marketTaxonomy.d.ts +41 -0
  22. package/dist/marketTaxonomy.js +193 -0
  23. package/dist/planStore.d.ts +6 -0
  24. package/dist/planStore.js +10 -2
  25. package/dist/schedule.d.ts +17 -0
  26. package/dist/schedule.js +87 -2
  27. package/dist/types.d.ts +16 -0
  28. package/package.json +1 -1
  29. package/src/bulkUpdate.ts +6 -1
  30. package/src/cli.ts +80 -1
  31. package/src/connector.ts +96 -1
  32. package/src/connectors/hubspot.ts +5 -2
  33. package/src/connectors/salesforce.ts +4 -2
  34. package/src/connectors/stripe.ts +4 -2
  35. package/src/credentials.ts +24 -0
  36. package/src/dedupe.ts +23 -1
  37. package/src/enrich.ts +25 -2
  38. package/src/enrichApollo.ts +5 -2
  39. package/src/index.ts +8 -0
  40. package/src/integrity.ts +146 -0
  41. package/src/market.ts +129 -8
  42. package/src/marketReport.ts +30 -4
  43. package/src/marketTaxonomy.ts +288 -0
  44. package/src/planStore.ts +23 -4
  45. package/src/schedule.ts +98 -2
  46. package/src/types.ts +16 -0
@@ -0,0 +1,128 @@
1
+ import { createHmac, randomBytes } from "node:crypto";
2
+ import { existsSync, readFileSync } from "node:fs";
3
+ import { join } from "node:path";
4
+ import { credentialsDir, ensureSecureHomeDir, writeSecureFile } from "./credentials.js";
5
+ /**
6
+ * Approval integrity.
7
+ *
8
+ * The plan store records WHICH operation ids a human approved, but the apply
9
+ * path re-reads the operation BODIES fresh from the (user-editable) plan file.
10
+ * Nothing bound the approval to the content: an approved op's afterValue or
11
+ * objectId could be changed on disk between `plans approve` and `apply` — by a
12
+ * compromised dependency, a co-tenant, or a plan file synced/edited on another
13
+ * machine — and the changed value would be written under the prior approval.
14
+ *
15
+ * Fix: at approval time, HMAC-sign each approved operation's security-relevant
16
+ * content (including the approved value override) with a per-install secret key
17
+ * stored 0600 alongside the credentials. At apply time, recompute and verify.
18
+ * Any post-approval edit to the operations or the approved overrides changes the
19
+ * signature; a tamper must now also forge an HMAC it cannot compute without the
20
+ * key. The key never leaves the machine, so a plan approved here and applied
21
+ * elsewhere fails closed ("re-approve on this machine") rather than open.
22
+ *
23
+ * This raises the bar from "trust the plan JSON" to "trust the plan JSON only
24
+ * insofar as it still matches what was signed with the local key." It is not a
25
+ * defense against an attacker who already holds the signing key (same-dir, same
26
+ * permissions as the credential store) — that is the documented boundary.
27
+ */
28
/** File name of the plan-signing key, stored inside the credentials directory. */
const SIGNING_KEY_FILE = ".plan-signing-key";
/** Full path of the signing key file: the credentials directory joined with SIGNING_KEY_FILE. */
function signingKeyPath() {
    const directory = credentialsDir();
    return join(directory, SIGNING_KEY_FILE);
}
32
/**
 * Read the signing key from disk.
 *
 * @returns The decoded key, or null when the key file is missing, unreadable,
 *   not clean hex, or decodes to fewer than 32 bytes.
 *
 * Why the extra validation: `Buffer.from(text, "hex")` stops silently at the
 * first non-hex character, so a truncated or corrupted file would otherwise
 * produce a short (possibly empty) HMAC key that callers would go on to use.
 * Returning null makes such a file indistinguishable from "no key".
 */
export function loadSigningKey() {
    const path = signingKeyPath();
    let hex;
    try {
        // A missing file surfaces as a read error and is handled the same way
        // as any other unreadable key.
        hex = readFileSync(path, "utf8").trim();
    }
    catch {
        return null;
    }
    // Whole bytes of hex only; anything else is treated as a corrupt key.
    if (!/^(?:[0-9a-f]{2})+$/i.test(hex))
        return null;
    const key = Buffer.from(hex, "hex");
    return key.length >= 32 ? key : null;
}
44
/**
 * Return the signing key, generating one on first use.
 *
 * An existing key is reused only when it is at least 32 bytes long; otherwise
 * a fresh 32-byte random key is written (hex-encoded, newline-terminated) via
 * writeSecureFile and returned.
 */
export function loadOrCreateSigningKey() {
    const current = loadSigningKey();
    if (current != null && current.length >= 32) {
        return current;
    }
    ensureSecureHomeDir();
    const fresh = randomBytes(32);
    const serialized = `${fresh.toString("hex")}\n`;
    writeSecureFile(signingKeyPath(), serialized);
    return fresh;
}
54
/**
 * Build the string an approval signature is computed over.
 *
 * The result is a JSON array with a fixed field order, so any change to a
 * listed field changes the string. Missing optional fields are encoded as
 * null (false for forceArchiveDuplicate). `override` is the approved value
 * override for the operation; when it is not undefined it is appended as a
 * tagged pair so that "no override" and "override of null" stay distinct.
 */
function canonicalApprovalContent(operation, override) {
    const orNull = (fieldValue) => fieldValue ?? null;
    // What gets written.
    const writtenContent = [
        operation.id,
        operation.operation,
        operation.objectType,
        operation.objectId,
        orNull(operation.field),
        orNull(operation.beforeValue),
        orNull(operation.afterValue),
        orNull(operation.groupId),
    ];
    // How apply behaves: preconditions and forceArchiveDuplicate are pinned
    // along with reason, which per the original notes is written verbatim
    // into create_task bodies when afterValue is null.
    const applyBehavior = [
        orNull(operation.preconditions),
        operation.forceArchiveDuplicate ?? false,
        orNull(operation.reason),
    ];
    const overrideEntry = override !== undefined ? ["__override__", override] : null;
    return JSON.stringify([...writtenContent, ...applyBehavior, overrideEntry]);
}
84
/**
 * Sign one operation's approved content.
 *
 * @returns Hex-encoded HMAC-SHA256, keyed with `key`, over the canonical
 *   approval content of `operation` and its `override`.
 */
export function signApproval(operation, override, key) {
    const content = canonicalApprovalContent(operation, override);
    const mac = createHmac("sha256", key);
    mac.update(content);
    return mac.digest("hex");
}
88
/**
 * Build the map of operation id -> approval signature for the approved ids.
 *
 * Each approved id is resolved against `operations`; ids with no matching
 * operation are skipped. The signature covers the operation together with
 * `valueOverrides[id]`.
 */
export function computeApprovalDigests(operations, approvedOperationIds, valueOverrides, key) {
    const operationsById = new Map(operations.map((candidate) => [candidate.id, candidate]));
    const signatures = {};
    for (const approvedId of approvedOperationIds) {
        const match = operationsById.get(approvedId);
        if (match) {
            signatures[approvedId] = signApproval(match, valueOverrides[approvedId], key);
        }
    }
    return signatures;
}
103
/**
 * Check that every approved operation still matches its stored signature.
 *
 * Outcomes:
 * - `{ ok: true }` when `storedDigests` is absent or empty (nothing to verify)
 *   or when every approved operation verifies;
 * - `{ ok: false, reason: "no_key", tampered }` when digests exist but no
 *   signing key can be loaded (all approved ids are reported);
 * - `{ ok: false, reason: "mismatch", tampered }` listing each approved id
 *   whose operation is missing, whose digest is missing, or whose recomputed
 *   signature differs from the stored one.
 */
export function verifyApprovalDigests(operations, approvedOperationIds, valueOverrides, storedDigests) {
    const hasStoredDigests = Boolean(storedDigests) && Object.keys(storedDigests).length > 0;
    if (!hasStoredDigests) {
        return { ok: true };
    }
    const key = loadSigningKey();
    if (!key) {
        return { ok: false, reason: "no_key", tampered: approvedOperationIds };
    }
    const operationsById = new Map(operations.map((candidate) => [candidate.id, candidate]));
    const tampered = [];
    for (const approvedId of approvedOperationIds) {
        const current = operationsById.get(approvedId);
        const recorded = storedDigests[approvedId];
        // The signature is only recomputed once both sides are present.
        const intact = Boolean(current) &&
            Boolean(recorded) &&
            signApproval(current, valueOverrides[approvedId], key) === recorded;
        if (!intact) {
            tampered.push(approvedId);
        }
    }
    if (tampered.length > 0) {
        return { ok: false, reason: "mismatch", tampered };
    }
    return { ok: true };
}
package/dist/market.d.ts CHANGED
@@ -153,6 +153,7 @@ export type FetchPage = (url: string) => Promise<{
153
153
  status: number;
154
154
  body: string;
155
155
  }>;
156
+ export declare function assertPublicUrl(rawUrl: string): Promise<URL>;
156
157
  export type CaptureOptions = {
157
158
  /** Directory for captures; defaults to <marketHome>/captures. */
158
159
  dir?: string;
package/dist/market.js CHANGED
@@ -1,5 +1,7 @@
1
1
  import { createHash } from "node:crypto";
2
+ import { lookup } from "node:dns/promises";
2
3
  import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
4
+ import { isIP } from "node:net";
3
5
  import { join } from "node:path";
4
6
  import { credentialsDir } from "./credentials.js";
5
7
  const INTENSITY_RANK = {
@@ -141,15 +143,144 @@ export function extractReadableText(html) {
141
143
  .filter(Boolean)
142
144
  .join("\n");
143
145
  }
146
+ /**
147
+ * SSRF guard. market.config.json URLs are operator-authored, but configs are
148
+ * shared/templated in consulting/team use and `market capture|refresh` is on
149
+ * the cron allowlist — an unguarded fetch is an unattended internal-network
150
+ * and cloud-metadata probe. We therefore (1) allow only http/https, (2) refuse
151
+ * any host that is or resolves to a private/loopback/link-local/metadata
152
+ * address, and (3) follow redirects manually, re-validating each hop.
153
+ *
154
+ * Residual gap (documented, not defended here): TOCTOU DNS rebinding between
155
+ * our lookup and fetch's own resolution. Out of scope for fetching public
156
+ * competitor pages; a hardened deployment should fetch through an egress proxy.
157
+ */
158
/** Redirect hops followed before the capture is abandoned. */
const MAX_REDIRECTS = 5;
/** Per-hop abort timeout, in milliseconds. */
const FETCH_TIMEOUT_MS = 15 * 1000;
/** Byte ceiling on a response body; reading stops once it is exceeded. */
const MAX_BODY_BYTES = 5 * 1000 * 1000;
161
/**
 * Decide whether a dotted-quad IPv4 literal must be refused by the SSRF guard.
 *
 * @param ip Dotted-quad text such as "10.0.0.1".
 * @returns true when the address is private, loopback, link-local, otherwise
 *   special-purpose, or not a strictly formed dotted quad; false only for an
 *   address that looks publicly routable.
 *
 * Parsing is strict on purpose: `Number("")` is 0 and `Number("0x7f")` is 127,
 * so lenient conversion would let malformed text such as "1..2.3" through as
 * a public address. Anything that is not four plain decimal octets (no signs,
 * no hex, no exponent, no leading zeros) is refused.
 */
function ipv4IsPrivate(ip) {
    const labels = ip.split(".");
    const wellFormed = labels.length === 4 && labels.every((label) => /^(?:0|[1-9]\d{0,2})$/.test(label));
    if (!wellFormed)
        return true;
    const octets = labels.map(Number);
    if (octets.some((octet) => octet > 255))
        return true;
    const [a, b, c] = octets;
    if (a === 0 || a === 127)
        return true; // this-host, loopback
    if (a === 10)
        return true; // private
    if (a === 172 && b >= 16 && b <= 31)
        return true; // private
    if (a === 192 && b === 168)
        return true; // private
    if (a === 169 && b === 254)
        return true; // link-local incl. 169.254.169.254 metadata
    if (a === 100 && b >= 64 && b <= 127)
        return true; // CGNAT
    if (a === 192 && b === 0 && (c === 0 || c === 2))
        return true; // IETF protocol assignments, TEST-NET-1
    if (a === 198 && (b === 18 || b === 19))
        return true; // benchmarking 198.18.0.0/15
    if (a === 198 && b === 51 && c === 100)
        return true; // TEST-NET-2
    if (a === 203 && b === 0 && c === 113)
        return true; // TEST-NET-3
    if (a >= 224)
        return true; // multicast / reserved
    return false;
}
182
/**
 * Expand an IPv6 literal into its eight 16-bit words.
 *
 * Handles a zone suffix ("%eth0"), a trailing dotted-quad, and "::"
 * compression. Returns null when the text cannot be expanded to exactly
 * eight hexadecimal groups.
 */
function parseIpv6Words(ip) {
    let text = ip.toLowerCase();
    const zoneAt = text.indexOf("%");
    if (zoneAt !== -1)
        text = text.slice(0, zoneAt);
    // Rewrite a trailing dotted quad (e.g. ::ffff:127.0.0.1) as two hex groups.
    const lastColon = text.lastIndexOf(":");
    const tail = text.slice(lastColon + 1);
    if (tail.includes(".")) {
        const octets = tail.split(".");
        const valid = octets.length === 4 && octets.every((octet) => /^\d{1,3}$/.test(octet) && Number(octet) <= 255);
        if (!valid)
            return null;
        const [a, b, c, d] = octets.map(Number);
        text = `${text.slice(0, lastColon + 1)}${((a << 8) | b).toString(16)}:${((c << 8) | d).toString(16)}`;
    }
    const halves = text.split("::");
    if (halves.length > 2)
        return null;
    const toGroups = (part) => (part === "" ? [] : part.split(":"));
    let groups = toGroups(halves[0]);
    if (halves.length === 2) {
        const rear = toGroups(halves[1]);
        const gap = 8 - groups.length - rear.length;
        if (gap < 1)
            return null;
        groups = [...groups, ...new Array(gap).fill("0"), ...rear];
    }
    if (groups.length !== 8 || !groups.every((group) => /^[0-9a-f]{1,4}$/.test(group)))
        return null;
    return groups.map((group) => parseInt(group, 16));
}
/**
 * Decide whether an IP literal (v4 or v6) must be refused by the SSRF guard.
 *
 * @param ip Address text; anything `isIP` does not recognize is refused.
 * @returns true to refuse, false when the address looks publicly routable.
 *
 * IPv6 is judged on the parsed 16-bit words rather than on string prefixes,
 * so non-canonical spellings (e.g. 0:0:0:0:0:0:0:1) cannot slip past. The
 * rule is an allow-list: only global unicast space (2000::/3) is public.
 * Two embeddings of IPv4 are unwrapped and judged by the IPv4 rules instead:
 * IPv4-mapped (::ffff:a.b.c.d) and the NAT64 well-known prefix (64:ff9b::/96),
 * which a DNS64 resolver may hand back for an ordinary public host.
 */
function ipIsPrivate(ip) {
    const family = isIP(ip);
    if (family === 4)
        return ipv4IsPrivate(ip);
    if (family !== 6)
        return true; // not a recognizable IP literal → refuse
    const words = parseIpv6Words(ip);
    if (words === null)
        return true; // could not expand → refuse
    const zeroThrough = (from, to) => words.slice(from, to).every((word) => word === 0);
    const embeddedV4IsPrivate = () => ipv4IsPrivate(`${words[6] >> 8}.${words[6] & 0xff}.${words[7] >> 8}.${words[7] & 0xff}`);
    if (zeroThrough(0, 5) && words[5] === 0xffff)
        return embeddedV4IsPrivate(); // IPv4-mapped ::ffff:0:0/96
    if (words[0] === 0x64 && words[1] === 0xff9b && zeroThrough(2, 6))
        return embeddedV4IsPrivate(); // NAT64 well-known prefix 64:ff9b::/96
    // Everything outside 2000::/3 is loopback, unspecified, link-local,
    // unique-local, multicast, or reserved space.
    if ((words[0] & 0xe000) !== 0x2000)
        return true;
    if (words[0] === 0x2001 && words[1] === 0x0db8)
        return true; // documentation prefix 2001:db8::/32
    return false;
}
215
/**
 * Validate that a URL is safe to fetch, returning the parsed URL.
 *
 * Rejects (by throwing) text that does not parse as a URL, any scheme other
 * than http/https, an IP-literal host that ipIsPrivate refuses, and a
 * hostname for which any resolved address is refused.
 */
export async function assertPublicUrl(rawUrl) {
    let parsed;
    try {
        parsed = new URL(rawUrl);
    }
    catch {
        throw new Error(`market capture: "${rawUrl}" is not a valid URL.`);
    }
    const isWebScheme = parsed.protocol === "http:" || parsed.protocol === "https:";
    if (!isWebScheme) {
        throw new Error(`market capture refuses ${parsed.protocol} URLs (only http/https): ${rawUrl}`);
    }
    // URL.hostname keeps the brackets around IPv6 literals; drop them.
    const host = parsed.hostname.replace(/^\[|\]$/g, "");
    if (!isIP(host)) {
        // A hostname: every resolved address has to pass, not just the first.
        const resolved = await lookup(host, { all: true });
        const offending = resolved.find(({ address }) => ipIsPrivate(address));
        if (offending) {
            throw new Error(`market capture refuses ${host} — it resolves to private/internal address ${offending.address} (SSRF guard).`);
        }
        return parsed;
    }
    if (ipIsPrivate(host)) {
        throw new Error(`market capture refuses private/loopback address ${host} (SSRF guard).`);
    }
    return parsed;
}
144
241
  const defaultFetchPage = async (url) => {
145
- const response = await fetch(url, {
146
- headers: {
147
- "User-Agent": "fullstackgtm-market/0 (+https://github.com/fullstackgtm/core)",
148
- "Accept-Language": "en-US",
149
- },
150
- redirect: "follow",
151
- });
152
- return { status: response.status, body: await response.text() };
242
+ let current = url;
243
+ for (let hop = 0; hop <= MAX_REDIRECTS; hop++) {
244
+ await assertPublicUrl(current);
245
+ const controller = new AbortController();
246
+ const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
247
+ let response;
248
+ try {
249
+ response = await fetch(current, {
250
+ headers: {
251
+ "User-Agent": "fullstackgtm-market/0 (+https://github.com/fullstackgtm/core)",
252
+ "Accept-Language": "en-US",
253
+ },
254
+ redirect: "manual",
255
+ signal: controller.signal,
256
+ });
257
+ }
258
+ finally {
259
+ clearTimeout(timer);
260
+ }
261
+ if (response.status >= 300 && response.status < 400 && response.headers.get("location")) {
262
+ current = new URL(response.headers.get("location"), current).toString();
263
+ continue; // re-validate the redirect target on the next iteration
264
+ }
265
+ const reader = response.body?.getReader();
266
+ if (!reader)
267
+ return { status: response.status, body: await response.text() };
268
+ const chunks = [];
269
+ let total = 0;
270
+ for (;;) {
271
+ const { done, value } = await reader.read();
272
+ if (done)
273
+ break;
274
+ total += value.length;
275
+ if (total > MAX_BODY_BYTES) {
276
+ await reader.cancel();
277
+ break;
278
+ }
279
+ chunks.push(value);
280
+ }
281
+ return { status: response.status, body: Buffer.concat(chunks).toString("utf8") };
282
+ }
283
+ throw new Error(`market capture: too many redirects (>${MAX_REDIRECTS}) for ${url}`);
153
284
  };
154
285
  export async function captureMarket(config, options = {}) {
155
286
  const dir = options.dir ?? join(marketHome(config.category), "captures");
@@ -284,6 +415,11 @@ export function validateObservationSet(config, set) {
284
415
  if (!INTENSITY_RANK[obs.intensity] && obs.intensity !== "unobservable") {
285
416
  problems.push(`${cell}: invalid intensity "${obs.intensity}"`);
286
417
  }
418
+ // confidence is rendered into the HTML report; only the enum is allowed, so
419
+ // an `observe --from` file can't smuggle markup through a free-text value.
420
+ if (obs.confidence !== "high" && obs.confidence !== "medium" && obs.confidence !== "low") {
421
+ problems.push(`${cell}: invalid confidence "${String(obs.confidence)}" (expected high, medium, or low)`);
422
+ }
287
423
  if ((obs.intensity === "loud" || obs.intensity === "quiet") && obs.evidence.length === 0) {
288
424
  problems.push(`${cell}: ${obs.intensity} reading with no quoted evidence`);
289
425
  }
@@ -1,3 +1,12 @@
1
1
  import type { MarketConfig, ObservationSet } from "./market.ts";
2
+ /**
3
+ * Serialize JSON for embedding inside an inline <script> block. JSON.stringify
4
+ * does not escape `<`, `>`, `&`, or the U+2028/U+2029 line separators, so a
5
+ * vendor name containing `</script>` (these are untrusted, competitor-authored
6
+ * strings) would close the tag and inject markup. Replacing them with their
7
+ * \uXXXX escapes keeps the parsed value identical while making the breakout
8
+ * sequence unrepresentable in the HTML source.
9
+ */
10
+ export declare function safeJsonForScript(value: unknown): string;
2
11
  export declare function marketMapToMarkdown(config: MarketConfig, set: ObservationSet): string;
3
12
  export declare function marketMapToHtml(config: MarketConfig, set: ObservationSet): string;
@@ -28,6 +28,22 @@ function escapeHtml(value) {
28
28
  .replace(/>/g, "&gt;")
29
29
  .replace(/"/g, "&quot;");
30
30
  }
31
/**
 * Serialize a value as JSON that is safe to place inside an inline <script>
 * block.
 *
 * JSON.stringify leaves `<`, `>`, `&`, U+2028 and U+2029 as-is, so a string
 * containing `</script>` would close the tag and inject markup. Each of those
 * characters is replaced by its \uXXXX escape, which parses back to the same
 * value but cannot form the breakout sequence in the HTML source.
 *
 * @param value Any value; typically the data model for the report.
 * @returns JSON text. A value JSON cannot represent at the top level
 *   (undefined, a function, a symbol) is emitted as `null` rather than
 *   throwing, because JSON.stringify returns undefined for those.
 */
export function safeJsonForScript(value) {
    const json = JSON.stringify(value) ?? "null";
    return json.replace(/[<>&\u2028\u2029]/g, (ch) => `\\u${ch.charCodeAt(0).toString(16).padStart(4, "0")}`);
}
31
47
  function buildModel(config, set) {
32
48
  const fronts = computeFrontStates(config, set);
33
49
  const stateByClaim = new Map(fronts.map((front) => [front.claimId, front.state]));
@@ -320,7 +336,7 @@ function axisSectionsHtml(config, set) {
320
336
  <table class="legend"><thead><tr><th></th><th>vendor</th><th class="num">${legendMeasureHead}</th></tr></thead><tbody>${legendRows}</tbody></table>
321
337
  </div>
322
338
  <div class="map-tip" id="map-tip" hidden></div>
323
- <script type="application/json" id="map-data">${JSON.stringify(tipData)}</script>
339
+ <script type="application/json" id="map-data">${safeJsonForScript(tipData)}</script>
324
340
  <script>
325
341
  (function () {
326
342
  var data = JSON.parse(document.getElementById("map-data").textContent);
@@ -331,7 +347,16 @@ function axisSectionsHtml(config, set) {
331
347
  function show(v, evt) {
332
348
  var d = data[v];
333
349
  if (!d) return;
334
- tip.innerHTML = "<b>" + d.n + " · " + d.name + "</b>" + d.lines.map(function (l) { return "<div>" + l + "</div>"; }).join("");
350
+ // textContent only vendor names / axis labels are untrusted (competitor-controlled).
351
+ tip.textContent = "";
352
+ var head = document.createElement("b");
353
+ head.textContent = d.n + " · " + d.name;
354
+ tip.appendChild(head);
355
+ d.lines.forEach(function (l) {
356
+ var div = document.createElement("div");
357
+ div.textContent = l;
358
+ tip.appendChild(div);
359
+ });
335
360
  tip.hidden = false;
336
361
  var box = fig.getBoundingClientRect();
337
362
  tip.style.left = Math.min(evt.clientX - box.left + 14, box.width - tip.offsetWidth - 8) + "px";
@@ -419,7 +444,7 @@ export function marketMapToHtml(config, set) {
419
444
  const anchorLoud = anchor
420
445
  ? claimIds.filter((claimId) => model.cell(anchor, claimId)?.intensity === "loud").length
421
446
  : 0;
422
- const anchorNote = anchor ? ` · ${vendorNamesById.get(anchor) ?? anchor} loud on ${anchorLoud}` : "";
447
+ const anchorNote = anchor ? ` · ${e(vendorNamesById.get(anchor) ?? anchor)} loud on ${anchorLoud}` : "";
423
448
  return `<details class="claim-group"><summary><b>${e(group.title)}</b> — ${claimIds.length} claim${claimIds.length === 1 ? "" : "s"} <span class="sum-soft">(${e(group.blurb)}${anchorNote})</span></summary>
424
449
  <table><thead><tr><th></th>${vendorHeads}<th></th></tr></thead><tbody>${claimIds.map(matrixRow).join("")}</tbody></table>
425
450
  </details>`;
@@ -475,7 +500,7 @@ export function marketMapToHtml(config, set) {
475
500
  const obs = model.cell(vendor.id, claimId);
476
501
  if (!obs || obs.evidence.length === 0)
477
502
  return [];
478
- return obs.evidence.map((evidence) => `<div class="ev"><span class="ev-head">${e(claimId)} · ${obs.intensity.toUpperCase()} (${obs.confidence})</span>` +
503
+ return obs.evidence.map((evidence) => `<div class="ev"><span class="ev-head">${e(claimId)} · ${e(obs.intensity.toUpperCase())} (${e(String(obs.confidence ?? ""))})</span>` +
479
504
  `<blockquote>“${e(evidence.text)}”</blockquote>` +
480
505
  `<span class="ev-src">${e(String(evidence.metadata?.url ?? ""))} · capture ${e(String(evidence.metadata?.captureHash ?? "").slice(0, 12))}</span></div>`);
481
506
  });
@@ -0,0 +1,41 @@
1
+ import { type LlmCallOptions } from "./llm.ts";
2
+ import { type FetchPage, type MarketConfig } from "./market.ts";
3
+ /**
4
+ * Cold-start taxonomy bootstrap. `market init` writes a stub for a human
5
+ * analyst to fill in; the self-serve hosted map has no analyst in the loop, so
6
+ * this proposes the claim taxonomy automatically from the seed vendors' own
7
+ * pages.
8
+ *
9
+ * Posture matches the rest of the market layer: the LLM is a *proposal* layer
10
+ * grounded in captured evidence (it only sees text we actually fetched), and
11
+ * everything downstream — capture, classify with verbatim-span verification,
12
+ * front states, the report — stays deterministic over the stored observations.
13
+ * The taxonomy it emits is a normal `market.config.json` a human can still edit.
14
+ */
15
+ export type SeedVendor = {
16
+ url: string;
17
+ /** Display name; derived from the host when omitted. */
18
+ name?: string;
19
+ /** Marks the user's own company as the anchor vendor. */
20
+ anchor?: boolean;
21
+ };
22
+ export type SuggestTaxonomyOptions = {
23
+ category: string;
24
+ vendors: SeedVendor[];
25
+ llm: LlmCallOptions;
26
+ /** Upper bound on proposed claims, to keep classification bounded. */
27
+ maxClaims?: number;
28
+ /** Per-vendor captured-text budget fed to the proposer (chars). */
29
+ perVendorChars?: number;
30
+ /** Test injectables. */
31
+ fetchPage?: FetchPage;
32
+ capturesDir?: string;
33
+ now?: () => Date;
34
+ };
35
+ export type SuggestTaxonomyResult = {
36
+ config: MarketConfig;
37
+ /** Vendors whose homepage capture was empty/failed (excluded from grounding). */
38
+ unreadableVendorIds: string[];
39
+ model: string;
40
+ };
41
+ export declare function suggestMarketConfig(options: SuggestTaxonomyOptions): Promise<SuggestTaxonomyResult>;
@@ -0,0 +1,193 @@
1
+ import { DEFAULT_MODELS, forcedToolCall, } from "./llm.js";
2
+ import { captureMarket, loadCaptureTexts, } from "./market.js";
3
/** Claim cap applied when the caller gives no maxClaims. */
const DEFAULT_MAX_CLAIMS = 16;
/** Characters of captured text kept per vendor when the caller gives no perVendorChars. */
const DEFAULT_PER_VENDOR_CHARS = 6 * 1000;
5
/**
 * Turn free text into a lowercase, hyphen-joined id.
 *
 * Keeps the first `maxWords` runs of [a-z0-9] (after lowercasing) and joins
 * them with "-"; returns "item" when nothing usable remains.
 */
function slugify(value, maxWords = 6) {
    const words = value.toLowerCase().match(/[a-z0-9]+/g) ?? [];
    const joined = words.slice(0, maxWords).join("-");
    return joined === "" ? "item" : joined;
}
17
/**
 * Derive a vendor id seed from a URL: the slug of the label just before the
 * last one in the host (after a leading "www." is dropped), e.g.
 * https://www.stripe.com/ -> stripe. A single-label host uses that label, and
 * text that does not parse as a URL is slugified as-is.
 */
function vendorIdFromUrl(url) {
    let hostname;
    try {
        hostname = new URL(url).hostname;
    }
    catch {
        return slugify(url);
    }
    const parts = hostname.replace(/^www\./, "").split(".");
    const secondLevel = parts.length < 2 ? parts[0] : parts[parts.length - 2];
    return slugify(secondLevel || hostname);
}
30
/**
 * Reserve an id that is not yet in `taken`.
 *
 * Returns `base` when free, otherwise the first free `${base}-2`, `${base}-3`,
 * … The returned id is added to `taken`.
 */
function uniqueId(base, taken) {
    let candidate = base;
    let suffix = 2;
    while (taken.has(candidate)) {
        candidate = `${base}-${suffix}`;
        suffix += 1;
    }
    taken.add(candidate);
    return candidate;
}
44
/**
 * Build provisional vendor records from the seed list.
 *
 * Each record gets a unique id derived from its URL, a display name (the
 * trimmed seed name, or the host without a leading "www." when no name is
 * given), and a urls object holding only the seed URL as `home`.
 */
function provisionalVendors(seeds) {
    const usedIds = new Set();
    const displayHost = (rawUrl) => {
        try {
            return new URL(rawUrl).hostname.replace(/^www\./, "");
        }
        catch {
            return rawUrl;
        }
    };
    return seeds.map((seed) => {
        const id = uniqueId(vendorIdFromUrl(seed.url), usedIds);
        const givenName = seed.name?.trim();
        const name = givenName || displayHost(seed.url);
        const urls = { home: seed.url, pricing: null, product: [] };
        return { id, name, urls };
    });
}
63
+ const TAXONOMY_SCHEMA = {
64
+ type: "object",
65
+ required: ["claims"],
66
+ properties: {
67
+ surfaceRule: {
68
+ type: "string",
69
+ description: "One sentence stating how a reader judges LOUD vs QUIET vs ABSENT for this category (e.g. hero/top-nav = LOUD, deeper pages = QUIET, nowhere = ABSENT).",
70
+ },
71
+ claims: {
72
+ type: "array",
73
+ description: "The distinct capability positions vendors in this category compete on. 8-16 of them. Only include claims you can actually see evidence for on the supplied pages.",
74
+ items: {
75
+ type: "object",
76
+ required: ["capability", "icp", "pricingStructure", "definition"],
77
+ properties: {
78
+ capability: {
79
+ type: "string",
80
+ description: "What is being claimed, precise enough to judge loud/quiet/absent. Max ~10 words.",
81
+ },
82
+ icp: { type: "string", description: "Which buyer/ICP this claim cell addresses (category vocabulary)." },
83
+ pricingStructure: {
84
+ type: "string",
85
+ description: "Which pricing structure the claim implies (e.g. per-seat, usage-based, flat, free-tier).",
86
+ },
87
+ definition: {
88
+ type: "string",
89
+ description: "Operational definition a human (or classifier) uses to score any vendor's page LOUD/QUIET/ABSENT on this claim.",
90
+ },
91
+ terms: {
92
+ type: "array",
93
+ items: { type: "string" },
94
+ description: "Exact buyer phrasings for this claim, for deterministic mention matching. 2-5 terms.",
95
+ },
96
+ },
97
+ },
98
+ },
99
+ vendors: {
100
+ type: "array",
101
+ description: "Optional refinements: a clean display name per seed URL, and a pricing-page URL if one is clearly linked.",
102
+ items: {
103
+ type: "object",
104
+ required: ["seedUrl"],
105
+ properties: {
106
+ seedUrl: { type: "string" },
107
+ name: { type: "string" },
108
+ pricingUrl: { type: ["string", "null"] },
109
+ },
110
+ },
111
+ },
112
+ },
113
+ };
114
/**
 * Assemble the text dossier handed to the taxonomy proposer.
 *
 * For each vendor, the first capture entry for that vendor that carries a
 * captureHash is looked up in `capture.textByHash`. Vendors with no text (or
 * only whitespace) are reported in `unreadable`; the others contribute a
 * "### name (home url)" section holding at most `perVendorChars` characters.
 *
 * @returns `{ dossier, unreadable }` — sections joined by blank lines, and
 *   the ids of vendors that contributed nothing.
 */
function buildDossier(vendors, capture, perVendorChars) {
    const { entries, textByHash } = capture;
    const sections = [];
    const unreadable = [];
    for (const vendor of vendors) {
        const entry = entries.find((candidate) => candidate.vendorId === vendor.id && candidate.captureHash);
        const hash = entry?.captureHash ?? null;
        const text = hash ? (textByHash.get(hash) ?? "") : "";
        if (text.trim()) {
            const excerpt = text.slice(0, perVendorChars);
            sections.push(`### ${vendor.name} (${vendor.urls.home})\n${excerpt}`);
        }
        else {
            unreadable.push(vendor.id);
        }
    }
    return { dossier: sections.join("\n\n"), unreadable };
}
129
+ const INSTRUCTIONS = `You are seeding a competitive "market map" for a category. A market map breaks the category into CLAIMS — the distinct capability positions vendors compete on — so each (vendor x claim) cell can later be scored LOUD / QUIET / ABSENT from that vendor's pages.
130
+
131
+ Propose the claim taxonomy for this category from the competitor homepages below. Rules:
132
+ - Ground every claim in what is actually visible on the supplied pages. Do not invent positions no vendor mentions.
133
+ - Each claim is a cell: a precise capability, the ICP it targets, and the pricing structure it implies.
134
+ - Write each definition so a reader could judge ANY vendor's page LOUD/QUIET/ABSENT against it.
135
+ - Aim for the 8-16 claims that genuinely differentiate vendors. Prefer specific, contested positions over generic table stakes.
136
+ - Provide 2-5 verbatim buyer terms per claim for later mention matching.
137
+ - Optionally return a cleaned display name and a pricing-page URL per seed vendor when evident.`;
138
/**
 * Propose a market config (vendors + claim taxonomy) from seed vendor URLs.
 *
 * Steps: derive provisional vendors from the seeds, capture their home pages,
 * build a text dossier from what was captured, ask the model for a taxonomy
 * through a forced tool call, then normalize the answer into a config.
 *
 * The model's answer is treated as untrusted data: every field is type-checked
 * before use, so a malformed proposal (non-string capability, non-array terms,
 * null vendor entry, …) is dropped instead of raising a TypeError. Vendor
 * refinements are matched on the parsed form of the seed URL so a cosmetic
 * difference such as a trailing slash does not lose the refinement.
 *
 * @param options Category, seed vendors, LLM settings and optional limits /
 *   test injectables.
 * @returns The proposed config, the ids of vendors whose capture had no
 *   readable text, and the model name used.
 * @throws When no seed vendor is given, when no seed page yields readable
 *   text, or when the model proposes no usable claim.
 */
export async function suggestMarketConfig(options) {
    const { category } = options;
    if (options.vendors.length === 0)
        throw new Error("suggestMarketConfig requires at least one seed vendor");
    const maxClaims = options.maxClaims ?? DEFAULT_MAX_CLAIMS;
    const perVendorChars = options.perVendorChars ?? DEFAULT_PER_VENDOR_CHARS;
    const model = options.llm.model ?? DEFAULT_MODELS[options.llm.provider];
    const vendors = provisionalVendors(options.vendors);
    // provisionalVendors maps seeds one-to-one, so the anchor keeps its index.
    const anchorIndex = options.vendors.findIndex((seed) => seed.anchor);
    const anchorId = anchorIndex === -1 ? undefined : vendors[anchorIndex]?.id;
    // Capture the seed homepages so the proposer only sees text we actually
    // fetched.
    await captureMarket({ category, vendors, claims: [] }, { dir: options.capturesDir, runLabel: "bootstrap", fetchPage: options.fetchPage, now: options.now });
    const capture = loadCaptureTexts(category, options.capturesDir);
    const { dossier, unreadable } = buildDossier(vendors, capture, perVendorChars);
    if (!dossier.trim()) {
        throw new Error(`market init --auto: none of the ${vendors.length} seed pages returned readable text — check the URLs are public homepages.`);
    }
    const prompt = `${INSTRUCTIONS}\n\nCategory: ${category}\n\nCompetitor homepages:\n${dossier}`;
    const result = (await forcedToolCall(prompt, "propose_market_taxonomy", TAXONOMY_SCHEMA, model, options.llm)) ?? {};
    // Trimmed text of a string field, or "" for anything that is not a string.
    const textOf = (candidate) => (typeof candidate === "string" ? candidate.trim() : "");
    const takenClaimIds = new Set();
    const proposedClaims = Array.isArray(result.claims) ? result.claims : [];
    const claims = proposedClaims
        .filter((claim) => textOf(claim?.capability) !== "" && textOf(claim?.definition) !== "")
        .slice(0, maxClaims)
        .map((claim) => {
        const terms = Array.isArray(claim.terms) ? claim.terms.map(textOf).filter(Boolean) : [];
        return {
            id: uniqueId(slugify(claim.capability), takenClaimIds),
            capability: textOf(claim.capability),
            icp: textOf(claim.icp) || "general",
            pricingStructure: textOf(claim.pricingStructure) || "unspecified",
            definition: textOf(claim.definition),
            ...(terms.length ? { terms } : {}),
        };
    });
    if (claims.length === 0) {
        throw new Error("market init --auto: the model proposed no usable claims — try again or seed the taxonomy by hand.");
    }
    // Apply optional vendor refinements (display name + pricing URL), matched
    // by seed URL. Keys are the parsed URL so "https://x.com" and
    // "https://x.com/" meet; unparseable text is compared verbatim.
    const urlKey = (rawUrl) => {
        try {
            return new URL(rawUrl).href;
        }
        catch {
            return String(rawUrl);
        }
    };
    const proposedVendors = Array.isArray(result.vendors) ? result.vendors : [];
    const refinementByUrl = new Map(proposedVendors
        .filter((entry) => entry && typeof entry.seedUrl === "string")
        .map((entry) => [urlKey(entry.seedUrl), entry]));
    const refinedVendors = vendors.map((vendor) => {
        const refinement = refinementByUrl.get(urlKey(vendor.urls.home));
        const proposedPricing = refinement?.pricingUrl;
        const pricing = typeof proposedPricing === "string" && /^https?:\/\//i.test(proposedPricing)
            ? proposedPricing
            : vendor.urls.pricing;
        return {
            ...vendor,
            name: textOf(refinement?.name) || vendor.name,
            urls: { ...vendor.urls, pricing },
        };
    });
    const config = {
        category,
        ...(anchorId ? { anchorVendor: anchorId } : {}),
        vendors: refinedVendors,
        claims,
        surfaceRule: textOf(result.surfaceRule) ||
            "LOUD = hero copy OR top-level-nav named product with a dedicated page; QUIET = present on any indexed page below that; ABSENT = nowhere observed; UNOBSERVABLE = capture empty/failed — never score ABSENT from a failed capture.",
    };
    return { config, unreadableVendorIds: unreadable, model };
}
@@ -11,6 +11,12 @@ export type StoredPlan = {
11
11
  status: ApprovalStatus;
12
12
  approvedOperationIds: string[];
13
13
  valueOverrides: Record<string, unknown>;
14
+ /**
15
+ * HMAC of each approved operation's content at approval time (see
16
+ * integrity.ts). Apply re-verifies these so a post-approval edit to the plan
17
+ * file is caught instead of written. Absent on plans approved before 0.26.0.
18
+ */
19
+ approvalDigests?: Record<string, string>;
14
20
  runs: PatchPlanRun[];
15
21
  createdAt: string;
16
22
  updatedAt: string;
package/dist/planStore.js CHANGED
@@ -1,6 +1,7 @@
1
1
  import { chmodSync, mkdirSync, readdirSync, readFileSync } from "node:fs";
2
2
  import { join } from "node:path";
3
3
  import { credentialsDir, ensureSecureHomeDir, writeSecureFile } from "./credentials.js";
4
+ import { computeApprovalDigests, loadOrCreateSigningKey } from "./integrity.js";
4
5
  /**
5
6
  * Plans as JSON files in a directory (default `$FSGTM_HOME/plans`), one file
6
7
  * per plan id. Filesystem-shaped on purpose: greppable, diffable, and any
@@ -90,11 +91,18 @@ export function createFilePlanStore(directory) {
90
91
  throw new Error(`Plan ${planId} has no operation ${operationId}.`);
91
92
  }
92
93
  }
94
+ const approvedOperationIds = Array.from(new Set([...stored.approvedOperationIds, ...operationIds]));
95
+ const mergedOverrides = { ...stored.valueOverrides, ...valueOverrides };
96
+ // Bind the approval to the operation content so apply can detect a
97
+ // post-approval edit. Recompute over ALL approved ops (a later approve
98
+ // call may add overrides that change an earlier op's resolved value).
99
+ const approvalDigests = computeApprovalDigests(stored.plan.operations, approvedOperationIds, mergedOverrides, loadOrCreateSigningKey());
93
100
  return write({
94
101
  ...stored,
95
102
  status: "approved",
96
- approvedOperationIds: Array.from(new Set([...stored.approvedOperationIds, ...operationIds])),
97
- valueOverrides: { ...stored.valueOverrides, ...valueOverrides },
103
+ approvedOperationIds,
104
+ valueOverrides: mergedOverrides,
105
+ approvalDigests,
98
106
  });
99
107
  },
100
108
  async reject(planId) {