npm - fullstackgtm - Versions diffs - 0.25.1 → 0.26.0 - Mend

fullstackgtm 0.25.1 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

package/CHANGELOG.md +97 -0
package/dist/bulkUpdate.js +6 -1
package/dist/cli.js +67 -2
package/dist/connector.js +90 -1
package/dist/connectors/hubspot.js +5 -2
package/dist/connectors/salesforce.js +4 -2
package/dist/connectors/stripe.js +4 -2
package/dist/credentials.js +22 -1
package/dist/dedupe.d.ts +6 -0
package/dist/dedupe.js +24 -1
package/dist/enrich.js +24 -2
package/dist/enrichApollo.js +5 -2
package/dist/index.d.ts +1 -0
package/dist/index.js +1 -0
package/dist/integrity.d.ts +30 -0
package/dist/integrity.js +128 -0
package/dist/market.d.ts +1 -0
package/dist/market.js +144 -8
package/dist/marketReport.d.ts +9 -0
package/dist/marketReport.js +29 -4
package/dist/marketTaxonomy.d.ts +41 -0
package/dist/marketTaxonomy.js +193 -0
package/dist/planStore.d.ts +6 -0
package/dist/planStore.js +10 -2
package/dist/schedule.d.ts +17 -0
package/dist/schedule.js +87 -2
package/dist/types.d.ts +16 -0
package/package.json +1 -1
package/src/bulkUpdate.ts +6 -1
package/src/cli.ts +80 -1
package/src/connector.ts +96 -1
package/src/connectors/hubspot.ts +5 -2
package/src/connectors/salesforce.ts +4 -2
package/src/connectors/stripe.ts +4 -2
package/src/credentials.ts +24 -0
package/src/dedupe.ts +23 -1
package/src/enrich.ts +25 -2
package/src/enrichApollo.ts +5 -2
package/src/index.ts +8 -0
package/src/integrity.ts +146 -0
package/src/market.ts +129 -8
package/src/marketReport.ts +30 -4
package/src/marketTaxonomy.ts +288 -0
package/src/planStore.ts +23 -4
package/src/schedule.ts +98 -2
package/src/types.ts +16 -0

package/src/marketTaxonomy.ts ADDED Viewed

@@ -0,0 +1,288 @@
+import {
+  DEFAULT_MODELS,
+  forcedToolCall,
+  type LlmCallOptions,
+} from "./llm.ts";
+import {
+  captureMarket,
+  type FetchPage,
+  loadCaptureTexts,
+  type MarketClaim,
+  type MarketConfig,
+  type MarketVendor,
+} from "./market.ts";
+/**
+ * Cold-start taxonomy bootstrap. `market init` writes a stub for a human
+ * analyst to fill in; the self-serve hosted map has no analyst in the loop, so
+ * this proposes the claim taxonomy automatically from the seed vendors' own
+ * pages.
+ *
+ * Posture matches the rest of the market layer: the LLM is a *proposal* layer
+ * grounded in captured evidence (it only sees text we actually fetched), and
+ * everything downstream — capture, classify with verbatim-span verification,
+ * front states, the report — stays deterministic over the stored observations.
+ * The taxonomy it emits is a normal `market.config.json` a human can still edit.
+ */
+/** One seed competitor supplied by the caller of `suggestMarketConfig`. */
+export type SeedVendor = {
+  /** Homepage URL; stored as the vendor's `urls.home` and used to derive its id. */
+  url: string;
+  /** Display name; derived from the host when omitted. */
+  name?: string;
+  /** Marks the user's own company as the anchor vendor. */
+  anchor?: boolean;
+};
+/** Inputs to `suggestMarketConfig`. */
+export type SuggestTaxonomyOptions = {
+  /** Category name; handed to the capture step and included in the proposer prompt. */
+  category: string;
+  /** Seed vendors; `suggestMarketConfig` throws when this list is empty. */
+  vendors: SeedVendor[];
+  /** LLM call settings; when `model` is unset the provider's entry in `DEFAULT_MODELS` is used. */
+  llm: LlmCallOptions;
+  /** Upper bound on proposed claims, to keep classification bounded. Defaults to `DEFAULT_MAX_CLAIMS`. */
+  maxClaims?: number;
+  /** Per-vendor captured-text budget fed to the proposer (chars). Defaults to `DEFAULT_PER_VENDOR_CHARS`. */
+  perVendorChars?: number;
+  /** Test injectables. */
+  fetchPage?: FetchPage;
+  capturesDir?: string;
+  now?: () => Date;
+};
+/** What `suggestMarketConfig` resolves to. */
+export type SuggestTaxonomyResult = {
+  /** The proposed market config: category, vendors, claims, surface rule, and the anchor vendor when one was marked. */
+  config: MarketConfig;
+  /** Vendors whose homepage capture was empty/failed (excluded from grounding). */
+  unreadableVendorIds: string[];
+  /** Model id the proposal was requested from (explicit `llm.model`, else the provider default). */
+  model: string;
+};
+/** Cap on proposed claims used when `maxClaims` is not supplied. */
+const DEFAULT_MAX_CLAIMS = 16;
+/** Per-vendor slice of captured text, in characters, used when `perVendorChars` is not supplied. */
+const DEFAULT_PER_VENDOR_CHARS = 6_000;
+/**
+ * Turn free text (a claim capability or a host label) into a stable, readable id.
+ *
+ * The text is lowercased, every maximal run of `a-z0-9` becomes one word, at most
+ * `maxWords` words are kept, and the words are joined with "-". When nothing
+ * usable remains the fallback id "item" is returned.
+ */
+function slugify(value: string, maxWords = 6): string {
+  const words = value.toLowerCase().match(/[a-z0-9]+/g) ?? [];
+  const kept = words.slice(0, maxWords);
+  if (kept.length === 0) return "item";
+  return kept.join("-");
+}
+/**
+ * Vendor id seed taken from the host's registrable label:
+ * https://www.stripe.com/ -> stripe, https://www.example.co.uk/ -> example.
+ *
+ * The label is normally the second-to-last one. When the host ends in a generic
+ * second-level label under a two-letter country TLD (co.uk, com.au, ac.jp, …) the
+ * label before that pair is used instead, so such vendors do not all collapse to
+ * "co" or "com". This is a small heuristic, not a Public Suffix List lookup.
+ * A trailing root dot ("example.com.") is ignored. Input that does not parse as
+ * a URL is slugified as-is.
+ */
+function vendorIdFromUrl(url: string): string {
+  let host: string;
+  try {
+    host = new URL(url).hostname;
+  } catch {
+    return slugify(url);
+  }
+  // filter(Boolean) drops the empty label a trailing root dot would leave behind.
+  const labels = host.replace(/^www\./, "").split(".").filter(Boolean);
+  const genericSecondLevel = new Set(["ac", "co", "com", "edu", "gov", "net", "org"]);
+  const tld = labels[labels.length - 1] ?? "";
+  let index = Math.max(labels.length - 2, 0);
+  if (labels.length >= 3 && tld.length === 2 && genericSecondLevel.has(labels[index] ?? "")) {
+    // e.g. ["example", "co", "uk"]: step back from "co" to "example".
+    index -= 1;
+  }
+  return slugify(labels[index] || host);
+}
+/**
+ * Reserve an id that is not yet in `taken`.
+ *
+ * Returns `base` when it is free; otherwise the first free of `base-2`,
+ * `base-3`, … The returned id is added to `taken` before returning.
+ */
+function uniqueId(base: string, taken: Set<string>): string {
+  let candidate = base;
+  let suffix = 1;
+  while (taken.has(candidate)) {
+    suffix += 1;
+    candidate = `${base}-${suffix}`;
+  }
+  taken.add(candidate);
+  return candidate;
+}
+/**
+ * Build the initial vendor list from the seeds, in seed order.
+ *
+ * Each vendor gets a unique id derived from its URL, a display name (the trimmed
+ * seed name, else the host without a leading "www.", else the raw URL when it
+ * does not parse), and `urls` holding only the seed URL as `home`.
+ */
+function provisionalVendors(seeds: SeedVendor[]): MarketVendor[] {
+  const usedIds = new Set<string>();
+  const displayHost = (rawUrl: string): string => {
+    try {
+      return new URL(rawUrl).hostname.replace(/^www\./, "");
+    } catch {
+      return rawUrl;
+    }
+  };
+  const result: MarketVendor[] = [];
+  for (const seed of seeds) {
+    const id = uniqueId(vendorIdFromUrl(seed.url), usedIds);
+    const fallbackName = displayHost(seed.url);
+    const name = seed.name?.trim() || fallbackName;
+    result.push({ id, name, urls: { home: seed.url, pricing: null, product: [] } });
+  }
+  return result;
+}
+/** Shape of one claim as requested from the model by `TAXONOMY_SCHEMA`, before normalization. */
+type ProposedClaim = {
+  /** What is being claimed; also the source of the claim id. */
+  capability: string;
+  /** Buyer/ICP the claim addresses. */
+  icp: string;
+  /** Pricing structure the claim implies. */
+  pricingStructure: string;
+  /** Operational definition used to score a vendor page against the claim. */
+  definition: string;
+  /** Optional buyer phrasings for mention matching. */
+  terms?: string[];
+};
+/** Optional per-vendor refinement from the model; `seedUrl` is the key it is matched back to a seed by. */
+type ProposedVendor = { seedUrl: string; name?: string; pricingUrl?: string | null };
+/**
+ * JSON schema passed to `forcedToolCall` for the `propose_market_taxonomy` tool.
+ * Only `claims` is required at the top level; `surfaceRule` and `vendors` are
+ * optional. The `description` strings are part of what is sent with the schema,
+ * so editing them changes what the model is asked for.
+ */
+const TAXONOMY_SCHEMA = {
+  type: "object",
+  required: ["claims"],
+  properties: {
+    surfaceRule: {
+      type: "string",
+      description:
+        "One sentence stating how a reader judges LOUD vs QUIET vs ABSENT for this category (e.g. hero/top-nav = LOUD, deeper pages = QUIET, nowhere = ABSENT).",
+    },
+    claims: {
+      type: "array",
+      description:
+        "The distinct capability positions vendors in this category compete on. 8-16 of them. Only include claims you can actually see evidence for on the supplied pages.",
+      items: {
+        type: "object",
+        required: ["capability", "icp", "pricingStructure", "definition"],
+        properties: {
+          capability: {
+            type: "string",
+            description: "What is being claimed, precise enough to judge loud/quiet/absent. Max ~10 words.",
+          },
+          icp: { type: "string", description: "Which buyer/ICP this claim cell addresses (category vocabulary)." },
+          pricingStructure: {
+            type: "string",
+            description: "Which pricing structure the claim implies (e.g. per-seat, usage-based, flat, free-tier).",
+          },
+          definition: {
+            type: "string",
+            description:
+              "Operational definition a human (or classifier) uses to score any vendor's page LOUD/QUIET/ABSENT on this claim.",
+          },
+          terms: {
+            type: "array",
+            items: { type: "string" },
+            description: "Exact buyer phrasings for this claim, for deterministic mention matching. 2-5 terms.",
+          },
+        },
+      },
+    },
+    vendors: {
+      type: "array",
+      description: "Optional refinements: a clean display name per seed URL, and a pricing-page URL if one is clearly linked.",
+      items: {
+        type: "object",
+        required: ["seedUrl"],
+        properties: {
+          seedUrl: { type: "string" },
+          name: { type: "string" },
+          pricingUrl: { type: ["string", "null"] },
+        },
+      },
+    },
+  },
+} as const;
+/**
+ * Assemble the text dossier shown to the proposer, one section per readable vendor.
+ *
+ * For each vendor the first capture entry that carries a hash is looked up and its
+ * text truncated to `perVendorChars`. Vendors with no such entry, or whose text is
+ * empty or whitespace-only, are listed in `unreadable` and left out of the dossier.
+ * Sections are joined with a blank line.
+ */
+function buildDossier(
+  vendors: MarketVendor[],
+  capture: ReturnType<typeof loadCaptureTexts>,
+  perVendorChars: number,
+): { dossier: string; unreadable: string[] } {
+  const sections: string[] = [];
+  const unreadable: string[] = [];
+  for (const vendor of vendors) {
+    const match = capture.entries.find((entry) => entry.vendorId === vendor.id && entry.captureHash);
+    const hash = match?.captureHash ?? null;
+    let text = "";
+    if (hash) {
+      text = capture.textByHash.get(hash) ?? "";
+    }
+    if (text.trim() === "") {
+      unreadable.push(vendor.id);
+    } else {
+      const excerpt = text.slice(0, perVendorChars);
+      sections.push(`### ${vendor.name} (${vendor.urls.home})\n${excerpt}`);
+    }
+  }
+  return { dossier: sections.join("\n\n"), unreadable };
+}
+/**
+ * Fixed instruction preamble of the proposer prompt. `suggestMarketConfig`
+ * appends the category name and the captured-homepage dossier after it.
+ */
+const INSTRUCTIONS = `You are seeding a competitive "market map" for a category. A market map breaks the category into CLAIMS — the distinct capability positions vendors compete on — so each (vendor x claim) cell can later be scored LOUD / QUIET / ABSENT from that vendor's pages.
+Propose the claim taxonomy for this category from the competitor homepages below. Rules:
+- Ground every claim in what is actually visible on the supplied pages. Do not invent positions no vendor mentions.
+- Each claim is a cell: a precise capability, the ICP it targets, and the pricing structure it implies.
+- Write each definition so a reader could judge ANY vendor's page LOUD/QUIET/ABSENT against it.
+- Aim for the 8-16 claims that genuinely differentiate vendors. Prefer specific, contested positions over generic table stakes.
+- Provide 2-5 verbatim buyer terms per claim for later mention matching.
+- Optionally return a cleaned display name and a pricing-page URL per seed vendor when evident.`;
+/**
+ * Propose a complete `MarketConfig` for a category from the seed vendors' homepages.
+ *
+ * Steps: derive provisional vendors from the seeds, capture their homepages, build
+ * a per-vendor text dossier, request a claim taxonomy through a forced tool call,
+ * then normalize the reply into claims and optional vendor refinements.
+ *
+ * The tool-call reply is treated as untrusted input: anything that is not an
+ * object, an array, or a string where one is expected is ignored rather than
+ * dereferenced. A claim is kept only when both its capability and its definition
+ * are non-blank strings; blank terms are dropped and `terms` is omitted when
+ * none remain.
+ *
+ * @param options Category, seed vendors, LLM settings, and optional limits / test injectables.
+ * @returns The proposed config, the ids of vendors with no readable capture, and the model used.
+ * @throws Error when no seed vendor is supplied, when no seed page yields readable
+ *   text, or when the model proposes no usable claims.
+ */
+export async function suggestMarketConfig(options: SuggestTaxonomyOptions): Promise<SuggestTaxonomyResult> {
+  const { category } = options;
+  if (options.vendors.length === 0) throw new Error("suggestMarketConfig requires at least one seed vendor");
+  const maxClaims = options.maxClaims ?? DEFAULT_MAX_CLAIMS;
+  const perVendorChars = options.perVendorChars ?? DEFAULT_PER_VENDOR_CHARS;
+  const model = options.llm.model ?? DEFAULT_MODELS[options.llm.provider];
+  const vendors = provisionalVendors(options.vendors);
+  // provisionalVendors preserves seed order, so the anchor seed's index is the anchor vendor's index.
+  const anchorIndex = options.vendors.findIndex((seed) => seed.anchor);
+  const anchorId = anchorIndex >= 0 ? vendors[anchorIndex]?.id : undefined;
+  // Capture the seed homepages so the proposer only sees text we actually
+  // fetched (the SSRF guard in captureMarket applies to these user-supplied URLs).
+  await captureMarket(
+    { category, vendors, claims: [] },
+    { dir: options.capturesDir, runLabel: "bootstrap", fetchPage: options.fetchPage, now: options.now },
+  );
+  const capture = loadCaptureTexts(category, options.capturesDir);
+  const { dossier, unreadable } = buildDossier(vendors, capture, perVendorChars);
+  if (!dossier.trim()) {
+    throw new Error(
+      `market init --auto: none of the ${vendors.length} seed pages returned readable text — check the URLs are public homepages.`,
+    );
+  }
+  const prompt = `${INSTRUCTIONS}\n\nCategory: ${category}\n\nCompetitor homepages:\n${dossier}`;
+  const reply: unknown = await forcedToolCall(prompt, "propose_market_taxonomy", TAXONOMY_SCHEMA, model, options.llm);
+  // Narrowing helpers for the untrusted reply.
+  const cleanText = (value: unknown): string => (typeof value === "string" ? value.trim() : "");
+  const asObject = <T>(value: unknown): Partial<Record<keyof T, unknown>> | null =>
+    typeof value === "object" && value !== null ? (value as Partial<Record<keyof T, unknown>>) : null;
+  const result = asObject<{ surfaceRule: unknown; claims: unknown; vendors: unknown }>(reply) ?? {};
+  const proposedClaims: unknown[] = Array.isArray(result.claims) ? result.claims : [];
+  const takenClaimIds = new Set<string>();
+  const claims: MarketClaim[] = proposedClaims
+    .flatMap((candidate) => {
+      const proposed = asObject<ProposedClaim>(candidate);
+      const capability = cleanText(proposed?.capability);
+      const definition = cleanText(proposed?.definition);
+      // Unusable entries are dropped before the cap so they do not consume a slot.
+      if (!proposed || !capability || !definition) return [];
+      const terms = Array.isArray(proposed.terms)
+        ? proposed.terms.map((term) => cleanText(term)).filter(Boolean)
+        : [];
+      return [
+        {
+          capability,
+          icp: cleanText(proposed.icp) || "general",
+          pricingStructure: cleanText(proposed.pricingStructure) || "unspecified",
+          definition,
+          terms,
+        },
+      ];
+    })
+    .slice(0, maxClaims)
+    .map((claim) => ({
+      id: uniqueId(slugify(claim.capability), takenClaimIds),
+      capability: claim.capability,
+      icp: claim.icp,
+      pricingStructure: claim.pricingStructure,
+      definition: claim.definition,
+      ...(claim.terms.length > 0 ? { terms: claim.terms } : {}),
+    }));
+  if (claims.length === 0) {
+    throw new Error("market init --auto: the model proposed no usable claims — try again or seed the taxonomy by hand.");
+  }
+  // Apply optional vendor refinements (display name + pricing URL), matched by seed URL.
+  // As with a Map built from pairs, a later entry for the same seed URL wins.
+  const refinementByUrl = new Map<string, Partial<Record<keyof ProposedVendor, unknown>>>();
+  for (const candidate of Array.isArray(result.vendors) ? result.vendors : []) {
+    const refinement = asObject<ProposedVendor>(candidate);
+    if (refinement && typeof refinement.seedUrl === "string") {
+      refinementByUrl.set(refinement.seedUrl, refinement);
+    }
+  }
+  const refinedVendors: MarketVendor[] = vendors.map((vendor) => {
+    const refinement = refinementByUrl.get(vendor.urls.home);
+    const proposedPricing = cleanText(refinement?.pricingUrl);
+    // Only absolute http(s) URLs are accepted from the model; anything else keeps the existing value.
+    const pricing = /^https?:\/\//i.test(proposedPricing) ? proposedPricing : vendor.urls.pricing;
+    return {
+      ...vendor,
+      name: cleanText(refinement?.name) || vendor.name,
+      urls: { ...vendor.urls, pricing },
+    };
+  });
+  const config: MarketConfig = {
+    category,
+    ...(anchorId ? { anchorVendor: anchorId } : {}),
+    vendors: refinedVendors,
+    claims,
+    surfaceRule:
+      cleanText(result.surfaceRule) ||
+      "LOUD = hero copy OR top-level-nav named product with a dedicated page; QUIET = present on any indexed page below that; ABSENT = nowhere observed; UNOBSERVABLE = capture empty/failed — never score ABSENT from a failed capture.",
+  };
+  return { config, unreadableVendorIds: unreadable, model };
+}

package/src/planStore.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 import { chmodSync, mkdirSync, readdirSync, readFileSync } from "node:fs";
 import { join } from "node:path";
 import { credentialsDir, ensureSecureHomeDir, writeSecureFile } from "./credentials.ts";
+import { computeApprovalDigests, loadOrCreateSigningKey } from "./integrity.ts";
 import type { ApprovalStatus, PatchPlan, PatchPlanRun } from "./types.ts";
 /**
@@ -16,6 +17,12 @@ export type StoredPlan = {
   status: ApprovalStatus;
   approvedOperationIds: string[];
   valueOverrides: Record<string, unknown>;
+  /**
+   * HMAC of each approved operation's content at approval time (see
+   * integrity.ts). Apply re-verifies these so a post-approval edit to the plan
+   * file is caught instead of written. Absent on plans approved before 0.26.0.
+   */
+  approvalDigests?: Record<string, string>;
   runs: PatchPlanRun[];
   createdAt: string;
   updatedAt: string;
@@ -125,13 +132,25 @@ export function createFilePlanStore(directory?: string): PlanStore {
           throw new Error(`Plan ${planId} has no operation ${operationId}.`);
         }
       }
+      const approvedOperationIds = Array.from(
+        new Set([...stored.approvedOperationIds, ...operationIds]),
+      );
+      const mergedOverrides = { ...stored.valueOverrides, ...valueOverrides };
+      // Bind the approval to the operation content so apply can detect a
+      // post-approval edit. Recompute over ALL approved ops (a later approve
+      // call may add overrides that change an earlier op's resolved value).
+      const approvalDigests = computeApprovalDigests(
+        stored.plan.operations,
+        approvedOperationIds,
+        mergedOverrides,
+        loadOrCreateSigningKey(),
+      );
       return write({
         ...stored,
         status: "approved",
-        approvedOperationIds: Array.from(
-          new Set([...stored.approvedOperationIds, ...operationIds]),
-        ),
-        valueOverrides: { ...stored.valueOverrides, ...valueOverrides },
+        approvedOperationIds,
+        valueOverrides: mergedOverrides,
+        approvalDigests,
       });
     },

package/src/schedule.ts CHANGED Viewed

@@ -124,6 +124,12 @@ export function validateSchedulableArgv(argv: string[]): void {
           "plan store's approval state. Use `apply --plan-id <id>` and approve via `plans approve`.",
       );
     }
+    if (argv.includes("--value")) {
+      throw new Error(
+        "A scheduled apply cannot take --value — an unattended run must write exactly the values " +
+          "signed at approval. Set the value with `plans approve --value <op>=<v>` and re-approve.",
+      );
+    }
     return;
   }
   if (!Object.hasOwn(SCHEDULABLE, head)) {
@@ -145,6 +151,75 @@ export function validateSchedulableArgv(argv: string[]): void {
   }
 }
+/**
+ * Entry-point guard for operator-chosen schedule labels.
+ *
+ * The label is free text, but `renderManagedBlock` later writes it into a crontab
+ * comment line. A newline or carriage return inside it would end that comment and
+ * let the rest be read as a separate crontab entry on `schedule install`, so any
+ * label carrying a control character is rejected here. `renderManagedBlock` also
+ * strips such characters as a fallback for a hand-edited schedules.json.
+ *
+ * @throws Error when the label contains a newline or other control character.
+ */
+export function assertSingleLineLabel(label: string): void {
+  if (!hasControlChar(label)) return;
+  const reason = "(they would inject lines into the managed crontab block). Use a plain single-line name.";
+  throw new Error(`A schedule --label cannot contain newlines or control characters ${reason}`);
+}
+/**
+ * Report whether a string contains a line-breaking or control character.
+ *
+ * Matched code units: the C0 controls U+0000–U+001F (which include tab, LF, VT,
+ * FF and CR), DEL U+007F, and the separators NEL U+0085, LS U+2028 and PS U+2029.
+ * The last three are defense-in-depth for the future modal/aws scaffold
+ * renderers, whose target formats may treat them as line breaks. Other C1
+ * controls and non-breaking spaces are not matched.
+ */
+export function hasControlChar(value: string): boolean {
+  // No `u` flag: the class is tested per UTF-16 code unit, like a charCodeAt scan.
+  return /[\u0000-\u001f\u007f\u0085\u2028\u2029]/.test(value);
+}
+/**
+ * Last-resort guard applied when a crontab comment line is rendered: every
+ * control/separator character (C0 controls, DEL, NEL, LS, PS) becomes a space,
+ * runs of two or more spaces collapse to one, and the result is trimmed.
+ */
+function sanitizeCrontabComment(value: string): string {
+  const withoutControls = value.replace(/[\u0000-\u001f\u007f\u0085\u2028\u2029]/g, " ");
+  const singleSpaced = withoutControls.replace(/ {2,}/g, " ");
+  return singleSpaced.trim();
+}
+/**
+ * Check every value of a schedule entry that `renderManagedBlock` interpolates
+ * into the crontab, not only the label.
+ *
+ * The executable line embeds `cron` and `id` unescaped, and `schedule install`
+ * renders entries directly from schedules.json. A hand-edited or tampered entry
+ * with a newline in profile, cron, id, label, or any argv token could therefore
+ * inject a live crontab line. Such an entry is refused instead of rendered.
+ * Well-formed entries never trip this: cron is parser-validated, id is an fnv1a
+ * hex hash, and the label is guarded at add-time.
+ *
+ * @throws Error naming the first offending field, checked in the order profile,
+ *   cron, id, label, argv[0], argv[1], …
+ */
+function assertRenderableEntry(profile: string, entry: ScheduleEntry): void {
+  const named: Array<[string, string]> = [
+    ["profile", profile],
+    ["cron", entry.cron],
+    ["id", entry.id],
+    ["label", entry.label],
+  ];
+  entry.argv.forEach((token, position) => {
+    named.push([`argv[${position}]`, token]);
+  });
+  const offender = named.find(([, value]) => hasControlChar(value));
+  if (offender === undefined) return;
+  const [name] = offender;
+  throw new Error(
+    `Refusing to render schedule entry ${entry.id}: its ${name} contains a newline or control character. ` +
+      "The schedules.json store has been tampered with or corrupted — repair it before installing.",
+  );
+}
 /**
  * Split a `schedule add "<command>"` string into argv, honoring single and
  * double quotes (no escapes, no expansion — this is tokenization, not shell).
@@ -206,7 +281,13 @@ const CRON_FIELD_SPECS = [
 ] as const;
 export function parseCron(expression: string): CronExpression {
-  const fields = expression.trim().split(/\s+/);
+  // Reject non-ASCII whitespace and control chars: JS \s splits on U+00A0,
+  // U+3000, etc., but Vixie cron's field separator is only space/tab. A source
+  // carrying them would parse here yet be misparsed or rejected by `crontab -`.
+  if (hasControlChar(expression) || /[^\x20-\x7e]/.test(expression)) {
+    throw new Error(`Invalid cron expression "${expression}": only ASCII characters, space, and tab are allowed.`);
+  }
+  const fields = expression.trim().split(/[ \t]+/);
   if (fields.length !== 5) {
     throw new Error(
       `Invalid cron expression "${expression}": expected 5 fields ` +
@@ -559,13 +640,28 @@ export function renderManagedBlock(
   entries: ScheduleEntry[],
   cliInvocation: string,
 ): string {
+  // cliInvocation is spliced raw into the executable line; it is built from
+  // process.execPath, the script path, and FSGTM_HOME (cli.ts), so a newline in
+  // FSGTM_HOME would inject a crontab line. Validate it like the entry fields —
+  // single-quote shell-escaping does NOT defend cron's line parser.
+  if (hasControlChar(cliInvocation)) {
+    throw new Error(
+      "Refusing to render the managed crontab: the resolved CLI invocation (node path, script path, " +
+        "or FSGTM_HOME) contains a newline or control character. Check $FSGTM_HOME.",
+    );
+  }
   const { open, close } = crontabSentinels(profile);
   const lines = [
     open,
     "# Managed by `fullstackgtm schedule install` — replaced wholesale on re-install; do not edit.",
   ];
   for (const entry of entries) {
-    lines.push(`# ${entry.label} (${entry.id}): ${entry.argv.join(" ")}`);
+    // Refuse to render any entry whose interpolated fields carry a control char
+    // — the executable line below embeds cron/id raw, so a tampered store could
+    // otherwise inject a live crontab line. The comment line is additionally
+    // sanitized so a benign-but-messy label can't break it.
+    assertRenderableEntry(profile, entry);
+    lines.push(sanitizeCrontabComment(`# ${entry.label} (${entry.id}): ${entry.argv.join(" ")}`));
     lines.push(`${entry.cron} ${cliInvocation} schedule run ${entry.id} --profile ${profile} --trigger cron`);
   }
   lines.push(close);

package/src/types.ts CHANGED Viewed

@@ -303,6 +303,22 @@ export type PatchOperation = {
    * member of the group.
    */
   groupId?: string;
+  /**
+   * Set only when a human explicitly chose to archive a record that shares an
+   * identity key with another (`bulk-update --archive --force-archive-duplicates`).
+   * Without it, apply refuses to archive_record a record the live snapshot still
+   * sees as a duplicate — archiving a duplicate discards data that merging keeps,
+   * and an agent on a dedupe task must not silently substitute archive for merge.
+   */
+  forceArchiveDuplicate?: boolean;
+  /**
+   * For irreversible operations (merge_records, archive_record): the field
+   * values of the records that will be destroyed, captured at plan-build time.
+   * Merges and archives cannot be undone on any provider, so this is the
+   * recovery artifact a human uses to recreate a record by hand if a merge or
+   * archive was wrong — the plan file IS the backup.
+   */
+  recoverySnapshot?: Record<string, unknown>[];
 };
 /**