fullstackgtm 0.25.1 → 0.25.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -5,6 +5,50 @@ The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
5
5
  and the project adheres to [Semantic Versioning](https://semver.org/).
6
6
  The path to 1.0 is planned in [docs/roadmap-to-1.0.md](./docs/roadmap-to-1.0.md).
7
7
 
8
+ ## [0.25.2] — 2026-06-15
9
+
10
+ Security hardening I — confirmed fixes from an adversarial audit (each verified
11
+ by a refute-by-default re-attack; the crontab and report fixes took three
12
+ rounds because the re-attack kept finding deeper paths).
13
+
14
+ ### Security
15
+
16
+ - **Crontab injection via `schedule install` (was: arbitrary code execution).**
17
+ `schedule add --label` rejects newlines/control chars; `renderManagedBlock`
18
+ now refuses to render any entry (or CLI invocation) whose interpolated
19
+ fields — label, cron, id, profile, argv, **and the resolved node/script
20
+ path + `FSGTM_HOME`** — carry a control character, so a hand-edited
21
+ `schedules.json` or a newline in `FSGTM_HOME` can no longer inject a live
22
+ crontab line. `parseCron` now accepts printable ASCII only, with space as the field separator (rejects tab, other control characters, and Unicode
23
+ whitespace), and a stray `%` in a path is escaped (`\%`) so it can't truncate
24
+ the managed line.
25
+ - **SSRF in `market capture`.** Page fetches now allow only http/https, refuse
26
+ any host that is or resolves to a private/loopback/link-local/CGNAT/metadata
27
+ address (IPv4, IPv6, and IPv4-mapped IPv6 in dotted or hex form), follow
28
+ redirects manually with per-hop re-validation, and cap time/body size.
29
+ - **Stored XSS in the market HTML report.** The embedded JSON data island is
30
+ serialized with `<`/`>`/`&`/U+2028/U+2029 escaped (no `</script>` breakout),
31
+ the tooltip is built with `textContent` (no `innerHTML`), and the two
32
+ remaining raw sinks (anchor vendor name, evidence-appendix confidence) are
33
+ now `escapeHtml`'d; `validateObservationSet` rejects a non-enum `confidence`
34
+ so an `observe --from` file can't smuggle markup.
35
+ - **Provider response bodies no longer leak into errors.** HubSpot, Salesforce,
36
+ Apollo, and Stripe connectors throw status-line-only errors (a 4xx body can
37
+ echo submitted emails/domains or the key, and these errors are persisted into
38
+ scheduled-run records).
39
+ - **CSV/formula injection neutralized at the enrich write path.** Ingested
40
+ string values beginning with `= + - @` / tab / CR are prefixed with `'` so
41
+ they can't execute if the CRM is later exported to a spreadsheet; numeric
42
+ values keep full fidelity.
43
+ - **Credential-store mode enforced on read, not just write.** A pre-existing
44
+ `credentials.json` with group/other permissions is re-tightened to 0600 (and
45
+ warned) on read, closing the inherited-loose-permissions gap.
46
+
47
+ Known residuals tracked for follow-up: `marketMapToMarkdown` does not
48
+ HTML-escape (safe in terminals/GitHub; only a risk if a downstream renderer
49
+ trusts raw HTML — to be addressed with the report work); the credential read
50
+ check is reactive (a loose file is exposed until the next CLI read).
51
+
8
52
  ## [0.25.1] — 2026-06-12
9
53
 
10
54
  Docs-sync release — no code changes.
package/dist/cli.js CHANGED
@@ -27,7 +27,7 @@ import { marketMapToHtml, marketMapToMarkdown } from "./marketReport.js";
27
27
  import { DEFAULT_RUBRIC, detectProviderFromKey, extractInsightsLlm, parseRubric, resolveLlmCredential, scoreCallLlm, validateLlmKey, } from "./llm.js";
28
28
  import { buildEnrichPlan, createFileEnrichRunStore, DEFAULT_STALE_DAYS, ENRICH_CONFIG_FILE_NAME, enrichRunId, inferIngestObjectType, latestStamps, loadEnrichConfig, parseCsv, resolveCrmField, selectStaleWork, stagedSourceRecords, staleDaysFor, } from "./enrich.js";
29
29
  import { apolloPullKeysForAppend, apolloPullKeysForRefresh, createApolloClient, pullApolloRecords, } from "./enrichApollo.js";
30
- import { computeMissedFirings, createFileScheduleRunStore, createFileScheduleStore, nextCronFiring, parseCron, renderManagedBlock, replaceManagedBlock, scheduleId, systemCrontabIo, tokenizeCommand, validateSchedulableArgv, } from "./schedule.js";
30
+ import { computeMissedFirings, createFileScheduleRunStore, createFileScheduleStore, nextCronFiring, parseCron, renderManagedBlock, replaceManagedBlock, assertSingleLineLabel, hasControlChar, scheduleId, systemCrontabIo, tokenizeCommand, validateSchedulableArgv, } from "./schedule.js";
31
31
  import { resolveRecord } from "./resolve.js";
32
32
  import { buildBulkUpdatePlan } from "./bulkUpdate.js";
33
33
  import { buildDedupePlan } from "./dedupe.js";
@@ -1614,6 +1614,7 @@ trigger: manual. status shows next firing and surfaces missed firings
1614
1614
  const createdAt = new Date().toISOString();
1615
1615
  const label = option(rest, "--label") ??
1616
1616
  argv.filter((arg) => !arg.startsWith("--")).slice(0, 2).join("-").replace(/[^\w.-]+/g, "-");
1617
+ assertSingleLineLabel(label);
1617
1618
  const entry = {
1618
1619
  id: scheduleId(label, cron.source, argv, createdAt),
1619
1620
  label,
@@ -1819,13 +1820,27 @@ function scheduleCliInvocation() {
1819
1820
  if (!script || !existsSync(script)) {
1820
1821
  throw new Error("Cannot resolve the fullstackgtm entry point for crontab lines (process.argv[1] is missing).");
1821
1822
  }
1823
+ // A newline/control char in any of these flows verbatim into the crontab
1824
+ // executable line; single-quote escaping defends the shell, not cron's line
1825
+ // parser. Refuse early with a clear message (renderManagedBlock re-checks).
1826
+ for (const [name, value] of [
1827
+ ["FSGTM_HOME", process.env.FSGTM_HOME],
1828
+ ["the node executable path", process.execPath],
1829
+ ["the CLI script path", script],
1830
+ ]) {
1831
+ if (value && hasControlChar(value)) {
1832
+ throw new Error(`Cannot install schedules: ${name} contains a newline or control character.`);
1833
+ }
1834
+ }
1822
1835
  const quote = (value) => `'${value.replace(/'/g, `'\\''`)}'`;
1823
1836
  const parts = [quote(process.execPath)];
1824
1837
  if (script.endsWith(".ts"))
1825
1838
  parts.push("--experimental-strip-types");
1826
1839
  parts.push(quote(script));
1827
1840
  const home = process.env.FSGTM_HOME ? `FSGTM_HOME=${quote(process.env.FSGTM_HOME)} ` : "";
1828
- return home + parts.join(" ");
1841
+ // cron treats an unescaped `%` in the command field as a newline/stdin split.
1842
+ // Escape it as `\%` so a stray `%` in a path can't truncate the managed line.
1843
+ return (home + parts.join(" ")).replace(/%/g, "\\%");
1829
1844
  }
1830
1845
  /**
1831
1846
  * The single provider entry point: execute the scheduled command in-process
@@ -44,8 +44,11 @@ export function createHubspotConnector(options) {
44
44
  throw new Error(`Cannot reach HubSpot at ${baseUrl}${cause}. Check network access.`);
45
45
  }
46
46
  if (!response.ok) {
47
- const body = await response.text();
48
- throw new Error(`HubSpot API error ${response.status}: ${body}`);
47
+ // Status line only — HubSpot 4xx bodies echo submitted property values
48
+ // (contact emails, company domains) and the request payload, and these
49
+ // errors are persisted into scheduled-run records. Never interpolate it.
50
+ await response.text().catch(() => undefined);
51
+ throw new Error(`HubSpot API error ${response.status}. Check the token scopes and request.`);
49
52
  }
50
53
  // DELETE and some association writes return 204 with an empty body.
51
54
  const text = await response.text();
@@ -46,8 +46,10 @@ export function createSalesforceConnector(options) {
46
46
  throw new Error(`Cannot reach Salesforce at ${connection.instanceUrl}${cause}. Check SALESFORCE_INSTANCE_URL (your My Domain URL, e.g. https://yourco.my.salesforce.com) and network access.`);
47
47
  }
48
48
  if (!response.ok) {
49
- const body = await response.text();
50
- throw new Error(`Salesforce API error ${response.status}: ${body}`);
49
+ // Status line only — the body echoes submitted field values and the
50
+ // request, and these errors are persisted into scheduled-run records.
51
+ await response.text().catch(() => undefined);
52
+ throw new Error(`Salesforce API error ${response.status}. Check the token and request.`);
51
53
  }
52
54
  // Salesforce PATCH returns 204 No Content on success.
53
55
  const text = await response.text();
@@ -26,8 +26,10 @@ export function createStripeConnector(options) {
26
26
  headers: { Authorization: `Bearer ${apiKey}` },
27
27
  });
28
28
  if (!response.ok) {
29
- const body = await response.text();
30
- throw new Error(`Stripe API error ${response.status}: ${body}`);
29
+ // Status line only — the body can echo request details bound to a live
30
+ // billing key, and these errors land in scheduled-run records.
31
+ await response.text().catch(() => undefined);
32
+ throw new Error(`Stripe API error ${response.status}. Check the restricted key and request.`);
31
33
  }
32
34
  return response.json();
33
35
  }
@@ -1,4 +1,4 @@
1
- import { chmodSync, existsSync, mkdirSync, readdirSync, readFileSync, unlinkSync, writeFileSync, } from "node:fs";
1
+ import { chmodSync, existsSync, mkdirSync, readdirSync, readFileSync, statSync, unlinkSync, writeFileSync, } from "node:fs";
2
2
  import { homedir } from "node:os";
3
3
  import { join } from "node:path";
4
4
  import { refreshHubspotToken } from "./connectors/hubspotAuth.js";
@@ -98,8 +98,29 @@ export function writeSecureFile(path, contents) {
98
98
  // Non-POSIX filesystems ignore chmod.
99
99
  }
100
100
  }
101
+ /**
102
+ * The 0600/0700 guarantee was write-only: a credentials.json inherited at
103
+ * looser permissions (a restored backup, a file created by another tool, a
104
+ * cloned home) was read and trusted regardless of its actual mode. Enforce the
105
+ * mode on read too — re-tighten to 0600 and warn once — so a world-readable
106
+ * credential store can't sit there silently leaking the token to other users.
107
+ */
108
+ function enforceCredentialFileMode(path) {
109
+ try {
110
+ const mode = statSync(path).mode & 0o777;
111
+ if ((mode & 0o077) !== 0) {
112
+ chmodSync(path, 0o600);
113
+ console.error(`fullstackgtm: tightened ${path} from ${mode.toString(8).padStart(3, "0")} to 600 ` +
114
+ "(it was readable or writable by other users).");
115
+ }
116
+ }
117
+ catch {
118
+ // Missing file or non-POSIX filesystem: nothing to enforce.
119
+ }
120
+ }
101
121
  function readFile() {
102
122
  try {
123
+ enforceCredentialFileMode(credentialsPath());
103
124
  const parsed = JSON.parse(readFileSync(credentialsPath(), "utf8"));
104
125
  if (parsed && typeof parsed === "object" && parsed.version === 1 && parsed.providers) {
105
126
  return parsed;
package/dist/enrich.js CHANGED
@@ -291,6 +291,28 @@ function valueToString(value) {
291
291
  return String(value);
292
292
  return "";
293
293
  }
294
+ /**
295
+ * CSV/formula-injection neutralization for string values destined for a CRM
296
+ * write. Third-party export rows (Clay CSV, webhook JSON) can contain cells
297
+ * like `=cmd|'/c calc'!A1` or `@SUM(...)`; written verbatim to a CRM field they
298
+ * lie dormant until someone exports the CRM to CSV and opens it in a spreadsheet,
299
+ * where the leading `= + - @` (or a leading tab/CR) makes the client execute it.
300
+ * We prefix a single apostrophe — the spreadsheet-standard escape that renders
301
+ * the cell as literal text. Numeric values bypass this (they're written as
302
+ * numbers, not strings), so signed numbers keep full fidelity; a phone number
303
+ * supplied as a string and starting with `+` gains a leading `'`, which the
304
+ * human sees in the approved diff. Applied only at the write path, never to
305
+ * match keys.
306
+ */
307
+ function neutralizeFormulaInjection(value) {
308
+ if (value && /^[=+\-@\t\r]/.test(value))
309
+ return `'${value}`;
310
+ return value;
311
+ }
312
+ /** valueToString for a value that will be written to a CRM field. */
313
+ function writeSafeString(value) {
314
+ return neutralizeFormulaInjection(valueToString(value));
315
+ }
294
316
  function normalizeKeyValue(key, value) {
295
317
  const text = valueToString(value).toLowerCase();
296
318
  if (!text)
@@ -498,7 +520,7 @@ export function buildEnrichPlan(options) {
498
520
  operation: "set_field",
499
521
  field: canonicalField,
500
522
  beforeValue: currentValue ?? null,
501
- afterValue: typeof sourceValue === "number" ? sourceValue : valueToString(sourceValue),
523
+ afterValue: typeof sourceValue === "number" ? sourceValue : writeSafeString(sourceValue),
502
524
  reason: `${source} ${record.objectType} "${describeSourceRecord(record)}" (matched by ` +
503
525
  `${outcome.matchedKey}) reports a changed value for ${canonicalField}.`,
504
526
  sourceRuleOrPolicy: `enrich:${source}:${canonicalField}`,
@@ -516,7 +538,7 @@ export function buildEnrichPlan(options) {
516
538
  if (!isEmptyValue(currentValue))
517
539
  continue;
518
540
  emittedForRecord = true;
519
- const afterValue = typeof sourceValue === "number" ? sourceValue : valueToString(sourceValue);
541
+ const afterValue = typeof sourceValue === "number" ? sourceValue : writeSafeString(sourceValue);
520
542
  operations.push({
521
543
  id: `op_enr_${fnv1a(`${source}:${record.objectType}:${outcome.recordId}:${canonicalField}`)}`,
522
544
  objectType: canonicalObjectType(record.objectType),
@@ -56,9 +56,12 @@ export function createApolloClient(options) {
56
56
  if (response.status === 404)
57
57
  return null;
58
58
  if (!response.ok) {
59
- const body = await response.text();
59
+ // Status line only — never interpolate the response body. It can echo
60
+ // the submitted query (contact emails / company domains) or the API key,
61
+ // and these errors are persisted verbatim into scheduled-run records.
62
+ await response.text().catch(() => undefined);
60
63
  const exhausted = response.status === 429 ? ` (rate limited; ${maxRetries} retries exhausted)` : "";
61
- throw new Error(`Apollo API error ${response.status}${exhausted}: ${body}`);
64
+ throw new Error(`Apollo API error ${response.status}${exhausted}. Check the API key and request.`);
62
65
  }
63
66
  const text = await response.text();
64
67
  return text ? JSON.parse(text) : null;
package/dist/market.d.ts CHANGED
@@ -153,6 +153,7 @@ export type FetchPage = (url: string) => Promise<{
153
153
  status: number;
154
154
  body: string;
155
155
  }>;
156
+ export declare function assertPublicUrl(rawUrl: string): Promise<URL>;
156
157
  export type CaptureOptions = {
157
158
  /** Directory for captures; defaults to <marketHome>/captures. */
158
159
  dir?: string;
package/dist/market.js CHANGED
@@ -1,5 +1,7 @@
1
1
  import { createHash } from "node:crypto";
2
+ import { lookup } from "node:dns/promises";
2
3
  import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
4
+ import { isIP } from "node:net";
3
5
  import { join } from "node:path";
4
6
  import { credentialsDir } from "./credentials.js";
5
7
  const INTENSITY_RANK = {
@@ -141,15 +143,144 @@ export function extractReadableText(html) {
141
143
  .filter(Boolean)
142
144
  .join("\n");
143
145
  }
146
+ /**
147
+ * SSRF guard. market.config.json URLs are operator-authored, but configs are
148
+ * shared/templated in consulting/team use and `market capture|refresh` is on
149
+ * the cron allowlist — an unguarded fetch is an unattended internal-network
150
+ * and cloud-metadata probe. We therefore (1) allow only http/https, (2) refuse
151
+ * any host that is or resolves to a private/loopback/link-local/metadata
152
+ * address, and (3) follow redirects manually, re-validating each hop.
153
+ *
154
+ * Residual gap (documented, not defended here): TOCTOU DNS rebinding between
155
+ * our lookup and fetch's own resolution. Out of scope for fetching public
156
+ * competitor pages; a hardened deployment should fetch through an egress proxy.
157
+ */
158
+ const MAX_REDIRECTS = 5;
159
+ const FETCH_TIMEOUT_MS = 15_000;
160
+ const MAX_BODY_BYTES = 5_000_000;
161
+ function ipv4IsPrivate(ip) {
162
+ const parts = ip.split(".").map((n) => Number(n));
163
+ if (parts.length !== 4 || parts.some((n) => !Number.isInteger(n) || n < 0 || n > 255))
164
+ return true;
165
+ const [a, b] = parts;
166
+ if (a === 0 || a === 127)
167
+ return true; // this-host, loopback
168
+ if (a === 10)
169
+ return true; // private
170
+ if (a === 172 && b >= 16 && b <= 31)
171
+ return true; // private
172
+ if (a === 192 && b === 168)
173
+ return true; // private
174
+ if (a === 169 && b === 254)
175
+ return true; // link-local incl. 169.254.169.254 metadata
176
+ if (a === 100 && b >= 64 && b <= 127)
177
+ return true; // CGNAT
178
+ if (a >= 224)
179
+ return true; // multicast / reserved
180
+ return false;
181
+ }
182
+ function ipIsPrivate(ip) {
183
+ const family = isIP(ip);
184
+ if (family === 4)
185
+ return ipv4IsPrivate(ip);
186
+ if (family === 6) {
187
+ const lower = ip.toLowerCase();
188
+ if (lower === "::1" || lower === "::")
189
+ return true; // loopback / unspecified
190
+ // IPv4-mapped (::ffff:…) — Node normalizes ::ffff:127.0.0.1 to ::ffff:7f00:1,
191
+ // so accept both the dotted and the hex-pair forms, unwrap, check the v4.
192
+ const mapped = lower.match(/^::ffff:(.+)$/);
193
+ if (mapped) {
194
+ const rest = mapped[1];
195
+ if (rest.includes("."))
196
+ return ipv4IsPrivate(rest);
197
+ const groups = rest.split(":");
198
+ if (groups.length === 2) {
199
+ const hi = parseInt(groups[0], 16);
200
+ const lo = parseInt(groups[1], 16);
201
+ if (Number.isNaN(hi) || Number.isNaN(lo))
202
+ return true;
203
+ return ipv4IsPrivate(`${(hi >> 8) & 0xff}.${hi & 0xff}.${(lo >> 8) & 0xff}.${lo & 0xff}`);
204
+ }
205
+ return true; // unrecognized mapped form → refuse
206
+ }
207
+ if (lower.startsWith("fe8") || lower.startsWith("fe9") || lower.startsWith("fea") || lower.startsWith("feb"))
208
+ return true; // link-local fe80::/10
209
+ if (lower.startsWith("fc") || lower.startsWith("fd"))
210
+ return true; // unique-local fc00::/7
211
+ return false;
212
+ }
213
+ return true; // not a recognizable IP literal → refuse
214
+ }
215
+ export async function assertPublicUrl(rawUrl) {
216
+ let url;
217
+ try {
218
+ url = new URL(rawUrl);
219
+ }
220
+ catch {
221
+ throw new Error(`market capture: "${rawUrl}" is not a valid URL.`);
222
+ }
223
+ if (url.protocol !== "http:" && url.protocol !== "https:") {
224
+ throw new Error(`market capture refuses ${url.protocol} URLs (only http/https): ${rawUrl}`);
225
+ }
226
+ const host = url.hostname.replace(/^\[|\]$/g, ""); // strip IPv6 brackets
227
+ if (isIP(host)) {
228
+ if (ipIsPrivate(host))
229
+ throw new Error(`market capture refuses private/loopback address ${host} (SSRF guard).`);
230
+ return url;
231
+ }
232
+ // Hostname: resolve and refuse if ANY address is private.
233
+ const addrs = await lookup(host, { all: true });
234
+ for (const { address } of addrs) {
235
+ if (ipIsPrivate(address)) {
236
+ throw new Error(`market capture refuses ${host} — it resolves to private/internal address ${address} (SSRF guard).`);
237
+ }
238
+ }
239
+ return url;
240
+ }
144
241
  const defaultFetchPage = async (url) => {
145
- const response = await fetch(url, {
146
- headers: {
147
- "User-Agent": "fullstackgtm-market/0 (+https://github.com/fullstackgtm/core)",
148
- "Accept-Language": "en-US",
149
- },
150
- redirect: "follow",
151
- });
152
- return { status: response.status, body: await response.text() };
242
+ let current = url;
243
+ for (let hop = 0; hop <= MAX_REDIRECTS; hop++) {
244
+ await assertPublicUrl(current);
245
+ const controller = new AbortController();
246
+ const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
247
+ let response;
248
+ try {
249
+ response = await fetch(current, {
250
+ headers: {
251
+ "User-Agent": "fullstackgtm-market/0 (+https://github.com/fullstackgtm/core)",
252
+ "Accept-Language": "en-US",
253
+ },
254
+ redirect: "manual",
255
+ signal: controller.signal,
256
+ });
257
+ }
258
+ finally {
259
+ clearTimeout(timer);
260
+ }
261
+ if (response.status >= 300 && response.status < 400 && response.headers.get("location")) {
262
+ current = new URL(response.headers.get("location"), current).toString();
263
+ continue; // re-validate the redirect target on the next iteration
264
+ }
265
+ const reader = response.body?.getReader();
266
+ if (!reader)
267
+ return { status: response.status, body: await response.text() };
268
+ const chunks = [];
269
+ let total = 0;
270
+ for (;;) {
271
+ const { done, value } = await reader.read();
272
+ if (done)
273
+ break;
274
+ total += value.length;
275
+ if (total > MAX_BODY_BYTES) {
276
+ await reader.cancel();
277
+ break;
278
+ }
279
+ chunks.push(value);
280
+ }
281
+ return { status: response.status, body: Buffer.concat(chunks).toString("utf8") };
282
+ }
283
+ throw new Error(`market capture: too many redirects (>${MAX_REDIRECTS}) for ${url}`);
153
284
  };
154
285
  export async function captureMarket(config, options = {}) {
155
286
  const dir = options.dir ?? join(marketHome(config.category), "captures");
@@ -284,6 +415,11 @@ export function validateObservationSet(config, set) {
284
415
  if (!INTENSITY_RANK[obs.intensity] && obs.intensity !== "unobservable") {
285
416
  problems.push(`${cell}: invalid intensity "${obs.intensity}"`);
286
417
  }
418
+ // confidence is rendered into the HTML report; only the enum is allowed, so
419
+ // an `observe --from` file can't smuggle markup through a free-text value.
420
+ if (obs.confidence !== "high" && obs.confidence !== "medium" && obs.confidence !== "low") {
421
+ problems.push(`${cell}: invalid confidence "${String(obs.confidence)}" (expected high, medium, or low)`);
422
+ }
287
423
  if ((obs.intensity === "loud" || obs.intensity === "quiet") && obs.evidence.length === 0) {
288
424
  problems.push(`${cell}: ${obs.intensity} reading with no quoted evidence`);
289
425
  }
@@ -1,3 +1,12 @@
1
1
  import type { MarketConfig, ObservationSet } from "./market.ts";
2
+ /**
3
+ * Serialize JSON for embedding inside an inline <script> block. JSON.stringify
4
+ * does not escape `<`, `>`, `&`, or the U+2028/U+2029 line separators, so a
5
+ * vendor name containing `</script>` (these are untrusted, competitor-authored
6
+ * strings) would close the tag and inject markup. Replacing them with their
7
+ * \uXXXX escapes keeps the parsed value identical while making the breakout
8
+ * sequence unrepresentable in the HTML source.
9
+ */
10
+ export declare function safeJsonForScript(value: unknown): string;
2
11
  export declare function marketMapToMarkdown(config: MarketConfig, set: ObservationSet): string;
3
12
  export declare function marketMapToHtml(config: MarketConfig, set: ObservationSet): string;
@@ -28,6 +28,22 @@ function escapeHtml(value) {
28
28
  .replace(/>/g, "&gt;")
29
29
  .replace(/"/g, "&quot;");
30
30
  }
31
+ /**
32
+ * Serialize JSON for embedding inside an inline <script> block. JSON.stringify
33
+ * does not escape `<`, `>`, `&`, or the U+2028/U+2029 line separators, so a
34
+ * vendor name containing `</script>` (these are untrusted, competitor-authored
35
+ * strings) would close the tag and inject markup. Replacing them with their
36
+ * \uXXXX escapes keeps the parsed value identical while making the breakout
37
+ * sequence unrepresentable in the HTML source.
38
+ */
39
+ export function safeJsonForScript(value) {
40
+ return JSON.stringify(value)
41
+ .replace(/</g, "\\u003c")
42
+ .replace(/>/g, "\\u003e")
43
+ .replace(/&/g, "\\u0026")
44
+ .replace(/\u2028/g, "\\u2028")
45
+ .replace(/\u2029/g, "\\u2029");
46
+ }
31
47
  function buildModel(config, set) {
32
48
  const fronts = computeFrontStates(config, set);
33
49
  const stateByClaim = new Map(fronts.map((front) => [front.claimId, front.state]));
@@ -320,7 +336,7 @@ function axisSectionsHtml(config, set) {
320
336
  <table class="legend"><thead><tr><th></th><th>vendor</th><th class="num">${legendMeasureHead}</th></tr></thead><tbody>${legendRows}</tbody></table>
321
337
  </div>
322
338
  <div class="map-tip" id="map-tip" hidden></div>
323
- <script type="application/json" id="map-data">${JSON.stringify(tipData)}</script>
339
+ <script type="application/json" id="map-data">${safeJsonForScript(tipData)}</script>
324
340
  <script>
325
341
  (function () {
326
342
  var data = JSON.parse(document.getElementById("map-data").textContent);
@@ -331,7 +347,16 @@ function axisSectionsHtml(config, set) {
331
347
  function show(v, evt) {
332
348
  var d = data[v];
333
349
  if (!d) return;
334
- tip.innerHTML = "<b>" + d.n + " · " + d.name + "</b>" + d.lines.map(function (l) { return "<div>" + l + "</div>"; }).join("");
350
+ // textContent only vendor names / axis labels are untrusted (competitor-controlled).
351
+ tip.textContent = "";
352
+ var head = document.createElement("b");
353
+ head.textContent = d.n + " · " + d.name;
354
+ tip.appendChild(head);
355
+ d.lines.forEach(function (l) {
356
+ var div = document.createElement("div");
357
+ div.textContent = l;
358
+ tip.appendChild(div);
359
+ });
335
360
  tip.hidden = false;
336
361
  var box = fig.getBoundingClientRect();
337
362
  tip.style.left = Math.min(evt.clientX - box.left + 14, box.width - tip.offsetWidth - 8) + "px";
@@ -419,7 +444,7 @@ export function marketMapToHtml(config, set) {
419
444
  const anchorLoud = anchor
420
445
  ? claimIds.filter((claimId) => model.cell(anchor, claimId)?.intensity === "loud").length
421
446
  : 0;
422
- const anchorNote = anchor ? ` · ${vendorNamesById.get(anchor) ?? anchor} loud on ${anchorLoud}` : "";
447
+ const anchorNote = anchor ? ` · ${e(vendorNamesById.get(anchor) ?? anchor)} loud on ${anchorLoud}` : "";
423
448
  return `<details class="claim-group"><summary><b>${e(group.title)}</b> — ${claimIds.length} claim${claimIds.length === 1 ? "" : "s"} <span class="sum-soft">(${e(group.blurb)}${anchorNote})</span></summary>
424
449
  <table><thead><tr><th></th>${vendorHeads}<th></th></tr></thead><tbody>${claimIds.map(matrixRow).join("")}</tbody></table>
425
450
  </details>`;
@@ -475,7 +500,7 @@ export function marketMapToHtml(config, set) {
475
500
  const obs = model.cell(vendor.id, claimId);
476
501
  if (!obs || obs.evidence.length === 0)
477
502
  return [];
478
- return obs.evidence.map((evidence) => `<div class="ev"><span class="ev-head">${e(claimId)} · ${obs.intensity.toUpperCase()} (${obs.confidence})</span>` +
503
+ return obs.evidence.map((evidence) => `<div class="ev"><span class="ev-head">${e(claimId)} · ${e(obs.intensity.toUpperCase())} (${e(String(obs.confidence ?? ""))})</span>` +
479
504
  `<blockquote>“${e(evidence.text)}”</blockquote>` +
480
505
  `<span class="ev-src">${e(String(evidence.metadata?.url ?? ""))} · capture ${e(String(evidence.metadata?.captureHash ?? "").slice(0, 12))}</span></div>`);
481
506
  });
@@ -53,6 +53,23 @@ export declare function scheduleId(label: string, cron: string, argv: string[],
53
53
  * in-process into the CLI router, never through a shell).
54
54
  */
55
55
  export declare function validateSchedulableArgv(argv: string[]): void;
56
+ /**
57
+ * A schedule label is free text the operator chooses, but it is later
58
+ * interpolated into a crontab comment line by `renderManagedBlock`. A newline
59
+ * (or carriage return) would break out of the comment and inject an arbitrary
60
+ * crontab entry on `schedule install`. Reject control characters at the entry
61
+ * point so a label can never carry a second line; `renderManagedBlock` also
62
+ * strips them defensively in case a hand-edited schedules.json slips one past.
63
+ */
64
+ export declare function assertSingleLineLabel(label: string): void;
65
+ /**
66
+ * True if the string contains any line-breaking or control character. Covers
67
+ * C0 controls + DEL, plus the Unicode separators a non-cron parser might honor
68
+ * (NEL U+0085, LS U+2028, PS U+2029, VT U+000B, FF U+000C) — defense-in-depth
69
+ * for the future modal/aws scaffold renderers whose target formats may treat
70
+ * those as line breaks.
71
+ */
72
+ export declare function hasControlChar(value: string): boolean;
56
73
  /**
57
74
  * Split a `schedule add "<command>"` string into argv, honoring single and
58
75
  * double quotes (no escapes, no expansion — this is tokenization, not shell).
package/dist/schedule.js CHANGED
@@ -77,6 +77,68 @@ export function validateSchedulableArgv(argv) {
77
77
  }
78
78
  }
79
79
  }
80
+ /**
81
+ * A schedule label is free text the operator chooses, but it is later
82
+ * interpolated into a crontab comment line by `renderManagedBlock`. A newline
83
+ * (or carriage return) would break out of the comment and inject an arbitrary
84
+ * crontab entry on `schedule install`. Reject control characters at the entry
85
+ * point so a label can never carry a second line; `renderManagedBlock` also
86
+ * strips them defensively in case a hand-edited schedules.json slips one past.
87
+ */
88
+ export function assertSingleLineLabel(label) {
89
+ if (hasControlChar(label)) {
90
+ throw new Error("A schedule --label cannot contain newlines or control characters " +
91
+ "(they would inject lines into the managed crontab block). Use a plain single-line name.");
92
+ }
93
+ }
94
+ /**
95
+ * True if the string contains any line-breaking or control character. Covers
96
+ * C0 controls + DEL, plus the Unicode separators a non-cron parser might honor
97
+ * (NEL U+0085, LS U+2028, PS U+2029, VT U+000B, FF U+000C) — defense-in-depth
98
+ * for the future modal/aws scaffold renderers whose target formats may treat
99
+ * those as line breaks.
100
+ */
101
+ export function hasControlChar(value) {
102
+ for (let i = 0; i < value.length; i++) {
103
+ const code = value.charCodeAt(i);
104
+ if (code < 0x20 || code === 0x7f || code === 0x85 || code === 0x2028 || code === 0x2029)
105
+ return true;
106
+ }
107
+ return false;
108
+ }
109
+ /** Collapse any control/separator character to a space — last-resort guard at render time. */
110
+ function sanitizeCrontabComment(value) {
111
+ let out = "";
112
+ for (const ch of value) {
113
+ const code = ch.charCodeAt(0);
114
+ out += code < 0x20 || code === 0x7f || code === 0x85 || code === 0x2028 || code === 0x2029 ? " " : ch;
115
+ }
116
+ return out.replace(/ {2,}/g, " ").trim();
117
+ }
118
+ /**
119
+ * Validate every field of an entry that `renderManagedBlock` interpolates into
120
+ * the crontab — not just the label. The EXECUTABLE line embeds `cron` and `id`
121
+ * raw, and `schedule install` renders entries straight from schedules.json, so
122
+ * a hand-edited (or otherwise tampered) entry with a newline in cron/id/profile
123
+ * would inject a live crontab line. Refuse to render a tampered entry rather
124
+ * than emit it. (Well-formed entries never trip this: cron is parser-validated,
125
+ * id is an fnv1a hex hash, label is guarded at add-time.)
126
+ */
127
+ function assertRenderableEntry(profile, entry) {
128
+ const fields = [
129
+ ["profile", profile],
130
+ ["cron", entry.cron],
131
+ ["id", entry.id],
132
+ ["label", entry.label],
133
+ ...entry.argv.map((token, i) => [`argv[${i}]`, token]),
134
+ ];
135
+ for (const [name, value] of fields) {
136
+ if (hasControlChar(value)) {
137
+ throw new Error(`Refusing to render schedule entry ${entry.id}: its ${name} contains a newline or control character. ` +
138
+ "The schedules.json store has been tampered with or corrupted — repair it before installing.");
139
+ }
140
+ }
141
+ }
80
142
  /**
81
143
  * Split a `schedule add "<command>"` string into argv, honoring single and
82
144
  * double quotes (no escapes, no expansion — this is tokenization, not shell).
@@ -124,7 +186,13 @@ const CRON_FIELD_SPECS = [
124
186
  { name: "day-of-week", min: 0, max: 7 },
125
187
  ];
126
188
  export function parseCron(expression) {
127
- const fields = expression.trim().split(/\s+/);
189
+ // Reject non-ASCII whitespace and control chars: JS \s splits on U+00A0,
190
+ // U+3000, etc., but Vixie cron's field separator is only space/tab. A source
191
+ // carrying them would parse here yet be misparsed or rejected by `crontab -`.
192
+ if (hasControlChar(expression) || /[^\x20-\x7e]/.test(expression)) {
193
+ throw new Error(`Invalid cron expression "${expression}": only ASCII characters, space, and tab are allowed.`);
194
+ }
195
+ const fields = expression.trim().split(/[ \t]+/);
128
196
  if (fields.length !== 5) {
129
197
  throw new Error(`Invalid cron expression "${expression}": expected 5 fields ` +
130
198
  `(minute hour day-of-month month day-of-week), got ${fields.length}.`);
@@ -432,13 +500,26 @@ export function crontabSentinels(profile) {
432
500
  * but fullstackgtm dispatch (no arbitrary shell, ever).
433
501
  */
434
502
  export function renderManagedBlock(profile, entries, cliInvocation) {
503
+ // cliInvocation is spliced raw into the executable line; it is built from
504
+ // process.execPath, the script path, and FSGTM_HOME (cli.ts), so a newline in
505
+ // FSGTM_HOME would inject a crontab line. Validate it like the entry fields —
506
+ // single-quote shell-escaping does NOT defend cron's line parser.
507
+ if (hasControlChar(cliInvocation)) {
508
+ throw new Error("Refusing to render the managed crontab: the resolved CLI invocation (node path, script path, " +
509
+ "or FSGTM_HOME) contains a newline or control character. Check $FSGTM_HOME.");
510
+ }
435
511
  const { open, close } = crontabSentinels(profile);
436
512
  const lines = [
437
513
  open,
438
514
  "# Managed by `fullstackgtm schedule install` — replaced wholesale on re-install; do not edit.",
439
515
  ];
440
516
  for (const entry of entries) {
441
- lines.push(`# ${entry.label} (${entry.id}): ${entry.argv.join(" ")}`);
517
+ // Refuse to render any entry whose interpolated fields carry a control char
518
+ // — the executable line below embeds cron/id raw, so a tampered store could
519
+ // otherwise inject a live crontab line. The comment line is additionally
520
+ // sanitized so a benign-but-messy label can't break it.
521
+ assertRenderableEntry(profile, entry);
522
+ lines.push(sanitizeCrontabComment(`# ${entry.label} (${entry.id}): ${entry.argv.join(" ")}`));
442
523
  lines.push(`${entry.cron} ${cliInvocation} schedule run ${entry.id} --profile ${profile} --trigger cron`);
443
524
  }
444
525
  lines.push(close);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "fullstackgtm",
3
- "version": "0.25.1",
3
+ "version": "0.25.2",
4
4
  "description": "Open-source agentic GTM ops framework: canonical GTM data model, pluggable deterministic audits, reviewable dry-run patch plans, approval-gated write-back with conflict detection, and cross-system entity resolution. HubSpot, Salesforce, and Stripe connectors included.",
5
5
  "license": "Apache-2.0",
6
6
  "author": "Full Stack GTM",
package/src/cli.ts CHANGED
@@ -109,6 +109,8 @@ import {
109
109
  parseCron,
110
110
  renderManagedBlock,
111
111
  replaceManagedBlock,
112
+ assertSingleLineLabel,
113
+ hasControlChar,
112
114
  scheduleId,
113
115
  systemCrontabIo,
114
116
  tokenizeCommand,
@@ -1831,6 +1833,7 @@ trigger: manual. status shows next firing and surfaces missed firings
1831
1833
  const label =
1832
1834
  option(rest, "--label") ??
1833
1835
  argv.filter((arg) => !arg.startsWith("--")).slice(0, 2).join("-").replace(/[^\w.-]+/g, "-");
1836
+ assertSingleLineLabel(label);
1834
1837
  const entry: ScheduleEntry = {
1835
1838
  id: scheduleId(label, cron.source, argv, createdAt),
1836
1839
  label,
@@ -2052,12 +2055,26 @@ function scheduleCliInvocation(): string {
2052
2055
  if (!script || !existsSync(script)) {
2053
2056
  throw new Error("Cannot resolve the fullstackgtm entry point for crontab lines (process.argv[1] is missing).");
2054
2057
  }
2058
+ // A newline/control char in any of these flows verbatim into the crontab
2059
+ // executable line; single-quote escaping defends the shell, not cron's line
2060
+ // parser. Refuse early with a clear message (renderManagedBlock re-checks).
2061
+ for (const [name, value] of [
2062
+ ["FSGTM_HOME", process.env.FSGTM_HOME],
2063
+ ["the node executable path", process.execPath],
2064
+ ["the CLI script path", script],
2065
+ ] as const) {
2066
+ if (value && hasControlChar(value)) {
2067
+ throw new Error(`Cannot install schedules: ${name} contains a newline or control character.`);
2068
+ }
2069
+ }
2055
2070
  const quote = (value: string) => `'${value.replace(/'/g, `'\\''`)}'`;
2056
2071
  const parts = [quote(process.execPath)];
2057
2072
  if (script.endsWith(".ts")) parts.push("--experimental-strip-types");
2058
2073
  parts.push(quote(script));
2059
2074
  const home = process.env.FSGTM_HOME ? `FSGTM_HOME=${quote(process.env.FSGTM_HOME)} ` : "";
2060
- return home + parts.join(" ");
2075
+ // cron treats an unescaped `%` in the command field as a newline/stdin split.
2076
+ // Escape it as `\%` so a stray `%` in a path can't truncate the managed line.
2077
+ return (home + parts.join(" ")).replace(/%/g, "\\%");
2061
2078
  }
2062
2079
 
2063
2080
  /**
@@ -77,8 +77,11 @@ export function createHubspotConnector(options: HubspotConnectorOptions): Requir
77
77
  throw new Error(`Cannot reach HubSpot at ${baseUrl}${cause}. Check network access.`);
78
78
  }
79
79
  if (!response.ok) {
80
- const body = await response.text();
81
- throw new Error(`HubSpot API error ${response.status}: ${body}`);
80
+ // Status line only — HubSpot 4xx bodies echo submitted property values
81
+ // (contact emails, company domains) and the request payload, and these
82
+ // errors are persisted into scheduled-run records. Never interpolate it.
83
+ await response.text().catch(() => undefined);
84
+ throw new Error(`HubSpot API error ${response.status}. Check the token scopes and request.`);
82
85
  }
83
86
  // DELETE and some association writes return 204 with an empty body.
84
87
  const text = await response.text();
@@ -88,8 +88,10 @@ export function createSalesforceConnector(
88
88
  );
89
89
  }
90
90
  if (!response.ok) {
91
- const body = await response.text();
92
- throw new Error(`Salesforce API error ${response.status}: ${body}`);
91
+ // Status line only — the body echoes submitted field values and the
92
+ // request, and these errors are persisted into scheduled-run records.
93
+ await response.text().catch(() => undefined);
94
+ throw new Error(`Salesforce API error ${response.status}. Check the token and request.`);
93
95
  }
94
96
  // Salesforce PATCH returns 204 No Content on success.
95
97
  const text = await response.text();
@@ -46,8 +46,10 @@ export function createStripeConnector(options: StripeConnectorOptions): GtmConne
46
46
  headers: { Authorization: `Bearer ${apiKey}` },
47
47
  });
48
48
  if (!response.ok) {
49
- const body = await response.text();
50
- throw new Error(`Stripe API error ${response.status}: ${body}`);
49
+ // Status line only — the body can echo request details bound to a live
50
+ // billing key, and these errors land in scheduled-run records.
51
+ await response.text().catch(() => undefined);
52
+ throw new Error(`Stripe API error ${response.status}. Check the restricted key and request.`);
51
53
  }
52
54
  return response.json();
53
55
  }
@@ -4,6 +4,7 @@ import {
4
4
  mkdirSync,
5
5
  readdirSync,
6
6
  readFileSync,
7
+ statSync,
7
8
  unlinkSync,
8
9
  writeFileSync,
9
10
  } from "node:fs";
@@ -143,8 +144,31 @@ export function writeSecureFile(path: string, contents: string) {
143
144
  }
144
145
  }
145
146
 
147
/**
 * Enforce the owner-only (0600) guarantee when the credential store is read,
 * not only when it is written. A file inherited at looser permissions (a
 * restored backup, a file created by another tool, a cloned home) is
 * re-tightened to 0600 and a warning is printed to stderr.
 *
 * Never throws: a missing file is ignored, and a failed chmod is reported on
 * stderr so a world-readable store is not silently trusted.
 *
 * @param path Absolute path of the credentials file to check.
 */
function enforceCredentialFileMode(path: string): void {
  // On Windows fs.chmod can only toggle the write permission and there is no
  // owner/group/other distinction, so the check below could never be satisfied
  // and would warn on every read without changing anything.
  if (process.platform === "win32") return;

  let mode: number;
  try {
    mode = statSync(path).mode & 0o777;
  } catch {
    // Missing file (or stat not possible): nothing to enforce.
    return;
  }
  // Any group/other bit set means the file is exposed beyond its owner.
  if ((mode & 0o077) === 0) return;

  const shown = mode.toString(8).padStart(3, "0");
  try {
    chmodSync(path, 0o600);
  } catch {
    // e.g. the file belongs to another user — say so instead of failing silently.
    console.error(
      `fullstackgtm: ${path} has mode ${shown} (readable or writable by other users) ` +
        "and could not be tightened to 600 — fix its permissions.",
    );
    return;
  }
  console.error(
    `fullstackgtm: tightened ${path} from ${shown} to 600 ` +
      "(it was readable or writable by other users).",
  );
}
168
+
146
169
  function readFile(): CredentialsFile {
147
170
  try {
171
+ enforceCredentialFileMode(credentialsPath());
148
172
  const parsed = JSON.parse(readFileSync(credentialsPath(), "utf8"));
149
173
  if (parsed && typeof parsed === "object" && parsed.version === 1 && parsed.providers) {
150
174
  return parsed as CredentialsFile;
package/src/enrich.ts CHANGED
@@ -394,6 +394,29 @@ function valueToString(value: unknown): string {
394
394
  return "";
395
395
  }
396
396
 
397
/**
 * CSV/formula-injection neutralization for a string about to be written to a
 * CRM field. A value whose first character is `=`, `+`, `-`, `@`, a tab, or a
 * carriage return can be executed as a formula once the CRM is exported to CSV
 * and opened in a spreadsheet. Such a value gets a single leading apostrophe,
 * the spreadsheet escape that renders the cell as literal text. Every other
 * value, including the empty string, is returned unchanged.
 *
 * Callers in this file pass numeric values through as numbers, so signed
 * numbers never reach this function; a phone number supplied as a string and
 * starting with `+` does gain the leading apostrophe.
 */
function neutralizeFormulaInjection(value: string): string {
  if (!value) return value;
  const formulaTriggers = "=+-@\t\r";
  const startsLikeFormula = formulaTriggers.includes(value.charAt(0));
  return startsLikeFormula ? `'${value}` : value;
}
414
+
415
/** Stringify a value bound for a CRM field, then neutralize any formula-trigger prefix. */
function writeSafeString(value: unknown): string {
  const text = valueToString(value);
  return neutralizeFormulaInjection(text);
}
419
+
397
420
  // ---------------------------------------------------------------------------
398
421
  // Matching: ordered keys, unique-hit-wins, zero-hits-next-key,
399
422
  // multi-hit → onAmbiguous. Ambiguity is surfaced, never resolved by coin flip.
@@ -708,7 +731,7 @@ export function buildEnrichPlan(options: BuildEnrichPlanOptions): EnrichPlanResu
708
731
  operation: "set_field",
709
732
  field: canonicalField,
710
733
  beforeValue: currentValue ?? null,
711
- afterValue: typeof sourceValue === "number" ? sourceValue : valueToString(sourceValue),
734
+ afterValue: typeof sourceValue === "number" ? sourceValue : writeSafeString(sourceValue),
712
735
  reason:
713
736
  `${source} ${record.objectType} "${describeSourceRecord(record)}" (matched by ` +
714
737
  `${outcome.matchedKey}) reports a changed value for ${canonicalField}.`,
@@ -726,7 +749,7 @@ export function buildEnrichPlan(options: BuildEnrichPlanOptions): EnrichPlanResu
726
749
  if (isEmptyValue(sourceValue)) continue;
727
750
  if (!isEmptyValue(currentValue)) continue;
728
751
  emittedForRecord = true;
729
- const afterValue = typeof sourceValue === "number" ? sourceValue : valueToString(sourceValue);
752
+ const afterValue = typeof sourceValue === "number" ? sourceValue : writeSafeString(sourceValue);
730
753
  operations.push({
731
754
  id: `op_enr_${fnv1a(`${source}:${record.objectType}:${outcome.recordId}:${canonicalField}`)}`,
732
755
  objectType: canonicalObjectType(record.objectType),
@@ -78,9 +78,12 @@ export function createApolloClient(options: ApolloClientOptions): ApolloClient {
78
78
  }
79
79
  if (response.status === 404) return null;
80
80
  if (!response.ok) {
81
- const body = await response.text();
81
+ // Status line only — never interpolate the response body. It can echo
82
+ // the submitted query (contact emails / company domains) or the API key,
83
+ // and these errors are persisted verbatim into scheduled-run records.
84
+ await response.text().catch(() => undefined);
82
85
  const exhausted = response.status === 429 ? ` (rate limited; ${maxRetries} retries exhausted)` : "";
83
- throw new Error(`Apollo API error ${response.status}${exhausted}: ${body}`);
86
+ throw new Error(`Apollo API error ${response.status}${exhausted}. Check the API key and request.`);
84
87
  }
85
88
  const text = await response.text();
86
89
  return text ? (JSON.parse(text) as Record<string, unknown>) : null;
package/src/market.ts CHANGED
@@ -1,5 +1,7 @@
1
1
  import { createHash } from "node:crypto";
2
+ import { lookup } from "node:dns/promises";
2
3
  import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
4
+ import { isIP } from "node:net";
3
5
  import { join } from "node:path";
4
6
  import { credentialsDir } from "./credentials.ts";
5
7
  import type { GtmEvidence } from "./types.ts";
@@ -309,15 +311,129 @@ export function extractReadableText(html: string): string {
309
311
 
310
312
  export type FetchPage = (url: string) => Promise<{ status: number; body: string }>;
311
313
 
314
+ /**
315
+ * SSRF guard. market.config.json URLs are operator-authored, but configs are
316
+ * shared/templated in consulting/team use and `market capture|refresh` is on
317
+ * the cron allowlist — an unguarded fetch is an unattended internal-network
318
+ * and cloud-metadata probe. We therefore (1) allow only http/https, (2) refuse
319
+ * any host that is or resolves to a private/loopback/link-local/metadata
320
+ * address, and (3) follow redirects manually, re-validating each hop.
321
+ *
322
+ * Residual gap (documented, not defended here): TOCTOU DNS rebinding between
323
+ * our lookup and fetch's own resolution. Out of scope for fetching public
324
+ * competitor pages; a hardened deployment should fetch through an egress proxy.
325
+ */
326
+ const MAX_REDIRECTS = 5;
327
+ const FETCH_TIMEOUT_MS = 15_000;
328
+ const MAX_BODY_BYTES = 5_000_000;
329
+
330
/**
 * True when a dotted-quad IPv4 address must NOT be fetched: loopback, private,
 * link-local (incl. the 169.254.169.254 metadata address), CGNAT, protocol
 * assignment / benchmarking ranges, multicast, and reserved space. Anything
 * that is not four plain decimal octets is refused (true).
 */
function ipv4IsPrivate(ip: string): boolean {
  const octets = ip.split(".");
  // Validate the text first: Number("") is 0 and Number("0x7f") is 127, so a
  // numeric conversion alone would accept malformed octets.
  if (octets.length !== 4 || octets.some((octet) => !/^\d{1,3}$/.test(octet))) return true;
  const parts = octets.map(Number);
  if (parts.some((n) => n > 255)) return true;
  const [a, b, c] = parts;
  if (a === 0 || a === 127) return true; // this-host, loopback
  if (a === 10) return true; // private
  if (a === 172 && b >= 16 && b <= 31) return true; // private
  if (a === 192 && b === 168) return true; // private
  if (a === 169 && b === 254) return true; // link-local incl. 169.254.169.254 metadata
  if (a === 100 && b >= 64 && b <= 127) return true; // CGNAT
  if (a === 192 && b === 0 && c === 0) return true; // 192.0.0.0/24 IETF protocol assignments
  if (a === 198 && (b === 18 || b === 19)) return true; // 198.18.0.0/15 benchmarking
  if (a >= 224) return true; // multicast / reserved
  return false;
}

/**
 * Expand an IPv6 literal into its eight 16-bit groups so ranges can be tested
 * numerically instead of by string prefix. Handles `::` compression, a
 * trailing dotted IPv4 part, and a `%zone` suffix. Returns null when the text
 * does not expand to exactly eight valid groups.
 */
function parseIpv6Groups(ip: string): number[] | null {
  let text = ip.toLowerCase();
  const zoneAt = text.indexOf("%");
  if (zoneAt !== -1) text = text.slice(0, zoneAt);

  // Fold a trailing dotted quad (e.g. ::ffff:127.0.0.1) into two hex groups.
  const lastColon = text.lastIndexOf(":");
  const tail = text.slice(lastColon + 1);
  if (tail.includes(".")) {
    const octets = tail.split(".").map(Number);
    if (octets.length !== 4 || octets.some((n) => !Number.isInteger(n) || n < 0 || n > 255)) return null;
    const hi = ((octets[0] << 8) | octets[1]).toString(16);
    const lo = ((octets[2] << 8) | octets[3]).toString(16);
    text = `${text.slice(0, lastColon + 1)}${hi}:${lo}`;
  }

  const halves = text.split("::");
  if (halves.length > 2) return null;
  const head = halves[0] ? halves[0].split(":") : [];
  const rear = halves.length === 2 && halves[1] ? halves[1].split(":") : [];
  const missing = 8 - head.length - rear.length;
  // Without "::" all eight groups must be present; with it, at least one is elided.
  if (halves.length === 1 ? missing !== 0 : missing < 1) return null;

  const filler: string[] = new Array<string>(missing).fill("0");
  const groups = [...head, ...filler, ...rear].map((group) =>
    /^[0-9a-f]{1,4}$/.test(group) ? parseInt(group, 16) : Number.NaN,
  );
  return groups.some((n) => Number.isNaN(n)) ? null : groups;
}

/**
 * SSRF classification for an IP literal (v4 or v6). Returns true when the
 * address must be refused: loopback, unspecified, private, link-local,
 * site-local, unique-local, multicast, or an IPv6 form that embeds a
 * non-public IPv4 address (IPv4-mapped, IPv4-compatible, NAT64 64:ff9b::/96).
 * Input that is not a recognizable IP literal is refused as well.
 *
 * IPv6 is compared on its numeric groups, so equivalent spellings
 * (`::1`, `0:0:0:0:0:0:0:1`, `::ffff:127.0.0.1`, `::ffff:7f00:1`) classify the same.
 */
function ipIsPrivate(ip: string): boolean {
  const family = isIP(ip);
  if (family === 4) return ipv4IsPrivate(ip);
  if (family !== 6) return true; // not a recognizable IP literal → refuse

  const groups = parseIpv6Groups(ip);
  if (!groups) return true; // could not expand → refuse

  const leadingZeros = (count: number) => groups.slice(0, count).every((g) => g === 0);
  // The low 32 bits read as a dotted quad, for the IPv4-embedding forms below.
  const embeddedV4 = `${groups[6] >> 8}.${groups[6] & 0xff}.${groups[7] >> 8}.${groups[7] & 0xff}`;
  const first = groups[0];

  if (leadingZeros(7) && groups[7] <= 1) return true; // :: unspecified, ::1 loopback
  // IPv4-mapped ::ffff:a.b.c.d and deprecated IPv4-compatible ::a.b.c.d
  if (leadingZeros(5) && (groups[5] === 0xffff || groups[5] === 0)) return ipv4IsPrivate(embeddedV4);
  // NAT64 well-known prefix 64:ff9b::/96
  if (first === 0x64 && groups[1] === 0xff9b && groups.slice(2, 6).every((g) => g === 0)) {
    return ipv4IsPrivate(embeddedV4);
  }
  if ((first & 0xffc0) === 0xfe80) return true; // link-local fe80::/10
  if ((first & 0xffc0) === 0xfec0) return true; // deprecated site-local fec0::/10
  if ((first & 0xfe00) === 0xfc00) return true; // unique-local fc00::/7
  if ((first & 0xff00) === 0xff00) return true; // multicast ff00::/8
  return false;
}
371
+
372
/**
 * Validate that a URL is safe to fetch: it must parse, use http or https, and
 * its host must not be — or resolve to — an address that `ipIsPrivate` refuses.
 * An IP-literal host is checked directly; a hostname is resolved with
 * `lookup(..., { all: true })` and refused if ANY returned address is private.
 *
 * @param rawUrl The URL text to validate.
 * @returns The parsed URL when it passes every check.
 * @throws Error when the URL is invalid, uses another scheme, or targets a
 *   private/internal address. Errors from the DNS lookup propagate unchanged.
 */
export async function assertPublicUrl(rawUrl: string): Promise<URL> {
  let parsed: URL;
  try {
    parsed = new URL(rawUrl);
  } catch {
    throw new Error(`market capture: "${rawUrl}" is not a valid URL.`);
  }

  const scheme = parsed.protocol;
  if (!(scheme === "http:" || scheme === "https:")) {
    throw new Error(`market capture refuses ${scheme} URLs (only http/https): ${rawUrl}`);
  }

  // URL.hostname keeps the brackets around an IPv6 literal; drop them before classifying.
  const host = parsed.hostname.replace(/^\[|\]$/g, "");
  if (isIP(host)) {
    if (!ipIsPrivate(host)) return parsed;
    throw new Error(`market capture refuses private/loopback address ${host} (SSRF guard).`);
  }

  // Hostname: a single private record among the results is enough to refuse.
  const resolved = await lookup(host, { all: true });
  const internal = resolved.find((record) => ipIsPrivate(record.address));
  if (internal) {
    throw new Error(
      `market capture refuses ${host} — it resolves to private/internal address ${internal.address} (SSRF guard).`,
    );
  }
  return parsed;
}
396
+
312
397
/**
 * Default page fetcher. Each hop is validated with `assertPublicUrl` before it
 * is requested, redirects are followed manually (at most MAX_REDIRECTS) so the
 * redirect target is re-validated, and the response body is capped at
 * MAX_BODY_BYTES — a body that exceeds the cap is truncated to the chunks read
 * before the cap was crossed.
 *
 * The FETCH_TIMEOUT_MS deadline applies per hop and covers BOTH the wait for
 * headers and the body read: the abort timer stays armed until the hop is
 * finished, so a server that sends headers quickly and then drips the body
 * cannot hold the capture open indefinitely.
 *
 * @throws Error when a hop fails validation, the hop times out (abort), or the
 *   redirect chain is longer than MAX_REDIRECTS.
 */
const defaultFetchPage: FetchPage = async (url) => {
  let current = url;
  for (let hop = 0; hop <= MAX_REDIRECTS; hop++) {
    await assertPublicUrl(current);
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
    try {
      const response = await fetch(current, {
        headers: {
          "User-Agent": "fullstackgtm-market/0 (+https://github.com/fullstackgtm/core)",
          "Accept-Language": "en-US",
        },
        redirect: "manual",
        signal: controller.signal,
      });
      const location = response.headers.get("location");
      if (response.status >= 300 && response.status < 400 && location) {
        // The redirect body is never read; cancel it so its stream is released.
        await response.body?.cancel().catch(() => undefined);
        current = new URL(location, current).toString();
        continue; // re-validate the redirect target on the next iteration
      }
      const reader = response.body?.getReader();
      if (!reader) return { status: response.status, body: await response.text() };
      const chunks: Uint8Array[] = [];
      let total = 0;
      for (;;) {
        const { done, value } = await reader.read();
        if (done) break;
        total += value.length;
        if (total > MAX_BODY_BYTES) {
          // Over the cap: stop reading and keep only what was collected so far.
          await reader.cancel();
          break;
        }
        chunks.push(value);
      }
      return { status: response.status, body: Buffer.concat(chunks).toString("utf8") };
    } finally {
      // Runs on return, continue, and throw alike — the timer never outlives its hop.
      clearTimeout(timer);
    }
  }
  throw new Error(`market capture: too many redirects (>${MAX_REDIRECTS}) for ${url}`);
};
322
438
 
323
439
  export type CaptureOptions = {
@@ -478,6 +594,11 @@ export function validateObservationSet(config: MarketConfig, set: ObservationSet
478
594
  if (!INTENSITY_RANK[obs.intensity] && obs.intensity !== "unobservable") {
479
595
  problems.push(`${cell}: invalid intensity "${obs.intensity}"`);
480
596
  }
597
+ // confidence is rendered into the HTML report; only the enum is allowed, so
598
+ // an `observe --from` file can't smuggle markup through a free-text value.
599
+ if (obs.confidence !== "high" && obs.confidence !== "medium" && obs.confidence !== "low") {
600
+ problems.push(`${cell}: invalid confidence "${String(obs.confidence)}" (expected high, medium, or low)`);
601
+ }
481
602
  if ((obs.intensity === "loud" || obs.intensity === "quiet") && obs.evidence.length === 0) {
482
603
  problems.push(`${cell}: ${obs.intensity} reading with no quoted evidence`);
483
604
  }
@@ -40,6 +40,23 @@ function escapeHtml(value: string): string {
40
40
  .replace(/"/g, "&quot;");
41
41
  }
42
42
 
43
/**
 * Serialize a value as JSON that is safe to embed inside an inline <script>
 * block. JSON.stringify leaves `<`, `>`, `&`, U+2028 and U+2029 as-is, so an
 * untrusted string containing `</script>` would close the tag and inject
 * markup. Each of those characters is replaced by its \uXXXX escape, which
 * parses back to the identical value while making the breakout sequence
 * unrepresentable in the HTML source.
 *
 * @param value Any JSON-serializable value.
 * @returns The escaped JSON text. A value JSON.stringify cannot represent at
 *   the top level (undefined, a function, a symbol) serializes as `null`
 *   instead of raising a TypeError.
 */
export function safeJsonForScript(value: unknown): string {
  // JSON.stringify returns undefined (not a string) for undefined/function/symbol input.
  const json: string | undefined = JSON.stringify(value);
  return (json ?? "null").replace(
    /[<>&\u2028\u2029]/g,
    (ch) => `\\u${ch.charCodeAt(0).toString(16).padStart(4, "0")}`,
  );
}
59
+
43
60
  type MapModel = {
44
61
  config: MarketConfig;
45
62
  set: ObservationSet;
@@ -374,7 +391,7 @@ function axisSectionsHtml(
374
391
  <table class="legend"><thead><tr><th></th><th>vendor</th><th class="num">${legendMeasureHead}</th></tr></thead><tbody>${legendRows}</tbody></table>
375
392
  </div>
376
393
  <div class="map-tip" id="map-tip" hidden></div>
377
- <script type="application/json" id="map-data">${JSON.stringify(tipData)}</script>
394
+ <script type="application/json" id="map-data">${safeJsonForScript(tipData)}</script>
378
395
  <script>
379
396
  (function () {
380
397
  var data = JSON.parse(document.getElementById("map-data").textContent);
@@ -385,7 +402,16 @@ function axisSectionsHtml(
385
402
  function show(v, evt) {
386
403
  var d = data[v];
387
404
  if (!d) return;
388
- tip.innerHTML = "<b>" + d.n + " · " + d.name + "</b>" + d.lines.map(function (l) { return "<div>" + l + "</div>"; }).join("");
405
+ // textContent only vendor names / axis labels are untrusted (competitor-controlled).
406
+ tip.textContent = "";
407
+ var head = document.createElement("b");
408
+ head.textContent = d.n + " · " + d.name;
409
+ tip.appendChild(head);
410
+ d.lines.forEach(function (l) {
411
+ var div = document.createElement("div");
412
+ div.textContent = l;
413
+ tip.appendChild(div);
414
+ });
389
415
  tip.hidden = false;
390
416
  var box = fig.getBoundingClientRect();
391
417
  tip.style.left = Math.min(evt.clientX - box.left + 14, box.width - tip.offsetWidth - 8) + "px";
@@ -481,7 +507,7 @@ export function marketMapToHtml(config: MarketConfig, set: ObservationSet): stri
481
507
  const anchorLoud = anchor
482
508
  ? claimIds.filter((claimId) => model.cell(anchor, claimId)?.intensity === "loud").length
483
509
  : 0;
484
- const anchorNote = anchor ? ` · ${vendorNamesById.get(anchor) ?? anchor} loud on ${anchorLoud}` : "";
510
+ const anchorNote = anchor ? ` · ${e(vendorNamesById.get(anchor) ?? anchor)} loud on ${anchorLoud}` : "";
485
511
  return `<details class="claim-group"><summary><b>${e(group.title)}</b> — ${claimIds.length} claim${claimIds.length === 1 ? "" : "s"} <span class="sum-soft">(${e(group.blurb)}${anchorNote})</span></summary>
486
512
  <table><thead><tr><th></th>${vendorHeads}<th></th></tr></thead><tbody>${claimIds.map(matrixRow).join("")}</tbody></table>
487
513
  </details>`;
@@ -543,7 +569,7 @@ export function marketMapToHtml(config: MarketConfig, set: ObservationSet): stri
543
569
  if (!obs || obs.evidence.length === 0) return [];
544
570
  return obs.evidence.map(
545
571
  (evidence) =>
546
- `<div class="ev"><span class="ev-head">${e(claimId)} · ${obs.intensity.toUpperCase()} (${obs.confidence})</span>` +
572
+ `<div class="ev"><span class="ev-head">${e(claimId)} · ${e(obs.intensity.toUpperCase())} (${e(String(obs.confidence ?? ""))})</span>` +
547
573
  `<blockquote>“${e(evidence.text)}”</blockquote>` +
548
574
  `<span class="ev-src">${e(String(evidence.metadata?.url ?? ""))} · capture ${e(String(evidence.metadata?.captureHash ?? "").slice(0, 12))}</span></div>`,
549
575
  );
package/src/schedule.ts CHANGED
@@ -145,6 +145,75 @@ export function validateSchedulableArgv(argv: string[]): void {
145
145
  }
146
146
  }
147
147
 
148
/**
 * Entry-point guard for `schedule add --label`. The label is operator-chosen
 * free text that `renderManagedBlock` later interpolates into a crontab
 * comment line, where a newline or carriage return would break out of the
 * comment and inject a crontab entry on `schedule install`. Any control
 * character is therefore rejected here, before the label is stored.
 *
 * @throws Error when the label contains a newline or control character.
 */
export function assertSingleLineLabel(label: string): void {
  if (!hasControlChar(label)) return;
  const message =
    "A schedule --label cannot contain newlines or control characters " +
    "(they would inject lines into the managed crontab block). Use a plain single-line name.";
  throw new Error(message);
}
164
+
165
/**
 * Reports whether a string carries any line-breaking or control character:
 * the C0 controls (U+0000–U+001F, which include TAB, LF, VT, FF and CR), DEL
 * (U+007F), and the Unicode line breakers NEL (U+0085), LS (U+2028) and
 * PS (U+2029). The scan is per UTF-16 code unit.
 */
export function hasControlChar(value: string): boolean {
  let index = 0;
  while (index < value.length) {
    const unit = value.charCodeAt(index);
    const isC0OrDel = unit <= 0x1f || unit === 0x7f;
    const isUnicodeLineBreak = unit === 0x85 || unit === 0x2028 || unit === 0x2029;
    if (isC0OrDel || isUnicodeLineBreak) return true;
    index += 1;
  }
  return false;
}
179
+
180
/**
 * Render-time safety net for a crontab comment line: every control or
 * line-separator character is replaced by a space, runs of spaces are squeezed
 * to a single space, and the result is trimmed.
 */
function sanitizeCrontabComment(value: string): string {
  const breaksLine = (unit: number): boolean =>
    unit < 0x20 || unit === 0x7f || unit === 0x85 || unit === 0x2028 || unit === 0x2029;
  // Array.from walks the string by code point, like for...of.
  const cleaned = Array.from(value, (symbol) => (breaksLine(symbol.charCodeAt(0)) ? " " : symbol)).join("");
  return cleaned.replace(/ {2,}/g, " ").trim();
}
189
+
190
/**
 * Guard for `renderManagedBlock`: checks every value that gets interpolated
 * into the crontab (profile, cron, id, label and each argv token) and throws
 * on the first one that carries a newline or control character, so a
 * hand-edited or corrupted schedules.json entry is refused instead of being
 * emitted as a live crontab line.
 *
 * @throws Error naming the entry id and the first offending field.
 */
function assertRenderableEntry(profile: string, entry: ScheduleEntry): void {
  const scalarFields: Array<[string, string]> = [
    ["profile", profile],
    ["cron", entry.cron],
    ["id", entry.id],
    ["label", entry.label],
  ];
  const argvFields = entry.argv.map((token, position): [string, string] => [`argv[${position}]`, token]);
  // First offending field wins, in the same order the fields are listed above.
  const offender = [...scalarFields, ...argvFields].find(([, text]) => hasControlChar(text));
  if (!offender) return;
  const [fieldName] = offender;
  throw new Error(
    `Refusing to render schedule entry ${entry.id}: its ${fieldName} contains a newline or control character. ` +
      "The schedules.json store has been tampered with or corrupted — repair it before installing.",
  );
}
216
+
148
217
  /**
149
218
  * Split a `schedule add "<command>"` string into argv, honoring single and
150
219
  * double quotes (no escapes, no expansion — this is tokenization, not shell).
@@ -206,7 +275,13 @@ const CRON_FIELD_SPECS = [
206
275
  ] as const;
207
276
 
208
277
  export function parseCron(expression: string): CronExpression {
209
- const fields = expression.trim().split(/\s+/);
278
+ // Reject non-ASCII whitespace and control chars: JS \s splits on U+00A0,
279
+ // U+3000, etc., but Vixie cron's field separator is only space/tab. A source
280
+ // carrying them would parse here yet be misparsed or rejected by `crontab -`.
281
+ if (hasControlChar(expression) || /[^\x20-\x7e]/.test(expression)) {
282
+ throw new Error(`Invalid cron expression "${expression}": only ASCII characters, space, and tab are allowed.`);
283
+ }
284
+ const fields = expression.trim().split(/[ \t]+/);
210
285
  if (fields.length !== 5) {
211
286
  throw new Error(
212
287
  `Invalid cron expression "${expression}": expected 5 fields ` +
@@ -559,13 +634,28 @@ export function renderManagedBlock(
559
634
  entries: ScheduleEntry[],
560
635
  cliInvocation: string,
561
636
  ): string {
637
+ // cliInvocation is spliced raw into the executable line; it is built from
638
+ // process.execPath, the script path, and FSGTM_HOME (cli.ts), so a newline in
639
+ // FSGTM_HOME would inject a crontab line. Validate it like the entry fields —
640
+ // single-quote shell-escaping does NOT defend cron's line parser.
641
+ if (hasControlChar(cliInvocation)) {
642
+ throw new Error(
643
+ "Refusing to render the managed crontab: the resolved CLI invocation (node path, script path, " +
644
+ "or FSGTM_HOME) contains a newline or control character. Check $FSGTM_HOME.",
645
+ );
646
+ }
562
647
  const { open, close } = crontabSentinels(profile);
563
648
  const lines = [
564
649
  open,
565
650
  "# Managed by `fullstackgtm schedule install` — replaced wholesale on re-install; do not edit.",
566
651
  ];
567
652
  for (const entry of entries) {
568
- lines.push(`# ${entry.label} (${entry.id}): ${entry.argv.join(" ")}`);
653
+ // Refuse to render any entry whose interpolated fields carry a control char
654
+ // — the executable line below embeds cron/id raw, so a tampered store could
655
+ // otherwise inject a live crontab line. The comment line is additionally
656
+ // sanitized so a benign-but-messy label can't break it.
657
+ assertRenderableEntry(profile, entry);
658
+ lines.push(sanitizeCrontabComment(`# ${entry.label} (${entry.id}): ${entry.argv.join(" ")}`));
569
659
  lines.push(`${entry.cron} ${cliInvocation} schedule run ${entry.id} --profile ${profile} --trigger cron`);
570
660
  }
571
661
  lines.push(close);