fullstackgtm 0.25.0 → 0.25.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/market.ts CHANGED
@@ -1,5 +1,7 @@
1
1
  import { createHash } from "node:crypto";
2
+ import { lookup } from "node:dns/promises";
2
3
  import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
4
+ import { isIP } from "node:net";
3
5
  import { join } from "node:path";
4
6
  import { credentialsDir } from "./credentials.ts";
5
7
  import type { GtmEvidence } from "./types.ts";
@@ -309,15 +311,129 @@ export function extractReadableText(html: string): string {
309
311
 
310
312
  export type FetchPage = (url: string) => Promise<{ status: number; body: string }>;
311
313
 
314
+ /**
315
+ * SSRF guard. market.config.json URLs are operator-authored, but configs are
316
+ * shared/templated in consulting/team use and `market capture|refresh` is on
317
+ * the cron allowlist — an unguarded fetch is an unattended internal-network
318
+ * and cloud-metadata probe. We therefore (1) allow only http/https, (2) refuse
319
+ * any host that is or resolves to a private/loopback/link-local/metadata
320
+ * address, and (3) follow redirects manually, re-validating each hop.
321
+ *
322
+ * Residual gap (documented, not defended here): TOCTOU DNS rebinding between
323
+ * our lookup and fetch's own resolution. Out of scope for fetching public
324
+ * competitor pages; a hardened deployment should fetch through an egress proxy.
325
+ */
326
+ const MAX_REDIRECTS = 5; // redirect hops defaultFetchPage follows before throwing "too many redirects"
327
+ const FETCH_TIMEOUT_MS = 15_000; // milliseconds before the per-request AbortController fires
328
+ const MAX_BODY_BYTES = 5_000_000; // once the running byte total exceeds this, the body stream is cancelled and the read stops
329
+
330
/**
 * Parse a strict dotted-quad ("a.b.c.d", decimal digits only) into its four
 * octets. Returns null for anything else — empty octets, hex/exponent forms,
 * out-of-range values — so callers can fail closed.
 */
function parseIpv4Octets(ip: string): [number, number, number, number] | null {
  const parts = ip.split(".");
  if (parts.length !== 4) return null;
  const octets = parts.map((part) => (/^\d{1,3}$/.test(part) ? Number(part) : Number.NaN));
  if (octets.some((n) => Number.isNaN(n) || n > 255)) return null;
  const [a = 0, b = 0, c = 0, d = 0] = octets;
  return [a, b, c, d];
}

/**
 * True when an IPv4 address must not be fetched: loopback, private, link-local
 * (incl. the 169.254.169.254 metadata endpoint), CGNAT, the IANA special-purpose
 * blocks that are not globally routable, and multicast/reserved space.
 * Malformed input is treated as private (fail closed).
 */
function ipv4IsPrivate(ip: string): boolean {
  const octets = parseIpv4Octets(ip);
  if (octets === null) return true;
  const [a, b, c] = octets;
  if (a === 0 || a === 127) return true; // this-host, loopback
  if (a === 10) return true; // private
  if (a === 172 && b >= 16 && b <= 31) return true; // private
  if (a === 192 && b === 168) return true; // private
  if (a === 169 && b === 254) return true; // link-local incl. 169.254.169.254 metadata
  if (a === 100 && b >= 64 && b <= 127) return true; // CGNAT
  if (a === 192 && b === 0 && (c === 0 || c === 2)) return true; // IETF protocol assignments, TEST-NET-1
  if (a === 198 && (b === 18 || b === 19)) return true; // benchmarking 198.18.0.0/15
  if (a === 198 && b === 51 && c === 100) return true; // TEST-NET-2
  if (a === 203 && b === 0 && c === 113) return true; // TEST-NET-3
  if (a >= 224) return true; // multicast / reserved
  return false;
}

/**
 * Expand a textual IPv6 address (already accepted by `isIP`) into its eight
 * 16-bit groups. Handles `::` compression, a trailing dotted IPv4 part, and a
 * `%zone` suffix. Returns null when the text cannot be expanded.
 */
function parseIpv6Hextets(ip: string): number[] | null {
  const zoneAt = ip.indexOf("%");
  let text = (zoneAt === -1 ? ip : ip.slice(0, zoneAt)).toLowerCase();

  // Rewrite a dotted IPv4 tail (e.g. ::ffff:127.0.0.1) as two hex groups.
  const lastColon = text.lastIndexOf(":");
  const tail = text.slice(lastColon + 1);
  if (tail.includes(".")) {
    const octets = parseIpv4Octets(tail);
    if (octets === null) return null;
    const [a, b, c, d] = octets;
    text = `${text.slice(0, lastColon + 1)}${((a << 8) | b).toString(16)}:${((c << 8) | d).toString(16)}`;
  }

  const halves = text.split("::");
  if (halves.length > 2) return null;
  const toGroups = (side: string | undefined): string[] => (side ? side.split(":") : []);
  const head = toGroups(halves[0]);
  const rear = halves.length === 2 ? toGroups(halves[1]) : [];
  const gap = 8 - head.length - rear.length;
  // "::" must stand for at least one group; without it all eight must be present.
  if (halves.length === 2 ? gap < 1 : gap !== 0) return null;

  const groups = [...head, ...new Array<string>(gap).fill("0"), ...rear];
  const hextets = groups.map((group) => (/^[0-9a-f]{1,4}$/.test(group) ? parseInt(group, 16) : Number.NaN));
  return hextets.some((n) => Number.isNaN(n)) ? null : hextets;
}

/**
 * True when an IP literal must not be fetched by the SSRF guard.
 *
 * IPv4 is delegated to `ipv4IsPrivate`. IPv6 is expanded to its numeric groups
 * first, so every spelling of an address (compressed, fully written out, with a
 * dotted tail) gets the same verdict:
 *   - IPv4-mapped (::ffff:0:0/96) and NAT64 (64:ff9b::/96) addresses are
 *     unwrapped and judged by their embedded IPv4 address;
 *   - everything outside global unicast 2000::/3 is refused — this covers
 *     ::, ::1, unique-local fc00::/7, link-local fe80::/10, site-local
 *     fec0::/10 and multicast ff00::/8;
 *   - the documentation prefix 2001:db8::/32 is refused.
 * Anything that is not a recognizable IP literal is refused (fail closed).
 */
function ipIsPrivate(ip: string): boolean {
  const family = isIP(ip);
  if (family === 4) return ipv4IsPrivate(ip);
  if (family !== 6) return true; // not a recognizable IP literal → refuse

  const hextets = parseIpv6Hextets(ip);
  if (hextets === null || hextets.length !== 8) return true;
  const [h0 = 0, h1 = 0, h2 = 0, h3 = 0, h4 = 0, h5 = 0, h6 = 0, h7 = 0] = hextets;
  const embeddedV4 = `${h6 >> 8}.${h6 & 0xff}.${h7 >> 8}.${h7 & 0xff}`;

  const isMapped = h0 === 0 && h1 === 0 && h2 === 0 && h3 === 0 && h4 === 0 && h5 === 0xffff;
  const isNat64 = h0 === 0x64 && h1 === 0xff9b && h2 === 0 && h3 === 0 && h4 === 0 && h5 === 0;
  if (isMapped || isNat64) return ipv4IsPrivate(embeddedV4);

  if ((h0 & 0xe000) !== 0x2000) return true; // outside global unicast 2000::/3
  if (h0 === 0x2001 && h1 === 0x0db8) return true; // documentation prefix
  return false;
}
371
+
372
/**
 * Parse `rawUrl` and refuse it unless it is an http/https URL whose host is a
 * public address.
 *
 * An IP-literal host is checked directly. A hostname is resolved with
 * `lookup(..., { all: true })` and refused if ANY returned address is
 * private/internal, or if resolution yields no address at all (nothing could
 * be vetted, so fail closed).
 *
 * @param rawUrl URL text to validate.
 * @returns The parsed URL when it passes every check.
 * @throws Error when the URL is malformed, uses another scheme, or points at a
 *   private/loopback/internal address. DNS resolution errors from `lookup`
 *   propagate unchanged.
 */
export async function assertPublicUrl(rawUrl: string): Promise<URL> {
  let url: URL;
  try {
    url = new URL(rawUrl);
  } catch {
    throw new Error(`market capture: "${rawUrl}" is not a valid URL.`);
  }
  if (url.protocol !== "http:" && url.protocol !== "https:") {
    throw new Error(`market capture refuses ${url.protocol} URLs (only http/https): ${rawUrl}`);
  }
  const host = url.hostname.replace(/^\[|\]$/g, ""); // strip IPv6 brackets
  if (isIP(host)) {
    if (ipIsPrivate(host)) throw new Error(`market capture refuses private/loopback address ${host} (SSRF guard).`);
    return url;
  }
  // Hostname: resolve and refuse if ANY address is private.
  const addrs = await lookup(host, { all: true });
  if (addrs.length === 0) {
    // No address was returned, so nothing was actually validated — do not wave it through.
    throw new Error(`market capture refuses ${host} — it did not resolve to any address (SSRF guard).`);
  }
  for (const { address } of addrs) {
    if (ipIsPrivate(address)) {
      throw new Error(`market capture refuses ${host} — it resolves to private/internal address ${address} (SSRF guard).`);
    }
  }
  return url;
}
396
+
312
397
  const defaultFetchPage: FetchPage = async (url) => {
313
- const response = await fetch(url, {
314
- headers: {
315
- "User-Agent": "fullstackgtm-market/0 (+https://github.com/fullstackgtm/core)",
316
- "Accept-Language": "en-US",
317
- },
318
- redirect: "follow",
319
- });
320
- return { status: response.status, body: await response.text() };
398
+ let current = url;
399
+ for (let hop = 0; hop <= MAX_REDIRECTS; hop++) {
400
+ await assertPublicUrl(current);
401
+ const controller = new AbortController();
402
+ const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
403
+ let response: Response;
404
+ try {
405
+ response = await fetch(current, {
406
+ headers: {
407
+ "User-Agent": "fullstackgtm-market/0 (+https://github.com/fullstackgtm/core)",
408
+ "Accept-Language": "en-US",
409
+ },
410
+ redirect: "manual",
411
+ signal: controller.signal,
412
+ });
413
+ } finally {
414
+ clearTimeout(timer);
415
+ }
416
+ if (response.status >= 300 && response.status < 400 && response.headers.get("location")) {
417
+ current = new URL(response.headers.get("location") as string, current).toString();
418
+ continue; // re-validate the redirect target on the next iteration
419
+ }
420
+ const reader = response.body?.getReader();
421
+ if (!reader) return { status: response.status, body: await response.text() };
422
+ const chunks: Uint8Array[] = [];
423
+ let total = 0;
424
+ for (;;) {
425
+ const { done, value } = await reader.read();
426
+ if (done) break;
427
+ total += value.length;
428
+ if (total > MAX_BODY_BYTES) {
429
+ await reader.cancel();
430
+ break;
431
+ }
432
+ chunks.push(value);
433
+ }
434
+ return { status: response.status, body: Buffer.concat(chunks).toString("utf8") };
435
+ }
436
+ throw new Error(`market capture: too many redirects (>${MAX_REDIRECTS}) for ${url}`);
321
437
  };
322
438
 
323
439
  export type CaptureOptions = {
@@ -478,6 +594,11 @@ export function validateObservationSet(config: MarketConfig, set: ObservationSet
478
594
  if (!INTENSITY_RANK[obs.intensity] && obs.intensity !== "unobservable") {
479
595
  problems.push(`${cell}: invalid intensity "${obs.intensity}"`);
480
596
  }
597
+ // confidence is rendered into the HTML report; only the enum is allowed, so
598
+ // an `observe --from` file can't smuggle markup through a free-text value.
599
+ if (obs.confidence !== "high" && obs.confidence !== "medium" && obs.confidence !== "low") {
600
+ problems.push(`${cell}: invalid confidence "${String(obs.confidence)}" (expected high, medium, or low)`);
601
+ }
481
602
  if ((obs.intensity === "loud" || obs.intensity === "quiet") && obs.evidence.length === 0) {
482
603
  problems.push(`${cell}: ${obs.intensity} reading with no quoted evidence`);
483
604
  }
@@ -40,6 +40,23 @@ function escapeHtml(value: string): string {
40
40
  .replace(/"/g, "&quot;");
41
41
  }
42
42
 
43
/**
 * Serialize a value as JSON that is safe to embed inside an inline <script>
 * block. JSON.stringify does not escape `<`, `>`, `&`, or the U+2028/U+2029
 * line separators, so a string containing `</script>` would close the tag and
 * inject markup. Each of those characters is replaced with its \uXXXX escape,
 * which parses back to the identical value while making the breakout sequence
 * unrepresentable in the HTML source.
 *
 * JSON.stringify returns `undefined` (not a string) for `undefined`, functions,
 * and symbols; those are emitted as `null` so the result is always valid JSON
 * instead of the call throwing a TypeError.
 *
 * @param value Any JSON-serializable value.
 * @returns JSON text with script-breaking characters escaped.
 */
export function safeJsonForScript(value: unknown): string {
  // Typed explicitly: the lib signature says `string`, but the runtime can hand back undefined.
  const json: string | undefined = JSON.stringify(value);
  return (json ?? "null").replace(
    /[<>&\u2028\u2029]/g,
    (ch) => `\\u${ch.charCodeAt(0).toString(16).padStart(4, "0")}`,
  );
}
59
+
43
60
  type MapModel = {
44
61
  config: MarketConfig;
45
62
  set: ObservationSet;
@@ -374,7 +391,7 @@ function axisSectionsHtml(
374
391
  <table class="legend"><thead><tr><th></th><th>vendor</th><th class="num">${legendMeasureHead}</th></tr></thead><tbody>${legendRows}</tbody></table>
375
392
  </div>
376
393
  <div class="map-tip" id="map-tip" hidden></div>
377
- <script type="application/json" id="map-data">${JSON.stringify(tipData)}</script>
394
+ <script type="application/json" id="map-data">${safeJsonForScript(tipData)}</script>
378
395
  <script>
379
396
  (function () {
380
397
  var data = JSON.parse(document.getElementById("map-data").textContent);
@@ -385,7 +402,16 @@ function axisSectionsHtml(
385
402
  function show(v, evt) {
386
403
  var d = data[v];
387
404
  if (!d) return;
388
- tip.innerHTML = "<b>" + d.n + " · " + d.name + "</b>" + d.lines.map(function (l) { return "<div>" + l + "</div>"; }).join("");
405
+ // textContent only vendor names / axis labels are untrusted (competitor-controlled).
406
+ tip.textContent = "";
407
+ var head = document.createElement("b");
408
+ head.textContent = d.n + " · " + d.name;
409
+ tip.appendChild(head);
410
+ d.lines.forEach(function (l) {
411
+ var div = document.createElement("div");
412
+ div.textContent = l;
413
+ tip.appendChild(div);
414
+ });
389
415
  tip.hidden = false;
390
416
  var box = fig.getBoundingClientRect();
391
417
  tip.style.left = Math.min(evt.clientX - box.left + 14, box.width - tip.offsetWidth - 8) + "px";
@@ -481,7 +507,7 @@ export function marketMapToHtml(config: MarketConfig, set: ObservationSet): stri
481
507
  const anchorLoud = anchor
482
508
  ? claimIds.filter((claimId) => model.cell(anchor, claimId)?.intensity === "loud").length
483
509
  : 0;
484
- const anchorNote = anchor ? ` · ${vendorNamesById.get(anchor) ?? anchor} loud on ${anchorLoud}` : "";
510
+ const anchorNote = anchor ? ` · ${e(vendorNamesById.get(anchor) ?? anchor)} loud on ${anchorLoud}` : "";
485
511
  return `<details class="claim-group"><summary><b>${e(group.title)}</b> — ${claimIds.length} claim${claimIds.length === 1 ? "" : "s"} <span class="sum-soft">(${e(group.blurb)}${anchorNote})</span></summary>
486
512
  <table><thead><tr><th></th>${vendorHeads}<th></th></tr></thead><tbody>${claimIds.map(matrixRow).join("")}</tbody></table>
487
513
  </details>`;
@@ -543,7 +569,7 @@ export function marketMapToHtml(config: MarketConfig, set: ObservationSet): stri
543
569
  if (!obs || obs.evidence.length === 0) return [];
544
570
  return obs.evidence.map(
545
571
  (evidence) =>
546
- `<div class="ev"><span class="ev-head">${e(claimId)} · ${obs.intensity.toUpperCase()} (${obs.confidence})</span>` +
572
+ `<div class="ev"><span class="ev-head">${e(claimId)} · ${e(obs.intensity.toUpperCase())} (${e(String(obs.confidence ?? ""))})</span>` +
547
573
  `<blockquote>“${e(evidence.text)}”</blockquote>` +
548
574
  `<span class="ev-src">${e(String(evidence.metadata?.url ?? ""))} · capture ${e(String(evidence.metadata?.captureHash ?? "").slice(0, 12))}</span></div>`,
549
575
  );
package/src/schedule.ts CHANGED
@@ -145,6 +145,75 @@ export function validateSchedulableArgv(argv: string[]): void {
145
145
  }
146
146
  }
147
147
 
148
/**
 * Guard for operator-chosen schedule labels. The label ends up inside a crontab
 * comment line written by `renderManagedBlock`, so a newline or carriage return
 * in it would end the comment and let the remainder be read as its own crontab
 * entry on `schedule install`. Any label that `hasControlChar` flags is
 * rejected here, at the entry point.
 *
 * @param label The label text supplied by the operator.
 * @throws Error when the label contains a newline or other control character.
 */
export function assertSingleLineLabel(label: string): void {
  if (!hasControlChar(label)) return;
  const message =
    "A schedule --label cannot contain newlines or control characters " +
    "(they would inject lines into the managed crontab block). Use a plain single-line name.";
  throw new Error(message);
}
164
+
165
/**
 * Single source of truth for "this code unit may break a line or act as a
 * control": C0 controls (which include TAB, LF, VT, FF, CR), DEL, NEL U+0085,
 * LS U+2028 and PS U+2029. Both the detector and the sanitizer below use it,
 * so the two can never disagree about which characters are dangerous.
 */
function isControlCode(code: number): boolean {
  return code < 0x20 || code === 0x7f || code === 0x85 || code === 0x2028 || code === 0x2029;
}

/**
 * True if the string contains any line-breaking or control character (see
 * `isControlCode`). Used to reject values before they are interpolated into
 * line-oriented output such as the managed crontab block.
 *
 * @param value Text to inspect.
 * @returns Whether at least one such character is present.
 */
export function hasControlChar(value: string): boolean {
  for (let i = 0; i < value.length; i++) {
    if (isControlCode(value.charCodeAt(i))) return true;
  }
  return false;
}

/**
 * Last-resort guard at render time: replace every control/separator character
 * with a space, collapse runs of spaces to one, and trim the ends.
 *
 * @param value A comment line about to be written to the crontab.
 * @returns The same text, guaranteed to contain no character `hasControlChar` flags.
 */
function sanitizeCrontabComment(value: string): string {
  // Array.from walks code points, so astral characters (emoji) pass through intact.
  const cleaned = Array.from(value, (ch) => (isControlCode(ch.charCodeAt(0)) ? " " : ch)).join("");
  return cleaned.replace(/ {2,}/g, " ").trim();
}
189
+
190
/**
 * Refuse to render a schedule entry if any text that gets interpolated into the
 * crontab carries a newline or control character. The check covers the profile
 * name, the entry's cron expression, id and label, and every argv token, in
 * that order; the first offending field is named in the error.
 *
 * @param profile Profile name that is interpolated alongside the entry.
 * @param entry Schedule entry about to be rendered.
 * @throws Error naming the first field that `hasControlChar` flags.
 */
function assertRenderableEntry(profile: string, entry: ScheduleEntry): void {
  const named: Array<[string, string]> = [
    ["profile", profile],
    ["cron", entry.cron],
    ["id", entry.id],
    ["label", entry.label],
  ];
  entry.argv.forEach((token, index) => {
    named.push([`argv[${index}]`, token]);
  });

  const offender = named.find(([, text]) => hasControlChar(text));
  if (offender === undefined) return;

  const [name] = offender;
  throw new Error(
    `Refusing to render schedule entry ${entry.id}: its ${name} contains a newline or control character. ` +
      "The schedules.json store has been tampered with or corrupted — repair it before installing.",
  );
}
216
+
148
217
  /**
149
218
  * Split a `schedule add "<command>"` string into argv, honoring single and
150
219
  * double quotes (no escapes, no expansion — this is tokenization, not shell).
@@ -206,7 +275,13 @@ const CRON_FIELD_SPECS = [
206
275
  ] as const;
207
276
 
208
277
  export function parseCron(expression: string): CronExpression {
209
- const fields = expression.trim().split(/\s+/);
278
+ // Reject non-ASCII whitespace and control chars: JS \s splits on U+00A0,
279
+ // U+3000, etc., but Vixie cron's field separator is only space/tab. A source
280
+ // carrying them would parse here yet be misparsed or rejected by `crontab -`.
281
+ if (hasControlChar(expression) || /[^\x20-\x7e]/.test(expression)) {
282
+ throw new Error(`Invalid cron expression "${expression}": only ASCII characters, space, and tab are allowed.`);
283
+ }
284
+ const fields = expression.trim().split(/[ \t]+/);
210
285
  if (fields.length !== 5) {
211
286
  throw new Error(
212
287
  `Invalid cron expression "${expression}": expected 5 fields ` +
@@ -559,13 +634,28 @@ export function renderManagedBlock(
559
634
  entries: ScheduleEntry[],
560
635
  cliInvocation: string,
561
636
  ): string {
637
+ // cliInvocation is spliced raw into the executable line; it is built from
638
+ // process.execPath, the script path, and FSGTM_HOME (cli.ts), so a newline in
639
+ // FSGTM_HOME would inject a crontab line. Validate it like the entry fields —
640
+ // single-quote shell-escaping does NOT defend cron's line parser.
641
+ if (hasControlChar(cliInvocation)) {
642
+ throw new Error(
643
+ "Refusing to render the managed crontab: the resolved CLI invocation (node path, script path, " +
644
+ "or FSGTM_HOME) contains a newline or control character. Check $FSGTM_HOME.",
645
+ );
646
+ }
562
647
  const { open, close } = crontabSentinels(profile);
563
648
  const lines = [
564
649
  open,
565
650
  "# Managed by `fullstackgtm schedule install` — replaced wholesale on re-install; do not edit.",
566
651
  ];
567
652
  for (const entry of entries) {
568
- lines.push(`# ${entry.label} (${entry.id}): ${entry.argv.join(" ")}`);
653
+ // Refuse to render any entry whose interpolated fields carry a control char
654
+ // — the executable line below embeds cron/id raw, so a tampered store could
655
+ // otherwise inject a live crontab line. The comment line is additionally
656
+ // sanitized so a benign-but-messy label can't break it.
657
+ assertRenderableEntry(profile, entry);
658
+ lines.push(sanitizeCrontabComment(`# ${entry.label} (${entry.id}): ${entry.argv.join(" ")}`));
569
659
  lines.push(`${entry.cron} ${cliInvocation} schedule run ${entry.id} --profile ${profile} --trigger cron`);
570
660
  }
571
661
  lines.push(close);