@3030-labs/wotw 0.8.4 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -751,6 +751,7 @@ async function loadConfig(searchFrom) {
751
751
  const withEnv = applyEnvOverrides(merged);
752
752
  const validated = validateConfig(withEnv);
753
753
  validateHostedConfig(validated);
754
+ validateHostedRedactionSink(validated);
754
755
  return { config: validated, path: path2 };
755
756
  }
756
757
  function applyEnvOverrides(config) {
@@ -832,6 +833,15 @@ function applyEnvOverrides(config) {
832
833
  }
833
834
  return out;
834
835
  }
836
/**
 * Fail fast when hosted mode is enabled but the redaction sink secret is
 * missing, so a hosted daemon cannot start in a state where it is unable
 * to authenticate its redaction events.
 *
 * @param {object} config - Validated config; only `config.hosted.enabled` is read.
 * @param {object} [env=process.env] - Environment map (injectable for tests).
 * @returns {void}
 * @throws {Error} When hosted mode is on and WOTW_CLOUD_SINK_SECRET is
 *   undefined, empty, or consists only of whitespace.
 */
function validateHostedRedactionSink(config, env = process.env) {
  if (!config.hosted.enabled) return;
  const secret = env.WOTW_CLOUD_SINK_SECRET;
  // A blank (whitespace-only) value cannot authenticate any better than a
  // missing one, so it is rejected the same way.
  if (!secret || String(secret).trim() === "") {
    throw new Error(
      "Config error: hosted.enabled is true but WOTW_CLOUD_SINK_SECRET is unset. The redaction-emit wire requires this secret to authenticate with /api/internal/redaction-log; running hosted-mode without it would silently drop every compliance redaction event."
    );
  }
}
835
845
  function validateHostedConfig(config) {
836
846
  if (!config.hosted.enabled) return;
837
847
  if (!config.hosted.tenant_id || config.hosted.tenant_id.length === 0) {
@@ -952,6 +962,7 @@ var init_config = __esm({
952
962
  __name(loadConfig, "loadConfig");
953
963
  __name(applyEnvOverrides, "applyEnvOverrides");
954
964
  UUID_REGEX = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
965
+ __name(validateHostedRedactionSink, "validateHostedRedactionSink");
955
966
  __name(validateHostedConfig, "validateHostedConfig");
956
967
  __name(mergeConfig, "mergeConfig");
957
968
  positiveNumber = z.number().positive();
@@ -6736,12 +6747,27 @@ var init_dek_archive_scheduler = __esm({
6736
6747
  });
6737
6748
 
6738
6749
  // src/utils/sanitize.ts
6739
/**
 * Apply each redaction rule to `input` and report which rules fired.
 *
 * Rules are applied in order, each against the output of the previous one.
 * An event is recorded only for rules that matched AND carry a
 * `cloud_rule_id`; rules without one still redact but produce no event.
 *
 * NOTE: `byte_count` is the UTF-8 size of the full regex match (`m[0]`).
 * For a rule whose replacement re-inserts captured groups (e.g. "$1...$2")
 * the count therefore includes the re-inserted text as well.
 *
 * @param {string} input - Text to sanitize.
 * @param {Array<{name: string, pattern: RegExp, replacement: string, cloud_rule_id?: string}>} [rules=DEFAULT_REDACTIONS]
 * @returns {{output: string, events: Array<{rule_name: string, cloud_rule_id: string, byte_count: number}>}}
 */
function sanitizeWithEvents(input, rules = DEFAULT_REDACTIONS) {
  const events = [];
  let out = input;
  for (const rule of rules) {
    const { pattern } = rule;
    // Rule regexes are shared objects; start every scan from index 0.
    pattern.lastIndex = 0;
    let matchedBytes = 0;
    if (pattern.global) {
      for (const m of out.matchAll(pattern)) {
        matchedBytes += Buffer.byteLength(m[0], "utf8");
      }
    } else {
      // matchAll() throws a TypeError on a non-global regex, while
      // replace() below would substitute its first match. Count that one
      // match so custom non-global rules keep working.
      const first = pattern.exec(out);
      if (first) matchedBytes = Buffer.byteLength(first[0], "utf8");
    }
    if (matchedBytes === 0) continue;
    pattern.lastIndex = 0;
    out = out.replace(pattern, rule.replacement);
    if (rule.cloud_rule_id) {
      events.push({
        rule_name: rule.name,
        cloud_rule_id: rule.cloud_rule_id,
        byte_count: matchedBytes
      });
    }
  }
  return { output: out, events };
}
6746
6772
  var DEFAULT_REDACTIONS;
6747
6773
  var init_sanitize = __esm({
@@ -6752,19 +6778,22 @@ var init_sanitize = __esm({
6752
6778
  {
6753
6779
  name: "aws-access-key",
6754
6780
  pattern: /\bAKIA[0-9A-Z]{16}\b/g,
6755
- replacement: "[REDACTED:AWS_ACCESS_KEY]"
6781
+ replacement: "[REDACTED:AWS_ACCESS_KEY]",
6782
+ cloud_rule_id: "credential_pattern_01"
6756
6783
  },
6757
6784
  {
6758
6785
  name: "aws-secret-key",
6759
6786
  pattern: /\b[A-Za-z0-9/+=]{40}\b(?=.*(?:secret|aws))/gi,
6760
- replacement: "[REDACTED:AWS_SECRET_KEY]"
6787
+ replacement: "[REDACTED:AWS_SECRET_KEY]",
6788
+ cloud_rule_id: "credential_pattern_02"
6761
6789
  },
6762
6790
  {
6763
6791
  name: "github-token",
6764
6792
  // Review item 2: also catch GitHub fine-grained personal access
6765
6793
  // tokens (`github_pat_*`, 82+ chars per docs).
6766
6794
  pattern: /\bgh[pousr]_[A-Za-z0-9]{36,255}\b|\bgithub_pat_[A-Za-z0-9_]{50,}\b/g,
6767
- replacement: "[REDACTED:GITHUB_TOKEN]"
6795
+ replacement: "[REDACTED:GITHUB_TOKEN]",
6796
+ cloud_rule_id: "credential_pattern_03"
6768
6797
  },
6769
6798
  {
6770
6799
  name: "anthropic-api-key",
@@ -6772,7 +6801,8 @@ var init_sanitize = __esm({
6772
6801
  // window stays generous enough to catch both legacy and current
6773
6802
  // formats including api03- prefix.
6774
6803
  pattern: /\bsk-ant-[A-Za-z0-9-_]{80,120}\b/g,
6775
- replacement: "[REDACTED:ANTHROPIC_API_KEY]"
6804
+ replacement: "[REDACTED:ANTHROPIC_API_KEY]",
6805
+ cloud_rule_id: "credential_pattern_04"
6776
6806
  },
6777
6807
  {
6778
6808
  name: "openai-api-key",
@@ -6781,31 +6811,38 @@ var init_sanitize = __esm({
6781
6811
  // `sk-svcacct-*`, `sk-admin-*` all use `-` separators after the
6782
6812
  // prefix and longer character set. Updated character class.
6783
6813
  pattern: /\bsk-(?:proj|svcacct|admin)-[A-Za-z0-9_-]{20,200}\b|\bsk-[A-Za-z0-9]{20,200}\b/g,
6784
- replacement: "[REDACTED:OPENAI_API_KEY]"
6814
+ replacement: "[REDACTED:OPENAI_API_KEY]",
6815
+ cloud_rule_id: "credential_pattern_05"
6785
6816
  },
6786
6817
  {
6787
6818
  name: "gemini-api-key",
6788
- // Review item 2: Google AI Studio API keys are `AIza` + 35 chars.
6789
- // No rule existed pre-fix.
6790
- pattern: /\bAIza[A-Za-z0-9_-]{35}\b/g,
6791
- replacement: "[REDACTED:GEMINI_API_KEY]"
6819
+ // Google AI Studio API keys come in two formats:
6820
+ // - legacy: `AIza` + 35 chars
6821
+ // - current: `AQ.` + ~40-80 url-safe chars (rolled out 2024-2025; the
6822
+ // `AIza`-only rule silently missed these, leaking new-format keys)
6823
+ pattern: /\bAIza[A-Za-z0-9_-]{35}\b|\bAQ\.[A-Za-z0-9_-]{40,80}\b/g,
6824
+ replacement: "[REDACTED:GEMINI_API_KEY]",
6825
+ cloud_rule_id: "credential_pattern_06"
6792
6826
  },
6793
6827
  {
6794
6828
  name: "wotw-daemon-token",
6795
6829
  // Review item 2: daemon tokens emitted by `wotw user add` are
6796
6830
  // `wotw_` + base64url chars. Pre-fix these went unredacted.
6797
6831
  pattern: /\bwotw_[A-Za-z0-9_-]{30,200}\b/g,
6798
- replacement: "[REDACTED:WOTW_TOKEN]"
6832
+ replacement: "[REDACTED:WOTW_TOKEN]",
6833
+ cloud_rule_id: "credential_pattern_07"
6799
6834
  },
6800
6835
  {
6801
6836
  name: "private-key-block",
6802
6837
  pattern: /-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?-----END [A-Z ]*PRIVATE KEY-----/g,
6803
- replacement: "[REDACTED:PRIVATE_KEY_BLOCK]"
6838
+ replacement: "[REDACTED:PRIVATE_KEY_BLOCK]",
6839
+ cloud_rule_id: "credential_pattern_08"
6804
6840
  },
6805
6841
  {
6806
6842
  name: "jwt",
6807
6843
  pattern: /\beyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\b/g,
6808
- replacement: "[REDACTED:JWT]"
6844
+ replacement: "[REDACTED:JWT]",
6845
+ cloud_rule_id: "credential_pattern_09"
6809
6846
  },
6810
6847
  {
6811
6848
  // L-SEC-3: This pattern is deliberately scoped to full `scheme://`
@@ -6816,20 +6853,26 @@ var init_sanitize = __esm({
6816
6853
  // Verified by unit tests in test/unit/sanitize.test.ts.
6817
6854
  name: "password-in-url",
6818
6855
  pattern: /(\w+:\/\/[^:/\s]+:)[^@\s]+(@)/g,
6819
- replacement: "$1[REDACTED]$2"
6856
+ replacement: "$1[REDACTED]$2",
6857
+ cloud_rule_id: "credential_pattern_10"
6820
6858
  },
6821
6859
  {
6860
+ // PII — stays daemon-local. cloud_rule_id intentionally omitted: the
6861
+ // PASS-024 cloud whitelist accepts only credential_pattern_01..10 +
6862
+ // truncation_32kb, treating PII metadata as data-that-shouldn't-leave-
6863
+ // the-daemon. Redaction still fires on-disk; sink emission is skipped.
6822
6864
  name: "credit-card",
6823
6865
  pattern: /\b(?:\d[ -]*?){13,16}\b/g,
6824
6866
  replacement: "[REDACTED:PAN]"
6825
6867
  },
6826
6868
  {
6869
+ // PII — see credit-card note above.
6827
6870
  name: "us-ssn",
6828
6871
  pattern: /\b\d{3}-\d{2}-\d{4}\b/g,
6829
6872
  replacement: "[REDACTED:SSN]"
6830
6873
  }
6831
6874
  ];
6832
- __name(sanitize, "sanitize");
6875
+ __name(sanitizeWithEvents, "sanitizeWithEvents");
6833
6876
  }
6834
6877
  });
6835
6878
 
@@ -6839,6 +6882,8 @@ import { join as join19 } from "path";
6839
6882
  async function buildIngestionPrompt(opts) {
6840
6883
  const read = opts.readFile ?? ((p2) => readFileSync13(p2, "utf8"));
6841
6884
  const excerpts = [];
6885
+ const emitStore = opts.redactionEmitStore && opts.workspaceId ? opts.redactionEmitStore : null;
6886
+ const emitWorkspaceId = opts.workspaceId ?? "";
6842
6887
  for (const file of opts.files) {
6843
6888
  try {
6844
6889
  const raw = read(file);
@@ -6854,12 +6899,52 @@ async function buildIngestionPrompt(opts) {
6854
6899
  { path: file, rawBytes, capBytes: MAX_EXCERPT_BYTES },
6855
6900
  "source file truncated for prompt \u2014 model sees only the first MAX_EXCERPT_BYTES"
6856
6901
  );
6902
+ if (emitStore) {
6903
+ try {
6904
+ emitStore.enqueue(emitWorkspaceId, {
6905
+ redacted_at: (/* @__PURE__ */ new Date()).toISOString(),
6906
+ rule_id: "truncation_32kb",
6907
+ source_file_path: file,
6908
+ redaction_byte_count: rawBytes - MAX_EXCERPT_BYTES
6909
+ });
6910
+ } catch (storeErr) {
6911
+ getLogger("prompt-builder").warn(
6912
+ {
6913
+ path: file,
6914
+ err: storeErr instanceof Error ? storeErr.message : String(storeErr)
6915
+ },
6916
+ "redaction-emit enqueue failed (truncation); prompt continues"
6917
+ );
6918
+ }
6919
+ }
6857
6920
  } else {
6858
6921
  body = raw;
6859
6922
  }
6923
+ const { output: sanitized, events } = sanitizeWithEvents(body);
6924
+ if (emitStore && events.length > 0) {
6925
+ for (const event of events) {
6926
+ try {
6927
+ emitStore.enqueue(emitWorkspaceId, {
6928
+ redacted_at: (/* @__PURE__ */ new Date()).toISOString(),
6929
+ rule_id: event.cloud_rule_id,
6930
+ source_file_path: file,
6931
+ redaction_byte_count: event.byte_count
6932
+ });
6933
+ } catch (storeErr) {
6934
+ getLogger("prompt-builder").warn(
6935
+ {
6936
+ path: file,
6937
+ rule: event.rule_name,
6938
+ err: storeErr instanceof Error ? storeErr.message : String(storeErr)
6939
+ },
6940
+ "redaction-emit enqueue failed (credential); prompt continues"
6941
+ );
6942
+ }
6943
+ }
6944
+ }
6860
6945
  excerpts.push({
6861
6946
  path: file,
6862
- excerpt: sanitize(body),
6947
+ excerpt: sanitized,
6863
6948
  bytes: rawBytes,
6864
6949
  truncated
6865
6950
  });
@@ -7456,7 +7541,9 @@ var init_queue = __esm({
7456
7541
  const prompt = await buildIngestionPrompt({
7457
7542
  config: this.opts.config,
7458
7543
  files: batch.paths,
7459
- existingPages
7544
+ existingPages,
7545
+ redactionEmitStore: this.opts.redactionEmitStore ?? null,
7546
+ workspaceId: this.opts.redactionWorkspaceId
7460
7547
  });
7461
7548
  const sourceFiles = [...batch.paths];
7462
7549
  const sourceHashes = [];
@@ -11373,6 +11460,486 @@ var init_server = __esm({
11373
11460
  }
11374
11461
  });
11375
11462
 
11463
+ // src/provenance/redaction-emit-store.ts
11464
+ import Database3 from "better-sqlite3";
11465
+ import { randomUUID as randomUUID4 } from "crypto";
11466
+ import { dirname as dirname14 } from "path";
11467
+ var SCHEMA_VERSION3, SCHEMA_SQL3, RedactionEmitStore;
11468
// Bundler module wrapper for src/provenance/redaction-emit-store.ts:
// assigns the schema constants and the RedactionEmitStore class.
var init_redaction_emit_store = __esm({
  "src/provenance/redaction-emit-store.ts"() {
    "use strict";
    init_esm_shims();
    init_actionable_error();
    init_fs();
    init_logger();
    // Persisted in SQLite's `user_version` pragma by migrate().
    SCHEMA_VERSION3 = 1;
    SCHEMA_SQL3 = `
      CREATE TABLE IF NOT EXISTS pending_redaction_emits (
        event_id TEXT PRIMARY KEY,
        workspace_id TEXT NOT NULL,
        payload_json TEXT NOT NULL,
        created_at TEXT NOT NULL,
        attempts INTEGER NOT NULL DEFAULT 0,
        last_attempt_at TEXT,
        last_error TEXT,
        status TEXT NOT NULL DEFAULT 'pending'
          CHECK (status IN ('pending','sent','archived'))
      );
      CREATE INDEX IF NOT EXISTS idx_pending_redaction_emits_drain
        ON pending_redaction_emits(status, created_at);
      CREATE INDEX IF NOT EXISTS idx_pending_redaction_emits_workspace
        ON pending_redaction_emits(workspace_id, created_at);
    `;
    /**
     * SQLite-backed queue of redaction events awaiting cloud emission.
     * Rows move pending -> sent (accepted by the cloud) or
     * pending -> archived (retries exhausted); rows are never deleted.
     */
    RedactionEmitStore = class {
      static {
        __name(this, "RedactionEmitStore");
      }
      path;
      db;
      /**
       * Open (or create) the database and bring its schema up to date.
       *
       * @param {{path: string, inMemory?: boolean}} opts - `path` is the
       *   database file; `inMemory` opens ":memory:" instead and skips
       *   directory creation.
       * @throws The error built by nativeBindingLoadError() when the
       *   better-sqlite3 binding fails to load; any other open, pragma or
       *   migration error is rethrown unchanged.
       */
      constructor(opts) {
        this.path = opts.path;
        if (!opts.inMemory) {
          ensureDirSync(dirname14(this.path));
        }
        try {
          this.db = new Database3(opts.inMemory ? ":memory:" : this.path);
        } catch (err) {
          if (looksLikeNativeBindingFailure(err)) {
            throw nativeBindingLoadError("better-sqlite3", err);
          }
          throw err;
        }
        try {
          this.db.pragma("journal_mode = WAL");
          this.db.pragma("foreign_keys = ON");
          this.migrate();
        } catch (err) {
          // The caller never receives this instance, so nobody else could
          // close the handle: release it before propagating the failure.
          this.close();
          throw err;
        }
      }
      /**
       * Create the schema when the database is older than SCHEMA_VERSION3.
       * No-op when already current.
       *
       * @throws {Error} When the database reports a newer schema version
       *   than this build understands.
       */
      migrate() {
        const log = getLogger("redaction-emit-store");
        const currentVersion = this.db.pragma("user_version", { simple: true });
        if (currentVersion === SCHEMA_VERSION3) return;
        if (currentVersion > SCHEMA_VERSION3) {
          throw new Error(
            `redaction-emit.db at ${this.path} is at schema version ${currentVersion} (newer than this daemon's ${SCHEMA_VERSION3}) \u2014 refusing to downgrade`
          );
        }
        log.info({ from: currentVersion, to: SCHEMA_VERSION3 }, "running redaction-emit.db migrations");
        this.db.exec(SCHEMA_SQL3);
        this.db.pragma(`user_version = ${SCHEMA_VERSION3}`);
      }
      /**
       * Write a new 'pending' row. Returns the generated event_id. Caller
       * (prompt-builder) must call this BEFORE attempting cloud emission —
       * the SQLite append is the durability anchor.
       *
       * `now` is injected for testability; defaults to ISO 8601 now.
       *
       * @param {string} workspaceId
       * @param {object} payload - Stored as JSON in `payload_json`.
       * @param {string} [now]
       * @returns {string} The new row's event_id (a random UUID).
       */
      enqueue(workspaceId, payload, now = (/* @__PURE__ */ new Date()).toISOString()) {
        const event_id = randomUUID4();
        this.db.prepare(
          `INSERT INTO pending_redaction_emits
             (event_id, workspace_id, payload_json, created_at, attempts, status)
           VALUES (?, ?, ?, ?, 0, 'pending')`
        ).run(event_id, workspaceId, JSON.stringify(payload), now);
        return event_id;
      }
      /**
       * List pending rows in creation order, capped at `limit`. The cloud
       * endpoint caps batches at 1000 events; the worker passes 1000 here
       * to fill each batch.
       *
       * @param {number} limit
       * @returns {object[]} Rows shaped by toRow().
       */
      listPending(limit) {
        const rows = this.db.prepare(
          `SELECT event_id, workspace_id, payload_json, created_at, attempts,
                  last_attempt_at, last_error, status
             FROM pending_redaction_emits
            WHERE status = 'pending'
            ORDER BY created_at ASC, event_id ASC
            LIMIT ?`
        ).all(limit);
        // Arrow wrapper keeps `this` bound should toRow ever need it.
        return rows.map((raw) => this.toRow(raw));
      }
      /**
       * Mark a batch of pending rows as 'sent' after a successful cloud
       * POST. Atomic transaction — partial failure rolls back. Returns the
       * number of rows that actually transitioned (pending → sent).
       *
       * Rows already in 'sent' state are no-ops; this is the idempotency
       * guarantee on the daemon side (re-drain after a crash where SQLite
       * commit hadn't flushed). Rows in 'archived' state are NOT touched —
       * those are forensic-final.
       *
       * @param {string[]} eventIds
       * @param {string} [now]
       * @returns {number}
       */
      markSent(eventIds, now = (/* @__PURE__ */ new Date()).toISOString()) {
        if (eventIds.length === 0) return 0;
        const update = this.db.prepare(
          `UPDATE pending_redaction_emits
              SET status = 'sent', last_attempt_at = ?
            WHERE event_id = ? AND status = 'pending'`
        );
        return this.db.transaction((ids) => {
          let changes = 0;
          for (const id of ids) {
            const result = update.run(now, id);
            changes += Number(result.changes);
          }
          return changes;
        })(eventIds);
      }
      /**
       * Increment attempts counter + record last_error for a batch that
       * failed to POST. Rows stay 'pending' so the next drain tick retries
       * them. Does NOT touch rows already in 'sent' or 'archived' state.
       *
       * @param {string[]} eventIds
       * @param {string} error - Failure summary; stored truncated to 500
       *   characters. Non-string values are coerced with String().
       * @param {string} [now]
       * @returns {number} Number of rows updated.
       */
      markFailed(eventIds, error, now = (/* @__PURE__ */ new Date()).toISOString()) {
        if (eventIds.length === 0) return 0;
        // Truncate once for the whole batch rather than once per row.
        const errorText = String(error).slice(0, 500);
        const update = this.db.prepare(
          `UPDATE pending_redaction_emits
              SET attempts = attempts + 1,
                  last_attempt_at = ?,
                  last_error = ?
            WHERE event_id = ? AND status = 'pending'`
        );
        return this.db.transaction((ids) => {
          let changes = 0;
          for (const id of ids) {
            const result = update.run(now, errorText, id);
            changes += Number(result.changes);
          }
          return changes;
        })(eventIds);
      }
      /**
       * Move terminally-failed rows (attempts >= maxAttempts) to 'archived'.
       * Archived rows remain in the table for forensic inspection — never
       * deleted. Returns the list of event_ids that transitioned.
       *
       * Called by the worker after each failed batch to evict rows that
       * are stuck retrying forever. The cap matches the worker's
       * MAX_ATTEMPTS constant.
       *
       * @param {number} maxAttempts
       * @returns {string[]}
       */
      archiveExhausted(maxAttempts) {
        return this.db.transaction(() => {
          const ids = this.db.prepare(
            `SELECT event_id FROM pending_redaction_emits
              WHERE status = 'pending' AND attempts >= ?`
          ).all(maxAttempts).map((r) => r.event_id);
          if (ids.length === 0) return [];
          // Same predicate as the SELECT, inside the same transaction, so
          // exactly the selected rows transition. This avoids building an
          // `IN (?,?,…)` list, whose size is capped by SQLite's
          // bound-parameter limit.
          this.db.prepare(
            `UPDATE pending_redaction_emits SET status = 'archived'
              WHERE status = 'pending' AND attempts >= ?`
          ).run(maxAttempts);
          return ids;
        })();
      }
      /** Count rows by status. Useful for diagnostics + tests. */
      countByStatus() {
        const rows = this.db.prepare(`SELECT status, COUNT(*) as n FROM pending_redaction_emits GROUP BY status`).all();
        const out = {
          pending: 0,
          sent: 0,
          archived: 0
        };
        for (const r of rows) out[r.status] = r.n;
        return out;
      }
      /** Schema version (diagnostics + tests). */
      schemaVersion() {
        return this.db.pragma("user_version", { simple: true });
      }
      /** Close the underlying handle. Idempotent. */
      close() {
        if (this.db.open) this.db.close();
      }
      /**
       * Convert a raw table row into the shape handed to callers: same
       * columns, with `payload_json` parsed into `payload`.
       */
      toRow(raw) {
        return {
          event_id: raw.event_id,
          workspace_id: raw.workspace_id,
          payload: JSON.parse(raw.payload_json),
          created_at: raw.created_at,
          attempts: raw.attempts,
          last_attempt_at: raw.last_attempt_at,
          last_error: raw.last_error,
          status: raw.status
        };
      }
    };
  }
});
11669
+
11670
+ // src/provenance/redaction-sink.ts
11671
/**
 * Build a RedactionSink from environment variables.
 *
 * @param {object} [env=process.env] - Environment map (injectable for tests).
 * @returns {RedactionSink|null} null when WOTW_WIKI_ID or
 *   WOTW_CLOUD_SINK_SECRET is missing or empty; otherwise a sink bound to
 *   that workspace. An empty WOTW_API_BASE_URL is passed as undefined so
 *   the sink applies its own default.
 */
function redactionSinkFromEnv(env = process.env) {
  const workspaceId = env.WOTW_WIKI_ID;
  const sinkSecret = env.WOTW_CLOUD_SINK_SECRET;
  const configured = Boolean(workspaceId) && Boolean(sinkSecret);
  if (!configured) {
    return null;
  }
  const apiBaseUrl = env.WOTW_API_BASE_URL || void 0;
  return new RedactionSink({ workspaceId, apiBaseUrl, sinkSecret });
}
11681
+ var DEFAULT_API_BASE_URL, REQUEST_TIMEOUT_MS, CLOUD_REDACTION_BATCH_CAP, RedactionSink;
11682
// Bundler module wrapper for src/provenance/redaction-sink.ts: assigns the
// sink constants and the RedactionSink class.
var init_redaction_sink = __esm({
  "src/provenance/redaction-sink.ts"() {
    "use strict";
    init_esm_shims();
    init_logger();
    DEFAULT_API_BASE_URL = "https://wotw.dev";
    // Abort a POST (including reading its response body) after 10 s.
    REQUEST_TIMEOUT_MS = 1e4;
    // Largest batch post() will send; larger batches are rejected locally.
    CLOUD_REDACTION_BATCH_CAP = 1e3;
    /**
     * HTTPS client that POSTs batches of redaction events to
     * `<apiBaseUrl>/api/internal/redaction-log`, authenticating with the
     * `x-sink-key` header.
     */
    RedactionSink = class {
      static {
        __name(this, "RedactionSink");
      }
      workspaceId;
      apiBaseUrl;
      sinkSecret;
      fetchImpl;
      /**
       * @param {{workspaceId: string, sinkSecret: string, apiBaseUrl?: string, fetchImpl?: Function}} opts
       *   `apiBaseUrl` defaults to DEFAULT_API_BASE_URL; `fetchImpl`
       *   defaults to the global fetch (injectable for tests).
       * @throws {Error} When the base URL does not start with "https://".
       */
      constructor(opts) {
        this.workspaceId = opts.workspaceId;
        this.apiBaseUrl = opts.apiBaseUrl ?? DEFAULT_API_BASE_URL;
        if (!this.apiBaseUrl.startsWith("https://")) {
          throw new Error(
            `redaction-sink: apiBaseUrl must be https:// (got "${this.apiBaseUrl}"); WOTW_API_BASE_URL is misconfigured. Refusing to send sink key over plaintext.`
          );
        }
        this.sinkSecret = opts.sinkSecret;
        this.fetchImpl = opts.fetchImpl ?? fetch;
      }
      /**
       * POST a single batch of redaction events to the cloud. Never throws;
       * caller decides whether to retry based on the returned `ok` field.
       *
       * The batch MUST contain at least one event and at most
       * CLOUD_REDACTION_BATCH_CAP events — the caller (worker) trims to fit.
       *
       * @param {object[]} events - Each contributes event_id, redacted_at,
       *   rule_id, source_file_path and redaction_byte_count to the body.
       * @returns {Promise<{ok: true, inserted: number} | {ok: false, status: number|null, errorBody: string}>}
       *   `status` is null when no HTTP response was received (network
       *   error, timeout) or the batch was rejected client-side.
       */
      async post(events) {
        const log = getLogger("provenance.redaction-sink");
        if (events.length === 0) {
          return { ok: true, inserted: 0 };
        }
        if (events.length > CLOUD_REDACTION_BATCH_CAP) {
          return {
            ok: false,
            status: null,
            errorBody: `client-side batch cap exceeded (got ${events.length}, max ${CLOUD_REDACTION_BATCH_CAP})`
          };
        }
        // Strip trailing slashes so a base URL like "https://host/" does
        // not produce "https://host//api/internal/redaction-log".
        const url = `${this.apiBaseUrl.replace(/\/+$/, "")}/api/internal/redaction-log`;
        const body = {
          workspace_id: this.workspaceId,
          events: events.map((e) => ({
            event_id: e.event_id,
            redacted_at: e.redacted_at,
            rule_id: e.rule_id,
            source_file_path: e.source_file_path,
            redaction_byte_count: e.redaction_byte_count
          }))
        };
        const controller = new AbortController();
        const timeout = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
        try {
          const res = await this.fetchImpl(url, {
            method: "POST",
            headers: {
              "Content-Type": "application/json",
              "x-sink-key": this.sinkSecret
            },
            body: JSON.stringify(body),
            signal: controller.signal
          });
          if (res.ok) {
            let inserted = events.length;
            try {
              const data = await res.json();
              if (typeof data.inserted === "number") inserted = data.inserted;
            } catch {
              // Deliberate best-effort: a 2xx with a missing or unparseable
              // body still counts as accepted; keep the batch-size default.
            }
            log.debug(
              {
                workspaceId: this.workspaceId,
                batchSize: events.length,
                inserted
              },
              "redaction batch accepted by cloud"
            );
            return { ok: true, inserted };
          }
          const text2 = await res.text().catch(() => "");
          log.warn(
            {
              workspaceId: this.workspaceId,
              batchSize: events.length,
              status: res.status,
              body: text2.slice(0, 500)
            },
            "redaction batch rejected by cloud"
          );
          return { ok: false, status: res.status, errorBody: text2.slice(0, 500) };
        } catch (err) {
          const msg = err instanceof Error ? err.message : String(err);
          log.warn(
            {
              workspaceId: this.workspaceId,
              batchSize: events.length,
              err: msg
            },
            "redaction batch request failed"
          );
          return { ok: false, status: null, errorBody: msg };
        } finally {
          clearTimeout(timeout);
        }
      }
    };
    __name(redactionSinkFromEnv, "redactionSinkFromEnv");
  }
});
11798
+
11799
+ // src/provenance/redaction-emit-worker.ts
11800
+ var DEFAULT_BASE_INTERVAL_MS, DEFAULT_MAX_INTERVAL_MS, DEFAULT_MAX_ATTEMPTS, RedactionEmitWorker;
11801
// Bundler module wrapper for src/provenance/redaction-emit-worker.ts:
// assigns the worker defaults and the RedactionEmitWorker class.
var init_redaction_emit_worker = __esm({
  "src/provenance/redaction-emit-worker.ts"() {
    "use strict";
    init_esm_shims();
    init_logger();
    init_redaction_sink();
    // 30 s between passes after a success; doubles on failure up to 5 min.
    DEFAULT_BASE_INTERVAL_MS = 3e4;
    DEFAULT_MAX_INTERVAL_MS = 5 * 6e4;
    // Failed attempts after which a row is archived instead of retried.
    DEFAULT_MAX_ATTEMPTS = 100;
    /**
     * Background drainer: repeatedly takes a batch of pending rows from
     * the store, POSTs it through the sink, and records the outcome.
     * Does nothing when constructed without a sink.
     */
    RedactionEmitWorker = class {
      static {
        __name(this, "RedactionEmitWorker");
      }
      name = "redaction-emit-worker";
      store;
      sink;
      baseIntervalMs;
      maxIntervalMs;
      maxAttempts;
      batchSize;
      timer = null;
      inflight = null;
      /** Delay before the next pass; base after success, doubled after failure. */
      currentIntervalMs;
      stopped = false;
      /**
       * @param {{store: object, sink: object|null, baseIntervalMs?: number, maxIntervalMs?: number, maxAttempts?: number, batchSize?: number}} opts
       *   `batchSize` is clamped to CLOUD_REDACTION_BATCH_CAP.
       */
      constructor(opts) {
        this.store = opts.store;
        this.sink = opts.sink;
        this.baseIntervalMs = opts.baseIntervalMs ?? DEFAULT_BASE_INTERVAL_MS;
        this.maxIntervalMs = opts.maxIntervalMs ?? DEFAULT_MAX_INTERVAL_MS;
        this.maxAttempts = opts.maxAttempts ?? DEFAULT_MAX_ATTEMPTS;
        const requestedBatch = opts.batchSize ?? CLOUD_REDACTION_BATCH_CAP;
        this.batchSize = Math.min(requestedBatch, CLOUD_REDACTION_BATCH_CAP);
        this.currentIntervalMs = this.baseIntervalMs;
      }
      /**
       * Log the configuration and run the first drain pass, resolving once
       * that pass has finished. Without a sink it only logs and returns.
       */
      async start() {
        const log = getLogger(this.name);
        if (!this.sink) {
          log.info(
            "redaction emit worker disabled (no sink configured \u2014 local/offline mode); SQLite queue continues to capture rows for forensic inspection"
          );
          return;
        }
        const settings = {
          baseIntervalMs: this.baseIntervalMs,
          maxIntervalMs: this.maxIntervalMs,
          maxAttempts: this.maxAttempts,
          batchSize: this.batchSize,
          apiBaseUrl: this.sink.apiBaseUrl
        };
        log.info(settings, "redaction emit worker starting");
        this.inflight = this.tick();
        await this.inflight;
      }
      /**
       * Prevent further scheduling, cancel the pending timer, and wait for
       * any pass that is currently running.
       */
      async stop() {
        this.stopped = true;
        if (this.timer) {
          clearTimeout(this.timer);
          this.timer = null;
        }
        const running = this.inflight;
        if (!running) return;
        try {
          await running;
        } catch {
          // Shutdown must not fail because the last pass did.
        }
      }
      /**
       * One drain pass. Pulls a batch, attempts POST, transitions rows,
       * adjusts backoff, schedules the next tick (when not stopped).
       *
       * Exposed for tests so they can step the worker without sleeping.
       */
      async tick() {
        if (!this.sink) return;
        const log = getLogger(this.name);
        let drained = false;
        try {
          const rows = this.store.listPending(this.batchSize);
          if (rows.length > 0) {
            const ids = [];
            const events = [];
            for (const row of rows) {
              ids.push(row.event_id);
              events.push({ event_id: row.event_id, ...row.payload });
            }
            const outcome = await this.sink.post(events);
            if (!outcome.ok) {
              // Leaves `drained` false so the finally block backs off.
              this.#recordFailure(ids, outcome, log);
              return;
            }
            const transitioned = this.store.markSent(ids);
            log.info(
              {
                batchSize: rows.length,
                transitioned,
                inserted: outcome.inserted
              },
              "redaction batch drained"
            );
          }
          // An empty queue counts as success too.
          drained = true;
        } catch (err) {
          const detail = err instanceof Error ? err.message : String(err);
          log.error({ err: detail }, "redaction worker tick failed unexpectedly");
        } finally {
          this.currentIntervalMs = drained
            ? this.baseIntervalMs
            : Math.min(this.currentIntervalMs * 2, this.maxIntervalMs);
          this.scheduleNext();
        }
      }
      /**
       * Bump the attempt counters for a rejected batch, then archive any
       * rows that have now reached maxAttempts.
       */
      #recordFailure(ids, outcome, log) {
        const summary = `status=${outcome.status ?? "network"} body=${outcome.errorBody.slice(0, 200)}`;
        this.store.markFailed(ids, summary);
        const archived = this.store.archiveExhausted(this.maxAttempts);
        if (archived.length === 0) return;
        log.error(
          {
            archivedCount: archived.length,
            maxAttempts: this.maxAttempts,
            sampleIds: archived.slice(0, 3)
          },
          "redaction events archived after exhausting retries \u2014 review needed"
        );
      }
      /**
       * Arm the timer for the next pass after `currentIntervalMs`,
       * replacing any timer already pending. No-op once stopped.
       */
      scheduleNext() {
        if (this.stopped) return;
        if (this.timer) clearTimeout(this.timer);
        const handle = setTimeout(() => {
          this.inflight = this.tick();
        }, this.currentIntervalMs);
        this.timer = handle;
        // unref() so this timer alone does not keep the process running.
        if (typeof handle.unref === "function") handle.unref();
      }
    };
  }
});
11942
+
11376
11943
  // src/compounding/engine.ts
11377
11944
  import { join as join23, relative as relative11 } from "path";
11378
11945
  function stripFrontmatterIfPresent(text2) {
@@ -11765,14 +12332,14 @@ function cloudSinkFromEnv(env = process.env) {
11765
12332
  adminServiceKey
11766
12333
  });
11767
12334
  }
11768
- var DEFAULT_API_BASE_URL, REQUEST_TIMEOUT_MS, CloudProvenanceSink;
12335
+ var DEFAULT_API_BASE_URL2, REQUEST_TIMEOUT_MS2, CloudProvenanceSink;
11769
12336
  var init_cloud_sink = __esm({
11770
12337
  "src/provenance/cloud-sink.ts"() {
11771
12338
  "use strict";
11772
12339
  init_esm_shims();
11773
12340
  init_logger();
11774
- DEFAULT_API_BASE_URL = "https://wotw.dev";
11775
- REQUEST_TIMEOUT_MS = 5e3;
12341
+ DEFAULT_API_BASE_URL2 = "https://wotw.dev";
12342
+ REQUEST_TIMEOUT_MS2 = 5e3;
11776
12343
  CloudProvenanceSink = class {
11777
12344
  static {
11778
12345
  __name(this, "CloudProvenanceSink");
@@ -11783,7 +12350,7 @@ var init_cloud_sink = __esm({
11783
12350
  fetchImpl;
11784
12351
  constructor(opts) {
11785
12352
  this.wikiId = opts.wikiId;
11786
- this.apiBaseUrl = opts.apiBaseUrl ?? DEFAULT_API_BASE_URL;
12353
+ this.apiBaseUrl = opts.apiBaseUrl ?? DEFAULT_API_BASE_URL2;
11787
12354
  if (!this.apiBaseUrl.startsWith("https://")) {
11788
12355
  throw new Error(
11789
12356
  `cloud-sink: apiBaseUrl must be https:// (got "${this.apiBaseUrl}"); WOTW_API_BASE_URL is misconfigured. Refusing to send admin key over plaintext.`
@@ -11813,7 +12380,7 @@ var init_cloud_sink = __esm({
11813
12380
  record_json: record
11814
12381
  };
11815
12382
  const controller = new AbortController();
11816
- const timeout = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
12383
+ const timeout = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS2);
11817
12384
  try {
11818
12385
  const res = await this.fetchImpl(url, {
11819
12386
  method: "POST",
@@ -11976,6 +12543,24 @@ async function main() {
11976
12543
  );
11977
12544
  }
11978
12545
  }
12546
+ const redactionEmitStore = new RedactionEmitStore({
12547
+ path: `${config.wiki_root}/.wotw/redaction-emit.db`
12548
+ });
12549
+ const redactionWorkspaceId = process.env.WOTW_WIKI_ID;
12550
+ const redactionSink = redactionSinkFromEnv();
12551
+ const redactionEmitWorker = new RedactionEmitWorker({
12552
+ store: redactionEmitStore,
12553
+ sink: redactionSink
12554
+ });
12555
+ log.info(
12556
+ {
12557
+ path: redactionEmitStore.path,
12558
+ sinkActive: !!redactionSink,
12559
+ workspaceId: redactionWorkspaceId ?? null,
12560
+ counts: redactionEmitStore.countByStatus()
12561
+ },
12562
+ "redaction-emit store ready"
12563
+ );
11979
12564
  const costTracker = new CostTracker({
11980
12565
  trackFile: config.cost.track_file,
11981
12566
  maxDailyUsd: config.cost.max_daily_usd,
@@ -12023,7 +12608,9 @@ async function main() {
12023
12608
  runtimeMode,
12024
12609
  deadLetter,
12025
12610
  factStore,
12026
- factIndex
12611
+ factIndex,
12612
+ redactionEmitStore,
12613
+ redactionWorkspaceId
12027
12614
  });
12028
12615
  const compounding = new CompoundingEngine({
12029
12616
  config,
@@ -12067,6 +12654,7 @@ async function main() {
12067
12654
  daemon.attachSubsystem(mcp);
12068
12655
  daemon.attachSubsystem(lintScheduler);
12069
12656
  if (dekArchiveScheduler) daemon.attachSubsystem(dekArchiveScheduler);
12657
+ daemon.attachSubsystem(redactionEmitWorker);
12070
12658
  watcher.startReconciliation(5 * 60 * 1e3);
12071
12659
  const mcpUrl = `http://${config.server.host}:${config.server.port}/mcp`;
12072
12660
  log.info(
@@ -12104,6 +12692,9 @@ var init_entry = __esm({
12104
12692
  init_watcher();
12105
12693
  init_server();
12106
12694
  init_chain();
12695
+ init_redaction_emit_store();
12696
+ init_redaction_emit_worker();
12697
+ init_redaction_sink();
12107
12698
  init_engine();
12108
12699
  __name(main, "main");
12109
12700
  void main();