@3030-labs/wotw 0.8.4 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +54 -0
- package/dist/cli/index.js +615 -24
- package/dist/cli/index.js.map +1 -1
- package/dist/daemon/entry.js +624 -53
- package/dist/daemon/entry.js.map +1 -1
- package/dist/index.d.ts +9 -0
- package/dist/index.js +41 -13
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/daemon/entry.js
CHANGED
|
@@ -2331,14 +2331,14 @@ function cloudSinkFromEnv(env = process.env) {
|
|
|
2331
2331
|
adminServiceKey
|
|
2332
2332
|
});
|
|
2333
2333
|
}
|
|
2334
|
-
var
|
|
2334
|
+
var DEFAULT_API_BASE_URL2, REQUEST_TIMEOUT_MS2, CloudProvenanceSink;
|
|
2335
2335
|
var init_cloud_sink = __esm({
|
|
2336
2336
|
"src/provenance/cloud-sink.ts"() {
|
|
2337
2337
|
"use strict";
|
|
2338
2338
|
init_esm_shims();
|
|
2339
2339
|
init_logger();
|
|
2340
|
-
|
|
2341
|
-
|
|
2340
|
+
DEFAULT_API_BASE_URL2 = "https://wotw.dev";
|
|
2341
|
+
REQUEST_TIMEOUT_MS2 = 5e3;
|
|
2342
2342
|
CloudProvenanceSink = class {
|
|
2343
2343
|
static {
|
|
2344
2344
|
__name(this, "CloudProvenanceSink");
|
|
@@ -2349,7 +2349,7 @@ var init_cloud_sink = __esm({
|
|
|
2349
2349
|
fetchImpl;
|
|
2350
2350
|
constructor(opts) {
|
|
2351
2351
|
this.wikiId = opts.wikiId;
|
|
2352
|
-
this.apiBaseUrl = opts.apiBaseUrl ??
|
|
2352
|
+
this.apiBaseUrl = opts.apiBaseUrl ?? DEFAULT_API_BASE_URL2;
|
|
2353
2353
|
if (!this.apiBaseUrl.startsWith("https://")) {
|
|
2354
2354
|
throw new Error(
|
|
2355
2355
|
`cloud-sink: apiBaseUrl must be https:// (got "${this.apiBaseUrl}"); WOTW_API_BASE_URL is misconfigured. Refusing to send admin key over plaintext.`
|
|
@@ -2379,7 +2379,7 @@ var init_cloud_sink = __esm({
|
|
|
2379
2379
|
record_json: record
|
|
2380
2380
|
};
|
|
2381
2381
|
const controller = new AbortController();
|
|
2382
|
-
const timeout = setTimeout(() => controller.abort(),
|
|
2382
|
+
const timeout = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS2);
|
|
2383
2383
|
try {
|
|
2384
2384
|
const res = await this.fetchImpl(url, {
|
|
2385
2385
|
method: "POST",
|
|
@@ -2532,10 +2532,10 @@ var store_exports = {};
|
|
|
2532
2532
|
__export(store_exports, {
|
|
2533
2533
|
KeyStore: () => KeyStore
|
|
2534
2534
|
});
|
|
2535
|
-
import
|
|
2536
|
-
import { randomUUID as
|
|
2537
|
-
import { dirname as
|
|
2538
|
-
var
|
|
2535
|
+
import Database2 from "better-sqlite3";
|
|
2536
|
+
import { randomUUID as randomUUID4 } from "crypto";
|
|
2537
|
+
import { dirname as dirname11 } from "path";
|
|
2538
|
+
var SCHEMA_VERSION2, SCHEMA_SQL2, KeyStore;
|
|
2539
2539
|
var init_store = __esm({
|
|
2540
2540
|
"src/keys/store.ts"() {
|
|
2541
2541
|
"use strict";
|
|
@@ -2544,8 +2544,8 @@ var init_store = __esm({
|
|
|
2544
2544
|
init_fs();
|
|
2545
2545
|
init_logger();
|
|
2546
2546
|
init_envelope();
|
|
2547
|
-
|
|
2548
|
-
|
|
2547
|
+
SCHEMA_VERSION2 = 1;
|
|
2548
|
+
SCHEMA_SQL2 = `
|
|
2549
2549
|
CREATE TABLE IF NOT EXISTS workspace_keys (
|
|
2550
2550
|
key_id TEXT PRIMARY KEY,
|
|
2551
2551
|
workspace_id TEXT NOT NULL,
|
|
@@ -2584,10 +2584,10 @@ var init_store = __esm({
|
|
|
2584
2584
|
this.kek = opts.kek;
|
|
2585
2585
|
this.dekCache = /* @__PURE__ */ new Map();
|
|
2586
2586
|
if (!opts.inMemory) {
|
|
2587
|
-
ensureDirSync(
|
|
2587
|
+
ensureDirSync(dirname11(this.path));
|
|
2588
2588
|
}
|
|
2589
2589
|
try {
|
|
2590
|
-
this.db = new
|
|
2590
|
+
this.db = new Database2(opts.inMemory ? ":memory:" : this.path);
|
|
2591
2591
|
} catch (err) {
|
|
2592
2592
|
if (looksLikeNativeBindingFailure(err)) {
|
|
2593
2593
|
throw nativeBindingLoadError("better-sqlite3", err);
|
|
@@ -2601,15 +2601,15 @@ var init_store = __esm({
|
|
|
2601
2601
|
migrate() {
|
|
2602
2602
|
const log = getLogger("key-store");
|
|
2603
2603
|
const currentVersion = this.db.pragma("user_version", { simple: true });
|
|
2604
|
-
if (currentVersion ===
|
|
2605
|
-
if (currentVersion >
|
|
2604
|
+
if (currentVersion === SCHEMA_VERSION2) return;
|
|
2605
|
+
if (currentVersion > SCHEMA_VERSION2) {
|
|
2606
2606
|
throw new Error(
|
|
2607
|
-
`keys.db at ${this.path} is at schema version ${currentVersion} (newer than this daemon's ${
|
|
2607
|
+
`keys.db at ${this.path} is at schema version ${currentVersion} (newer than this daemon's ${SCHEMA_VERSION2}) \u2014 refusing to downgrade`
|
|
2608
2608
|
);
|
|
2609
2609
|
}
|
|
2610
|
-
log.info({ from: currentVersion, to:
|
|
2611
|
-
this.db.exec(
|
|
2612
|
-
this.db.pragma(`user_version = ${
|
|
2610
|
+
log.info({ from: currentVersion, to: SCHEMA_VERSION2 }, "running keys.db migrations");
|
|
2611
|
+
this.db.exec(SCHEMA_SQL2);
|
|
2612
|
+
this.db.pragma(`user_version = ${SCHEMA_VERSION2}`);
|
|
2613
2613
|
}
|
|
2614
2614
|
/**
|
|
2615
2615
|
* Provision a brand-new active DEK for the workspace. Fails if an
|
|
@@ -2626,7 +2626,7 @@ var init_store = __esm({
|
|
|
2626
2626
|
`cannot provision: workspace ${workspaceId} already has an active key (${existing.key_id})`
|
|
2627
2627
|
);
|
|
2628
2628
|
}
|
|
2629
|
-
const key_id =
|
|
2629
|
+
const key_id = randomUUID4();
|
|
2630
2630
|
const dek = generateDek();
|
|
2631
2631
|
const env = wrapDek(dek, this.kek);
|
|
2632
2632
|
this.db.prepare(
|
|
@@ -2690,7 +2690,7 @@ var init_store = __esm({
|
|
|
2690
2690
|
dek: this.unwrapWithCache(prevRow)
|
|
2691
2691
|
};
|
|
2692
2692
|
}
|
|
2693
|
-
const key_id =
|
|
2693
|
+
const key_id = randomUUID4();
|
|
2694
2694
|
const dek = generateDek();
|
|
2695
2695
|
const env = wrapDek(dek, this.kek);
|
|
2696
2696
|
this.db.prepare(
|
|
@@ -2892,16 +2892,16 @@ __export(store_exports2, {
|
|
|
2892
2892
|
factHash: () => factHash,
|
|
2893
2893
|
questionHash: () => questionHash
|
|
2894
2894
|
});
|
|
2895
|
-
import
|
|
2895
|
+
import Database3 from "better-sqlite3";
|
|
2896
2896
|
import { createHash as createHash3 } from "crypto";
|
|
2897
|
-
import { dirname as
|
|
2897
|
+
import { dirname as dirname12 } from "path";
|
|
2898
2898
|
function factHash(entity, statement, wikiPageId, createdAt) {
|
|
2899
2899
|
return createHash3("sha256").update(`${wikiPageId}\0${entity}\0${statement}\0${createdAt}`).digest("hex");
|
|
2900
2900
|
}
|
|
2901
2901
|
function questionHash(factId, question) {
|
|
2902
2902
|
return createHash3("sha256").update(`${factId}\0${question}`).digest("hex");
|
|
2903
2903
|
}
|
|
2904
|
-
var
|
|
2904
|
+
var SCHEMA_VERSION3, SCHEMA_SQL3, FactStore;
|
|
2905
2905
|
var init_store2 = __esm({
|
|
2906
2906
|
"src/facts/store.ts"() {
|
|
2907
2907
|
"use strict";
|
|
@@ -2909,8 +2909,8 @@ var init_store2 = __esm({
|
|
|
2909
2909
|
init_actionable_error();
|
|
2910
2910
|
init_fs();
|
|
2911
2911
|
init_logger();
|
|
2912
|
-
|
|
2913
|
-
|
|
2912
|
+
SCHEMA_VERSION3 = 1;
|
|
2913
|
+
SCHEMA_SQL3 = `
|
|
2914
2914
|
CREATE TABLE IF NOT EXISTS facts (
|
|
2915
2915
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
2916
2916
|
wiki_page_id TEXT NOT NULL,
|
|
@@ -2941,10 +2941,10 @@ var init_store2 = __esm({
|
|
|
2941
2941
|
constructor(opts) {
|
|
2942
2942
|
this.path = opts.path;
|
|
2943
2943
|
if (!opts.inMemory) {
|
|
2944
|
-
ensureDirSync(
|
|
2944
|
+
ensureDirSync(dirname12(this.path));
|
|
2945
2945
|
}
|
|
2946
2946
|
try {
|
|
2947
|
-
this.db = new
|
|
2947
|
+
this.db = new Database3(opts.inMemory ? ":memory:" : this.path);
|
|
2948
2948
|
} catch (err) {
|
|
2949
2949
|
if (looksLikeNativeBindingFailure(err)) {
|
|
2950
2950
|
throw nativeBindingLoadError("better-sqlite3", err);
|
|
@@ -2959,15 +2959,15 @@ var init_store2 = __esm({
|
|
|
2959
2959
|
migrate() {
|
|
2960
2960
|
const log = getLogger("fact-store");
|
|
2961
2961
|
const currentVersion = this.db.pragma("user_version", { simple: true });
|
|
2962
|
-
if (currentVersion ===
|
|
2963
|
-
if (currentVersion >
|
|
2962
|
+
if (currentVersion === SCHEMA_VERSION3) return;
|
|
2963
|
+
if (currentVersion > SCHEMA_VERSION3) {
|
|
2964
2964
|
throw new Error(
|
|
2965
|
-
`facts.db at ${this.path} is at schema version ${currentVersion} (newer than this daemon's ${
|
|
2965
|
+
`facts.db at ${this.path} is at schema version ${currentVersion} (newer than this daemon's ${SCHEMA_VERSION3}) \u2014 refusing to downgrade`
|
|
2966
2966
|
);
|
|
2967
2967
|
}
|
|
2968
|
-
log.info({ from: currentVersion, to:
|
|
2969
|
-
this.db.exec(
|
|
2970
|
-
this.db.pragma(`user_version = ${
|
|
2968
|
+
log.info({ from: currentVersion, to: SCHEMA_VERSION3 }, "running facts.db migrations");
|
|
2969
|
+
this.db.exec(SCHEMA_SQL3);
|
|
2970
|
+
this.db.pragma(`user_version = ${SCHEMA_VERSION3}`);
|
|
2971
2971
|
}
|
|
2972
2972
|
/**
|
|
2973
2973
|
* Insert a fact + return its row id and assigned fact_hash. The hash
|
|
@@ -3438,6 +3438,7 @@ async function loadConfig(searchFrom) {
|
|
|
3438
3438
|
const withEnv = applyEnvOverrides(merged);
|
|
3439
3439
|
const validated = validateConfig(withEnv);
|
|
3440
3440
|
validateHostedConfig(validated);
|
|
3441
|
+
validateHostedRedactionSink(validated);
|
|
3441
3442
|
return { config: validated, path: path2 };
|
|
3442
3443
|
}
|
|
3443
3444
|
__name(loadConfig, "loadConfig");
|
|
@@ -3522,6 +3523,16 @@ function applyEnvOverrides(config) {
|
|
|
3522
3523
|
}
|
|
3523
3524
|
__name(applyEnvOverrides, "applyEnvOverrides");
|
|
3524
3525
|
var UUID_REGEX = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
|
|
3526
|
+
/**
 * Fail fast when hosted mode is enabled but the cloud sink secret is missing.
 *
 * Does nothing when `config.hosted.enabled` is falsy. Otherwise the
 * WOTW_CLOUD_SINK_SECRET variable must hold a non-blank string.
 *
 * @param {{ hosted: { enabled: boolean } }} config - Daemon config; only
 *   `hosted.enabled` is read here.
 * @param {Record<string, string | undefined>} [env=process.env] - Environment
 *   to read the secret from (injectable so callers need not mutate process.env).
 * @returns {void}
 * @throws {Error} When hosted mode is on and the secret is unset, empty, or
 *   consists only of whitespace.
 */
function validateHostedRedactionSink(config, env = process.env) {
  if (!config.hosted.enabled) return;
  const secret = env.WOTW_CLOUD_SINK_SECRET;
  // A whitespace-only value carries no credential, so treat it exactly like
  // a missing one instead of letting it through to the sink.
  if (typeof secret !== "string" || secret.trim().length === 0) {
    throw new Error(
      "Config error: hosted.enabled is true but WOTW_CLOUD_SINK_SECRET is unset. The redaction-emit wire requires this secret to authenticate with /api/internal/redaction-log; running hosted-mode without it would silently drop every compliance redaction event."
    );
  }
}
|
|
3535
|
+
__name(validateHostedRedactionSink, "validateHostedRedactionSink");
|
|
3525
3536
|
function validateHostedConfig(config) {
|
|
3526
3537
|
if (!config.hosted.enabled) return;
|
|
3527
3538
|
if (!config.hosted.tenant_id || config.hosted.tenant_id.length === 0) {
|
|
@@ -6346,19 +6357,22 @@ var DEFAULT_REDACTIONS = [
|
|
|
6346
6357
|
{
|
|
6347
6358
|
name: "aws-access-key",
|
|
6348
6359
|
pattern: /\bAKIA[0-9A-Z]{16}\b/g,
|
|
6349
|
-
replacement: "[REDACTED:AWS_ACCESS_KEY]"
|
|
6360
|
+
replacement: "[REDACTED:AWS_ACCESS_KEY]",
|
|
6361
|
+
cloud_rule_id: "credential_pattern_01"
|
|
6350
6362
|
},
|
|
6351
6363
|
{
|
|
6352
6364
|
name: "aws-secret-key",
|
|
6353
6365
|
pattern: /\b[A-Za-z0-9/+=]{40}\b(?=.*(?:secret|aws))/gi,
|
|
6354
|
-
replacement: "[REDACTED:AWS_SECRET_KEY]"
|
|
6366
|
+
replacement: "[REDACTED:AWS_SECRET_KEY]",
|
|
6367
|
+
cloud_rule_id: "credential_pattern_02"
|
|
6355
6368
|
},
|
|
6356
6369
|
{
|
|
6357
6370
|
name: "github-token",
|
|
6358
6371
|
// Review item 2: also catch GitHub fine-grained personal access
|
|
6359
6372
|
// tokens (`github_pat_*`, 82+ chars per docs).
|
|
6360
6373
|
pattern: /\bgh[pousr]_[A-Za-z0-9]{36,255}\b|\bgithub_pat_[A-Za-z0-9_]{50,}\b/g,
|
|
6361
|
-
replacement: "[REDACTED:GITHUB_TOKEN]"
|
|
6374
|
+
replacement: "[REDACTED:GITHUB_TOKEN]",
|
|
6375
|
+
cloud_rule_id: "credential_pattern_03"
|
|
6362
6376
|
},
|
|
6363
6377
|
{
|
|
6364
6378
|
name: "anthropic-api-key",
|
|
@@ -6366,7 +6380,8 @@ var DEFAULT_REDACTIONS = [
|
|
|
6366
6380
|
// window stays generous enough to catch both legacy and current
|
|
6367
6381
|
// formats including api03- prefix.
|
|
6368
6382
|
pattern: /\bsk-ant-[A-Za-z0-9-_]{80,120}\b/g,
|
|
6369
|
-
replacement: "[REDACTED:ANTHROPIC_API_KEY]"
|
|
6383
|
+
replacement: "[REDACTED:ANTHROPIC_API_KEY]",
|
|
6384
|
+
cloud_rule_id: "credential_pattern_04"
|
|
6370
6385
|
},
|
|
6371
6386
|
{
|
|
6372
6387
|
name: "openai-api-key",
|
|
@@ -6375,31 +6390,38 @@ var DEFAULT_REDACTIONS = [
|
|
|
6375
6390
|
// `sk-svcacct-*`, `sk-admin-*` all use `-` separators after the
|
|
6376
6391
|
// prefix and longer character set. Updated character class.
|
|
6377
6392
|
pattern: /\bsk-(?:proj|svcacct|admin)-[A-Za-z0-9_-]{20,200}\b|\bsk-[A-Za-z0-9]{20,200}\b/g,
|
|
6378
|
-
replacement: "[REDACTED:OPENAI_API_KEY]"
|
|
6393
|
+
replacement: "[REDACTED:OPENAI_API_KEY]",
|
|
6394
|
+
cloud_rule_id: "credential_pattern_05"
|
|
6379
6395
|
},
|
|
6380
6396
|
{
|
|
6381
6397
|
name: "gemini-api-key",
|
|
6382
|
-
//
|
|
6383
|
-
//
|
|
6384
|
-
|
|
6385
|
-
|
|
6398
|
+
// Google AI Studio API keys come in two formats:
|
|
6399
|
+
// - legacy: `AIza` + 35 chars
|
|
6400
|
+
// - current: `AQ.` + ~40-80 url-safe chars (rolled out 2024-2025; the
|
|
6401
|
+
// `AIza`-only rule silently missed these, leaking new-format keys)
|
|
6402
|
+
pattern: /\bAIza[A-Za-z0-9_-]{35}\b|\bAQ\.[A-Za-z0-9_-]{40,80}\b/g,
|
|
6403
|
+
replacement: "[REDACTED:GEMINI_API_KEY]",
|
|
6404
|
+
cloud_rule_id: "credential_pattern_06"
|
|
6386
6405
|
},
|
|
6387
6406
|
{
|
|
6388
6407
|
name: "wotw-daemon-token",
|
|
6389
6408
|
// Review item 2: daemon tokens emitted by `wotw user add` are
|
|
6390
6409
|
// `wotw_` + base64url chars. Pre-fix these went unredacted.
|
|
6391
6410
|
pattern: /\bwotw_[A-Za-z0-9_-]{30,200}\b/g,
|
|
6392
|
-
replacement: "[REDACTED:WOTW_TOKEN]"
|
|
6411
|
+
replacement: "[REDACTED:WOTW_TOKEN]",
|
|
6412
|
+
cloud_rule_id: "credential_pattern_07"
|
|
6393
6413
|
},
|
|
6394
6414
|
{
|
|
6395
6415
|
name: "private-key-block",
|
|
6396
6416
|
pattern: /-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?-----END [A-Z ]*PRIVATE KEY-----/g,
|
|
6397
|
-
replacement: "[REDACTED:PRIVATE_KEY_BLOCK]"
|
|
6417
|
+
replacement: "[REDACTED:PRIVATE_KEY_BLOCK]",
|
|
6418
|
+
cloud_rule_id: "credential_pattern_08"
|
|
6398
6419
|
},
|
|
6399
6420
|
{
|
|
6400
6421
|
name: "jwt",
|
|
6401
6422
|
pattern: /\beyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\b/g,
|
|
6402
|
-
replacement: "[REDACTED:JWT]"
|
|
6423
|
+
replacement: "[REDACTED:JWT]",
|
|
6424
|
+
cloud_rule_id: "credential_pattern_09"
|
|
6403
6425
|
},
|
|
6404
6426
|
{
|
|
6405
6427
|
// L-SEC-3: This pattern is deliberately scoped to full `scheme://`
|
|
@@ -6410,27 +6432,48 @@ var DEFAULT_REDACTIONS = [
|
|
|
6410
6432
|
// Verified by unit tests in test/unit/sanitize.test.ts.
|
|
6411
6433
|
name: "password-in-url",
|
|
6412
6434
|
pattern: /(\w+:\/\/[^:/\s]+:)[^@\s]+(@)/g,
|
|
6413
|
-
replacement: "$1[REDACTED]$2"
|
|
6435
|
+
replacement: "$1[REDACTED]$2",
|
|
6436
|
+
cloud_rule_id: "credential_pattern_10"
|
|
6414
6437
|
},
|
|
6415
6438
|
{
|
|
6439
|
+
// PII — stays daemon-local. cloud_rule_id intentionally omitted: the
|
|
6440
|
+
// PASS-024 cloud whitelist accepts only credential_pattern_01..10 +
|
|
6441
|
+
// truncation_32kb, treating PII metadata as data-that-shouldn't-leave-
|
|
6442
|
+
// the-daemon. Redaction still fires on-disk; sink emission is skipped.
|
|
6416
6443
|
name: "credit-card",
|
|
6417
6444
|
pattern: /\b(?:\d[ -]*?){13,16}\b/g,
|
|
6418
6445
|
replacement: "[REDACTED:PAN]"
|
|
6419
6446
|
},
|
|
6420
6447
|
{
|
|
6448
|
+
// PII — see credit-card note above.
|
|
6421
6449
|
name: "us-ssn",
|
|
6422
6450
|
pattern: /\b\d{3}-\d{2}-\d{4}\b/g,
|
|
6423
6451
|
replacement: "[REDACTED:SSN]"
|
|
6424
6452
|
}
|
|
6425
6453
|
];
|
|
6426
|
-
function
|
|
6454
|
+
/**
 * Apply redaction rules to `input` and report which cloud-visible rules fired.
 *
 * Rules run in order, each against the output of the previous one. For every
 * rule that matched, the matched text is replaced with `rule.replacement`;
 * an event is recorded only when the rule carries a `cloud_rule_id`.
 *
 * @param {string} input - Text to sanitize.
 * @param {Array<{ name: string, pattern: RegExp, replacement: string, cloud_rule_id?: string }>} [rules=DEFAULT_REDACTIONS]
 *   Redaction rules. Patterns may be global or non-global; a non-global
 *   pattern redacts (and counts) only its first match.
 * @returns {{ output: string, events: Array<{ rule_name: string, cloud_rule_id: string, byte_count: number }> }}
 *   The sanitized text plus one event per fired rule that has a
 *   `cloud_rule_id`. `byte_count` is the summed UTF-8 byte length of the
 *   text that rule matched.
 */
function sanitizeWithEvents(input, rules = DEFAULT_REDACTIONS) {
  const events = [];
  let out = input;
  for (const rule of rules) {
    const { pattern } = rule;
    // Rule regexes are shared objects; start every scan from the beginning.
    pattern.lastIndex = 0;
    let matches;
    if (pattern.global) {
      matches = [...out.matchAll(pattern)];
    } else {
      // matchAll throws a TypeError for a regex without the `g` flag, so a
      // non-global rule is counted via its single (first) match instead.
      const first = pattern.exec(out);
      matches = first === null ? [] : [first];
    }
    let matchedBytes = 0;
    for (const m of matches) {
      matchedBytes += Buffer.byteLength(m[0], "utf8");
    }
    // Nothing (or only empty strings) matched: leave the text untouched.
    if (matchedBytes === 0) continue;
    pattern.lastIndex = 0;
    out = out.replace(pattern, rule.replacement);
    if (rule.cloud_rule_id) {
      events.push({
        rule_name: rule.name,
        cloud_rule_id: rule.cloud_rule_id,
        byte_count: matchedBytes
      });
    }
  }
  return { output: out, events };
}
|
|
6433
|
-
__name(
|
|
6476
|
+
__name(sanitizeWithEvents, "sanitizeWithEvents");
|
|
6434
6477
|
|
|
6435
6478
|
// src/ingestion/prompt-builder.ts
|
|
6436
6479
|
init_page();
|
|
@@ -6441,6 +6484,8 @@ var EXISTING_PAGES_FULL_LIST_LIMIT = 200;
|
|
|
6441
6484
|
async function buildIngestionPrompt(opts) {
|
|
6442
6485
|
const read = opts.readFile ?? ((p) => readFileSync6(p, "utf8"));
|
|
6443
6486
|
const excerpts = [];
|
|
6487
|
+
const emitStore = opts.redactionEmitStore && opts.workspaceId ? opts.redactionEmitStore : null;
|
|
6488
|
+
const emitWorkspaceId = opts.workspaceId ?? "";
|
|
6444
6489
|
for (const file of opts.files) {
|
|
6445
6490
|
try {
|
|
6446
6491
|
const raw = read(file);
|
|
@@ -6456,12 +6501,52 @@ async function buildIngestionPrompt(opts) {
|
|
|
6456
6501
|
{ path: file, rawBytes, capBytes: MAX_EXCERPT_BYTES },
|
|
6457
6502
|
"source file truncated for prompt \u2014 model sees only the first MAX_EXCERPT_BYTES"
|
|
6458
6503
|
);
|
|
6504
|
+
if (emitStore) {
|
|
6505
|
+
try {
|
|
6506
|
+
emitStore.enqueue(emitWorkspaceId, {
|
|
6507
|
+
redacted_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
6508
|
+
rule_id: "truncation_32kb",
|
|
6509
|
+
source_file_path: file,
|
|
6510
|
+
redaction_byte_count: rawBytes - MAX_EXCERPT_BYTES
|
|
6511
|
+
});
|
|
6512
|
+
} catch (storeErr) {
|
|
6513
|
+
getLogger("prompt-builder").warn(
|
|
6514
|
+
{
|
|
6515
|
+
path: file,
|
|
6516
|
+
err: storeErr instanceof Error ? storeErr.message : String(storeErr)
|
|
6517
|
+
},
|
|
6518
|
+
"redaction-emit enqueue failed (truncation); prompt continues"
|
|
6519
|
+
);
|
|
6520
|
+
}
|
|
6521
|
+
}
|
|
6459
6522
|
} else {
|
|
6460
6523
|
body = raw;
|
|
6461
6524
|
}
|
|
6525
|
+
const { output: sanitized, events } = sanitizeWithEvents(body);
|
|
6526
|
+
if (emitStore && events.length > 0) {
|
|
6527
|
+
for (const event of events) {
|
|
6528
|
+
try {
|
|
6529
|
+
emitStore.enqueue(emitWorkspaceId, {
|
|
6530
|
+
redacted_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
6531
|
+
rule_id: event.cloud_rule_id,
|
|
6532
|
+
source_file_path: file,
|
|
6533
|
+
redaction_byte_count: event.byte_count
|
|
6534
|
+
});
|
|
6535
|
+
} catch (storeErr) {
|
|
6536
|
+
getLogger("prompt-builder").warn(
|
|
6537
|
+
{
|
|
6538
|
+
path: file,
|
|
6539
|
+
rule: event.rule_name,
|
|
6540
|
+
err: storeErr instanceof Error ? storeErr.message : String(storeErr)
|
|
6541
|
+
},
|
|
6542
|
+
"redaction-emit enqueue failed (credential); prompt continues"
|
|
6543
|
+
);
|
|
6544
|
+
}
|
|
6545
|
+
}
|
|
6546
|
+
}
|
|
6462
6547
|
excerpts.push({
|
|
6463
6548
|
path: file,
|
|
6464
|
-
excerpt:
|
|
6549
|
+
excerpt: sanitized,
|
|
6465
6550
|
bytes: rawBytes,
|
|
6466
6551
|
truncated
|
|
6467
6552
|
});
|
|
@@ -7019,7 +7104,9 @@ var IngestionQueue = class {
|
|
|
7019
7104
|
const prompt = await buildIngestionPrompt({
|
|
7020
7105
|
config: this.opts.config,
|
|
7021
7106
|
files: batch.paths,
|
|
7022
|
-
existingPages
|
|
7107
|
+
existingPages,
|
|
7108
|
+
redactionEmitStore: this.opts.redactionEmitStore ?? null,
|
|
7109
|
+
workspaceId: this.opts.redactionWorkspaceId
|
|
7023
7110
|
});
|
|
7024
7111
|
const sourceFiles = [...batch.paths];
|
|
7025
7112
|
const sourceHashes = [];
|
|
@@ -10954,6 +11041,469 @@ async function readJsonBody(req) {
|
|
|
10954
11041
|
}
|
|
10955
11042
|
__name(readJsonBody, "readJsonBody");
|
|
10956
11043
|
|
|
11044
|
+
// src/provenance/redaction-emit-store.ts
|
|
11045
|
+
init_esm_shims();
|
|
11046
|
+
init_actionable_error();
|
|
11047
|
+
init_fs();
|
|
11048
|
+
init_logger();
|
|
11049
|
+
import Database from "better-sqlite3";
|
|
11050
|
+
import { randomUUID as randomUUID3 } from "crypto";
|
|
11051
|
+
import { dirname as dirname10 } from "path";
|
|
11052
|
+
var SCHEMA_VERSION = 1;
|
|
11053
|
+
var SCHEMA_SQL = `
|
|
11054
|
+
CREATE TABLE IF NOT EXISTS pending_redaction_emits (
|
|
11055
|
+
event_id TEXT PRIMARY KEY,
|
|
11056
|
+
workspace_id TEXT NOT NULL,
|
|
11057
|
+
payload_json TEXT NOT NULL,
|
|
11058
|
+
created_at TEXT NOT NULL,
|
|
11059
|
+
attempts INTEGER NOT NULL DEFAULT 0,
|
|
11060
|
+
last_attempt_at TEXT,
|
|
11061
|
+
last_error TEXT,
|
|
11062
|
+
status TEXT NOT NULL DEFAULT 'pending'
|
|
11063
|
+
CHECK (status IN ('pending','sent','archived'))
|
|
11064
|
+
);
|
|
11065
|
+
CREATE INDEX IF NOT EXISTS idx_pending_redaction_emits_drain
|
|
11066
|
+
ON pending_redaction_emits(status, created_at);
|
|
11067
|
+
CREATE INDEX IF NOT EXISTS idx_pending_redaction_emits_workspace
|
|
11068
|
+
ON pending_redaction_emits(workspace_id, created_at);
|
|
11069
|
+
`;
|
|
11070
|
+
/**
 * SQLite-backed outbox of redaction events waiting to be sent to the cloud.
 *
 * Rows live in `pending_redaction_emits` and move between the statuses
 * 'pending', 'sent' and 'archived'. Rows are never deleted by this class.
 */
var RedactionEmitStore = class {
  static {
    __name(this, "RedactionEmitStore");
  }
  path;
  db;
  /**
   * Open (or create) the database and run schema migrations.
   *
   * @param {{ path: string, inMemory?: boolean }} opts - `path` is the
   *   database file; when `inMemory` is truthy a ":memory:" database is
   *   opened instead and no directory is created.
   * @throws {Error} A native-binding load error when better-sqlite3 fails to
   *   load, otherwise whatever the Database constructor or migrate() throws.
   */
  constructor(opts) {
    this.path = opts.path;
    if (!opts.inMemory) {
      ensureDirSync(dirname10(this.path));
    }
    try {
      this.db = new Database(opts.inMemory ? ":memory:" : this.path);
    } catch (err) {
      if (looksLikeNativeBindingFailure(err)) {
        throw nativeBindingLoadError("better-sqlite3", err);
      }
      throw err;
    }
    this.db.pragma("journal_mode = WAL");
    this.db.pragma("foreign_keys = ON");
    this.migrate();
  }
  /**
   * Bring the database to SCHEMA_VERSION using the `user_version` pragma.
   * No-op when already current.
   *
   * @throws {Error} When the on-disk version is newer than this code's.
   */
  migrate() {
    const log = getLogger("redaction-emit-store");
    const currentVersion = this.db.pragma("user_version", { simple: true });
    if (currentVersion === SCHEMA_VERSION) return;
    if (currentVersion > SCHEMA_VERSION) {
      throw new Error(
        `redaction-emit.db at ${this.path} is at schema version ${currentVersion} (newer than this daemon's ${SCHEMA_VERSION}) \u2014 refusing to downgrade`
      );
    }
    log.info({ from: currentVersion, to: SCHEMA_VERSION }, "running redaction-emit.db migrations");
    this.db.exec(SCHEMA_SQL);
    this.db.pragma(`user_version = ${SCHEMA_VERSION}`);
  }
  /**
   * Write a new 'pending' row. Returns the generated event_id. Caller
   * (prompt-builder) must call this BEFORE attempting cloud emission —
   * the SQLite append is the durability anchor.
   *
   * `now` is injected for testability; defaults to ISO 8601 now.
   *
   * @param {string} workspaceId - Stored in the `workspace_id` column.
   * @param {object} payload - Serialized with JSON.stringify into `payload_json`.
   * @param {string} [now] - Value for `created_at`.
   * @returns {string} The new row's event_id (a random UUID).
   */
  enqueue(workspaceId, payload, now = (/* @__PURE__ */ new Date()).toISOString()) {
    const event_id = randomUUID3();
    this.db.prepare(
      `INSERT INTO pending_redaction_emits
           (event_id, workspace_id, payload_json, created_at, attempts, status)
         VALUES (?, ?, ?, ?, 0, 'pending')`
    ).run(event_id, workspaceId, JSON.stringify(payload), now);
    return event_id;
  }
  /**
   * List pending rows in creation order, capped at `limit`. The cloud
   * endpoint caps batches at 1000 events; the worker passes 1000 here
   * to fill each batch.
   *
   * @param {number} limit - Maximum number of rows to return.
   * @returns {Array<object>} Rows with `payload_json` parsed into `payload`.
   */
  listPending(limit) {
    const rows = this.db.prepare(
      `SELECT event_id, workspace_id, payload_json, created_at, attempts,
                last_attempt_at, last_error, status
         FROM pending_redaction_emits
         WHERE status = 'pending'
         ORDER BY created_at ASC, event_id ASC
         LIMIT ?`
    ).all(limit);
    // Arrow wrapper keeps `this` bound should toRow ever start using it.
    return rows.map((raw) => this.toRow(raw));
  }
  /**
   * Mark a batch of pending rows as 'sent' after a successful cloud
   * POST. Atomic transaction — partial failure rolls back. Returns the
   * number of rows that actually transitioned (pending → sent).
   *
   * Rows already in 'sent' state are no-ops; this is the idempotency
   * guarantee on the daemon side (re-drain after a crash where SQLite
   * commit hadn't flushed). Rows in 'archived' state are NOT touched —
   * those are forensic-final.
   *
   * @param {string[]} eventIds - event_ids to transition.
   * @param {string} [now] - Value written to `last_attempt_at`.
   * @returns {number} Count of rows changed.
   */
  markSent(eventIds, now = (/* @__PURE__ */ new Date()).toISOString()) {
    if (eventIds.length === 0) return 0;
    const update = this.db.prepare(
      `UPDATE pending_redaction_emits
         SET status = 'sent', last_attempt_at = ?
         WHERE event_id = ? AND status = 'pending'`
    );
    return this.db.transaction((ids) => {
      let changes = 0;
      for (const id of ids) {
        const result = update.run(now, id);
        changes += Number(result.changes);
      }
      return changes;
    })(eventIds);
  }
  /**
   * Increment attempts counter + record last_error for a batch that
   * failed to POST. Rows stay 'pending' so the next drain tick retries
   * them. Does NOT touch rows already in 'sent' or 'archived' state.
   *
   * @param {string[]} eventIds - event_ids of the failed batch.
   * @param {unknown} error - Failure description; coerced to a string and
   *   truncated to 500 characters before being stored.
   * @param {string} [now] - Value written to `last_attempt_at`.
   * @returns {number} Count of rows changed.
   */
  markFailed(eventIds, error, now = (/* @__PURE__ */ new Date()).toISOString()) {
    if (eventIds.length === 0) return 0;
    // Coerce once up front: a non-string error (undefined, an Error object)
    // must not throw here and abort the bookkeeping for the whole batch.
    const message = String(error).slice(0, 500);
    const update = this.db.prepare(
      `UPDATE pending_redaction_emits
         SET attempts = attempts + 1,
             last_attempt_at = ?,
             last_error = ?
         WHERE event_id = ? AND status = 'pending'`
    );
    return this.db.transaction((ids) => {
      let changes = 0;
      for (const id of ids) {
        const result = update.run(now, message, id);
        changes += Number(result.changes);
      }
      return changes;
    })(eventIds);
  }
  /**
   * Move terminally-failed rows (attempts >= maxAttempts) to 'archived'.
   * Archived rows remain in the table for forensic inspection — never
   * deleted. Returns the list of event_ids that transitioned.
   *
   * Called by the worker after each failed batch to evict rows that
   * are stuck retrying forever. The cap matches the worker's
   * MAX_ATTEMPTS constant.
   *
   * @param {number} maxAttempts - Attempt count at which a row is archived.
   * @returns {string[]} event_ids selected for archiving.
   */
  archiveExhausted(maxAttempts) {
    return this.db.transaction(() => {
      const rows = this.db.prepare(
        `SELECT event_id FROM pending_redaction_emits
           WHERE status = 'pending' AND attempts >= ?`
      ).all(maxAttempts);
      if (rows.length === 0) return [];
      const ids = rows.map((r) => r.event_id);
      // One prepared statement run per row rather than a single
      // `IN (?,?,...)` list: the list form binds one parameter per row and
      // would fail once a large backlog exceeds SQLite's bound-parameter cap.
      const archive = this.db.prepare(
        `UPDATE pending_redaction_emits SET status = 'archived'
           WHERE event_id = ? AND status = 'pending'`
      );
      for (const id of ids) {
        archive.run(id);
      }
      return ids;
    })();
  }
  /**
   * Count rows by status. Useful for diagnostics + tests.
   *
   * @returns {{ pending: number, sent: number, archived: number }} Counts;
   *   statuses with no rows report 0.
   */
  countByStatus() {
    const rows = this.db.prepare(`SELECT status, COUNT(*) as n FROM pending_redaction_emits GROUP BY status`).all();
    const out = {
      pending: 0,
      sent: 0,
      archived: 0
    };
    for (const r of rows) out[r.status] = r.n;
    return out;
  }
  /** Schema version (diagnostics + tests). */
  schemaVersion() {
    return this.db.pragma("user_version", { simple: true });
  }
  /** Close the underlying handle. Idempotent. */
  close() {
    if (this.db.open) this.db.close();
  }
  /**
   * Convert a raw table row into the shape returned to callers, parsing
   * `payload_json` into `payload`.
   *
   * @param {object} raw - Row as returned by the SELECT in listPending.
   * @returns {object} Row with a parsed `payload` field.
   */
  toRow(raw) {
    return {
      event_id: raw.event_id,
      workspace_id: raw.workspace_id,
      payload: JSON.parse(raw.payload_json),
      created_at: raw.created_at,
      attempts: raw.attempts,
      last_attempt_at: raw.last_attempt_at,
      last_error: raw.last_error,
      status: raw.status
    };
  }
};
|
|
11244
|
+
|
|
11245
|
+
// src/provenance/redaction-emit-worker.ts
|
|
11246
|
+
init_esm_shims();
|
|
11247
|
+
init_logger();
|
|
11248
|
+
|
|
11249
|
+
// src/provenance/redaction-sink.ts
|
|
11250
|
+
init_esm_shims();
|
|
11251
|
+
init_logger();
|
|
11252
|
+
var DEFAULT_API_BASE_URL = "https://wotw.dev";
|
|
11253
|
+
var REQUEST_TIMEOUT_MS = 1e4;
|
|
11254
|
+
var CLOUD_REDACTION_BATCH_CAP = 1e3;
|
|
11255
|
+
/**
 * HTTPS client that POSTs batches of redaction events to the cloud endpoint
 * `/api/internal/redaction-log`, authenticating with the `x-sink-key` header.
 */
var RedactionSink = class {
  static {
    __name(this, "RedactionSink");
  }
  workspaceId;
  apiBaseUrl;
  sinkSecret;
  fetchImpl;
  /**
   * @param {{ workspaceId: string, apiBaseUrl?: string, sinkSecret: string, fetchImpl?: typeof fetch }} opts
   *   `apiBaseUrl` defaults to DEFAULT_API_BASE_URL and must start with
   *   "https://"; `fetchImpl` defaults to the global fetch.
   * @throws {Error} When the base URL is not https://, so the sink key is
   *   never sent over plaintext.
   */
  constructor(opts) {
    this.workspaceId = opts.workspaceId;
    this.apiBaseUrl = opts.apiBaseUrl ?? DEFAULT_API_BASE_URL;
    if (!this.apiBaseUrl.startsWith("https://")) {
      throw new Error(
        `redaction-sink: apiBaseUrl must be https:// (got "${this.apiBaseUrl}"); WOTW_API_BASE_URL is misconfigured. Refusing to send sink key over plaintext.`
      );
    }
    this.sinkSecret = opts.sinkSecret;
    this.fetchImpl = opts.fetchImpl ?? fetch;
  }
  /**
   * POST a single batch of redaction events to the cloud. Never throws;
   * caller decides whether to retry based on the returned `ok` field.
   *
   * The batch MUST contain at least one event and at most
   * CLOUD_REDACTION_BATCH_CAP events — the caller (worker) trims to fit.
   * An empty batch short-circuits to success without a request; an
   * oversized batch is rejected locally without a request.
   *
   * @param {Array<{ event_id: string, redacted_at: string, rule_id: string, source_file_path: string, redaction_byte_count: number }>} events
   * @returns {Promise<{ ok: true, inserted: number } | { ok: false, status: number | null, errorBody: string }>}
   *   `status` is null when no HTTP response was obtained.
   */
  async post(events) {
    const log = getLogger("provenance.redaction-sink");
    if (events.length === 0) {
      return { ok: true, inserted: 0 };
    }
    if (events.length > CLOUD_REDACTION_BATCH_CAP) {
      return {
        ok: false,
        status: null,
        errorBody: `client-side batch cap exceeded (got ${events.length}, max ${CLOUD_REDACTION_BATCH_CAP})`
      };
    }
    // Strip trailing slashes so a base URL configured as "https://host/"
    // does not produce "https://host//api/...".
    const url = `${this.apiBaseUrl.replace(/\/+$/, "")}/api/internal/redaction-log`;
    const body = {
      workspace_id: this.workspaceId,
      // Forward only the known fields of each event.
      events: events.map((e) => ({
        event_id: e.event_id,
        redacted_at: e.redacted_at,
        rule_id: e.rule_id,
        source_file_path: e.source_file_path,
        redaction_byte_count: e.redaction_byte_count
      }))
    };
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
    try {
      const res = await this.fetchImpl(url, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "x-sink-key": this.sinkSecret
        },
        body: JSON.stringify(body),
        signal: controller.signal
      });
      if (res.ok) {
        // Fall back to the batch size when the response carries no usable count.
        let inserted = events.length;
        try {
          const data = await res.json();
          if (typeof data?.inserted === "number") inserted = data.inserted;
        } catch {
          // Deliberate best-effort: a 2xx with an unparseable body still
          // counts as accepted; keep the fallback count.
        }
        log.debug(
          {
            workspaceId: this.workspaceId,
            batchSize: events.length,
            inserted
          },
          "redaction batch accepted by cloud"
        );
        return { ok: true, inserted };
      }
      const text = await res.text().catch(() => "");
      log.warn(
        {
          workspaceId: this.workspaceId,
          batchSize: events.length,
          status: res.status,
          body: text.slice(0, 500)
        },
        "redaction batch rejected by cloud"
      );
      return { ok: false, status: res.status, errorBody: text.slice(0, 500) };
    } catch (err) {
      // Network failure or abort (timeout): report it instead of throwing.
      const msg = err instanceof Error ? err.message : String(err);
      log.warn(
        {
          workspaceId: this.workspaceId,
          batchSize: events.length,
          err: msg
        },
        "redaction batch request failed"
      );
      return { ok: false, status: null, errorBody: msg };
    } finally {
      clearTimeout(timeout);
    }
  }
};
|
|
11360
|
+
/**
 * Build a RedactionSink from environment variables.
 *
 * Reads `WOTW_WIKI_ID` (used as the sink's workspace id) and
 * `WOTW_CLOUD_SINK_SECRET`. When either is missing or empty the sink is
 * considered unconfigured and `null` is returned.
 *
 * An empty `WOTW_API_BASE_URL` is treated the same as an unset one: the sink
 * receives `undefined` for `apiBaseUrl` (presumably so it can apply its own
 * default; the constructor is not visible here, so confirm there).
 *
 * @param {Record<string, string | undefined>} [env=process.env] - Environment to read from.
 * @returns {RedactionSink | null} A configured sink, or `null` when unconfigured.
 */
function redactionSinkFromEnv(env = process.env) {
  const { WOTW_WIKI_ID: workspaceId, WOTW_CLOUD_SINK_SECRET: sinkSecret } = env;
  const configured = Boolean(workspaceId) && Boolean(sinkSecret);
  if (!configured) {
    return null;
  }
  const apiBaseUrl = env.WOTW_API_BASE_URL || void 0;
  return new RedactionSink({ workspaceId, apiBaseUrl, sinkSecret });
}
__name(redactionSinkFromEnv, "redactionSinkFromEnv");
|
|
11371
|
+
|
|
11372
|
+
// src/provenance/redaction-emit-worker.ts
|
|
11373
|
+
// Delay between drain passes while the previous pass succeeded (30 s).
var DEFAULT_BASE_INTERVAL_MS = 3e4;
// Ceiling for the delay once failures have doubled it repeatedly (5 min).
var DEFAULT_MAX_INTERVAL_MS = 5 * 6e4;
// Default threshold handed to store.archiveExhausted() after a failed pass.
var DEFAULT_MAX_ATTEMPTS = 100;
/**
 * Daemon subsystem that drains pending redaction events from a local store
 * to a cloud sink, one batch per tick, with exponential backoff on failure.
 *
 * Collaborators (duck-typed, as used by this class):
 *  - `store.listPending(limit)` returns rows shaped `{ event_id, payload }`
 *  - `store.markSent(ids)`, `store.markFailed(ids, summary)`
 *  - `store.archiveExhausted(maxAttempts)` returns the archived ids
 *  - `sink.post(events)` resolves to `{ ok, inserted }` or
 *    `{ ok: false, status, errorBody }`; `sink.apiBaseUrl` is logged on start
 *
 * When `sink` is falsy the worker is inert: `start()` logs and returns, and
 * `tick()` is a no-op, so rows simply accumulate in the store.
 */
var RedactionEmitWorker = class {
  static {
    __name(this, "RedactionEmitWorker");
  }
  name = "redaction-emit-worker";
  store;
  sink;
  baseIntervalMs;
  maxIntervalMs;
  maxAttempts;
  batchSize;
  /** Handle of the pending setTimeout for the next tick, if any. */
  timer = null;
  /** Promise of the most recently started tick; awaited by stop(). */
  inflight = null;
  /** Current backoff interval. Reset to base on success, grows on failure. */
  currentIntervalMs;
  /** Set by stop(); prevents any further tick from being scheduled. */
  stopped = false;
  /**
   * @param {object} opts
   * @param {object} opts.store - Queue of pending redaction events.
   * @param {object|null} opts.sink - Cloud sink, or null/undefined to disable uploads.
   * @param {number} [opts.baseIntervalMs] - Delay after a successful pass.
   * @param {number} [opts.maxIntervalMs] - Upper bound for the backoff delay.
   * @param {number} [opts.maxAttempts] - Passed to store.archiveExhausted().
   * @param {number} [opts.batchSize] - Rows per pass; clamped to an integer in
   *   [1, CLOUD_REDACTION_BATCH_CAP].
   */
  constructor(opts) {
    this.store = opts.store;
    this.sink = opts.sink;
    this.baseIntervalMs = opts.baseIntervalMs ?? DEFAULT_BASE_INTERVAL_MS;
    this.maxIntervalMs = opts.maxIntervalMs ?? DEFAULT_MAX_INTERVAL_MS;
    this.maxAttempts = opts.maxAttempts ?? DEFAULT_MAX_ATTEMPTS;
    // Clamp on both sides. A zero, negative, fractional or NaN size must never
    // reach store.listPending(): zero would make every pass look like an empty
    // queue (nothing ever drains), and the effect of a negative or NaN limit
    // depends entirely on the store implementation.
    const requestedBatchSize = opts.batchSize ?? CLOUD_REDACTION_BATCH_CAP;
    this.batchSize = Number.isFinite(requestedBatchSize)
      ? Math.min(Math.max(Math.floor(requestedBatchSize), 1), CLOUD_REDACTION_BATCH_CAP)
      : CLOUD_REDACTION_BATCH_CAP;
    this.currentIntervalMs = this.baseIntervalMs;
  }
  /**
   * Start the worker. With no sink configured this only logs and returns.
   * Otherwise it runs the first drain pass immediately and resolves once that
   * pass has finished; later passes are scheduled by tick() itself.
   */
  async start() {
    const log = getLogger(this.name);
    if (!this.sink) {
      log.info(
        "redaction emit worker disabled (no sink configured \u2014 local/offline mode); SQLite queue continues to capture rows for forensic inspection"
      );
      return;
    }
    log.info(
      {
        baseIntervalMs: this.baseIntervalMs,
        maxIntervalMs: this.maxIntervalMs,
        maxAttempts: this.maxAttempts,
        batchSize: this.batchSize,
        apiBaseUrl: this.sink.apiBaseUrl
      },
      "redaction emit worker starting"
    );
    this.inflight = this.tick();
    await this.inflight;
  }
  /**
   * Stop the worker: block further scheduling, cancel the pending timer and
   * wait for a pass that is already running. Never rejects.
   */
  async stop() {
    this.stopped = true;
    if (this.timer) {
      clearTimeout(this.timer);
      this.timer = null;
    }
    if (this.inflight) {
      try {
        await this.inflight;
      } catch {
        // Shutdown is best-effort; tick() already logs its own failures.
      }
    }
  }
  /**
   * One drain pass. Pulls a batch, attempts POST, transitions rows,
   * adjusts backoff, schedules the next tick (when not stopped).
   *
   * Exposed for tests so they can step the worker without sleeping.
   */
  async tick() {
    if (!this.sink) return;
    const log = getLogger(this.name);
    let succeeded = false;
    try {
      const pending = this.store.listPending(this.batchSize);
      if (pending.length === 0) {
        // An empty queue counts as success so the backoff resets to base.
        succeeded = true;
        return;
      }
      const eventIds = pending.map((r) => r.event_id);
      const events = pending.map((r) => ({ event_id: r.event_id, ...r.payload }));
      const result = await this.sink.post(events);
      if (result.ok) {
        const transitioned = this.store.markSent(eventIds);
        log.info(
          {
            batchSize: pending.length,
            transitioned,
            inserted: result.inserted
          },
          "redaction batch drained"
        );
        succeeded = true;
      } else {
        // A null status means the request never produced an HTTP response.
        const errSummary = `status=${result.status ?? "network"} body=${result.errorBody.slice(0, 200)}`;
        this.store.markFailed(eventIds, errSummary);
        const archived = this.store.archiveExhausted(this.maxAttempts);
        if (archived.length > 0) {
          log.error(
            {
              archivedCount: archived.length,
              maxAttempts: this.maxAttempts,
              sampleIds: archived.slice(0, 3)
            },
            "redaction events archived after exhausting retries \u2014 review needed"
          );
        }
      }
    } catch (err) {
      // Store or sink threw unexpectedly; treat as a failed pass and back off.
      log.error(
        { err: err instanceof Error ? err.message : String(err) },
        "redaction worker tick failed unexpectedly"
      );
    } finally {
      if (succeeded) {
        this.currentIntervalMs = this.baseIntervalMs;
      } else {
        this.currentIntervalMs = Math.min(this.currentIntervalMs * 2, this.maxIntervalMs);
      }
      this.scheduleNext();
    }
  }
  /**
   * Arm the timer for the next pass using the current backoff interval.
   * Does nothing once the worker has been stopped.
   */
  scheduleNext() {
    if (this.stopped) return;
    if (this.timer) clearTimeout(this.timer);
    this.timer = setTimeout(() => {
      this.inflight = this.tick();
    }, this.currentIntervalMs);
    // Do not let the retry timer alone keep the process alive.
    if (typeof this.timer.unref === "function") this.timer.unref();
  }
};
|
|
11506
|
+
|
|
10957
11507
|
// src/compounding/engine.ts
|
|
10958
11508
|
init_esm_shims();
|
|
10959
11509
|
init_errors();
|
|
@@ -11429,6 +11979,24 @@ async function main() {
|
|
|
11429
11979
|
);
|
|
11430
11980
|
}
|
|
11431
11981
|
}
|
|
11982
|
+
const redactionEmitStore = new RedactionEmitStore({
|
|
11983
|
+
path: `${config.wiki_root}/.wotw/redaction-emit.db`
|
|
11984
|
+
});
|
|
11985
|
+
const redactionWorkspaceId = process.env.WOTW_WIKI_ID;
|
|
11986
|
+
const redactionSink = redactionSinkFromEnv();
|
|
11987
|
+
const redactionEmitWorker = new RedactionEmitWorker({
|
|
11988
|
+
store: redactionEmitStore,
|
|
11989
|
+
sink: redactionSink
|
|
11990
|
+
});
|
|
11991
|
+
log.info(
|
|
11992
|
+
{
|
|
11993
|
+
path: redactionEmitStore.path,
|
|
11994
|
+
sinkActive: !!redactionSink,
|
|
11995
|
+
workspaceId: redactionWorkspaceId ?? null,
|
|
11996
|
+
counts: redactionEmitStore.countByStatus()
|
|
11997
|
+
},
|
|
11998
|
+
"redaction-emit store ready"
|
|
11999
|
+
);
|
|
11432
12000
|
const costTracker = new CostTracker({
|
|
11433
12001
|
trackFile: config.cost.track_file,
|
|
11434
12002
|
maxDailyUsd: config.cost.max_daily_usd,
|
|
@@ -11476,7 +12044,9 @@ async function main() {
|
|
|
11476
12044
|
runtimeMode,
|
|
11477
12045
|
deadLetter,
|
|
11478
12046
|
factStore,
|
|
11479
|
-
factIndex
|
|
12047
|
+
factIndex,
|
|
12048
|
+
redactionEmitStore,
|
|
12049
|
+
redactionWorkspaceId
|
|
11480
12050
|
});
|
|
11481
12051
|
const compounding = new CompoundingEngine({
|
|
11482
12052
|
config,
|
|
@@ -11520,6 +12090,7 @@ async function main() {
|
|
|
11520
12090
|
daemon.attachSubsystem(mcp);
|
|
11521
12091
|
daemon.attachSubsystem(lintScheduler);
|
|
11522
12092
|
if (dekArchiveScheduler) daemon.attachSubsystem(dekArchiveScheduler);
|
|
12093
|
+
daemon.attachSubsystem(redactionEmitWorker);
|
|
11523
12094
|
watcher.startReconciliation(5 * 60 * 1e3);
|
|
11524
12095
|
const mcpUrl = `http://${config.server.host}:${config.server.port}/mcp`;
|
|
11525
12096
|
log.info(
|