@remnic/core 9.3.608 → 9.3.610

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  Orchestrator
3
- } from "./chunk-YJ6QCQNE.js";
3
+ } from "./chunk-HJNQQICM.js";
4
4
  import "./chunk-5RIRL3XL.js";
5
5
  import "./chunk-KVEVLBKC.js";
6
6
  import "./chunk-BFBF3XEF.js";
@@ -47,7 +47,7 @@ import "./chunk-VU3SVYMA.js";
47
47
  import "./chunk-H63EDPFJ.js";
48
48
  import "./chunk-PD6O7AXF.js";
49
49
  import "./chunk-YAZNBMNF.js";
50
- import "./chunk-LCR46JY5.js";
50
+ import "./chunk-7YX23JBA.js";
51
51
  import "./chunk-C4SQJZAF.js";
52
52
  import "./chunk-KM2A35EO.js";
53
53
  import "./chunk-4RA3C3EV.js";
@@ -5,7 +5,7 @@ import {
5
5
  // src/extraction-judge-training.ts
6
6
  import path from "path";
7
7
  import { homedir } from "os";
8
- import { appendFile, chmod, mkdir, readFile, readdir } from "fs/promises";
8
+ import { appendFile, chmod, lstat, mkdir, readFile, readdir, realpath } from "fs/promises";
9
9
  function expandTilde(p) {
10
10
  const home = homedir();
11
11
  if (p === "~" || p.startsWith("~/") || p.startsWith("~\\")) {
@@ -36,6 +36,10 @@ function dateStamp(iso) {
36
36
  function trainingFilePathFor(directory, iso) {
37
37
  return path.join(directory, `${dateStamp(iso)}.jsonl`);
38
38
  }
39
+ function isPathInsideDirectory(filePath, directory) {
40
+ const relative = path.relative(directory, filePath);
41
+ return relative === "" || !relative.startsWith("..") && !path.isAbsolute(relative);
42
+ }
39
43
  async function recordJudgeTrainingPair(row, options) {
40
44
  if (!options.enabled) return;
41
45
  const dir = resolveTrainingDir(options);
@@ -56,6 +60,19 @@ async function recordJudgeTrainingPair(row, options) {
56
60
  }
57
61
  async function readJudgeTrainingPairs(options) {
58
62
  const dir = resolveTrainingDir({ enabled: true, ...options });
63
+ try {
64
+ const dirStat = await lstat(dir);
65
+ if (dirStat.isSymbolicLink()) {
66
+ throw new Error("Judge training directory must not be a symlink");
67
+ }
68
+ if (!dirStat.isDirectory()) {
69
+ throw new Error("Judge training path must be a directory");
70
+ }
71
+ } catch (err) {
72
+ const code = err.code;
73
+ if (code === "ENOENT") return { rows: [], malformed: 0 };
74
+ throw err;
75
+ }
59
76
  let entries;
60
77
  try {
61
78
  entries = await readdir(dir);
@@ -66,10 +83,30 @@ async function readJudgeTrainingPairs(options) {
66
83
  }
67
84
  const rows = [];
68
85
  let malformed = 0;
86
+ const resolvedDir = await realpath(dir);
69
87
  entries.sort();
70
88
  for (const name of entries) {
71
89
  if (!name.endsWith(".jsonl")) continue;
72
- const raw = await readFile(path.join(dir, name), "utf-8");
90
+ const filePath = path.join(dir, name);
91
+ let fileStat;
92
+ try {
93
+ fileStat = await lstat(filePath);
94
+ } catch (err) {
95
+ const code = err.code;
96
+ if (code === "ENOENT") continue;
97
+ throw err;
98
+ }
99
+ if (fileStat.isSymbolicLink() || !fileStat.isFile()) continue;
100
+ let resolvedFilePath;
101
+ try {
102
+ resolvedFilePath = await realpath(filePath);
103
+ } catch (err) {
104
+ const code = err.code;
105
+ if (code === "ENOENT") continue;
106
+ throw err;
107
+ }
108
+ if (!isPathInsideDirectory(resolvedFilePath, resolvedDir)) continue;
109
+ const raw = await readFile(resolvedFilePath, "utf-8");
73
110
  for (const line of raw.split("\n")) {
74
111
  if (!line.trim()) continue;
75
112
  let parsed;
@@ -120,4 +157,4 @@ export {
120
157
  readJudgeTrainingPairs,
121
158
  isValidTrainingPair
122
159
  };
123
- //# sourceMappingURL=chunk-LCR46JY5.js.map
160
+ //# sourceMappingURL=chunk-7YX23JBA.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/extraction-judge-training.ts"],"sourcesContent":["/**\n * Extraction Judge Training Data Shim (issue #562, PR 4).\n *\n * Opt-in collector for `(candidate_text, verdict_kind, reason,\n * ground_truth_label?)` tuples. Rows are appended to JSONL files under\n * `~/.remnic/judge-training/<YYYY-MM-DD>.jsonl` so operators can ship the\n * data into a future GRPO training pipeline without exfiltrating live\n * memory content through the regular observation ledger.\n *\n * Gating:\n * - Off by default. Must be explicitly enabled via\n * `collectJudgeTrainingPairs: true` in plugin config.\n * - The ground-truth label is always optional — labels are added out-of-\n * band once reviewers disambiguate the candidate's fate.\n *\n * Privacy: the row carries only what the judge already sees — the\n * candidate text and its metadata. It does NOT carry session keys,\n * principal IDs, or any user identifiers. The file lives in the user's\n * home directory rather than the shared memory directory so it is never\n * committed, sync'd, or bundled into exports.\n */\n\nimport path from \"node:path\";\nimport { homedir } from \"node:os\";\nimport { appendFile, chmod, lstat, mkdir, readFile, readdir, realpath } from \"node:fs/promises\";\nimport { log } from \"./logger.js\";\nimport type { JudgeVerdictKind } from \"./extraction-judge.js\";\n\n/**\n * Persisted training row. Intentionally minimal: just the signal needed\n * to train a judge replacement policy. Schema version is tagged so future\n * readers can migrate older rows.\n */\nexport interface JudgeTrainingPair {\n version: 1;\n ts: string; // ISO-8601\n candidateText: string;\n candidateCategory: string;\n candidateConfidence?: number;\n verdictKind: JudgeVerdictKind;\n reason: string;\n /**\n * Number of prior deferrals when the verdict was resolved. `0` for the\n * first resolution; only set when known (defer pathway).\n */\n priorDeferrals?: number;\n /**\n * Optional human-applied ground-truth label. Added after the fact by a\n * reviewer / labelling script; not present on fresh rows.\n */\n groundTruthLabel?: JudgeVerdictKind;\n}\n\nexport interface JudgeTrainingOptions {\n enabled: boolean;\n /**\n * Override for the output directory. Defaults to\n * `~/.remnic/judge-training`. Tests pass a temp path here.\n */\n directory?: string;\n}\n\n/**\n * Expand a leading `~` / `~/` / `$HOME/` / `${HOME}/` to the process home\n * directory. Node's `fs` APIs do not expand `~` themselves (CLAUDE.md\n * gotcha 17), so every user-facing path input must be funnelled through\n * this helper before it reaches the filesystem.\n */\nfunction expandTilde(p: string): string {\n const home = homedir();\n if (p === \"~\" || p.startsWith(\"~/\") || p.startsWith(\"~\\\\\")) {\n return home + p.slice(1);\n }\n if (p === \"$HOME\" || p.startsWith(\"$HOME/\") || p.startsWith(\"$HOME\\\\\")) {\n return home + p.slice(5);\n }\n if (p === \"${HOME}\" || p.startsWith(\"${HOME}/\") || p.startsWith(\"${HOME}\\\\\")) {\n return home + p.slice(7);\n }\n return p;\n}\n\nexport function resolveTrainingDir(options: JudgeTrainingOptions): string {\n if (options.directory && options.directory.length > 0) {\n // Expand `~` / `$HOME` in the override so operators can write the\n // config as the user sees it (CLAUDE.md gotcha 17).\n return expandTilde(options.directory);\n }\n return path.join(homedir(), \".remnic\", \"judge-training\");\n}\n\nfunction dateStamp(iso: string): string {\n // `YYYY-MM-DD` from an ISO-8601 string. Falls back to today on a parse\n // failure rather than throwing — the caller already wrote a row and the\n // timestamp is best-effort.\n const ms = Date.parse(iso);\n const d = Number.isFinite(ms) ? new Date(ms) : new Date();\n const yyyy = d.getUTCFullYear().toString().padStart(4, \"0\");\n const mm = (d.getUTCMonth() + 1).toString().padStart(2, \"0\");\n const dd = d.getUTCDate().toString().padStart(2, \"0\");\n return `${yyyy}-${mm}-${dd}`;\n}\n\nexport function trainingFilePathFor(directory: string, iso: string): string {\n return path.join(directory, `${dateStamp(iso)}.jsonl`);\n}\n\nfunction isPathInsideDirectory(filePath: string, directory: string): boolean {\n const relative = path.relative(directory, filePath);\n return relative === \"\" || (!relative.startsWith(\"..\") && !path.isAbsolute(relative));\n}\n\n/**\n * Append a single training row. Fails open — write errors are logged at\n * debug level and swallowed, same policy as the telemetry emitter.\n * No-op when `options.enabled` is false.\n */\nexport async function recordJudgeTrainingPair(row: JudgeTrainingPair, options: JudgeTrainingOptions): Promise<void> {\n if (!options.enabled) return;\n const dir = resolveTrainingDir(options);\n const filePath = trainingFilePathFor(dir, row.ts);\n try {\n await mkdir(dir, { recursive: true, mode: 0o700 });\n await appendFile(filePath, `${JSON.stringify(row)}\\n`, {\n encoding: \"utf-8\",\n mode: 0o600,\n });\n await chmod(filePath, 0o600);\n } catch (err) {\n log.debug(\n `extraction-judge-training: append failed (non-fatal): ${err instanceof Error ? err.message : String(err)}`\n );\n }\n}\n\n/**\n * Read all training rows from the configured directory. Returns an empty\n * array when the directory is missing. Malformed lines are skipped and\n * counted in the returned `malformed` tally.\n */\nexport async function readJudgeTrainingPairs(\n options: Pick<JudgeTrainingOptions, \"directory\">\n): Promise<{ rows: JudgeTrainingPair[]; malformed: number }> {\n const dir = resolveTrainingDir({ enabled: true, ...options });\n try {\n const dirStat = await lstat(dir);\n if (dirStat.isSymbolicLink()) {\n throw new Error(\"Judge training directory must not be a symlink\");\n }\n if (!dirStat.isDirectory()) {\n throw new Error(\"Judge training path must be a directory\");\n }\n } catch (err) {\n const code = (err as NodeJS.ErrnoException).code;\n if (code === \"ENOENT\") return { rows: [], malformed: 0 };\n throw err;\n }\n\n let entries: string[];\n try {\n entries = await readdir(dir);\n } catch (err) {\n const code = (err as NodeJS.ErrnoException).code;\n if (code === \"ENOENT\") return { rows: [], malformed: 0 };\n throw err;\n }\n\n const rows: JudgeTrainingPair[] = [];\n let malformed = 0;\n const resolvedDir = await realpath(dir);\n // Sort so reads are deterministic across platforms.\n entries.sort();\n for (const name of entries) {\n if (!name.endsWith(\".jsonl\")) continue;\n const filePath = path.join(dir, name);\n let fileStat: Awaited<ReturnType<typeof lstat>>;\n try {\n fileStat = await lstat(filePath);\n } catch (err) {\n const code = (err as NodeJS.ErrnoException).code;\n if (code === \"ENOENT\") continue;\n throw err;\n }\n if (fileStat.isSymbolicLink() || !fileStat.isFile()) continue;\n\n let resolvedFilePath: string;\n try {\n resolvedFilePath = await realpath(filePath);\n } catch (err) {\n const code = (err as NodeJS.ErrnoException).code;\n if (code === \"ENOENT\") continue;\n throw err;\n }\n if (!isPathInsideDirectory(resolvedFilePath, resolvedDir)) continue;\n\n const raw = await readFile(resolvedFilePath, \"utf-8\");\n for (const line of raw.split(\"\\n\")) {\n if (!line.trim()) continue;\n let parsed: unknown;\n try {\n parsed = JSON.parse(line);\n } catch {\n malformed += 1;\n continue;\n }\n if (!isValidTrainingPair(parsed)) {\n malformed += 1;\n continue;\n }\n rows.push(parsed);\n }\n }\n return { rows, malformed };\n}\n\n/**\n * Structural validator matching the persisted schema. Forward-compat: an\n * unknown `verdictKind` string is treated as malformed (strict training\n * signal — we do not want to admit unlabelled gibberish into a trainer).\n */\nexport function isValidTrainingPair(value: unknown): value is JudgeTrainingPair {\n if (typeof value !== \"object\" || value === null || Array.isArray(value)) {\n return false;\n }\n const p = value as Record<string, unknown>;\n if (p.version !== 1) return false;\n if (typeof p.ts !== \"string\") return false;\n if (typeof p.candidateText !== \"string\") return false;\n if (typeof p.candidateCategory !== \"string\") return false;\n if (p.verdictKind !== \"accept\" && p.verdictKind !== \"reject\" && p.verdictKind !== \"defer\") {\n return false;\n }\n if (typeof p.reason !== \"string\") return false;\n if (p.candidateConfidence !== undefined && typeof p.candidateConfidence !== \"number\") {\n return false;\n }\n if (p.priorDeferrals !== undefined && typeof p.priorDeferrals !== \"number\") {\n return false;\n }\n if (\n p.groundTruthLabel !== undefined &&\n p.groundTruthLabel !== \"accept\" &&\n p.groundTruthLabel !== \"reject\" &&\n p.groundTruthLabel !== \"defer\"\n ) {\n return false;\n }\n return true;\n}\n"],"mappings":";;;;;AAsBA,OAAO,UAAU;AACjB,SAAS,eAAe;AACxB,SAAS,YAAY,OAAO,OAAO,OAAO,UAAU,SAAS,gBAAgB;AA4C7E,SAAS,YAAY,GAAmB;AACtC,QAAM,OAAO,QAAQ;AACrB,MAAI,MAAM,OAAO,EAAE,WAAW,IAAI,KAAK,EAAE,WAAW,KAAK,GAAG;AAC1D,WAAO,OAAO,EAAE,MAAM,CAAC;AAAA,EACzB;AACA,MAAI,MAAM,WAAW,EAAE,WAAW,QAAQ,KAAK,EAAE,WAAW,SAAS,GAAG;AACtE,WAAO,OAAO,EAAE,MAAM,CAAC;AAAA,EACzB;AACA,MAAI,MAAM,aAAa,EAAE,WAAW,UAAU,KAAK,EAAE,WAAW,WAAW,GAAG;AAC5E,WAAO,OAAO,EAAE,MAAM,CAAC;AAAA,EACzB;AACA,SAAO;AACT;AAEO,SAAS,mBAAmB,SAAuC;AACxE,MAAI,QAAQ,aAAa,QAAQ,UAAU,SAAS,GAAG;AAGrD,WAAO,YAAY,QAAQ,SAAS;AAAA,EACtC;AACA,SAAO,KAAK,KAAK,QAAQ,GAAG,WAAW,gBAAgB;AACzD;AAEA,SAAS,UAAU,KAAqB;AAItC,QAAM,KAAK,KAAK,MAAM,GAAG;AACzB,QAAM,IAAI,OAAO,SAAS,EAAE,IAAI,IAAI,KAAK,EAAE,IAAI,oBAAI,KAAK;AACxD,QAAM,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,SAAS,GAAG,GAAG;AAC1D,QAAM,MAAM,EAAE,YAAY,IAAI,GAAG,SAAS,EAAE,SAAS,GAAG,GAAG;AAC3D,QAAM,KAAK,EAAE,WAAW,EAAE,SAAS,EAAE,SAAS,GAAG,GAAG;AACpD,SAAO,GAAG,IAAI,IAAI,EAAE,IAAI,EAAE;AAC5B;AAEO,SAAS,oBAAoB,WAAmB,KAAqB;AAC1E,SAAO,KAAK,KAAK,WAAW,GAAG,UAAU,GAAG,CAAC,QAAQ;AACvD;AAEA,SAAS,sBAAsB,UAAkB,WAA4B;AAC3E,QAAM,WAAW,KAAK,SAAS,WAAW,QAAQ;AAClD,SAAO,aAAa,MAAO,CAAC,SAAS,WAAW,IAAI,KAAK,CAAC,KAAK,WAAW,QAAQ;AACpF;AAOA,eAAsB,wBAAwB,KAAwB,SAA8C;AAClH,MAAI,CAAC,QAAQ,QAAS;AACtB,QAAM,MAAM,mBAAmB,OAAO;AACtC,QAAM,WAAW,oBAAoB,KAAK,IAAI,EAAE;AAChD,MAAI;AACF,UAAM,MAAM,KAAK,EAAE,WAAW,MAAM,MAAM,IAAM,CAAC;AACjD,UAAM,WAAW,UAAU,GAAG,KAAK,UAAU,GAAG,CAAC;AAAA,GAAM;AAAA,MACrD,UAAU;AAAA,MACV,MAAM;AAAA,IACR,CAAC;AACD,UAAM,MAAM,UAAU,GAAK;AAAA,EAC7B,SAAS,KAAK;AACZ,QAAI;AAAA,MACF,yDAAyD,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAAA,IAC3G;AAAA,EACF;AACF;AAOA,eAAsB,uBACpB,SAC2D;AAC3D,QAAM,MAAM,mBAAmB,EAAE,SAAS,MAAM,GAAG,QAAQ,CAAC;AAC5D,MAAI;AACF,UAAM,UAAU,MAAM,MAAM,GAAG;AAC/B,QAAI,QAAQ,eAAe,GAAG;AAC5B,YAAM,IAAI,MAAM,gDAAgD;AAAA,IAClE;AACA,QAAI,CAAC,QAAQ,YAAY,GAAG;AAC1B,YAAM,IAAI,MAAM,yCAAyC;AAAA,IAC3D;AAAA,EACF,SAAS,KAAK;AACZ,UAAM,OAAQ,IAA8B;AAC5C,QAAI,SAAS,SAAU,QAAO,EAAE,MAAM,CAAC,GAAG,WAAW,EAAE;AACvD,UAAM;AAAA,EACR;AAEA,MAAI;AACJ,MAAI;AACF,cAAU,MAAM,QAAQ,GAAG;AAAA,EAC7B,SAAS,KAAK;AACZ,UAAM,OAAQ,IAA8B;AAC5C,QAAI,SAAS,SAAU,QAAO,EAAE,MAAM,CAAC,GAAG,WAAW,EAAE;AACvD,UAAM;AAAA,EACR;AAEA,QAAM,OAA4B,CAAC;AACnC,MAAI,YAAY;AAChB,QAAM,cAAc,MAAM,SAAS,GAAG;AAEtC,UAAQ,KAAK;AACb,aAAW,QAAQ,SAAS;AAC1B,QAAI,CAAC,KAAK,SAAS,QAAQ,EAAG;AAC9B,UAAM,WAAW,KAAK,KAAK,KAAK,IAAI;AACpC,QAAI;AACJ,QAAI;AACF,iBAAW,MAAM,MAAM,QAAQ;AAAA,IACjC,SAAS,KAAK;AACZ,YAAM,OAAQ,IAA8B;AAC5C,UAAI,SAAS,SAAU;AACvB,YAAM;AAAA,IACR;AACA,QAAI,SAAS,eAAe,KAAK,CAAC,SAAS,OAAO,EAAG;AAErD,QAAI;AACJ,QAAI;AACF,yBAAmB,MAAM,SAAS,QAAQ;AAAA,IAC5C,SAAS,KAAK;AACZ,YAAM,OAAQ,IAA8B;AAC5C,UAAI,SAAS,SAAU;AACvB,YAAM;AAAA,IACR;AACA,QAAI,CAAC,sBAAsB,kBAAkB,WAAW,EAAG;AAE3D,UAAM,MAAM,MAAM,SAAS,kBAAkB,OAAO;AACpD,eAAW,QAAQ,IAAI,MAAM,IAAI,GAAG;AAClC,UAAI,CAAC,KAAK,KAAK,EAAG;AAClB,UAAI;AACJ,UAAI;AACF,iBAAS,KAAK,MAAM,IAAI;AAAA,MAC1B,QAAQ;AACN,qBAAa;AACb;AAAA,MACF;AACA,UAAI,CAAC,oBAAoB,MAAM,GAAG;AAChC,qBAAa;AACb;AAAA,MACF;AACA,WAAK,KAAK,MAAM;AAAA,IAClB;AAAA,EACF;AACA,SAAO,EAAE,MAAM,UAAU;AAC3B;AAOO,SAAS,oBAAoB,OAA4C;AAC9E,MAAI,OAAO,UAAU,YAAY,UAAU,QAAQ,MAAM,QAAQ,KAAK,GAAG;AACvE,WAAO;AAAA,EACT;AACA,QAAM,IAAI;AACV,MAAI,EAAE,YAAY,EAAG,QAAO;AAC5B,MAAI,OAAO,EAAE,OAAO,SAAU,QAAO;AACrC,MAAI,OAAO,EAAE,kBAAkB,SAAU,QAAO;AAChD,MAAI,OAAO,EAAE,sBAAsB,SAAU,QAAO;AACpD,MAAI,EAAE,gBAAgB,YAAY,EAAE,gBAAgB,YAAY,EAAE,gBAAgB,SAAS;AACzF,WAAO;AAAA,EACT;AACA,MAAI,OAAO,EAAE,WAAW,SAAU,QAAO;AACzC,MAAI,EAAE,wBAAwB,UAAa,OAAO,EAAE,wBAAwB,UAAU;AACpF,WAAO;AAAA,EACT;AACA,MAAI,EAAE,mBAAmB,UAAa,OAAO,EAAE,mBAAmB,UAAU;AAC1E,WAAO;AAAA,EACT;AACA,MACE,EAAE,qBAAqB,UACvB,EAAE,qBAAqB,YACvB,EAAE,qBAAqB,YACvB,EAAE,qBAAqB,SACvB;AACA,WAAO;AAAA,EACT;AACA,SAAO;AACT;","names":[]}
@@ -140,7 +140,7 @@ import {
140
140
  } from "./chunk-YAZNBMNF.js";
141
141
  import {
142
142
  recordJudgeTrainingPair
143
- } from "./chunk-LCR46JY5.js";
143
+ } from "./chunk-7YX23JBA.js";
144
144
  import {
145
145
  createDeferCountMap,
146
146
  createVerdictCache,
@@ -12441,4 +12441,4 @@ export {
12441
12441
  resolvePersistedMemoryRelativePath,
12442
12442
  Orchestrator
12443
12443
  };
12444
- //# sourceMappingURL=chunk-YJ6QCQNE.js.map
12444
+ //# sourceMappingURL=chunk-HJNQQICM.js.map
@@ -4,7 +4,7 @@ import {
4
4
  recordJudgeTrainingPair,
5
5
  resolveTrainingDir,
6
6
  trainingFilePathFor
7
- } from "./chunk-LCR46JY5.js";
7
+ } from "./chunk-7YX23JBA.js";
8
8
  import "./chunk-2ODBA7MQ.js";
9
9
  import "./chunk-PZ5AY32C.js";
10
10
  export {
package/dist/index.js CHANGED
@@ -182,7 +182,7 @@ import {
182
182
  saveTaxonomy,
183
183
  validateSlug,
184
184
  validateTaxonomy
185
- } from "./chunk-YJ6QCQNE.js";
185
+ } from "./chunk-HJNQQICM.js";
186
186
  import "./chunk-5RIRL3XL.js";
187
187
  import {
188
188
  migrateFromEngram,
@@ -265,7 +265,7 @@ import {
265
265
  planRecallMode
266
266
  } from "./chunk-PD6O7AXF.js";
267
267
  import "./chunk-YAZNBMNF.js";
268
- import "./chunk-LCR46JY5.js";
268
+ import "./chunk-7YX23JBA.js";
269
269
  import {
270
270
  clearVerdictCache,
271
271
  createVerdictCache,
@@ -26,7 +26,7 @@ import {
26
26
  sanitizeSessionKeyForFilename,
27
27
  shouldFilterLifecycleRecallCandidate,
28
28
  summarizeGraphShadowComparison
29
- } from "./chunk-YJ6QCQNE.js";
29
+ } from "./chunk-HJNQQICM.js";
30
30
  import "./chunk-5RIRL3XL.js";
31
31
  import "./chunk-KVEVLBKC.js";
32
32
  import "./chunk-BFBF3XEF.js";
@@ -73,7 +73,7 @@ import "./chunk-VU3SVYMA.js";
73
73
  import "./chunk-H63EDPFJ.js";
74
74
  import "./chunk-PD6O7AXF.js";
75
75
  import "./chunk-YAZNBMNF.js";
76
- import "./chunk-LCR46JY5.js";
76
+ import "./chunk-7YX23JBA.js";
77
77
  import "./chunk-C4SQJZAF.js";
78
78
  import "./chunk-KM2A35EO.js";
79
79
  import "./chunk-4RA3C3EV.js";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@remnic/core",
3
- "version": "9.3.608",
3
+ "version": "9.3.610",
4
4
  "description": "Framework-agnostic Remnic memory engine — orchestrator, storage, extraction, search, trust zones",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -22,7 +22,7 @@
22
22
 
23
23
  import path from "node:path";
24
24
  import { homedir } from "node:os";
25
- import { appendFile, chmod, mkdir, readFile, readdir } from "node:fs/promises";
25
+ import { appendFile, chmod, lstat, mkdir, readFile, readdir, realpath } from "node:fs/promises";
26
26
  import { log } from "./logger.js";
27
27
  import type { JudgeVerdictKind } from "./extraction-judge.js";
28
28
 
@@ -101,22 +101,21 @@ function dateStamp(iso: string): string {
101
101
  return `${yyyy}-${mm}-${dd}`;
102
102
  }
103
103
 
104
- export function trainingFilePathFor(
105
- directory: string,
106
- iso: string,
107
- ): string {
104
+ export function trainingFilePathFor(directory: string, iso: string): string {
108
105
  return path.join(directory, `${dateStamp(iso)}.jsonl`);
109
106
  }
110
107
 
108
+ function isPathInsideDirectory(filePath: string, directory: string): boolean {
109
+ const relative = path.relative(directory, filePath);
110
+ return relative === "" || (!relative.startsWith("..") && !path.isAbsolute(relative));
111
+ }
112
+
111
113
  /**
112
114
  * Append a single training row. Fails open — write errors are logged at
113
115
  * debug level and swallowed, same policy as the telemetry emitter.
114
116
  * No-op when `options.enabled` is false.
115
117
  */
116
- export async function recordJudgeTrainingPair(
117
- row: JudgeTrainingPair,
118
- options: JudgeTrainingOptions,
119
- ): Promise<void> {
118
+ export async function recordJudgeTrainingPair(row: JudgeTrainingPair, options: JudgeTrainingOptions): Promise<void> {
120
119
  if (!options.enabled) return;
121
120
  const dir = resolveTrainingDir(options);
122
121
  const filePath = trainingFilePathFor(dir, row.ts);
@@ -129,7 +128,7 @@ export async function recordJudgeTrainingPair(
129
128
  await chmod(filePath, 0o600);
130
129
  } catch (err) {
131
130
  log.debug(
132
- `extraction-judge-training: append failed (non-fatal): ${err instanceof Error ? err.message : String(err)}`,
131
+ `extraction-judge-training: append failed (non-fatal): ${err instanceof Error ? err.message : String(err)}`
133
132
  );
134
133
  }
135
134
  }
@@ -140,9 +139,23 @@ export async function recordJudgeTrainingPair(
140
139
  * counted in the returned `malformed` tally.
141
140
  */
142
141
  export async function readJudgeTrainingPairs(
143
- options: Pick<JudgeTrainingOptions, "directory">,
142
+ options: Pick<JudgeTrainingOptions, "directory">
144
143
  ): Promise<{ rows: JudgeTrainingPair[]; malformed: number }> {
145
144
  const dir = resolveTrainingDir({ enabled: true, ...options });
145
+ try {
146
+ const dirStat = await lstat(dir);
147
+ if (dirStat.isSymbolicLink()) {
148
+ throw new Error("Judge training directory must not be a symlink");
149
+ }
150
+ if (!dirStat.isDirectory()) {
151
+ throw new Error("Judge training path must be a directory");
152
+ }
153
+ } catch (err) {
154
+ const code = (err as NodeJS.ErrnoException).code;
155
+ if (code === "ENOENT") return { rows: [], malformed: 0 };
156
+ throw err;
157
+ }
158
+
146
159
  let entries: string[];
147
160
  try {
148
161
  entries = await readdir(dir);
@@ -154,11 +167,33 @@ export async function readJudgeTrainingPairs(
154
167
 
155
168
  const rows: JudgeTrainingPair[] = [];
156
169
  let malformed = 0;
170
+ const resolvedDir = await realpath(dir);
157
171
  // Sort so reads are deterministic across platforms.
158
172
  entries.sort();
159
173
  for (const name of entries) {
160
174
  if (!name.endsWith(".jsonl")) continue;
161
- const raw = await readFile(path.join(dir, name), "utf-8");
175
+ const filePath = path.join(dir, name);
176
+ let fileStat: Awaited<ReturnType<typeof lstat>>;
177
+ try {
178
+ fileStat = await lstat(filePath);
179
+ } catch (err) {
180
+ const code = (err as NodeJS.ErrnoException).code;
181
+ if (code === "ENOENT") continue;
182
+ throw err;
183
+ }
184
+ if (fileStat.isSymbolicLink() || !fileStat.isFile()) continue;
185
+
186
+ let resolvedFilePath: string;
187
+ try {
188
+ resolvedFilePath = await realpath(filePath);
189
+ } catch (err) {
190
+ const code = (err as NodeJS.ErrnoException).code;
191
+ if (code === "ENOENT") continue;
192
+ throw err;
193
+ }
194
+ if (!isPathInsideDirectory(resolvedFilePath, resolvedDir)) continue;
195
+
196
+ const raw = await readFile(resolvedFilePath, "utf-8");
162
197
  for (const line of raw.split("\n")) {
163
198
  if (!line.trim()) continue;
164
199
  let parsed: unknown;
@@ -192,18 +227,11 @@ export function isValidTrainingPair(value: unknown): value is JudgeTrainingPair
192
227
  if (typeof p.ts !== "string") return false;
193
228
  if (typeof p.candidateText !== "string") return false;
194
229
  if (typeof p.candidateCategory !== "string") return false;
195
- if (
196
- p.verdictKind !== "accept" &&
197
- p.verdictKind !== "reject" &&
198
- p.verdictKind !== "defer"
199
- ) {
230
+ if (p.verdictKind !== "accept" && p.verdictKind !== "reject" && p.verdictKind !== "defer") {
200
231
  return false;
201
232
  }
202
233
  if (typeof p.reason !== "string") return false;
203
- if (
204
- p.candidateConfidence !== undefined &&
205
- typeof p.candidateConfidence !== "number"
206
- ) {
234
+ if (p.candidateConfidence !== undefined && typeof p.candidateConfidence !== "number") {
207
235
  return false;
208
236
  }
209
237
  if (p.priorDeferrals !== undefined && typeof p.priorDeferrals !== "number") {
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/extraction-judge-training.ts"],"sourcesContent":["/**\n * Extraction Judge Training Data Shim (issue #562, PR 4).\n *\n * Opt-in collector for `(candidate_text, verdict_kind, reason,\n * ground_truth_label?)` tuples. Rows are appended to JSONL files under\n * `~/.remnic/judge-training/<YYYY-MM-DD>.jsonl` so operators can ship the\n * data into a future GRPO training pipeline without exfiltrating live\n * memory content through the regular observation ledger.\n *\n * Gating:\n * - Off by default. Must be explicitly enabled via\n * `collectJudgeTrainingPairs: true` in plugin config.\n * - The ground-truth label is always optional — labels are added out-of-\n * band once reviewers disambiguate the candidate's fate.\n *\n * Privacy: the row carries only what the judge already sees — the\n * candidate text and its metadata. It does NOT carry session keys,\n * principal IDs, or any user identifiers. The file lives in the user's\n * home directory rather than the shared memory directory so it is never\n * committed, sync'd, or bundled into exports.\n */\n\nimport path from \"node:path\";\nimport { homedir } from \"node:os\";\nimport { appendFile, chmod, mkdir, readFile, readdir } from \"node:fs/promises\";\nimport { log } from \"./logger.js\";\nimport type { JudgeVerdictKind } from \"./extraction-judge.js\";\n\n/**\n * Persisted training row. Intentionally minimal: just the signal needed\n * to train a judge replacement policy. Schema version is tagged so future\n * readers can migrate older rows.\n */\nexport interface JudgeTrainingPair {\n version: 1;\n ts: string; // ISO-8601\n candidateText: string;\n candidateCategory: string;\n candidateConfidence?: number;\n verdictKind: JudgeVerdictKind;\n reason: string;\n /**\n * Number of prior deferrals when the verdict was resolved. `0` for the\n * first resolution; only set when known (defer pathway).\n */\n priorDeferrals?: number;\n /**\n * Optional human-applied ground-truth label. Added after the fact by a\n * reviewer / labelling script; not present on fresh rows.\n */\n groundTruthLabel?: JudgeVerdictKind;\n}\n\nexport interface JudgeTrainingOptions {\n enabled: boolean;\n /**\n * Override for the output directory. Defaults to\n * `~/.remnic/judge-training`. Tests pass a temp path here.\n */\n directory?: string;\n}\n\n/**\n * Expand a leading `~` / `~/` / `$HOME/` / `${HOME}/` to the process home\n * directory. Node's `fs` APIs do not expand `~` themselves (CLAUDE.md\n * gotcha 17), so every user-facing path input must be funnelled through\n * this helper before it reaches the filesystem.\n */\nfunction expandTilde(p: string): string {\n const home = homedir();\n if (p === \"~\" || p.startsWith(\"~/\") || p.startsWith(\"~\\\\\")) {\n return home + p.slice(1);\n }\n if (p === \"$HOME\" || p.startsWith(\"$HOME/\") || p.startsWith(\"$HOME\\\\\")) {\n return home + p.slice(5);\n }\n if (p === \"${HOME}\" || p.startsWith(\"${HOME}/\") || p.startsWith(\"${HOME}\\\\\")) {\n return home + p.slice(7);\n }\n return p;\n}\n\nexport function resolveTrainingDir(options: JudgeTrainingOptions): string {\n if (options.directory && options.directory.length > 0) {\n // Expand `~` / `$HOME` in the override so operators can write the\n // config as the user sees it (CLAUDE.md gotcha 17).\n return expandTilde(options.directory);\n }\n return path.join(homedir(), \".remnic\", \"judge-training\");\n}\n\nfunction dateStamp(iso: string): string {\n // `YYYY-MM-DD` from an ISO-8601 string. Falls back to today on a parse\n // failure rather than throwing — the caller already wrote a row and the\n // timestamp is best-effort.\n const ms = Date.parse(iso);\n const d = Number.isFinite(ms) ? new Date(ms) : new Date();\n const yyyy = d.getUTCFullYear().toString().padStart(4, \"0\");\n const mm = (d.getUTCMonth() + 1).toString().padStart(2, \"0\");\n const dd = d.getUTCDate().toString().padStart(2, \"0\");\n return `${yyyy}-${mm}-${dd}`;\n}\n\nexport function trainingFilePathFor(\n directory: string,\n iso: string,\n): string {\n return path.join(directory, `${dateStamp(iso)}.jsonl`);\n}\n\n/**\n * Append a single training row. Fails open — write errors are logged at\n * debug level and swallowed, same policy as the telemetry emitter.\n * No-op when `options.enabled` is false.\n */\nexport async function recordJudgeTrainingPair(\n row: JudgeTrainingPair,\n options: JudgeTrainingOptions,\n): Promise<void> {\n if (!options.enabled) return;\n const dir = resolveTrainingDir(options);\n const filePath = trainingFilePathFor(dir, row.ts);\n try {\n await mkdir(dir, { recursive: true, mode: 0o700 });\n await appendFile(filePath, `${JSON.stringify(row)}\\n`, {\n encoding: \"utf-8\",\n mode: 0o600,\n });\n await chmod(filePath, 0o600);\n } catch (err) {\n log.debug(\n `extraction-judge-training: append failed (non-fatal): ${err instanceof Error ? err.message : String(err)}`,\n );\n }\n}\n\n/**\n * Read all training rows from the configured directory. Returns an empty\n * array when the directory is missing. Malformed lines are skipped and\n * counted in the returned `malformed` tally.\n */\nexport async function readJudgeTrainingPairs(\n options: Pick<JudgeTrainingOptions, \"directory\">,\n): Promise<{ rows: JudgeTrainingPair[]; malformed: number }> {\n const dir = resolveTrainingDir({ enabled: true, ...options });\n let entries: string[];\n try {\n entries = await readdir(dir);\n } catch (err) {\n const code = (err as NodeJS.ErrnoException).code;\n if (code === \"ENOENT\") return { rows: [], malformed: 0 };\n throw err;\n }\n\n const rows: JudgeTrainingPair[] = [];\n let malformed = 0;\n // Sort so reads are deterministic across platforms.\n entries.sort();\n for (const name of entries) {\n if (!name.endsWith(\".jsonl\")) continue;\n const raw = await readFile(path.join(dir, name), \"utf-8\");\n for (const line of raw.split(\"\\n\")) {\n if (!line.trim()) continue;\n let parsed: unknown;\n try {\n parsed = JSON.parse(line);\n } catch {\n malformed += 1;\n continue;\n }\n if (!isValidTrainingPair(parsed)) {\n malformed += 1;\n continue;\n }\n rows.push(parsed);\n }\n }\n return { rows, malformed };\n}\n\n/**\n * Structural validator matching the persisted schema. Forward-compat: an\n * unknown `verdictKind` string is treated as malformed (strict training\n * signal — we do not want to admit unlabelled gibberish into a trainer).\n */\nexport function isValidTrainingPair(value: unknown): value is JudgeTrainingPair {\n if (typeof value !== \"object\" || value === null || Array.isArray(value)) {\n return false;\n }\n const p = value as Record<string, unknown>;\n if (p.version !== 1) return false;\n if (typeof p.ts !== \"string\") return false;\n if (typeof p.candidateText !== \"string\") return false;\n if (typeof p.candidateCategory !== \"string\") return false;\n if (\n p.verdictKind !== \"accept\" &&\n p.verdictKind !== \"reject\" &&\n p.verdictKind !== \"defer\"\n ) {\n return false;\n }\n if (typeof p.reason !== \"string\") return false;\n if (\n p.candidateConfidence !== undefined &&\n typeof p.candidateConfidence !== \"number\"\n ) {\n return false;\n }\n if (p.priorDeferrals !== undefined && typeof p.priorDeferrals !== \"number\") {\n return false;\n }\n if (\n p.groundTruthLabel !== undefined &&\n p.groundTruthLabel !== \"accept\" &&\n p.groundTruthLabel !== \"reject\" &&\n p.groundTruthLabel !== \"defer\"\n ) {\n return false;\n }\n return true;\n}\n"],"mappings":";;;;;AAsBA,OAAO,UAAU;AACjB,SAAS,eAAe;AACxB,SAAS,YAAY,OAAO,OAAO,UAAU,eAAe;AA4C5D,SAAS,YAAY,GAAmB;AACtC,QAAM,OAAO,QAAQ;AACrB,MAAI,MAAM,OAAO,EAAE,WAAW,IAAI,KAAK,EAAE,WAAW,KAAK,GAAG;AAC1D,WAAO,OAAO,EAAE,MAAM,CAAC;AAAA,EACzB;AACA,MAAI,MAAM,WAAW,EAAE,WAAW,QAAQ,KAAK,EAAE,WAAW,SAAS,GAAG;AACtE,WAAO,OAAO,EAAE,MAAM,CAAC;AAAA,EACzB;AACA,MAAI,MAAM,aAAa,EAAE,WAAW,UAAU,KAAK,EAAE,WAAW,WAAW,GAAG;AAC5E,WAAO,OAAO,EAAE,MAAM,CAAC;AAAA,EACzB;AACA,SAAO;AACT;AAEO,SAAS,mBAAmB,SAAuC;AACxE,MAAI,QAAQ,aAAa,QAAQ,UAAU,SAAS,GAAG;AAGrD,WAAO,YAAY,QAAQ,SAAS;AAAA,EACtC;AACA,SAAO,KAAK,KAAK,QAAQ,GAAG,WAAW,gBAAgB;AACzD;AAEA,SAAS,UAAU,KAAqB;AAItC,QAAM,KAAK,KAAK,MAAM,GAAG;AACzB,QAAM,IAAI,OAAO,SAAS,EAAE,IAAI,IAAI,KAAK,EAAE,IAAI,oBAAI,KAAK;AACxD,QAAM,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,SAAS,GAAG,GAAG;AAC1D,QAAM,MAAM,EAAE,YAAY,IAAI,GAAG,SAAS,EAAE,SAAS,GAAG,GAAG;AAC3D,QAAM,KAAK,EAAE,WAAW,EAAE,SAAS,EAAE,SAAS,GAAG,GAAG;AACpD,SAAO,GAAG,IAAI,IAAI,EAAE,IAAI,EAAE;AAC5B;AAEO,SAAS,oBACd,WACA,KACQ;AACR,SAAO,KAAK,KAAK,WAAW,GAAG,UAAU,GAAG,CAAC,QAAQ;AACvD;AAOA,eAAsB,wBACpB,KACA,SACe;AACf,MAAI,CAAC,QAAQ,QAAS;AACtB,QAAM,MAAM,mBAAmB,OAAO;AACtC,QAAM,WAAW,oBAAoB,KAAK,IAAI,EAAE;AAChD,MAAI;AACF,UAAM,MAAM,KAAK,EAAE,WAAW,MAAM,MAAM,IAAM,CAAC;AACjD,UAAM,WAAW,UAAU,GAAG,KAAK,UAAU,GAAG,CAAC;AAAA,GAAM;AAAA,MACrD,UAAU;AAAA,MACV,MAAM;AAAA,IACR,CAAC;AACD,UAAM,MAAM,UAAU,GAAK;AAAA,EAC7B,SAAS,KAAK;AACZ,QAAI;AAAA,MACF,yDAAyD,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAAA,IAC3G;AAAA,EACF;AACF;AAOA,eAAsB,uBACpB,SAC2D;AAC3D,QAAM,MAAM,mBAAmB,EAAE,SAAS,MAAM,GAAG,QAAQ,CAAC;AAC5D,MAAI;AACJ,MAAI;AACF,cAAU,MAAM,QAAQ,GAAG;AAAA,EAC7B,SAAS,KAAK;AACZ,UAAM,OAAQ,IAA8B;AAC5C,QAAI,SAAS,SAAU,QAAO,EAAE,MAAM,CAAC,GAAG,WAAW,EAAE;AACvD,UAAM;AAAA,EACR;AAEA,QAAM,OAA4B,CAAC;AACnC,MAAI,YAAY;AAEhB,UAAQ,KAAK;AACb,aAAW,QAAQ,SAAS;AAC1B,QAAI,CAAC,KAAK,SAAS,QAAQ,EAAG;AAC9B,UAAM,MAAM,MAAM,SAAS,KAAK,KAAK,KAAK,IAAI,GAAG,OAAO;AACxD,eAAW,QAAQ,IAAI,MAAM,IAAI,GAAG;AAClC,UAAI,CAAC,KAAK,KAAK,EAAG;AAClB,UAAI;AACJ,UAAI;AACF,iBAAS,KAAK,MAAM,IAAI;AAAA,MAC1B,QAAQ;AACN,qBAAa;AACb;AAAA,MACF;AACA,UAAI,CAAC,oBAAoB,MAAM,GAAG;AAChC,qBAAa;AACb;AAAA,MACF;AACA,WAAK,KAAK,MAAM;AAAA,IAClB;AAAA,EACF;AACA,SAAO,EAAE,MAAM,UAAU;AAC3B;AAOO,SAAS,oBAAoB,OAA4C;AAC9E,MAAI,OAAO,UAAU,YAAY,UAAU,QAAQ,MAAM,QAAQ,KAAK,GAAG;AACvE,WAAO;AAAA,EACT;AACA,QAAM,IAAI;AACV,MAAI,EAAE,YAAY,EAAG,QAAO;AAC5B,MAAI,OAAO,EAAE,OAAO,SAAU,QAAO;AACrC,MAAI,OAAO,EAAE,kBAAkB,SAAU,QAAO;AAChD,MAAI,OAAO,EAAE,sBAAsB,SAAU,QAAO;AACpD,MACE,EAAE,gBAAgB,YAClB,EAAE,gBAAgB,YAClB,EAAE,gBAAgB,SAClB;AACA,WAAO;AAAA,EACT;AACA,MAAI,OAAO,EAAE,WAAW,SAAU,QAAO;AACzC,MACE,EAAE,wBAAwB,UAC1B,OAAO,EAAE,wBAAwB,UACjC;AACA,WAAO;AAAA,EACT;AACA,MAAI,EAAE,mBAAmB,UAAa,OAAO,EAAE,mBAAmB,UAAU;AAC1E,WAAO;AAAA,EACT;AACA,MACE,EAAE,qBAAqB,UACvB,EAAE,qBAAqB,YACvB,EAAE,qBAAqB,YACvB,EAAE,qBAAqB,SACvB;AACA,WAAO;AAAA,EACT;AACA,SAAO;AACT;","names":[]}