@ijfw/memory-server 1.4.3 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. package/fixtures/truncation-corpus/_generate-corpus.js +367 -0
  2. package/fixtures/truncation-corpus/fx-01-clean-exit-01/events.jsonl +2 -0
  3. package/fixtures/truncation-corpus/fx-01-clean-exit-01/intent-journal.jsonl +2 -0
  4. package/fixtures/truncation-corpus/fx-01-clean-exit-01/meta.json +18 -0
  5. package/fixtures/truncation-corpus/fx-01-clean-exit-01/target/.ijfw/state/workflow.json +1 -0
  6. package/fixtures/truncation-corpus/fx-01-clean-exit-02/events.jsonl +2 -0
  7. package/fixtures/truncation-corpus/fx-01-clean-exit-02/intent-journal.jsonl +2 -0
  8. package/fixtures/truncation-corpus/fx-01-clean-exit-02/meta.json +18 -0
  9. package/fixtures/truncation-corpus/fx-01-clean-exit-02/target/.ijfw/state/workflow.json +1 -0
  10. package/fixtures/truncation-corpus/fx-01-clean-exit-03/events.jsonl +2 -0
  11. package/fixtures/truncation-corpus/fx-01-clean-exit-03/intent-journal.jsonl +2 -0
  12. package/fixtures/truncation-corpus/fx-01-clean-exit-03/meta.json +18 -0
  13. package/fixtures/truncation-corpus/fx-01-clean-exit-03/target/.ijfw/state/workflow.json +1 -0
  14. package/fixtures/truncation-corpus/fx-01-clean-exit-04/events.jsonl +2 -0
  15. package/fixtures/truncation-corpus/fx-01-clean-exit-04/intent-journal.jsonl +2 -0
  16. package/fixtures/truncation-corpus/fx-01-clean-exit-04/meta.json +18 -0
  17. package/fixtures/truncation-corpus/fx-01-clean-exit-04/target/.ijfw/state/workflow.json +1 -0
  18. package/fixtures/truncation-corpus/fx-01-clean-exit-05/events.jsonl +2 -0
  19. package/fixtures/truncation-corpus/fx-01-clean-exit-05/intent-journal.jsonl +2 -0
  20. package/fixtures/truncation-corpus/fx-01-clean-exit-05/meta.json +18 -0
  21. package/fixtures/truncation-corpus/fx-01-clean-exit-05/target/.ijfw/state/workflow.json +1 -0
  22. package/fixtures/truncation-corpus/fx-02-mid-overwrite-01/events.jsonl +1 -0
  23. package/fixtures/truncation-corpus/fx-02-mid-overwrite-01/intent-journal.jsonl +3 -0
  24. package/fixtures/truncation-corpus/fx-02-mid-overwrite-01/meta.json +18 -0
  25. package/fixtures/truncation-corpus/fx-02-mid-overwrite-01/snapshots/v-midO-1-advance.json +11 -0
  26. package/fixtures/truncation-corpus/fx-02-mid-overwrite-01/target/.ijfw/state/workflow.json +1 -0
  27. package/fixtures/truncation-corpus/fx-02-mid-overwrite-02/events.jsonl +1 -0
  28. package/fixtures/truncation-corpus/fx-02-mid-overwrite-02/intent-journal.jsonl +3 -0
  29. package/fixtures/truncation-corpus/fx-02-mid-overwrite-02/meta.json +18 -0
  30. package/fixtures/truncation-corpus/fx-02-mid-overwrite-02/snapshots/v-midO-2-advance.json +11 -0
  31. package/fixtures/truncation-corpus/fx-02-mid-overwrite-02/target/.ijfw/state/workflow.json +1 -0
  32. package/fixtures/truncation-corpus/fx-02-mid-overwrite-03/events.jsonl +1 -0
  33. package/fixtures/truncation-corpus/fx-02-mid-overwrite-03/intent-journal.jsonl +3 -0
  34. package/fixtures/truncation-corpus/fx-02-mid-overwrite-03/meta.json +18 -0
  35. package/fixtures/truncation-corpus/fx-02-mid-overwrite-03/snapshots/v-midO-3-advance.json +11 -0
  36. package/fixtures/truncation-corpus/fx-02-mid-overwrite-03/target/.ijfw/state/workflow.json +1 -0
  37. package/fixtures/truncation-corpus/fx-02-mid-overwrite-04/events.jsonl +1 -0
  38. package/fixtures/truncation-corpus/fx-02-mid-overwrite-04/intent-journal.jsonl +3 -0
  39. package/fixtures/truncation-corpus/fx-02-mid-overwrite-04/meta.json +18 -0
  40. package/fixtures/truncation-corpus/fx-02-mid-overwrite-04/snapshots/v-midO-4-advance.json +11 -0
  41. package/fixtures/truncation-corpus/fx-02-mid-overwrite-04/target/.ijfw/state/workflow.json +1 -0
  42. package/fixtures/truncation-corpus/fx-02-mid-overwrite-05/events.jsonl +1 -0
  43. package/fixtures/truncation-corpus/fx-02-mid-overwrite-05/intent-journal.jsonl +3 -0
  44. package/fixtures/truncation-corpus/fx-02-mid-overwrite-05/meta.json +18 -0
  45. package/fixtures/truncation-corpus/fx-02-mid-overwrite-05/snapshots/v-midO-5-advance.json +11 -0
  46. package/fixtures/truncation-corpus/fx-02-mid-overwrite-05/target/.ijfw/state/workflow.json +1 -0
  47. package/fixtures/truncation-corpus/fx-03-mid-append-01/events.jsonl +1 -0
  48. package/fixtures/truncation-corpus/fx-03-mid-append-01/intent-journal.jsonl +3 -0
  49. package/fixtures/truncation-corpus/fx-03-mid-append-01/meta.json +18 -0
  50. package/fixtures/truncation-corpus/fx-03-mid-append-01/target/.ijfw/blackboard/decisions.jsonl +1 -0
  51. package/fixtures/truncation-corpus/fx-03-mid-append-02/events.jsonl +1 -0
  52. package/fixtures/truncation-corpus/fx-03-mid-append-02/intent-journal.jsonl +3 -0
  53. package/fixtures/truncation-corpus/fx-03-mid-append-02/meta.json +18 -0
  54. package/fixtures/truncation-corpus/fx-03-mid-append-02/target/.ijfw/blackboard/decisions.jsonl +1 -0
  55. package/fixtures/truncation-corpus/fx-03-mid-append-03/events.jsonl +1 -0
  56. package/fixtures/truncation-corpus/fx-03-mid-append-03/intent-journal.jsonl +3 -0
  57. package/fixtures/truncation-corpus/fx-03-mid-append-03/meta.json +18 -0
  58. package/fixtures/truncation-corpus/fx-03-mid-append-03/target/.ijfw/blackboard/decisions.jsonl +1 -0
  59. package/fixtures/truncation-corpus/fx-03-mid-append-04/events.jsonl +1 -0
  60. package/fixtures/truncation-corpus/fx-03-mid-append-04/intent-journal.jsonl +3 -0
  61. package/fixtures/truncation-corpus/fx-03-mid-append-04/meta.json +18 -0
  62. package/fixtures/truncation-corpus/fx-03-mid-append-04/target/.ijfw/blackboard/decisions.jsonl +1 -0
  63. package/fixtures/truncation-corpus/fx-03-mid-append-05/events.jsonl +1 -0
  64. package/fixtures/truncation-corpus/fx-03-mid-append-05/intent-journal.jsonl +3 -0
  65. package/fixtures/truncation-corpus/fx-03-mid-append-05/meta.json +18 -0
  66. package/fixtures/truncation-corpus/fx-03-mid-append-05/target/.ijfw/blackboard/decisions.jsonl +1 -0
  67. package/fixtures/truncation-corpus/fx-04-no-events-01/events.jsonl +0 -0
  68. package/fixtures/truncation-corpus/fx-04-no-events-01/intent-journal.jsonl +1 -0
  69. package/fixtures/truncation-corpus/fx-04-no-events-01/meta.json +18 -0
  70. package/fixtures/truncation-corpus/fx-04-no-events-01/snapshots/v-noEv-1-set-phase.json +11 -0
  71. package/fixtures/truncation-corpus/fx-04-no-events-01/target/.ijfw/state/workflow.json +1 -0
  72. package/fixtures/truncation-corpus/fx-04-no-events-02/events.jsonl +0 -0
  73. package/fixtures/truncation-corpus/fx-04-no-events-02/intent-journal.jsonl +1 -0
  74. package/fixtures/truncation-corpus/fx-04-no-events-02/meta.json +18 -0
  75. package/fixtures/truncation-corpus/fx-04-no-events-02/snapshots/v-noEv-2-set-phase.json +11 -0
  76. package/fixtures/truncation-corpus/fx-04-no-events-02/target/.ijfw/state/workflow.json +1 -0
  77. package/fixtures/truncation-corpus/fx-04-no-events-03/events.jsonl +0 -0
  78. package/fixtures/truncation-corpus/fx-04-no-events-03/intent-journal.jsonl +1 -0
  79. package/fixtures/truncation-corpus/fx-04-no-events-03/meta.json +18 -0
  80. package/fixtures/truncation-corpus/fx-04-no-events-03/snapshots/v-noEv-3-set-phase.json +11 -0
  81. package/fixtures/truncation-corpus/fx-04-no-events-03/target/.ijfw/state/workflow.json +1 -0
  82. package/fixtures/truncation-corpus/fx-04-no-events-04/events.jsonl +0 -0
  83. package/fixtures/truncation-corpus/fx-04-no-events-04/intent-journal.jsonl +1 -0
  84. package/fixtures/truncation-corpus/fx-04-no-events-04/meta.json +18 -0
  85. package/fixtures/truncation-corpus/fx-04-no-events-04/snapshots/v-noEv-4-set-phase.json +11 -0
  86. package/fixtures/truncation-corpus/fx-04-no-events-04/target/.ijfw/state/workflow.json +1 -0
  87. package/fixtures/truncation-corpus/fx-04-no-events-05/events.jsonl +0 -0
  88. package/fixtures/truncation-corpus/fx-04-no-events-05/intent-journal.jsonl +1 -0
  89. package/fixtures/truncation-corpus/fx-04-no-events-05/meta.json +18 -0
  90. package/fixtures/truncation-corpus/fx-04-no-events-05/snapshots/v-noEv-5-set-phase.json +11 -0
  91. package/fixtures/truncation-corpus/fx-04-no-events-05/target/.ijfw/state/workflow.json +1 -0
  92. package/fixtures/truncation-corpus/fx-05-error-terminated-01/events.jsonl +2 -0
  93. package/fixtures/truncation-corpus/fx-05-error-terminated-01/intent-journal.jsonl +3 -0
  94. package/fixtures/truncation-corpus/fx-05-error-terminated-01/meta.json +18 -0
  95. package/fixtures/truncation-corpus/fx-05-error-terminated-01/snapshots/v-errT-1-partial.json +11 -0
  96. package/fixtures/truncation-corpus/fx-05-error-terminated-01/target/.ijfw/state/workflow.json +1 -0
  97. package/fixtures/truncation-corpus/fx-05-error-terminated-02/events.jsonl +2 -0
  98. package/fixtures/truncation-corpus/fx-05-error-terminated-02/intent-journal.jsonl +3 -0
  99. package/fixtures/truncation-corpus/fx-05-error-terminated-02/meta.json +18 -0
  100. package/fixtures/truncation-corpus/fx-05-error-terminated-02/target/.ijfw/blackboard/decisions.jsonl +1 -0
  101. package/fixtures/truncation-corpus/fx-05-error-terminated-03/events.jsonl +2 -0
  102. package/fixtures/truncation-corpus/fx-05-error-terminated-03/intent-journal.jsonl +3 -0
  103. package/fixtures/truncation-corpus/fx-05-error-terminated-03/meta.json +18 -0
  104. package/fixtures/truncation-corpus/fx-05-error-terminated-03/snapshots/v-errT-3-partial.json +11 -0
  105. package/fixtures/truncation-corpus/fx-05-error-terminated-03/target/.ijfw/state/workflow.json +1 -0
  106. package/fixtures/truncation-corpus/fx-05-error-terminated-04/events.jsonl +2 -0
  107. package/fixtures/truncation-corpus/fx-05-error-terminated-04/intent-journal.jsonl +3 -0
  108. package/fixtures/truncation-corpus/fx-05-error-terminated-04/meta.json +18 -0
  109. package/fixtures/truncation-corpus/fx-05-error-terminated-04/target/.ijfw/blackboard/decisions.jsonl +1 -0
  110. package/fixtures/truncation-corpus/fx-05-error-terminated-05/events.jsonl +2 -0
  111. package/fixtures/truncation-corpus/fx-05-error-terminated-05/intent-journal.jsonl +3 -0
  112. package/fixtures/truncation-corpus/fx-05-error-terminated-05/meta.json +18 -0
  113. package/fixtures/truncation-corpus/fx-05-error-terminated-05/snapshots/v-errT-5-partial.json +11 -0
  114. package/fixtures/truncation-corpus/fx-05-error-terminated-05/target/.ijfw/state/workflow.json +1 -0
  115. package/package.json +1 -1
  116. package/src/active-extension-writer.js +144 -64
  117. package/src/api-client.js +43 -5
  118. package/src/audit-roster.js +80 -5
  119. package/src/blackboard.js +298 -6
  120. package/src/cli-run.js +33 -5
  121. package/src/codex-agents.js +96 -5
  122. package/src/cost/aggregator.js +39 -9
  123. package/src/cost/pricing.js +57 -0
  124. package/src/cost/readers/gemini.js +1 -1
  125. package/src/cross-audit-chunker.js +189 -0
  126. package/src/cross-dispatcher.js +124 -21
  127. package/src/cross-orchestrator-cli.js +550 -14
  128. package/src/cross-orchestrator.js +1171 -10
  129. package/src/cross-project-search.js +195 -9
  130. package/src/dashboard-client-planning.html +273 -0
  131. package/src/dashboard-client-waves.html +304 -0
  132. package/src/dashboard-client.html +17 -2
  133. package/src/dashboard-server.js +152 -0
  134. package/src/deploy-alerts.js +150 -0
  135. package/src/design/iframe-bridge.js +242 -0
  136. package/src/design-companion.js +144 -0
  137. package/src/dispatch/checkpoint-cli.js +97 -0
  138. package/src/dispatch/colon-syntax.js +81 -1
  139. package/src/dispatch/extension.js +27 -1
  140. package/src/dispatch/registry-cli.js +4 -1
  141. package/src/dispatch/wave-cli.js +323 -0
  142. package/src/dispatch/worktree-cli.js +40 -0
  143. package/src/dispatch-planner.js +97 -2
  144. package/src/dream/runner.mjs +47 -11
  145. package/src/dream/stage-runner.js +40 -0
  146. package/src/dream/state-file.js +102 -0
  147. package/src/extension-installer.js +70 -24
  148. package/src/extension-quota-tracker.js +4 -2
  149. package/src/extension-registry.js +289 -35
  150. package/src/feedback-detector.js +26 -0
  151. package/src/fs-lock.js +259 -7
  152. package/src/gate-result.js +95 -1
  153. package/src/hero-line.js +86 -5
  154. package/src/intent-router.js +35 -0
  155. package/src/lib/a11y-contract.js +117 -0
  156. package/src/lib/atomic-io.js +29 -8
  157. package/src/lib/cache-keepalive.js +150 -0
  158. package/src/lib/jsonl-rotation.js +104 -0
  159. package/src/lib/lighthouse-pillar.js +121 -0
  160. package/src/lib/llm-call.js +121 -0
  161. package/src/lib/playwright-baseline.js +205 -0
  162. package/src/lib/rekor-bridge.js +221 -0
  163. package/src/lib/repo-map.js +392 -0
  164. package/src/lib/shasum-verify.js +164 -0
  165. package/src/lib/sketches-gc.js +132 -0
  166. package/src/lib/tmp-suffix.js +62 -0
  167. package/src/lib/ui-review-runner.js +554 -0
  168. package/src/lib/uispec-drift.js +301 -0
  169. package/src/lib/uispec-intake.js +381 -0
  170. package/src/lib/worktree-guards.js +118 -0
  171. package/src/lib/worktree-recovery.js +100 -0
  172. package/src/memory/auto-linker.js +152 -0
  173. package/src/memory/benchmark.js +498 -0
  174. package/src/memory/dedup.js +126 -0
  175. package/src/memory/embedding-cache.js +136 -0
  176. package/src/memory/fact-extractor.js +168 -0
  177. package/src/memory/fts5.js +65 -1
  178. package/src/memory/migrations/004-bitemporal.js +91 -0
  179. package/src/memory/migrations/005-vector-cache.js +61 -0
  180. package/src/memory/migrations/006-obsidian-graph.js +46 -0
  181. package/src/memory/migrations/007-skill-telemetry.js +24 -0
  182. package/src/memory/migrations/008-write-provenance.js +41 -0
  183. package/src/memory/obsidian-parser.js +91 -0
  184. package/src/memory/query-dataview.js +86 -0
  185. package/src/memory/search.js +10 -0
  186. package/src/memory/temporal.js +529 -0
  187. package/src/memory/tokenize.js +10 -0
  188. package/src/memory-facts-handler.js +37 -0
  189. package/src/memory-feedback.js +260 -2
  190. package/src/model-refresh.js +292 -0
  191. package/src/observability/cost-anomaly.js +166 -0
  192. package/src/observability/evaluator-checkpoint-contract.js +117 -0
  193. package/src/observability/trace-id.js +163 -0
  194. package/src/orchestrator/agents-md-blackboard.js +152 -0
  195. package/src/orchestrator/checkpoint-contract.md +140 -0
  196. package/src/orchestrator/debug-trident.js +570 -0
  197. package/src/orchestrator/merge-block-aware.js +350 -0
  198. package/src/orchestrator/plan-checker.js +475 -0
  199. package/src/orchestrator/post-done-runner.js +249 -0
  200. package/src/orchestrator/review.js +136 -0
  201. package/src/orchestrator/runtime-loop.js +430 -0
  202. package/src/orchestrator/skill-telemetry-sink.js +29 -0
  203. package/src/orchestrator/skill-telemetry.js +37 -0
  204. package/src/orchestrator/state-events.js +459 -0
  205. package/src/orchestrator/state-sdk.js +1764 -0
  206. package/src/orchestrator/status-protocol.js +235 -0
  207. package/src/orchestrator/subagent-telemetry.js +452 -0
  208. package/src/orchestrator/termination.js +160 -0
  209. package/src/orchestrator/verification-gate.js +281 -0
  210. package/src/orchestrator/wave-state.js +564 -0
  211. package/src/orchestrator/worktree-provision.js +77 -0
  212. package/src/override-use-registry.js +111 -5
  213. package/src/receipts.js +36 -4
  214. package/src/recovery/checkpoint.js +56 -3
  215. package/src/recovery/code-fixer.js +656 -0
  216. package/src/recovery/truncation.js +317 -0
  217. package/src/redactor.js +75 -6
  218. package/src/runtime-mediator.js +15 -0
  219. package/src/sanitizer.js +10 -0
  220. package/src/search-hybrid.js +139 -0
  221. package/src/server.js +603 -59
  222. package/src/swarm/worktree.js +27 -4
  223. package/src/swarm-config.js +113 -12
  224. package/src/team/domain-templates/book.json +51 -0
  225. package/src/team/domain-templates/business.json +41 -0
  226. package/src/team/domain-templates/content.json +50 -0
  227. package/src/team/domain-templates/design.json +44 -0
  228. package/src/team/domain-templates/research.json +41 -0
  229. package/src/team/domain-templates/software.json +40 -0
  230. package/src/team/generator.js +278 -3
  231. package/src/team/modify.js +203 -0
  232. package/src/team/schemas.js +48 -0
  233. package/src/update-apply.js +19 -3
@@ -0,0 +1,498 @@
1
+ // IJFW v1.5.0 T22 (Wave E) -- memory benchmark harness.
2
+ //
3
+ // Genre-matches mem0 / Zep / Graphiti published memory benchmarks: same axes,
4
+ // same numeric shape, run against IJFW's own 3-tier store. Output is a JSON
5
+ // artifact that can be diffed across builds + cited in marketing.
6
+ //
7
+ // 3-tier model recap:
8
+ // hot = markdown files at <root>/.ijfw/memory/*.md (linear regex; always
9
+ // available; used as auto-index source for warm + fallback when warm
10
+ // is cold).
11
+ // warm = SQLite FTS5 at <root>/.ijfw/index/memory.db (porter unicode61).
12
+ // Inserts go via indexEntry(); searches via searchFts5() (warm path)
13
+ // or searchMemory() (warm-first w/ hot fallback envelope).
14
+ // cold = pgvector / embedded vectors (migration 005 + embedding-cache).
15
+ // Not exercised here by design -- the cold path needs a model and
16
+ // this harness ships with zero new deps. Axis is RESERVED so future
17
+ // runs can drop in numbers without changing the artifact schema.
18
+ //
19
+ // Axes measured (industry-aligned subset; not all of mem0's "LoCoMo" axes
20
+ // translate -- this is a coding-memory benchmark, not a conversational one):
21
+ //
22
+ // 1. ingest_throughput_rps -- inserts / second (warm tier, single writer).
23
+ // 2. ingest_latency_ms -- p50 / p95 / p99 per-insert.
24
+ // 3. query_latency_ms -- p50 / p95 / p99 per warm-tier search.
25
+ // 4. recall_at_k -- recall@1, @3, @5 against a known
26
+ // query-answer set (porter stemming +
27
+ // synonym expansion both count as hits
28
+ // if they resolve to the gold doc).
29
+ // 5. storage_bytes_per_memory -- on-disk db size / row count.
30
+ // 6. corpus_size -- # rows + # query-answer pairs.
31
+ // 7. hot_tier_query_latency_ms -- linear-regex hot tier (provenance check;
32
+ // should be slower than warm on >50 rows
33
+ // -- if it isn't, warm tier is broken).
34
+ // 8. cold_tier -- { available: false, reason: 'no-embedding-model' }
35
+ // reserved schema slot.
36
+ // 9. staleness_filter -- { default_excludes_stale: bool,
37
+ // stale_visible_with_flag: bool }
38
+ // sanity proof the warm filter still gates.
39
+ //
40
+ // What this harness does NOT do (yet -- on the v1.5.0 backlog):
41
+ // - cross-tier promotion timing (hot->warm happens at first search; warm
42
+ // never promotes to cold without a model). Future T23+ work owns the
43
+ // bi-temporal + decay-on-retrieval axes.
44
+ // - multi-writer throughput. Single-writer is the published norm because
45
+ // SQLite's BEGIN IMMEDIATE queue dominates; that's already covered by
46
+ // test-memory-fts5.js's concurrent-writers test.
47
+ // - memory cost in RAM. SQLite page cache is bounded; an "RSS during
48
+ // benchmark" axis adds value but needs platform-specific tooling.
49
+ //
50
+ // Determinism:
51
+ // - Default corpus is seeded; same input -> same gold mapping. Latency
52
+ // numbers will vary across machines; that's expected (and is why we
53
+ // report p50/p95/p99, not means).
54
+ // - The default queries are chosen so a porter-stemmed FTS5 over the
55
+ // default corpus hits recall@5 == 1.0 -- the test asserts this exact
56
+ // property so a regression in synonyms / tokenizer / search ordering
57
+ // gets caught as soon as it lands.
58
+ //
59
+ // Output:
60
+ // { axes, corpus, runs, results, schema_version, ijfw_version, ts_iso }
61
+ // written to <out_dir>/memory-<unix_ms>.json by default. Result-only
62
+ // callers (in-test) can call runBenchmark({write: false}) to skip the
63
+ // write and consume the JS object directly.
64
+ //
65
+ // Public surface:
66
+ // runBenchmark(opts) -> Promise<results>
67
+ // loadDefaultCorpus() -> { docs, queries }
68
+ // buildSyntheticCorpus(n, seed) -> { docs, queries }
69
+ // percentile(arr, p) -> number (utility, exported for tests)
70
+ // BENCHMARK_SCHEMA_VERSION -- bump on shape change
71
+ //
72
+ // Zero new deps; uses only what fts5.js + search.js + node:* already pull in.
73
+
74
+ import { mkdtempSync, mkdirSync, writeFileSync, statSync, existsSync, rmSync } from 'node:fs';
75
+ import { join, resolve } from 'node:path';
76
+ import { tmpdir } from 'node:os';
77
+ import { performance } from 'node:perf_hooks';
78
+
79
+ import {
80
+ openDb as openMemoryDb,
81
+ indexEntry,
82
+ searchFts5,
83
+ rowCount,
84
+ closeDb,
85
+ dbPathFor,
86
+ } from './fts5.js';
87
+ import { searchMemory } from './search.js';
88
+
89
+ export const BENCHMARK_SCHEMA_VERSION = 1; // version of the results-artifact shape (reported as schema_version); bump whenever that shape changes
90
+
91
+ // --- Percentile helper ------------------------------------------------------
92
+ //
93
/**
 * Linear-interpolated percentile of a numeric array.
 *
 * Exported so the test file can assert on the same values the harness reports.
 *
 * @param {number[]} values - samples; the array is copied, never mutated.
 * @param {number} p - percentile in the closed range [0, 100].
 * @returns {number} the interpolated percentile, or 0 when `values` is not an
 *   array or is empty (this check runs before `p` is validated).
 * @throws {RangeError} when `p` is outside [0, 100] (NaN included).
 */
export function percentile(values, p) {
  const hasSamples = Array.isArray(values) && values.length > 0;
  if (!hasSamples) return 0;

  const inRange = p >= 0 && p <= 100;
  if (!inRange) {
    throw new RangeError('percentile: p must be in [0,100]');
  }

  const sorted = values.slice();
  sorted.sort((x, y) => x - y);

  const lastIndex = sorted.length - 1;
  if (lastIndex === 0) return sorted[0];

  // Fractional position of the requested percentile within the sorted samples.
  const rank = (p / 100) * lastIndex;
  const below = Math.floor(rank);
  const above = Math.ceil(rank);

  // Exact hit on a sample: return it as-is rather than blending neighbours.
  if (below === above) return sorted[below];

  const weight = rank - below;
  return sorted[below] * (1 - weight) + sorted[above] * weight;
}
107
+
108
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param {number[]} values - samples to average.
 * @returns {number} sum divided by count, or 0 for an empty list.
 */
function mean(values) {
  const count = values.length;
  if (!count) return 0;

  let total = 0;
  for (let i = 0; i < count; i += 1) {
    total += values[i];
  }
  return total / count;
}
114
+
115
/**
 * Deterministic PRNG (mulberry32) so the synthetic corpus is reproducible
 * across runs + machines. Same seed => same docs/queries/gold-mapping.
 *
 * @param {number} seed - coerced to an unsigned 32-bit integer.
 * @returns {() => number} generator yielding floats in [0, 1).
 */
function mulberry32(seed) {
  let state = seed >>> 0;

  const next = () => {
    // Advance the 32-bit state by the fixed increment, wrapping at 2^32.
    state = (state + 0x6d2b79f5) >>> 0;

    const mixed = Math.imul(state ^ (state >>> 15), state | 1);
    const folded = mixed ^ (mixed + Math.imul(mixed ^ (mixed >>> 7), mixed | 61));

    // Force unsigned, then scale by 2^32 into [0, 1).
    return ((folded ^ (folded >>> 14)) >>> 0) / 4294967296;
  };

  return next;
}
127
+
128
+ // --- Corpora -----------------------------------------------------------------
129
+ //
130
+ // The default corpus is hand-curated so the gold-answer set is unambiguous
131
+ // (each query has exactly one "right" doc). The synthetic corpus is for
132
+ // scaling tests -- it generates N docs, each with a unique anchor token, and
133
+ // builds 1 query per doc.
134
+
135
// Curated corpus of short, realistic coding-memory facts, stored as
// [id, body] tuples. Every body carries at least one token that appears in
// no other body, which is what lets the query set map each query to a single
// gold doc. Order matters: callers index and destructure these tuples.
const DEFAULT_DOCS = [
  // -- auth --
  ['auth-jwt', 'JWT bearer tokens authenticate API requests with HS256 signing.'],
  ['auth-oauth', 'OAuth2 device authorization flow uniqueoauthflowanchor for CLI clients.'],
  ['auth-session', 'Server session validateSession reads cookie and DB row.'],

  // -- caching --
  ['cache-redis', 'Redis cache with TTL eviction policy for hot keys.'],
  ['cache-lru', 'LRU memoization of expensive query results in process memory.'],
  ['cache-cdn', 'CDN edge caching with revalidate semantics on stale entries.'],

  // -- databases --
  ['db-postgres', 'Postgres transactions with serializable isolation level.'],
  ['db-sqlite', 'SQLite WAL journal mode for concurrent readers.'],
  ['db-migration', 'Schema migration runner advances uniqueuserversionanchor on success.'],

  // -- search --
  ['search-fts5', 'FTS5 porter tokenizer stems plural and verb forms.'],
  ['search-vector', 'Vector cosine similarity over embedded chunks for semantic recall.'],
  ['search-hybrid', 'Hybrid search blends BM25 lexical scores with vector cosine ranking.'],

  // -- memory model --
  ['mem-tiers', 'Memory tiers: hot markdown, warm FTS5 index, cold vector store.'],
  ['mem-staleness', 'Cascading staleness propagation flags superseded memory rows.'],
  ['mem-temporal', 'Bitemporal validity windows replace prior facts when contradicted.'],

  // -- retrieval-augmented generation --
  ['rag-chunk', 'Chunk documents into 512 token windows with 64 token overlap.'],
  ['rag-rerank', 'Reranking with a uniquererankeranchor after first pass dense retrieval.'],
  ['rag-eval', 'Retrieval eval uses recall at k and mean reciprocal rank metrics.'],

  // -- MCP --
  ['mcp-protocol', 'MCP uniquejsonrpcanchor over stdio with initialized handshake.'],
  ['mcp-tools', 'MCP tools list advertises uniqueijfwstateanchor and memory search.'],
  ['mcp-resources', 'MCP resources expose project memory markdown files for reading.'],

  // -- CLIs --
  ['cli-codex', 'Codex CLI honours uniquecodexagentsanchor and emits stdout JSON.'],
  ['cli-gemini', 'Gemini CLI uses uniquegeminimdanchor and MCP registration config.'],
  ['cli-cursor', 'Cursor MCP config lives at uniquecursorconfanchor with workspace scope.'],

  // -- hooks --
  ['hook-pretool', 'PreToolUse hook validates arguments before tool execution.'],
  ['hook-posttool', 'PostToolUse hook reports observations back into the session.'],
  ['hook-stop', 'Stop hook closes the wave and writes ship gate receipt.'],

  // -- planning --
  ['plan-spec', 'Spec phase clarifies what a phase delivers with ambiguity scoring.'],
  ['plan-review', 'Plan review fires uniquetridentanchor before execute begins.'],
  ['plan-execute', 'Execute phase dispatches subagents per wave with checkpoints.'],
];
171
+
172
// One query per curated doc. Each query is a single token taken from its gold
// doc's body and chosen so that, AFTER porter stemming, it matches no other
// doc in DEFAULT_DOCS. Single-token queries avoid FTS5 multi-token AND
// footguns and keep the gold-answer mapping unambiguous, so recall@1 does not
// hinge on how the ranker breaks ties between two matching docs.
//
// Tokens that look tempting but are NOT unique, and must not be used here:
//   - 'cosine'    -> appears in both search-vector and search-hybrid.
//   - 'wave'      -> appears in both hook-stop and plan-execute.
//   - 'staleness' -> porter-stems to 'stale', which also matches
//                    "stale entries" in cache-cdn.
//   - 'semantic'  -> shares a stem with "semantics" in cache-cdn.
const DEFAULT_QUERIES = [
  { q: 'HS256', gold: 'auth-jwt' },
  { q: 'uniqueoauthflowanchor', gold: 'auth-oauth' },
  { q: 'validateSession', gold: 'auth-session' },
  { q: 'Redis', gold: 'cache-redis' },
  { q: 'memoization', gold: 'cache-lru' },
  { q: 'revalidate', gold: 'cache-cdn' },
  { q: 'serializable', gold: 'db-postgres' },
  { q: 'WAL', gold: 'db-sqlite' },
  { q: 'uniqueuserversionanchor', gold: 'db-migration' },
  { q: 'porter', gold: 'search-fts5' },
  { q: 'similarity', gold: 'search-vector' },
  { q: 'BM25', gold: 'search-hybrid' },
  { q: 'tiers', gold: 'mem-tiers' },
  { q: 'superseded', gold: 'mem-staleness' },
  { q: 'bitemporal', gold: 'mem-temporal' },
  { q: 'overlap', gold: 'rag-chunk' },
  { q: 'uniquererankeranchor', gold: 'rag-rerank' },
  { q: 'reciprocal', gold: 'rag-eval' },
  { q: 'uniquejsonrpcanchor', gold: 'mcp-protocol' },
  { q: 'uniqueijfwstateanchor', gold: 'mcp-tools' },
  { q: 'resources', gold: 'mcp-resources' },
  { q: 'uniquecodexagentsanchor', gold: 'cli-codex' },
  { q: 'uniquegeminimdanchor', gold: 'cli-gemini' },
  { q: 'uniquecursorconfanchor', gold: 'cli-cursor' },
  { q: 'PreToolUse', gold: 'hook-pretool' },
  { q: 'PostToolUse', gold: 'hook-posttool' },
  { q: 'receipt', gold: 'hook-stop' },
  { q: 'ambiguity', gold: 'plan-spec' },
  { q: 'uniquetridentanchor', gold: 'plan-review' },
  { q: 'subagents', gold: 'plan-execute' },
];
208
+
209
/**
 * FTS5 query sanitizer -- replaces characters that FTS5 treats as operators
 * or column-qualifiers (hyphen, dot, colon, quotes, parentheses, `*`, ...)
 * with a space so a raw user query doesn't blow up with "no such column" /
 * "syntax error near". Mirrors what a production query layer would do;
 * published numbers can't depend on the caller hand-sanitizing every input.
 * Exported only for callers that want to share the sanitizer (and for tests).
 *
 * What is kept: letters, combining marks and digits from ANY script, plus
 * underscores and whitespace. Non-ASCII letters are kept on purpose -- an
 * ASCII-only class would turn "café" into "caf" and silently break recall for
 * non-English queries, while FTS5 accepts non-ASCII characters in barewords.
 * For pure-ASCII input the result is the same as the previous ASCII-only class.
 *
 * Not handled here: the uppercase bareword operators AND / OR / NOT / NEAR are
 * passed through untouched.
 *
 * @param {unknown} q - raw query; anything that is not a string yields ''.
 * @returns {string} sanitized query with runs of whitespace collapsed to a
 *   single space and no leading/trailing whitespace; may be ''.
 */
export function sanitizeFtsQuery(q) {
  if (typeof q !== 'string') return '';
  return q
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ')
    .replace(/\s+/g, ' ')
    .trim();
}
221
+
222
/**
 * Build a fresh copy of the curated corpus.
 *
 * Each call returns newly allocated doc and query objects, so callers can
 * mutate the result without touching the module-level tables.
 *
 * @returns {{ docs: Array<{id: string, body: string}>,
 *             queries: Array<{q: string, gold: string}> }}
 */
export function loadDefaultCorpus() {
  const docs = [];
  for (const [id, body] of DEFAULT_DOCS) {
    docs.push({ id, body });
  }

  const queries = [];
  for (const entry of DEFAULT_QUERIES) {
    queries.push({ ...entry });
  }

  return { docs, queries };
}
228
+
229
/**
 * Synthetic corpus for scaling runs at sizes the curated corpus can't reach
 * (100/500/1000).
 *
 * Doc i gets the body `syntheticanchor<i>token -- <filler>`, where the filler
 * is a curated-corpus body picked by a PRNG seeded with `seed`. The anchor
 * token is the doc's query, so every query has exactly one gold doc, and the
 * same (n, seed) pair always yields the same corpus.
 *
 * @param {number} [n=100] - number of docs (and queries) to generate.
 * @param {number} [seed=42] - PRNG seed controlling filler selection.
 * @returns {{ docs: Array<{id: string, body: string}>,
 *             queries: Array<{q: string, gold: string}> }}
 */
export function buildSyntheticCorpus(n = 100, seed = 42) {
  const nextRandom = mulberry32(seed);
  const fillerBodies = DEFAULT_DOCS.map(([, body]) => body);

  const docs = [];
  const queries = [];

  let index = 0;
  while (index < n) {
    const anchor = `syntheticanchor${index}token`;
    const id = `synth-${index}`;
    // Exactly one PRNG draw per doc keeps the sequence reproducible.
    const pick = Math.floor(nextRandom() * fillerBodies.length);

    docs.push({ id, body: `${anchor} -- ${fillerBodies[pick]}` });
    queries.push({ q: anchor, gold: id });
    index += 1;
  }

  return { docs, queries };
}
247
+
248
+ // --- The harness ------------------------------------------------------------
249
+
250
+ /**
251
+ * runBenchmark(opts) -> Promise<results>
252
+ *
253
+ * opts:
254
+ * corpus -- { docs:[{id,body}], queries:[{q,gold}] } | undefined (default)
255
+ * root -- existing project root to use; if absent, a temp dir is made
256
+ * and removed on completion.
257
+ * write -- write the JSON artifact to disk (default true).
258
+ * out_dir -- override artifact dir (default <root>/.ijfw/benchmarks).
259
+ * k_set -- recall@k values to compute (default [1, 3, 5]).
260
+ * warmup -- # warm-up queries before timed phase (default 3).
261
+ * query_runs -- # iterations through the full query set for latency stats
262
+ * (default 3 -- gives 3x #queries timed samples).
263
+ *
264
+ * returns the full results object even when write=false.
265
+ */
266
export async function runBenchmark(opts = {}) {
  // Runs the memory benchmark end to end: ingest the corpus, time warm-tier
  // (FTS5) queries and compute recall@k, sample the hot-linear path, measure
  // on-disk cost, sanity-check the staleness filter, then optionally write a
  // JSON artifact. Returns the results object whether or not it was written.
  const corpus = opts.corpus || loadDefaultCorpus();
  if (!corpus || !Array.isArray(corpus.docs) || !Array.isArray(corpus.queries)) {
    throw new Error('runBenchmark: corpus must be { docs, queries }');
  }
  const write = opts.write !== false;
  const kSet = (opts.k_set && opts.k_set.length) ? opts.k_set.slice() : [1, 3, 5];
  const warmup = Number.isInteger(opts.warmup) && opts.warmup >= 0 ? opts.warmup : 3;
  const queryRuns = Number.isInteger(opts.query_runs) && opts.query_runs > 0 ? opts.query_runs : 3;

  // No explicit root -> benchmark inside a throwaway temp dir that the
  // `finally` below removes again.
  let root = opts.root;
  let madeTmp = false;
  if (!root) {
    root = mkdtempSync(join(tmpdir(), 'ijfw-bench-'));
    madeTmp = true;
  } else {
    root = resolve(root);
    if (!existsSync(root)) mkdirSync(root, { recursive: true });
  }

  const startedAt = Date.now();
  const t0 = performance.now();
  let db = null;
  let results;

  try {
    db = await openMemoryDb(root);

    // --- Ingest phase ------------------------------------------------------
    // Map docId -> warm rowId so we can recall@k by gold doc later.
    const goldRowByDocId = new Map();
    const ingestLatencies = [];
    const ingestStart = performance.now();
    for (const doc of corpus.docs) {
      const t = performance.now();
      const inserted = indexEntry(db, { body: doc.body, source: doc.id, session_id: 'bench' });
      const ms = performance.now() - t;
      ingestLatencies.push(ms);
      goldRowByDocId.set(doc.id, Number(inserted.id));
    }
    const ingestElapsed = performance.now() - ingestStart;
    const ingestThroughput = corpus.docs.length / (ingestElapsed / 1000);

    // --- Warm-up queries (un-timed; primes prepared statements + page cache) -
    for (let i = 0; i < warmup && i < corpus.queries.length; i++) {
      const wq = sanitizeFtsQuery(corpus.queries[i].q);
      if (wq) {
        try { searchFts5(db, wq, 10); } catch { /* ignore warm-up faults */ }
      }
    }

    // --- Query phase: warm tier (FTS5) ------------------------------------
    const queryLatencies = [];
    // We compute recall@k by checking if gold is in the top-k of the result.
    const hitCounts = new Map(); // k -> #hits
    const totalQueries = corpus.queries.length * queryRuns;
    for (const k of kSet) hitCounts.set(k, 0);

    // Fetch once at the largest k; smaller k values are prefixes of that list.
    const maxK = Math.max(...kSet);
    for (let run = 0; run < queryRuns; run++) {
      for (const { q, gold } of corpus.queries) {
        const safeQ = sanitizeFtsQuery(q);
        const t = performance.now();
        let rows;
        try {
          rows = safeQ ? searchFts5(db, safeQ, maxK) : [];
        } catch {
          // A failing query still counts as a timed sample, scored as a miss.
          rows = [];
        }
        const ms = performance.now() - t;
        queryLatencies.push(ms);
        const goldRow = goldRowByDocId.get(gold);
        for (const k of kSet) {
          const topK = rows.slice(0, k);
          if (topK.some(r => Number(r.id) === goldRow)) {
            hitCounts.set(k, hitCounts.get(k) + 1);
          }
        }
      }
    }

    const recallAtK = {};
    for (const k of kSet) recallAtK[`recall@${k}`] = hitCounts.get(k) / totalQueries;

    // --- Storage cost ------------------------------------------------------
    const dbFile = dbPathFor(root);
    let dbBytes = 0;
    try { dbBytes = statSync(dbFile).size; } catch { /* db file may be -wal-suffixed in WAL mode; tolerate */ }
    const rowsIndexed = rowCount(db);
    const bytesPerMemory = rowsIndexed > 0 ? dbBytes / rowsIndexed : 0;

    // --- Hot-tier query provenance -----------------------------------------
    // Re-run a couple of queries through searchMemory() with an empty file
    // list to force the hot-linear fallback (warm tier is populated but the
    // call path returns hot when files==[]). Captures hot-tier latency as
    // a sanity column; it WILL be slower than warm on a 30-row corpus.
    const hotLatencies = [];
    for (const { q } of corpus.queries.slice(0, Math.min(5, corpus.queries.length))) {
      const safeQ = sanitizeFtsQuery(q);
      if (!safeQ) continue;
      const t = performance.now();
      try { searchMemory(safeQ, [], 10); } catch { /* hot-linear empty -> [], no throw */ }
      hotLatencies.push(performance.now() - t);
    }

    // --- Staleness filter sanity ------------------------------------------
    // Mark one row stale, prove the default filter hides it, prove
    // include_stale=true surfaces it.
    let defaultExcludesStale = null;
    let staleVisibleWithFlag = null;
    try {
      const firstDoc = corpus.docs[0];
      const rowId = goldRowByDocId.get(firstDoc.id);
      db.prepare('UPDATE memory_entries SET stale_candidate = 1 WHERE id = ?').run(rowId);
      try {
        const queryBody = sanitizeFtsQuery(
          firstDoc.body.split(/\s+/).filter(t => /^[a-zA-Z]+$/.test(t)).slice(0, 2).join(' ')
        );
        const defaultHits = queryBody ? searchFts5(db, queryBody, 20) : [];
        defaultExcludesStale = !defaultHits.some(r => Number(r.id) === rowId);
        const allHits = queryBody ? searchFts5(db, queryBody, 20, { include_stale: true }) : [];
        staleVisibleWithFlag = allHits.some(r => Number(r.id) === rowId);
      } finally {
        // Always undo the mutation, even when a probe query throws: with a
        // caller-supplied root the database outlives this run, and a row left
        // flagged stale would stay hidden from default searches.
        db.prepare('UPDATE memory_entries SET stale_candidate = 0 WHERE id = ?').run(rowId);
      }
    } catch {
      // Pre-v3 schema (no stale_candidate column) -- leave as null.
    }

    const totalElapsed = performance.now() - t0;

    results = {
      schema_version: BENCHMARK_SCHEMA_VERSION,
      ijfw_version: process.env.IJFW_VERSION || '1.5.0',
      ts_iso: new Date(startedAt).toISOString(),
      duration_ms: Math.round(totalElapsed * 1000) / 1000,
      corpus: {
        docs: corpus.docs.length,
        queries: corpus.queries.length,
        query_runs: queryRuns,
        total_query_samples: totalQueries,
      },
      axes: {
        ingest: {
          throughput_rps: round(ingestThroughput, 2),
          latency_ms: {
            p50: round(percentile(ingestLatencies, 50), 3),
            p95: round(percentile(ingestLatencies, 95), 3),
            p99: round(percentile(ingestLatencies, 99), 3),
            mean: round(mean(ingestLatencies), 3),
            min: round(Math.min(...ingestLatencies), 3),
            max: round(Math.max(...ingestLatencies), 3),
          },
        },
        query_warm_fts5: {
          latency_ms: {
            p50: round(percentile(queryLatencies, 50), 3),
            p95: round(percentile(queryLatencies, 95), 3),
            p99: round(percentile(queryLatencies, 99), 3),
            mean: round(mean(queryLatencies), 3),
            min: round(Math.min(...queryLatencies), 3),
            max: round(Math.max(...queryLatencies), 3),
          },
          recall: recallAtK,
        },
        query_hot_linear: {
          // sample only -- not the published number, just provenance.
          samples: hotLatencies.length,
          latency_ms: {
            p50: round(percentile(hotLatencies, 50), 3),
            p95: round(percentile(hotLatencies, 95), 3),
            mean: round(mean(hotLatencies), 3),
          },
        },
        query_cold_vector: {
          available: false,
          reason: 'no-embedding-model-bound-in-benchmark-harness',
        },
        storage: {
          db_bytes: dbBytes,
          rows_indexed: rowsIndexed,
          bytes_per_memory: round(bytesPerMemory, 2),
        },
        staleness_filter: {
          default_excludes_stale: defaultExcludesStale,
          stale_visible_with_flag: staleVisibleWithFlag,
        },
      },
    };

    // --- Write artifact ----------------------------------------------------
    if (write) {
      const outDir = opts.out_dir
        ? resolve(opts.out_dir)
        : join(resolveArtifactRoot(opts.root), '.ijfw', 'benchmarks');
      mkdirSync(outDir, { recursive: true });
      const artifactPath = join(outDir, `memory-${startedAt}.json`);
      writeFileSync(artifactPath, JSON.stringify(results, null, 2) + '\n', 'utf8');
      // Attached after serialising, so the path is on the returned object but
      // not inside the written file.
      results.artifact_path = artifactPath;
    }
  } finally {
    // Nested so the temp dir is removed even when closing the DB throws.
    try {
      if (db) closeDb(db);
    } finally {
      if (madeTmp) {
        try { rmSync(root, { recursive: true, force: true, maxRetries: 5, retryDelay: 50 }); } catch { /* tolerate */ }
      }
    }
  }

  return results;
}
475
+
476
// Round `x` to `n` decimal places so the artifact stays human-diffable.
// Anything that is not a finite number (NaN, +/-Infinity, non-numbers) is
// handed back untouched.
function round(x, n) {
  if (Number.isFinite(x)) {
    const scale = Math.pow(10, n);
    return Math.round(x * scale) / scale;
  }
  return x;
}
483
+
484
// Decide where benchmark artifacts live. An explicit (non-empty string) root
// wins; otherwise the benchmark ran in a throwaway temp dir, so artifacts go
// under the current working directory, which persists. Always returns an
// absolute path.
function resolveArtifactRoot(rootArg) {
  const hasExplicitRoot = typeof rootArg === 'string' && rootArg.length > 0;
  const base = hasExplicitRoot ? rootArg : process.cwd();
  return resolve(base);
}
491
+
492
+ export default { // Default export: one object exposing runBenchmark together with loadDefaultCorpus, buildSyntheticCorpus, percentile and BENCHMARK_SCHEMA_VERSION.
493
+ runBenchmark,
494
+ loadDefaultCorpus,
495
+ buildSyntheticCorpus,
496
+ percentile,
497
+ BENCHMARK_SCHEMA_VERSION,
498
+ };
@@ -0,0 +1,126 @@
1
+ /**
2
+ * H5.6 — Semantic dedup at ingest time.
3
+ *
4
+ * Competitors (Graphiti, mem0) dedup near-duplicate memories at ingest so a
5
+ * months-old project doesn't accrue 47 nearly-identical "decided to use
6
+ * Postgres" entries. IJFW historically appended on every store, so this
7
+ * module closes the bloat gap.
8
+ *
9
+ * Approach: cheap Jaccard similarity over token sets (same primitive that
10
+ * cross-audit-chunker.mergeFindings uses for finding clustering). Pure JS,
11
+ * zero deps, fully deterministic. No vector model required.
12
+ *
13
+ * Public surface:
14
+ * - tokenize(text) → Set<string>
15
+ * - jaccard(a, b) → number ∈ [0,1]
16
+ * - findNearDuplicate(content, recent, t?) → { match, similarity } | null
17
+ * - readDedupConfig(env?) → { enabled, threshold, windowSize }
18
+ *
19
+ * Env knobs (read at call time, NOT cached, so tests can flip per-call):
20
+ * - IJFW_DEDUP_OFF=1 (or "true") → disable entirely (findNearDuplicate always returns null)
21
+ * - IJFW_DEDUP_THRESHOLD=0.85 → Jaccard cutoff (default 0.85)
22
+ * - IJFW_DEDUP_WINDOW=50 → look back this many recent memories (default 50)
23
+ */
24
+
25
+ const DEFAULT_THRESHOLD = 0.85; // Jaccard cutoff used when IJFW_DEDUP_THRESHOLD is unset, empty, or not a finite number.
26
+ const DEFAULT_WINDOW = 50; // Look-back size used when IJFW_DEDUP_WINDOW is unset, empty, or does not parse to a positive integer.
27
+ // Token floor — strings under this length are noise (no real dedup signal).
28
+ const MIN_TOKEN_LEN = 3;
29
+
30
/**
 * tokenize(text)
 *
 * Lowercased word set, dropping tokens shorter than MIN_TOKEN_LEN so things
 * like "a", "is", "to" don't dominate the Jaccard ratio.
 *
 * Words are runs of Unicode letters, combining marks, digits and "_". For
 * plain ASCII input this splits exactly like `/\W+/`; unlike `\W`, it does
 * not treat accented or non-Latin letters as separators, so such text still
 * yields usable tokens instead of fragments or an empty set.
 *
 * @param {string} text
 * @returns {Set<string>} unique tokens; empty when `text` is not a string
 */
export function tokenize(text) {
  if (typeof text !== 'string') return new Set();
  return new Set(
    text.toLowerCase()
      .split(/[^\p{L}\p{M}\p{N}_]+/u)
      .filter(t => t.length >= MIN_TOKEN_LEN)
  );
}
44
+
45
/**
 * jaccard(a, b)
 *
 * Jaccard index |A ∩ B| / |A ∪ B| of two token sets. Either argument may be
 * a ready-made Set or anything else, in which case it is run through
 * tokenize() first. Two empty sets score 1; exactly one empty set scores 0.
 *
 * @param {Set<string>|string} a
 * @param {Set<string>|string} b
 * @returns {number} similarity in [0, 1]
 */
export function jaccard(a, b) {
  const left = a instanceof Set ? a : tokenize(a);
  const right = b instanceof Set ? b : tokenize(b);

  const leftEmpty = left.size === 0;
  const rightEmpty = right.size === 0;
  if (leftEmpty && rightEmpty) return 1;
  if (leftEmpty || rightEmpty) return 0;

  let shared = 0;
  left.forEach((token) => {
    if (right.has(token)) shared += 1;
  });

  const unionSize = left.size + right.size - shared;
  if (unionSize === 0) return 0;
  return shared / unionSize;
}
61
+
62
/**
 * readDedupConfig(env?)
 *
 * Resolve runtime config. Reads process.env unless `env` is supplied (for tests).
 *
 * - enabled:    false only when IJFW_DEDUP_OFF is exactly "1" or "true".
 * - threshold:  IJFW_DEDUP_THRESHOLD clamped to [0,1]; unset, blank or
 *               non-numeric values fall back to DEFAULT_THRESHOLD.
 * - windowSize: IJFW_DEDUP_WINDOW parsed as a base-10 integer and capped at
 *               500; unset, blank, unparsable or non-positive values fall back
 *               to DEFAULT_WINDOW.
 *
 * @param {Record<string, string|undefined>} [env]
 * @returns {{enabled:boolean, threshold:number, windowSize:number}}
 */
export function readDedupConfig(env = process.env) {
  // Treat whitespace-only strings as unset. Without this, Number('  ') === 0
  // would set the threshold to 0 and make every memory count as a duplicate.
  // Non-string values pass through untouched.
  const normalize = (raw) => {
    if (raw == null) return null;
    if (typeof raw !== 'string') return raw;
    const trimmed = raw.trim();
    return trimmed === '' ? null : trimmed;
  };

  const enabled = env.IJFW_DEDUP_OFF !== '1' && env.IJFW_DEDUP_OFF !== 'true';

  let threshold = DEFAULT_THRESHOLD;
  const rawThreshold = normalize(env.IJFW_DEDUP_THRESHOLD);
  if (rawThreshold !== null) {
    const n = Number(rawThreshold);
    if (Number.isFinite(n)) threshold = Math.max(0, Math.min(1, n));
  }

  let windowSize = DEFAULT_WINDOW;
  const rawWindow = normalize(env.IJFW_DEDUP_WINDOW);
  if (rawWindow !== null) {
    const n = Number.parseInt(rawWindow, 10);
    // n is an integer > 0 here, so only the upper bound needs clamping.
    if (Number.isFinite(n) && n > 0) windowSize = Math.min(500, n);
  }

  return { enabled, threshold, windowSize };
}
82
+
83
/**
 * findNearDuplicate(content, recentMemories, threshold?)
 *
 * Scans `recentMemories` in the order given and returns the first entry
 * whose Jaccard similarity to `content` meets or exceeds the threshold.
 * Each entry should be `{ id, content, ... }`; only `content` is read, and
 * entries without a string `content` are skipped. Returns `null` if nothing
 * matches, if `content` is blank or yields no tokens, if the list is empty
 * or not an array, or if dedup is disabled via readDedupConfig().
 *
 * No windowing or reordering happens here: callers wanting "most recent
 * match within the last N" should slice the tail and reverse before calling.
 * We don't reorder for them so behavior is predictable.
 *
 * @param {string} content
 * @param {Array<{id:string, content:string}>} recentMemories
 * @param {number} [threshold] — used when it is a number in [0,1]; otherwise
 *   the threshold from readDedupConfig() applies
 * @returns {{match:{id:string,content:string}, similarity:number} | null}
 */
export function findNearDuplicate(content, recentMemories, threshold) {
  if (typeof content !== 'string' || !content.trim()) return null;
  if (!Array.isArray(recentMemories) || recentMemories.length === 0) return null;

  // Config is read per call (not cached) so env changes take effect immediately.
  const cfg = readDedupConfig();
  if (!cfg.enabled) return null;
  const t = (typeof threshold === 'number' && threshold >= 0 && threshold <= 1)
    ? threshold
    : cfg.threshold;

  const tokContent = tokenize(content);
  // Empty token set → no signal. Don't claim dedup.
  if (tokContent.size === 0) return null;

  for (const mem of recentMemories) {
    if (!mem || typeof mem.content !== 'string') continue;
    const sim = jaccard(tokContent, tokenize(mem.content));
    // Short-circuit on first match; callers expect the most-recent
    // matching entry (assuming they pre-ordered).
    if (sim >= t) return { match: mem, similarity: sim };
  }
  return null;
}