@ijfw/memory-server 1.4.4 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/ijfw-memorize +14 -7
- package/fixtures/team/book.json +6 -6
- package/fixtures/team/business.json +146 -20
- package/fixtures/team/content.json +6 -6
- package/fixtures/team/design.json +148 -20
- package/fixtures/team/mixed.json +206 -27
- package/fixtures/team/research.json +146 -20
- package/fixtures/team/software.json +148 -20
- package/fixtures/truncation-corpus/_generate-corpus.js +367 -0
- package/fixtures/truncation-corpus/fx-01-clean-exit-01/events.jsonl +2 -0
- package/fixtures/truncation-corpus/fx-01-clean-exit-01/intent-journal.jsonl +2 -0
- package/fixtures/truncation-corpus/fx-01-clean-exit-01/meta.json +18 -0
- package/fixtures/truncation-corpus/fx-01-clean-exit-01/target/.ijfw/state/workflow.json +1 -0
- package/fixtures/truncation-corpus/fx-01-clean-exit-02/events.jsonl +2 -0
- package/fixtures/truncation-corpus/fx-01-clean-exit-02/intent-journal.jsonl +2 -0
- package/fixtures/truncation-corpus/fx-01-clean-exit-02/meta.json +18 -0
- package/fixtures/truncation-corpus/fx-01-clean-exit-02/target/.ijfw/state/workflow.json +1 -0
- package/fixtures/truncation-corpus/fx-01-clean-exit-03/events.jsonl +2 -0
- package/fixtures/truncation-corpus/fx-01-clean-exit-03/intent-journal.jsonl +2 -0
- package/fixtures/truncation-corpus/fx-01-clean-exit-03/meta.json +18 -0
- package/fixtures/truncation-corpus/fx-01-clean-exit-03/target/.ijfw/state/workflow.json +1 -0
- package/fixtures/truncation-corpus/fx-01-clean-exit-04/events.jsonl +2 -0
- package/fixtures/truncation-corpus/fx-01-clean-exit-04/intent-journal.jsonl +2 -0
- package/fixtures/truncation-corpus/fx-01-clean-exit-04/meta.json +18 -0
- package/fixtures/truncation-corpus/fx-01-clean-exit-04/target/.ijfw/state/workflow.json +1 -0
- package/fixtures/truncation-corpus/fx-01-clean-exit-05/events.jsonl +2 -0
- package/fixtures/truncation-corpus/fx-01-clean-exit-05/intent-journal.jsonl +2 -0
- package/fixtures/truncation-corpus/fx-01-clean-exit-05/meta.json +18 -0
- package/fixtures/truncation-corpus/fx-01-clean-exit-05/target/.ijfw/state/workflow.json +1 -0
- package/fixtures/truncation-corpus/fx-02-mid-overwrite-01/events.jsonl +1 -0
- package/fixtures/truncation-corpus/fx-02-mid-overwrite-01/intent-journal.jsonl +3 -0
- package/fixtures/truncation-corpus/fx-02-mid-overwrite-01/meta.json +18 -0
- package/fixtures/truncation-corpus/fx-02-mid-overwrite-01/snapshots/v-midO-1-advance.json +11 -0
- package/fixtures/truncation-corpus/fx-02-mid-overwrite-01/target/.ijfw/state/workflow.json +1 -0
- package/fixtures/truncation-corpus/fx-02-mid-overwrite-02/events.jsonl +1 -0
- package/fixtures/truncation-corpus/fx-02-mid-overwrite-02/intent-journal.jsonl +3 -0
- package/fixtures/truncation-corpus/fx-02-mid-overwrite-02/meta.json +18 -0
- package/fixtures/truncation-corpus/fx-02-mid-overwrite-02/snapshots/v-midO-2-advance.json +11 -0
- package/fixtures/truncation-corpus/fx-02-mid-overwrite-02/target/.ijfw/state/workflow.json +1 -0
- package/fixtures/truncation-corpus/fx-02-mid-overwrite-03/events.jsonl +1 -0
- package/fixtures/truncation-corpus/fx-02-mid-overwrite-03/intent-journal.jsonl +3 -0
- package/fixtures/truncation-corpus/fx-02-mid-overwrite-03/meta.json +18 -0
- package/fixtures/truncation-corpus/fx-02-mid-overwrite-03/snapshots/v-midO-3-advance.json +11 -0
- package/fixtures/truncation-corpus/fx-02-mid-overwrite-03/target/.ijfw/state/workflow.json +1 -0
- package/fixtures/truncation-corpus/fx-02-mid-overwrite-04/events.jsonl +1 -0
- package/fixtures/truncation-corpus/fx-02-mid-overwrite-04/intent-journal.jsonl +3 -0
- package/fixtures/truncation-corpus/fx-02-mid-overwrite-04/meta.json +18 -0
- package/fixtures/truncation-corpus/fx-02-mid-overwrite-04/snapshots/v-midO-4-advance.json +11 -0
- package/fixtures/truncation-corpus/fx-02-mid-overwrite-04/target/.ijfw/state/workflow.json +1 -0
- package/fixtures/truncation-corpus/fx-02-mid-overwrite-05/events.jsonl +1 -0
- package/fixtures/truncation-corpus/fx-02-mid-overwrite-05/intent-journal.jsonl +3 -0
- package/fixtures/truncation-corpus/fx-02-mid-overwrite-05/meta.json +18 -0
- package/fixtures/truncation-corpus/fx-02-mid-overwrite-05/snapshots/v-midO-5-advance.json +11 -0
- package/fixtures/truncation-corpus/fx-02-mid-overwrite-05/target/.ijfw/state/workflow.json +1 -0
- package/fixtures/truncation-corpus/fx-03-mid-append-01/events.jsonl +1 -0
- package/fixtures/truncation-corpus/fx-03-mid-append-01/intent-journal.jsonl +3 -0
- package/fixtures/truncation-corpus/fx-03-mid-append-01/meta.json +18 -0
- package/fixtures/truncation-corpus/fx-03-mid-append-01/target/.ijfw/blackboard/decisions.jsonl +1 -0
- package/fixtures/truncation-corpus/fx-03-mid-append-02/events.jsonl +1 -0
- package/fixtures/truncation-corpus/fx-03-mid-append-02/intent-journal.jsonl +3 -0
- package/fixtures/truncation-corpus/fx-03-mid-append-02/meta.json +18 -0
- package/fixtures/truncation-corpus/fx-03-mid-append-02/target/.ijfw/blackboard/decisions.jsonl +1 -0
- package/fixtures/truncation-corpus/fx-03-mid-append-03/events.jsonl +1 -0
- package/fixtures/truncation-corpus/fx-03-mid-append-03/intent-journal.jsonl +3 -0
- package/fixtures/truncation-corpus/fx-03-mid-append-03/meta.json +18 -0
- package/fixtures/truncation-corpus/fx-03-mid-append-03/target/.ijfw/blackboard/decisions.jsonl +1 -0
- package/fixtures/truncation-corpus/fx-03-mid-append-04/events.jsonl +1 -0
- package/fixtures/truncation-corpus/fx-03-mid-append-04/intent-journal.jsonl +3 -0
- package/fixtures/truncation-corpus/fx-03-mid-append-04/meta.json +18 -0
- package/fixtures/truncation-corpus/fx-03-mid-append-04/target/.ijfw/blackboard/decisions.jsonl +1 -0
- package/fixtures/truncation-corpus/fx-03-mid-append-05/events.jsonl +1 -0
- package/fixtures/truncation-corpus/fx-03-mid-append-05/intent-journal.jsonl +3 -0
- package/fixtures/truncation-corpus/fx-03-mid-append-05/meta.json +18 -0
- package/fixtures/truncation-corpus/fx-03-mid-append-05/target/.ijfw/blackboard/decisions.jsonl +1 -0
- package/fixtures/truncation-corpus/fx-04-no-events-01/events.jsonl +0 -0
- package/fixtures/truncation-corpus/fx-04-no-events-01/intent-journal.jsonl +1 -0
- package/fixtures/truncation-corpus/fx-04-no-events-01/meta.json +18 -0
- package/fixtures/truncation-corpus/fx-04-no-events-01/snapshots/v-noEv-1-set-phase.json +11 -0
- package/fixtures/truncation-corpus/fx-04-no-events-01/target/.ijfw/state/workflow.json +1 -0
- package/fixtures/truncation-corpus/fx-04-no-events-02/events.jsonl +0 -0
- package/fixtures/truncation-corpus/fx-04-no-events-02/intent-journal.jsonl +1 -0
- package/fixtures/truncation-corpus/fx-04-no-events-02/meta.json +18 -0
- package/fixtures/truncation-corpus/fx-04-no-events-02/snapshots/v-noEv-2-set-phase.json +11 -0
- package/fixtures/truncation-corpus/fx-04-no-events-02/target/.ijfw/state/workflow.json +1 -0
- package/fixtures/truncation-corpus/fx-04-no-events-03/events.jsonl +0 -0
- package/fixtures/truncation-corpus/fx-04-no-events-03/intent-journal.jsonl +1 -0
- package/fixtures/truncation-corpus/fx-04-no-events-03/meta.json +18 -0
- package/fixtures/truncation-corpus/fx-04-no-events-03/snapshots/v-noEv-3-set-phase.json +11 -0
- package/fixtures/truncation-corpus/fx-04-no-events-03/target/.ijfw/state/workflow.json +1 -0
- package/fixtures/truncation-corpus/fx-04-no-events-04/events.jsonl +0 -0
- package/fixtures/truncation-corpus/fx-04-no-events-04/intent-journal.jsonl +1 -0
- package/fixtures/truncation-corpus/fx-04-no-events-04/meta.json +18 -0
- package/fixtures/truncation-corpus/fx-04-no-events-04/snapshots/v-noEv-4-set-phase.json +11 -0
- package/fixtures/truncation-corpus/fx-04-no-events-04/target/.ijfw/state/workflow.json +1 -0
- package/fixtures/truncation-corpus/fx-04-no-events-05/events.jsonl +0 -0
- package/fixtures/truncation-corpus/fx-04-no-events-05/intent-journal.jsonl +1 -0
- package/fixtures/truncation-corpus/fx-04-no-events-05/meta.json +18 -0
- package/fixtures/truncation-corpus/fx-04-no-events-05/snapshots/v-noEv-5-set-phase.json +11 -0
- package/fixtures/truncation-corpus/fx-04-no-events-05/target/.ijfw/state/workflow.json +1 -0
- package/fixtures/truncation-corpus/fx-05-error-terminated-01/events.jsonl +2 -0
- package/fixtures/truncation-corpus/fx-05-error-terminated-01/intent-journal.jsonl +3 -0
- package/fixtures/truncation-corpus/fx-05-error-terminated-01/meta.json +18 -0
- package/fixtures/truncation-corpus/fx-05-error-terminated-01/snapshots/v-errT-1-partial.json +11 -0
- package/fixtures/truncation-corpus/fx-05-error-terminated-01/target/.ijfw/state/workflow.json +1 -0
- package/fixtures/truncation-corpus/fx-05-error-terminated-02/events.jsonl +2 -0
- package/fixtures/truncation-corpus/fx-05-error-terminated-02/intent-journal.jsonl +3 -0
- package/fixtures/truncation-corpus/fx-05-error-terminated-02/meta.json +18 -0
- package/fixtures/truncation-corpus/fx-05-error-terminated-02/target/.ijfw/blackboard/decisions.jsonl +1 -0
- package/fixtures/truncation-corpus/fx-05-error-terminated-03/events.jsonl +2 -0
- package/fixtures/truncation-corpus/fx-05-error-terminated-03/intent-journal.jsonl +3 -0
- package/fixtures/truncation-corpus/fx-05-error-terminated-03/meta.json +18 -0
- package/fixtures/truncation-corpus/fx-05-error-terminated-03/snapshots/v-errT-3-partial.json +11 -0
- package/fixtures/truncation-corpus/fx-05-error-terminated-03/target/.ijfw/state/workflow.json +1 -0
- package/fixtures/truncation-corpus/fx-05-error-terminated-04/events.jsonl +2 -0
- package/fixtures/truncation-corpus/fx-05-error-terminated-04/intent-journal.jsonl +3 -0
- package/fixtures/truncation-corpus/fx-05-error-terminated-04/meta.json +18 -0
- package/fixtures/truncation-corpus/fx-05-error-terminated-04/target/.ijfw/blackboard/decisions.jsonl +1 -0
- package/fixtures/truncation-corpus/fx-05-error-terminated-05/events.jsonl +2 -0
- package/fixtures/truncation-corpus/fx-05-error-terminated-05/intent-journal.jsonl +3 -0
- package/fixtures/truncation-corpus/fx-05-error-terminated-05/meta.json +18 -0
- package/fixtures/truncation-corpus/fx-05-error-terminated-05/snapshots/v-errT-5-partial.json +11 -0
- package/fixtures/truncation-corpus/fx-05-error-terminated-05/target/.ijfw/state/workflow.json +1 -0
- package/package.json +6 -3
- package/src/active-extension-writer.js +144 -64
- package/src/api-client.js +43 -5
- package/src/audit-roster.js +80 -5
- package/src/blackboard.js +298 -6
- package/src/cli-run.js +33 -5
- package/src/codex-agents.js +96 -5
- package/src/cost/aggregator.js +39 -9
- package/src/cost/pricing.js +57 -0
- package/src/cost/readers/gemini.js +1 -1
- package/src/cross-audit-chunker.js +189 -0
- package/src/cross-dispatcher.js +124 -21
- package/src/cross-orchestrator-cli.js +754 -159
- package/src/cross-orchestrator.js +1065 -17
- package/src/cross-project-search.js +195 -9
- package/src/dashboard-client-waves.html +304 -0
- package/src/dashboard-client.html +5 -1
- package/src/dashboard-server.js +73 -0
- package/src/deploy-alerts.js +150 -0
- package/src/design/iframe-bridge.js +242 -0
- package/src/design-companion.js +144 -0
- package/src/dispatch/checkpoint-cli.js +97 -0
- package/src/dispatch/colon-syntax.js +81 -1
- package/src/dispatch/extension.js +26 -2
- package/src/dispatch/registry-cli.js +4 -1
- package/src/dispatch/wave-cli.js +201 -6
- package/src/dispatch/worktree-cli.js +40 -0
- package/src/dispatch-planner.js +97 -2
- package/src/dream/runner.mjs +47 -11
- package/src/dream/stage-runner.js +40 -0
- package/src/dream/state-file.js +102 -0
- package/src/extension-installer.js +70 -24
- package/src/extension-quota-tracker.js +4 -2
- package/src/extension-registry.js +289 -35
- package/src/feedback-detector.js +26 -0
- package/src/fs-lock.js +259 -7
- package/src/gate-result.js +95 -1
- package/src/hardware-signer.js +4 -2
- package/src/hero-line.js +86 -5
- package/src/intent-router.js +35 -0
- package/src/lib/a11y-contract.js +117 -0
- package/src/lib/atomic-io.js +29 -8
- package/src/lib/cache-keepalive.js +150 -0
- package/src/lib/jsonl-rotation.js +104 -0
- package/src/lib/lighthouse-pillar.js +121 -0
- package/src/lib/llm-call.js +121 -0
- package/src/lib/playwright-baseline.js +205 -0
- package/src/lib/rekor-bridge.js +221 -0
- package/src/lib/repo-map.js +392 -0
- package/src/lib/shasum-verify.js +164 -0
- package/src/lib/sketches-gc.js +132 -0
- package/src/lib/tmp-suffix.js +62 -0
- package/src/lib/ui-review-runner.js +595 -0
- package/src/lib/uispec-drift.js +301 -0
- package/src/lib/uispec-intake.js +381 -0
- package/src/lib/worktree-guards.js +118 -0
- package/src/lib/worktree-recovery.js +100 -0
- package/src/memory/auto-linker.js +267 -0
- package/src/memory/benchmark.js +498 -0
- package/src/memory/dedup.js +126 -0
- package/src/memory/embedding-cache.js +136 -0
- package/src/memory/fact-extractor.js +168 -0
- package/src/memory/fts5.js +65 -1
- package/src/memory/migration-runner.js +6 -1
- package/src/memory/migrations/004-bitemporal.js +91 -0
- package/src/memory/migrations/005-vector-cache.js +61 -0
- package/src/memory/migrations/006-obsidian-graph.js +46 -0
- package/src/memory/migrations/007-skill-telemetry.js +24 -0
- package/src/memory/migrations/008-write-provenance.js +41 -0
- package/src/memory/migrations/009-obsidian-backfill.js +50 -0
- package/src/memory/obsidian-parser.js +152 -0
- package/src/memory/query-dataview.js +86 -0
- package/src/memory/search.js +46 -15
- package/src/memory/temporal.js +529 -0
- package/src/memory/tokenize.js +10 -0
- package/src/memory-facts-handler.js +37 -0
- package/src/memory-feedback.js +260 -2
- package/src/model-refresh.js +292 -0
- package/src/observability/cost-anomaly.js +166 -0
- package/src/observability/evaluator-checkpoint-contract.js +117 -0
- package/src/observability/trace-id.js +163 -0
- package/src/orchestrator/agents-md-blackboard.js +152 -0
- package/src/orchestrator/checkpoint-contract.md +140 -0
- package/src/orchestrator/debug-trident-trigger.js +374 -0
- package/src/orchestrator/debug-trident.js +570 -0
- package/src/orchestrator/merge-block-aware.js +350 -0
- package/src/orchestrator/plan-checker.js +475 -0
- package/src/orchestrator/post-done-runner.js +277 -0
- package/src/orchestrator/review.js +38 -3
- package/src/orchestrator/skill-telemetry-sink.js +29 -0
- package/src/orchestrator/skill-telemetry.js +37 -0
- package/src/orchestrator/state-events.js +459 -0
- package/src/orchestrator/state-sdk.js +1932 -0
- package/src/orchestrator/status-protocol.js +84 -17
- package/src/orchestrator/subagent-telemetry.js +471 -0
- package/src/orchestrator/termination.js +160 -0
- package/src/orchestrator/verification-gate.js +200 -16
- package/src/orchestrator/wave-state.js +332 -23
- package/src/orchestrator/worktree-provision.js +77 -0
- package/src/override-resolver.js +5 -3
- package/src/override-use-registry.js +111 -5
- package/src/receipts.js +36 -4
- package/src/recovery/checkpoint.js +56 -3
- package/src/recovery/code-fixer.js +961 -0
- package/src/recovery/truncation.js +317 -0
- package/src/redactor.js +75 -6
- package/src/runtime-mediator.js +15 -1
- package/src/sanitizer.js +10 -0
- package/src/search-hybrid.js +139 -0
- package/src/server.js +795 -112
- package/src/swarm/worktree.js +27 -4
- package/src/swarm-config.js +102 -17
- package/src/team/domain-templates/book.json +51 -0
- package/src/team/domain-templates/business.json +44 -0
- package/src/team/domain-templates/content.json +50 -0
- package/src/team/domain-templates/design.json +44 -0
- package/src/team/domain-templates/research.json +44 -0
- package/src/team/domain-templates/software.json +40 -0
- package/src/team/generator.js +440 -3
- package/src/team/modify.js +203 -0
- package/src/team/schemas.js +48 -0
- package/src/update-apply.js +19 -3
- package/src/dashboard-charts.js +0 -239
|
@@ -0,0 +1,498 @@
|
|
|
1
|
+
// IJFW v1.5.0 T22 (Wave E) -- memory benchmark harness.
|
|
2
|
+
//
|
|
3
|
+
// Genre-matches mem0 / Zep / Graphiti published memory benchmarks: same axes,
|
|
4
|
+
// same numeric shape, run against IJFW's own 3-tier store. Output is a JSON
|
|
5
|
+
// artifact that can be diffed across builds + cited in marketing.
|
|
6
|
+
//
|
|
7
|
+
// 3-tier model recap:
|
|
8
|
+
// hot = markdown files at <root>/.ijfw/memory/*.md (linear regex; always
|
|
9
|
+
// available; used as auto-index source for warm + fallback when warm
|
|
10
|
+
// is cold).
|
|
11
|
+
// warm = SQLite FTS5 at <root>/.ijfw/index/memory.db (porter unicode61).
|
|
12
|
+
// Inserts go via indexEntry(); searches via searchFts5() (warm path)
|
|
13
|
+
// or searchMemory() (warm-first w/ hot fallback envelope).
|
|
14
|
+
// cold = pgvector / embedded vectors (migration 005 + embedding-cache).
|
|
15
|
+
// Not exercised here by design -- the cold path needs a model and
|
|
16
|
+
// this harness ships with zero new deps. Axis is RESERVED so future
|
|
17
|
+
// runs can drop in numbers without changing the artifact schema.
|
|
18
|
+
//
|
|
19
|
+
// Axes measured (industry-aligned subset; not all of mem0's "LoCoMo" axes
|
|
20
|
+
// translate -- this is a coding-memory benchmark, not a conversational one):
|
|
21
|
+
//
|
|
22
|
+
// 1. ingest_throughput_rps -- inserts / second (warm tier, single writer).
|
|
23
|
+
// 2. ingest_latency_ms -- p50 / p95 / p99 per-insert.
|
|
24
|
+
// 3. query_latency_ms -- p50 / p95 / p99 per warm-tier search.
|
|
25
|
+
// 4. recall_at_k -- recall@1, @3, @5 against a known
|
|
26
|
+
// query-answer set (porter stemming +
|
|
27
|
+
// synonym expansion both count as hits
|
|
28
|
+
// if they resolve to the gold doc).
|
|
29
|
+
// 5. storage_bytes_per_memory -- on-disk db size / row count.
|
|
30
|
+
// 6. corpus_size -- # rows + # query-answer pairs.
|
|
31
|
+
// 7. hot_tier_query_latency_ms -- linear-regex hot tier (provenance check;
|
|
32
|
+
// should be slower than warm on >50 rows
|
|
33
|
+
// -- if it isn't, warm tier is broken).
|
|
34
|
+
// 8. cold_tier -- { available: false, reason: 'no-embedding-model' }
|
|
35
|
+
// reserved schema slot.
|
|
36
|
+
// 9. staleness_filter -- { default_excludes_stale: bool,
|
|
37
|
+
// stale_visible_with_flag: bool }
|
|
38
|
+
// sanity proof the warm filter still gates.
|
|
39
|
+
//
|
|
40
|
+
// What this harness does NOT do (yet -- on the v1.5.0 backlog):
|
|
41
|
+
// - cross-tier promotion timing (hot->warm happens at first search; warm
|
|
42
|
+
// never promotes to cold without a model). Future T23+ work owns the
|
|
43
|
+
// bi-temporal + decay-on-retrieval axes.
|
|
44
|
+
// - multi-writer throughput. Single-writer is the published norm because
|
|
45
|
+
// SQLite's BEGIN IMMEDIATE queue dominates; that's already covered by
|
|
46
|
+
// test-memory-fts5.js's concurrent-writers test.
|
|
47
|
+
// - memory cost in RAM. SQLite page cache is bounded; an "RSS during
|
|
48
|
+
// benchmark" axis adds value but needs platform-specific tooling.
|
|
49
|
+
//
|
|
50
|
+
// Determinism:
|
|
51
|
+
// - Default corpus is seeded; same input -> same gold mapping. Latency
|
|
52
|
+
// numbers will vary across machines; that's expected (and is why we
|
|
53
|
+
// report p50/p95/p99, not means).
|
|
54
|
+
// - The default queries are chosen so a porter-stemmed FTS5 over the
|
|
55
|
+
// default corpus hits recall@5 == 1.0 -- the test asserts this exact
|
|
56
|
+
// property so a regression in synonyms / tokenizer / search ordering
|
|
57
|
+
// gets caught as soon as it lands.
|
|
58
|
+
//
|
|
59
|
+
// Output:
|
|
60
|
+
// { axes, corpus, runs, results, schema_version, ijfw_version, ts_iso }
|
|
61
|
+
// written to <out_dir>/memory-<unix_ms>.json by default. Result-only
|
|
62
|
+
// callers (in-test) can call runBenchmark({write: false}) to skip the
|
|
63
|
+
// write and consume the JS object directly.
|
|
64
|
+
//
|
|
65
|
+
// Public surface:
|
|
66
|
+
// runBenchmark(opts) -> Promise<results>
|
|
67
|
+
// loadDefaultCorpus() -> { docs, queries }
|
|
68
|
+
// buildSyntheticCorpus(n, seed) -> { docs, queries }
|
|
69
|
+
// percentile(arr, p) -> number (utility, exported for tests)
|
|
70
|
+
// BENCHMARK_SCHEMA_VERSION -- bump on shape change
|
|
71
|
+
//
|
|
72
|
+
// Zero new deps; uses only what fts5.js + search.js + node:* already pull in.
|
|
73
|
+
|
|
74
|
+
import { mkdtempSync, mkdirSync, writeFileSync, statSync, existsSync, rmSync } from 'node:fs';
|
|
75
|
+
import { join, resolve } from 'node:path';
|
|
76
|
+
import { tmpdir } from 'node:os';
|
|
77
|
+
import { performance } from 'node:perf_hooks';
|
|
78
|
+
|
|
79
|
+
import {
|
|
80
|
+
openDb as openMemoryDb,
|
|
81
|
+
indexEntry,
|
|
82
|
+
searchFts5,
|
|
83
|
+
rowCount,
|
|
84
|
+
closeDb,
|
|
85
|
+
dbPathFor,
|
|
86
|
+
} from './fts5.js';
|
|
87
|
+
import { searchMemory } from './search.js';
|
|
88
|
+
|
|
89
|
+
export const BENCHMARK_SCHEMA_VERSION = 1;
|
|
90
|
+
|
|
91
|
+
// --- Percentile helper ------------------------------------------------------
|
|
92
|
+
//
|
|
93
|
+
// Linear-interpolated percentile over a numeric array. Returns 0 on empty.
|
|
94
|
+
// Exported so the test file can assert on the same values the harness reports.
|
|
95
|
+
/**
 * Linear-interpolated percentile of a numeric array.
 *
 * @param {number[]} values - samples; left untouched (a sorted copy is used).
 * @param {number} p - requested percentile, 0..100 inclusive.
 * @returns {number} the interpolated value, or 0 when `values` is not a
 *   non-empty array.
 * @throws {RangeError} when `values` is non-empty and `p` is outside
 *   [0, 100] (NaN included).
 */
export function percentile(values, p) {
  const hasSamples = Array.isArray(values) && values.length > 0;
  if (!hasSamples) return 0;

  const inRange = p >= 0 && p <= 100;
  if (!inRange) throw new RangeError('percentile: p must be in [0,100]');

  // Numeric comparator: the default sort would order lexicographically.
  const sorted = values.slice().sort((x, y) => x - y);
  const lastIndex = sorted.length - 1;
  if (lastIndex === 0) return sorted[0];

  // Fractional position of the percentile within the sorted samples.
  const position = (p / 100) * lastIndex;
  const below = Math.floor(position);
  const above = Math.ceil(position);
  if (below === above) return sorted[below];

  // Blend the two neighbouring samples by their distance to `position`.
  const weight = position - below;
  return sorted[below] * (1 - weight) + sorted[above] * weight;
}
|
|
107
|
+
|
|
108
|
+
// Arithmetic mean of `values`; yields 0 when the collection has no length.
function mean(values) {
  const count = values.length;
  if (!count) return 0;
  let total = 0;
  for (const sample of values) {
    total += sample;
  }
  return total / count;
}
|
|
114
|
+
|
|
115
|
+
// Deterministic PRNG (mulberry32) so the synthetic corpus is reproducible
|
|
116
|
+
// across runs + machines. Same seed => same docs/queries/gold-mapping.
|
|
117
|
+
// mulberry32: builds a deterministic generator from `seed` (coerced to an
// unsigned 32-bit integer). Each call of the returned function advances the
// internal state and yields a number in [0, 1).
function mulberry32(seed) {
  let state = seed >>> 0;
  return function next() {
    // Advance the 32-bit counter, wrapping on overflow.
    state = (state + 0x6d2b79f5) >>> 0;
    const mixed = Math.imul(state ^ (state >>> 15), state | 1);
    const scrambled = mixed ^ (mixed + Math.imul(mixed ^ (mixed >>> 7), mixed | 61));
    // Force unsigned, then scale by 2^32 to land in [0, 1).
    return ((scrambled ^ (scrambled >>> 14)) >>> 0) / 4294967296;
  };
}
|
|
127
|
+
|
|
128
|
+
// --- Corpora -----------------------------------------------------------------
|
|
129
|
+
//
|
|
130
|
+
// The default corpus is hand-curated so the gold-answer set is unambiguous
|
|
131
|
+
// (each query has exactly one "right" doc). The synthetic corpus is for
|
|
132
|
+
// scaling tests -- it generates N docs, each with a unique anchor token, and
|
|
133
|
+
// builds 1 query per doc.
|
|
134
|
+
|
|
135
|
+
// Curated corpus: each doc has at least one anchor token that is unique to
|
|
136
|
+
// it across the whole set, so the gold-answer mapping is unambiguous and
|
|
137
|
+
// recall@5 is achievable. Bodies are short, realistic coding-memory facts.
|
|
138
|
+
// Curated default corpus as [id, body] tuples. loadDefaultCorpus() converts
// each tuple into { id, body }; buildSyntheticCorpus() reuses the bodies as
// filler text.
// NOTE(review): a few ordinary words occur in more than one body (e.g.
// "wave" in hook-stop and plan-execute, "cosine" in search-vector and
// search-hybrid), so not every word here is a unique anchor -- check this
// list before using a word as a single-token query.
const DEFAULT_DOCS = [
  // [id, body]
  ['auth-jwt', 'JWT bearer tokens authenticate API requests with HS256 signing.'],
  ['auth-oauth', 'OAuth2 device authorization flow uniqueoauthflowanchor for CLI clients.'],
  ['auth-session', 'Server session validateSession reads cookie and DB row.'],
  ['cache-redis', 'Redis cache with TTL eviction policy for hot keys.'],
  ['cache-lru', 'LRU memoization of expensive query results in process memory.'],
  ['cache-cdn', 'CDN edge caching with revalidate semantics on stale entries.'],
  ['db-postgres', 'Postgres transactions with serializable isolation level.'],
  ['db-sqlite', 'SQLite WAL journal mode for concurrent readers.'],
  ['db-migration', 'Schema migration runner advances uniqueuserversionanchor on success.'],
  ['search-fts5', 'FTS5 porter tokenizer stems plural and verb forms.'],
  ['search-vector', 'Vector cosine similarity over embedded chunks for semantic recall.'],
  ['search-hybrid', 'Hybrid search blends BM25 lexical scores with vector cosine ranking.'],
  ['mem-tiers', 'Memory tiers: hot markdown, warm FTS5 index, cold vector store.'],
  ['mem-staleness', 'Cascading staleness propagation flags superseded memory rows.'],
  ['mem-temporal', 'Bitemporal validity windows replace prior facts when contradicted.'],
  ['rag-chunk', 'Chunk documents into 512 token windows with 64 token overlap.'],
  ['rag-rerank', 'Reranking with a uniquererankeranchor after first pass dense retrieval.'],
  ['rag-eval', 'Retrieval eval uses recall at k and mean reciprocal rank metrics.'],
  ['mcp-protocol', 'MCP uniquejsonrpcanchor over stdio with initialized handshake.'],
  ['mcp-tools', 'MCP tools list advertises uniqueijfwstateanchor and memory search.'],
  ['mcp-resources', 'MCP resources expose project memory markdown files for reading.'],
  ['cli-codex', 'Codex CLI honours uniquecodexagentsanchor and emits stdout JSON.'],
  ['cli-gemini', 'Gemini CLI uses uniquegeminimdanchor and MCP registration config.'],
  ['cli-cursor', 'Cursor MCP config lives at uniquecursorconfanchor with workspace scope.'],
  ['hook-pretool', 'PreToolUse hook validates arguments before tool execution.'],
  ['hook-posttool', 'PostToolUse hook reports observations back into the session.'],
  ['hook-stop', 'Stop hook closes the wave and writes ship gate receipt.'],
  ['plan-spec', 'Spec phase clarifies what a phase delivers with ambiguity scoring.'],
  ['plan-review', 'Plan review fires uniquetridentanchor before execute begins.'],
  ['plan-execute', 'Execute phase dispatches subagents per wave with checkpoints.'],
];
|
|
171
|
+
|
|
172
|
+
// Each query has a UNIQUE anchor token (or a stem-unique phrase) that resolves
|
|
173
|
+
// to exactly one doc. Single-token queries avoid FTS5 multi-token AND footguns;
|
|
174
|
+
// the gold-answer mapping stays unambiguous; porter stemming still earns its
|
|
175
|
+
// keep because the query token is rarely a literal substring of the body.
|
|
176
|
+
// Gold query set: one single-token query per default doc, in the same order
// as the docs. Every token must occur in exactly one doc body (after
// lower-casing and porter stemming); otherwise recall@1 measures ranking
// tie-breaks instead of retrieval quality.
//
// Three earlier picks broke that rule and were swapped for tokens that are
// unique to their gold doc:
//   'cosine'    (also in search-hybrid)                 -> 'similarity'
//   'staleness' (stems to 'stale', also in cache-cdn)   -> 'cascading'
//   'wave'      (also in plan-execute)                  -> 'receipt'
const DEFAULT_QUERIES = [
  { q: 'HS256', gold: 'auth-jwt' },
  { q: 'uniqueoauthflowanchor', gold: 'auth-oauth' },
  { q: 'validateSession', gold: 'auth-session' },
  { q: 'Redis', gold: 'cache-redis' },
  { q: 'memoization', gold: 'cache-lru' },
  { q: 'revalidate', gold: 'cache-cdn' },
  { q: 'serializable', gold: 'db-postgres' },
  { q: 'WAL', gold: 'db-sqlite' },
  { q: 'uniqueuserversionanchor', gold: 'db-migration' },
  { q: 'porter', gold: 'search-fts5' },
  { q: 'similarity', gold: 'search-vector' },
  { q: 'BM25', gold: 'search-hybrid' },
  { q: 'tiers', gold: 'mem-tiers' },
  { q: 'cascading', gold: 'mem-staleness' },
  { q: 'bitemporal', gold: 'mem-temporal' },
  { q: 'overlap', gold: 'rag-chunk' },
  { q: 'uniquererankeranchor', gold: 'rag-rerank' },
  { q: 'reciprocal', gold: 'rag-eval' },
  { q: 'uniquejsonrpcanchor', gold: 'mcp-protocol' },
  { q: 'uniqueijfwstateanchor', gold: 'mcp-tools' },
  { q: 'resources', gold: 'mcp-resources' },
  { q: 'uniquecodexagentsanchor', gold: 'cli-codex' },
  { q: 'uniquegeminimdanchor', gold: 'cli-gemini' },
  { q: 'uniquecursorconfanchor', gold: 'cli-cursor' },
  { q: 'PreToolUse', gold: 'hook-pretool' },
  { q: 'PostToolUse', gold: 'hook-posttool' },
  { q: 'receipt', gold: 'hook-stop' },
  { q: 'ambiguity', gold: 'plan-spec' },
  { q: 'uniquetridentanchor', gold: 'plan-review' },
  { q: 'subagents', gold: 'plan-execute' },
];
|
|
208
|
+
|
|
209
|
+
// FTS5 query sanitizer -- strips characters that FTS5 treats as operators
|
|
210
|
+
// or column-qualifiers (so a hyphen, dot, or colon in a user query
|
|
211
|
+
// doesn't blow up with "no such column" / "syntax error near"). Mirrors
|
|
212
|
+
// what a production query layer would do; published numbers can't depend
|
|
213
|
+
// on the caller hand-sanitizing every input. Internal helper -- exported
|
|
214
|
+
// only for callers that want to share the sanitizer (and for tests).
|
|
215
|
+
/**
 * Reduce a free-form query to plain word tokens before it is handed to FTS5.
 *
 * Every character that is not a Unicode letter, combining mark, digit,
 * underscore, or whitespace is replaced by a space; whitespace runs are then
 * collapsed to one space and the result is trimmed.
 *
 * The character class is Unicode-aware on purpose: an ASCII-only class
 * deletes accented and non-Latin letters ("café" -> "caf", CJK -> ""), so
 * such queries could never match text indexed by the unicode61 tokenizer.
 * Non-ASCII characters are legal in FTS5 barewords, so keeping them does not
 * reintroduce syntax errors.
 *
 * NOTE(review): bareword FTS5 keywords (AND, OR, NOT, NEAR) pass through
 * unchanged -- confirm the search layer handles them.
 *
 * @param {*} q - raw query; anything that is not a string yields ''.
 * @returns {string} space-separated tokens, possibly empty.
 */
export function sanitizeFtsQuery(q) {
  if (typeof q !== 'string') return '';
  return q
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ')
    .replace(/\s+/g, ' ')
    .trim();
}
|
|
221
|
+
|
|
222
|
+
/**
 * Build a fresh copy of the curated default corpus.
 *
 * @returns {{ docs: Array<{id: string, body: string}>, queries: Array<{q: string, gold: string}> }}
 *   new arrays of new objects, so callers may mutate the result without
 *   touching the module-level constants.
 */
export function loadDefaultCorpus() {
  const docs = [];
  for (const [id, body] of DEFAULT_DOCS) {
    docs.push({ id, body });
  }

  const queries = [];
  for (const entry of DEFAULT_QUERIES) {
    queries.push({ ...entry });
  }

  return { docs, queries };
}
|
|
228
|
+
|
|
229
|
+
// Synthetic corpus: N docs each with an "anchor" token that uniquely identifies
|
|
230
|
+
// the gold doc, plus filler text drawn from the default corpus to keep FTS5
|
|
231
|
+
// from trivially hitting on tf-idf. Useful when callers want scaling numbers
|
|
232
|
+
// at sizes the curated corpus can't reach (100/500/1000).
|
|
233
|
+
/**
 * Generate a reproducible synthetic corpus of `n` docs plus one query each.
 *
 * Doc i has id `synth-<i>` and a body made of the anchor token
 * `syntheticanchor<i>token`, the separator ` -- `, and one default-corpus
 * body chosen by the seeded PRNG. Query i is the anchor token with doc i as
 * its gold answer.
 *
 * @param {number} [n=100] - number of docs/queries to generate.
 * @param {number} [seed=42] - PRNG seed; the same seed gives the same corpus.
 * @returns {{ docs: Array<{id: string, body: string}>, queries: Array<{q: string, gold: string}> }}
 */
export function buildSyntheticCorpus(n = 100, seed = 42) {
  const nextRandom = mulberry32(seed);
  const fillerBodies = DEFAULT_DOCS.map(([, body]) => body);
  const corpus = { docs: [], queries: [] };

  let index = 0;
  while (index < n) {
    const token = `syntheticanchor${index}token`;
    // Exactly one PRNG draw per doc keeps the sequence reproducible.
    const pick = Math.floor(nextRandom() * fillerBodies.length);
    const docId = `synth-${index}`;
    corpus.docs.push({ id: docId, body: `${token} -- ${fillerBodies[pick]}` });
    corpus.queries.push({ q: token, gold: docId });
    index += 1;
  }

  return corpus;
}
|
|
247
|
+
|
|
248
|
+
// --- The harness ------------------------------------------------------------
|
|
249
|
+
|
|
250
|
+
/**
 * runBenchmark(opts) -> Promise<results>
 *
 * Ingests the corpus into a memory DB opened at `root`, replays the query set
 * against the warm (FTS5) tier to collect latency and recall@k, samples the
 * hot-linear path, sanity-checks the staleness filter, and optionally writes
 * the results as a JSON artifact.
 *
 * opts:
 *   corpus     -- { docs:[{id,body}], queries:[{q,gold}] } | undefined (default)
 *   root       -- existing project root to use; if absent, a temp dir is made
 *                 and removed on completion.
 *   write      -- write the JSON artifact to disk (default true).
 *   out_dir    -- override artifact dir (default <root>/.ijfw/benchmarks, or
 *                 <cwd>/.ijfw/benchmarks when no root was given).
 *   k_set      -- recall@k values to compute (default [1, 3, 5]). Entries that
 *                 are not positive integers are ignored and duplicates are
 *                 collapsed; if nothing valid remains the default is used.
 *   warmup     -- # warm-up queries before timed phase (default 3).
 *   query_runs -- # iterations through the full query set for latency stats
 *                 (default 3 -- gives 3x #queries timed samples).
 *
 * returns the full results object even when write=false. When an artifact is
 * written, `results.artifact_path` holds its path.
 *
 * throws Error when the corpus is not shaped like { docs, queries }.
 */
export async function runBenchmark(opts = {}) {
  const corpus = opts.corpus || loadDefaultCorpus();
  if (!corpus || !Array.isArray(corpus.docs) || !Array.isArray(corpus.queries)) {
    throw new Error('runBenchmark: corpus must be { docs, queries }');
  }
  const write = opts.write !== false;

  // Normalise k_set. A duplicated k would bump the same hit counter twice per
  // query (recall > 1), and a zero/negative/fractional k changes what
  // rows.slice(0, k) means, so keep only distinct positive integers.
  const requestedK = (opts.k_set && opts.k_set.length)
    ? Array.from(opts.k_set).filter(k => Number.isInteger(k) && k > 0)
    : [];
  const kSet = requestedK.length ? Array.from(new Set(requestedK)) : [1, 3, 5];
  const warmup = Number.isInteger(opts.warmup) && opts.warmup >= 0 ? opts.warmup : 3;
  const queryRuns = Number.isInteger(opts.query_runs) && opts.query_runs > 0 ? opts.query_runs : 3;

  // Reduce-based extremes: Math.min(...xs) / Math.max(...xs) pass one argument
  // per sample and can throw RangeError on very large sample arrays. The seeds
  // keep the empty-array results identical to Math.min() / Math.max().
  const minOf = xs => xs.reduce((lo, x) => (x < lo ? x : lo), Infinity);
  const maxOf = xs => xs.reduce((hi, x) => (x > hi ? x : hi), -Infinity);

  let root = opts.root;
  let madeTmp = false;
  if (!root) {
    root = mkdtempSync(join(tmpdir(), 'ijfw-bench-'));
    madeTmp = true;
  } else {
    root = resolve(root);
    if (!existsSync(root)) mkdirSync(root, { recursive: true });
  }

  const startedAt = Date.now();
  const t0 = performance.now();
  let db = null;
  let results;

  try {
    db = await openMemoryDb(root);

    // --- Ingest phase ------------------------------------------------------
    // Map docId -> warm rowId so we can recall@k by gold doc later.
    const goldRowByDocId = new Map();
    const ingestLatencies = [];
    const ingestStart = performance.now();
    for (const doc of corpus.docs) {
      const t = performance.now();
      const inserted = indexEntry(db, { body: doc.body, source: doc.id, session_id: 'bench' });
      const ms = performance.now() - t;
      ingestLatencies.push(ms);
      goldRowByDocId.set(doc.id, Number(inserted.id));
    }
    const ingestElapsed = performance.now() - ingestStart;
    const ingestThroughput = corpus.docs.length / (ingestElapsed / 1000);

    // --- Warm-up queries (un-timed; primes prepared statements + page cache) -
    for (let i = 0; i < warmup && i < corpus.queries.length; i++) {
      const wq = sanitizeFtsQuery(corpus.queries[i].q);
      if (wq) {
        try { searchFts5(db, wq, 10); } catch { /* ignore warm-up faults */ }
      }
    }

    // --- Query phase: warm tier (FTS5) ------------------------------------
    const queryLatencies = [];
    // We compute recall@k by checking if gold is in the top-k of the result.
    const hitCounts = new Map(); // k -> #hits
    const totalQueries = corpus.queries.length * queryRuns;
    for (const k of kSet) hitCounts.set(k, 0);

    const maxK = Math.max(...kSet);
    for (let run = 0; run < queryRuns; run++) {
      for (const { q, gold } of corpus.queries) {
        // Sanitising happens before the timer starts, so it is not timed.
        const safeQ = sanitizeFtsQuery(q);
        const t = performance.now();
        let rows;
        try {
          rows = safeQ ? searchFts5(db, safeQ, maxK) : [];
        } catch {
          // A failing query is scored as a miss for every k.
          rows = [];
        }
        const ms = performance.now() - t;
        queryLatencies.push(ms);
        const goldRow = goldRowByDocId.get(gold);
        for (const k of kSet) {
          const topK = rows.slice(0, k);
          if (topK.some(r => Number(r.id) === goldRow)) {
            hitCounts.set(k, hitCounts.get(k) + 1);
          }
        }
      }
    }

    const recallAtK = {};
    for (const k of kSet) {
      // Guard the empty-query-set case (0/0 -> NaN), mirroring bytesPerMemory.
      recallAtK[`recall@${k}`] = totalQueries > 0 ? hitCounts.get(k) / totalQueries : 0;
    }

    // --- Storage cost ------------------------------------------------------
    const dbFile = dbPathFor(root);
    let dbBytes = 0;
    try { dbBytes = statSync(dbFile).size; } catch { /* db file may be -wal-suffixed in WAL mode; tolerate */ }
    const rowsIndexed = rowCount(db);
    const bytesPerMemory = rowsIndexed > 0 ? dbBytes / rowsIndexed : 0;

    // --- Hot-tier query provenance -----------------------------------------
    // Re-run a couple of queries through searchMemory() with an empty file
    // list to force the hot-linear fallback (warm tier is populated but the
    // call path returns hot when files==[]). Captures hot-tier latency as
    // a sanity column; it WILL be slower than warm on a 30-row corpus.
    const hotLatencies = [];
    for (const { q } of corpus.queries.slice(0, Math.min(5, corpus.queries.length))) {
      const safeQ = sanitizeFtsQuery(q);
      if (!safeQ) continue;
      const t = performance.now();
      try { searchMemory(safeQ, [], 10); } catch { /* hot-linear empty -> [], no throw */ }
      hotLatencies.push(performance.now() - t);
    }

    // --- Staleness filter sanity ------------------------------------------
    // Mark one row stale, prove the default filter hides it, prove
    // include_stale=true surfaces it.
    let defaultExcludesStale = null;
    let staleVisibleWithFlag = null;
    try {
      const firstDoc = corpus.docs[0];
      const rowId = goldRowByDocId.get(firstDoc.id);
      db.prepare('UPDATE memory_entries SET stale_candidate = 1 WHERE id = ?').run(rowId);
      try {
        const queryBody = sanitizeFtsQuery(
          firstDoc.body.split(/\s+/).filter(t => /^[a-zA-Z]+$/.test(t)).slice(0, 2).join(' ')
        );
        const defaultHits = queryBody ? searchFts5(db, queryBody, 20) : [];
        defaultExcludesStale = !defaultHits.some(r => Number(r.id) === rowId);
        const allHits = queryBody ? searchFts5(db, queryBody, 20, { include_stale: true }) : [];
        staleVisibleWithFlag = allHits.some(r => Number(r.id) === rowId);
      } finally {
        // Always reset, even when a probe search throws, so the staleness
        // mutation never leaks into a caller-supplied root's database.
        db.prepare('UPDATE memory_entries SET stale_candidate = 0 WHERE id = ?').run(rowId);
      }
    } catch {
      // Pre-v3 schema (no stale_candidate column) -- leave as null.
    }

    const totalElapsed = performance.now() - t0;

    results = {
      schema_version: BENCHMARK_SCHEMA_VERSION,
      // NOTE(review): the fallback literal is '1.5.0' -- confirm it tracks the
      // released package version.
      ijfw_version: process.env.IJFW_VERSION || '1.5.0',
      ts_iso: new Date(startedAt).toISOString(),
      duration_ms: Math.round(totalElapsed * 1000) / 1000,
      corpus: {
        docs: corpus.docs.length,
        queries: corpus.queries.length,
        query_runs: queryRuns,
        total_query_samples: totalQueries,
      },
      axes: {
        ingest: {
          throughput_rps: round(ingestThroughput, 2),
          latency_ms: {
            p50: round(percentile(ingestLatencies, 50), 3),
            p95: round(percentile(ingestLatencies, 95), 3),
            p99: round(percentile(ingestLatencies, 99), 3),
            mean: round(mean(ingestLatencies), 3),
            min: round(minOf(ingestLatencies), 3),
            max: round(maxOf(ingestLatencies), 3),
          },
        },
        query_warm_fts5: {
          latency_ms: {
            p50: round(percentile(queryLatencies, 50), 3),
            p95: round(percentile(queryLatencies, 95), 3),
            p99: round(percentile(queryLatencies, 99), 3),
            mean: round(mean(queryLatencies), 3),
            min: round(minOf(queryLatencies), 3),
            max: round(maxOf(queryLatencies), 3),
          },
          recall: recallAtK,
        },
        query_hot_linear: {
          // sample only -- not the published number, just provenance.
          samples: hotLatencies.length,
          latency_ms: {
            p50: round(percentile(hotLatencies, 50), 3),
            p95: round(percentile(hotLatencies, 95), 3),
            mean: round(mean(hotLatencies), 3),
          },
        },
        query_cold_vector: {
          available: false,
          reason: 'no-embedding-model-bound-in-benchmark-harness',
        },
        storage: {
          db_bytes: dbBytes,
          rows_indexed: rowsIndexed,
          bytes_per_memory: round(bytesPerMemory, 2),
        },
        staleness_filter: {
          default_excludes_stale: defaultExcludesStale,
          stale_visible_with_flag: staleVisibleWithFlag,
        },
      },
    };

    // --- Write artifact ----------------------------------------------------
    if (write) {
      const outDir = opts.out_dir
        ? resolve(opts.out_dir)
        : join(resolveArtifactRoot(opts.root), '.ijfw', 'benchmarks');
      mkdirSync(outDir, { recursive: true });
      const artifactPath = join(outDir, `memory-${startedAt}.json`);
      writeFileSync(artifactPath, JSON.stringify(results, null, 2) + '\n', 'utf8');
      // Set after serialisation, so the path is on the returned object only.
      results.artifact_path = artifactPath;
    }
  } finally {
    try {
      if (db) closeDb(db);
    } finally {
      // Remove the temp root even if closing the DB threw.
      if (madeTmp) {
        try { rmSync(root, { recursive: true, force: true, maxRetries: 5, retryDelay: 50 }); } catch { /* tolerate */ }
      }
    }
  }

  return results;
}
|
|
475
|
+
|
|
476
|
+
// Rounds `x` to `n` decimal places so artifact values stay compact and easy to
// diff. Anything that is not a finite number (NaN, +/-Infinity, non-numbers)
// is handed back untouched.
function round(x, n) {
  if (Number.isFinite(x)) {
    const scale = Math.pow(10, n);
    return Math.round(x * scale) / scale;
  }
  return x;
}
|
|
483
|
+
|
|
484
|
+
// Chooses the directory under which benchmark artifacts are written. A
// non-empty string root is resolved and used as-is; anything else (no root
// given, i.e. the benchmark ran in a throwaway temp dir) falls back to the
// current working directory so the artifact lands somewhere persistent.
function resolveArtifactRoot(rootArg) {
  const hasExplicitRoot = typeof rootArg === 'string' && rootArg !== '';
  return resolve(hasExplicitRoot ? rootArg : process.cwd());
}
|
|
491
|
+
|
|
492
|
+
// Default export: the module's public entry points and schema version,
// bundled into one object.
const benchmarkApi = {
  runBenchmark,
  loadDefaultCorpus,
  buildSyntheticCorpus,
  percentile,
  BENCHMARK_SCHEMA_VERSION,
};

export default benchmarkApi;
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* H5.6 — Semantic dedup at ingest time.
|
|
3
|
+
*
|
|
4
|
+
* Competitors (Graphiti, mem0) dedup near-duplicate memories at ingest so a
|
|
5
|
+
* months-old project doesn't accrue 47 nearly-identical "decided to use
|
|
6
|
+
* Postgres" entries. IJFW historically appended on every store, so this
|
|
7
|
+
* module closes the bloat gap.
|
|
8
|
+
*
|
|
9
|
+
* Approach: cheap Jaccard similarity over token sets (same primitive that
|
|
10
|
+
* cross-audit-chunker.mergeFindings uses for finding clustering). Pure JS,
|
|
11
|
+
* zero deps, fully deterministic. No vector model required.
|
|
12
|
+
*
|
|
13
|
+
* Public surface:
|
|
14
|
+
* - tokenize(text) → Set<string>
|
|
15
|
+
* - jaccard(a, b) → number ∈ [0,1]
|
|
16
|
+
* - findNearDuplicate(content, recent, t?) → { match, similarity } | null
|
|
17
|
+
* - readDedupConfig(env?) → { enabled, threshold, windowSize }
|
|
18
|
+
*
|
|
19
|
+
* Env knobs (read at call time, NOT cached, so tests can flip per-call):
|
|
20
|
+
* - IJFW_DEDUP_OFF=1 → disable entirely (returns null always)
|
|
21
|
+
* - IJFW_DEDUP_THRESHOLD=0.85 → Jaccard cutoff (default 0.85)
|
|
22
|
+
* - IJFW_DEDUP_WINDOW=50 → look back this many recent memories (default 50)
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
// Jaccard cutoff used when IJFW_DEDUP_THRESHOLD is unset or unparsable.
const DEFAULT_THRESHOLD = 0.85;

// Look-back window used when IJFW_DEDUP_WINDOW is unset or unparsable.
const DEFAULT_WINDOW = 50;

// Token floor: tokens shorter than this many characters are dropped as noise
// (they carry no real dedup signal).
const MIN_TOKEN_LEN = 3;
|
|
29
|
+
|
|
30
|
+
/**
 * tokenize(text)
 *
 * Lowercased word set, dropping tokens shorter than MIN_TOKEN_LEN so things
 * like "a", "is", "to" don't dominate the Jaccard ratio.
 *
 * Words are split on anything that is not a Unicode letter, combining mark,
 * digit, or underscore. For ASCII input this is exactly the old `\W+` split;
 * for non-ASCII input it keeps words such as "naïve" or Cyrillic text intact
 * instead of shredding them into fragments that the length floor discards.
 *
 * @param {*} text — non-string input yields an empty set
 * @returns {Set<string>}
 */
export function tokenize(text) {
  if (typeof text !== 'string') return new Set();
  return new Set(
    text.toLowerCase()
      .split(/[^\p{L}\p{M}\p{N}_]+/u)
      .filter(t => t.length >= MIN_TOKEN_LEN)
  );
}
|
|
44
|
+
|
|
45
|
+
/**
 * jaccard(a, b)
 *
 * Jaccard similarity |A ∩ B| / |A ∪ B| of two token sets. Arguments that are
 * already Sets are used directly; anything else is passed through tokenize().
 * Two empty sets score 1; exactly one empty set scores 0.
 *
 * @param {Set<string>|string} a
 * @param {Set<string>|string} b
 * @returns {number} similarity in [0, 1]
 */
export function jaccard(a, b) {
  const left = a instanceof Set ? a : tokenize(a);
  const right = b instanceof Set ? b : tokenize(b);

  const leftEmpty = left.size === 0;
  const rightEmpty = right.size === 0;
  if (leftEmpty && rightEmpty) return 1;
  if (leftEmpty || rightEmpty) return 0;

  let shared = 0;
  left.forEach((tok) => {
    if (right.has(tok)) shared += 1;
  });

  // |A ∪ B| by inclusion-exclusion.
  const unionSize = left.size + right.size - shared;
  return unionSize === 0 ? 0 : shared / unionSize;
}
|
|
61
|
+
|
|
62
|
+
/**
 * readDedupConfig(env?)
 *
 * Resolve runtime config. Reads process.env unless `env` is supplied (for tests).
 *
 *   - enabled:    false only when IJFW_DEDUP_OFF is exactly '1' or 'true'.
 *   - threshold:  IJFW_DEDUP_THRESHOLD parsed with Number() and clamped to
 *                 [0,1]. Unset, blank (including whitespace-only), or
 *                 non-numeric values fall back to DEFAULT_THRESHOLD.
 *   - windowSize: IJFW_DEDUP_WINDOW parsed with parseInt(…, 10) and capped at
 *                 500. Unset, blank, non-numeric, or non-positive values fall
 *                 back to DEFAULT_WINDOW.
 *
 * @param {Object<string, string|undefined>} [env]
 * @returns {{enabled: boolean, threshold: number, windowSize: number}}
 */
export function readDedupConfig(env = process.env) {
  const enabled = env.IJFW_DEDUP_OFF !== '1' && env.IJFW_DEDUP_OFF !== 'true';

  let threshold = DEFAULT_THRESHOLD;
  const rawThreshold = env.IJFW_DEDUP_THRESHOLD;
  // Trim before the blank check: Number('   ') is 0, which would silently set
  // the cutoff to 0 and make every entry count as a duplicate.
  if (rawThreshold != null && String(rawThreshold).trim() !== '') {
    const n = Number(rawThreshold);
    if (Number.isFinite(n)) threshold = Math.max(0, Math.min(1, n));
  }

  let windowSize = DEFAULT_WINDOW;
  const rawWindow = env.IJFW_DEDUP_WINDOW;
  if (rawWindow != null && rawWindow !== '') {
    const n = parseInt(rawWindow, 10);
    // n is an integer > 0 here, so only the upper bound needs clamping.
    if (Number.isFinite(n) && n > 0) windowSize = Math.min(500, n);
  }

  return { enabled, threshold, windowSize };
}
|
|
82
|
+
|
|
83
|
+
/**
 * findNearDuplicate(content, recentMemories, threshold?)
 *
 * Scans `recentMemories` in the order given and returns the first entry whose
 * Jaccard similarity to `content` meets or exceeds the threshold. Each entry
 * should be `{ id, content, ... }`; only `content` is read here, and entries
 * without a string `content` are skipped. Returns `null` if nothing matches,
 * if dedup is disabled via config, or if `content` is blank or yields no
 * tokens.
 *
 * No windowing or reordering happens in this function: the whole array is
 * walked as passed. Callers holding a chronologically-ordered list should
 * slice the tail to their window and reverse it first, so that the first
 * match is the most recent one.
 *
 * @param {string} content
 * @param {Array<{id:string, content:string}>} recentMemories
 * @param {number} [threshold] — used when it is a number in [0,1]; otherwise
 *   the threshold from readDedupConfig() applies
 * @returns {{match:{id:string,content:string}, similarity:number} | null}
 */
export function findNearDuplicate(content, recentMemories, threshold) {
  if (typeof content !== 'string' || !content.trim()) return null;
  if (!Array.isArray(recentMemories) || recentMemories.length === 0) return null;
  const cfg = readDedupConfig();
  if (!cfg.enabled) return null;
  const t = (typeof threshold === 'number' && threshold >= 0 && threshold <= 1)
    ? threshold
    : cfg.threshold;

  const tokContent = tokenize(content);
  // Empty token set → no signal. Don't claim dedup.
  if (tokContent.size === 0) return null;

  for (const mem of recentMemories) {
    if (!mem || typeof mem.content !== 'string') continue;
    const sim = jaccard(tokContent, tokenize(mem.content));
    // Short-circuit on first match; callers expect the most-recent
    // matching entry (assuming they pre-ordered).
    if (sim >= t) return { match: mem, similarity: sim };
  }
  return null;
}
|