nlm-memory 0.4.2 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +72 -34
- package/dist/cli/nlm.js +223 -33
- package/dist/cli/nlm.js.map +1 -1
- package/dist/core/adapters/cursor.d.ts +45 -0
- package/dist/core/adapters/cursor.js +397 -0
- package/dist/core/adapters/cursor.js.map +1 -0
- package/dist/core/adapters/from-source.js +10 -0
- package/dist/core/adapters/from-source.js.map +1 -1
- package/dist/core/adapters/windsurf.d.ts +44 -0
- package/dist/core/adapters/windsurf.js +299 -0
- package/dist/core/adapters/windsurf.js.map +1 -0
- package/dist/core/hook/claude-settings.d.ts +12 -5
- package/dist/core/hook/claude-settings.js +21 -6
- package/dist/core/hook/claude-settings.js.map +1 -1
- package/dist/core/sources/source-registry.d.ts +1 -1
- package/dist/core/sources/source-registry.js +18 -0
- package/dist/core/sources/source-registry.js.map +1 -1
- package/dist/core/storage/sqlite-session-store.d.ts +2 -0
- package/dist/core/storage/sqlite-session-store.js +38 -2
- package/dist/core/storage/sqlite-session-store.js.map +1 -1
- package/dist/hook/hook-auth.d.ts +13 -0
- package/dist/hook/hook-auth.js +19 -0
- package/dist/hook/hook-auth.js.map +1 -0
- package/dist/hook/prompt-recall-hook.js +7 -1
- package/dist/hook/prompt-recall-hook.js.map +1 -1
- package/dist/hook/session-start-hook.js +4 -1
- package/dist/hook/session-start-hook.js.map +1 -1
- package/dist/hook/stop-hook.js +4 -1
- package/dist/hook/stop-hook.js.map +1 -1
- package/dist/http/app.d.ts +2 -0
- package/dist/http/app.js +76 -1
- package/dist/http/app.js.map +1 -1
- package/dist/install/claude-code.js +1 -1
- package/dist/install/claude-code.js.map +1 -1
- package/dist/install/cursor.d.ts +25 -0
- package/dist/install/cursor.js +43 -0
- package/dist/install/cursor.js.map +1 -0
- package/dist/install/nlm-dir-perms.d.ts +19 -0
- package/dist/install/nlm-dir-perms.js +43 -0
- package/dist/install/nlm-dir-perms.js.map +1 -0
- package/dist/install/ollama.d.ts +18 -1
- package/dist/install/ollama.js +62 -7
- package/dist/install/ollama.js.map +1 -1
- package/dist/install/setup.d.ts +4 -0
- package/dist/install/setup.js +141 -18
- package/dist/install/setup.js.map +1 -1
- package/dist/install/windsurf.d.ts +25 -0
- package/dist/install/windsurf.js +43 -0
- package/dist/install/windsurf.js.map +1 -0
- package/dist/mcp/server.js +20 -1
- package/dist/mcp/server.js.map +1 -1
- package/dist/shared/types.d.ts +4 -0
- package/dist/ui/assets/{index-BA6IpU8g.css → index-Beo8psd-.css} +1 -1
- package/dist/ui/assets/index-CSPTTeeM.js +69 -0
- package/dist/ui/index.html +2 -2
- package/package.json +26 -1
- package/plugin/scripts/prompt-recall-hook.mjs +55 -4
- package/plugin/scripts/stop-hook.mjs +57 -6
- package/.agents/plugins/marketplace.json +0 -20
- package/.github/workflows/ci.yml +0 -30
- package/dist/ui/assets/index-B_qIVV0k.js +0 -69
- package/docs/methodology/re-derivation-rate.md +0 -112
- package/docs/methodology/useful-hit-rate.md +0 -79
- package/docs/plans/2026-05-20-fts5-lexical-recall.md +0 -1088
- package/docs/plans/2026-05-20-recall-daemon-wedge-fix.md +0 -662
- package/docs/plans/2026-05-20-recall-hook-design.md +0 -131
- package/docs/plans/2026-05-20-recall-hook-implementation.md +0 -1222
- package/docs/plans/desktop-product.md +0 -69
- package/docs/plans/factstore-design.md +0 -236
- package/logs/CHANGELOG/CHANGELOG-2026.md +0 -1389
- package/logs/CHANGELOG/CHANGELOG.md +0 -337
- package/migrations/000_initial_schema.sql +0 -174
- package/migrations/001_entity_type_rename.sql +0 -17
- package/migrations/002_adapter_state_extend.sql +0 -12
- package/migrations/003_session_embeddings.sql +0 -11
- package/migrations/004_facts.sql +0 -46
- package/migrations/005_sources.sql +0 -31
- package/migrations/006_providers.sql +0 -33
- package/migrations/007_source_tokens.sql +0 -17
- package/migrations/008_fts_rebuild.sql +0 -9
- package/migrations/009_session_embedding_chunks.sql +0 -46
- package/migrations/010_sources_opencode.sql +0 -30
- package/migrations/011_sources_hermes_agent.sql +0 -30
- package/migrations/012_sources_aider.sql +0 -30
- package/migrations/013_adapter_state_failure_count.sql +0 -12
- package/plugin-hermes-agent/README.md +0 -49
- package/plugin-hermes-agent/__init__.py +0 -75
- package/plugin-hermes-agent/plugin.yaml +0 -15
- package/scripts/backfill-citations.mjs +0 -0
- package/scripts/build-codex-plugin.mjs +0 -61
- package/scripts/deepseek-probe.mjs +0 -67
- package/scripts/extract-triples.mjs +0 -207
- package/scripts/longmemeval/embedding-cache.ts +0 -77
- package/scripts/longmemeval/fetch-dataset.sh +0 -25
- package/scripts/longmemeval/run-harness.ts +0 -315
- package/scripts/longmemeval/scorer.ts +0 -99
- package/scripts/longmemeval/tsconfig.json +0 -9
- package/scripts/longmemeval/types.ts +0 -35
- package/scripts/nlm-daily-digest.py +0 -239
- package/scripts/nlm-daily-digest.sh +0 -28
- package/src/cli/classify-parity.ts +0 -257
- package/src/cli/launchctl-helpers.ts +0 -49
- package/src/cli/nlm.ts +0 -885
- package/src/core/actions/actions-log.ts +0 -118
- package/src/core/actions/overlay.ts +0 -117
- package/src/core/adapters/aider.ts +0 -205
- package/src/core/adapters/claude-code.ts +0 -293
- package/src/core/adapters/common.ts +0 -54
- package/src/core/adapters/from-source.ts +0 -57
- package/src/core/adapters/hermes-agent.ts +0 -240
- package/src/core/adapters/hermes.ts +0 -277
- package/src/core/adapters/jsonl-generic.ts +0 -208
- package/src/core/adapters/opencode.ts +0 -281
- package/src/core/adapters/pi.ts +0 -264
- package/src/core/classifier/prompt.ts +0 -200
- package/src/core/dataset/build-dataset.ts +0 -463
- package/src/core/embedding/chunk-body.ts +0 -76
- package/src/core/embedding/embed-backfill.ts +0 -210
- package/src/core/embedding/embed-normalize.ts +0 -135
- package/src/core/facts/backfill-facts.ts +0 -254
- package/src/core/facts/extract-facts.ts +0 -50
- package/src/core/hook/citation-detect.ts +0 -124
- package/src/core/hook/cite-memo.ts +0 -68
- package/src/core/hook/claude-settings.ts +0 -166
- package/src/core/hook/gate.ts +0 -25
- package/src/core/hook/hook-log.ts +0 -41
- package/src/core/hook/memo-sweep.ts +0 -164
- package/src/core/hook/memo.ts +0 -67
- package/src/core/hook/pointer-block.ts +0 -26
- package/src/core/hook/select.ts +0 -32
- package/src/core/hook/transcript.ts +0 -121
- package/src/core/ingest/ingest-session.ts +0 -111
- package/src/core/providers/provider-models.ts +0 -100
- package/src/core/providers/provider-registry.ts +0 -196
- package/src/core/recall/citation-log.ts +0 -108
- package/src/core/recall/filter.ts +0 -27
- package/src/core/recall/index.ts +0 -6
- package/src/core/recall/match-fields.ts +0 -40
- package/src/core/recall/query-log.ts +0 -149
- package/src/core/recall/query-shape.ts +0 -66
- package/src/core/recall/recall-service.ts +0 -320
- package/src/core/recall/recent-log.ts +0 -59
- package/src/core/recall/tokenize.ts +0 -18
- package/src/core/recall/useful-scan.ts +0 -336
- package/src/core/recall-facts/fact-query-log.ts +0 -150
- package/src/core/recall-facts/fact-recall-service.ts +0 -327
- package/src/core/scheduler/scan-once.ts +0 -142
- package/src/core/scheduler/scheduler.ts +0 -225
- package/src/core/sources/source-registry.ts +0 -260
- package/src/core/storage/db-restore.ts +0 -133
- package/src/core/storage/live-status.ts +0 -45
- package/src/core/storage/migrate.ts +0 -72
- package/src/core/storage/sqlite-fact-store.ts +0 -304
- package/src/core/storage/sqlite-session-store.ts +0 -765
- package/src/hook/prompt-recall-hook.ts +0 -174
- package/src/hook/session-end-hook.ts +0 -81
- package/src/hook/session-start-hook.ts +0 -165
- package/src/hook/stop-hook.ts +0 -236
- package/src/http/app.ts +0 -1137
- package/src/install/claude-code.ts +0 -128
- package/src/install/codex.ts +0 -367
- package/src/install/hermes-agent.ts +0 -76
- package/src/install/hermes.ts +0 -78
- package/src/install/ollama.ts +0 -211
- package/src/install/setup.ts +0 -368
- package/src/llm/classifier-box.ts +0 -64
- package/src/llm/deepseek-client.ts +0 -150
- package/src/llm/env-autoload.ts +0 -55
- package/src/llm/ollama-client.ts +0 -189
- package/src/mcp/server.ts +0 -534
- package/src/ports/fact-store.ts +0 -102
- package/src/ports/llm-client.ts +0 -52
- package/src/ports/logger.ts +0 -16
- package/src/ports/session-store.ts +0 -45
- package/src/ports/transcript-adapter.ts +0 -55
- package/src/shared/types.ts +0 -145
- package/src/ui/App.tsx +0 -58
- package/src/ui/components/PromoteOpenButton.tsx +0 -65
- package/src/ui/components/SessionDrawer.tsx +0 -136
- package/src/ui/components/SideNav.tsx +0 -162
- package/src/ui/components/Skeleton.tsx +0 -107
- package/src/ui/index.html +0 -13
- package/src/ui/lib/actions.ts +0 -30
- package/src/ui/lib/api.ts +0 -92
- package/src/ui/lib/dataset.ts +0 -141
- package/src/ui/lib/registries.ts +0 -155
- package/src/ui/lib/view-settings.ts +0 -41
- package/src/ui/main.tsx +0 -15
- package/src/ui/pages/Live.tsx +0 -229
- package/src/ui/pages/Pulse.tsx +0 -415
- package/src/ui/pages/Recall.tsx +0 -190
- package/src/ui/pages/River.tsx +0 -308
- package/src/ui/pages/Search.tsx +0 -93
- package/src/ui/pages/Stub.tsx +0 -9
- package/src/ui/pages/Thread.tsx +0 -262
- package/src/ui/pages/settings/Classifier.tsx +0 -227
- package/src/ui/pages/settings/Data.tsx +0 -190
- package/src/ui/pages/settings/Index.tsx +0 -65
- package/src/ui/pages/settings/Labels.tsx +0 -224
- package/src/ui/pages/settings/Providers.tsx +0 -305
- package/src/ui/pages/settings/SettingsSubnav.tsx +0 -28
- package/src/ui/pages/settings/Sources.tsx +0 -326
- package/src/ui/pages/settings/Views.tsx +0 -96
- package/src/ui/styles.css +0 -1766
- package/src/ui/tsconfig.json +0 -21
- package/src/ui/vite.config.ts +0 -19
- package/tests/fixtures/claude_code/short_session.jsonl +0 -2
- package/tests/fixtures/claude_code/standard_iso.jsonl +0 -4
- package/tests/fixtures/claude_code/tool_heavy.jsonl +0 -8
- package/tests/fixtures/claude_code/with_subagent.jsonl +0 -7
- package/tests/fixtures/facts.ts +0 -17
- package/tests/fixtures/golden-corpus.ts +0 -85
- package/tests/fixtures/hermes/paired_request_dump.json +0 -24
- package/tests/fixtures/hermes/paired_session.json +0 -23
- package/tests/fixtures/hermes/request_dump.json +0 -28
- package/tests/fixtures/hermes/session_iso.json +0 -38
- package/tests/fixtures/hermes/session_unix.json +0 -38
- package/tests/fixtures/hermes/system_only.json +0 -18
- package/tests/fixtures/pi/error-connection-abort.jsonl +0 -8
- package/tests/fixtures/pi/short-successful.jsonl +0 -5
- package/tests/fixtures/pi/with-custom-message.jsonl +0 -6
- package/tests/fixtures/sessions.ts +0 -22
- package/tests/integration/backfill-facts.test.ts +0 -362
- package/tests/integration/citation-explicit.test.ts +0 -111
- package/tests/integration/cite-event.test.ts +0 -169
- package/tests/integration/cite-memo.test.ts +0 -87
- package/tests/integration/db-restore.test.ts +0 -153
- package/tests/integration/embed-backfill.test.ts +0 -176
- package/tests/integration/fact-supersedence.test.ts +0 -313
- package/tests/integration/fts-index.test.ts +0 -60
- package/tests/integration/getbyids-sqlite.test.ts +0 -60
- package/tests/integration/hermes-agent-hooks.test.ts +0 -248
- package/tests/integration/hook-claude-settings.test.ts +0 -205
- package/tests/integration/hook-log.test.ts +0 -54
- package/tests/integration/hook-memo.test.ts +0 -68
- package/tests/integration/hook-pre-compact.test.ts +0 -105
- package/tests/integration/hook-subagent-start.test.ts +0 -102
- package/tests/integration/http.test.ts +0 -401
- package/tests/integration/keyword-search-fts.test.ts +0 -66
- package/tests/integration/mcp-recall-logging.test.ts +0 -88
- package/tests/integration/mcp.test.ts +0 -248
- package/tests/integration/memo-sweep.test.ts +0 -91
- package/tests/integration/prompt-recall-hook.test.ts +0 -88
- package/tests/integration/provider-registry.test.ts +0 -107
- package/tests/integration/recall-golden.test.ts +0 -59
- package/tests/integration/recall-sqlite.test.ts +0 -169
- package/tests/integration/scheduler.test.ts +0 -391
- package/tests/integration/session-end-hook.test.ts +0 -48
- package/tests/integration/session-start-hook.test.ts +0 -126
- package/tests/integration/source-registry.test.ts +0 -120
- package/tests/integration/sqlite-fact-store.test.ts +0 -346
- package/tests/integration/stop-hook.test.ts +0 -560
- package/tests/integration/wal-checkpoint.test.ts +0 -49
- package/tests/unit/cli/launchctl-helpers.test.ts +0 -60
- package/tests/unit/core/adapters/aider.test.ts +0 -230
- package/tests/unit/core/adapters/claude-code.test.ts +0 -118
- package/tests/unit/core/adapters/hermes-agent.test.ts +0 -329
- package/tests/unit/core/adapters/hermes.test.ts +0 -81
- package/tests/unit/core/adapters/jsonl-generic.test.ts +0 -142
- package/tests/unit/core/adapters/opencode.test.ts +0 -354
- package/tests/unit/core/adapters/pi.test.ts +0 -110
- package/tests/unit/core/classifier/prompt.test.ts +0 -126
- package/tests/unit/core/embedding/chunk-body.test.ts +0 -100
- package/tests/unit/core/facts/extract-facts.test.ts +0 -117
- package/tests/unit/core/filter.test.ts +0 -40
- package/tests/unit/core/hook/citation-detect-cite-session.test.ts +0 -96
- package/tests/unit/core/hook/citation-detect.test.ts +0 -124
- package/tests/unit/core/hook/gate.test.ts +0 -29
- package/tests/unit/core/hook/pointer-block.test.ts +0 -22
- package/tests/unit/core/hook/select.test.ts +0 -66
- package/tests/unit/core/match-fields.test.ts +0 -39
- package/tests/unit/core/mcp-cite-session.test.ts +0 -51
- package/tests/unit/core/providers/provider-models.test.ts +0 -101
- package/tests/unit/core/query-shape.test.ts +0 -92
- package/tests/unit/core/recall-facts/fact-recall-service.test.ts +0 -258
- package/tests/unit/core/recall-service.test.ts +0 -200
- package/tests/unit/core/storage/live-status.test.ts +0 -54
- package/tests/unit/core/tokenize.test.ts +0 -32
- package/tests/unit/core/useful-scan.test.ts +0 -537
- package/tests/unit/llm/embed.test.ts +0 -93
- package/tests/unit/llm/ollama-client.test.ts +0 -124
- package/tests/unit/scripts/longmemeval-scorer.test.ts +0 -114
- package/tsconfig.json +0 -31
- package/tsconfig.test.json +0 -11
- package/vitest.config.ts +0 -22
|
@@ -1,99 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Pure scoring functions for the LongMemEval harness. Two metrics:
|
|
3
|
-
*
|
|
4
|
-
* - R@k (recall at k): did the retriever return any gold session ID in
|
|
5
|
-
* its top-k results? Standard benchmark metric.
|
|
6
|
-
* - Session-body hit: did the gold answer text appear anywhere in the
|
|
7
|
-
* bodies of the top-k returned sessions? NLM-specific companion that
|
|
8
|
-
* captures session-as-primary-unit value the strict-ID R@k can miss
|
|
9
|
-
* (e.g. a session that supersedes the gold session and quotes its
|
|
10
|
-
* decision).
|
|
11
|
-
*
|
|
12
|
-
* Both functions are deterministic and dependency-free so the harness can
|
|
13
|
-
* test them with synthetic inputs.
|
|
14
|
-
*/
|
|
15
|
-
|
|
16
|
-
/** Everything `scoreOne` needs in order to grade one question. */
export interface ScoreInputs {
  /** Session IDs in the order they were returned; only the first `k` are scored. */
  readonly returnedIds: readonly string[];
  /** Gold session IDs; matching any single one counts as a recall hit. */
  readonly goldIds: readonly string[];
  /**
   * Body text of the returned sessions; only the first `k` entries are searched.
   * NOTE(review): presumably ordered to line up with `returnedIds` — confirm at the call site.
   */
  readonly returnedBodies: readonly string[];
  /** Some LongMemEval answers are ints (counting questions); coerced to string. */
  readonly answer: boolean | number | string;
  /** Rank cut-off applied to both `returnedIds` and `returnedBodies`. */
  readonly k: number;
}
|
|
25
|
-
|
|
26
|
-
/** Per-question result: one binary indicator per metric. */
export interface SingleScore {
  /** 1 when at least one gold session ID is among the top-k returned IDs, otherwise 0. */
  readonly recallAtK: 1 | 0;
  /** 1 when the normalized answer is found in at least one top-k body, otherwise 0. */
  readonly sessionBodyHit: 1 | 0;
}
|
|
30
|
-
|
|
31
|
-
/**
 * Score a single question. Returns 0/1 indicators that aggregate via mean.
 *
 * - `recallAtK` is 1 when any of the first `k` returned IDs is a gold ID.
 * - `sessionBodyHit` is 1 when the normalized answer occurs in any of the
 *   first `k` returned bodies (also normalized).
 *
 * An answer that normalizes to the empty string never produces a body hit.
 *
 * @param input Returned IDs/bodies, gold IDs, the gold answer and the cut-off `k`.
 * @returns The two binary indicators for this question.
 */
export function scoreOne(input: ScoreInputs): SingleScore {
  // A negative k would make slice() count from the end of the array and score
  // almost everything; treat it as "score nothing" instead.
  const k = Math.max(0, input.k);

  const goldSet = new Set(input.goldIds);
  const recallAtK = input.returnedIds.slice(0, k).some((id) => goldSet.has(id)) ? 1 : 0;

  const ans = normalize(String(input.answer));
  if (ans.length === 0) {
    return { recallAtK, sessionBodyHit: 0 };
  }

  // Session-body hit: substring match for multi-word answers; whole-token
  // match for short answers (single token <4 chars: "3", "yes", numeric
  // counts). Without the boundary, a numeric answer "3" hits every body
  // containing "13", "3rd", etc., inflating the metric to noise.
  //
  // The boundary is expressed with Unicode-aware lookarounds rather than `\b`:
  // `\b` only knows ASCII word characters, while normalize() keeps every
  // Unicode letter and digit, so `\b` can never match a non-ASCII short answer
  // and wrongly matches an ASCII one glued to a non-ASCII letter.
  const isShortToken = !ans.includes(" ") && ans.length < 4;
  // Built once per question instead of once per body. No `g` flag, so the
  // pattern carries no lastIndex state between test() calls.
  const shortTokenPattern = isShortToken
    ? new RegExp(`(?<![\\p{L}\\p{N}])${escapeRegExp(ans)}(?![\\p{L}\\p{N}])`, "u")
    : null;

  const hit = input.returnedBodies.slice(0, k).some((body) => {
    const text = normalize(body);
    return shortTokenPattern === null ? text.includes(ans) : shortTokenPattern.test(text);
  });

  return { recallAtK, sessionBodyHit: hit ? 1 : 0 };
}
|
|
59
|
-
|
|
60
|
-
/** Mean rates over a set of per-question scores. */
export interface Aggregate {
  /** Number of questions that were aggregated. */
  readonly n: number;
  /** Mean of the per-question `recallAtK` indicators. */
  readonly recallAtK: number;
  /** Mean of the per-question `sessionBodyHit` indicators. */
  readonly sessionBodyHitRate: number;
}
|
|
65
|
-
|
|
66
|
-
/**
 * Aggregate per-question scores into mean rates.
 *
 * An empty input yields zero for every field rather than dividing by zero.
 * Both rates are rounded with `round3`.
 */
export function aggregate(scores: ReadonlyArray<SingleScore>): Aggregate {
  const n = scores.length;
  if (n === 0) {
    return { n: 0, recallAtK: 0, sessionBodyHitRate: 0 };
  }

  const recallHits = scores.reduce((sum, score) => sum + score.recallAtK, 0);
  const bodyHits = scores.reduce((sum, score) => sum + score.sessionBodyHit, 0);

  return {
    n,
    recallAtK: round3(recallHits / n),
    sessionBodyHitRate: round3(bodyHits / n),
  };
}
|
|
84
|
-
|
|
85
|
-
/**
 * Canonicalize text for matching: lower-case it, turn every character that is
 * not a Unicode letter, a Unicode digit or whitespace into a space, collapse
 * whitespace runs to a single space, and trim both ends.
 */
function normalize(s: string): string {
  const lowered = s.toLowerCase();
  const withoutPunctuation = lowered.replace(/[^\p{L}\p{N}\s]/gu, " ");
  const singleSpaced = withoutPunctuation.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
|
|
92
|
-
|
|
93
|
-
/** Round `x` to three decimal places. */
function round3(x: number): number {
  const scaled = x * 1000;
  return Math.round(scaled) / 1000;
}
|
|
96
|
-
|
|
97
|
-
/** Backslash-escape every regular-expression metacharacter in `s` so it matches literally. */
function escapeRegExp(s: string): string {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  // "$&" re-inserts the matched character after the added backslash.
  return s.replace(metacharacters, "\\$&");
}
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* LongMemEval dataset schema. Mirrors the published JSON shape from
|
|
3
|
-
* huggingface.co/datasets/xiaowu0162/longmemeval-cleaned.
|
|
4
|
-
*
|
|
5
|
-
* Each instance: a question against a haystack of past chat sessions. The
|
|
6
|
-
* gold session IDs are in `answer_session_ids` — that's what the retrieval
|
|
7
|
-
* step is scored against (R@k: was any gold ID returned in the top k).
|
|
8
|
-
*/
|
|
9
|
-
|
|
10
|
-
/** One message inside a haystack session. */
export interface LongMemEvalTurn {
  /** Who produced the message. */
  readonly role: "assistant" | "user";
  /** Message text. */
  readonly content: string;
  /**
   * Optional flag carried over from the dataset.
   * NOTE(review): presumably marks turns that contain the gold answer — confirm against the dataset card.
   */
  readonly has_answer?: boolean;
}
|
|
15
|
-
|
|
16
|
-
/** One benchmark instance: a question plus the haystack of sessions it is asked against. */
export interface LongMemEvalInstance {
  readonly question_id: string;
  readonly question_type: string;
  readonly question: string;
  // LongMemEval answers are sometimes ints/booleans for counting and
  // temporal-reasoning questions — coerce at the call site.
  readonly answer: boolean | number | string;
  readonly question_date: string;
  // NOTE(review): the three haystack_* arrays are presumably parallel
  // (same length, same order) — confirm against the dataset.
  readonly haystack_session_ids: readonly string[];
  readonly haystack_dates: readonly string[];
  readonly haystack_sessions: ReadonlyArray<readonly LongMemEvalTurn[]>;
  /** Gold session IDs that retrieval is scored against. */
  readonly answer_session_ids: readonly string[];
}
|
|
29
|
-
|
|
30
|
-
/**
 * Serialize a session's turn list to a single body string for NLM ingest.
 *
 * Each turn becomes "User: <content>" or "Assistant: <content>"; turns are
 * separated by one blank line. An empty turn list yields the empty string.
 */
export function turnsToBody(turns: ReadonlyArray<LongMemEvalTurn>): string {
  const paragraphs: string[] = [];
  for (const { role, content } of turns) {
    const speaker = role === "user" ? "User" : "Assistant";
    paragraphs.push(`${speaker}: ${content}`);
  }
  return paragraphs.join("\n\n");
}
|
|
@@ -1,239 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""NLM daily digest — composes and posts a Telegram summary of recall activity.
|
|
3
|
-
|
|
4
|
-
Called from nlm-daily-digest.sh. Reads from the local NLM daemon (default
|
|
5
|
-
http://localhost:3940) and posts to the Telegram chat configured in env.
|
|
6
|
-
|
|
7
|
-
Token / chat id are read from the Whtnxt Agent .env (sourced by the wrapper
|
|
8
|
-
shell script). NLM port defaults to 3940 but honors NLM_PORT.
|
|
9
|
-
|
|
10
|
-
No external deps — urllib only. Exits non-zero on any unrecoverable error so
|
|
11
|
-
the cron log captures the failure.
|
|
12
|
-
"""
|
|
13
|
-
|
|
14
|
-
from __future__ import annotations
|
|
15
|
-
|
|
16
|
-
import json
|
|
17
|
-
import os
|
|
18
|
-
import re
|
|
19
|
-
import sys
|
|
20
|
-
import urllib.parse
|
|
21
|
-
import urllib.request
|
|
22
|
-
from collections import Counter
|
|
23
|
-
from datetime import datetime, time, timedelta, timezone
|
|
24
|
-
from pathlib import Path
|
|
25
|
-
from typing import Any
|
|
26
|
-
|
|
27
|
-
# Location of the hook log. Defaults to ~/.nlm/hook-log.jsonl (alongside the
# daemon DB) and can be redirected with NLM_HOOK_LOG, mirroring the TS
# hook-log module.
_DEFAULT_HOOK_LOG = Path.home() / ".nlm" / "hook-log.jsonl"
HOOK_LOG_PATH = Path(os.environ.get("NLM_HOOK_LOG", str(_DEFAULT_HOOK_LOG)))

# Timezone used for the "yesterday" window. The cron job fires at 7am CT and
# the digest is read in CT, so "yesterday" means yesterday in local time, not
# yesterday in UTC. This is the system's local zone as seen at import time.
LOCAL_TZ = datetime.now().astimezone().tzinfo

# Substrings that identify a recall as a probe/test rather than real agent
# usage. Matched case-insensitively against the query text.
PROBE_PATTERNS: tuple[str, ...] = (
    "concurrency probe",
    "test probe",
    "path test",
    "recall test",
    "smoke",
    "cutover-test",
)
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
def is_probe(query: str | None) -> bool:
    """Return True when `query` contains any PROBE_PATTERNS entry (case-insensitive).

    A missing or empty query is never a probe.
    """
    if query:
        lowered = query.lower()
        for pattern in PROBE_PATTERNS:
            if pattern in lowered:
                return True
    return False
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
def http_get_json(url: str, timeout: int = 5) -> Any:
    """GET `url` and return the response body parsed as UTF-8 JSON.

    Args:
        url: Address to fetch.
        timeout: Timeout in seconds handed to urlopen.

    Returns:
        Whatever value the JSON document decodes to.
    """
    request = urllib.request.Request(url, headers={"User-Agent": "nlm-daily-digest/1.0"})
    with urllib.request.urlopen(request, timeout=timeout) as response:
        raw = response.read()
    return json.loads(raw.decode("utf-8"))
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
def post_telegram(token: str, chat_id: str, text: str) -> None:
    """Send `text` to a Telegram chat through the Bot API `sendMessage` method.

    Args:
        token: Bot token, interpolated into the API URL.
        chat_id: Destination chat.
        text: Message body; link previews are disabled.

    Raises:
        RuntimeError: The API answered but its JSON body has a falsy "ok".
    """
    form = {
        "chat_id": chat_id,
        "text": text,
        "disable_web_page_preview": "true",
    }
    request = urllib.request.Request(
        f"https://api.telegram.org/bot{token}/sendMessage",
        # Supplying data makes urllib issue a POST.
        data=urllib.parse.urlencode(form).encode("utf-8"),
        headers={"User-Agent": "nlm-daily-digest/1.0"},
    )
    with urllib.request.urlopen(request, timeout=10) as response:
        body = json.loads(response.read().decode("utf-8"))
        if not body.get("ok"):
            raise RuntimeError(f"telegram api error: {body}")
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
def yesterday_window() -> tuple[datetime, datetime]:
    """Return (start, end) of yesterday in local tz, both tz-aware.

    `start` is local midnight at the beginning of yesterday and `end` is local
    midnight at the beginning of today, so the window is half-open:
    start <= t < end.

    Each midnight is built as a naive local datetime and then made aware with
    astimezone(), which applies the UTC offset that was in effect at that
    particular instant. Stamping both with one offset captured earlier shifts
    the window by an hour on the day after a DST change.
    """
    today_local = datetime.now().date()
    yesterday = today_local - timedelta(days=1)
    # astimezone() on a naive datetime treats it as system-local time.
    start = datetime.combine(yesterday, time.min).astimezone()
    end = datetime.combine(today_local, time.min).astimezone()
    return start, end
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
def parse_iso(ts: str) -> datetime | None:
|
|
88
|
-
try:
|
|
89
|
-
return datetime.fromisoformat(ts.replace("Z", "+00:00"))
|
|
90
|
-
except (TypeError, ValueError, AttributeError):
|
|
91
|
-
return None
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
def hook_liveness_check(dataset_sessions: list[dict[str, Any]]) -> str | None:
    """Return an alert string if Claude Code ran yesterday but the hook did not fire.

    Returns None when either: (a) no Claude Code sessions yesterday, in which
    case silence is expected, or (b) hook fires were recorded as expected.

    This is the load-bearing liveness check. The install-time smoke test only
    catches malformed commands at install moment — this catches post-install
    drift (node upgrades, dist moves, settings.json hand-edits, Claude Code
    hook dispatcher changes) by correlating real usage with real hook output.

    Args:
        dataset_sessions: Session records; only "runtime" and "started_at" are
            read. Entries that are not dicts are ignored.

    Returns:
        A one-line warning, or None when nothing needs reporting.
    """
    start, end = yesterday_window()

    def in_window(raw: Any) -> bool:
        """True when `raw` parses to a timestamp inside [start, end)."""
        ts = parse_iso(str(raw))
        if ts is None:
            return False
        if ts.tzinfo is None:
            # Comparing naive with aware raises TypeError; assume UTC.
            # NOTE(review): confirm UTC is the right reading for offset-less stamps.
            ts = ts.replace(tzinfo=timezone.utc)
        return start <= ts < end

    cc_sessions_yesterday = 0
    for s in dataset_sessions:
        if not isinstance(s, dict):
            continue  # malformed record; it cannot tell us anything
        if not str(s.get("runtime", "")).startswith("claude-code"):
            continue
        if in_window(s.get("started_at", "")):
            cc_sessions_yesterday += 1

    if cc_sessions_yesterday == 0:
        return None  # No Claude Code usage yesterday; silence is fine.

    if not HOOK_LOG_PATH.exists():
        return (
            f"⚠️ hook silent: {cc_sessions_yesterday} Claude Code sessions yesterday, "
            f"0 hook fires (log file missing at {HOOK_LOG_PATH})"
        )

    hook_fired = False
    # errors="replace": one corrupt byte in the log must not abort the digest.
    with HOOK_LOG_PATH.open("r", encoding="utf-8", errors="replace") as f:
        for line in f:
            try:
                entry = json.loads(line)
            except json.JSONDecodeError:
                continue
            # A line can be valid JSON without being an object (number, list...).
            if not isinstance(entry, dict) or entry.get("mode") != "live":
                continue
            if in_window(entry.get("ts", "")):
                hook_fired = True
                break  # one live fire is enough to prove the hook works

    if not hook_fired:
        return (
            f"⚠️ hook silent: {cc_sessions_yesterday} Claude Code sessions yesterday, "
            f"0 live hook fires — check `nlm hook install` + ~/.claude/settings.json"
        )
    return None
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
def compose(stats: dict[str, Any], recent: list[dict[str, Any]], port: int, hook_alert: str | None = None) -> str:
    """Format the digest message body.

    Args:
        stats: Server-computed statistics; reads "total", "top_queries",
            "useful_hit_rate" and "hit_rate".
        recent: Recent recall events; reads "ts", "query" and "source".
        port: Daemon port, used only for the UI link on the last line.
        hook_alert: Optional warning placed above the statistics.

    Returns:
        The plain-text message.
    """
    # `stats` window is server-side (7 days currently). `recent` is the
    # last ~200 events we use to compute the *24h* real-traffic slice.
    cutoff = datetime.now(timezone.utc) - timedelta(hours=24)
    real_24h: list[dict[str, Any]] = []
    for e in recent:
        if not isinstance(e, dict):
            continue
        # parse_iso returns None for missing, non-string and malformed
        # timestamps, so one bad event cannot abort the digest.
        ts = parse_iso(str(e.get("ts", "")))
        if ts is None:
            continue
        if ts.tzinfo is None:
            # Comparing naive with aware raises TypeError; assume UTC.
            # NOTE(review): confirm UTC is the right reading for offset-less stamps.
            ts = ts.replace(tzinfo=timezone.utc)
        if ts < cutoff or is_probe(e.get("query")):
            continue
        real_24h.append(e)

    src_24h = Counter(e.get("source", "?") for e in real_24h)
    top_q = Counter(e.get("query", "") for e in real_24h if e.get("query")).most_common(5)

    # 7-day stats (server-computed) — strip probes from the totals.
    total_7d = int(stats.get("total", 0))
    top_queries = stats.get("top_queries")
    probes_7d = (
        sum(count for q, count in top_queries if is_probe(q))
        if isinstance(top_queries, list)
        else 0
    )
    real_7d = max(total_7d - probes_7d, 0)

    useful = stats.get("useful_hit_rate")
    useful_line = (
        "useful_hit_rate: pending (see docs/methodology/useful-hit-rate.md)"
        if useful is None
        else f"useful_hit_rate (7d): {useful:.0%}"
    )

    # `or 0` also covers an explicit null; float(None) would raise.
    hit_rate = float(stats.get("hit_rate") or 0)

    src_str = " · ".join(f"{k}={v}" for k, v in sorted(src_24h.items())) or "none"

    top_lines = "\n".join(f" {n}. {q[:60]}" for n, (q, _) in enumerate(top_q, 1)) or " (none)"

    today = datetime.now().strftime("%a %Y-%m-%d")
    alert_block = f"{hook_alert}\n\n" if hook_alert else ""
    return (
        f"NLM digest — {today}\n"
        f"\n"
        f"{alert_block}"
        f"Last 24h (real traffic): {len(real_24h)} queries · {src_str}\n"
        f"Last 7d: {real_7d} real / {total_7d} total · hit_rate {hit_rate:.0%}\n"
        f"{useful_line}\n"
        f"\n"
        f"Top real queries (24h):\n"
        f"{top_lines}\n"
        f"\n"
        f"UI: http://localhost:{port}/ui/"
    )
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
def main() -> int:
    """Fetch stats from the local daemon, build the digest and post it to Telegram.

    Returns:
        0 when the digest was posted, 1 when the daemon could not be reached
        (an alert is posted instead, best effort), 2 when the Telegram
        credentials are missing from the environment.
    """
    port = int(os.environ.get("NLM_PORT", "3940"))
    token = os.environ.get("TELEGRAM_BOT_TOKEN")
    chat_id = os.environ.get("TELEGRAM_CHAT_ID")
    if not (token and chat_id):
        print("missing TELEGRAM_BOT_TOKEN or TELEGRAM_CHAT_ID", file=sys.stderr)
        return 2

    base = f"http://localhost:{port}"
    try:
        stats = http_get_json(f"{base}/api/recall/stats")
        recent_resp = http_get_json(f"{base}/api/recall/recent?limit=200")
        dataset_resp = http_get_json(f"{base}/api/dataset", timeout=15)
    except Exception as e:
        # Daemon down — post an alert instead of failing silently.
        text = f"NLM digest — {datetime.now().strftime('%a %Y-%m-%d')}\n\nDaemon unreachable at {base}\n{e}"
        try:
            post_telegram(token, chat_id, text)
        except Exception as send_err:
            print(f"telegram send also failed: {send_err}", file=sys.stderr)
        return 1

    # Both endpoints are expected to answer with a JSON object; anything else
    # is treated as "no data".
    recent: list[dict[str, Any]] = []
    if isinstance(recent_resp, dict):
        recent = recent_resp.get("entries", [])
    dataset_sessions: list[dict[str, Any]] = []
    if isinstance(dataset_resp, dict):
        dataset_sessions = dataset_resp.get("sessions", [])

    hook_alert = hook_liveness_check(dataset_sessions)
    text = compose(stats, recent, port, hook_alert=hook_alert)
    post_telegram(token, chat_id, text)
    print(f"[{datetime.now().isoformat()}] digest posted ({len(text)} chars)")
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env bash
# NLM daily digest — posts a morning Telegram summary of recall activity.
#
# Cron entry (7:00am CT, runs after daily-reminders at 6:50am):
# 0 7 * * * "/Users/echalupa/Documents/Coding Projects/nlm-memory-ts/scripts/nlm-daily-digest.sh" >> "/Users/echalupa/Documents/Coding Projects/nlm-memory-ts/logs/daily-digest/digest.log" 2>&1
#
# Reads credentials from the Whtnxt Agent .env (Telegram bot token + chat id).
#
# The machine-specific paths below are defaults only; each can be overridden
# from the environment (NLM_REPO_DIR, WHTNXT_ENV, NLM_BIN) so the script also
# runs from another checkout or with another node version.

set -euo pipefail

REPO_DIR="${NLM_REPO_DIR:-/Users/echalupa/Documents/Coding Projects/nlm-memory-ts}"
WHTNXT_ENV="${WHTNXT_ENV:-/Users/echalupa/Documents/Coding Projects/Whtnxt Agent/.env}"
NLM_BIN="${NLM_BIN:-/Users/echalupa/.nvm/versions/node/v22.22.1/bin/nlm}"
LOG_DIR="${REPO_DIR}/logs/daily-digest"
mkdir -p "${LOG_DIR}"

# Fail with a readable message instead of a bare "No such file" from `.`.
if [[ ! -r "${WHTNXT_ENV}" ]]; then
  echo "env file not readable: ${WHTNXT_ENV}" >&2
  exit 1
fi

# set -a exports every variable the env file assigns so python3 inherits them.
set -a
# shellcheck source=/dev/null
. "${WHTNXT_ENV}"
set +a

: "${TELEGRAM_BOT_TOKEN:?TELEGRAM_BOT_TOKEN missing}"
: "${TELEGRAM_CHAT_ID:?TELEGRAM_CHAT_ID missing}"

# Populate useful-hit-log before stats fetch so useful_hit_rate is live.
# Best effort: a failed scan must not stop the digest from going out.
"${NLM_BIN}" useful-scan --days 1 \
  >> "${LOG_DIR}/useful-scan.log" 2>&1 || true

python3 "${REPO_DIR}/scripts/nlm-daily-digest.py"
|
|
@@ -1,257 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* `nlm classify-parity` — Phase C parity verification harness.
|
|
3
|
-
*
|
|
4
|
-
* Reads N sessions from ~/.nlm/canonical.sqlite (read-only by default),
|
|
5
|
-
* runs the TS OllamaClient.classify on each body, diffs the result
|
|
6
|
-
* against the persisted Python classifier output, and prints aggregate
|
|
7
|
-
* metrics: Jaccard similarity on entities/decisions/open sets, label
|
|
8
|
-
* exact match rate, summary length delta, schema-failure count.
|
|
9
|
-
*
|
|
10
|
-
* Safe: opens the live store in readonly mode. Does not write anything
|
|
11
|
-
* back. Designed to be run interactively from a terminal during the
|
|
12
|
-
* Phase C cutover-prep window.
|
|
13
|
-
*/
|
|
14
|
-
|
|
15
|
-
import { homedir } from "node:os";
import { resolve } from "node:path";
import { pathToFileURL } from "node:url";
import Database from "better-sqlite3";
import * as sqliteVec from "sqlite-vec";
import type { LLMClient } from "../ports/llm-client.js";
import { LLMUnreachableError } from "../ports/llm-client.js";
import { DeepSeekClient } from "../llm/deepseek-client.js";
import { OllamaClient, ClassifierSchemaError } from "../llm/ollama-client.js";
import { autoloadEnv } from "../llm/env-autoload.js";
|
|
24
|
-
|
|
25
|
-
/** LLM backend used to run the classification; selected with `--provider`. */
export type Provider = "ollama" | "deepseek";
|
|
26
|
-
|
|
27
|
-
/** Resolved command-line options for a parity run (produced by `parseArgs`). */
interface CliOptions {
  /** Maximum number of sessions to read; bound to the `LIMIT ?` of the sessions query. */
  readonly limit: number;
  /** Path of the SQLite database, which `runParity` opens in read-only mode. */
  readonly dbPath: string;
  /** Base URL handed to `OllamaClient`; only used when `provider` is "ollama". */
  readonly ollamaUrl: string;
  /** Model name passed to whichever client `buildClient` constructs. */
  readonly classifyModel: string;
  /** Which LLM backend to classify with. */
  readonly provider: Provider;
  /** When true, one progress line per session is written to stderr. */
  readonly verbose: boolean;
}
|
|
35
|
-
|
|
36
|
-
/** Row shape of the `SELECT id, label, summary, body FROM sessions` query in `runParity`. */
interface SessionRow {
  id: string;
  label: string;
  summary: string;
  /** Typed as nullable, although the query itself filters out NULL and empty bodies. */
  body: string | null;
}
|
|
42
|
-
|
|
43
|
-
/** Classification already stored in the database for one session (the comparison baseline). */
interface PersistedClassification {
  /** `sessions.label` */
  label: string;
  /** `sessions.summary` */
  summary: string;
  /** `session_entities.entity_canonical` values for the session. */
  entities: string[];
  /** Text of `markers` rows whose kind is "decision", in `position` order. */
  decisions: string[];
  /** Text of `markers` rows whose kind is "open", in `position` order. */
  open: string[];
}
|
|
50
|
-
|
|
51
|
-
/** Per-session comparison between the freshly computed and the persisted classification. */
interface DiffMetrics {
  sessionId: string;
  /** True when both labels are equal after lower-casing and trimming; false on failure. */
  labelMatch: boolean;
  /** Label returned by the TS client; empty string when classification threw. */
  labelTs: string;
  /** Label persisted in the database. */
  labelPy: string;
  /** Jaccard similarity of the entity sets (0 when classification threw). */
  entityJaccard: number;
  /** Jaccard similarity of the decision sets (0 when classification threw). */
  decisionJaccard: number;
  /** Jaccard similarity of the open-item sets (0 when classification threw). */
  openJaccard: number;
  /** New summary length minus persisted summary length, in UTF-16 code units. */
  summaryDeltaChars: number;
  /** True only when the thrown error was a `ClassifierSchemaError`. */
  schemaFailure: boolean;
  /** Message of the thrown error; absent when classification completed. */
  errorMessage?: string;
}
|
|
63
|
-
|
|
64
|
-
/** Aggregate result of a parity run, as returned by `runParity`. */
export interface ParityReport {
  /** Number of sessions for which classification was attempted (length of `diffs`). */
  attempted: number;
  /** Number of entries in `diffs` that carry no error. */
  succeeded: number;
  /** Count of classifications that threw `ClassifierSchemaError`. */
  schemaFailures: number;
  /** Count of classifications that threw `LLMUnreachableError`. */
  networkFailures: number;
  /** Fraction of successful sessions whose labels matched; rounded to 3 decimals, 0 if none. */
  labelExactMatchRate: number;
  /** Mean entity Jaccard over successful sessions; rounded to 3 decimals, 0 if none. */
  meanEntityJaccard: number;
  /** Mean decision Jaccard over successful sessions; rounded to 3 decimals, 0 if none. */
  meanDecisionJaccard: number;
  /** Mean open-item Jaccard over successful sessions; rounded to 3 decimals, 0 if none. */
  meanOpenJaccard: number;
  /** Per-session details, including failed sessions. */
  diffs: ReadonlyArray<DiffMetrics>;
}
|
|
75
|
-
|
|
76
|
-
/**
 * Parse `nlm classify-parity` command-line arguments.
 *
 * Recognised flags: `--limit <n>`, `--provider <ollama|deepseek>`, `--db <path>`,
 * `--ollama <url>`, `--model <name>` and the boolean `--verbose`.
 *
 * Defaults: limit 10; provider "deepseek" (any value other than "ollama",
 * compared case-insensitively, also selects it); db from `NLM_DB_PATH` or
 * `~/.nlm/canonical.sqlite`; ollama URL from `NLM_OLLAMA_URL` or
 * `http://localhost:11434`; model chosen per provider.
 *
 * @param argv Arguments after the script name (e.g. `process.argv.slice(2)`).
 * @returns The resolved options; never throws on malformed input, it falls back instead.
 */
function parseArgs(argv: string[]): CliOptions {
  // Value of `name`, or undefined when the flag is absent or has no value.
  const flag = (name: string): string | undefined => {
    const i = argv.indexOf(name);
    if (i === -1) return undefined;
    const value = argv[i + 1];
    // A following token that is itself a flag means the value was omitted;
    // do not swallow it (e.g. `--db --verbose` must not set dbPath to "--verbose").
    if (value === undefined || value.startsWith("--")) return undefined;
    return value;
  };

  const limit = Number.parseInt(flag("--limit") ?? "10", 10);
  const provider: Provider =
    (flag("--provider") ?? "deepseek").toLowerCase() === "ollama" ? "ollama" : "deepseek";
  const defaultModel = provider === "deepseek" ? "deepseek-v4-flash" : "phi4-mini:latest";

  return {
    // Non-numeric, zero and negative limits all fall back to 10.
    limit: Number.isFinite(limit) && limit > 0 ? limit : 10,
    dbPath:
      flag("--db") ?? process.env["NLM_DB_PATH"] ?? resolve(homedir(), ".nlm/canonical.sqlite"),
    ollamaUrl: flag("--ollama") ?? process.env["NLM_OLLAMA_URL"] ?? "http://localhost:11434",
    classifyModel: flag("--model") ?? defaultModel,
    provider,
    verbose: argv.includes("--verbose"),
  };
}
|
|
97
|
-
|
|
98
|
-
/**
 * Construct the LLM client for the requested provider.
 *
 * For "deepseek", `autoloadEnv()` is invoked before the client is created;
 * every other provider value yields an `OllamaClient` bound to `ollamaUrl`.
 */
function buildClient(opts: { provider: Provider; classifyModel: string; ollamaUrl: string }): LLMClient {
  const { provider, classifyModel, ollamaUrl } = opts;

  if (provider !== "deepseek") {
    return new OllamaClient({ baseUrl: ollamaUrl, classifyModel });
  }

  autoloadEnv();
  return new DeepSeekClient({ classifyModel });
}
|
|
105
|
-
|
|
106
|
-
/**
 * Jaccard similarity |A ∩ B| / |A ∪ B| of two string collections.
 *
 * Items are compared after lower-casing and trimming, and duplicates collapse.
 * Two empty collections are defined as identical (returns 1).
 */
function jaccard(a: ReadonlyArray<string>, b: ReadonlyArray<string>): number {
  const normalise = (items: ReadonlyArray<string>): Set<string> => {
    const out = new Set<string>();
    for (const item of items) out.add(item.toLowerCase().trim());
    return out;
  };

  const left = normalise(a);
  const right = normalise(b);
  if (left.size === 0 && right.size === 0) return 1;

  let shared = 0;
  for (const item of left) {
    if (right.has(item)) shared += 1;
  }
  // |A ∪ B| = |A| + |B| - |A ∩ B|
  return shared / (left.size + right.size - shared);
}
|
|
114
|
-
|
|
115
|
-
/**
 * Read the most recent sessions that have a non-empty body, together with the
 * classification already persisted for each of them.
 *
 * The database is opened read-only and is always closed before returning,
 * including when loading the extension or running a query throws.
 */
function loadPersistedSessions(
  dbPath: string,
  limit: number,
): { rows: SessionRow[]; persistedById: Map<string, PersistedClassification> } {
  const db = new Database(dbPath, { readonly: true });
  try {
    sqliteVec.load(db);

    const rows = db
      .prepare<[number], SessionRow>(
        `SELECT id, label, summary, body
         FROM sessions
         WHERE body IS NOT NULL AND body != ''
         ORDER BY started_at DESC
         LIMIT ?`,
      )
      .all(limit);

    // Prepared once and reused for every session instead of once per row.
    const entityStmt = db.prepare<[string], { entity_canonical: string }>(
      "SELECT entity_canonical FROM session_entities WHERE session_id = ?",
    );
    const markerStmt = db.prepare<[string], { kind: "decision" | "open"; text: string }>(
      "SELECT kind, text FROM markers WHERE session_id = ? ORDER BY position",
    );

    const persistedById = new Map<string, PersistedClassification>();
    for (const r of rows) {
      const entities = entityStmt.all(r.id).map((x) => x.entity_canonical);
      const markers = markerStmt.all(r.id);
      persistedById.set(r.id, {
        label: r.label,
        summary: r.summary,
        entities,
        decisions: markers.filter((m) => m.kind === "decision").map((m) => m.text),
        open: markers.filter((m) => m.kind === "open").map((m) => m.text),
      });
    }
    return { rows, persistedById };
  } finally {
    db.close();
  }
}

/**
 * Re-classify up to `opts.limit` recent sessions and compare each result with
 * the classification persisted in the database.
 *
 * Sessions are classified sequentially. An error thrown by `classify` is
 * caught, recorded in the session's diff entry and counted as a schema or
 * network failure when it is a `ClassifierSchemaError` / `LLMUnreachableError`;
 * it does not abort the run. Errors raised while reading the database propagate.
 *
 * @param opts Resolved CLI options.
 * @returns Aggregate metrics plus the per-session diffs. Means are computed
 *   over sessions that classified without error only.
 */
export async function runParity(opts: CliOptions): Promise<ParityReport> {
  const { rows, persistedById } = loadPersistedSessions(opts.dbPath, opts.limit);

  const client = buildClient(opts);

  const diffs: DiffMetrics[] = [];
  let schemaFailures = 0;
  let networkFailures = 0;

  let idx = 0;
  for (const r of rows) {
    idx += 1;
    const py = persistedById.get(r.id);
    if (!py || !r.body) continue;

    const t0 = Date.now();
    try {
      const ts = await client.classify(r.body);
      const elapsed = ((Date.now() - t0) / 1000).toFixed(1);
      const labelMatch = ts.label.toLowerCase().trim() === py.label.toLowerCase().trim();
      const entJ = jaccard(ts.entities, py.entities);
      const decJ = jaccard(ts.decisions, py.decisions);
      const openJ = jaccard(ts.open, py.open);
      diffs.push({
        sessionId: r.id,
        labelMatch,
        labelTs: ts.label,
        labelPy: py.label,
        entityJaccard: entJ,
        decisionJaccard: decJ,
        openJaccard: openJ,
        summaryDeltaChars: ts.summary.length - py.summary.length,
        schemaFailure: false,
      });
      if (opts.verbose) {
        const tag = labelMatch ? "EQ " : "DIFF";
        process.stderr.write(
          ` [${idx}/${rows.length}] ${elapsed}s ${tag} ${r.id} ent=${entJ.toFixed(2)} dec=${decJ.toFixed(2)} open=${openJ.toFixed(2)}\n`,
        );
      }
    } catch (e) {
      const elapsed = ((Date.now() - t0) / 1000).toFixed(1);
      const message = e instanceof Error ? e.message : String(e);
      if (e instanceof ClassifierSchemaError) schemaFailures += 1;
      else if (e instanceof LLMUnreachableError) networkFailures += 1;
      diffs.push({
        sessionId: r.id,
        labelMatch: false,
        labelTs: "",
        labelPy: py.label,
        entityJaccard: 0,
        decisionJaccard: 0,
        openJaccard: 0,
        summaryDeltaChars: 0,
        schemaFailure: e instanceof ClassifierSchemaError,
        errorMessage: message,
      });
      if (opts.verbose) {
        process.stderr.write(
          ` [${idx}/${rows.length}] ${elapsed}s ERR ${r.id} :: ${message}\n`,
        );
      }
    }
  }

  // Test for presence, not truthiness: an error whose message is "" is still a
  // failure and must not be averaged into the success metrics.
  const successes = diffs.filter((d) => d.errorMessage === undefined);
  // Arithmetic mean rounded to three decimals; 0 for an empty list.
  const mean = (xs: ReadonlyArray<number>): number =>
    xs.length === 0 ? 0 : Math.round((xs.reduce((a, b) => a + b, 0) / xs.length) * 1000) / 1000;

  return {
    attempted: diffs.length,
    succeeded: successes.length,
    schemaFailures,
    networkFailures,
    labelExactMatchRate: mean(successes.map((d) => (d.labelMatch ? 1 : 0))),
    meanEntityJaccard: mean(successes.map((d) => d.entityJaccard)),
    meanDecisionJaccard: mean(successes.map((d) => d.decisionJaccard)),
    meanOpenJaccard: mean(successes.map((d) => d.openJaccard)),
    diffs,
  };
}
|
|
230
|
-
|
|
231
|
-
/**
 * CLI entry point: parse `process.argv`, run the parity check and print the
 * run header and the aggregate metrics to stderr.
 */
export async function main(): Promise<void> {
  const opts = parseArgs(process.argv.slice(2));
  // The Ollama URL is only shown when that provider is actually in use.
  const ollamaSuffix = opts.provider === "ollama" ? ` ollama: ${opts.ollamaUrl}` : "";

  console.error(`nlm classify-parity: ${opts.limit} sessions from ${opts.dbPath}`);
  console.error(` provider: ${opts.provider} model: ${opts.classifyModel}${ollamaSuffix}`);

  const report = await runParity(opts);

  const summaryLines: string[] = [
    "",
    `attempted: ${report.attempted}`,
    `succeeded: ${report.succeeded}`,
    `schema failures: ${report.schemaFailures}`,
    `network failures: ${report.networkFailures}`,
    `label exact match: ${(report.labelExactMatchRate * 100).toFixed(1)}%`,
    `mean Jaccard ents: ${report.meanEntityJaccard.toFixed(3)}`,
    `mean Jaccard decs: ${report.meanDecisionJaccard.toFixed(3)}`,
    `mean Jaccard open: ${report.meanOpenJaccard.toFixed(3)}`,
  ];
  for (const line of summaryLines) {
    console.error(line);
  }
}
|
|
250
|
-
|
|
251
|
-
// Run main() only when this module is the script Node was started with.
// `import.meta.url` is a percent-encoded file URL, so the script path must be
// converted with pathToFileURL() before comparing: plain `file://${path}`
// concatenation never matches paths containing spaces or other escaped
// characters, nor Windows drive paths.
const entryPath = process.argv[1];
const isMain = entryPath !== undefined && import.meta.url === pathToFileURL(entryPath).href;
if (isMain) {
  main().catch((e) => {
    console.error("classify-parity fatal:", e);
    process.exit(1);
  });
}
|