nlm-memory 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +72 -34
- package/dist/cli/nlm.js +2 -1
- package/dist/cli/nlm.js.map +1 -1
- package/dist/http/app.js +2 -1
- package/dist/http/app.js.map +1 -1
- package/dist/mcp/server.js +20 -1
- package/dist/mcp/server.js.map +1 -1
- package/dist/ui/assets/{index-C8cpwbYJ.css → index-Beo8psd-.css} +1 -1
- package/dist/ui/assets/{index-CB50QnL-.js → index-CSPTTeeM.js} +8 -8
- package/dist/ui/index.html +2 -2
- package/package.json +26 -1
- package/.agents/plugins/marketplace.json +0 -20
- package/.github/workflows/ci.yml +0 -30
- package/docs/methodology/re-derivation-rate.md +0 -112
- package/docs/methodology/useful-hit-rate.md +0 -79
- package/docs/plans/2026-05-20-fts5-lexical-recall.md +0 -1088
- package/docs/plans/2026-05-20-recall-daemon-wedge-fix.md +0 -662
- package/docs/plans/2026-05-20-recall-hook-design.md +0 -131
- package/docs/plans/2026-05-20-recall-hook-implementation.md +0 -1222
- package/docs/plans/desktop-product.md +0 -69
- package/docs/plans/factstore-design.md +0 -236
- package/logs/CHANGELOG/CHANGELOG-2026.md +0 -1575
- package/logs/CHANGELOG/CHANGELOG.md +0 -209
- package/migrations/000_initial_schema.sql +0 -174
- package/migrations/001_entity_type_rename.sql +0 -17
- package/migrations/002_adapter_state_extend.sql +0 -12
- package/migrations/003_session_embeddings.sql +0 -11
- package/migrations/004_facts.sql +0 -46
- package/migrations/005_sources.sql +0 -31
- package/migrations/006_providers.sql +0 -33
- package/migrations/007_source_tokens.sql +0 -17
- package/migrations/008_fts_rebuild.sql +0 -9
- package/migrations/009_session_embedding_chunks.sql +0 -46
- package/migrations/010_sources_opencode.sql +0 -30
- package/migrations/011_sources_hermes_agent.sql +0 -30
- package/migrations/012_sources_aider.sql +0 -30
- package/migrations/013_adapter_state_failure_count.sql +0 -12
- package/migrations/014_sources_cursor.sql +0 -30
- package/migrations/015_sources_windsurf.sql +0 -30
- package/plugin-hermes-agent/README.md +0 -49
- package/plugin-hermes-agent/__init__.py +0 -75
- package/plugin-hermes-agent/plugin.yaml +0 -15
- package/scripts/backfill-citations.mjs +0 -0
- package/scripts/build-codex-plugin.mjs +0 -61
- package/scripts/deepseek-probe.mjs +0 -67
- package/scripts/extract-triples.mjs +0 -207
- package/scripts/longmemeval/embedding-cache.ts +0 -77
- package/scripts/longmemeval/fetch-dataset.sh +0 -25
- package/scripts/longmemeval/run-harness.ts +0 -315
- package/scripts/longmemeval/scorer.ts +0 -99
- package/scripts/longmemeval/tsconfig.json +0 -9
- package/scripts/longmemeval/types.ts +0 -35
- package/scripts/nlm-daily-digest.py +0 -239
- package/scripts/nlm-daily-digest.sh +0 -28
- package/src/cli/classify-parity.ts +0 -257
- package/src/cli/launchctl-helpers.ts +0 -49
- package/src/cli/nlm.ts +0 -1078
- package/src/core/actions/actions-log.ts +0 -118
- package/src/core/actions/overlay.ts +0 -117
- package/src/core/adapters/aider.ts +0 -205
- package/src/core/adapters/claude-code.ts +0 -293
- package/src/core/adapters/common.ts +0 -54
- package/src/core/adapters/cursor.ts +0 -486
- package/src/core/adapters/from-source.ts +0 -67
- package/src/core/adapters/hermes-agent.ts +0 -240
- package/src/core/adapters/hermes.ts +0 -277
- package/src/core/adapters/jsonl-generic.ts +0 -208
- package/src/core/adapters/opencode.ts +0 -281
- package/src/core/adapters/pi.ts +0 -264
- package/src/core/adapters/windsurf.ts +0 -386
- package/src/core/classifier/prompt.ts +0 -200
- package/src/core/dataset/build-dataset.ts +0 -463
- package/src/core/embedding/chunk-body.ts +0 -76
- package/src/core/embedding/embed-backfill.ts +0 -210
- package/src/core/embedding/embed-normalize.ts +0 -135
- package/src/core/facts/backfill-facts.ts +0 -254
- package/src/core/facts/extract-facts.ts +0 -50
- package/src/core/hook/citation-detect.ts +0 -124
- package/src/core/hook/cite-memo.ts +0 -68
- package/src/core/hook/claude-settings.ts +0 -187
- package/src/core/hook/gate.ts +0 -25
- package/src/core/hook/hook-log.ts +0 -41
- package/src/core/hook/memo-sweep.ts +0 -164
- package/src/core/hook/memo.ts +0 -67
- package/src/core/hook/pointer-block.ts +0 -26
- package/src/core/hook/select.ts +0 -32
- package/src/core/hook/transcript.ts +0 -121
- package/src/core/ingest/ingest-session.ts +0 -111
- package/src/core/providers/provider-models.ts +0 -100
- package/src/core/providers/provider-registry.ts +0 -196
- package/src/core/recall/citation-log.ts +0 -108
- package/src/core/recall/filter.ts +0 -27
- package/src/core/recall/index.ts +0 -6
- package/src/core/recall/match-fields.ts +0 -40
- package/src/core/recall/query-log.ts +0 -149
- package/src/core/recall/query-shape.ts +0 -66
- package/src/core/recall/recall-service.ts +0 -320
- package/src/core/recall/recent-log.ts +0 -59
- package/src/core/recall/tokenize.ts +0 -18
- package/src/core/recall/useful-scan.ts +0 -336
- package/src/core/recall-facts/fact-query-log.ts +0 -150
- package/src/core/recall-facts/fact-recall-service.ts +0 -327
- package/src/core/scheduler/scan-once.ts +0 -142
- package/src/core/scheduler/scheduler.ts +0 -225
- package/src/core/sources/source-registry.ts +0 -278
- package/src/core/storage/db-restore.ts +0 -133
- package/src/core/storage/live-status.ts +0 -45
- package/src/core/storage/migrate.ts +0 -72
- package/src/core/storage/sqlite-fact-store.ts +0 -304
- package/src/core/storage/sqlite-session-store.ts +0 -810
- package/src/hook/hook-auth.ts +0 -18
- package/src/hook/prompt-recall-hook.ts +0 -180
- package/src/hook/session-end-hook.ts +0 -81
- package/src/hook/session-start-hook.ts +0 -168
- package/src/hook/stop-hook.ts +0 -239
- package/src/http/app.ts +0 -1215
- package/src/install/claude-code.ts +0 -128
- package/src/install/codex.ts +0 -367
- package/src/install/cursor.ts +0 -68
- package/src/install/hermes-agent.ts +0 -76
- package/src/install/hermes.ts +0 -78
- package/src/install/nlm-dir-perms.ts +0 -55
- package/src/install/ollama.ts +0 -284
- package/src/install/setup.ts +0 -489
- package/src/install/windsurf.ts +0 -68
- package/src/llm/classifier-box.ts +0 -64
- package/src/llm/deepseek-client.ts +0 -150
- package/src/llm/env-autoload.ts +0 -55
- package/src/llm/ollama-client.ts +0 -189
- package/src/mcp/server.ts +0 -534
- package/src/ports/fact-store.ts +0 -102
- package/src/ports/llm-client.ts +0 -52
- package/src/ports/logger.ts +0 -16
- package/src/ports/session-store.ts +0 -45
- package/src/ports/transcript-adapter.ts +0 -55
- package/src/shared/types.ts +0 -149
- package/src/ui/App.tsx +0 -58
- package/src/ui/components/PromoteOpenButton.tsx +0 -65
- package/src/ui/components/SessionDrawer.tsx +0 -199
- package/src/ui/components/SideNav.tsx +0 -162
- package/src/ui/components/Skeleton.tsx +0 -107
- package/src/ui/index.html +0 -13
- package/src/ui/lib/actions.ts +0 -30
- package/src/ui/lib/api.ts +0 -92
- package/src/ui/lib/dataset.ts +0 -141
- package/src/ui/lib/registries.ts +0 -155
- package/src/ui/lib/view-settings.ts +0 -41
- package/src/ui/main.tsx +0 -15
- package/src/ui/pages/Live.tsx +0 -229
- package/src/ui/pages/Pulse.tsx +0 -415
- package/src/ui/pages/Recall.tsx +0 -190
- package/src/ui/pages/River.tsx +0 -354
- package/src/ui/pages/Search.tsx +0 -386
- package/src/ui/pages/Stub.tsx +0 -9
- package/src/ui/pages/Thread.tsx +0 -473
- package/src/ui/pages/settings/Classifier.tsx +0 -227
- package/src/ui/pages/settings/Data.tsx +0 -190
- package/src/ui/pages/settings/Index.tsx +0 -65
- package/src/ui/pages/settings/Labels.tsx +0 -224
- package/src/ui/pages/settings/Providers.tsx +0 -305
- package/src/ui/pages/settings/SettingsSubnav.tsx +0 -28
- package/src/ui/pages/settings/Sources.tsx +0 -326
- package/src/ui/pages/settings/Views.tsx +0 -96
- package/src/ui/styles.css +0 -1890
- package/src/ui/tsconfig.json +0 -21
- package/src/ui/vite.config.ts +0 -19
- package/tests/fixtures/claude_code/short_session.jsonl +0 -2
- package/tests/fixtures/claude_code/standard_iso.jsonl +0 -4
- package/tests/fixtures/claude_code/tool_heavy.jsonl +0 -8
- package/tests/fixtures/claude_code/with_subagent.jsonl +0 -7
- package/tests/fixtures/facts.ts +0 -17
- package/tests/fixtures/golden-corpus.ts +0 -85
- package/tests/fixtures/hermes/paired_request_dump.json +0 -24
- package/tests/fixtures/hermes/paired_session.json +0 -23
- package/tests/fixtures/hermes/request_dump.json +0 -28
- package/tests/fixtures/hermes/session_iso.json +0 -38
- package/tests/fixtures/hermes/session_unix.json +0 -38
- package/tests/fixtures/hermes/system_only.json +0 -18
- package/tests/fixtures/pi/error-connection-abort.jsonl +0 -8
- package/tests/fixtures/pi/short-successful.jsonl +0 -5
- package/tests/fixtures/pi/with-custom-message.jsonl +0 -6
- package/tests/fixtures/sessions.ts +0 -22
- package/tests/integration/backfill-facts.test.ts +0 -362
- package/tests/integration/citation-explicit.test.ts +0 -111
- package/tests/integration/cite-event.test.ts +0 -169
- package/tests/integration/cite-memo.test.ts +0 -87
- package/tests/integration/db-restore.test.ts +0 -153
- package/tests/integration/embed-backfill.test.ts +0 -176
- package/tests/integration/fact-supersedence.test.ts +0 -313
- package/tests/integration/fts-index.test.ts +0 -60
- package/tests/integration/getbyids-sqlite.test.ts +0 -100
- package/tests/integration/hermes-agent-hooks.test.ts +0 -248
- package/tests/integration/hook-claude-settings.test.ts +0 -218
- package/tests/integration/hook-log.test.ts +0 -54
- package/tests/integration/hook-memo.test.ts +0 -68
- package/tests/integration/hook-pre-compact.test.ts +0 -105
- package/tests/integration/hook-subagent-start.test.ts +0 -102
- package/tests/integration/http.test.ts +0 -401
- package/tests/integration/keyword-search-fts.test.ts +0 -66
- package/tests/integration/mcp-recall-logging.test.ts +0 -88
- package/tests/integration/mcp.test.ts +0 -260
- package/tests/integration/memo-sweep.test.ts +0 -91
- package/tests/integration/prompt-recall-hook.test.ts +0 -88
- package/tests/integration/provider-registry.test.ts +0 -107
- package/tests/integration/recall-golden.test.ts +0 -59
- package/tests/integration/recall-sqlite.test.ts +0 -169
- package/tests/integration/scheduler.test.ts +0 -391
- package/tests/integration/session-end-hook.test.ts +0 -48
- package/tests/integration/session-start-hook.test.ts +0 -126
- package/tests/integration/source-registry.test.ts +0 -122
- package/tests/integration/sqlite-fact-store.test.ts +0 -346
- package/tests/integration/stop-hook.test.ts +0 -560
- package/tests/integration/wal-checkpoint.test.ts +0 -49
- package/tests/unit/cli/launchctl-helpers.test.ts +0 -60
- package/tests/unit/core/adapters/aider.test.ts +0 -230
- package/tests/unit/core/adapters/claude-code.test.ts +0 -118
- package/tests/unit/core/adapters/cursor.test.ts +0 -485
- package/tests/unit/core/adapters/hermes-agent.test.ts +0 -329
- package/tests/unit/core/adapters/hermes.test.ts +0 -81
- package/tests/unit/core/adapters/jsonl-generic.test.ts +0 -142
- package/tests/unit/core/adapters/opencode.test.ts +0 -354
- package/tests/unit/core/adapters/pi.test.ts +0 -110
- package/tests/unit/core/adapters/windsurf.test.ts +0 -416
- package/tests/unit/core/classifier/prompt.test.ts +0 -126
- package/tests/unit/core/embedding/chunk-body.test.ts +0 -100
- package/tests/unit/core/facts/extract-facts.test.ts +0 -117
- package/tests/unit/core/filter.test.ts +0 -40
- package/tests/unit/core/hook/citation-detect-cite-session.test.ts +0 -96
- package/tests/unit/core/hook/citation-detect.test.ts +0 -124
- package/tests/unit/core/hook/gate.test.ts +0 -29
- package/tests/unit/core/hook/pointer-block.test.ts +0 -22
- package/tests/unit/core/hook/select.test.ts +0 -66
- package/tests/unit/core/match-fields.test.ts +0 -39
- package/tests/unit/core/mcp-cite-session.test.ts +0 -51
- package/tests/unit/core/providers/provider-models.test.ts +0 -101
- package/tests/unit/core/query-shape.test.ts +0 -92
- package/tests/unit/core/recall-facts/fact-recall-service.test.ts +0 -258
- package/tests/unit/core/recall-service.test.ts +0 -200
- package/tests/unit/core/storage/live-status.test.ts +0 -54
- package/tests/unit/core/tokenize.test.ts +0 -32
- package/tests/unit/core/useful-scan.test.ts +0 -537
- package/tests/unit/llm/embed.test.ts +0 -93
- package/tests/unit/llm/ollama-client.test.ts +0 -124
- package/tests/unit/scripts/longmemeval-scorer.test.ts +0 -114
- package/tsconfig.json +0 -31
- package/tsconfig.test.json +0 -11
- package/vitest.config.ts +0 -22
|
@@ -1,108 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Append-only JSONL citation log. One line per (conversationId, citedId)
|
|
3
|
-
* that the Stop hook detected. This is the training-data substrate for the
|
|
4
|
-
* future learned reranker: each row is a (query, returned_id, was_cited)
|
|
5
|
-
* triple once joined against ~/.nlm/query_log.jsonl by conversationId.
|
|
6
|
-
*
|
|
7
|
-
* Path defaults to ~/.nlm/citation-log.jsonl, overridable via
|
|
8
|
-
* NLM_CITATION_LOG. Telemetry path — never raises.
|
|
9
|
-
*/
|
|
10
|
-
|
|
11
|
-
import { appendFile, mkdir, readFile, stat } from "node:fs/promises";
|
|
12
|
-
import { dirname, join } from "node:path";
|
|
13
|
-
import { homedir } from "node:os";
|
|
14
|
-
|
|
15
|
-
export type CitationKind = "tool_use" | "prose";
|
|
16
|
-
|
|
17
|
-
export interface CitationEntry {
|
|
18
|
-
readonly conversationId: string;
|
|
19
|
-
readonly citedId: string;
|
|
20
|
-
readonly kind?: CitationKind;
|
|
21
|
-
readonly responsePreview?: string;
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
export interface CitationStats {
|
|
25
|
-
readonly days: number;
|
|
26
|
-
readonly total: number;
|
|
27
|
-
readonly distinct_ids: number;
|
|
28
|
-
readonly top_ids: ReadonlyArray<{ readonly id: string; readonly count: number }>;
|
|
29
|
-
readonly log_present: boolean;
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
function defaultLogPath(): string {
|
|
33
|
-
return process.env["NLM_CITATION_LOG"] ?? join(homedir(), ".nlm", "citation-log.jsonl");
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
export async function appendCitation(
|
|
37
|
-
entry: CitationEntry,
|
|
38
|
-
logPath: string = defaultLogPath(),
|
|
39
|
-
): Promise<void> {
|
|
40
|
-
try {
|
|
41
|
-
await mkdir(dirname(logPath), { recursive: true });
|
|
42
|
-
const payload = {
|
|
43
|
-
ts: new Date().toISOString(),
|
|
44
|
-
conversation_id: entry.conversationId,
|
|
45
|
-
cited_id: entry.citedId,
|
|
46
|
-
...(entry.kind !== undefined ? { kind: entry.kind } : {}),
|
|
47
|
-
...(entry.responsePreview !== undefined
|
|
48
|
-
? { response_preview: entry.responsePreview }
|
|
49
|
-
: {}),
|
|
50
|
-
};
|
|
51
|
-
await appendFile(logPath, JSON.stringify(payload) + "\n", "utf8");
|
|
52
|
-
} catch {
|
|
53
|
-
// Telemetry failure must never break the call path.
|
|
54
|
-
}
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
export async function citationStats(
|
|
58
|
-
days: number,
|
|
59
|
-
logPath: string = defaultLogPath(),
|
|
60
|
-
): Promise<CitationStats> {
|
|
61
|
-
const base: CitationStats = {
|
|
62
|
-
days,
|
|
63
|
-
total: 0,
|
|
64
|
-
distinct_ids: 0,
|
|
65
|
-
top_ids: [],
|
|
66
|
-
log_present: false,
|
|
67
|
-
};
|
|
68
|
-
try {
|
|
69
|
-
await stat(logPath);
|
|
70
|
-
} catch {
|
|
71
|
-
return base;
|
|
72
|
-
}
|
|
73
|
-
let raw: string;
|
|
74
|
-
try {
|
|
75
|
-
raw = await readFile(logPath, "utf8");
|
|
76
|
-
} catch {
|
|
77
|
-
return { ...base, log_present: true };
|
|
78
|
-
}
|
|
79
|
-
const cutoff = Date.now() - days * 24 * 60 * 60 * 1000;
|
|
80
|
-
const counts = new Map<string, number>();
|
|
81
|
-
let total = 0;
|
|
82
|
-
for (const line of raw.split("\n")) {
|
|
83
|
-
const trimmed = line.trim();
|
|
84
|
-
if (!trimmed) continue;
|
|
85
|
-
let entry: Record<string, unknown>;
|
|
86
|
-
try {
|
|
87
|
-
entry = JSON.parse(trimmed) as Record<string, unknown>;
|
|
88
|
-
} catch {
|
|
89
|
-
continue;
|
|
90
|
-
}
|
|
91
|
-
const tsRaw = entry["ts"];
|
|
92
|
-
if (typeof tsRaw !== "string") continue;
|
|
93
|
-
const ts = Date.parse(tsRaw);
|
|
94
|
-
if (!Number.isFinite(ts) || ts < cutoff) continue;
|
|
95
|
-
const id = entry["cited_id"];
|
|
96
|
-
if (typeof id !== "string" || !id) continue;
|
|
97
|
-
total += 1;
|
|
98
|
-
counts.set(id, (counts.get(id) ?? 0) + 1);
|
|
99
|
-
}
|
|
100
|
-
const sorted = [...counts.entries()].sort((a, b) => b[1] - a[1]).slice(0, 20);
|
|
101
|
-
return {
|
|
102
|
-
days,
|
|
103
|
-
total,
|
|
104
|
-
distinct_ids: counts.size,
|
|
105
|
-
top_ids: sorted.map(([id, count]) => ({ id, count })),
|
|
106
|
-
log_present: true,
|
|
107
|
-
};
|
|
108
|
-
}
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Session-list filters used before scoring.
|
|
3
|
-
*
|
|
4
|
-
* Pure function over a session array. Mirrors recall.py:_apply_filters.
|
|
5
|
-
*/
|
|
6
|
-
|
|
7
|
-
import type { Session, RecallKindFilter } from "@shared/types.js";
|
|
8
|
-
|
|
9
|
-
export interface RecallFilter {
|
|
10
|
-
readonly entity?: string;
|
|
11
|
-
readonly kind?: RecallKindFilter;
|
|
12
|
-
}
|
|
13
|
-
|
|
14
|
-
export function applyFilter(
|
|
15
|
-
sessions: ReadonlyArray<Session>,
|
|
16
|
-
filter: RecallFilter,
|
|
17
|
-
): ReadonlyArray<Session> {
|
|
18
|
-
const { entity, kind } = filter;
|
|
19
|
-
if (!entity && !kind) return sessions;
|
|
20
|
-
|
|
21
|
-
return sessions.filter((s) => {
|
|
22
|
-
if (entity && !s.entities.includes(entity)) return false;
|
|
23
|
-
if (kind === "decision" && s.decisions.length === 0) return false;
|
|
24
|
-
if (kind === "open" && s.open.length === 0) return false;
|
|
25
|
-
return true;
|
|
26
|
-
});
|
|
27
|
-
}
|
package/src/core/recall/index.ts
DELETED
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
export { RecallService } from "./recall-service.js";
|
|
2
|
-
export type { RecallServiceDeps } from "./recall-service.js";
|
|
3
|
-
export { keywordMatchFields } from "./match-fields.js";
|
|
4
|
-
export { applyFilter } from "./filter.js";
|
|
5
|
-
export type { RecallFilter } from "./filter.js";
|
|
6
|
-
export { tokenize, tokenSet } from "./tokenize.js";
|
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Computes which session fields a keyword query matched, for the `matchedIn`
|
|
3
|
-
* badge on a RecallHit. Pure function — no DB, no I/O. FTS5 BM25 ranks the
|
|
4
|
-
* whole row; this recovers per-field attribution from the resolved Session,
|
|
5
|
-
* including decisions/open which live in the markers table (not in FTS).
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
import type { MatchField, Session } from "@shared/types.js";
|
|
9
|
-
import { tokenSet } from "./tokenize.js";
|
|
10
|
-
|
|
11
|
-
type SessionFields = Pick<Session, "label" | "summary" | "decisions" | "open">;
|
|
12
|
-
|
|
13
|
-
export function keywordMatchFields(
|
|
14
|
-
session: SessionFields,
|
|
15
|
-
queryTokens: ReadonlySet<string>,
|
|
16
|
-
): ReadonlyArray<MatchField> {
|
|
17
|
-
if (queryTokens.size === 0) return [];
|
|
18
|
-
const fields: MatchField[] = [];
|
|
19
|
-
|
|
20
|
-
if (overlaps(queryTokens, tokenSet(session.label))) fields.push("label");
|
|
21
|
-
if (overlaps(queryTokens, joinedTokens(session.decisions))) fields.push("decisions");
|
|
22
|
-
if (overlaps(queryTokens, joinedTokens(session.open))) fields.push("open");
|
|
23
|
-
if (overlaps(queryTokens, tokenSet(session.summary))) fields.push("summary");
|
|
24
|
-
|
|
25
|
-
return fields;
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
function joinedTokens(values: ReadonlyArray<string>): Set<string> {
|
|
29
|
-
const out = new Set<string>();
|
|
30
|
-
for (const v of values) {
|
|
31
|
-
for (const t of tokenSet(v)) out.add(t);
|
|
32
|
-
}
|
|
33
|
-
return out;
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
function overlaps(a: ReadonlySet<string>, b: ReadonlySet<string>): boolean {
|
|
37
|
-
const [small, large] = a.size <= b.size ? [a, b] : [b, a];
|
|
38
|
-
for (const item of small) if (large.has(item)) return true;
|
|
39
|
-
return false;
|
|
40
|
-
}
|
|
@@ -1,149 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Query log + stats aggregation. Mirrors recall.py's log_query() / stats().
|
|
3
|
-
*
|
|
4
|
-
* Telemetry path — never raises. The HTTP recall handler calls logQuery()
|
|
5
|
-
* after each /api/recall response; /api/recall/stats reads the same file
|
|
6
|
-
* back to drive the Pulse agent-recall observability panel.
|
|
7
|
-
*
|
|
8
|
-
* File format: one JSON object per line at $NLM_QUERY_LOG or
|
|
9
|
-
* ~/.nlm/query_log.jsonl. Append-only.
|
|
10
|
-
*/
|
|
11
|
-
|
|
12
|
-
import { appendFile, mkdir, readFile, stat } from "node:fs/promises";
|
|
13
|
-
import { dirname, join } from "node:path";
|
|
14
|
-
import { homedir } from "node:os";
|
|
15
|
-
import type { RecallKindFilter, RecallMode } from "@shared/types.js";
|
|
16
|
-
import { readUsefulHitRate, defaultUsefulHitLogPath } from "./useful-scan.js";
|
|
17
|
-
|
|
18
|
-
export interface LogEntry {
|
|
19
|
-
readonly source: string;
|
|
20
|
-
readonly query: string | null;
|
|
21
|
-
readonly entity: string | null;
|
|
22
|
-
readonly kind: RecallKindFilter | null;
|
|
23
|
-
readonly mode: RecallMode;
|
|
24
|
-
readonly limit: number;
|
|
25
|
-
readonly nResults: number;
|
|
26
|
-
readonly returnedIds: ReadonlyArray<string>;
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
export interface StatsResult {
|
|
30
|
-
readonly days: number;
|
|
31
|
-
readonly total: number;
|
|
32
|
-
readonly with_results: number;
|
|
33
|
-
readonly hit_rate: number;
|
|
34
|
-
// null until nlm useful-scan has run and populated useful-hit-log.jsonl with
|
|
35
|
-
// measurable entries. Non-null once ≥1 measurable entry exists in the window.
|
|
36
|
-
readonly useful_hit_rate: number | null;
|
|
37
|
-
readonly by_source: Record<string, number>;
|
|
38
|
-
readonly top_queries: ReadonlyArray<{ readonly query: string; readonly count: number }>;
|
|
39
|
-
readonly log_present: boolean;
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
function defaultLogPath(): string {
|
|
43
|
-
return process.env["NLM_QUERY_LOG"] ?? join(homedir(), ".nlm", "query_log.jsonl");
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
export async function logQuery(
|
|
47
|
-
entry: LogEntry,
|
|
48
|
-
logPath: string = defaultLogPath(),
|
|
49
|
-
): Promise<void> {
|
|
50
|
-
try {
|
|
51
|
-
await mkdir(dirname(logPath), { recursive: true });
|
|
52
|
-
const payload = {
|
|
53
|
-
ts: new Date().toISOString(),
|
|
54
|
-
source: entry.source,
|
|
55
|
-
query: entry.query,
|
|
56
|
-
entity: entry.entity,
|
|
57
|
-
kind: entry.kind,
|
|
58
|
-
mode: entry.mode,
|
|
59
|
-
limit: entry.limit,
|
|
60
|
-
n_results: entry.nResults,
|
|
61
|
-
returned_ids: entry.returnedIds,
|
|
62
|
-
};
|
|
63
|
-
await appendFile(logPath, JSON.stringify(payload) + "\n", "utf8");
|
|
64
|
-
} catch {
|
|
65
|
-
// Telemetry must never break the call path.
|
|
66
|
-
}
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
export async function recallStats(
|
|
70
|
-
days: number,
|
|
71
|
-
logPath: string = defaultLogPath(),
|
|
72
|
-
usefulHitLogPath: string = defaultUsefulHitLogPath(),
|
|
73
|
-
): Promise<StatsResult> {
|
|
74
|
-
const useful_hit_rate = await readUsefulHitRate(usefulHitLogPath, days);
|
|
75
|
-
|
|
76
|
-
const base: StatsResult = {
|
|
77
|
-
days,
|
|
78
|
-
total: 0,
|
|
79
|
-
with_results: 0,
|
|
80
|
-
hit_rate: 0,
|
|
81
|
-
useful_hit_rate,
|
|
82
|
-
by_source: {},
|
|
83
|
-
top_queries: [],
|
|
84
|
-
log_present: false,
|
|
85
|
-
};
|
|
86
|
-
|
|
87
|
-
try {
|
|
88
|
-
await stat(logPath);
|
|
89
|
-
} catch {
|
|
90
|
-
return base;
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
const cutoff = Date.now() - days * 24 * 60 * 60 * 1000;
|
|
94
|
-
const bySource = new Map<string, number>();
|
|
95
|
-
const queryCounts = new Map<string, number>();
|
|
96
|
-
let total = 0;
|
|
97
|
-
let withResults = 0;
|
|
98
|
-
|
|
99
|
-
let raw: string;
|
|
100
|
-
try {
|
|
101
|
-
raw = await readFile(logPath, "utf8");
|
|
102
|
-
} catch {
|
|
103
|
-
return { ...base, log_present: true };
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
for (const line of raw.split("\n")) {
|
|
107
|
-
const trimmed = line.trim();
|
|
108
|
-
if (!trimmed) continue;
|
|
109
|
-
let entry: Record<string, unknown>;
|
|
110
|
-
try {
|
|
111
|
-
entry = JSON.parse(trimmed) as Record<string, unknown>;
|
|
112
|
-
} catch {
|
|
113
|
-
continue;
|
|
114
|
-
}
|
|
115
|
-
const tsRaw = entry["ts"];
|
|
116
|
-
if (typeof tsRaw !== "string") continue;
|
|
117
|
-
const ts = Date.parse(tsRaw);
|
|
118
|
-
if (!Number.isFinite(ts) || ts < cutoff) continue;
|
|
119
|
-
|
|
120
|
-
total += 1;
|
|
121
|
-
const n = typeof entry["n_results"] === "number" ? entry["n_results"] : 0;
|
|
122
|
-
if (n > 0) withResults += 1;
|
|
123
|
-
|
|
124
|
-
const source = typeof entry["source"] === "string" ? entry["source"] : "unknown";
|
|
125
|
-
bySource.set(source, (bySource.get(source) ?? 0) + 1);
|
|
126
|
-
|
|
127
|
-
const q = entry["query"];
|
|
128
|
-
if (typeof q === "string" && q) {
|
|
129
|
-
const norm = q.toLowerCase().trim();
|
|
130
|
-
queryCounts.set(norm, (queryCounts.get(norm) ?? 0) + 1);
|
|
131
|
-
}
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
const sortedSources = [...bySource.entries()].sort((a, b) => b[1] - a[1]);
|
|
135
|
-
const sortedQueries = [...queryCounts.entries()]
|
|
136
|
-
.sort((a, b) => b[1] - a[1])
|
|
137
|
-
.slice(0, 5);
|
|
138
|
-
|
|
139
|
-
return {
|
|
140
|
-
days,
|
|
141
|
-
total,
|
|
142
|
-
with_results: withResults,
|
|
143
|
-
hit_rate: total === 0 ? 0 : Math.round((withResults / total) * 1000) / 1000,
|
|
144
|
-
useful_hit_rate,
|
|
145
|
-
by_source: Object.fromEntries(sortedSources),
|
|
146
|
-
top_queries: sortedQueries.map(([query, count]) => ({ query, count })),
|
|
147
|
-
log_present: true,
|
|
148
|
-
};
|
|
149
|
-
}
|
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Query shape detection for force-include of keyword rank-1 in hybrid recall.
|
|
3
|
-
*
|
|
4
|
-
* Build F (2026-05-26): when a query has both a temporal marker and a
|
|
5
|
-
* named-entity-shaped token, the keyword-leg rank-1 session is force-included
|
|
6
|
-
* in the merged top-k result. Diagnostic justification: of 7 hybrid temporal
|
|
7
|
-
* misses where keyword found the right session, 5 had keyword rank=1 and
|
|
8
|
-
* pure RRF demoted them out of top-5 because the same session wasn't in
|
|
9
|
-
* semantic's top-15. Build E′ (asymmetric multiplicative boost) contributed
|
|
10
|
-
* zero — boost magnitude was too small to overcome the "appears in both lists
|
|
11
|
-
* at lower rank" advantage. Force-include sidesteps RRF math entirely.
|
|
12
|
-
*
|
|
13
|
-
* Probe data (n=500 LongMemEval-S, hybrid k=5):
|
|
14
|
-
* - 17.3% of temporal-reasoning queries match the shape
|
|
15
|
-
* - 0% of single-session-preference, 0% of single-session-assistant
|
|
16
|
-
* - 1.4-2.6% of other types — bounded blast radius
|
|
17
|
-
*/
|
|
18
|
-
|
|
19
|
-
const TEMPORAL_PATTERNS: ReadonlyArray<RegExp> = [
|
|
20
|
-
/\b\d+\s+(day|week|month|year)s?\s+ago\b/i,
|
|
21
|
-
/\b(last|past|next)\s+(week|month|year|monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b/i,
|
|
22
|
-
/\bwhen\s+did\b/i,
|
|
23
|
-
/\b(before|after)\s+I\b/,
|
|
24
|
-
/\bago\b/i,
|
|
25
|
-
/\b(yesterday|today|tomorrow)\b/i,
|
|
26
|
-
/\bhow\s+(long|many)\s+(days?|weeks?|months?|years?)\s+ago\b/i,
|
|
27
|
-
];
|
|
28
|
-
|
|
29
|
-
const COMMON_CAPS_NON_NE: ReadonlySet<string> = new Set([
|
|
30
|
-
"monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday",
|
|
31
|
-
"january", "february", "march", "april", "may", "june", "july", "august",
|
|
32
|
-
"september", "october", "november", "december",
|
|
33
|
-
"i", "i'd", "i've", "i'm", "i'll",
|
|
34
|
-
]);
|
|
35
|
-
|
|
36
|
-
export interface QueryShape {
|
|
37
|
-
readonly hasTemporal: boolean;
|
|
38
|
-
readonly hasNamedEntity: boolean;
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
export function detectQueryShape(query: string): QueryShape {
|
|
42
|
-
if (!query) return { hasTemporal: false, hasNamedEntity: false };
|
|
43
|
-
const hasTemporal = TEMPORAL_PATTERNS.some((re) => re.test(query));
|
|
44
|
-
const hasNamedEntity = detectNamedEntity(query);
|
|
45
|
-
return { hasTemporal, hasNamedEntity };
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
function detectNamedEntity(query: string): boolean {
|
|
49
|
-
const tokens = query.split(/[\s,.;:!?()"'`]+/).filter((t) => t.length > 0);
|
|
50
|
-
if (tokens.length === 0) return false;
|
|
51
|
-
for (let i = 1; i < tokens.length; i++) {
|
|
52
|
-
const tok = tokens[i];
|
|
53
|
-
if (tok && isNamedEntityToken(tok)) return true;
|
|
54
|
-
}
|
|
55
|
-
return false;
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
function isNamedEntityToken(tok: string): boolean {
|
|
59
|
-
if (COMMON_CAPS_NON_NE.has(tok.toLowerCase())) return false;
|
|
60
|
-
if (tok.length < 2) return false;
|
|
61
|
-
if (/^[A-Z]{2,}$/.test(tok)) return true;
|
|
62
|
-
const hasUpper = /[A-Z]/.test(tok);
|
|
63
|
-
const hasLower = /[a-z]/.test(tok);
|
|
64
|
-
if (hasUpper && hasLower) return true;
|
|
65
|
-
return false;
|
|
66
|
-
}
|