nlm-memory 0.4.2 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +72 -34
- package/dist/cli/nlm.js +223 -33
- package/dist/cli/nlm.js.map +1 -1
- package/dist/core/adapters/cursor.d.ts +45 -0
- package/dist/core/adapters/cursor.js +397 -0
- package/dist/core/adapters/cursor.js.map +1 -0
- package/dist/core/adapters/from-source.js +10 -0
- package/dist/core/adapters/from-source.js.map +1 -1
- package/dist/core/adapters/windsurf.d.ts +44 -0
- package/dist/core/adapters/windsurf.js +299 -0
- package/dist/core/adapters/windsurf.js.map +1 -0
- package/dist/core/hook/claude-settings.d.ts +12 -5
- package/dist/core/hook/claude-settings.js +21 -6
- package/dist/core/hook/claude-settings.js.map +1 -1
- package/dist/core/sources/source-registry.d.ts +1 -1
- package/dist/core/sources/source-registry.js +18 -0
- package/dist/core/sources/source-registry.js.map +1 -1
- package/dist/core/storage/sqlite-session-store.d.ts +2 -0
- package/dist/core/storage/sqlite-session-store.js +38 -2
- package/dist/core/storage/sqlite-session-store.js.map +1 -1
- package/dist/hook/hook-auth.d.ts +13 -0
- package/dist/hook/hook-auth.js +19 -0
- package/dist/hook/hook-auth.js.map +1 -0
- package/dist/hook/prompt-recall-hook.js +7 -1
- package/dist/hook/prompt-recall-hook.js.map +1 -1
- package/dist/hook/session-start-hook.js +4 -1
- package/dist/hook/session-start-hook.js.map +1 -1
- package/dist/hook/stop-hook.js +4 -1
- package/dist/hook/stop-hook.js.map +1 -1
- package/dist/http/app.d.ts +2 -0
- package/dist/http/app.js +76 -1
- package/dist/http/app.js.map +1 -1
- package/dist/install/claude-code.js +1 -1
- package/dist/install/claude-code.js.map +1 -1
- package/dist/install/cursor.d.ts +25 -0
- package/dist/install/cursor.js +43 -0
- package/dist/install/cursor.js.map +1 -0
- package/dist/install/nlm-dir-perms.d.ts +19 -0
- package/dist/install/nlm-dir-perms.js +43 -0
- package/dist/install/nlm-dir-perms.js.map +1 -0
- package/dist/install/ollama.d.ts +18 -1
- package/dist/install/ollama.js +62 -7
- package/dist/install/ollama.js.map +1 -1
- package/dist/install/setup.d.ts +4 -0
- package/dist/install/setup.js +141 -18
- package/dist/install/setup.js.map +1 -1
- package/dist/install/windsurf.d.ts +25 -0
- package/dist/install/windsurf.js +43 -0
- package/dist/install/windsurf.js.map +1 -0
- package/dist/mcp/server.js +20 -1
- package/dist/mcp/server.js.map +1 -1
- package/dist/shared/types.d.ts +4 -0
- package/dist/ui/assets/{index-BA6IpU8g.css → index-Beo8psd-.css} +1 -1
- package/dist/ui/assets/index-CSPTTeeM.js +69 -0
- package/dist/ui/index.html +2 -2
- package/package.json +26 -1
- package/plugin/scripts/prompt-recall-hook.mjs +55 -4
- package/plugin/scripts/stop-hook.mjs +57 -6
- package/.agents/plugins/marketplace.json +0 -20
- package/.github/workflows/ci.yml +0 -30
- package/dist/ui/assets/index-B_qIVV0k.js +0 -69
- package/docs/methodology/re-derivation-rate.md +0 -112
- package/docs/methodology/useful-hit-rate.md +0 -79
- package/docs/plans/2026-05-20-fts5-lexical-recall.md +0 -1088
- package/docs/plans/2026-05-20-recall-daemon-wedge-fix.md +0 -662
- package/docs/plans/2026-05-20-recall-hook-design.md +0 -131
- package/docs/plans/2026-05-20-recall-hook-implementation.md +0 -1222
- package/docs/plans/desktop-product.md +0 -69
- package/docs/plans/factstore-design.md +0 -236
- package/logs/CHANGELOG/CHANGELOG-2026.md +0 -1389
- package/logs/CHANGELOG/CHANGELOG.md +0 -337
- package/migrations/000_initial_schema.sql +0 -174
- package/migrations/001_entity_type_rename.sql +0 -17
- package/migrations/002_adapter_state_extend.sql +0 -12
- package/migrations/003_session_embeddings.sql +0 -11
- package/migrations/004_facts.sql +0 -46
- package/migrations/005_sources.sql +0 -31
- package/migrations/006_providers.sql +0 -33
- package/migrations/007_source_tokens.sql +0 -17
- package/migrations/008_fts_rebuild.sql +0 -9
- package/migrations/009_session_embedding_chunks.sql +0 -46
- package/migrations/010_sources_opencode.sql +0 -30
- package/migrations/011_sources_hermes_agent.sql +0 -30
- package/migrations/012_sources_aider.sql +0 -30
- package/migrations/013_adapter_state_failure_count.sql +0 -12
- package/plugin-hermes-agent/README.md +0 -49
- package/plugin-hermes-agent/__init__.py +0 -75
- package/plugin-hermes-agent/plugin.yaml +0 -15
- package/scripts/backfill-citations.mjs +0 -0
- package/scripts/build-codex-plugin.mjs +0 -61
- package/scripts/deepseek-probe.mjs +0 -67
- package/scripts/extract-triples.mjs +0 -207
- package/scripts/longmemeval/embedding-cache.ts +0 -77
- package/scripts/longmemeval/fetch-dataset.sh +0 -25
- package/scripts/longmemeval/run-harness.ts +0 -315
- package/scripts/longmemeval/scorer.ts +0 -99
- package/scripts/longmemeval/tsconfig.json +0 -9
- package/scripts/longmemeval/types.ts +0 -35
- package/scripts/nlm-daily-digest.py +0 -239
- package/scripts/nlm-daily-digest.sh +0 -28
- package/src/cli/classify-parity.ts +0 -257
- package/src/cli/launchctl-helpers.ts +0 -49
- package/src/cli/nlm.ts +0 -885
- package/src/core/actions/actions-log.ts +0 -118
- package/src/core/actions/overlay.ts +0 -117
- package/src/core/adapters/aider.ts +0 -205
- package/src/core/adapters/claude-code.ts +0 -293
- package/src/core/adapters/common.ts +0 -54
- package/src/core/adapters/from-source.ts +0 -57
- package/src/core/adapters/hermes-agent.ts +0 -240
- package/src/core/adapters/hermes.ts +0 -277
- package/src/core/adapters/jsonl-generic.ts +0 -208
- package/src/core/adapters/opencode.ts +0 -281
- package/src/core/adapters/pi.ts +0 -264
- package/src/core/classifier/prompt.ts +0 -200
- package/src/core/dataset/build-dataset.ts +0 -463
- package/src/core/embedding/chunk-body.ts +0 -76
- package/src/core/embedding/embed-backfill.ts +0 -210
- package/src/core/embedding/embed-normalize.ts +0 -135
- package/src/core/facts/backfill-facts.ts +0 -254
- package/src/core/facts/extract-facts.ts +0 -50
- package/src/core/hook/citation-detect.ts +0 -124
- package/src/core/hook/cite-memo.ts +0 -68
- package/src/core/hook/claude-settings.ts +0 -166
- package/src/core/hook/gate.ts +0 -25
- package/src/core/hook/hook-log.ts +0 -41
- package/src/core/hook/memo-sweep.ts +0 -164
- package/src/core/hook/memo.ts +0 -67
- package/src/core/hook/pointer-block.ts +0 -26
- package/src/core/hook/select.ts +0 -32
- package/src/core/hook/transcript.ts +0 -121
- package/src/core/ingest/ingest-session.ts +0 -111
- package/src/core/providers/provider-models.ts +0 -100
- package/src/core/providers/provider-registry.ts +0 -196
- package/src/core/recall/citation-log.ts +0 -108
- package/src/core/recall/filter.ts +0 -27
- package/src/core/recall/index.ts +0 -6
- package/src/core/recall/match-fields.ts +0 -40
- package/src/core/recall/query-log.ts +0 -149
- package/src/core/recall/query-shape.ts +0 -66
- package/src/core/recall/recall-service.ts +0 -320
- package/src/core/recall/recent-log.ts +0 -59
- package/src/core/recall/tokenize.ts +0 -18
- package/src/core/recall/useful-scan.ts +0 -336
- package/src/core/recall-facts/fact-query-log.ts +0 -150
- package/src/core/recall-facts/fact-recall-service.ts +0 -327
- package/src/core/scheduler/scan-once.ts +0 -142
- package/src/core/scheduler/scheduler.ts +0 -225
- package/src/core/sources/source-registry.ts +0 -260
- package/src/core/storage/db-restore.ts +0 -133
- package/src/core/storage/live-status.ts +0 -45
- package/src/core/storage/migrate.ts +0 -72
- package/src/core/storage/sqlite-fact-store.ts +0 -304
- package/src/core/storage/sqlite-session-store.ts +0 -765
- package/src/hook/prompt-recall-hook.ts +0 -174
- package/src/hook/session-end-hook.ts +0 -81
- package/src/hook/session-start-hook.ts +0 -165
- package/src/hook/stop-hook.ts +0 -236
- package/src/http/app.ts +0 -1137
- package/src/install/claude-code.ts +0 -128
- package/src/install/codex.ts +0 -367
- package/src/install/hermes-agent.ts +0 -76
- package/src/install/hermes.ts +0 -78
- package/src/install/ollama.ts +0 -211
- package/src/install/setup.ts +0 -368
- package/src/llm/classifier-box.ts +0 -64
- package/src/llm/deepseek-client.ts +0 -150
- package/src/llm/env-autoload.ts +0 -55
- package/src/llm/ollama-client.ts +0 -189
- package/src/mcp/server.ts +0 -534
- package/src/ports/fact-store.ts +0 -102
- package/src/ports/llm-client.ts +0 -52
- package/src/ports/logger.ts +0 -16
- package/src/ports/session-store.ts +0 -45
- package/src/ports/transcript-adapter.ts +0 -55
- package/src/shared/types.ts +0 -145
- package/src/ui/App.tsx +0 -58
- package/src/ui/components/PromoteOpenButton.tsx +0 -65
- package/src/ui/components/SessionDrawer.tsx +0 -136
- package/src/ui/components/SideNav.tsx +0 -162
- package/src/ui/components/Skeleton.tsx +0 -107
- package/src/ui/index.html +0 -13
- package/src/ui/lib/actions.ts +0 -30
- package/src/ui/lib/api.ts +0 -92
- package/src/ui/lib/dataset.ts +0 -141
- package/src/ui/lib/registries.ts +0 -155
- package/src/ui/lib/view-settings.ts +0 -41
- package/src/ui/main.tsx +0 -15
- package/src/ui/pages/Live.tsx +0 -229
- package/src/ui/pages/Pulse.tsx +0 -415
- package/src/ui/pages/Recall.tsx +0 -190
- package/src/ui/pages/River.tsx +0 -308
- package/src/ui/pages/Search.tsx +0 -93
- package/src/ui/pages/Stub.tsx +0 -9
- package/src/ui/pages/Thread.tsx +0 -262
- package/src/ui/pages/settings/Classifier.tsx +0 -227
- package/src/ui/pages/settings/Data.tsx +0 -190
- package/src/ui/pages/settings/Index.tsx +0 -65
- package/src/ui/pages/settings/Labels.tsx +0 -224
- package/src/ui/pages/settings/Providers.tsx +0 -305
- package/src/ui/pages/settings/SettingsSubnav.tsx +0 -28
- package/src/ui/pages/settings/Sources.tsx +0 -326
- package/src/ui/pages/settings/Views.tsx +0 -96
- package/src/ui/styles.css +0 -1766
- package/src/ui/tsconfig.json +0 -21
- package/src/ui/vite.config.ts +0 -19
- package/tests/fixtures/claude_code/short_session.jsonl +0 -2
- package/tests/fixtures/claude_code/standard_iso.jsonl +0 -4
- package/tests/fixtures/claude_code/tool_heavy.jsonl +0 -8
- package/tests/fixtures/claude_code/with_subagent.jsonl +0 -7
- package/tests/fixtures/facts.ts +0 -17
- package/tests/fixtures/golden-corpus.ts +0 -85
- package/tests/fixtures/hermes/paired_request_dump.json +0 -24
- package/tests/fixtures/hermes/paired_session.json +0 -23
- package/tests/fixtures/hermes/request_dump.json +0 -28
- package/tests/fixtures/hermes/session_iso.json +0 -38
- package/tests/fixtures/hermes/session_unix.json +0 -38
- package/tests/fixtures/hermes/system_only.json +0 -18
- package/tests/fixtures/pi/error-connection-abort.jsonl +0 -8
- package/tests/fixtures/pi/short-successful.jsonl +0 -5
- package/tests/fixtures/pi/with-custom-message.jsonl +0 -6
- package/tests/fixtures/sessions.ts +0 -22
- package/tests/integration/backfill-facts.test.ts +0 -362
- package/tests/integration/citation-explicit.test.ts +0 -111
- package/tests/integration/cite-event.test.ts +0 -169
- package/tests/integration/cite-memo.test.ts +0 -87
- package/tests/integration/db-restore.test.ts +0 -153
- package/tests/integration/embed-backfill.test.ts +0 -176
- package/tests/integration/fact-supersedence.test.ts +0 -313
- package/tests/integration/fts-index.test.ts +0 -60
- package/tests/integration/getbyids-sqlite.test.ts +0 -60
- package/tests/integration/hermes-agent-hooks.test.ts +0 -248
- package/tests/integration/hook-claude-settings.test.ts +0 -205
- package/tests/integration/hook-log.test.ts +0 -54
- package/tests/integration/hook-memo.test.ts +0 -68
- package/tests/integration/hook-pre-compact.test.ts +0 -105
- package/tests/integration/hook-subagent-start.test.ts +0 -102
- package/tests/integration/http.test.ts +0 -401
- package/tests/integration/keyword-search-fts.test.ts +0 -66
- package/tests/integration/mcp-recall-logging.test.ts +0 -88
- package/tests/integration/mcp.test.ts +0 -248
- package/tests/integration/memo-sweep.test.ts +0 -91
- package/tests/integration/prompt-recall-hook.test.ts +0 -88
- package/tests/integration/provider-registry.test.ts +0 -107
- package/tests/integration/recall-golden.test.ts +0 -59
- package/tests/integration/recall-sqlite.test.ts +0 -169
- package/tests/integration/scheduler.test.ts +0 -391
- package/tests/integration/session-end-hook.test.ts +0 -48
- package/tests/integration/session-start-hook.test.ts +0 -126
- package/tests/integration/source-registry.test.ts +0 -120
- package/tests/integration/sqlite-fact-store.test.ts +0 -346
- package/tests/integration/stop-hook.test.ts +0 -560
- package/tests/integration/wal-checkpoint.test.ts +0 -49
- package/tests/unit/cli/launchctl-helpers.test.ts +0 -60
- package/tests/unit/core/adapters/aider.test.ts +0 -230
- package/tests/unit/core/adapters/claude-code.test.ts +0 -118
- package/tests/unit/core/adapters/hermes-agent.test.ts +0 -329
- package/tests/unit/core/adapters/hermes.test.ts +0 -81
- package/tests/unit/core/adapters/jsonl-generic.test.ts +0 -142
- package/tests/unit/core/adapters/opencode.test.ts +0 -354
- package/tests/unit/core/adapters/pi.test.ts +0 -110
- package/tests/unit/core/classifier/prompt.test.ts +0 -126
- package/tests/unit/core/embedding/chunk-body.test.ts +0 -100
- package/tests/unit/core/facts/extract-facts.test.ts +0 -117
- package/tests/unit/core/filter.test.ts +0 -40
- package/tests/unit/core/hook/citation-detect-cite-session.test.ts +0 -96
- package/tests/unit/core/hook/citation-detect.test.ts +0 -124
- package/tests/unit/core/hook/gate.test.ts +0 -29
- package/tests/unit/core/hook/pointer-block.test.ts +0 -22
- package/tests/unit/core/hook/select.test.ts +0 -66
- package/tests/unit/core/match-fields.test.ts +0 -39
- package/tests/unit/core/mcp-cite-session.test.ts +0 -51
- package/tests/unit/core/providers/provider-models.test.ts +0 -101
- package/tests/unit/core/query-shape.test.ts +0 -92
- package/tests/unit/core/recall-facts/fact-recall-service.test.ts +0 -258
- package/tests/unit/core/recall-service.test.ts +0 -200
- package/tests/unit/core/storage/live-status.test.ts +0 -54
- package/tests/unit/core/tokenize.test.ts +0 -32
- package/tests/unit/core/useful-scan.test.ts +0 -537
- package/tests/unit/llm/embed.test.ts +0 -93
- package/tests/unit/llm/ollama-client.test.ts +0 -124
- package/tests/unit/scripts/longmemeval-scorer.test.ts +0 -114
- package/tsconfig.json +0 -31
- package/tsconfig.test.json +0 -11
- package/vitest.config.ts +0 -22
|
@@ -1,320 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* RecallService — the use case. Composes filters, keyword scoring, and
|
|
3
|
-
* semantic search into a single recall operation.
|
|
4
|
-
*
|
|
5
|
-
* Depends only on ports (SessionStore, LLMClient). No framework imports,
|
|
6
|
-
* no SQLite, no HTTP. Tests substitute fake adapters.
|
|
7
|
-
*/
|
|
8
|
-
|
|
9
|
-
import type { LLMClient } from "@ports/llm-client.js";
|
|
10
|
-
import { LLMUnreachableError } from "@ports/llm-client.js";
|
|
11
|
-
import type {
|
|
12
|
-
KeywordNeighbor,
|
|
13
|
-
SemanticNeighbor,
|
|
14
|
-
SessionStore,
|
|
15
|
-
} from "@ports/session-store.js";
|
|
16
|
-
import type {
|
|
17
|
-
MatchField,
|
|
18
|
-
RecallHit,
|
|
19
|
-
RecallMode,
|
|
20
|
-
RecallQuery,
|
|
21
|
-
RecallResult,
|
|
22
|
-
Session,
|
|
23
|
-
} from "@shared/types.js";
|
|
24
|
-
import { applyFilter } from "./filter.js";
|
|
25
|
-
import { keywordMatchFields } from "./match-fields.js";
|
|
26
|
-
import { detectQueryShape } from "./query-shape.js";
|
|
27
|
-
import { tokenSet } from "./tokenize.js";
|
|
28
|
-
|
|
29
|
-
/** Result count used when the caller does not supply a limit. */
const DEFAULT_LIMIT = 20;
/** Largest result count a caller may request; bigger values are clamped. */
const MAX_LIMIT = 100;

/**
 * Reciprocal Rank Fusion constant (Cormack et al. 2009); 60 is the value
 * conventionally used in the literature. RRF fuses ranked lists from several
 * retrievers by summing 1/(k + rank) per retriever. Raw scores are ignored,
 * so lists with very different score distributions (unbounded BM25 vs cosine
 * in [-1, 1]) can be combined without normalization.
 */
const RRF_K = 60;

// Each search leg asks the store for `limit × overfetch` neighbors.
// Presumably this leaves headroom for hits removed by the entity/kind filter.
const KEYWORD_OVERFETCH = 3;
const SEMANTIC_OVERFETCH = 3;
|
|
39
|
-
|
|
40
|
-
/** Ports injected into the recall service through its constructor. */
export interface RecallServiceDeps {
  /** Session store port: keyword search, semantic search, and lookup by id. */
  readonly store: SessionStore;
  /** LLM port used to embed the query text for the semantic leg. */
  readonly llm: LLMClient;
}
|
|
44
|
-
|
|
45
|
-
/**
 * Recall use case: runs the keyword and/or semantic search legs against the
 * injected store, resolves only the sessions that were hit, applies the
 * entity/kind filter, and shapes the outcome for the requested mode.
 */
export class RecallService {
  constructor(private readonly deps: RecallServiceDeps) {}

  /**
   * Execute one recall query.
   *
   * @param input Query text plus optional mode (defaults to "keyword"),
   *   limit (clamped by `clampLimit`), entity and kind filters.
   * @returns The hits for the requested mode. When the embedding backend is
   *   unreachable, `modeUnavailable` is set: semantic mode then returns an
   *   empty result, hybrid mode falls back to whatever the keyword leg found.
   * @throws Any error from the store or LLM other than `LLMUnreachableError`.
   *
   * NOTE(review): both legs require non-empty query text, so a request that
   * carries only an entity/kind filter resolves zero hits — confirm intended.
   */
  async search(input: RecallQuery): Promise<RecallResult> {
    const { query } = input;
    const mode: RecallMode = input.mode ?? "keyword";
    const limit = clampLimit(input.limit);
    const entity = input.entity ?? null;
    const kind = input.kind ?? null;

    const empty: RecallResult = { query, entity, kind, mode, limit, total: 0, results: [] };
    if (!(query || entity || kind)) return empty;

    // 1. Search legs. Each returns ranked neighbor ids only; no bodies loaded.
    const hasQuery = Boolean(query);
    const wantsKeyword = mode === "keyword" || mode === "hybrid";
    const wantsSemantic = mode === "semantic" || mode === "hybrid";

    let kwNeighbors: ReadonlyArray<KeywordNeighbor> = [];
    if (wantsKeyword && hasQuery) {
      kwNeighbors = await this.deps.store.keywordSearch(query, limit * KEYWORD_OVERFETCH);
    }

    let semNeighbors: ReadonlyArray<SemanticNeighbor> = [];
    let semError: "ollama_unreachable" | null = null;
    if (wantsSemantic && hasQuery) {
      const leg = await this.semanticLeg(query, limit * SEMANTIC_OVERFETCH);
      semNeighbors = leg.neighbors;
      semError = leg.unavailable;
    }

    // Pure semantic mode has nothing to fall back on.
    if (mode === "semantic" && semError) {
      return { ...empty, modeUnavailable: semError };
    }

    // 2. Load just the sessions that were hit, then filter by entity/kind.
    //    A session rejected by the filter is simply missing from `byId`.
    const hitSessions = await this.deps.store.getByIds(uniqueIds(kwNeighbors, semNeighbors));
    const filterArgs: { entity?: string; kind?: typeof input.kind } = {};
    if (input.entity !== undefined) filterArgs.entity = input.entity;
    if (input.kind !== undefined) filterArgs.kind = input.kind;
    const byId = new Map<string, Session>();
    for (const s of applyFilter(hitSessions, filterArgs)) byId.set(s.id, s);

    // 3. Turn neighbors into hits, keeping each leg's rank order.
    const queryTokens = tokenSet(query);

    const kwHits: KeywordHit[] = [];
    for (const neighbor of kwNeighbors) {
      const session = byId.get(neighbor.sessionId);
      if (session !== undefined) {
        kwHits.push({
          session,
          score: neighbor.score,
          matchedIn: keywordMatchFields(session, queryTokens),
        });
      }
    }

    const semHits: SemanticHit[] = [];
    for (const neighbor of semNeighbors) {
      const session = byId.get(neighbor.sessionId);
      if (session !== undefined) {
        semHits.push({ session, similarity: cosineFromL2(neighbor.distance) });
      }
    }

    // 4. Shape the answer for the requested mode.
    switch (mode) {
      case "keyword":
        return finalize(query, entity, kind, mode, limit, kwHits.map(toKeywordHit));
      case "semantic":
        return finalize(query, entity, kind, mode, limit, semHits.map(toSemanticHit));
      default: {
        const merged = mergeHybrid(kwHits, semHits);
        const shape = detectQueryShape(query);
        const ranked =
          shape.hasTemporal && shape.hasNamedEntity
            ? forceIncludeKeywordTop(merged, kwHits, limit)
            : merged;
        const result = finalize(query, entity, kind, mode, limit, ranked);
        return semError ? { ...result, modeUnavailable: semError } : result;
      }
    }
  }

  /**
   * Embed the query and fetch its nearest semantic neighbors. An
   * `LLMUnreachableError` is reported through `unavailable` instead of
   * being thrown; every other error propagates.
   */
  private async semanticLeg(
    query: string,
    fetchCount: number,
  ): Promise<{
    readonly neighbors: ReadonlyArray<SemanticNeighbor>;
    readonly unavailable: "ollama_unreachable" | null;
  }> {
    try {
      const embedding = await this.deps.llm.embed(query, "query");
      const neighbors = await this.deps.store.semanticSearch(embedding.vector, fetchCount);
      return { neighbors, unavailable: null };
    } catch (err) {
      if (!(err instanceof LLMUnreachableError)) throw err;
      return { neighbors: [], unavailable: "ollama_unreachable" };
    }
  }
}
|
|
145
|
-
|
|
146
|
-
/**
 * Collect the distinct session ids referenced by either leg, keyword ids
 * first, each id at the position of its first occurrence.
 */
function uniqueIds(
  kw: ReadonlyArray<KeywordNeighbor>,
  sem: ReadonlyArray<SemanticNeighbor>,
): ReadonlyArray<string> {
  const keywordIds = kw.map((neighbor) => neighbor.sessionId);
  const semanticIds = sem.map((neighbor) => neighbor.sessionId);
  // A Set iterates in insertion order, so first-seen order is preserved.
  return Array.from(new Set<string>([...keywordIds, ...semanticIds]));
}
|
|
155
|
-
|
|
156
|
-
/** A keyword-leg neighbor after its session has been resolved. */
interface KeywordHit {
  /** The resolved session that survived the entity/kind filter. */
  readonly session: Session;
  /** Raw score reported by the store's keyword search. */
  readonly score: number;
  /** Session fields in which the query tokens were found. */
  readonly matchedIn: readonly MatchField[];
}
|
|
161
|
-
|
|
162
|
-
/** A semantic-leg neighbor after its session has been resolved. */
interface SemanticHit {
  /** The resolved session that survived the entity/kind filter. */
  readonly session: Session;
  /** Cosine similarity derived from the store's L2 distance. */
  readonly similarity: number;
}
|
|
166
|
-
|
|
167
|
-
/**
 * Reciprocal Rank Fusion across the keyword and semantic legs.
 *
 * matchScore = Σ 1/(RRF_K + rank_i) over each leg the session appears in,
 * where rank is the 1-based position in that leg. A session at rank 1 in
 * both legs scores ~0.0328 (the maximum for two legs at k=60); a session at
 * rank 1 in one leg and absent from the other scores ~0.0164.
 *
 * keywordScore and semanticScore are informational only and do not feed
 * matchScore. Each is the raw leg score divided by max(1, largest score in
 * that leg), so values are scaled down only when the leg's best score
 * exceeds 1.
 *
 * Ordering uses the unrounded fused score. Adjacent ranks deep in a list
 * differ by less than 1e-4, so sorting on the 4-decimal value that is
 * reported would collapse distinct scores into ties and let insertion order
 * (keyword leg first) decide them.
 *
 * @param kwHits Keyword-leg hits in rank order (best first).
 * @param semHits Semantic-leg hits in rank order (best first).
 * @returns One hit per distinct session id, best fused score first.
 */
function mergeHybrid(
  kwHits: ReadonlyArray<KeywordHit>,
  semHits: ReadonlyArray<SemanticHit>,
): ReadonlyArray<RecallHit> {
  // The floor of 1 also guards against dividing by zero for empty legs.
  const maxKw = Math.max(1, ...kwHits.map((h) => h.score));
  const maxSem = Math.max(1, ...semHits.map((h) => h.similarity));

  const kwRank = new Map<string, number>();
  kwHits.forEach((h, i) => kwRank.set(h.session.id, i + 1));
  const semRank = new Map<string, number>();
  semHits.forEach((h, i) => semRank.set(h.session.id, i + 1));

  const kwMap = new Map<string, KeywordHit>(kwHits.map((h) => [h.session.id, h]));
  const semMap = new Map<string, SemanticHit>(semHits.map((h) => [h.session.id, h]));
  const allIds = new Set<string>([...kwMap.keys(), ...semMap.keys()]);

  // Keep the exact fused score beside each row so the sort is not affected
  // by the rounding applied to the reported matchScore.
  const scored: Array<{ readonly rrf: number; readonly hit: RecallHit }> = [];
  for (const id of allIds) {
    const kw = kwMap.get(id);
    const sem = semMap.get(id);
    const session = kw?.session ?? sem?.session;
    if (session === undefined) continue; // unreachable: id came from one of the maps

    const kRank = kwRank.get(id);
    const sRank = semRank.get(id);
    const rrf =
      (kRank !== undefined ? 1 / (RRF_K + kRank) : 0) +
      (sRank !== undefined ? 1 / (RRF_K + sRank) : 0);
    const matchedIn = uniqueFields(kw?.matchedIn ?? [], sem ? (["semantic"] as MatchField[]) : []);

    scored.push({
      rrf,
      hit: {
        ...sessionHitFields(session),
        matchScore: round4(rrf),
        matchedIn,
        keywordScore: kw ? round4(kw.score / maxKw) : 0,
        semanticScore: sem ? round4(sem.similarity / maxSem) : 0,
      },
    });
  }

  scored.sort((a, b) => b.rrf - a.rrf);
  return scored.map((entry) => entry.hit);
}
|
|
217
|
-
|
|
218
|
-
/**
 * Force the keyword leg's rank-1 session into the top-`limit` window of the
 * merged list. Called only when the query shape (temporal + named entity)
 * indicates the pattern where plain RRF tends to demote the keyword winner.
 *
 * - If there are no keyword hits, the merged list is empty, the keyword
 *   winner is not in the merged list, or it is already inside the top
 *   `limit`, the input array is returned unchanged.
 * - Otherwise the winner is placed at index `limit - 1`. The hit that used
 *   to occupy that slot moves to just below the cut, and the winner's
 *   original, lower entry is removed. The result therefore has the same
 *   members as the input with no duplicates.
 *
 * @param merged Fused hits, best first.
 * @param kwHits Keyword-leg hits in rank order; only the first is used.
 * @param limit Size of the window the winner must land in.
 * @returns The re-ordered list, or `merged` itself when no change is needed.
 */
function forceIncludeKeywordTop(
  merged: ReadonlyArray<RecallHit>,
  kwHits: ReadonlyArray<KeywordHit>,
  limit: number,
): ReadonlyArray<RecallHit> {
  const keywordWinner = kwHits[0];
  if (keywordWinner === undefined || merged.length === 0) return merged;

  const topId = keywordWinner.session.id;
  const currentIndex = merged.findIndex((h) => h.id === topId);
  // Not present at all, or already inside the window: nothing to do.
  if (currentIndex === -1 || currentIndex < limit) return merged;

  const forcedHit = merged[currentIndex];
  if (forcedHit === undefined) return merged;

  const slot = Math.max(0, limit - 1);
  const head = merged.slice(0, slot);
  // Everything from the slot onward, minus the winner's old entry; the
  // displaced hit is first, so it sits immediately below the cut.
  const rest = merged.slice(slot).filter((h) => h.id !== topId);
  return [...head, forcedHit, ...rest];
}
|
|
241
|
-
|
|
242
|
-
/** Shape a keyword-leg hit as a result row; the raw keyword score is the match score. */
function toKeywordHit(h: KeywordHit): RecallHit {
  const { session, score, matchedIn } = h;
  return { ...sessionHitFields(session), matchScore: score, matchedIn };
}
|
|
249
|
-
|
|
250
|
-
/** Shape a semantic-leg hit as a result row; similarity is the match score. */
function toSemanticHit(h: SemanticHit): RecallHit {
  const base = sessionHitFields(h.session);
  return { ...base, matchScore: h.similarity, matchedIn: ["semantic"] };
}
|
|
257
|
-
|
|
258
|
-
/** Pick the session fields that every result row carries, whatever the mode. */
function sessionHitFields(s: Session) {
  const { id, startedAt, label, summary, entities, decisions, open, status } = s;
  return { id, startedAt, label, summary, entities, decisions, open, status } as const;
}
|
|
270
|
-
|
|
271
|
-
/**
 * Assemble the response envelope. `total` counts every hit that was passed
 * in, while `results` holds only the first `limit` of them.
 */
function finalize(
  query: string,
  entity: string | null,
  kind: RecallResult["kind"],
  mode: RecallMode,
  limit: number,
  hits: ReadonlyArray<RecallHit>,
): RecallResult {
  const total = hits.length;
  const results = hits.slice(0, limit);
  return { query, entity, kind, mode, limit, total, results };
}
|
|
289
|
-
|
|
290
|
-
/**
 * Normalize a caller-supplied limit: a missing value becomes DEFAULT_LIMIT,
 * NaN or anything below 1 becomes 1, and the rest is truncated to an
 * integer and capped at MAX_LIMIT.
 */
function clampLimit(limit: number | undefined): number {
  const requested = limit ?? DEFAULT_LIMIT;
  // `NaN >= 1` is false, so NaN takes the same branch as values below 1.
  return requested >= 1 ? Math.min(MAX_LIMIT, Math.trunc(requested)) : 1;
}
|
|
295
|
-
|
|
296
|
-
/**
 * Convert an L2 distance into a cosine similarity using the unit-vector
 * identity cos = 1 − d²/2, clamped to [-1, 1] and rounded to 4 decimals.
 * Assumes the stored embeddings are unit-normalized, as the original note
 * on this function states; it also says this mirrors recall.py:_run_semantic.
 */
function cosineFromL2(distance: number): number {
  const halfSquared = (distance * distance) / 2;
  const clamped = Math.max(-1, Math.min(1, 1 - halfSquared));
  return round4(clamped);
}
|
|
302
|
-
|
|
303
|
-
/** Round to four decimal places using `Math.round` on the scaled value. */
function round4(value: number): number {
  const scale = 10_000;
  return Math.round(value * scale) / scale;
}
|
|
306
|
-
|
|
307
|
-
/**
 * Concatenate two match-field lists and drop repeats, keeping each field at
 * the position where it first appears.
 */
function uniqueFields(
  a: ReadonlyArray<MatchField>,
  b: ReadonlyArray<MatchField>,
): ReadonlyArray<MatchField> {
  // A Set iterates in insertion order, so first-seen order is preserved.
  return Array.from(new Set<MatchField>([...a, ...b]));
}
|
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* recentLog — tail the query log for the /live observability panel.
|
|
3
|
-
 * Returns the last N entries in reverse-chronological order (most recent first).
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
import { readFileSync, existsSync, statSync } from "node:fs";
|
|
7
|
-
import { homedir } from "node:os";
|
|
8
|
-
import { join } from "node:path";
|
|
9
|
-
|
|
10
|
-
/** One query-log record, normalized for display. */
export interface RecentLogEntry {
  /** Timestamp string from the record; empty when missing or not a string. */
  readonly ts: string;
  /** Origin of the query; "unknown" when the record has none. */
  readonly source: string;
  /** Query text, or null when absent. */
  readonly query: string | null;
  /** Entity filter, or null when absent. */
  readonly entity: string | null;
  /** Kind filter, or null when absent. */
  readonly kind: string | null;
  /** Recall mode; "keyword" when the record has none. */
  readonly mode: string;
  /** Requested limit; 0 when absent. */
  readonly limit: number;
  /** Number of results, read from the record's `n_results`; 0 when absent. */
  readonly nResults: number;
  /** String ids from the record's `returned_ids`; other element types are dropped. */
  readonly returnedIds: readonly string[];
}
|
|
21
|
-
|
|
22
|
-
/**
 * Location of the query log: the NLM_QUERY_LOG environment variable when it
 * is set (even to an empty string), otherwise `~/.nlm/query_log.jsonl`.
 */
function defaultLogPath(): string {
  const override = process.env["NLM_QUERY_LOG"];
  if (override !== undefined) return override;
  return join(homedir(), ".nlm", "query_log.jsonl");
}
|
|
25
|
-
|
|
26
|
-
/** Only this many bytes at the end of the log are parsed. */
const TAIL_BYTES = 256 * 1024;

/**
 * Parse one JSONL line into a normalized entry.
 * Returns null when the line is not valid JSON or is not a JSON object.
 */
function parseRecentLogLine(line: string): RecentLogEntry | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(line);
  } catch {
    return null;
  }
  // Scalars and arrays are valid JSON but are not log records.
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) return null;

  const raw = parsed as Record<string, unknown>;
  const ids = raw["returned_ids"];
  return {
    ts: typeof raw["ts"] === "string" ? raw["ts"] : "",
    source: typeof raw["source"] === "string" ? raw["source"] : "unknown",
    query: typeof raw["query"] === "string" ? raw["query"] : null,
    entity: typeof raw["entity"] === "string" ? raw["entity"] : null,
    kind: typeof raw["kind"] === "string" ? raw["kind"] : null,
    mode: typeof raw["mode"] === "string" ? raw["mode"] : "keyword",
    limit: typeof raw["limit"] === "number" ? raw["limit"] : 0,
    nResults: typeof raw["n_results"] === "number" ? raw["n_results"] : 0,
    returnedIds: Array.isArray(ids) ? ids.filter((x): x is string => typeof x === "string") : [],
  };
}

/**
 * Read the most recent entries from the JSONL query log.
 *
 * Only the last TAIL_BYTES bytes of the file are parsed. The cut is made on
 * the raw bytes and moved forward to the next line boundary, so a multi-byte
 * character or a record split by the cut is never decoded. Lines that are
 * not JSON objects are skipped.
 *
 * @param limit Maximum number of entries to return; zero, negative or NaN
 *   yields an empty list.
 * @param logPath File to read; defaults to the configured query-log path.
 * @returns Entries sorted by `ts` descending (newest first), at most `limit`.
 *   An empty list when the file does not exist or is empty.
 * @throws Filesystem errors other than a missing file (e.g. EACCES).
 */
export function recentQueryLog(limit: number, logPath: string = defaultLogPath()): RecentLogEntry[] {
  if (!(limit > 0)) return [];
  if (!existsSync(logPath)) return [];
  if (statSync(logPath).size === 0) return [];

  // NOTE: the whole file is still read into memory; only the parsing is
  // bounded by TAIL_BYTES.
  const bytes = readFileSync(logPath);
  let start = Math.max(0, bytes.length - TAIL_BYTES);
  if (start > 0 && bytes[start - 1] !== 0x0a) {
    // The cut landed inside a line: skip ahead to the start of the next one.
    const nextNewline = bytes.indexOf(0x0a, start);
    start = nextNewline === -1 ? bytes.length : nextNewline + 1;
  }
  const tail = bytes.toString("utf8", start);

  const entries: RecentLogEntry[] = [];
  for (const line of tail.split("\n")) {
    const trimmed = line.trim();
    if (!trimmed) continue;
    const entry = parseRecentLogLine(trimmed);
    if (entry !== null) entries.push(entry);
  }
  entries.sort((a, b) => b.ts.localeCompare(a.ts));
  return entries.slice(0, limit);
}
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Tokenizer mirrors recall.py:_TOKEN_RE. Identical regex, lowercase normalize.
|
|
3
|
-
* Pure function. The keyword scorer's parity with the Python implementation
|
|
4
|
-
* starts here.
|
|
5
|
-
*/
|
|
6
|
-
|
|
7
|
-
/**
 * A token starts with an ASCII letter or digit and continues with letters,
 * digits, underscore, dot or hyphen.
 */
const TOKEN_PATTERN = /[A-Za-z0-9][A-Za-z0-9_.-]*/g;

/**
 * Split text into lowercase tokens, in order of appearance and with
 * repeats kept. Null, undefined, empty or token-free input yields an empty
 * list.
 */
export function tokenize(text: string | null | undefined): ReadonlyArray<string> {
  const found = (text ?? "").match(TOKEN_PATTERN);
  if (found === null) return [];
  return Array.from(found, (token) => token.toLowerCase());
}
|
|
15
|
-
|
|
16
|
-
/** Distinct lowercase tokens of `text`, as produced by `tokenize`. */
export function tokenSet(text: string | null | undefined): Set<string> {
  const distinct = new Set<string>();
  for (const token of tokenize(text)) distinct.add(token);
  return distinct;
}
|