nlm-memory 0.4.2 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +72 -34
- package/dist/cli/nlm.js +223 -33
- package/dist/cli/nlm.js.map +1 -1
- package/dist/core/adapters/cursor.d.ts +45 -0
- package/dist/core/adapters/cursor.js +397 -0
- package/dist/core/adapters/cursor.js.map +1 -0
- package/dist/core/adapters/from-source.js +10 -0
- package/dist/core/adapters/from-source.js.map +1 -1
- package/dist/core/adapters/windsurf.d.ts +44 -0
- package/dist/core/adapters/windsurf.js +299 -0
- package/dist/core/adapters/windsurf.js.map +1 -0
- package/dist/core/hook/claude-settings.d.ts +12 -5
- package/dist/core/hook/claude-settings.js +21 -6
- package/dist/core/hook/claude-settings.js.map +1 -1
- package/dist/core/sources/source-registry.d.ts +1 -1
- package/dist/core/sources/source-registry.js +18 -0
- package/dist/core/sources/source-registry.js.map +1 -1
- package/dist/core/storage/sqlite-session-store.d.ts +2 -0
- package/dist/core/storage/sqlite-session-store.js +38 -2
- package/dist/core/storage/sqlite-session-store.js.map +1 -1
- package/dist/hook/hook-auth.d.ts +13 -0
- package/dist/hook/hook-auth.js +19 -0
- package/dist/hook/hook-auth.js.map +1 -0
- package/dist/hook/prompt-recall-hook.js +7 -1
- package/dist/hook/prompt-recall-hook.js.map +1 -1
- package/dist/hook/session-start-hook.js +4 -1
- package/dist/hook/session-start-hook.js.map +1 -1
- package/dist/hook/stop-hook.js +4 -1
- package/dist/hook/stop-hook.js.map +1 -1
- package/dist/http/app.d.ts +2 -0
- package/dist/http/app.js +76 -1
- package/dist/http/app.js.map +1 -1
- package/dist/install/claude-code.js +1 -1
- package/dist/install/claude-code.js.map +1 -1
- package/dist/install/cursor.d.ts +25 -0
- package/dist/install/cursor.js +43 -0
- package/dist/install/cursor.js.map +1 -0
- package/dist/install/nlm-dir-perms.d.ts +19 -0
- package/dist/install/nlm-dir-perms.js +43 -0
- package/dist/install/nlm-dir-perms.js.map +1 -0
- package/dist/install/ollama.d.ts +18 -1
- package/dist/install/ollama.js +62 -7
- package/dist/install/ollama.js.map +1 -1
- package/dist/install/setup.d.ts +4 -0
- package/dist/install/setup.js +141 -18
- package/dist/install/setup.js.map +1 -1
- package/dist/install/windsurf.d.ts +25 -0
- package/dist/install/windsurf.js +43 -0
- package/dist/install/windsurf.js.map +1 -0
- package/dist/mcp/server.js +20 -1
- package/dist/mcp/server.js.map +1 -1
- package/dist/shared/types.d.ts +4 -0
- package/dist/ui/assets/{index-BA6IpU8g.css → index-Beo8psd-.css} +1 -1
- package/dist/ui/assets/index-CSPTTeeM.js +69 -0
- package/dist/ui/index.html +2 -2
- package/package.json +26 -1
- package/plugin/scripts/prompt-recall-hook.mjs +55 -4
- package/plugin/scripts/stop-hook.mjs +57 -6
- package/.agents/plugins/marketplace.json +0 -20
- package/.github/workflows/ci.yml +0 -30
- package/dist/ui/assets/index-B_qIVV0k.js +0 -69
- package/docs/methodology/re-derivation-rate.md +0 -112
- package/docs/methodology/useful-hit-rate.md +0 -79
- package/docs/plans/2026-05-20-fts5-lexical-recall.md +0 -1088
- package/docs/plans/2026-05-20-recall-daemon-wedge-fix.md +0 -662
- package/docs/plans/2026-05-20-recall-hook-design.md +0 -131
- package/docs/plans/2026-05-20-recall-hook-implementation.md +0 -1222
- package/docs/plans/desktop-product.md +0 -69
- package/docs/plans/factstore-design.md +0 -236
- package/logs/CHANGELOG/CHANGELOG-2026.md +0 -1389
- package/logs/CHANGELOG/CHANGELOG.md +0 -337
- package/migrations/000_initial_schema.sql +0 -174
- package/migrations/001_entity_type_rename.sql +0 -17
- package/migrations/002_adapter_state_extend.sql +0 -12
- package/migrations/003_session_embeddings.sql +0 -11
- package/migrations/004_facts.sql +0 -46
- package/migrations/005_sources.sql +0 -31
- package/migrations/006_providers.sql +0 -33
- package/migrations/007_source_tokens.sql +0 -17
- package/migrations/008_fts_rebuild.sql +0 -9
- package/migrations/009_session_embedding_chunks.sql +0 -46
- package/migrations/010_sources_opencode.sql +0 -30
- package/migrations/011_sources_hermes_agent.sql +0 -30
- package/migrations/012_sources_aider.sql +0 -30
- package/migrations/013_adapter_state_failure_count.sql +0 -12
- package/plugin-hermes-agent/README.md +0 -49
- package/plugin-hermes-agent/__init__.py +0 -75
- package/plugin-hermes-agent/plugin.yaml +0 -15
- package/scripts/backfill-citations.mjs +0 -0
- package/scripts/build-codex-plugin.mjs +0 -61
- package/scripts/deepseek-probe.mjs +0 -67
- package/scripts/extract-triples.mjs +0 -207
- package/scripts/longmemeval/embedding-cache.ts +0 -77
- package/scripts/longmemeval/fetch-dataset.sh +0 -25
- package/scripts/longmemeval/run-harness.ts +0 -315
- package/scripts/longmemeval/scorer.ts +0 -99
- package/scripts/longmemeval/tsconfig.json +0 -9
- package/scripts/longmemeval/types.ts +0 -35
- package/scripts/nlm-daily-digest.py +0 -239
- package/scripts/nlm-daily-digest.sh +0 -28
- package/src/cli/classify-parity.ts +0 -257
- package/src/cli/launchctl-helpers.ts +0 -49
- package/src/cli/nlm.ts +0 -885
- package/src/core/actions/actions-log.ts +0 -118
- package/src/core/actions/overlay.ts +0 -117
- package/src/core/adapters/aider.ts +0 -205
- package/src/core/adapters/claude-code.ts +0 -293
- package/src/core/adapters/common.ts +0 -54
- package/src/core/adapters/from-source.ts +0 -57
- package/src/core/adapters/hermes-agent.ts +0 -240
- package/src/core/adapters/hermes.ts +0 -277
- package/src/core/adapters/jsonl-generic.ts +0 -208
- package/src/core/adapters/opencode.ts +0 -281
- package/src/core/adapters/pi.ts +0 -264
- package/src/core/classifier/prompt.ts +0 -200
- package/src/core/dataset/build-dataset.ts +0 -463
- package/src/core/embedding/chunk-body.ts +0 -76
- package/src/core/embedding/embed-backfill.ts +0 -210
- package/src/core/embedding/embed-normalize.ts +0 -135
- package/src/core/facts/backfill-facts.ts +0 -254
- package/src/core/facts/extract-facts.ts +0 -50
- package/src/core/hook/citation-detect.ts +0 -124
- package/src/core/hook/cite-memo.ts +0 -68
- package/src/core/hook/claude-settings.ts +0 -166
- package/src/core/hook/gate.ts +0 -25
- package/src/core/hook/hook-log.ts +0 -41
- package/src/core/hook/memo-sweep.ts +0 -164
- package/src/core/hook/memo.ts +0 -67
- package/src/core/hook/pointer-block.ts +0 -26
- package/src/core/hook/select.ts +0 -32
- package/src/core/hook/transcript.ts +0 -121
- package/src/core/ingest/ingest-session.ts +0 -111
- package/src/core/providers/provider-models.ts +0 -100
- package/src/core/providers/provider-registry.ts +0 -196
- package/src/core/recall/citation-log.ts +0 -108
- package/src/core/recall/filter.ts +0 -27
- package/src/core/recall/index.ts +0 -6
- package/src/core/recall/match-fields.ts +0 -40
- package/src/core/recall/query-log.ts +0 -149
- package/src/core/recall/query-shape.ts +0 -66
- package/src/core/recall/recall-service.ts +0 -320
- package/src/core/recall/recent-log.ts +0 -59
- package/src/core/recall/tokenize.ts +0 -18
- package/src/core/recall/useful-scan.ts +0 -336
- package/src/core/recall-facts/fact-query-log.ts +0 -150
- package/src/core/recall-facts/fact-recall-service.ts +0 -327
- package/src/core/scheduler/scan-once.ts +0 -142
- package/src/core/scheduler/scheduler.ts +0 -225
- package/src/core/sources/source-registry.ts +0 -260
- package/src/core/storage/db-restore.ts +0 -133
- package/src/core/storage/live-status.ts +0 -45
- package/src/core/storage/migrate.ts +0 -72
- package/src/core/storage/sqlite-fact-store.ts +0 -304
- package/src/core/storage/sqlite-session-store.ts +0 -765
- package/src/hook/prompt-recall-hook.ts +0 -174
- package/src/hook/session-end-hook.ts +0 -81
- package/src/hook/session-start-hook.ts +0 -165
- package/src/hook/stop-hook.ts +0 -236
- package/src/http/app.ts +0 -1137
- package/src/install/claude-code.ts +0 -128
- package/src/install/codex.ts +0 -367
- package/src/install/hermes-agent.ts +0 -76
- package/src/install/hermes.ts +0 -78
- package/src/install/ollama.ts +0 -211
- package/src/install/setup.ts +0 -368
- package/src/llm/classifier-box.ts +0 -64
- package/src/llm/deepseek-client.ts +0 -150
- package/src/llm/env-autoload.ts +0 -55
- package/src/llm/ollama-client.ts +0 -189
- package/src/mcp/server.ts +0 -534
- package/src/ports/fact-store.ts +0 -102
- package/src/ports/llm-client.ts +0 -52
- package/src/ports/logger.ts +0 -16
- package/src/ports/session-store.ts +0 -45
- package/src/ports/transcript-adapter.ts +0 -55
- package/src/shared/types.ts +0 -145
- package/src/ui/App.tsx +0 -58
- package/src/ui/components/PromoteOpenButton.tsx +0 -65
- package/src/ui/components/SessionDrawer.tsx +0 -136
- package/src/ui/components/SideNav.tsx +0 -162
- package/src/ui/components/Skeleton.tsx +0 -107
- package/src/ui/index.html +0 -13
- package/src/ui/lib/actions.ts +0 -30
- package/src/ui/lib/api.ts +0 -92
- package/src/ui/lib/dataset.ts +0 -141
- package/src/ui/lib/registries.ts +0 -155
- package/src/ui/lib/view-settings.ts +0 -41
- package/src/ui/main.tsx +0 -15
- package/src/ui/pages/Live.tsx +0 -229
- package/src/ui/pages/Pulse.tsx +0 -415
- package/src/ui/pages/Recall.tsx +0 -190
- package/src/ui/pages/River.tsx +0 -308
- package/src/ui/pages/Search.tsx +0 -93
- package/src/ui/pages/Stub.tsx +0 -9
- package/src/ui/pages/Thread.tsx +0 -262
- package/src/ui/pages/settings/Classifier.tsx +0 -227
- package/src/ui/pages/settings/Data.tsx +0 -190
- package/src/ui/pages/settings/Index.tsx +0 -65
- package/src/ui/pages/settings/Labels.tsx +0 -224
- package/src/ui/pages/settings/Providers.tsx +0 -305
- package/src/ui/pages/settings/SettingsSubnav.tsx +0 -28
- package/src/ui/pages/settings/Sources.tsx +0 -326
- package/src/ui/pages/settings/Views.tsx +0 -96
- package/src/ui/styles.css +0 -1766
- package/src/ui/tsconfig.json +0 -21
- package/src/ui/vite.config.ts +0 -19
- package/tests/fixtures/claude_code/short_session.jsonl +0 -2
- package/tests/fixtures/claude_code/standard_iso.jsonl +0 -4
- package/tests/fixtures/claude_code/tool_heavy.jsonl +0 -8
- package/tests/fixtures/claude_code/with_subagent.jsonl +0 -7
- package/tests/fixtures/facts.ts +0 -17
- package/tests/fixtures/golden-corpus.ts +0 -85
- package/tests/fixtures/hermes/paired_request_dump.json +0 -24
- package/tests/fixtures/hermes/paired_session.json +0 -23
- package/tests/fixtures/hermes/request_dump.json +0 -28
- package/tests/fixtures/hermes/session_iso.json +0 -38
- package/tests/fixtures/hermes/session_unix.json +0 -38
- package/tests/fixtures/hermes/system_only.json +0 -18
- package/tests/fixtures/pi/error-connection-abort.jsonl +0 -8
- package/tests/fixtures/pi/short-successful.jsonl +0 -5
- package/tests/fixtures/pi/with-custom-message.jsonl +0 -6
- package/tests/fixtures/sessions.ts +0 -22
- package/tests/integration/backfill-facts.test.ts +0 -362
- package/tests/integration/citation-explicit.test.ts +0 -111
- package/tests/integration/cite-event.test.ts +0 -169
- package/tests/integration/cite-memo.test.ts +0 -87
- package/tests/integration/db-restore.test.ts +0 -153
- package/tests/integration/embed-backfill.test.ts +0 -176
- package/tests/integration/fact-supersedence.test.ts +0 -313
- package/tests/integration/fts-index.test.ts +0 -60
- package/tests/integration/getbyids-sqlite.test.ts +0 -60
- package/tests/integration/hermes-agent-hooks.test.ts +0 -248
- package/tests/integration/hook-claude-settings.test.ts +0 -205
- package/tests/integration/hook-log.test.ts +0 -54
- package/tests/integration/hook-memo.test.ts +0 -68
- package/tests/integration/hook-pre-compact.test.ts +0 -105
- package/tests/integration/hook-subagent-start.test.ts +0 -102
- package/tests/integration/http.test.ts +0 -401
- package/tests/integration/keyword-search-fts.test.ts +0 -66
- package/tests/integration/mcp-recall-logging.test.ts +0 -88
- package/tests/integration/mcp.test.ts +0 -248
- package/tests/integration/memo-sweep.test.ts +0 -91
- package/tests/integration/prompt-recall-hook.test.ts +0 -88
- package/tests/integration/provider-registry.test.ts +0 -107
- package/tests/integration/recall-golden.test.ts +0 -59
- package/tests/integration/recall-sqlite.test.ts +0 -169
- package/tests/integration/scheduler.test.ts +0 -391
- package/tests/integration/session-end-hook.test.ts +0 -48
- package/tests/integration/session-start-hook.test.ts +0 -126
- package/tests/integration/source-registry.test.ts +0 -120
- package/tests/integration/sqlite-fact-store.test.ts +0 -346
- package/tests/integration/stop-hook.test.ts +0 -560
- package/tests/integration/wal-checkpoint.test.ts +0 -49
- package/tests/unit/cli/launchctl-helpers.test.ts +0 -60
- package/tests/unit/core/adapters/aider.test.ts +0 -230
- package/tests/unit/core/adapters/claude-code.test.ts +0 -118
- package/tests/unit/core/adapters/hermes-agent.test.ts +0 -329
- package/tests/unit/core/adapters/hermes.test.ts +0 -81
- package/tests/unit/core/adapters/jsonl-generic.test.ts +0 -142
- package/tests/unit/core/adapters/opencode.test.ts +0 -354
- package/tests/unit/core/adapters/pi.test.ts +0 -110
- package/tests/unit/core/classifier/prompt.test.ts +0 -126
- package/tests/unit/core/embedding/chunk-body.test.ts +0 -100
- package/tests/unit/core/facts/extract-facts.test.ts +0 -117
- package/tests/unit/core/filter.test.ts +0 -40
- package/tests/unit/core/hook/citation-detect-cite-session.test.ts +0 -96
- package/tests/unit/core/hook/citation-detect.test.ts +0 -124
- package/tests/unit/core/hook/gate.test.ts +0 -29
- package/tests/unit/core/hook/pointer-block.test.ts +0 -22
- package/tests/unit/core/hook/select.test.ts +0 -66
- package/tests/unit/core/match-fields.test.ts +0 -39
- package/tests/unit/core/mcp-cite-session.test.ts +0 -51
- package/tests/unit/core/providers/provider-models.test.ts +0 -101
- package/tests/unit/core/query-shape.test.ts +0 -92
- package/tests/unit/core/recall-facts/fact-recall-service.test.ts +0 -258
- package/tests/unit/core/recall-service.test.ts +0 -200
- package/tests/unit/core/storage/live-status.test.ts +0 -54
- package/tests/unit/core/tokenize.test.ts +0 -32
- package/tests/unit/core/useful-scan.test.ts +0 -537
- package/tests/unit/llm/embed.test.ts +0 -93
- package/tests/unit/llm/ollama-client.test.ts +0 -124
- package/tests/unit/scripts/longmemeval-scorer.test.ts +0 -114
- package/tsconfig.json +0 -31
- package/tsconfig.test.json +0 -11
- package/vitest.config.ts +0 -22
|
@@ -1,327 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* FactRecallService — agent-facing recall over the FactStore.
|
|
3
|
-
*
|
|
4
|
-
* Mirrors RecallService's keyword / semantic / hybrid pattern but works on
|
|
5
|
-
* Fact records, not Session records. Sessions and facts answer different
|
|
6
|
-
* questions and have incompatibly-shaped results, so this is a separate
|
|
7
|
-
* service with its own MCP tool — see Section 4 of factstore-design.md.
|
|
8
|
-
*
|
|
9
|
-
* Filter pipeline:
|
|
10
|
-
* 1. Storage pre-filter (subject, predicate, kind, minConfidence,
|
|
11
|
-
* includeSuperseded). Cheap SQL.
|
|
12
|
-
* 2. Keyword scoring over (value, subject, predicate). Pure, in-memory.
|
|
13
|
-
* 3. Semantic KNN via fact_embeddings vec0 (when mode != keyword).
|
|
14
|
-
* 4. Hybrid merge: 0.6 semantic + 0.4 keyword, matching the session
|
|
15
|
-
* recall weights.
|
|
16
|
-
*
|
|
17
|
-
* Confidence policy: default `minConfidence` is 0.6 (Section 1 of the plan).
|
|
18
|
-
* Facts with classifier confidence in [0.4, 0.6) get written by
|
|
19
|
-
* extractFacts but stay out of agent recall unless the caller lowers the
|
|
20
|
-
* floor explicitly.
|
|
21
|
-
*/
|
|
22
|
-
|
|
23
|
-
import type { FactStore } from "@ports/fact-store.js";
|
|
24
|
-
import type { LLMClient } from "@ports/llm-client.js";
|
|
25
|
-
import { LLMUnreachableError } from "@ports/llm-client.js";
|
|
26
|
-
import type {
|
|
27
|
-
Fact,
|
|
28
|
-
FactHit,
|
|
29
|
-
FactMatchField,
|
|
30
|
-
FactRecallQuery,
|
|
31
|
-
FactRecallResult,
|
|
32
|
-
RecallMode,
|
|
33
|
-
} from "@shared/types.js";
|
|
34
|
-
import { tokenSet } from "@core/recall/tokenize.js";
|
|
35
|
-
|
|
36
|
-
/** Result count used by clampLimit when the caller supplies no `limit`. */
const DEFAULT_LIMIT = 10;
/** Upper bound clampLimit applies to any caller-supplied `limit`. */
const MAX_LIMIT = 100;
/** Confidence floor sent to the store when the query carries no `minConfidence`. */
const DEFAULT_MIN_CONFIDENCE = 0.6;
/** Row cap passed as `limit` to FactStore.listForRecall for the candidate pre-filter. */
const STORAGE_FETCH_CAP = 500;
/** Share of the hybrid score contributed by the normalized keyword score. */
const HYBRID_KW_WEIGHT = 0.4;
/** Share of the hybrid score contributed by the normalized semantic score. */
const HYBRID_SEM_WEIGHT = 0.6;
/** Multiplier on `limit` for the KNN fetch; neighbors outside the candidate set are dropped afterwards. */
const SEMANTIC_OVERFETCH = 3;

/** Score added per matching query token, by the fact field the token was found in. */
const FIELD_WEIGHTS = {
  value: 3,
  subject: 1,
  predicate: 1,
} as const;
|
|
49
|
-
|
|
50
|
-
/** Collaborators injected into FactRecallService. */
export interface FactRecallServiceDeps {
  /** Store queried for filtered candidates (`listForRecall`) and nearest neighbors (`semanticSearch`). */
  readonly factStore: FactStore;
  /** Client used to embed the query text for semantic and hybrid modes. */
  readonly llm: LLMClient;
}
|
|
54
|
-
|
|
55
|
-
/**
 * Recall over Fact records in keyword, semantic, or hybrid mode.
 *
 * The store applies the structured pre-filter; keyword scoring and the hybrid
 * merge happen in memory over the rows it returns.
 */
export class FactRecallService {
  constructor(private readonly deps: FactRecallServiceDeps) {}

  /**
   * Runs one recall query.
   *
   * @param input Free-text query and/or structured filters. `mode` defaults to
   *   "keyword", `limit` is clamped by clampLimit, and `minConfidence` defaults
   *   to DEFAULT_MIN_CONFIDENCE.
   * @returns The echoed query parameters, `total` (hits found before `limit`
   *   is applied, itself bounded by STORAGE_FETCH_CAP candidates) and at most
   *   `limit` results. `modeUnavailable` is set when the embedding call raised
   *   LLMUnreachableError.
   * @throws Any error from the store or the LLM client other than
   *   LLMUnreachableError.
   */
  async search(input: FactRecallQuery): Promise<FactRecallResult> {
    const mode: RecallMode = input.mode ?? "keyword";
    const limit = clampLimit(input.limit);
    const subject = input.subject ?? null;
    const predicate = input.predicate ?? null;
    const kind = input.kind ?? null;
    const queryText = (input.query ?? "").trim();

    const empty: FactRecallResult = {
      query: queryText,
      subject,
      predicate,
      kind,
      mode,
      limit,
      total: 0,
      results: [],
    };

    // A query with no signal at all → empty. Either free-text query, or a
    // structured filter (subject / predicate / kind) must be provided.
    if (!queryText && subject === null && predicate === null && kind === null) {
      return empty;
    }

    const filter: Parameters<FactStore["listForRecall"]>[0] = {
      includeSuperseded: input.includeSuperseded === true,
      minConfidence: input.minConfidence ?? DEFAULT_MIN_CONFIDENCE,
      limit: STORAGE_FETCH_CAP,
      ...(input.subject !== undefined ? { subject: input.subject } : {}),
      ...(input.predicate !== undefined ? { predicate: input.predicate } : {}),
      ...(input.kind !== undefined ? { kind: input.kind } : {}),
    };

    const candidates = await this.deps.factStore.listForRecall(filter);
    if (candidates.length === 0) return empty;

    const wantsKeyword = mode === "keyword" || mode === "hybrid";
    const wantsSemantic = mode === "semantic" || mode === "hybrid";

    // Pure structured lookup (filters only, no query text): there is nothing
    // to score or embed, so return the store's rows in the order it supplied
    // them. The full candidate list goes to finalize(), which applies `limit`
    // itself; slicing here first would cap `total` at `limit`, unlike every
    // other path.
    // NOTE(review): semantic mode with no query text still falls through and
    // yields zero results — confirm that is intended.
    if (wantsKeyword && !queryText) {
      const rows = candidates.map((f) => factToHit(f, 0, []));
      return finalize(queryText, subject, predicate, kind, mode, limit, rows);
    }

    const byId = new Map<string, Fact>(candidates.map((f) => [f.id, f]));

    // queryText is non-empty whenever the keyword pass runs (see guard above).
    const kwHits = wantsKeyword
      ? scoreAll(candidates, new Set(tokenSet(queryText)))
      : [];

    let semHits: ReadonlyArray<SemanticHit> = [];
    let semError: "ollama_unreachable" | null = null;
    if (wantsSemantic && queryText) {
      try {
        semHits = await this.runSemantic(queryText, byId, limit * SEMANTIC_OVERFETCH);
      } catch (err) {
        // Only an unreachable embedder is tolerated; anything else propagates.
        if (err instanceof LLMUnreachableError) {
          semError = "ollama_unreachable";
        } else {
          throw err;
        }
      }
    }

    // Semantic-only mode has no fallback when the embedder is down.
    if (mode === "semantic" && semError) {
      return { ...empty, modeUnavailable: semError };
    }

    if (mode === "keyword") {
      return finalize(queryText, subject, predicate, kind, mode, limit, kwHits.map(toKeywordHit));
    }

    if (mode === "semantic") {
      return finalize(queryText, subject, predicate, kind, mode, limit, semHits.map(toSemanticHit));
    }

    // hybrid — degrades to keyword-only scores when the embedder was unreachable.
    const merged = mergeHybrid(kwHits, semHits, byId);
    const result = finalize(queryText, subject, predicate, kind, mode, limit, merged);
    return semError ? { ...result, modeUnavailable: semError } : result;
  }

  /**
   * Embeds `query` and asks the store for up to `fetchLimit` nearest
   * neighbors, keeping only those present in `byId`. The store call itself is
   * not given the structured filters, so after this post-filter fewer than
   * `fetchLimit` hits may remain.
   */
  private async runSemantic(
    query: string,
    byId: ReadonlyMap<string, Fact>,
    fetchLimit: number,
  ): Promise<ReadonlyArray<SemanticHit>> {
    const embedding = await this.deps.llm.embed(query, "query");
    const neighbors = await this.deps.factStore.semanticSearch(embedding.vector, fetchLimit);
    const hits: SemanticHit[] = [];
    for (const n of neighbors) {
      const fact = byId.get(n.factId);
      if (!fact) continue; // candidate was filtered out by subject/predicate/conf
      hits.push({ fact, similarity: cosineFromL2(n.distance) });
    }
    return hits;
  }
}
|
|
164
|
-
|
|
165
|
-
/** A fact that matched at least one query token, as produced by scoreAll. */
interface KeywordHit {
  readonly fact: Fact;
  /** Raw weighted token-overlap score from scoreFact (not yet normalized). */
  readonly score: number;
  /** Fields in which at least one query token was found. */
  readonly matchedIn: ReadonlyArray<FactMatchField>;
}
|
|
170
|
-
|
|
171
|
-
/** A nearest-neighbor result that survived the candidate filter in runSemantic. */
interface SemanticHit {
  readonly fact: Fact;
  /** Value returned by cosineFromL2 for the neighbor's distance. */
  readonly similarity: number;
}
|
|
175
|
-
|
|
176
|
-
/**
 * Scores every fact against the query tokens and returns the facts with a
 * positive score, highest score first. An empty token set yields no hits.
 */
function scoreAll(
  facts: ReadonlyArray<Fact>,
  queryTokens: ReadonlySet<string>,
): ReadonlyArray<KeywordHit> {
  if (queryTokens.size === 0) return [];

  const scored: KeywordHit[] = facts
    .map((fact) => ({ fact, ...scoreFact(fact, queryTokens) }))
    .filter((candidate) => candidate.score > 0);

  return scored.sort((left, right) => right.score - left.score);
}
|
|
189
|
-
|
|
190
|
-
/**
 * Computes the keyword score of a single fact: for each of value, subject and
 * predicate, the number of query tokens found in that field times the field's
 * weight. Also reports which fields matched, in that same order.
 */
function scoreFact(
  fact: Fact,
  queryTokens: ReadonlySet<string>,
): { score: number; matchedIn: ReadonlyArray<FactMatchField> } {
  const scoredFields = ["value", "subject", "predicate"] as const;
  const matchedIn: FactMatchField[] = [];
  let score = 0;

  for (const field of scoredFields) {
    const overlap = intersectionSize(queryTokens, tokenSet(fact[field]));
    if (overlap === 0) continue;
    score += FIELD_WEIGHTS[field] * overlap;
    matchedIn.push(field);
  }

  return { score, matchedIn };
}
|
|
217
|
-
|
|
218
|
-
/**
 * Combines keyword and semantic hits into one ranked list.
 *
 * Each side is divided by its own maximum (never less than 1), then blended
 * as HYBRID_SEM_WEIGHT * semantic + HYBRID_KW_WEIGHT * keyword. A fact found
 * by only one side contributes 0 for the other. Ids missing from `byId` are
 * dropped. Output is sorted by combined score, descending.
 */
function mergeHybrid(
  kwHits: ReadonlyArray<KeywordHit>,
  semHits: ReadonlyArray<SemanticHit>,
  byId: ReadonlyMap<string, Fact>,
): ReadonlyArray<FactHit> {
  const keywordCeiling = kwHits.reduce((top, hit) => Math.max(top, hit.score), 1);
  const semanticCeiling = semHits.reduce((top, hit) => Math.max(top, hit.similarity), 1);

  const keywordById = new Map<string, KeywordHit>();
  for (const hit of kwHits) keywordById.set(hit.fact.id, hit);
  const semanticById = new Map<string, SemanticHit>();
  for (const hit of semHits) semanticById.set(hit.fact.id, hit);

  // Keyword ids first, then semantic-only ids; the Set removes overlap.
  const unionIds = new Set<string>([...keywordById.keys(), ...semanticById.keys()]);

  const merged: FactHit[] = [];
  unionIds.forEach((id) => {
    const fact = byId.get(id);
    if (!fact) return;

    const keyword = keywordById.get(id);
    const semantic = semanticById.get(id);
    const keywordShare = keyword ? keyword.score / keywordCeiling : 0;
    const semanticShare = semantic ? semantic.similarity / semanticCeiling : 0;

    merged.push({
      ...fact,
      matchScore: round4(HYBRID_SEM_WEIGHT * semanticShare + HYBRID_KW_WEIGHT * keywordShare),
      matchedIn: uniqueFields(
        keyword?.matchedIn ?? [],
        semantic ? (["semantic"] as FactMatchField[]) : [],
      ),
      keywordScore: round4(keywordShare),
      semanticScore: round4(semanticShare),
    });
  });

  merged.sort((left, right) => right.matchScore - left.matchScore);
  return merged;
}
|
|
254
|
-
|
|
255
|
-
/** Copies a fact's own fields into a new FactHit and attaches its score and matched fields. */
function factToHit(
  fact: Fact,
  score: number,
  matchedIn: ReadonlyArray<FactMatchField>,
): FactHit {
  const hit: FactHit = { ...fact, matchScore: score, matchedIn };
  return hit;
}
|
|
262
|
-
|
|
263
|
-
/** Converts a keyword hit to a FactHit, using the raw keyword score as matchScore. */
function toKeywordHit(h: KeywordHit): FactHit {
  const { fact, score, matchedIn } = h;
  return factToHit(fact, score, matchedIn);
}
|
|
266
|
-
|
|
267
|
-
/** Converts a semantic hit to a FactHit: matchScore is the similarity, matchedIn is ["semantic"]. */
function toSemanticHit(h: SemanticHit): FactHit {
  const { fact, similarity } = h;
  return factToHit(fact, similarity, ["semantic"]);
}
|
|
270
|
-
|
|
271
|
-
/**
 * Builds the response envelope: echoes the query parameters, sets `total` to
 * the number of hits received, and keeps only the first `limit` hits.
 */
function finalize(
  query: string,
  subject: string | null,
  predicate: string | null,
  kind: FactRecallResult["kind"],
  mode: RecallMode,
  limit: number,
  hits: ReadonlyArray<FactHit>,
): FactRecallResult {
  const total = hits.length;
  const results = hits.slice(0, limit);
  return { query, subject, predicate, kind, mode, limit, total, results };
}
|
|
291
|
-
|
|
292
|
-
/**
 * Normalizes a requested result count: a missing value becomes DEFAULT_LIMIT,
 * NaN or anything below 1 becomes 1, and everything else is truncated to an
 * integer and capped at MAX_LIMIT.
 */
function clampLimit(limit: number | undefined): number {
  const requested = limit == null ? DEFAULT_LIMIT : limit;
  // `!(x >= 1)` is true for NaN as well as for values below 1.
  if (!(requested >= 1)) return 1;
  const whole = Math.trunc(requested);
  return whole > MAX_LIMIT ? MAX_LIMIT : whole;
}
|
|
297
|
-
|
|
298
|
-
/**
 * Maps a distance to `1 - d² / 2`, clamped to [-1, 1] and rounded to four
 * decimals.
 * NOTE(review): this equals cosine similarity only if `distance` is the L2
 * distance between unit-length vectors — confirm against the embedding store.
 */
function cosineFromL2(distance: number): number {
  const squared = distance * distance;
  const raw = 1 - squared / 2;
  const clamped = raw > 1 ? 1 : raw < -1 ? -1 : raw;
  return round4(clamped);
}
|
|
302
|
-
|
|
303
|
-
/** Rounds a number to four decimal places. */
function round4(value: number): number {
  const scale = 10_000;
  const scaled = Math.round(value * scale);
  return scaled / scale;
}
|
|
306
|
-
|
|
307
|
-
/**
 * Counts the elements two sets have in common, iterating the smaller set and
 * probing the larger one.
 */
function intersectionSize<T>(a: ReadonlySet<T>, b: ReadonlySet<T>): number {
  const iterateA = a.size <= b.size;
  const walked = iterateA ? a : b;
  const probed = iterateA ? b : a;

  let shared = 0;
  walked.forEach((member) => {
    if (probed.has(member)) shared += 1;
  });
  return shared;
}
|
|
313
|
-
|
|
314
|
-
/**
 * Concatenates two field lists and drops repeats, keeping each field at the
 * position of its first occurrence.
 */
function uniqueFields(
  a: ReadonlyArray<FactMatchField>,
  b: ReadonlyArray<FactMatchField>,
): ReadonlyArray<FactMatchField> {
  // A Set iterates in insertion order, so first-seen order is preserved.
  return Array.from(new Set<FactMatchField>([...a, ...b]));
}
|
|
@@ -1,142 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* scanOnce — mtime-gated incremental discovery shared by every adapter.
|
|
3
|
-
*
|
|
4
|
-
* The Python codebase bundled this logic into each adapter (`scan_once` +
|
|
5
|
-
* `record_classified` methods). In the TS port the adapter stays a pure
|
|
6
|
-
* parser (TranscriptAdapter port); the mtime check and adapter_state
|
|
7
|
-
* upsert live here, generic over the adapter. Same behavior, less
|
|
8
|
-
* duplication across claude-code / hermes / pi.
|
|
9
|
-
*
|
|
10
|
-
* Contract (per file under adapter.discover()):
|
|
11
|
-
* - If `now - mtime < idleMinutes * 60s` → still active, skip
|
|
12
|
-
* - Lookup adapter_state by (adapterName, sourcePath):
|
|
13
|
-
* no row + file idle → NEW: parse + return (chunk, supersedes=null)
|
|
14
|
-
* row exists, size match, failures < ceil → UNCHANGED: skip
|
|
15
|
-
* row exists, size match, failures >= ceil → FAILED_CEILING: skip (log once per session)
|
|
16
|
-
* row exists, size differs → RESUMED: parse + return; failure_count is reset only if it had reached the ceiling
|
|
17
|
-
* - After successful classify+insert downstream, call `recordClassified`
|
|
18
|
-
* to upsert adapter_state with the new size + session_id.
|
|
19
|
-
* - On classify/storage failure, call `recordFailed` to increment failure_count.
|
|
20
|
-
* When failure_count reaches MAX_CLASSIFY_FAILURES and the file hasn't grown,
|
|
21
|
-
* the file is permanently skipped until new content arrives.
|
|
22
|
-
*/
|
|
23
|
-
|
|
24
|
-
import { statSync } from "node:fs";
|
|
25
|
-
import type Database from "better-sqlite3";
|
|
26
|
-
import type {
|
|
27
|
-
SessionChunk,
|
|
28
|
-
TranscriptAdapter,
|
|
29
|
-
} from "@ports/transcript-adapter.js";
|
|
30
|
-
|
|
31
|
-
/** One file that scanOnce parsed and is handing downstream. */
export interface ScanResult {
  /** Chunk returned by the adapter's parseSession for the file. */
  readonly chunk: SessionChunk;
  /** session_id stored in adapter_state for this path, or null when the path had no row (or the row had none). */
  readonly supersedes: string | null;
}
|
|
35
|
-
|
|
36
|
-
/** Failure-count ceiling: scanOnce zeroes a row's failure_count when the file size changed and the count had reached this value. */
export const MAX_CLASSIFY_FAILURES = 3;
|
|
37
|
-
|
|
38
|
-
/** Shape of the adapter_state columns selected by scanOnce. */
interface AdapterStateRow {
  source_path: string;
  /** File size recorded by the last recordClassified / recordFailed call. */
  file_size: number | null;
  session_id: string | null;
  /** Read through COALESCE(failure_count, 0), so never null here. */
  failure_count: number;
}
|
|
44
|
-
|
|
45
|
-
/**
 * Discovers the adapter's transcript files and parses those that are idle and
 * either new or changed in size since they were last recorded.
 *
 * @param adapter Source of file paths (`discover`) and parser (`parseSession`).
 * @param idleMinutes A file modified more recently than this is skipped.
 * @param db Database holding the adapter_state table.
 * @param now Reference time in epoch milliseconds; defaults to Date.now().
 * @returns One entry per parsed file, with the session id it supersedes (or null).
 */
export async function scanOnce(
  adapter: TranscriptAdapter,
  idleMinutes: number,
  db: Database.Database,
  now: number = Date.now(),
): Promise<ReadonlyArray<ScanResult>> {
  const idleThresholdMs = idleMinutes * 60 * 1000;

  const recorded = new Map<string, AdapterStateRow>();
  const rows = db
    .prepare<[string], AdapterStateRow>(
      "SELECT source_path, file_size, session_id, COALESCE(failure_count, 0) AS failure_count FROM adapter_state WHERE adapter_name = ?",
    )
    .all(adapter.name);
  for (const row of rows) recorded.set(row.source_path, row);

  const results: ScanResult[] = [];

  for (const path of await adapter.discover()) {
    let stats;
    try {
      stats = statSync(path);
    } catch {
      // File could not be stat'ed — skip it for this pass.
      continue;
    }

    // Modified too recently: treat the file as still being written.
    if (now - stats.mtimeMs < idleThresholdMs) continue;

    const previous = recorded.get(path);
    if (previous) {
      // Same size as last recorded — skip whether the last attempt succeeded
      // or failed; a retry only happens once the size changes.
      if ((previous.file_size ?? 0) === stats.size) continue;

      // Size changed and the failure ceiling had been hit: clear the counter.
      if (previous.failure_count >= MAX_CLASSIFY_FAILURES) {
        db.prepare(
          "UPDATE adapter_state SET failure_count = 0 WHERE adapter_name = ? AND source_path = ?",
        ).run(adapter.name, path);
      }
    }

    const chunk = await adapter.parseSession(path);
    if (chunk) {
      results.push({ chunk, supersedes: previous ? previous.session_id : null });
    }
  }

  return results;
}
|
|
96
|
-
|
|
97
|
-
/**
 * Upserts the adapter_state row for a file after a successful ingest: stores
 * the file's current size (as both last_offset and file_size) and the session
 * id, and sets failure_count to 0. Does nothing if the file cannot be stat'ed.
 */
export function recordClassified(
  db: Database.Database,
  adapterName: string,
  sourcePath: string,
  sessionId: string,
): void {
  let currentSize: number;
  try {
    currentSize = statSync(sourcePath).size;
  } catch {
    return;
  }

  const upsert = db.prepare(
    `INSERT INTO adapter_state
       (adapter_name, source_path, last_offset, file_size, session_id, failure_count, last_processed_at)
     VALUES (?, ?, ?, ?, ?, 0, datetime('now'))
     ON CONFLICT(adapter_name, source_path) DO UPDATE SET
       last_offset = excluded.last_offset,
       file_size = excluded.file_size,
       session_id = excluded.session_id,
       failure_count = 0,
       last_processed_at = excluded.last_processed_at`,
  );
  upsert.run(adapterName, sourcePath, currentSize, currentSize, sessionId);
}
|
|
121
|
-
|
|
122
|
-
/**
 * Record one failed ingest attempt for a transcript file.
 *
 * Upserts the `adapter_state` row keyed by (adapter_name, source_path).
 * A brand-new row starts with `failure_count = 1` and a NULL `session_id`;
 * an existing row keeps its `session_id` and `last_offset`, has its
 * `file_size` refreshed to the current on-disk size, and has its
 * `failure_count` incremented.
 *
 * The increment goes through `COALESCE(failure_count, 0)`: in SQLite
 * `NULL + 1` is NULL, so a row whose counter was stored as NULL would
 * otherwise never advance. Readers of this table elsewhere in the project
 * already wrap the column in the same COALESCE, which suggests NULL values
 * are possible.
 *
 * If the file can no longer be stat'ed the function returns without
 * touching the database.
 *
 * @param db          Open SQLite handle that owns the `adapter_state` table.
 * @param adapterName Name of the adapter that produced the chunk.
 * @param sourcePath  Path of the transcript file whose ingest failed.
 */
export function recordFailed(
  db: Database.Database,
  adapterName: string,
  sourcePath: string,
): void {
  let size: number;
  try {
    size = statSync(sourcePath).size;
  } catch {
    // File vanished (or is unreadable) — there is nothing meaningful to record.
    return;
  }
  db.prepare(
    `INSERT INTO adapter_state
       (adapter_name, source_path, last_offset, file_size, session_id, failure_count, last_processed_at)
     VALUES (?, ?, ?, ?, NULL, 1, datetime('now'))
     ON CONFLICT(adapter_name, source_path) DO UPDATE SET
       file_size = excluded.file_size,
       failure_count = COALESCE(failure_count, 0) + 1,
       last_processed_at = excluded.last_processed_at`,
  ).run(adapterName, sourcePath, size, size); // size binds last_offset and file_size
}
|
|
@@ -1,225 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* ScanScheduler — periodic ingest loop. Ports `scheduler.py`.
|
|
3
|
-
*
|
|
4
|
-
* Each tick walks the registered adapters, runs scanOnce to discover idle
|
|
5
|
-
* transcript files, classifies the resulting SessionChunks via the active
|
|
6
|
-
* classifier, and persists them through SqliteSessionStore.insertSession
|
|
7
|
-
* with the embedder. Records adapter_state after each successful insert
|
|
8
|
-
* so the next tick is incremental.
|
|
9
|
-
*
|
|
10
|
-
* Single-process: the scheduler runs alongside the HTTP server (Phase D
|
|
11
|
-
* wires it into `nlm start`). No worker thread; Node's event loop is
|
|
12
|
-
* enough — adapter discovery is filesystem-bound and the per-chunk
|
|
13
|
-
* classify call is async-awaited with a wall-clock timeout to keep the
|
|
14
|
-
* tick loop responsive.
|
|
15
|
-
*
|
|
16
|
-
* Confidence floor of 0.3 mirrors Python: classifier outputs below that
|
|
17
|
-
* are skipped rather than persisted as low-quality noise.
|
|
18
|
-
*/
|
|
19
|
-
|
|
20
|
-
import type { LLMClient } from "@ports/llm-client.js";
|
|
21
|
-
import type { TranscriptAdapter } from "@ports/transcript-adapter.js";
|
|
22
|
-
import { extractFacts } from "@core/facts/extract-facts.js";
|
|
23
|
-
import type { SqliteFactStore } from "@core/storage/sqlite-fact-store.js";
|
|
24
|
-
import type {
|
|
25
|
-
IngestRecord,
|
|
26
|
-
SqliteSessionStore,
|
|
27
|
-
} from "@core/storage/sqlite-session-store.js";
|
|
28
|
-
import { MAX_CLASSIFY_FAILURES, recordClassified, recordFailed, scanOnce } from "./scan-once.js";
|
|
29
|
-
|
|
30
|
-
/** Delay between scheduler ticks: 30 minutes, matching the Python default. */
const DEFAULT_INTERVAL_MS = 1_800_000;
/** Wall-clock budget for a single classify call, in milliseconds. */
const DEFAULT_CLASSIFY_TIMEOUT_MS = 120 * 1000;
/** Classifications with confidence below this value are not persisted. */
const DEFAULT_CONFIDENCE_FLOOR = 0.3;
/** Idle threshold, in minutes, handed to scanOnce for each adapter. */
const DEFAULT_IDLE_MINUTES = 15;
/** Maximum number of characters of chunk text stored as a session body. */
const BODY_CAP = 200 * 1000;
|
|
35
|
-
|
|
36
|
-
/**
 * Construction options for {@link ScanScheduler}.
 *
 * `store`, `adapters` and `classifier` are required; every other member
 * is optional and is given a default by the scheduler's constructor.
 */
export interface SchedulerOptions {
  /** Session store that receives classified sessions and exposes the raw DB handle. */
  readonly store: SqliteSessionStore;
  /** Transcript adapters walked, in order, on every tick. */
  readonly adapters: readonly TranscriptAdapter[];
  /** LLM client whose `classify` is called with each chunk's text. */
  readonly classifier: LLMClient;
  /** LLM client passed to `insertSession` as the embedder; null/omitted passes null. */
  readonly embedder?: LLMClient | null;
  /**
   * Fact store for Phase B.2 fact ingest. When supplied, facts are
   * extracted from each classification and handed to `insertSession`
   * together with the session record. When null or omitted no facts are
   * written — the backwards-compatible default for callers (and tests)
   * that do not want fact ingest.
   */
  readonly factStore?: SqliteFactStore | null;
  /** Milliseconds between ticks. Defaults to DEFAULT_INTERVAL_MS. */
  readonly intervalMs?: number;
  /** Milliseconds allowed per classify call. Defaults to DEFAULT_CLASSIFY_TIMEOUT_MS. */
  readonly classifyTimeoutMs?: number;
  /** Minimum confidence required to persist a result. Defaults to DEFAULT_CONFIDENCE_FLOOR. */
  readonly confidenceFloor?: number;
  /** Idle threshold in minutes passed to scanOnce. Defaults to DEFAULT_IDLE_MINUTES. */
  readonly idleMinutes?: number;
  /** Log sink. Defaults to console.error; pass a no-op in tests. */
  readonly logger?: (msg: string) => void;
}
|
|
56
|
-
|
|
57
|
-
/** Counters describing what a single {@link ScanScheduler.tick} did. */
export interface TickReport {
  /** Chunks stored via insertSession and then recorded in adapter_state. */
  readonly inserted: number;
  /** Chunks whose classification confidence was below the configured floor. */
  readonly skippedLowConfidence: number;
  /** Chunks whose classify call threw or timed out. */
  readonly classifyFailures: number;
  /** Chunks for which storing the session (or recording its state) threw. */
  readonly storageFailures: number;
  /** Total chunks returned by scanOnce across all adapters in this tick. */
  readonly chunksSeen: number;
}
|
|
64
|
-
|
|
65
|
-
/**
 * Periodic ingest loop: on every tick each adapter is scanned, the
 * resulting chunks are classified, and sufficiently confident results are
 * stored as sessions.
 *
 * Ticks are chained with `setTimeout` — the next tick is scheduled only
 * after the current one settles, so timer-driven ticks never overlap.
 */
export class ScanScheduler {
  /** Options with every default applied. */
  private readonly opts: Required<Omit<SchedulerOptions, "embedder" | "factStore">> & {
    readonly embedder: LLMClient | null;
    readonly factStore: SqliteFactStore | null;
  };
  /** True until start() is called and again after stop(). */
  private stopped = true;
  /** Handle of the most recently scheduled timer, or null. */
  private timer: NodeJS.Timeout | null = null;

  /**
   * @param opts Scheduler configuration; omitted optional members fall
   *             back to the module-level defaults.
   */
  constructor(opts: SchedulerOptions) {
    this.opts = {
      store: opts.store,
      adapters: opts.adapters,
      classifier: opts.classifier,
      embedder: opts.embedder ?? null,
      factStore: opts.factStore ?? null,
      intervalMs: opts.intervalMs ?? DEFAULT_INTERVAL_MS,
      classifyTimeoutMs: opts.classifyTimeoutMs ?? DEFAULT_CLASSIFY_TIMEOUT_MS,
      confidenceFloor: opts.confidenceFloor ?? DEFAULT_CONFIDENCE_FLOOR,
      idleMinutes: opts.idleMinutes ?? DEFAULT_IDLE_MINUTES,
      logger: opts.logger ?? ((msg) => console.error(msg)),
    };
  }

  /** Start the loop; the first tick is scheduled with zero delay. No-op when already running. */
  start(): void {
    if (!this.stopped) return;
    this.stopped = false;
    this.scheduleNext(0);
  }

  /**
   * Stop the loop and cancel the pending timer. A tick that is already
   * executing is not interrupted, but it will not schedule a successor.
   */
  stop(): void {
    this.stopped = true;
    if (this.timer) {
      clearTimeout(this.timer);
      this.timer = null;
    }
  }

  /**
   * Arm a timer that runs one tick after `delayMs` and then re-arms itself
   * with the configured interval.
   *
   * A rejected tick is logged rather than left unhandled: tick() can
   * reject (its failure-bookkeeping database calls run outside any
   * try/catch), and an unhandled rejection would otherwise surface as a
   * process-level `unhandledRejection`.
   */
  private scheduleNext(delayMs: number): void {
    if (this.stopped) return;
    this.timer = setTimeout(() => {
      void this.tick()
        .catch((e: unknown) => {
          this.opts.logger(
            `[scheduler] tick failed: ${e instanceof Error ? e.message : String(e)}`,
          );
        })
        .finally(() => this.scheduleNext(this.opts.intervalMs));
    }, delayMs);
  }

  /**
   * Run one full ingest pass over every adapter.
   *
   * Per adapter: scanOnce supplies the chunks to process; a scanOnce error
   * is logged and the adapter is skipped. Per chunk: the text is
   * classified under a wall-clock timeout; failures are recorded with
   * recordFailed and logged; results below the confidence floor are
   * skipped; everything else is stored through insertSession and then
   * recorded with recordClassified.
   *
   * @returns Counters summarising the pass.
   */
  async tick(): Promise<TickReport> {
    let inserted = 0;
    let skippedLowConfidence = 0;
    let classifyFailures = 0;
    let storageFailures = 0;
    let chunksSeen = 0;

    for (const adapter of this.opts.adapters) {
      let results;
      try {
        results = await scanOnce(adapter, this.opts.idleMinutes, this.opts.store.rawDb());
      } catch (e) {
        this.opts.logger(
          `[scheduler] scanOnce error for ${adapter.name}: ${e instanceof Error ? e.message : String(e)}`,
        );
        continue;
      }

      for (const { chunk, supersedes } of results) {
        chunksSeen += 1;

        let classification;
        try {
          classification = await withTimeout(
            this.opts.classifier.classify(chunk.text),
            this.opts.classifyTimeoutMs,
          );
        } catch (e) {
          classifyFailures += 1;
          const reason = e instanceof TimeoutError ? "timed out" : `error: ${e instanceof Error ? e.message : String(e)}`;
          recordFailed(this.opts.store.rawDb(), adapter.name, chunk.sourcePath);
          // Read the counter back so the log line shows progress toward the ceiling.
          const failureRow = this.opts.store.rawDb()
            .prepare<[string, string], { failure_count: number }>(
              "SELECT COALESCE(failure_count, 0) AS failure_count FROM adapter_state WHERE adapter_name = ? AND source_path = ?",
            )
            .get(adapter.name, chunk.sourcePath);
          // recordFailed writes nothing when the file has vanished; assume a first failure then.
          const count = failureRow?.failure_count ?? 1;
          const ceiling = count >= MAX_CLASSIFY_FAILURES ? ` (failure ${count}/${MAX_CLASSIFY_FAILURES} — will skip until file grows)` : ` (failure ${count}/${MAX_CLASSIFY_FAILURES})`;
          this.opts.logger(`[scheduler] classifier ${reason} for ${chunk.id}${ceiling}`);
          continue;
        }

        // NOTE(review): nothing is written to adapter_state on this path, so
        // the same file appears to be eligible for re-classification on the
        // next tick — confirm that is intended.
        if (classification.confidence < this.opts.confidenceFloor) {
          skippedLowConfidence += 1;
          continue;
        }

        const record: IngestRecord = {
          id: chunk.id,
          runtime: chunk.runtime,
          runtimeSessionId: chunk.runtimeSessionId || null,
          startedAt: chunk.startedAt,
          endedAt: chunk.endedAt || null,
          durationMin: chunk.durationMin,
          label: classification.label,
          summary: classification.summary,
          body: chunk.text.slice(0, BODY_CAP), // cap the stored body
          status: "closed",
          transcriptKind: adapter.transcriptKind,
          transcriptPath: chunk.sourcePath,
          transcriptOffset: chunk.byteRange[0],
          transcriptLength: chunk.byteRange[1],
          entities: classification.entities,
          decisions: classification.decisions,
          openQuestions: classification.open,
        };

        // Facts are only extracted when a fact store was configured.
        const factSink = this.opts.factStore
          ? {
              factStore: this.opts.factStore,
              facts: extractFacts(classification, chunk.id, chunk.startedAt),
            }
          : null;

        try {
          await this.opts.store.insertSession(
            record,
            this.opts.embedder,
            supersedes,
            factSink,
          );
          recordClassified(
            this.opts.store.rawDb(),
            adapter.name,
            chunk.sourcePath,
            chunk.id,
          );
          inserted += 1;
        } catch (e) {
          storageFailures += 1;
          recordFailed(this.opts.store.rawDb(), adapter.name, chunk.sourcePath);
          this.opts.logger(
            `[scheduler] storage error for ${chunk.id}: ${e instanceof Error ? e.message : String(e)}`,
          );
        }
      }
    }

    return { inserted, skippedLowConfidence, classifyFailures, storageFailures, chunksSeen };
  }
}
|
|
210
|
-
|
|
211
|
-
/** Error used by {@link withTimeout} when the time budget elapses first. */
class TimeoutError extends Error {
  constructor(message?: string) {
    super(message);
    // Without this the error reports itself as a plain "Error" in logs and stacks.
    this.name = "TimeoutError";
  }
}

/**
 * Settle with `promise`, or reject with a {@link TimeoutError} if it has
 * not settled within `ms` milliseconds.
 *
 * The underlying operation is not cancelled on timeout — only its result
 * is ignored. The timer is always cleared once the race settles so it
 * cannot keep the event loop alive.
 *
 * @param promise Operation to wait for.
 * @param ms      Time budget in milliseconds.
 * @returns The fulfilment value of `promise`.
 * @throws TimeoutError when the budget elapses first; otherwise whatever
 *         `promise` rejects with.
 */
async function withTimeout<T>(promise: Promise<T>, ms: number): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  try {
    return await Promise.race([
      promise,
      new Promise<T>((_, reject) => {
        timer = setTimeout(() => reject(new TimeoutError(`timed out after ${ms}ms`)), ms);
      }),
    ]);
  } finally {
    if (timer !== undefined) clearTimeout(timer);
  }
}
|