npm - nlm-memory - Versions diffs - 0.5.0 → 0.5.2 - Mend

nlm-memory 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (257) hide show

package/README.md +89 -34
package/dist/cli/digest.d.ts +20 -0
package/dist/cli/digest.js +142 -0
package/dist/cli/digest.js.map +1 -0
package/dist/cli/nlm.d.ts +1 -0
package/dist/cli/nlm.js +25 -1
package/dist/cli/nlm.js.map +1 -1
package/dist/core/digest/compose.d.ts +38 -0
package/dist/core/digest/compose.js +93 -0
package/dist/core/digest/compose.js.map +1 -0
package/dist/core/digest/hook-liveness.d.ts +32 -0
package/dist/core/digest/hook-liveness.js +54 -0
package/dist/core/digest/hook-liveness.js.map +1 -0
package/dist/http/app.js +2 -1
package/dist/http/app.js.map +1 -1
package/dist/mcp/server.js +20 -1
package/dist/mcp/server.js.map +1 -1
package/dist/ui/assets/{index-C8cpwbYJ.css → index-Beo8psd-.css} +1 -1
package/dist/ui/assets/{index-CB50QnL-.js → index-CSPTTeeM.js} +8 -8
package/dist/ui/index.html +2 -2
package/package.json +26 -1
package/.agents/plugins/marketplace.json +0 -20
package/.github/workflows/ci.yml +0 -30
package/docs/methodology/re-derivation-rate.md +0 -112
package/docs/methodology/useful-hit-rate.md +0 -79
package/docs/plans/2026-05-20-fts5-lexical-recall.md +0 -1088
package/docs/plans/2026-05-20-recall-daemon-wedge-fix.md +0 -662
package/docs/plans/2026-05-20-recall-hook-design.md +0 -131
package/docs/plans/2026-05-20-recall-hook-implementation.md +0 -1222
package/docs/plans/desktop-product.md +0 -69
package/docs/plans/factstore-design.md +0 -236
package/logs/CHANGELOG/CHANGELOG-2026.md +0 -1575
package/logs/CHANGELOG/CHANGELOG.md +0 -209
package/migrations/000_initial_schema.sql +0 -174
package/migrations/001_entity_type_rename.sql +0 -17
package/migrations/002_adapter_state_extend.sql +0 -12
package/migrations/003_session_embeddings.sql +0 -11
package/migrations/004_facts.sql +0 -46
package/migrations/005_sources.sql +0 -31
package/migrations/006_providers.sql +0 -33
package/migrations/007_source_tokens.sql +0 -17
package/migrations/008_fts_rebuild.sql +0 -9
package/migrations/009_session_embedding_chunks.sql +0 -46
package/migrations/010_sources_opencode.sql +0 -30
package/migrations/011_sources_hermes_agent.sql +0 -30
package/migrations/012_sources_aider.sql +0 -30
package/migrations/013_adapter_state_failure_count.sql +0 -12
package/migrations/014_sources_cursor.sql +0 -30
package/migrations/015_sources_windsurf.sql +0 -30
package/plugin-hermes-agent/README.md +0 -49
package/plugin-hermes-agent/__init__.py +0 -75
package/plugin-hermes-agent/plugin.yaml +0 -15
package/scripts/backfill-citations.mjs +0 -0
package/scripts/build-codex-plugin.mjs +0 -61
package/scripts/deepseek-probe.mjs +0 -67
package/scripts/extract-triples.mjs +0 -207
package/scripts/longmemeval/embedding-cache.ts +0 -77
package/scripts/longmemeval/fetch-dataset.sh +0 -25
package/scripts/longmemeval/run-harness.ts +0 -315
package/scripts/longmemeval/scorer.ts +0 -99
package/scripts/longmemeval/tsconfig.json +0 -9
package/scripts/longmemeval/types.ts +0 -35
package/scripts/nlm-daily-digest.py +0 -239
package/scripts/nlm-daily-digest.sh +0 -28
package/src/cli/classify-parity.ts +0 -257
package/src/cli/launchctl-helpers.ts +0 -49
package/src/cli/nlm.ts +0 -1078
package/src/core/actions/actions-log.ts +0 -118
package/src/core/actions/overlay.ts +0 -117
package/src/core/adapters/aider.ts +0 -205
package/src/core/adapters/claude-code.ts +0 -293
package/src/core/adapters/common.ts +0 -54
package/src/core/adapters/cursor.ts +0 -486
package/src/core/adapters/from-source.ts +0 -67
package/src/core/adapters/hermes-agent.ts +0 -240
package/src/core/adapters/hermes.ts +0 -277
package/src/core/adapters/jsonl-generic.ts +0 -208
package/src/core/adapters/opencode.ts +0 -281
package/src/core/adapters/pi.ts +0 -264
package/src/core/adapters/windsurf.ts +0 -386
package/src/core/classifier/prompt.ts +0 -200
package/src/core/dataset/build-dataset.ts +0 -463
package/src/core/embedding/chunk-body.ts +0 -76
package/src/core/embedding/embed-backfill.ts +0 -210
package/src/core/embedding/embed-normalize.ts +0 -135
package/src/core/facts/backfill-facts.ts +0 -254
package/src/core/facts/extract-facts.ts +0 -50
package/src/core/hook/citation-detect.ts +0 -124
package/src/core/hook/cite-memo.ts +0 -68
package/src/core/hook/claude-settings.ts +0 -187
package/src/core/hook/gate.ts +0 -25
package/src/core/hook/hook-log.ts +0 -41
package/src/core/hook/memo-sweep.ts +0 -164
package/src/core/hook/memo.ts +0 -67
package/src/core/hook/pointer-block.ts +0 -26
package/src/core/hook/select.ts +0 -32
package/src/core/hook/transcript.ts +0 -121
package/src/core/ingest/ingest-session.ts +0 -111
package/src/core/providers/provider-models.ts +0 -100
package/src/core/providers/provider-registry.ts +0 -196
package/src/core/recall/citation-log.ts +0 -108
package/src/core/recall/filter.ts +0 -27
package/src/core/recall/index.ts +0 -6
package/src/core/recall/match-fields.ts +0 -40
package/src/core/recall/query-log.ts +0 -149
package/src/core/recall/query-shape.ts +0 -66
package/src/core/recall/recall-service.ts +0 -320
package/src/core/recall/recent-log.ts +0 -59
package/src/core/recall/tokenize.ts +0 -18
package/src/core/recall/useful-scan.ts +0 -336
package/src/core/recall-facts/fact-query-log.ts +0 -150
package/src/core/recall-facts/fact-recall-service.ts +0 -327
package/src/core/scheduler/scan-once.ts +0 -142
package/src/core/scheduler/scheduler.ts +0 -225
package/src/core/sources/source-registry.ts +0 -278
package/src/core/storage/db-restore.ts +0 -133
package/src/core/storage/live-status.ts +0 -45
package/src/core/storage/migrate.ts +0 -72
package/src/core/storage/sqlite-fact-store.ts +0 -304
package/src/core/storage/sqlite-session-store.ts +0 -810
package/src/hook/hook-auth.ts +0 -18
package/src/hook/prompt-recall-hook.ts +0 -180
package/src/hook/session-end-hook.ts +0 -81
package/src/hook/session-start-hook.ts +0 -168
package/src/hook/stop-hook.ts +0 -239
package/src/http/app.ts +0 -1215
package/src/install/claude-code.ts +0 -128
package/src/install/codex.ts +0 -367
package/src/install/cursor.ts +0 -68
package/src/install/hermes-agent.ts +0 -76
package/src/install/hermes.ts +0 -78
package/src/install/nlm-dir-perms.ts +0 -55
package/src/install/ollama.ts +0 -284
package/src/install/setup.ts +0 -489
package/src/install/windsurf.ts +0 -68
package/src/llm/classifier-box.ts +0 -64
package/src/llm/deepseek-client.ts +0 -150
package/src/llm/env-autoload.ts +0 -55
package/src/llm/ollama-client.ts +0 -189
package/src/mcp/server.ts +0 -534
package/src/ports/fact-store.ts +0 -102
package/src/ports/llm-client.ts +0 -52
package/src/ports/logger.ts +0 -16
package/src/ports/session-store.ts +0 -45
package/src/ports/transcript-adapter.ts +0 -55
package/src/shared/types.ts +0 -149
package/src/ui/App.tsx +0 -58
package/src/ui/components/PromoteOpenButton.tsx +0 -65
package/src/ui/components/SessionDrawer.tsx +0 -199
package/src/ui/components/SideNav.tsx +0 -162
package/src/ui/components/Skeleton.tsx +0 -107
package/src/ui/index.html +0 -13
package/src/ui/lib/actions.ts +0 -30
package/src/ui/lib/api.ts +0 -92
package/src/ui/lib/dataset.ts +0 -141
package/src/ui/lib/registries.ts +0 -155
package/src/ui/lib/view-settings.ts +0 -41
package/src/ui/main.tsx +0 -15
package/src/ui/pages/Live.tsx +0 -229
package/src/ui/pages/Pulse.tsx +0 -415
package/src/ui/pages/Recall.tsx +0 -190
package/src/ui/pages/River.tsx +0 -354
package/src/ui/pages/Search.tsx +0 -386
package/src/ui/pages/Stub.tsx +0 -9
package/src/ui/pages/Thread.tsx +0 -473
package/src/ui/pages/settings/Classifier.tsx +0 -227
package/src/ui/pages/settings/Data.tsx +0 -190
package/src/ui/pages/settings/Index.tsx +0 -65
package/src/ui/pages/settings/Labels.tsx +0 -224
package/src/ui/pages/settings/Providers.tsx +0 -305
package/src/ui/pages/settings/SettingsSubnav.tsx +0 -28
package/src/ui/pages/settings/Sources.tsx +0 -326
package/src/ui/pages/settings/Views.tsx +0 -96
package/src/ui/styles.css +0 -1890
package/src/ui/tsconfig.json +0 -21
package/src/ui/vite.config.ts +0 -19
package/tests/fixtures/claude_code/short_session.jsonl +0 -2
package/tests/fixtures/claude_code/standard_iso.jsonl +0 -4
package/tests/fixtures/claude_code/tool_heavy.jsonl +0 -8
package/tests/fixtures/claude_code/with_subagent.jsonl +0 -7
package/tests/fixtures/facts.ts +0 -17
package/tests/fixtures/golden-corpus.ts +0 -85
package/tests/fixtures/hermes/paired_request_dump.json +0 -24
package/tests/fixtures/hermes/paired_session.json +0 -23
package/tests/fixtures/hermes/request_dump.json +0 -28
package/tests/fixtures/hermes/session_iso.json +0 -38
package/tests/fixtures/hermes/session_unix.json +0 -38
package/tests/fixtures/hermes/system_only.json +0 -18
package/tests/fixtures/pi/error-connection-abort.jsonl +0 -8
package/tests/fixtures/pi/short-successful.jsonl +0 -5
package/tests/fixtures/pi/with-custom-message.jsonl +0 -6
package/tests/fixtures/sessions.ts +0 -22
package/tests/integration/backfill-facts.test.ts +0 -362
package/tests/integration/citation-explicit.test.ts +0 -111
package/tests/integration/cite-event.test.ts +0 -169
package/tests/integration/cite-memo.test.ts +0 -87
package/tests/integration/db-restore.test.ts +0 -153
package/tests/integration/embed-backfill.test.ts +0 -176
package/tests/integration/fact-supersedence.test.ts +0 -313
package/tests/integration/fts-index.test.ts +0 -60
package/tests/integration/getbyids-sqlite.test.ts +0 -100
package/tests/integration/hermes-agent-hooks.test.ts +0 -248
package/tests/integration/hook-claude-settings.test.ts +0 -218
package/tests/integration/hook-log.test.ts +0 -54
package/tests/integration/hook-memo.test.ts +0 -68
package/tests/integration/hook-pre-compact.test.ts +0 -105
package/tests/integration/hook-subagent-start.test.ts +0 -102
package/tests/integration/http.test.ts +0 -401
package/tests/integration/keyword-search-fts.test.ts +0 -66
package/tests/integration/mcp-recall-logging.test.ts +0 -88
package/tests/integration/mcp.test.ts +0 -260
package/tests/integration/memo-sweep.test.ts +0 -91
package/tests/integration/prompt-recall-hook.test.ts +0 -88
package/tests/integration/provider-registry.test.ts +0 -107
package/tests/integration/recall-golden.test.ts +0 -59
package/tests/integration/recall-sqlite.test.ts +0 -169
package/tests/integration/scheduler.test.ts +0 -391
package/tests/integration/session-end-hook.test.ts +0 -48
package/tests/integration/session-start-hook.test.ts +0 -126
package/tests/integration/source-registry.test.ts +0 -122
package/tests/integration/sqlite-fact-store.test.ts +0 -346
package/tests/integration/stop-hook.test.ts +0 -560
package/tests/integration/wal-checkpoint.test.ts +0 -49
package/tests/unit/cli/launchctl-helpers.test.ts +0 -60
package/tests/unit/core/adapters/aider.test.ts +0 -230
package/tests/unit/core/adapters/claude-code.test.ts +0 -118
package/tests/unit/core/adapters/cursor.test.ts +0 -485
package/tests/unit/core/adapters/hermes-agent.test.ts +0 -329
package/tests/unit/core/adapters/hermes.test.ts +0 -81
package/tests/unit/core/adapters/jsonl-generic.test.ts +0 -142
package/tests/unit/core/adapters/opencode.test.ts +0 -354
package/tests/unit/core/adapters/pi.test.ts +0 -110
package/tests/unit/core/adapters/windsurf.test.ts +0 -416
package/tests/unit/core/classifier/prompt.test.ts +0 -126
package/tests/unit/core/embedding/chunk-body.test.ts +0 -100
package/tests/unit/core/facts/extract-facts.test.ts +0 -117
package/tests/unit/core/filter.test.ts +0 -40
package/tests/unit/core/hook/citation-detect-cite-session.test.ts +0 -96
package/tests/unit/core/hook/citation-detect.test.ts +0 -124
package/tests/unit/core/hook/gate.test.ts +0 -29
package/tests/unit/core/hook/pointer-block.test.ts +0 -22
package/tests/unit/core/hook/select.test.ts +0 -66
package/tests/unit/core/match-fields.test.ts +0 -39
package/tests/unit/core/mcp-cite-session.test.ts +0 -51
package/tests/unit/core/providers/provider-models.test.ts +0 -101
package/tests/unit/core/query-shape.test.ts +0 -92
package/tests/unit/core/recall-facts/fact-recall-service.test.ts +0 -258
package/tests/unit/core/recall-service.test.ts +0 -200
package/tests/unit/core/storage/live-status.test.ts +0 -54
package/tests/unit/core/tokenize.test.ts +0 -32
package/tests/unit/core/useful-scan.test.ts +0 -537
package/tests/unit/llm/embed.test.ts +0 -93
package/tests/unit/llm/ollama-client.test.ts +0 -124
package/tests/unit/scripts/longmemeval-scorer.test.ts +0 -114
package/tsconfig.json +0 -31
package/tsconfig.test.json +0 -11
package/vitest.config.ts +0 -22

package/src/core/recall-facts/fact-recall-service.ts DELETED Viewed

@@ -1,327 +0,0 @@
-/**
- * FactRecallService — agent-facing recall over the FactStore.
- *
- * Mirrors RecallService's keyword / semantic / hybrid pattern but works on
- * Fact records, not Session records. Sessions and facts answer different
- * questions and have incompatibly-shaped results, so this is a separate
- * service with its own MCP tool — see Section 4 of factstore-design.md.
- *
- * Filter pipeline:
- *   1. Storage pre-filter (subject, predicate, kind, minConfidence,
- *      includeSuperseded). Cheap SQL.
- *   2. Keyword scoring over (value, subject, predicate). Pure, in-memory.
- *   3. Semantic KNN via fact_embeddings vec0 (when mode != keyword).
- *   4. Hybrid merge: 0.6 semantic + 0.4 keyword, matching the session
- *      recall weights.
- *
- * Confidence policy: default `minConfidence` is 0.6 (Section 1 of the plan).
- * Facts with classifier confidence in [0.4, 0.6) get written by
- * extractFacts but stay out of agent recall unless the caller lowers the
- * floor explicitly.
- */
-import type { FactStore } from "@ports/fact-store.js";
-import type { LLMClient } from "@ports/llm-client.js";
-import { LLMUnreachableError } from "@ports/llm-client.js";
-import type {
-  Fact,
-  FactHit,
-  FactMatchField,
-  FactRecallQuery,
-  FactRecallResult,
-  RecallMode,
-} from "@shared/types.js";
-import { tokenSet } from "@core/recall/tokenize.js";
-const DEFAULT_LIMIT = 10; // result count clampLimit uses when the caller passes no limit
-const MAX_LIMIT = 100; // upper bound clampLimit enforces on a requested limit
-const DEFAULT_MIN_CONFIDENCE = 0.6; // confidence floor applied when input.minConfidence is omitted
-const STORAGE_FETCH_CAP = 500; // `limit` handed to factStore.listForRecall for the candidate pre-filter
-const HYBRID_KW_WEIGHT = 0.4; // weight of the normalized keyword score in mergeHybrid
-const HYBRID_SEM_WEIGHT = 0.6; // weight of the normalized semantic score in mergeHybrid
-const SEMANTIC_OVERFETCH = 3; // semantic search asks for limit * this many neighbors
-const FIELD_WEIGHTS = { // per-field multipliers applied to matched-token counts in scoreFact
-  value: 3,
-  subject: 1,
-  predicate: 1,
-} as const;
-export interface FactRecallServiceDeps { // collaborators injected into FactRecallService
-  readonly factStore: FactStore; // queried via listForRecall and semanticSearch
-  readonly llm: LLMClient; // embeds the query text for the semantic pass
-}
-export class FactRecallService { // Keyword / semantic / hybrid recall over the facts exposed by deps.factStore.
-  constructor(private readonly deps: FactRecallServiceDeps) {}
-  async search(input: FactRecallQuery): Promise<FactRecallResult> { // Runs one query; the result echoes the normalized inputs alongside the ranked hits.
-    const mode: RecallMode = input.mode ?? "keyword"; // NOTE(review): a comment further down calls hybrid "the MCP default", yet the default here is keyword — presumably the MCP caller passes mode explicitly; confirm.
-    const limit = clampLimit(input.limit);
-    const subject = input.subject ?? null;
-    const predicate = input.predicate ?? null;
-    const kind = input.kind ?? null;
-    const queryText = (input.query ?? "").trim();
-    const empty: FactRecallResult = { // zero-hit result shared by the early returns below
-      query: queryText,
-      subject,
-      predicate,
-      kind,
-      mode,
-      limit,
-      total: 0,
-      results: [],
-    };
-    // A query with no signal at all → empty. Either free-text query, or a
-    // structured filter (subject / predicate / kind) must be provided.
-    if (!queryText && subject === null && predicate === null && kind === null) {
-      return empty;
-    }
-    const filter: Parameters<FactStore["listForRecall"]>[0] = { // optional filters are spread in only when defined, so no key is set to undefined
-      includeSuperseded: input.includeSuperseded === true,
-      minConfidence: input.minConfidence ?? DEFAULT_MIN_CONFIDENCE,
-      limit: STORAGE_FETCH_CAP,
-      ...(input.subject !== undefined ? { subject: input.subject } : {}),
-      ...(input.predicate !== undefined ? { predicate: input.predicate } : {}),
-      ...(input.kind !== undefined ? { kind: input.kind } : {}),
-    };
-    const candidates = await this.deps.factStore.listForRecall(filter);
-    if (candidates.length === 0) return empty;
-    const byId = new Map<string, Fact>(candidates.map((f) => [f.id, f])); // id → fact index; also used to drop semantic neighbors outside the pre-filter
-    const queryTokens = queryText ? new Set(tokenSet(queryText)) : new Set<string>();
-    const kwHits =
-      mode === "keyword" || mode === "hybrid"
-        ? scoreAll(candidates, queryTokens)
-        : [];
-    let semHits: ReadonlyArray<SemanticHit> = [];
-    let semError: "ollama_unreachable" | null = null;
-    if ((mode === "semantic" || mode === "hybrid") && queryText) { // the semantic pass needs text to embed
-      try {
-        semHits = await this.runSemantic(queryText, byId, limit * SEMANTIC_OVERFETCH);
-      } catch (err) {
-        if (err instanceof LLMUnreachableError) { // remember the outage instead of failing the whole search
-          semError = "ollama_unreachable";
-        } else {
-          throw err;
-        }
-      }
-    }
-    if (mode === "semantic" && semError) { // semantic-only mode has no fallback: empty result, flagged
-      return { ...empty, modeUnavailable: semError };
-    }
-    // For pure structured queries (no query text, just subject/predicate),
-    // a keyword pass with empty tokens scores zero and a semantic pass has
-    // nothing to embed. Fall back to returning the storage filter result
-    // ordered by created_at DESC. Applies to keyword AND hybrid — hybrid
-    // is the MCP default, so this path catches exact subject+predicate
-    // lookups from agent callers that pass no query text.
-    if ((mode === "keyword" || mode === "hybrid") && !queryText) {
-      const rows = candidates
-        .slice(0, limit) // NOTE(review): sliced before finalize, so `total` on this path never exceeds limit, unlike the paths below that pass every hit
-        .map((f) => factToHit(f, 0, []));
-      return finalize(queryText, subject, predicate, kind, mode, limit, rows);
-    }
-    if (mode === "keyword") {
-      return finalize(queryText, subject, predicate, kind, mode, limit, kwHits.map(toKeywordHit));
-    }
-    if (mode === "semantic") { // NOTE(review): with no query text semHits is still empty here, so a structured-only semantic query yields no hits
-      return finalize(queryText, subject, predicate, kind, mode, limit, semHits.map(toSemanticHit));
-    }
-    // hybrid
-    const merged = mergeHybrid(kwHits, semHits, byId);
-    const result = finalize(queryText, subject, predicate, kind, mode, limit, merged);
-    return semError ? { ...result, modeUnavailable: semError } : result; // on an embedding outage the merge holds keyword hits only, and the outage is reported
-  }
-  private async runSemantic( // Embeds the query, asks the store for up to fetchLimit neighbors, keeps those present in byId.
-    query: string,
-    byId: ReadonlyMap<string, Fact>,
-    fetchLimit: number,
-  ): Promise<ReadonlyArray<SemanticHit>> {
-    const embedding = await this.deps.llm.embed(query, "query");
-    const neighbors = await this.deps.factStore.semanticSearch(embedding.vector, fetchLimit);
-    const hits: SemanticHit[] = [];
-    for (const n of neighbors) {
-      const fact = byId.get(n.factId);
-      if (!fact) continue; // candidate was filtered out by subject/predicate/conf
-      hits.push({ fact, similarity: cosineFromL2(n.distance) }); // store distance converted to a similarity score
-    }
-    return hits;
-  }
-}
-interface KeywordHit { // one keyword-scored fact, as produced by scoreAll
-  readonly fact: Fact;
-  readonly score: number; // weighted count of matching tokens (see scoreFact)
-  readonly matchedIn: ReadonlyArray<FactMatchField>; // fields in which at least one query token matched
-}
-interface SemanticHit { // one semantic neighbor, as produced by runSemantic
-  readonly fact: Fact;
-  readonly similarity: number; // value returned by cosineFromL2, clamped to [-1, 1]
-}
-function scoreAll( // Scores every fact against the query tokens; returns only positive-scoring hits, highest score first.
-  facts: ReadonlyArray<Fact>,
-  queryTokens: ReadonlySet<string>,
-): ReadonlyArray<KeywordHit> {
-  if (queryTokens.size === 0) return []; // nothing to match against
-  const hits: KeywordHit[] = [];
-  for (const f of facts) {
-    const { score, matchedIn } = scoreFact(f, queryTokens);
-    if (score > 0) hits.push({ fact: f, score, matchedIn });
-  }
-  hits.sort((a, b) => b.score - a.score); // descending by score; sorts the local array in place
-  return hits;
-}
-function scoreFact( // Scores one fact: for each field, (number of query tokens found in it) * FIELD_WEIGHTS[field], summed.
-  fact: Fact,
-  queryTokens: ReadonlySet<string>,
-): { score: number; matchedIn: ReadonlyArray<FactMatchField> } { // matchedIn lists the matching fields in value, subject, predicate order
-  let score = 0;
-  const matchedIn: FactMatchField[] = [];
-  const valueMatches = intersectionSize(queryTokens, tokenSet(fact.value));
-  if (valueMatches > 0) {
-    score += FIELD_WEIGHTS.value * valueMatches;
-    matchedIn.push("value");
-  }
-  const subjectMatches = intersectionSize(queryTokens, tokenSet(fact.subject));
-  if (subjectMatches > 0) {
-    score += FIELD_WEIGHTS.subject * subjectMatches;
-    matchedIn.push("subject");
-  }
-  const predicateMatches = intersectionSize(queryTokens, tokenSet(fact.predicate));
-  if (predicateMatches > 0) {
-    score += FIELD_WEIGHTS.predicate * predicateMatches;
-    matchedIn.push("predicate");
-  }
-  return { score, matchedIn }; // score is 0 and matchedIn is empty when no field matched
-}
-function mergeHybrid( // Merges keyword and semantic hits into one list ranked by HYBRID_SEM_WEIGHT * semantic + HYBRID_KW_WEIGHT * keyword.
-  kwHits: ReadonlyArray<KeywordHit>,
-  semHits: ReadonlyArray<SemanticHit>,
-  byId: ReadonlyMap<string, Fact>,
-): ReadonlyArray<FactHit> {
-  const maxKw = Math.max(1, ...kwHits.map((h) => h.score)); // floor of 1 avoids dividing by zero when there are no keyword hits
-  const maxSem = Math.max(1, ...semHits.map((h) => h.similarity)); // similarities from cosineFromL2 are clamped to <= 1, so this is always 1 for such hits
-  const kwMap = new Map<string, KeywordHit>(kwHits.map((h) => [h.fact.id, h]));
-  const semMap = new Map<string, SemanticHit>(semHits.map((h) => [h.fact.id, h]));
-  const allIds = new Set<string>([...kwMap.keys(), ...semMap.keys()]); // union of ids seen by either pass
-  const rows: FactHit[] = [];
-  for (const id of allIds) {
-    const fact = byId.get(id);
-    if (!fact) continue; // ids missing from the candidate index are dropped
-    const kw = kwMap.get(id);
-    const sem = semMap.get(id);
-    const kwNorm = kw ? kw.score / maxKw : 0; // a fact absent from one pass contributes 0 for that pass
-    const semNorm = sem ? sem.similarity / maxSem : 0;
-    const combined = round4(HYBRID_SEM_WEIGHT * semNorm + HYBRID_KW_WEIGHT * kwNorm);
-    const matchedIn = uniqueFields(
-      kw?.matchedIn ?? [],
-      sem ? (["semantic"] as FactMatchField[]) : [], // "semantic" is appended whenever the semantic pass returned this fact
-    );
-    rows.push({
-      ...fact,
-      matchScore: combined,
-      matchedIn,
-      keywordScore: round4(kwNorm),
-      semanticScore: round4(semNorm),
-    });
-  }
-  rows.sort((a, b) => b.matchScore - a.matchScore); // descending by combined score
-  return rows;
-}
-function factToHit( // Builds a FactHit: a copy of the fact's fields plus matchScore and matchedIn.
-  fact: Fact,
-  score: number,
-  matchedIn: ReadonlyArray<FactMatchField>,
-): FactHit {
-  return { ...fact, matchScore: score, matchedIn };
-}
-function toKeywordHit(h: KeywordHit): FactHit { // keyword-mode hit: matchScore is the raw (un-normalized) keyword score
-  return factToHit(h.fact, h.score, h.matchedIn);
-}
-function toSemanticHit(h: SemanticHit): FactHit { // semantic-mode hit: matchScore is the similarity, matchedIn is always ["semantic"]
-  return factToHit(h.fact, h.similarity, ["semantic"]);
-}
-function finalize( // Assembles the FactRecallResult envelope around an already-ranked hit list.
-  query: string,
-  subject: string | null,
-  predicate: string | null,
-  kind: FactRecallResult["kind"],
-  mode: RecallMode,
-  limit: number,
-  hits: ReadonlyArray<FactHit>,
-): FactRecallResult {
-  return {
-    query,
-    subject,
-    predicate,
-    kind,
-    mode,
-    limit,
-    total: hits.length, // counts the hits passed in, before truncation to limit
-    results: hits.slice(0, limit), // at most `limit` hits, in the order given
-  };
-}
-function clampLimit(limit: number | undefined): number { // Normalizes a requested limit to an integer in [1, MAX_LIMIT].
-  const n = limit ?? DEFAULT_LIMIT; // undefined falls back to DEFAULT_LIMIT
-  if (Number.isNaN(n) || n < 1) return 1; // NaN and anything below 1 collapse to 1
-  return Math.min(MAX_LIMIT, Math.trunc(n)); // fractional part dropped, then capped at MAX_LIMIT
-}
-function cosineFromL2(distance: number): number { // Maps a distance to 1 - distance^2 / 2, clamped to [-1, 1] and rounded to 4 decimals.
-  const cos = 1 - (distance * distance) / 2; // NOTE(review): this equals cosine similarity only if `distance` is the L2 distance between unit-length vectors — confirm the store normalizes embeddings
-  return round4(Math.max(-1, Math.min(1, cos)));
-}
-function round4(value: number): number { // Rounds `value` to four decimal places.
-  return Math.round(value * 10_000) / 10_000;
-}
-function intersectionSize<T>(a: ReadonlySet<T>, b: ReadonlySet<T>): number { // Returns how many elements the two sets have in common.
-  const [small, large] = a.size <= b.size ? [a, b] : [b, a]; // iterate the smaller set, probe the larger one
-  let count = 0;
-  for (const item of small) if (large.has(item)) count += 1;
-  return count;
-}
-function uniqueFields( // Concatenates `a` then `b`, dropping repeats while keeping first-seen order.
-  a: ReadonlyArray<FactMatchField>,
-  b: ReadonlyArray<FactMatchField>,
-): ReadonlyArray<FactMatchField> {
-  const seen = new Set<FactMatchField>();
-  const out: FactMatchField[] = [];
-  for (const f of [...a, ...b]) {
-    if (!seen.has(f)) { // only the first occurrence of a field is kept
-      seen.add(f);
-      out.push(f);
-    }
-  }
-  return out;
-}

package/src/core/scheduler/scan-once.ts DELETED Viewed

@@ -1,142 +0,0 @@
-/**
- * scanOnce — mtime-gated incremental discovery shared by every adapter.
- *
- * The Python codebase bundled this logic into each adapter (`scan_once` +
- * `record_classified` methods). In the TS port the adapter stays a pure
- * parser (TranscriptAdapter port); the mtime check and adapter_state
- * upsert live here, generic over the adapter. Same behavior, less
- * duplication across claude-code / hermes / pi.
- *
- * Contract (per file under adapter.discover()):
- *   - If `now - mtime < idleMinutes * 60s` → still active, skip
- *   - Lookup adapter_state by (adapterName, sourcePath):
- *       no row + file idle                       → NEW: parse + return (chunk, supersedes=null)
- *       row exists, size match, failures < ceil  → UNCHANGED: skip
- *       row exists, size match, failures >= ceil → FAILED_CEILING: skip (log once per session)
- *       row exists, file grew                    → RESUMED: parse + return, reset failure_count
- *   - After successful classify+insert downstream, call `recordClassified`
- *     to upsert adapter_state with the new size + session_id.
- *   - On classify/storage failure, call `recordFailed` to increment failure_count.
- *     When failure_count reaches MAX_CLASSIFY_FAILURES and the file hasn't grown,
- *     the file is permanently skipped until new content arrives.
- */
-import { statSync } from "node:fs";
-import type Database from "better-sqlite3";
-import type {
-  SessionChunk,
-  TranscriptAdapter,
-} from "@ports/transcript-adapter.js";
-export interface ScanResult { // one parsed transcript returned by scanOnce
-  readonly chunk: SessionChunk; // value returned by adapter.parseSession for the file
-  readonly supersedes: string | null; // session_id of the prior adapter_state row for the same path, or null when there was none
-}
-export const MAX_CLASSIFY_FAILURES = 3; // failure_count value at which scanOnce zeroes the counter once the file size changes
-interface AdapterStateRow { // shape of the columns scanOnce selects from adapter_state
-  source_path: string;
-  file_size: number | null; // size recorded by recordClassified / recordFailed; null is treated as 0 by scanOnce
-  session_id: string | null; // null for rows first inserted by recordFailed
-  failure_count: number; // the query COALESCEs a NULL column value to 0
-}
-export async function scanOnce( // Discovers the adapter's files and parses those that are idle and either new or changed in size since last recorded.
-  adapter: TranscriptAdapter,
-  idleMinutes: number, // files modified more recently than this are treated as still active
-  db: Database.Database,
-  now: number = Date.now(), // injectable clock in epoch milliseconds
-): Promise<ReadonlyArray<ScanResult>> {
-  const idleMs = idleMinutes * 60 * 1000;
-  const stateRows = db // load every adapter_state row for this adapter once, up front
-    .prepare<[string], AdapterStateRow>(
-      "SELECT source_path, file_size, session_id, COALESCE(failure_count, 0) AS failure_count FROM adapter_state WHERE adapter_name = ?",
-    )
-    .all(adapter.name);
-  const byPath = new Map<string, AdapterStateRow>(stateRows.map((r) => [r.source_path, r]));
-  const out: ScanResult[] = [];
-  const files = await adapter.discover();
-  for (const path of files) {
-    let st;
-    try {
-      st = statSync(path);
-    } catch {
-      continue; // files that cannot be stat'ed are skipped silently
-    }
-    const age = now - st.mtimeMs;
-    if (age < idleMs) continue; // modified within the idle window: still active, skip
-    const prior = byPath.get(path);
-    let supersedes: string | null = null;
-    if (prior) {
-      const sizeUnchanged = (prior.file_size ?? 0) === st.size; // NOTE(review): any size difference counts as a change, including a file that shrank
-      if (sizeUnchanged) {
-        // File hasn't grown — skip whether clean or failed. Failures only
-        // retry when the transcript file receives new content.
-        continue;
-      }
-      // File grew: reset failure_count so resume gets a clean slate.
-      if (prior.failure_count >= MAX_CLASSIFY_FAILURES) { // NOTE(review): the reset only happens at the ceiling; a lower non-zero count is carried into the retry
-        db.prepare(
-          "UPDATE adapter_state SET failure_count = 0 WHERE adapter_name = ? AND source_path = ?",
-        ).run(adapter.name, path);
-      }
-      supersedes = prior.session_id; // may be null when the prior row was written by recordFailed
-    }
-    const chunk = await adapter.parseSession(path);
-    if (!chunk) continue; // adapter produced nothing for this file
-    out.push({ chunk, supersedes });
-  }
-  return out;
-}
-export function recordClassified( // Upserts the adapter_state row after a successful ingest: current file size, session id, failure_count 0.
-  db: Database.Database,
-  adapterName: string,
-  sourcePath: string,
-  sessionId: string,
-): void {
-  let size = 0;
-  try {
-    size = statSync(sourcePath).size; // size is read now, not at parse time
-  } catch {
-    return; // file cannot be stat'ed: nothing is recorded
-  }
-  db.prepare( // insert, or on (adapter_name, source_path) conflict overwrite offset, size, session id and timestamp
-    `INSERT INTO adapter_state
-       (adapter_name, source_path, last_offset, file_size, session_id, failure_count, last_processed_at)
-     VALUES (?, ?, ?, ?, ?, 0, datetime('now'))
-     ON CONFLICT(adapter_name, source_path) DO UPDATE SET
-       last_offset = excluded.last_offset,
-       file_size = excluded.file_size,
-       session_id = excluded.session_id,
-       failure_count = 0,
-       last_processed_at = excluded.last_processed_at`,
-  ).run(adapterName, sourcePath, size, size, sessionId); // `size` fills both last_offset and file_size
-}
-export function recordFailed( // Upserts the adapter_state row after a failed attempt: inserts with failure_count 1, or increments the existing count.
-  db: Database.Database,
-  adapterName: string,
-  sourcePath: string,
-): void {
-  let size = 0;
-  try {
-    size = statSync(sourcePath).size;
-  } catch {
-    return; // file cannot be stat'ed: the failure is not recorded
-  }
-  db.prepare( // on conflict only file_size, failure_count and last_processed_at change; session_id and last_offset keep their stored values
-    `INSERT INTO adapter_state
-       (adapter_name, source_path, last_offset, file_size, session_id, failure_count, last_processed_at)
-     VALUES (?, ?, ?, ?, NULL, 1, datetime('now'))
-     ON CONFLICT(adapter_name, source_path) DO UPDATE SET
-       file_size = excluded.file_size,
-       failure_count = failure_count + 1,
-       last_processed_at = excluded.last_processed_at`,
-  ).run(adapterName, sourcePath, size, size); // `size` fills both last_offset and file_size on insert
-}

package/src/core/scheduler/scheduler.ts DELETED Viewed

@@ -1,225 +0,0 @@
-/**
- * ScanScheduler — periodic ingest loop. Ports `scheduler.py`.
- *
- * Each tick walks the registered adapters, runs scanOnce to discover idle
- * transcript files, classifies the resulting SessionChunks via the active
- * classifier, and persists them through SqliteSessionStore.insertSession
- * with the embedder. Records adapter_state after each successful insert
- * so the next tick is incremental.
- *
- * Single-process: the scheduler runs alongside the HTTP server (Phase D
- * wires it into `nlm start`). No worker thread; Node's event loop is
- * enough — adapter discovery is filesystem-bound and the per-chunk
- * classify call is async-awaited with a wall-clock timeout to keep the
- * tick loop responsive.
- *
- * Confidence floor of 0.3 mirrors Python: classifier outputs below that
- * are skipped rather than persisted as low-quality noise.
- */
-import type { LLMClient } from "@ports/llm-client.js";
-import type { TranscriptAdapter } from "@ports/transcript-adapter.js";
-import { extractFacts } from "@core/facts/extract-facts.js";
-import type { SqliteFactStore } from "@core/storage/sqlite-fact-store.js";
-import type {
-  IngestRecord,
-  SqliteSessionStore,
-} from "@core/storage/sqlite-session-store.js";
-import { MAX_CLASSIFY_FAILURES, recordClassified, recordFailed, scanOnce } from "./scan-once.js";
-const DEFAULT_INTERVAL_MS = 30 * 60 * 1000; // 30 min, matches Python default
-const DEFAULT_CLASSIFY_TIMEOUT_MS = 120_000; // 2 minutes; wall-clock cap applied to each classify call
-const DEFAULT_CONFIDENCE_FLOOR = 0.3; // classifications with confidence below this are skipped, not stored
-const DEFAULT_IDLE_MINUTES = 15; // idle window handed to scanOnce
-const BODY_CAP = 200_000; // upper bound on the length of chunk text kept as the stored session body
-export interface SchedulerOptions { // Options accepted by the ScanScheduler constructor.
-  readonly store: SqliteSessionStore; // receives insertSession calls; its rawDb() is used for adapter_state bookkeeping
-  readonly adapters: ReadonlyArray<TranscriptAdapter>; // walked in order on every tick
-  readonly classifier: LLMClient; // classify(chunk.text) is awaited once per discovered chunk
-  readonly embedder?: LLMClient | null; // forwarded to insertSession; null when omitted
-  /**
-   * FactStore for Phase B.2 fact ingest. When provided, the scheduler
-   * extracts facts from each classify result and persists them atomically
-   * with the session row. Optional — when null, sessions ingest as before
-   * with no facts written (backwards-compatible default for tests not yet
-   * updated, and for any future caller that wants facts off).
-   */
-  readonly factStore?: SqliteFactStore | null;
-  readonly intervalMs?: number; // delay between the end of one tick and the start of the next; defaults to DEFAULT_INTERVAL_MS
-  readonly classifyTimeoutMs?: number; // per-chunk classify timeout; defaults to DEFAULT_CLASSIFY_TIMEOUT_MS
-  readonly confidenceFloor?: number; // minimum confidence to persist a session; defaults to DEFAULT_CONFIDENCE_FLOOR
-  readonly idleMinutes?: number; // passed through to scanOnce; defaults to DEFAULT_IDLE_MINUTES
-  /** Defaults to console.error. Set to a noop in tests. */
-  readonly logger?: (msg: string) => void;
-}
-export interface TickReport { // Counters produced by a single ScanScheduler.tick() pass.
-  readonly inserted: number; // chunks stored and recorded successfully
-  readonly skippedLowConfidence: number; // chunks whose classification fell below the confidence floor
-  readonly classifyFailures: number; // classify calls that threw or timed out
-  readonly storageFailures: number; // chunks where insertSession or the follow-up recordClassified threw
-  readonly chunksSeen: number; // total chunks returned by scanOnce across all adapters
-}
-export class ScanScheduler { // Periodic ingest loop: start() arms a timer and each tick() scans, classifies and stores chunks.
-  private readonly opts: Required<Omit<SchedulerOptions, "embedder" | "factStore">> & { // fully defaulted options; embedder and factStore are normalised to null when absent
-    readonly embedder: LLMClient | null;
-    readonly factStore: SqliteFactStore | null;
-  };
-  private stopped = true; // true until start() is called, and again after stop()
-  private timer: NodeJS.Timeout | null = null; // handle of the most recently armed setTimeout, if any
-  constructor(opts: SchedulerOptions) { // copies opts and fills every omitted field with its default
-    this.opts = {
-      store: opts.store,
-      adapters: opts.adapters,
-      classifier: opts.classifier,
-      embedder: opts.embedder ?? null,
-      factStore: opts.factStore ?? null,
-      intervalMs: opts.intervalMs ?? DEFAULT_INTERVAL_MS,
-      classifyTimeoutMs: opts.classifyTimeoutMs ?? DEFAULT_CLASSIFY_TIMEOUT_MS,
-      confidenceFloor: opts.confidenceFloor ?? DEFAULT_CONFIDENCE_FLOOR,
-      idleMinutes: opts.idleMinutes ?? DEFAULT_IDLE_MINUTES,
-      logger: opts.logger ?? ((msg) => console.error(msg)),
-    };
-  }
-  start(): void { // begins the loop with a zero-delay first tick; no-op when already running
-    if (!this.stopped) return;
-    this.stopped = false;
-    this.scheduleNext(0); // NOTE(review): if a tick from before stop() is still in flight, its finally handler also reschedules, giving two timer chains
-  }
-  stop(): void { // blocks further scheduling and cancels the pending timer; a tick already in flight is not interrupted
-    this.stopped = true;
-    if (this.timer) {
-      clearTimeout(this.timer);
-      this.timer = null;
-    }
-  }
-  private scheduleNext(delayMs: number): void { // arms a one-shot timer; the following one is armed only after the tick settles
-    if (this.stopped) return;
-    this.timer = setTimeout(() => {
-      void this.tick().finally(() => this.scheduleNext(this.opts.intervalMs)); // NOTE(review): a rejected tick() is not caught here and would surface as an unhandled rejection
-    }, delayMs);
-  }
-  async tick(): Promise<TickReport> { // runs one pass over every adapter and returns the counters for that pass
-    let inserted = 0;
-    let skippedLowConfidence = 0;
-    let classifyFailures = 0;
-    let storageFailures = 0;
-    let chunksSeen = 0;
-    for (const adapter of this.opts.adapters) {
-      let results;
-      try {
-        results = await scanOnce(adapter, this.opts.idleMinutes, this.opts.store.rawDb());
-      } catch (e) {
-        this.opts.logger(
-          `[scheduler] scanOnce error for ${adapter.name}: ${e instanceof Error ? e.message : String(e)}`,
-        );
-        continue; // a failing adapter is logged and skipped; the remaining adapters still run
-      }
-      for (const { chunk, supersedes } of results) { // chunks are processed one at a time, in order
-        chunksSeen += 1;
-        let classification;
-        try {
-          classification = await withTimeout( // rejects with TimeoutError once classifyTimeoutMs elapses
-            this.opts.classifier.classify(chunk.text),
-            this.opts.classifyTimeoutMs,
-          );
-        } catch (e) {
-          classifyFailures += 1;
-          const reason = e instanceof TimeoutError ? "timed out" : `error: ${e instanceof Error ? e.message : String(e)}`;
-          recordFailed(this.opts.store.rawDb(), adapter.name, chunk.sourcePath); // bump failure_count first, then read it back for the log line
-          const failureRow = this.opts.store.rawDb()
-            .prepare<[string, string], { failure_count: number }>(
-              "SELECT COALESCE(failure_count, 0) AS failure_count FROM adapter_state WHERE adapter_name = ? AND source_path = ?",
-            )
-            .get(adapter.name, chunk.sourcePath);
-          const count = failureRow?.failure_count ?? 1; // a missing row is possible when recordFailed returned early; report 1 in that case
-          const ceiling = count >= MAX_CLASSIFY_FAILURES ? ` (failure ${count}/${MAX_CLASSIFY_FAILURES} — will skip until file grows)` : ` (failure ${count}/${MAX_CLASSIFY_FAILURES})`;
-          this.opts.logger(`[scheduler] classifier ${reason} for ${chunk.id}${ceiling}`);
-          continue;
-        }
-        if (classification.confidence < this.opts.confidenceFloor) {
-          skippedLowConfidence += 1;
-          continue; // NOTE(review): adapter_state is not updated on this path, so presumably the chunk is rediscovered next tick - confirm intended
-        }
-        const record: IngestRecord = {
-          id: chunk.id,
-          runtime: chunk.runtime,
-          runtimeSessionId: chunk.runtimeSessionId || null, // any falsy id (such as an empty string) is stored as null
-          startedAt: chunk.startedAt,
-          endedAt: chunk.endedAt || null, // any falsy end time is stored as null
-          durationMin: chunk.durationMin,
-          label: classification.label,
-          summary: classification.summary,
-          body: chunk.text.slice(0, BODY_CAP), // stored body is truncated to BODY_CAP
-          status: "closed",
-          transcriptKind: adapter.transcriptKind,
-          transcriptPath: chunk.sourcePath,
-          transcriptOffset: chunk.byteRange[0],
-          transcriptLength: chunk.byteRange[1], // NOTE(review): assumes byteRange is [offset, length] rather than [start, end] - confirm
-          entities: classification.entities,
-          decisions: classification.decisions,
-          openQuestions: classification.open,
-        };
-        const factSink = this.opts.factStore // facts are extracted only when a factStore was supplied
-          ? {
-              factStore: this.opts.factStore,
-              facts: extractFacts(classification, chunk.id, chunk.startedAt),
-            }
-          : null;
-        try {
-          await this.opts.store.insertSession(
-            record,
-            this.opts.embedder,
-            supersedes,
-            factSink,
-          );
-          recordClassified( // reached only after insertSession resolved
-            this.opts.store.rawDb(),
-            adapter.name,
-            chunk.sourcePath,
-            chunk.id,
-          );
-          inserted += 1;
-        } catch (e) {
-          storageFailures += 1;
-          recordFailed(this.opts.store.rawDb(), adapter.name, chunk.sourcePath); // storage errors increment the same failure_count as classify errors
-          this.opts.logger(
-            `[scheduler] storage error for ${chunk.id}: ${e instanceof Error ? e.message : String(e)}`,
-          );
-        }
-      }
-    }
-    return { inserted, skippedLowConfidence, classifyFailures, storageFailures, chunksSeen };
-  }
-}
-class TimeoutError extends Error {} // marker type so callers can tell a timeout apart from other rejections via instanceof
-async function withTimeout<T>(promise: Promise<T>, ms: number): Promise<T> { // Settles like promise, or rejects with TimeoutError after ms milliseconds, whichever happens first.
-  let timer: NodeJS.Timeout | undefined;
-  try {
-    return await Promise.race([
-      promise, // the losing promise is not cancelled; it simply stops being awaited
-      new Promise<T>((_, reject) => { // never resolves; it only rejects when the timer fires
-        timer = setTimeout(() => reject(new TimeoutError(`timed out after ${ms}ms`)), ms);
-      }),
-    ]);
-  } finally {
-    if (timer) clearTimeout(timer); // cancel the pending timer once the race has settled either way
-  }
-}