nlm-memory 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +89 -34
- package/dist/cli/digest.d.ts +20 -0
- package/dist/cli/digest.js +142 -0
- package/dist/cli/digest.js.map +1 -0
- package/dist/cli/nlm.d.ts +1 -0
- package/dist/cli/nlm.js +25 -1
- package/dist/cli/nlm.js.map +1 -1
- package/dist/core/digest/compose.d.ts +38 -0
- package/dist/core/digest/compose.js +93 -0
- package/dist/core/digest/compose.js.map +1 -0
- package/dist/core/digest/hook-liveness.d.ts +32 -0
- package/dist/core/digest/hook-liveness.js +54 -0
- package/dist/core/digest/hook-liveness.js.map +1 -0
- package/dist/http/app.js +2 -1
- package/dist/http/app.js.map +1 -1
- package/dist/mcp/server.js +20 -1
- package/dist/mcp/server.js.map +1 -1
- package/dist/ui/assets/{index-C8cpwbYJ.css → index-Beo8psd-.css} +1 -1
- package/dist/ui/assets/{index-CB50QnL-.js → index-CSPTTeeM.js} +8 -8
- package/dist/ui/index.html +2 -2
- package/package.json +26 -1
- package/.agents/plugins/marketplace.json +0 -20
- package/.github/workflows/ci.yml +0 -30
- package/docs/methodology/re-derivation-rate.md +0 -112
- package/docs/methodology/useful-hit-rate.md +0 -79
- package/docs/plans/2026-05-20-fts5-lexical-recall.md +0 -1088
- package/docs/plans/2026-05-20-recall-daemon-wedge-fix.md +0 -662
- package/docs/plans/2026-05-20-recall-hook-design.md +0 -131
- package/docs/plans/2026-05-20-recall-hook-implementation.md +0 -1222
- package/docs/plans/desktop-product.md +0 -69
- package/docs/plans/factstore-design.md +0 -236
- package/logs/CHANGELOG/CHANGELOG-2026.md +0 -1575
- package/logs/CHANGELOG/CHANGELOG.md +0 -209
- package/migrations/000_initial_schema.sql +0 -174
- package/migrations/001_entity_type_rename.sql +0 -17
- package/migrations/002_adapter_state_extend.sql +0 -12
- package/migrations/003_session_embeddings.sql +0 -11
- package/migrations/004_facts.sql +0 -46
- package/migrations/005_sources.sql +0 -31
- package/migrations/006_providers.sql +0 -33
- package/migrations/007_source_tokens.sql +0 -17
- package/migrations/008_fts_rebuild.sql +0 -9
- package/migrations/009_session_embedding_chunks.sql +0 -46
- package/migrations/010_sources_opencode.sql +0 -30
- package/migrations/011_sources_hermes_agent.sql +0 -30
- package/migrations/012_sources_aider.sql +0 -30
- package/migrations/013_adapter_state_failure_count.sql +0 -12
- package/migrations/014_sources_cursor.sql +0 -30
- package/migrations/015_sources_windsurf.sql +0 -30
- package/plugin-hermes-agent/README.md +0 -49
- package/plugin-hermes-agent/__init__.py +0 -75
- package/plugin-hermes-agent/plugin.yaml +0 -15
- package/scripts/backfill-citations.mjs +0 -0
- package/scripts/build-codex-plugin.mjs +0 -61
- package/scripts/deepseek-probe.mjs +0 -67
- package/scripts/extract-triples.mjs +0 -207
- package/scripts/longmemeval/embedding-cache.ts +0 -77
- package/scripts/longmemeval/fetch-dataset.sh +0 -25
- package/scripts/longmemeval/run-harness.ts +0 -315
- package/scripts/longmemeval/scorer.ts +0 -99
- package/scripts/longmemeval/tsconfig.json +0 -9
- package/scripts/longmemeval/types.ts +0 -35
- package/scripts/nlm-daily-digest.py +0 -239
- package/scripts/nlm-daily-digest.sh +0 -28
- package/src/cli/classify-parity.ts +0 -257
- package/src/cli/launchctl-helpers.ts +0 -49
- package/src/cli/nlm.ts +0 -1078
- package/src/core/actions/actions-log.ts +0 -118
- package/src/core/actions/overlay.ts +0 -117
- package/src/core/adapters/aider.ts +0 -205
- package/src/core/adapters/claude-code.ts +0 -293
- package/src/core/adapters/common.ts +0 -54
- package/src/core/adapters/cursor.ts +0 -486
- package/src/core/adapters/from-source.ts +0 -67
- package/src/core/adapters/hermes-agent.ts +0 -240
- package/src/core/adapters/hermes.ts +0 -277
- package/src/core/adapters/jsonl-generic.ts +0 -208
- package/src/core/adapters/opencode.ts +0 -281
- package/src/core/adapters/pi.ts +0 -264
- package/src/core/adapters/windsurf.ts +0 -386
- package/src/core/classifier/prompt.ts +0 -200
- package/src/core/dataset/build-dataset.ts +0 -463
- package/src/core/embedding/chunk-body.ts +0 -76
- package/src/core/embedding/embed-backfill.ts +0 -210
- package/src/core/embedding/embed-normalize.ts +0 -135
- package/src/core/facts/backfill-facts.ts +0 -254
- package/src/core/facts/extract-facts.ts +0 -50
- package/src/core/hook/citation-detect.ts +0 -124
- package/src/core/hook/cite-memo.ts +0 -68
- package/src/core/hook/claude-settings.ts +0 -187
- package/src/core/hook/gate.ts +0 -25
- package/src/core/hook/hook-log.ts +0 -41
- package/src/core/hook/memo-sweep.ts +0 -164
- package/src/core/hook/memo.ts +0 -67
- package/src/core/hook/pointer-block.ts +0 -26
- package/src/core/hook/select.ts +0 -32
- package/src/core/hook/transcript.ts +0 -121
- package/src/core/ingest/ingest-session.ts +0 -111
- package/src/core/providers/provider-models.ts +0 -100
- package/src/core/providers/provider-registry.ts +0 -196
- package/src/core/recall/citation-log.ts +0 -108
- package/src/core/recall/filter.ts +0 -27
- package/src/core/recall/index.ts +0 -6
- package/src/core/recall/match-fields.ts +0 -40
- package/src/core/recall/query-log.ts +0 -149
- package/src/core/recall/query-shape.ts +0 -66
- package/src/core/recall/recall-service.ts +0 -320
- package/src/core/recall/recent-log.ts +0 -59
- package/src/core/recall/tokenize.ts +0 -18
- package/src/core/recall/useful-scan.ts +0 -336
- package/src/core/recall-facts/fact-query-log.ts +0 -150
- package/src/core/recall-facts/fact-recall-service.ts +0 -327
- package/src/core/scheduler/scan-once.ts +0 -142
- package/src/core/scheduler/scheduler.ts +0 -225
- package/src/core/sources/source-registry.ts +0 -278
- package/src/core/storage/db-restore.ts +0 -133
- package/src/core/storage/live-status.ts +0 -45
- package/src/core/storage/migrate.ts +0 -72
- package/src/core/storage/sqlite-fact-store.ts +0 -304
- package/src/core/storage/sqlite-session-store.ts +0 -810
- package/src/hook/hook-auth.ts +0 -18
- package/src/hook/prompt-recall-hook.ts +0 -180
- package/src/hook/session-end-hook.ts +0 -81
- package/src/hook/session-start-hook.ts +0 -168
- package/src/hook/stop-hook.ts +0 -239
- package/src/http/app.ts +0 -1215
- package/src/install/claude-code.ts +0 -128
- package/src/install/codex.ts +0 -367
- package/src/install/cursor.ts +0 -68
- package/src/install/hermes-agent.ts +0 -76
- package/src/install/hermes.ts +0 -78
- package/src/install/nlm-dir-perms.ts +0 -55
- package/src/install/ollama.ts +0 -284
- package/src/install/setup.ts +0 -489
- package/src/install/windsurf.ts +0 -68
- package/src/llm/classifier-box.ts +0 -64
- package/src/llm/deepseek-client.ts +0 -150
- package/src/llm/env-autoload.ts +0 -55
- package/src/llm/ollama-client.ts +0 -189
- package/src/mcp/server.ts +0 -534
- package/src/ports/fact-store.ts +0 -102
- package/src/ports/llm-client.ts +0 -52
- package/src/ports/logger.ts +0 -16
- package/src/ports/session-store.ts +0 -45
- package/src/ports/transcript-adapter.ts +0 -55
- package/src/shared/types.ts +0 -149
- package/src/ui/App.tsx +0 -58
- package/src/ui/components/PromoteOpenButton.tsx +0 -65
- package/src/ui/components/SessionDrawer.tsx +0 -199
- package/src/ui/components/SideNav.tsx +0 -162
- package/src/ui/components/Skeleton.tsx +0 -107
- package/src/ui/index.html +0 -13
- package/src/ui/lib/actions.ts +0 -30
- package/src/ui/lib/api.ts +0 -92
- package/src/ui/lib/dataset.ts +0 -141
- package/src/ui/lib/registries.ts +0 -155
- package/src/ui/lib/view-settings.ts +0 -41
- package/src/ui/main.tsx +0 -15
- package/src/ui/pages/Live.tsx +0 -229
- package/src/ui/pages/Pulse.tsx +0 -415
- package/src/ui/pages/Recall.tsx +0 -190
- package/src/ui/pages/River.tsx +0 -354
- package/src/ui/pages/Search.tsx +0 -386
- package/src/ui/pages/Stub.tsx +0 -9
- package/src/ui/pages/Thread.tsx +0 -473
- package/src/ui/pages/settings/Classifier.tsx +0 -227
- package/src/ui/pages/settings/Data.tsx +0 -190
- package/src/ui/pages/settings/Index.tsx +0 -65
- package/src/ui/pages/settings/Labels.tsx +0 -224
- package/src/ui/pages/settings/Providers.tsx +0 -305
- package/src/ui/pages/settings/SettingsSubnav.tsx +0 -28
- package/src/ui/pages/settings/Sources.tsx +0 -326
- package/src/ui/pages/settings/Views.tsx +0 -96
- package/src/ui/styles.css +0 -1890
- package/src/ui/tsconfig.json +0 -21
- package/src/ui/vite.config.ts +0 -19
- package/tests/fixtures/claude_code/short_session.jsonl +0 -2
- package/tests/fixtures/claude_code/standard_iso.jsonl +0 -4
- package/tests/fixtures/claude_code/tool_heavy.jsonl +0 -8
- package/tests/fixtures/claude_code/with_subagent.jsonl +0 -7
- package/tests/fixtures/facts.ts +0 -17
- package/tests/fixtures/golden-corpus.ts +0 -85
- package/tests/fixtures/hermes/paired_request_dump.json +0 -24
- package/tests/fixtures/hermes/paired_session.json +0 -23
- package/tests/fixtures/hermes/request_dump.json +0 -28
- package/tests/fixtures/hermes/session_iso.json +0 -38
- package/tests/fixtures/hermes/session_unix.json +0 -38
- package/tests/fixtures/hermes/system_only.json +0 -18
- package/tests/fixtures/pi/error-connection-abort.jsonl +0 -8
- package/tests/fixtures/pi/short-successful.jsonl +0 -5
- package/tests/fixtures/pi/with-custom-message.jsonl +0 -6
- package/tests/fixtures/sessions.ts +0 -22
- package/tests/integration/backfill-facts.test.ts +0 -362
- package/tests/integration/citation-explicit.test.ts +0 -111
- package/tests/integration/cite-event.test.ts +0 -169
- package/tests/integration/cite-memo.test.ts +0 -87
- package/tests/integration/db-restore.test.ts +0 -153
- package/tests/integration/embed-backfill.test.ts +0 -176
- package/tests/integration/fact-supersedence.test.ts +0 -313
- package/tests/integration/fts-index.test.ts +0 -60
- package/tests/integration/getbyids-sqlite.test.ts +0 -100
- package/tests/integration/hermes-agent-hooks.test.ts +0 -248
- package/tests/integration/hook-claude-settings.test.ts +0 -218
- package/tests/integration/hook-log.test.ts +0 -54
- package/tests/integration/hook-memo.test.ts +0 -68
- package/tests/integration/hook-pre-compact.test.ts +0 -105
- package/tests/integration/hook-subagent-start.test.ts +0 -102
- package/tests/integration/http.test.ts +0 -401
- package/tests/integration/keyword-search-fts.test.ts +0 -66
- package/tests/integration/mcp-recall-logging.test.ts +0 -88
- package/tests/integration/mcp.test.ts +0 -260
- package/tests/integration/memo-sweep.test.ts +0 -91
- package/tests/integration/prompt-recall-hook.test.ts +0 -88
- package/tests/integration/provider-registry.test.ts +0 -107
- package/tests/integration/recall-golden.test.ts +0 -59
- package/tests/integration/recall-sqlite.test.ts +0 -169
- package/tests/integration/scheduler.test.ts +0 -391
- package/tests/integration/session-end-hook.test.ts +0 -48
- package/tests/integration/session-start-hook.test.ts +0 -126
- package/tests/integration/source-registry.test.ts +0 -122
- package/tests/integration/sqlite-fact-store.test.ts +0 -346
- package/tests/integration/stop-hook.test.ts +0 -560
- package/tests/integration/wal-checkpoint.test.ts +0 -49
- package/tests/unit/cli/launchctl-helpers.test.ts +0 -60
- package/tests/unit/core/adapters/aider.test.ts +0 -230
- package/tests/unit/core/adapters/claude-code.test.ts +0 -118
- package/tests/unit/core/adapters/cursor.test.ts +0 -485
- package/tests/unit/core/adapters/hermes-agent.test.ts +0 -329
- package/tests/unit/core/adapters/hermes.test.ts +0 -81
- package/tests/unit/core/adapters/jsonl-generic.test.ts +0 -142
- package/tests/unit/core/adapters/opencode.test.ts +0 -354
- package/tests/unit/core/adapters/pi.test.ts +0 -110
- package/tests/unit/core/adapters/windsurf.test.ts +0 -416
- package/tests/unit/core/classifier/prompt.test.ts +0 -126
- package/tests/unit/core/embedding/chunk-body.test.ts +0 -100
- package/tests/unit/core/facts/extract-facts.test.ts +0 -117
- package/tests/unit/core/filter.test.ts +0 -40
- package/tests/unit/core/hook/citation-detect-cite-session.test.ts +0 -96
- package/tests/unit/core/hook/citation-detect.test.ts +0 -124
- package/tests/unit/core/hook/gate.test.ts +0 -29
- package/tests/unit/core/hook/pointer-block.test.ts +0 -22
- package/tests/unit/core/hook/select.test.ts +0 -66
- package/tests/unit/core/match-fields.test.ts +0 -39
- package/tests/unit/core/mcp-cite-session.test.ts +0 -51
- package/tests/unit/core/providers/provider-models.test.ts +0 -101
- package/tests/unit/core/query-shape.test.ts +0 -92
- package/tests/unit/core/recall-facts/fact-recall-service.test.ts +0 -258
- package/tests/unit/core/recall-service.test.ts +0 -200
- package/tests/unit/core/storage/live-status.test.ts +0 -54
- package/tests/unit/core/tokenize.test.ts +0 -32
- package/tests/unit/core/useful-scan.test.ts +0 -537
- package/tests/unit/llm/embed.test.ts +0 -93
- package/tests/unit/llm/ollama-client.test.ts +0 -124
- package/tests/unit/scripts/longmemeval-scorer.test.ts +0 -114
- package/tsconfig.json +0 -31
- package/tsconfig.test.json +0 -11
- package/vitest.config.ts +0 -22
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
-- Migration 010: add 'opencode' to the sources.kind CHECK constraint.
|
|
2
|
-
--
|
|
3
|
-
-- SQLite does not support ALTER COLUMN to modify CHECK constraints in place.
|
|
4
|
-
-- Standard approach: rename → recreate → copy → drop old.
|
|
5
|
-
|
|
6
|
-
PRAGMA foreign_keys = OFF;
|
|
7
|
-
|
|
8
|
-
CREATE TABLE sources_new (
|
|
9
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
10
|
-
kind TEXT NOT NULL CHECK (kind IN ('claude-code', 'hermes', 'opencode', 'pi', 'jsonl-generic', 'webhook')),
|
|
11
|
-
name TEXT NOT NULL UNIQUE,
|
|
12
|
-
path_or_url TEXT,
|
|
13
|
-
runtime_label TEXT NOT NULL,
|
|
14
|
-
parse_config TEXT NOT NULL DEFAULT '{}',
|
|
15
|
-
enabled INTEGER NOT NULL DEFAULT 1,
|
|
16
|
-
token TEXT,
|
|
17
|
-
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
18
|
-
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
19
|
-
);
|
|
20
|
-
|
|
21
|
-
INSERT INTO sources_new SELECT id, kind, name, path_or_url, runtime_label, parse_config, enabled, token, created_at, updated_at FROM sources;
|
|
22
|
-
|
|
23
|
-
DROP TABLE sources;
|
|
24
|
-
ALTER TABLE sources_new RENAME TO sources;
|
|
25
|
-
|
|
26
|
-
CREATE INDEX IF NOT EXISTS idx_sources_enabled ON sources(enabled) WHERE enabled = 1;
|
|
27
|
-
|
|
28
|
-
PRAGMA foreign_keys = ON;
|
|
29
|
-
|
|
30
|
-
INSERT OR IGNORE INTO schema_migrations (version, name) VALUES (10, '010_sources_opencode');
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
-- Migration 011: add 'hermes-agent' to the sources.kind CHECK constraint.
|
|
2
|
-
--
|
|
3
|
-
-- SQLite does not support ALTER COLUMN to modify CHECK constraints in place.
|
|
4
|
-
-- Standard approach: rename → recreate → copy → drop old.
|
|
5
|
-
|
|
6
|
-
PRAGMA foreign_keys = OFF;
|
|
7
|
-
|
|
8
|
-
CREATE TABLE sources_new (
|
|
9
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
10
|
-
kind TEXT NOT NULL CHECK (kind IN ('claude-code', 'hermes', 'hermes-agent', 'opencode', 'pi', 'jsonl-generic', 'webhook')),
|
|
11
|
-
name TEXT NOT NULL UNIQUE,
|
|
12
|
-
path_or_url TEXT,
|
|
13
|
-
runtime_label TEXT NOT NULL,
|
|
14
|
-
parse_config TEXT NOT NULL DEFAULT '{}',
|
|
15
|
-
enabled INTEGER NOT NULL DEFAULT 1,
|
|
16
|
-
token TEXT,
|
|
17
|
-
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
18
|
-
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
19
|
-
);
|
|
20
|
-
|
|
21
|
-
INSERT INTO sources_new SELECT id, kind, name, path_or_url, runtime_label, parse_config, enabled, token, created_at, updated_at FROM sources;
|
|
22
|
-
|
|
23
|
-
DROP TABLE sources;
|
|
24
|
-
ALTER TABLE sources_new RENAME TO sources;
|
|
25
|
-
|
|
26
|
-
CREATE INDEX IF NOT EXISTS idx_sources_enabled ON sources(enabled) WHERE enabled = 1;
|
|
27
|
-
|
|
28
|
-
PRAGMA foreign_keys = ON;
|
|
29
|
-
|
|
30
|
-
INSERT OR IGNORE INTO schema_migrations (version, name) VALUES (11, '011_sources_hermes_agent');
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
-- Migration 012: add 'aider' to the sources.kind CHECK constraint.
|
|
2
|
-
--
|
|
3
|
-
-- SQLite does not support ALTER COLUMN to modify CHECK constraints in place.
|
|
4
|
-
-- Standard approach: rename → recreate → copy → drop old.
|
|
5
|
-
|
|
6
|
-
PRAGMA foreign_keys = OFF;
|
|
7
|
-
|
|
8
|
-
CREATE TABLE sources_new (
|
|
9
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
10
|
-
kind TEXT NOT NULL CHECK (kind IN ('claude-code', 'hermes', 'hermes-agent', 'aider', 'opencode', 'pi', 'jsonl-generic', 'webhook')),
|
|
11
|
-
name TEXT NOT NULL UNIQUE,
|
|
12
|
-
path_or_url TEXT,
|
|
13
|
-
runtime_label TEXT NOT NULL,
|
|
14
|
-
parse_config TEXT NOT NULL DEFAULT '{}',
|
|
15
|
-
enabled INTEGER NOT NULL DEFAULT 1,
|
|
16
|
-
token TEXT,
|
|
17
|
-
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
18
|
-
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
19
|
-
);
|
|
20
|
-
|
|
21
|
-
INSERT INTO sources_new SELECT id, kind, name, path_or_url, runtime_label, parse_config, enabled, token, created_at, updated_at FROM sources;
|
|
22
|
-
|
|
23
|
-
DROP TABLE sources;
|
|
24
|
-
ALTER TABLE sources_new RENAME TO sources;
|
|
25
|
-
|
|
26
|
-
CREATE INDEX IF NOT EXISTS idx_sources_enabled ON sources(enabled) WHERE enabled = 1;
|
|
27
|
-
|
|
28
|
-
PRAGMA foreign_keys = ON;
|
|
29
|
-
|
|
30
|
-
INSERT OR IGNORE INTO schema_migrations (version, name) VALUES (12, '012_sources_aider');
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
-- Migration 013: add failure_count to adapter_state.
|
|
2
|
-
--
|
|
3
|
-
-- Tracks consecutive classify/storage failures per source file. The scheduler
|
|
4
|
-
-- skips files whose failure_count has reached the backoff ceiling (default 3)
|
|
5
|
-
-- and whose file_size has not changed since the last attempt. When the file
|
|
6
|
-
-- grows (new content appended), failure_count resets to 0 and the file is
|
|
7
|
-
-- retried. This prevents a single permanently-broken or over-large transcript
|
|
8
|
-
-- from flooding daemon-err.log on every 30-minute tick.
|
|
9
|
-
|
|
10
|
-
ALTER TABLE adapter_state ADD COLUMN failure_count INTEGER NOT NULL DEFAULT 0;
|
|
11
|
-
|
|
12
|
-
INSERT OR IGNORE INTO schema_migrations (version, name) VALUES (13, '013_adapter_state_failure_count');
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
-- Migration 014: add 'cursor' to the sources.kind CHECK constraint.
|
|
2
|
-
--
|
|
3
|
-
-- SQLite does not support ALTER COLUMN to modify CHECK constraints in place.
|
|
4
|
-
-- Standard approach: rename → recreate → copy → drop old.
|
|
5
|
-
|
|
6
|
-
PRAGMA foreign_keys = OFF;
|
|
7
|
-
|
|
8
|
-
CREATE TABLE sources_new (
|
|
9
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
10
|
-
kind TEXT NOT NULL CHECK (kind IN ('claude-code', 'hermes', 'hermes-agent', 'aider', 'cursor', 'opencode', 'pi', 'jsonl-generic', 'webhook')),
|
|
11
|
-
name TEXT NOT NULL UNIQUE,
|
|
12
|
-
path_or_url TEXT,
|
|
13
|
-
runtime_label TEXT NOT NULL,
|
|
14
|
-
parse_config TEXT NOT NULL DEFAULT '{}',
|
|
15
|
-
enabled INTEGER NOT NULL DEFAULT 1,
|
|
16
|
-
token TEXT,
|
|
17
|
-
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
18
|
-
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
19
|
-
);
|
|
20
|
-
|
|
21
|
-
INSERT INTO sources_new SELECT id, kind, name, path_or_url, runtime_label, parse_config, enabled, token, created_at, updated_at FROM sources;
|
|
22
|
-
|
|
23
|
-
DROP TABLE sources;
|
|
24
|
-
ALTER TABLE sources_new RENAME TO sources;
|
|
25
|
-
|
|
26
|
-
CREATE INDEX IF NOT EXISTS idx_sources_enabled ON sources(enabled) WHERE enabled = 1;
|
|
27
|
-
|
|
28
|
-
PRAGMA foreign_keys = ON;
|
|
29
|
-
|
|
30
|
-
INSERT OR IGNORE INTO schema_migrations (version, name) VALUES (14, '014_sources_cursor');
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
-- Migration 015: add 'windsurf' to the sources.kind CHECK constraint.
|
|
2
|
-
--
|
|
3
|
-
-- SQLite does not support ALTER COLUMN to modify CHECK constraints in place.
|
|
4
|
-
-- Standard approach: rename → recreate → copy → drop old.
|
|
5
|
-
|
|
6
|
-
PRAGMA foreign_keys = OFF;
|
|
7
|
-
|
|
8
|
-
CREATE TABLE sources_new (
|
|
9
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
10
|
-
kind TEXT NOT NULL CHECK (kind IN ('claude-code', 'hermes', 'hermes-agent', 'aider', 'cursor', 'windsurf', 'opencode', 'pi', 'jsonl-generic', 'webhook')),
|
|
11
|
-
name TEXT NOT NULL UNIQUE,
|
|
12
|
-
path_or_url TEXT,
|
|
13
|
-
runtime_label TEXT NOT NULL,
|
|
14
|
-
parse_config TEXT NOT NULL DEFAULT '{}',
|
|
15
|
-
enabled INTEGER NOT NULL DEFAULT 1,
|
|
16
|
-
token TEXT,
|
|
17
|
-
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
18
|
-
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
19
|
-
);
|
|
20
|
-
|
|
21
|
-
INSERT INTO sources_new SELECT id, kind, name, path_or_url, runtime_label, parse_config, enabled, token, created_at, updated_at FROM sources;
|
|
22
|
-
|
|
23
|
-
DROP TABLE sources;
|
|
24
|
-
ALTER TABLE sources_new RENAME TO sources;
|
|
25
|
-
|
|
26
|
-
CREATE INDEX IF NOT EXISTS idx_sources_enabled ON sources(enabled) WHERE enabled = 1;
|
|
27
|
-
|
|
28
|
-
PRAGMA foreign_keys = ON;
|
|
29
|
-
|
|
30
|
-
INSERT OR IGNORE INTO schema_migrations (version, name) VALUES (15, '015_sources_windsurf');
|
|
@@ -1,49 +0,0 @@
|
|
|
1
|
-
# nlm-memory Hermes Agent plugin
|
|
2
|
-
|
|
3
|
-
This directory is the [NousResearch Hermes Agent](https://github.com/NousResearch/hermes-agent) plugin distribution surface for nlm-memory.
|
|
4
|
-
|
|
5
|
-
## Install
|
|
6
|
-
|
|
7
|
-
Prerequisite: `npm install -g nlm-memory` (puts `nlm` on PATH; the MCP server spawns `nlm mcp`).
|
|
8
|
-
|
|
9
|
-
```bash
|
|
10
|
-
# Recommended — uses nlm connect to copy the plugin and enable it
|
|
11
|
-
nlm connect hermes-agent
|
|
12
|
-
|
|
13
|
-
# Or manually
|
|
14
|
-
cp -r plugin-hermes-agent ~/.hermes/plugins/nlm-memory
|
|
15
|
-
hermes plugins enable nlm-memory
|
|
16
|
-
nlm connect hermes # also writes the MCP server entry to ~/.hermes/config.yaml
|
|
17
|
-
```
|
|
18
|
-
|
|
19
|
-
## What this ships
|
|
20
|
-
|
|
21
|
-
- **`plugin.yaml`** — Hermes plugin manifest (name: `nlm-memory`, kind: `memory`)
|
|
22
|
-
- **`__init__.py`** — Python shim that registers 6 lifecycle hooks; all delegate to the local nlm daemon at `http://localhost:3940`
|
|
23
|
-
|
|
24
|
-
## How it works
|
|
25
|
-
|
|
26
|
-
| Hermes hook | Action |
|
|
27
|
-
|---|---|
|
|
28
|
-
| `pre_llm_call` | Keyword-recalls up to 3 relevant prior sessions; injects them as context above the user message |
|
|
29
|
-
| `post_llm_call` | Scans the assistant response for cited session IDs and logs prose citation events |
|
|
30
|
-
| `on_session_start` | No-op (memo is created lazily on first recall hit) |
|
|
31
|
-
| `on_session_end` | Clears the per-session surfaced-ID memo |
|
|
32
|
-
| `on_session_finalize` | Clears the per-session surfaced-ID memo |
|
|
33
|
-
| `on_session_reset` | Clears the per-session surfaced-ID memo |
|
|
34
|
-
|
|
35
|
-
## Configuration
|
|
36
|
-
|
|
37
|
-
| Env var | Default | Purpose |
|
|
38
|
-
|---|---|---|
|
|
39
|
-
| `NLM_DAEMON_PORT` | `3940` | Port the nlm daemon listens on |
|
|
40
|
-
|
|
41
|
-
## Daemon required
|
|
42
|
-
|
|
43
|
-
The plugin delegates entirely to the nlm daemon. Start it with `nlm start` or `nlm install` (macOS LaunchAgent). If the daemon is unreachable, every hook silently returns `None` — the agent loop is never blocked.
|
|
44
|
-
|
|
45
|
-
## Uninstall
|
|
46
|
-
|
|
47
|
-
```bash
|
|
48
|
-
nlm disconnect hermes-agent
|
|
49
|
-
```
|
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
nlm-memory Hermes Agent plugin.
|
|
3
|
-
|
|
4
|
-
Wires 6 Hermes lifecycle hooks to the local nlm-memory daemon
|
|
5
|
-
(http://localhost:3940 by default). All calls are fire-and-forget
|
|
6
|
-
except pre_llm_call, which returns a context string for injection.
|
|
7
|
-
|
|
8
|
-
The daemon must be running for hooks to have effect. If it is unreachable,
|
|
9
|
-
every hook silently returns None so the agent loop is never blocked.
|
|
10
|
-
"""
|
|
11
|
-
|
|
12
|
-
import json
|
|
13
|
-
import os
|
|
14
|
-
import urllib.error
|
|
15
|
-
import urllib.request
|
|
16
|
-
from typing import Any
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
def _base_url() -> str:
|
|
20
|
-
port = os.environ.get("NLM_DAEMON_PORT", "3940")
|
|
21
|
-
return f"http://localhost:{port}"
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
def _post(path: str, payload: dict[str, Any]) -> dict[str, Any]:
|
|
25
|
-
url = f"{_base_url()}/api/{path}"
|
|
26
|
-
data = json.dumps(payload).encode()
|
|
27
|
-
req = urllib.request.Request(
|
|
28
|
-
url,
|
|
29
|
-
data=data,
|
|
30
|
-
headers={"Content-Type": "application/json"},
|
|
31
|
-
method="POST",
|
|
32
|
-
)
|
|
33
|
-
try:
|
|
34
|
-
with urllib.request.urlopen(req, timeout=5) as resp:
|
|
35
|
-
return json.loads(resp.read())
|
|
36
|
-
except (urllib.error.URLError, OSError, json.JSONDecodeError):
|
|
37
|
-
return {}
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def register(ctx: Any) -> None:
|
|
41
|
-
def pre_llm_call(
|
|
42
|
-
session_id: str,
|
|
43
|
-
user_message: str,
|
|
44
|
-
is_first_turn: bool = False,
|
|
45
|
-
**kwargs: Any,
|
|
46
|
-
) -> str | None:
|
|
47
|
-
result = _post(
|
|
48
|
-
"hook/hermes-agent/pre-turn",
|
|
49
|
-
{"session_id": session_id, "user_message": user_message, "is_first_turn": is_first_turn},
|
|
50
|
-
)
|
|
51
|
-
context = result.get("context")
|
|
52
|
-
return context if isinstance(context, str) and context else None
|
|
53
|
-
|
|
54
|
-
def post_llm_call(
|
|
55
|
-
session_id: str,
|
|
56
|
-
assistant_response: str,
|
|
57
|
-
**kwargs: Any,
|
|
58
|
-
) -> None:
|
|
59
|
-
_post(
|
|
60
|
-
"hook/hermes-agent/post-turn",
|
|
61
|
-
{"session_id": session_id, "assistant_response": assistant_response},
|
|
62
|
-
)
|
|
63
|
-
|
|
64
|
-
def _session_event(event: str, session_id: str | None, **kwargs: Any) -> None:
|
|
65
|
-
_post(
|
|
66
|
-
"hook/hermes-agent/session-lifecycle",
|
|
67
|
-
{"event": event, "session_id": session_id},
|
|
68
|
-
)
|
|
69
|
-
|
|
70
|
-
ctx.register_hook("pre_llm_call", pre_llm_call)
|
|
71
|
-
ctx.register_hook("post_llm_call", post_llm_call)
|
|
72
|
-
ctx.register_hook("on_session_start", lambda session_id, **kw: _session_event("start", session_id, **kw))
|
|
73
|
-
ctx.register_hook("on_session_end", lambda session_id, **kw: _session_event("end", session_id, **kw))
|
|
74
|
-
ctx.register_hook("on_session_finalize", lambda session_id=None, **kw: _session_event("finalize", session_id, **kw))
|
|
75
|
-
ctx.register_hook("on_session_reset", lambda session_id, **kw: _session_event("reset", session_id, **kw))
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
name: nlm-memory
|
|
2
|
-
version: 0.1.0
|
|
3
|
-
description: Non-linear memory for Hermes — surfaces relevant prior sessions at each turn via keyword recall
|
|
4
|
-
kind: memory
|
|
5
|
-
author: pbmagnet4
|
|
6
|
-
provides_hooks:
|
|
7
|
-
- pre_llm_call
|
|
8
|
-
- post_llm_call
|
|
9
|
-
- on_session_start
|
|
10
|
-
- on_session_end
|
|
11
|
-
- on_session_finalize
|
|
12
|
-
- on_session_reset
|
|
13
|
-
requires_env:
|
|
14
|
-
- name: NLM_DAEMON_PORT
|
|
15
|
-
description: "Port the nlm-memory daemon listens on (default: 3940)"
|
|
Binary file
|
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
/**
|
|
3
|
-
* Bundles each hook entry under src/hook/ as a single-file .mjs into
|
|
4
|
-
* plugin/scripts/, ready for Codex to invoke via ${CLAUDE_PLUGIN_ROOT}.
|
|
5
|
-
*
|
|
6
|
-
* Why single-file bundles: Codex plugins are content-addressed by file hash
|
|
7
|
-
* for the trust system. Shipping a tree of imports would force the user to
|
|
8
|
-
* re-trust every release across dozens of files. One file per hook keeps
|
|
9
|
-
* the trust footprint minimal.
|
|
10
|
-
*
|
|
11
|
-
* Why esbuild not tsc: tsc emits a tree of files with relative imports; the
|
|
12
|
-
* plugin would have to ship `dist/` plus its dependency tree. esbuild flattens
|
|
13
|
-
* to one .mjs per entry with all internal deps inlined. node builtins are
|
|
14
|
-
* left as-is (node:*) and zero runtime deps are bundled because the hook
|
|
15
|
-
* scripts only talk to the daemon over HTTP — no native bindings required.
|
|
16
|
-
*/
|
|
17
|
-
|
|
18
|
-
import { build } from "esbuild";
|
|
19
|
-
import { resolve, dirname } from "node:path";
|
|
20
|
-
import { fileURLToPath } from "node:url";
|
|
21
|
-
import { mkdirSync } from "node:fs";
|
|
22
|
-
|
|
23
|
-
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
24
|
-
const REPO_ROOT = resolve(__dirname, "..");
|
|
25
|
-
const OUT_DIR = resolve(REPO_ROOT, "plugin/scripts");
|
|
26
|
-
|
|
27
|
-
const ENTRIES = [
|
|
28
|
-
"src/hook/prompt-recall-hook.ts",
|
|
29
|
-
"src/hook/stop-hook.ts",
|
|
30
|
-
];
|
|
31
|
-
|
|
32
|
-
mkdirSync(OUT_DIR, { recursive: true });
|
|
33
|
-
|
|
34
|
-
const results = await Promise.all(
|
|
35
|
-
ENTRIES.map(async (entry) => {
|
|
36
|
-
const entryPath = resolve(REPO_ROOT, entry);
|
|
37
|
-
const outName = entry.split("/").pop().replace(/\.ts$/, ".mjs");
|
|
38
|
-
const outFile = resolve(OUT_DIR, outName);
|
|
39
|
-
const result = await build({
|
|
40
|
-
entryPoints: [entryPath],
|
|
41
|
-
outfile: outFile,
|
|
42
|
-
bundle: true,
|
|
43
|
-
platform: "node",
|
|
44
|
-
format: "esm",
|
|
45
|
-
target: "node20",
|
|
46
|
-
minify: false,
|
|
47
|
-
sourcemap: false,
|
|
48
|
-
legalComments: "none",
|
|
49
|
-
logLevel: "warning",
|
|
50
|
-
banner: { js: "#!/usr/bin/env node" },
|
|
51
|
-
tsconfig: resolve(REPO_ROOT, "tsconfig.json"),
|
|
52
|
-
});
|
|
53
|
-
return { entry, outFile, warnings: result.warnings.length };
|
|
54
|
-
}),
|
|
55
|
-
);
|
|
56
|
-
|
|
57
|
-
for (const r of results) {
|
|
58
|
-
process.stdout.write(`bundled ${r.entry} -> ${r.outFile.replace(REPO_ROOT + "/", "")}`);
|
|
59
|
-
if (r.warnings > 0) process.stdout.write(` (${r.warnings} warnings)`);
|
|
60
|
-
process.stdout.write("\n");
|
|
61
|
-
}
|
|
@@ -1,67 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
// Direct DeepSeek probe — calls the API with the same prompt + a known-failing
|
|
3
|
-
// session body and dumps everything: raw choice content, finish_reason, usage.
|
|
4
|
-
|
|
5
|
-
import { readFileSync } from "node:fs";
|
|
6
|
-
import Database from "better-sqlite3";
|
|
7
|
-
|
|
8
|
-
const envContent = readFileSync(`${process.env.HOME}/.nlm/.env`, "utf8");
|
|
9
|
-
const apiKey = envContent.split("\n").find((l) => l.startsWith("DEEPSEEK_API_KEY="))?.split("=")[1]?.trim();
|
|
10
|
-
if (!apiKey) {
|
|
11
|
-
console.error("no DEEPSEEK_API_KEY in ~/.nlm/.env");
|
|
12
|
-
process.exit(1);
|
|
13
|
-
}
|
|
14
|
-
|
|
15
|
-
const db = new Database(`${process.env.HOME}/.nlm/canonical.sqlite`, { readonly: true });
|
|
16
|
-
const targets = process.argv.slice(2);
|
|
17
|
-
if (targets.length === 0) {
|
|
18
|
-
console.error("usage: deepseek-probe.mjs <session-id> [more ids...]");
|
|
19
|
-
process.exit(1);
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
// Pull the actual classifier prompt from the build
|
|
23
|
-
const promptModule = await import("/Users/echalupa/Documents/Coding Projects/nlm-memory-ts/src/core/classifier/prompt.ts");
|
|
24
|
-
|
|
25
|
-
// Prepared once and reused for every requested session id.
const selectBody = db.prepare("SELECT body FROM sessions WHERE id = ?");

// Probe each session id: rebuild the classifier prompt from the stored body,
// send it to the DeepSeek chat-completions endpoint, and dump the raw result
// (finish_reason, usage, content) plus whether the content parses as JSON.
for (const sid of targets) {
  const row = selectBody.get(sid);
  if (!row?.body) {
    console.log(`\n=== ${sid}: no body ===`);
    continue;
  }
  const userPrompt = promptModule.buildUserPrompt(row.body, "");
  console.log(`\n=== ${sid} (body ${row.body.length} chars, prompt ${userPrompt.length} chars) ===`);

  let data;
  try {
    const res = await fetch("https://api.deepseek.com/v1/chat/completions", {
      method: "POST",
      headers: { "Content-Type": "application/json", Authorization: `Bearer ${apiKey}` },
      body: JSON.stringify({
        model: "deepseek-v4-flash",
        messages: [
          { role: "system", content: promptModule.CLASSIFIER_SYSTEM_PROMPT },
          { role: "user", content: userPrompt },
        ],
        response_format: { type: "json_object" },
        temperature: 0.1,
        max_tokens: 8192,
        stream: false,
      }),
    });
    if (!res.ok) {
      console.log(`HTTP ${res.status}: ${(await res.text()).slice(0, 500)}`);
      continue;
    }
    data = await res.json();
  } catch (err) {
    // A network error or an undecodable response for one id must not abort
    // the remaining ids, mirroring the `continue` on HTTP errors above.
    console.log(`request failed: ${err.message}`);
    continue;
  }

  const choice = data.choices?.[0];
  console.log(`finish_reason: ${choice?.finish_reason}`);
  console.log(`usage: ${JSON.stringify(data.usage)}`);
  const content = choice?.message?.content ?? "";
  console.log(`content (${content.length} chars):`);
  console.log(content);
  console.log("--- end content ---");
  try {
    // Strip Markdown code fences (``` or ```json) before parsing.
    JSON.parse(content.replace(/^```(?:json)?\s*|\s*```$/gm, "").trim());
    console.log("JSON.parse: OK");
  } catch (e) {
    console.log(`JSON.parse: FAILED — ${e.message}`);
  }
}
|
|
@@ -1,207 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
/**
|
|
3
|
-
* Extract (query, surfaced_id, surfaced_body, label, weight, source) triples
|
|
4
|
-
* from the NLM telemetry logs for reranker training data.
|
|
5
|
-
*
|
|
6
|
-
* Data sources:
|
|
7
|
-
* ~/.nlm/hook-log.jsonl — one row per UserPromptSubmit/SessionStart fire,
|
|
8
|
-
* fields: ts, conversationId, promptPreview, wouldInject[]
|
|
9
|
-
* ~/.nlm/citation-log.jsonl — one row per detected citation,
|
|
10
|
-
* fields: ts, conversation_id, cited_id, kind
|
|
11
|
-
* ~/.nlm/canonical.sqlite — sessions table for surfaced_body lookup
|
|
12
|
-
*
|
|
13
|
-
* Algorithm:
|
|
14
|
-
* 1. Index citations by (conversationId, citedId, kind).
|
|
15
|
-
* 2. Identify "gold conversations": conversations that produced at least one
|
|
16
|
-
* tool_use citation. These are the only conversations with confirmed
|
|
17
|
-
* positive signal — surfaced-but-not-cited sessions in them are genuine
|
|
18
|
-
* hard negatives.
|
|
19
|
-
* 3. For each hook-log entry with wouldInject.length > 0:
|
|
20
|
-
* - If the conversation is a gold conversation:
|
|
21
|
-
* * tool_use cited sessions → weight 1.0, source "tool_use" (gold positive)
|
|
22
|
-
* * NOT cited sessions → weight 0.0, source "hard_negative"
|
|
23
|
-
* - Prose-only conversations are skipped (signal too noisy).
|
|
24
|
-
* 4. Fetch surfaced_body from SQLite for each (query, surfaced_id) pair.
|
|
25
|
-
* 5. Write JSONL to --output path (or stdout).
|
|
26
|
-
*
|
|
27
|
-
* Dedup: one triple per (conversationId, query, surfaced_id, source) key — duplicate hook
|
|
28
|
-
* fires for the same (conversationId, id) pair are collapsed.
|
|
29
|
-
*
|
|
30
|
-
* Usage:
|
|
31
|
-
* node scripts/extract-triples.mjs # stdout, last 30d
|
|
32
|
-
* node scripts/extract-triples.mjs --days 7 # last 7 days
|
|
33
|
-
* node scripts/extract-triples.mjs --output triples.jsonl
|
|
34
|
-
* node scripts/extract-triples.mjs --stats # summary only, no output
|
|
35
|
-
*/
|
|
36
|
-
|
|
37
|
-
import { createReadStream, existsSync, writeFileSync } from "node:fs";
|
|
38
|
-
import { createInterface } from "node:readline";
|
|
39
|
-
import { homedir } from "node:os";
|
|
40
|
-
import { join } from "node:path";
|
|
41
|
-
import { createWriteStream } from "node:fs";
|
|
42
|
-
import Database from "better-sqlite3";
|
|
43
|
-
|
|
44
|
-
// ── CLI args ─────────────────────────────────────────────────────────────────
|
|
45
|
-
|
|
46
|
-
// Look-back window used when --days is absent or invalid.
const DEFAULT_DAYS = 30;

/**
 * Return the argument that follows `flag` in `argv`.
 *
 * @param {string[]} argv - CLI arguments (without node/script entries).
 * @param {string} flag - flag name, e.g. "--output".
 * @returns {string|null} the following argument, or null when the flag is
 *   absent or is the last argument.
 */
function flagValue(argv, flag) {
  const idx = argv.indexOf(flag);
  if (idx === -1) return null;
  return argv[idx + 1] ?? null;
}

/**
 * Parse the --days look-back window.
 *
 * Falls back to DEFAULT_DAYS when the flag is absent, not numeric, zero, or
 * negative. A negative window would put the cutoff in the future and silently
 * discard every log row.
 *
 * @param {string[]} argv - CLI arguments (without node/script entries).
 * @returns {number} a positive whole number of days.
 */
function parseDays(argv) {
  const parsed = Number.parseInt(flagValue(argv, "--days") ?? "", 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : DEFAULT_DAYS;
}

const args = process.argv.slice(2);
const days = parseDays(args);
const outputPath = flagValue(args, "--output");
const statsOnly = args.includes("--stats");

// Input locations; each can be overridden through its environment variable.
const hookLogPath = process.env["NLM_HOOK_LOG"] ?? join(homedir(), ".nlm", "hook-log.jsonl");
const citationLogPath = process.env["NLM_CITATION_LOG"] ?? join(homedir(), ".nlm", "citation-log.jsonl");
const dbPath = process.env["NLM_DB_PATH"] ?? join(homedir(), ".nlm", "canonical.sqlite");

// Epoch-ms lower bound: hook-log rows older than this are ignored.
const cutoff = Date.now() - days * 24 * 60 * 60 * 1000;
|
|
62
|
-
|
|
63
|
-
// ── Helpers ───────────────────────────────────────────────────────────────────
|
|
64
|
-
|
|
65
|
-
/**
 * Read a JSONL file into an array of parsed records.
 *
 * Returns an empty array when the file does not exist. Blank lines and lines
 * that are not valid JSON are skipped (best-effort parsing). Lines whose JSON
 * value is not a plain object (`null`, numbers, strings, arrays) are skipped
 * as well: callers dereference fields on every row, so a stray `null` line
 * would otherwise surface as a TypeError far from its cause.
 *
 * @param {string} path - path of the JSONL file.
 * @returns {Promise<object[]>} parsed rows in file order.
 */
async function readJsonl(path) {
  if (!existsSync(path)) return [];
  const rows = [];
  const rl = createInterface({ input: createReadStream(path), crlfDelay: Infinity });
  for await (const line of rl) {
    const trimmed = line.trim();
    if (!trimmed) continue;
    let parsed;
    try {
      parsed = JSON.parse(trimmed);
    } catch {
      // skip malformed lines
      continue;
    }
    if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
      rows.push(parsed);
    }
  }
  return rows;
}
|
|
80
|
-
|
|
81
|
-
/**
 * Convert a timestamp string stored on a log entry to epoch milliseconds.
 *
 * @param {object} entry - parsed log row.
 * @param {string} [field="ts"] - name of the property holding the timestamp.
 * @returns {number} epoch milliseconds, or 0 when the property is missing,
 *   is not a string, or cannot be parsed by `Date.parse`. Returning 0 rather
 *   than NaN for unparseable strings gives callers a single "no timestamp"
 *   sentinel.
 */
function tsMs(entry, field = "ts") {
  const v = entry[field];
  if (typeof v !== "string") return 0;
  const ms = Date.parse(v);
  return Number.isNaN(ms) ? 0 : ms;
}
|
|
86
|
-
|
|
87
|
-
// ── Step 1: Load citations ────────────────────────────────────────────────────
|
|
88
|
-
|
|
89
|
-
const citationRows = await readJsonl(citationLogPath);

// conversationId → Set of ids cited through a tool_use citation
const toolUseCitations = new Map();
// Conversations that produced at least one tool_use citation
const goldConversations = new Set();

for (const { kind, conversation_id: convId, cited_id: citedId } of citationRows) {
  // Only tool_use citations with both ids present are indexed.
  if (kind !== "tool_use" || !convId || !citedId) continue;

  let citedSet = toolUseCitations.get(convId);
  if (citedSet === undefined) {
    citedSet = new Set();
    toolUseCitations.set(convId, citedSet);
  }
  citedSet.add(citedId);
  goldConversations.add(convId);
}
|
|
105
|
-
|
|
106
|
-
// ── Step 2: Load hook-log entries in window ───────────────────────────────────
|
|
107
|
-
|
|
108
|
-
const hookRows = await readJsonl(hookLogPath);

// Every surfaced id of an in-window hook fire inside a gold conversation
// yields one raw triple { query, surfaced_id, weight, source }.
// Dedup key: `${conversationId}::${query}::${surfaced_id}::${source}`
const seen = new Set();
const rawTriples = [];

for (const row of hookRows) {
  // Skip stop-hook entries
  if (typeof row.kind === "string") continue;

  const firedAt = tsMs(row);
  if (!firedAt || firedAt < cutoff) continue;

  const { conversationId, promptPreview: query } = row;
  const surfaced = Array.isArray(row.wouldInject) ? row.wouldInject : [];
  if (!conversationId || !query || surfaced.length === 0) continue;

  // Only process gold conversations
  if (!goldConversations.has(conversationId)) continue;

  const citedIds = toolUseCitations.get(conversationId) ?? new Set();

  for (const surfacedId of surfaced) {
    if (typeof surfacedId !== "string") continue;

    // Cited ids are positives; surfaced-but-uncited ids are hard negatives.
    const [source, weight] = citedIds.has(surfacedId)
      ? ["tool_use", 1.0]
      : ["hard_negative", 0.0];

    const dedupKey = `${conversationId}::${query}::${surfacedId}::${source}`;
    if (seen.has(dedupKey)) continue;
    seen.add(dedupKey);
    rawTriples.push({ query, surfaced_id: surfacedId, weight, source });
  }
}
|
|
140
|
-
|
|
141
|
-
// Nothing to emit: explain on stderr and finish with exit status 0.
if (rawTriples.length === 0) {
  const notice = [
    `extract-triples: no triples found. `,
    `Checked ${goldConversations.size} gold conversations in the last ${days}d. `,
    `Run nlm useful-scan and ensure tool_use citations exist in citation-log.jsonl.`,
  ].join("");
  console.error(notice);
  process.exit(0);
}
|
|
149
|
-
|
|
150
|
-
// ── Step 3: Fetch surfaced_body from SQLite ────────────────────────────────────
|
|
151
|
-
|
|
152
|
-
const uniqueIds = [...new Set(rawTriples.map((t) => t.surfaced_id))];

// Ids are looked up in batches so a large run never exceeds SQLite's limit on
// bound parameters per statement (999 by default in older SQLite builds).
const ID_BATCH_SIZE = 500;

// surfaced_id → session body ("" when the stored body is not a string)
const bodyById = new Map();
if (existsSync(dbPath)) {
  let db;
  try {
    db = new Database(dbPath, { readonly: true });
    for (let start = 0; start < uniqueIds.length; start += ID_BATCH_SIZE) {
      const batch = uniqueIds.slice(start, start + ID_BATCH_SIZE);
      const placeholders = batch.map(() => "?").join(",");
      const rows = db
        .prepare(`SELECT id, body FROM sessions WHERE id IN (${placeholders})`)
        .all(...batch);
      for (const row of rows) {
        bodyById.set(row.id, typeof row.body === "string" ? row.body : "");
      }
    }
  } catch (err) {
    console.error(`extract-triples: sqlite lookup failed — ${err.message}. Bodies will be empty.`);
  } finally {
    // Release the handle even when the lookup throws; `db` stays undefined
    // if the constructor itself failed.
    db?.close();
  }
} else {
  console.error(`extract-triples: db not found at ${dbPath}. Bodies will be empty.`);
}
|
|
172
|
-
|
|
173
|
-
// ── Step 4: Assemble final triples ────────────────────────────────────────────
|
|
174
|
-
|
|
175
|
-
// Attach the looked-up body and a positive/negative label to each raw triple.
// Property order is kept because it determines the serialized field order.
const triples = rawTriples.map(({ query, surfaced_id, weight, source }) => ({
  query,
  surfaced_id,
  surfaced_body: bodyById.get(surfaced_id) ?? "",
  label: weight === 1.0 ? "positive" : "negative",
  weight,
  source,
}));

// ── Stats ─────────────────────────────────────────────────────────────────────

const tally = { positives: 0, negatives: 0, withBody: 0 };
for (const t of triples) {
  if (t.weight === 1.0) tally.positives += 1;
  if (t.weight === 0.0) tally.negatives += 1;
  if (t.surfaced_body.length > 0) tally.withBody += 1;
}
const { positives, negatives, withBody } = tally;

// The summary goes to stderr so stdout stays reserved for the JSONL payload.
const summary = [
  `extract-triples: ${triples.length} triples `,
  `(${positives} positive, ${negatives} hard-negative, ${withBody}/${triples.length} with body) `,
  `from ${goldConversations.size} gold conversations over last ${days}d`,
].join("");
console.error(summary);

if (statsOnly) process.exit(0);
|
|
197
|
-
|
|
198
|
-
// ── Step 5: Write output ──────────────────────────────────────────────────────
|
|
199
|
-
|
|
200
|
-
// One JSON document per line, with a trailing newline after the last row.
const lines = `${triples.map((t) => JSON.stringify(t)).join("\n")}\n`;

if (!outputPath) {
  // No --output given: emit the JSONL on stdout.
  process.stdout.write(lines);
} else {
  writeFileSync(outputPath, lines, "utf8");
  console.error(`extract-triples: wrote ${triples.length} rows to ${outputPath}`);
}
|