@loreai/core 0.17.1 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bun/agents-file.d.ts +4 -0
- package/dist/bun/agents-file.d.ts.map +1 -1
- package/dist/bun/config.d.ts +2 -0
- package/dist/bun/config.d.ts.map +1 -1
- package/dist/bun/curator.d.ts +45 -0
- package/dist/bun/curator.d.ts.map +1 -1
- package/dist/bun/data-dir.d.ts +18 -0
- package/dist/bun/data-dir.d.ts.map +1 -0
- package/dist/bun/db.d.ts +85 -0
- package/dist/bun/db.d.ts.map +1 -1
- package/dist/bun/distillation.d.ts +2 -13
- package/dist/bun/distillation.d.ts.map +1 -1
- package/dist/bun/embedding-vendor.d.ts +22 -38
- package/dist/bun/embedding-vendor.d.ts.map +1 -1
- package/dist/bun/embedding-worker-types.d.ts +17 -12
- package/dist/bun/embedding-worker-types.d.ts.map +1 -1
- package/dist/bun/embedding-worker.d.ts +9 -2
- package/dist/bun/embedding-worker.d.ts.map +1 -1
- package/dist/bun/embedding-worker.js +38864 -33
- package/dist/bun/embedding-worker.js.map +4 -4
- package/dist/bun/embedding.d.ts +35 -23
- package/dist/bun/embedding.d.ts.map +1 -1
- package/dist/bun/gradient.d.ts +17 -1
- package/dist/bun/gradient.d.ts.map +1 -1
- package/dist/bun/import/detect.d.ts +14 -0
- package/dist/bun/import/detect.d.ts.map +1 -0
- package/dist/bun/import/extract.d.ts +43 -0
- package/dist/bun/import/extract.d.ts.map +1 -0
- package/dist/bun/import/history.d.ts +40 -0
- package/dist/bun/import/history.d.ts.map +1 -0
- package/dist/bun/import/index.d.ts +17 -0
- package/dist/bun/import/index.d.ts.map +1 -0
- package/dist/bun/import/providers/aider.d.ts +2 -0
- package/dist/bun/import/providers/aider.d.ts.map +1 -0
- package/dist/bun/import/providers/claude-code.d.ts +2 -0
- package/dist/bun/import/providers/claude-code.d.ts.map +1 -0
- package/dist/bun/import/providers/cline.d.ts +2 -0
- package/dist/bun/import/providers/cline.d.ts.map +1 -0
- package/dist/bun/import/providers/codex.d.ts +2 -0
- package/dist/bun/import/providers/codex.d.ts.map +1 -0
- package/dist/bun/import/providers/continue.d.ts +2 -0
- package/dist/bun/import/providers/continue.d.ts.map +1 -0
- package/dist/bun/import/providers/index.d.ts +19 -0
- package/dist/bun/import/providers/index.d.ts.map +1 -0
- package/dist/bun/import/providers/opencode.d.ts +2 -0
- package/dist/bun/import/providers/opencode.d.ts.map +1 -0
- package/dist/bun/import/providers/pi.d.ts +2 -0
- package/dist/bun/import/providers/pi.d.ts.map +1 -0
- package/dist/bun/import/types.d.ts +82 -0
- package/dist/bun/import/types.d.ts.map +1 -0
- package/dist/bun/index.d.ts +5 -2
- package/dist/bun/index.d.ts.map +1 -1
- package/dist/bun/index.js +3150 -439
- package/dist/bun/index.js.map +4 -4
- package/dist/bun/instruction-detect.d.ts +66 -0
- package/dist/bun/instruction-detect.d.ts.map +1 -0
- package/dist/bun/log.d.ts +9 -0
- package/dist/bun/log.d.ts.map +1 -1
- package/dist/bun/ltm.d.ts +139 -5
- package/dist/bun/ltm.d.ts.map +1 -1
- package/dist/bun/pattern-extract.d.ts +7 -0
- package/dist/bun/pattern-extract.d.ts.map +1 -1
- package/dist/bun/prompt.d.ts +1 -1
- package/dist/bun/prompt.d.ts.map +1 -1
- package/dist/bun/recall.d.ts.map +1 -1
- package/dist/bun/search.d.ts +5 -3
- package/dist/bun/search.d.ts.map +1 -1
- package/dist/bun/session-limiter.d.ts +26 -0
- package/dist/bun/session-limiter.d.ts.map +1 -0
- package/dist/bun/temporal.d.ts +2 -0
- package/dist/bun/temporal.d.ts.map +1 -1
- package/dist/bun/types.d.ts +1 -1
- package/dist/node/agents-file.d.ts +4 -0
- package/dist/node/agents-file.d.ts.map +1 -1
- package/dist/node/config.d.ts +2 -0
- package/dist/node/config.d.ts.map +1 -1
- package/dist/node/curator.d.ts +45 -0
- package/dist/node/curator.d.ts.map +1 -1
- package/dist/node/data-dir.d.ts +18 -0
- package/dist/node/data-dir.d.ts.map +1 -0
- package/dist/node/db.d.ts +85 -0
- package/dist/node/db.d.ts.map +1 -1
- package/dist/node/distillation.d.ts +2 -13
- package/dist/node/distillation.d.ts.map +1 -1
- package/dist/node/embedding-vendor.d.ts +22 -38
- package/dist/node/embedding-vendor.d.ts.map +1 -1
- package/dist/node/embedding-worker-types.d.ts +17 -12
- package/dist/node/embedding-worker-types.d.ts.map +1 -1
- package/dist/node/embedding-worker.d.ts +9 -2
- package/dist/node/embedding-worker.d.ts.map +1 -1
- package/dist/node/embedding-worker.js +38864 -33
- package/dist/node/embedding-worker.js.map +4 -4
- package/dist/node/embedding.d.ts +35 -23
- package/dist/node/embedding.d.ts.map +1 -1
- package/dist/node/gradient.d.ts +17 -1
- package/dist/node/gradient.d.ts.map +1 -1
- package/dist/node/import/detect.d.ts +14 -0
- package/dist/node/import/detect.d.ts.map +1 -0
- package/dist/node/import/extract.d.ts +43 -0
- package/dist/node/import/extract.d.ts.map +1 -0
- package/dist/node/import/history.d.ts +40 -0
- package/dist/node/import/history.d.ts.map +1 -0
- package/dist/node/import/index.d.ts +17 -0
- package/dist/node/import/index.d.ts.map +1 -0
- package/dist/node/import/providers/aider.d.ts +2 -0
- package/dist/node/import/providers/aider.d.ts.map +1 -0
- package/dist/node/import/providers/claude-code.d.ts +2 -0
- package/dist/node/import/providers/claude-code.d.ts.map +1 -0
- package/dist/node/import/providers/cline.d.ts +2 -0
- package/dist/node/import/providers/cline.d.ts.map +1 -0
- package/dist/node/import/providers/codex.d.ts +2 -0
- package/dist/node/import/providers/codex.d.ts.map +1 -0
- package/dist/node/import/providers/continue.d.ts +2 -0
- package/dist/node/import/providers/continue.d.ts.map +1 -0
- package/dist/node/import/providers/index.d.ts +19 -0
- package/dist/node/import/providers/index.d.ts.map +1 -0
- package/dist/node/import/providers/opencode.d.ts +2 -0
- package/dist/node/import/providers/opencode.d.ts.map +1 -0
- package/dist/node/import/providers/pi.d.ts +2 -0
- package/dist/node/import/providers/pi.d.ts.map +1 -0
- package/dist/node/import/types.d.ts +82 -0
- package/dist/node/import/types.d.ts.map +1 -0
- package/dist/node/index.d.ts +5 -2
- package/dist/node/index.d.ts.map +1 -1
- package/dist/node/index.js +3150 -439
- package/dist/node/index.js.map +4 -4
- package/dist/node/instruction-detect.d.ts +66 -0
- package/dist/node/instruction-detect.d.ts.map +1 -0
- package/dist/node/log.d.ts +9 -0
- package/dist/node/log.d.ts.map +1 -1
- package/dist/node/ltm.d.ts +139 -5
- package/dist/node/ltm.d.ts.map +1 -1
- package/dist/node/pattern-extract.d.ts +7 -0
- package/dist/node/pattern-extract.d.ts.map +1 -1
- package/dist/node/prompt.d.ts +1 -1
- package/dist/node/prompt.d.ts.map +1 -1
- package/dist/node/recall.d.ts.map +1 -1
- package/dist/node/search.d.ts +5 -3
- package/dist/node/search.d.ts.map +1 -1
- package/dist/node/session-limiter.d.ts +26 -0
- package/dist/node/session-limiter.d.ts.map +1 -0
- package/dist/node/temporal.d.ts +2 -0
- package/dist/node/temporal.d.ts.map +1 -1
- package/dist/node/types.d.ts +1 -1
- package/dist/types/agents-file.d.ts +4 -0
- package/dist/types/agents-file.d.ts.map +1 -1
- package/dist/types/config.d.ts +2 -0
- package/dist/types/config.d.ts.map +1 -1
- package/dist/types/curator.d.ts +45 -0
- package/dist/types/curator.d.ts.map +1 -1
- package/dist/types/data-dir.d.ts +18 -0
- package/dist/types/data-dir.d.ts.map +1 -0
- package/dist/types/db.d.ts +85 -0
- package/dist/types/db.d.ts.map +1 -1
- package/dist/types/distillation.d.ts +2 -13
- package/dist/types/distillation.d.ts.map +1 -1
- package/dist/types/embedding-vendor.d.ts +22 -38
- package/dist/types/embedding-vendor.d.ts.map +1 -1
- package/dist/types/embedding-worker-types.d.ts +17 -12
- package/dist/types/embedding-worker-types.d.ts.map +1 -1
- package/dist/types/embedding-worker.d.ts +9 -2
- package/dist/types/embedding-worker.d.ts.map +1 -1
- package/dist/types/embedding.d.ts +35 -23
- package/dist/types/embedding.d.ts.map +1 -1
- package/dist/types/gradient.d.ts +17 -1
- package/dist/types/gradient.d.ts.map +1 -1
- package/dist/types/import/detect.d.ts +14 -0
- package/dist/types/import/detect.d.ts.map +1 -0
- package/dist/types/import/extract.d.ts +43 -0
- package/dist/types/import/extract.d.ts.map +1 -0
- package/dist/types/import/history.d.ts +40 -0
- package/dist/types/import/history.d.ts.map +1 -0
- package/dist/types/import/index.d.ts +17 -0
- package/dist/types/import/index.d.ts.map +1 -0
- package/dist/types/import/providers/aider.d.ts +2 -0
- package/dist/types/import/providers/aider.d.ts.map +1 -0
- package/dist/types/import/providers/claude-code.d.ts +2 -0
- package/dist/types/import/providers/claude-code.d.ts.map +1 -0
- package/dist/types/import/providers/cline.d.ts +2 -0
- package/dist/types/import/providers/cline.d.ts.map +1 -0
- package/dist/types/import/providers/codex.d.ts +2 -0
- package/dist/types/import/providers/codex.d.ts.map +1 -0
- package/dist/types/import/providers/continue.d.ts +2 -0
- package/dist/types/import/providers/continue.d.ts.map +1 -0
- package/dist/types/import/providers/index.d.ts +19 -0
- package/dist/types/import/providers/index.d.ts.map +1 -0
- package/dist/types/import/providers/opencode.d.ts +2 -0
- package/dist/types/import/providers/opencode.d.ts.map +1 -0
- package/dist/types/import/providers/pi.d.ts +2 -0
- package/dist/types/import/providers/pi.d.ts.map +1 -0
- package/dist/types/import/types.d.ts +82 -0
- package/dist/types/import/types.d.ts.map +1 -0
- package/dist/types/index.d.ts +5 -2
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/instruction-detect.d.ts +66 -0
- package/dist/types/instruction-detect.d.ts.map +1 -0
- package/dist/types/log.d.ts +9 -0
- package/dist/types/log.d.ts.map +1 -1
- package/dist/types/ltm.d.ts +139 -5
- package/dist/types/ltm.d.ts.map +1 -1
- package/dist/types/pattern-extract.d.ts +7 -0
- package/dist/types/pattern-extract.d.ts.map +1 -1
- package/dist/types/prompt.d.ts +1 -1
- package/dist/types/prompt.d.ts.map +1 -1
- package/dist/types/recall.d.ts.map +1 -1
- package/dist/types/search.d.ts +5 -3
- package/dist/types/search.d.ts.map +1 -1
- package/dist/types/session-limiter.d.ts +26 -0
- package/dist/types/session-limiter.d.ts.map +1 -0
- package/dist/types/temporal.d.ts +2 -0
- package/dist/types/temporal.d.ts.map +1 -1
- package/dist/types/types.d.ts +1 -1
- package/package.json +3 -4
- package/src/agents-file.ts +41 -13
- package/src/config.ts +31 -18
- package/src/curator.ts +163 -75
- package/src/data-dir.ts +76 -0
- package/src/db.ts +457 -11
- package/src/distillation.ts +65 -16
- package/src/embedding-vendor.ts +23 -40
- package/src/embedding-worker-types.ts +19 -11
- package/src/embedding-worker.ts +111 -47
- package/src/embedding.ts +224 -174
- package/src/gradient.ts +192 -75
- package/src/import/detect.ts +37 -0
- package/src/import/extract.ts +137 -0
- package/src/import/history.ts +99 -0
- package/src/import/index.ts +45 -0
- package/src/import/providers/aider.ts +207 -0
- package/src/import/providers/claude-code.ts +339 -0
- package/src/import/providers/cline.ts +324 -0
- package/src/import/providers/codex.ts +369 -0
- package/src/import/providers/continue.ts +304 -0
- package/src/import/providers/index.ts +32 -0
- package/src/import/providers/opencode.ts +272 -0
- package/src/import/providers/pi.ts +332 -0
- package/src/import/types.ts +91 -0
- package/src/index.ts +13 -0
- package/src/instruction-detect.ts +275 -0
- package/src/log.ts +91 -3
- package/src/ltm.ts +789 -41
- package/src/pattern-extract.ts +41 -0
- package/src/prompt.ts +7 -1
- package/src/recall.ts +43 -5
- package/src/search.ts +7 -5
- package/src/session-limiter.ts +47 -0
- package/src/temporal.ts +18 -6
- package/src/types.ts +1 -1
package/dist/bun/index.js
CHANGED
|
@@ -125,6 +125,7 @@ __export(temporal_exports, {
|
|
|
125
125
|
CHUNK_TERMINATOR: () => CHUNK_TERMINATOR,
|
|
126
126
|
bySession: () => bySession,
|
|
127
127
|
count: () => count,
|
|
128
|
+
hasMessages: () => hasMessages,
|
|
128
129
|
markDistilled: () => markDistilled,
|
|
129
130
|
partsToText: () => partsToText,
|
|
130
131
|
prune: () => prune,
|
|
@@ -145,9 +146,8 @@ function sha256(input) {
|
|
|
145
146
|
}
|
|
146
147
|
|
|
147
148
|
// src/db.ts
|
|
148
|
-
import { join, dirname } from "path";
|
|
149
|
+
import { join as join2, dirname } from "path";
|
|
149
150
|
import { mkdirSync } from "fs";
|
|
150
|
-
import { homedir } from "os";
|
|
151
151
|
|
|
152
152
|
// src/git.ts
|
|
153
153
|
import { execSync } from "child_process";
|
|
@@ -210,6 +210,36 @@ function getGitRemote(path) {
|
|
|
210
210
|
}
|
|
211
211
|
}
|
|
212
212
|
|
|
213
|
+
// src/data-dir.ts
|
|
214
|
+
import { existsSync, renameSync } from "node:fs";
|
|
215
|
+
import { join } from "node:path";
|
|
216
|
+
import { homedir } from "node:os";
|
|
217
|
+
var OLD_DIR_NAME = "opencode-lore";
|
|
218
|
+
var NEW_DIR_NAME = "lore";
|
|
219
|
+
var migrationAttempted = false;
|
|
220
|
+
function baseDir() {
|
|
221
|
+
return process.env.XDG_DATA_HOME || join(homedir(), ".local", "share");
|
|
222
|
+
}
|
|
223
|
+
function migrateDataDir() {
|
|
224
|
+
if (migrationAttempted) return;
|
|
225
|
+
migrationAttempted = true;
|
|
226
|
+
if (process.env.NODE_ENV === "test") return;
|
|
227
|
+
const base = baseDir();
|
|
228
|
+
const oldDir = join(base, OLD_DIR_NAME);
|
|
229
|
+
const newDir = join(base, NEW_DIR_NAME);
|
|
230
|
+
try {
|
|
231
|
+
if (existsSync(oldDir) && !existsSync(newDir)) {
|
|
232
|
+
renameSync(oldDir, newDir);
|
|
233
|
+
console.error(`[lore] migrated data directory: ${oldDir} \u2192 ${newDir}`);
|
|
234
|
+
}
|
|
235
|
+
} catch {
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
function dataDir() {
|
|
239
|
+
migrateDataDir();
|
|
240
|
+
return join(baseDir(), NEW_DIR_NAME);
|
|
241
|
+
}
|
|
242
|
+
|
|
213
243
|
// src/db.ts
|
|
214
244
|
function repoNameFromRemote(remote) {
|
|
215
245
|
if (!remote) return null;
|
|
@@ -646,17 +676,123 @@ var MIGRATIONS = [
|
|
|
646
676
|
ALTER TABLE session_state ADD COLUMN ttl_savings REAL NOT NULL DEFAULT 0;
|
|
647
677
|
ALTER TABLE session_state ADD COLUMN ttl_hits INTEGER NOT NULL DEFAULT 0;
|
|
648
678
|
ALTER TABLE session_state ADD COLUMN batch_savings REAL NOT NULL DEFAULT 0;
|
|
679
|
+
`,
|
|
680
|
+
`
|
|
681
|
+
-- Version 19: Import history for conversation import idempotency.
|
|
682
|
+
-- Tracks which external agent sessions have been imported to prevent
|
|
683
|
+
-- re-importing unchanged sources and to record user-declined imports.
|
|
684
|
+
CREATE TABLE IF NOT EXISTS import_history (
|
|
685
|
+
id TEXT PRIMARY KEY,
|
|
686
|
+
project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
|
|
687
|
+
agent_name TEXT NOT NULL,
|
|
688
|
+
source_id TEXT NOT NULL,
|
|
689
|
+
source_hash TEXT NOT NULL,
|
|
690
|
+
entries_created INTEGER NOT NULL DEFAULT 0,
|
|
691
|
+
entries_updated INTEGER NOT NULL DEFAULT 0,
|
|
692
|
+
imported_at INTEGER NOT NULL,
|
|
693
|
+
UNIQUE(project_id, agent_name, source_id)
|
|
694
|
+
);
|
|
695
|
+
CREATE INDEX IF NOT EXISTS idx_import_history_project ON import_history(project_id);
|
|
696
|
+
`,
|
|
697
|
+
`
|
|
698
|
+
-- Version 20: Purge worker boilerplate from temporal messages.
|
|
699
|
+
-- Legacy gateway/plugin worker calls (distillation observer, curator,
|
|
700
|
+
-- consolidation, reflector, eval) stored their full system prompts
|
|
701
|
+
-- (containing entire conversation transcripts, up to 1.6MB each) as
|
|
702
|
+
-- temporal messages. These pollute FTS search results by matching
|
|
703
|
+
-- virtually any domain keyword. Safe to delete: their actual output
|
|
704
|
+
-- (distillations, knowledge entries) is stored in dedicated tables.
|
|
705
|
+
DELETE FROM temporal_messages WHERE content LIKE '%You are a memory observer.%'
|
|
706
|
+
OR content LIKE '%You are a long-term memory curator.%'
|
|
707
|
+
OR content LIKE '%You are a long-term memory curator performing a consolidation pass.%'
|
|
708
|
+
OR content LIKE '%You are a memory reflector.%'
|
|
709
|
+
OR content LIKE '%You are evaluating distillation quality.%';
|
|
710
|
+
`,
|
|
711
|
+
`
|
|
712
|
+
-- Version 21: Persist avoided compaction data from live sessions.
|
|
713
|
+
-- Historical estimates previously re-simulated avoided compactions from
|
|
714
|
+
-- temporal message token estimates (chars/3), missing system prompt and
|
|
715
|
+
-- tool definition overhead. Persisting the live session's real shadow
|
|
716
|
+
-- context tracking (from actual API-reported total input tokens) gives
|
|
717
|
+
-- accurate post-restart historical estimates.
|
|
718
|
+
ALTER TABLE session_state ADD COLUMN avoided_compactions INTEGER NOT NULL DEFAULT 0;
|
|
719
|
+
ALTER TABLE session_state ADD COLUMN avoided_compaction_cost REAL NOT NULL DEFAULT 0;
|
|
720
|
+
`,
|
|
721
|
+
`
|
|
722
|
+
-- Version 22: Track when conversation import was last offered/run.
|
|
723
|
+
-- NULL means import has never been offered for this project.
|
|
724
|
+
-- Used by auto-import to avoid re-prompting, and by explicit
|
|
725
|
+
-- \`lore import\` for incremental imports (only newer conversations).
|
|
726
|
+
ALTER TABLE projects ADD COLUMN last_import_at INTEGER;
|
|
727
|
+
|
|
728
|
+
-- Backfill: migrate legacy __declined__ sentinel rows so existing
|
|
729
|
+
-- users who previously declined are not re-prompted after upgrading.
|
|
730
|
+
UPDATE projects SET last_import_at = (
|
|
731
|
+
SELECT ih.imported_at FROM import_history ih
|
|
732
|
+
WHERE ih.project_id = projects.id
|
|
733
|
+
AND ih.source_id = '__declined__'
|
|
734
|
+
LIMIT 1
|
|
735
|
+
)
|
|
736
|
+
WHERE EXISTS (
|
|
737
|
+
SELECT 1 FROM import_history ih
|
|
738
|
+
WHERE ih.project_id = projects.id
|
|
739
|
+
AND ih.source_id = '__declined__'
|
|
740
|
+
);
|
|
741
|
+
`,
|
|
742
|
+
`
|
|
743
|
+
-- Version 23: Persist volatile session tracking state across restarts.
|
|
744
|
+
-- Previously these were in-memory only, causing duplicate processing,
|
|
745
|
+
-- false compaction detection, and expensive prompt cache busts on restart.
|
|
746
|
+
ALTER TABLE session_state ADD COLUMN last_curated_at INTEGER NOT NULL DEFAULT 0;
|
|
747
|
+
ALTER TABLE session_state ADD COLUMN message_count INTEGER NOT NULL DEFAULT 0;
|
|
748
|
+
ALTER TABLE session_state ADD COLUMN turns_since_curation INTEGER NOT NULL DEFAULT 0;
|
|
749
|
+
ALTER TABLE session_state ADD COLUMN ltm_cache_text TEXT;
|
|
750
|
+
ALTER TABLE session_state ADD COLUMN ltm_cache_tokens INTEGER;
|
|
751
|
+
ALTER TABLE session_state ADD COLUMN ltm_pin_text TEXT;
|
|
752
|
+
ALTER TABLE session_state ADD COLUMN ltm_pin_tokens INTEGER;
|
|
753
|
+
ALTER TABLE session_state ADD COLUMN consecutive_text_only_turns INTEGER NOT NULL DEFAULT 0;
|
|
754
|
+
`,
|
|
755
|
+
`
|
|
756
|
+
-- Version 24: Persist remaining volatile session state across restarts.
|
|
757
|
+
-- Session identity (Tier 1/2/3 session correlation)
|
|
758
|
+
ALTER TABLE session_state ADD COLUMN fingerprint TEXT NOT NULL DEFAULT '';
|
|
759
|
+
ALTER TABLE session_state ADD COLUMN header_session_id TEXT;
|
|
760
|
+
ALTER TABLE session_state ADD COLUMN header_name TEXT;
|
|
761
|
+
-- Cache warming state
|
|
762
|
+
ALTER TABLE session_state ADD COLUMN resolved_conversation_ttl TEXT NOT NULL DEFAULT '5m';
|
|
763
|
+
ALTER TABLE session_state ADD COLUMN warmup_state TEXT;
|
|
764
|
+
-- Gradient calibration state (survives restarts to avoid uncalibrated busts)
|
|
765
|
+
ALTER TABLE session_state ADD COLUMN dynamic_context_cap REAL NOT NULL DEFAULT 0;
|
|
766
|
+
ALTER TABLE session_state ADD COLUMN bust_rate_ema REAL NOT NULL DEFAULT -1;
|
|
767
|
+
ALTER TABLE session_state ADD COLUMN inter_bust_interval_ema REAL NOT NULL DEFAULT -1;
|
|
768
|
+
ALTER TABLE session_state ADD COLUMN last_layer INTEGER NOT NULL DEFAULT 0;
|
|
769
|
+
ALTER TABLE session_state ADD COLUMN last_known_input INTEGER NOT NULL DEFAULT 0;
|
|
770
|
+
ALTER TABLE session_state ADD COLUMN last_turn_at INTEGER NOT NULL DEFAULT 0;
|
|
771
|
+
ALTER TABLE session_state ADD COLUMN last_bust_at INTEGER NOT NULL DEFAULT 0;
|
|
772
|
+
`,
|
|
773
|
+
`
|
|
774
|
+
-- Version 25: Adaptive dedup threshold \u2014 store accept/reject feedback
|
|
775
|
+
-- on embedding-based duplicate pairs for per-project threshold calibration.
|
|
776
|
+
-- Titles stored instead of FK IDs because entries are deleted during dedup;
|
|
777
|
+
-- the similarity float is the actual calibration input.
|
|
778
|
+
CREATE TABLE IF NOT EXISTS dedup_feedback (
|
|
779
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
780
|
+
project_id TEXT,
|
|
781
|
+
entry_a_title TEXT NOT NULL,
|
|
782
|
+
entry_b_title TEXT NOT NULL,
|
|
783
|
+
similarity REAL NOT NULL,
|
|
784
|
+
accepted INTEGER NOT NULL,
|
|
785
|
+
source TEXT NOT NULL DEFAULT 'manual',
|
|
786
|
+
created_at INTEGER NOT NULL
|
|
787
|
+
);
|
|
788
|
+
CREATE INDEX IF NOT EXISTS idx_dedup_feedback_project
|
|
789
|
+
ON dedup_feedback(project_id);
|
|
649
790
|
`
|
|
650
791
|
];
|
|
651
|
-
function dataDir() {
|
|
652
|
-
const xdg = process.env.XDG_DATA_HOME;
|
|
653
|
-
const base = xdg || join(homedir(), ".local", "share");
|
|
654
|
-
return join(base, "opencode-lore");
|
|
655
|
-
}
|
|
656
792
|
function dbPath() {
|
|
657
793
|
const envPath = process.env.LORE_DB_PATH;
|
|
658
794
|
if (envPath) return envPath;
|
|
659
|
-
return join(dataDir(), "lore.db");
|
|
795
|
+
return join2(dataDir(), "lore.db");
|
|
660
796
|
}
|
|
661
797
|
var instance;
|
|
662
798
|
function db() {
|
|
@@ -674,7 +810,7 @@ function db() {
|
|
|
674
810
|
}
|
|
675
811
|
const dir = dataDir();
|
|
676
812
|
mkdirSync(dir, { recursive: true });
|
|
677
|
-
path = join(dir, "lore.db");
|
|
813
|
+
path = join2(dir, "lore.db");
|
|
678
814
|
}
|
|
679
815
|
const database = new Database(path);
|
|
680
816
|
database.exec("PRAGMA journal_mode = WAL");
|
|
@@ -787,6 +923,11 @@ function close() {
|
|
|
787
923
|
}
|
|
788
924
|
}
|
|
789
925
|
function ensureProject(path, name) {
|
|
926
|
+
if (!process.env.LORE_DB_PATH && /^\/test\//.test(path)) {
|
|
927
|
+
throw new Error(
|
|
928
|
+
`Refusing to create project with test path "${path}" in the production DB. Set LORE_DB_PATH to a temp path, or run tests via \`bun test\` from the repo root.`
|
|
929
|
+
);
|
|
930
|
+
}
|
|
790
931
|
const existing = db().query("SELECT id, git_remote FROM projects WHERE path = ?").get(path);
|
|
791
932
|
if (existing) {
|
|
792
933
|
if (!existing.git_remote) {
|
|
@@ -841,6 +982,15 @@ function isFirstRun() {
|
|
|
841
982
|
const row = db().query("SELECT COUNT(*) as count FROM projects").get();
|
|
842
983
|
return row.count === 0;
|
|
843
984
|
}
|
|
985
|
+
function getLastImportAt(projectPath) {
|
|
986
|
+
const id = ensureProject(projectPath);
|
|
987
|
+
const row = db().query("SELECT last_import_at FROM projects WHERE id = ?").get(id);
|
|
988
|
+
return row?.last_import_at ?? null;
|
|
989
|
+
}
|
|
990
|
+
function setLastImportAt(projectPath, timestamp) {
|
|
991
|
+
const id = ensureProject(projectPath);
|
|
992
|
+
db().query("UPDATE projects SET last_import_at = ? WHERE id = ?").run(timestamp, id);
|
|
993
|
+
}
|
|
844
994
|
function loadForceMinLayer(sessionID) {
|
|
845
995
|
const row = db().query("SELECT force_min_layer FROM session_state WHERE session_id = ?").get(sessionID);
|
|
846
996
|
return row?.force_min_layer ?? 0;
|
|
@@ -859,8 +1009,9 @@ function saveSessionCosts(sessionID, costs) {
|
|
|
859
1009
|
`INSERT INTO session_state (session_id, force_min_layer, updated_at,
|
|
860
1010
|
conversation_cost, worker_cost, conversation_turns,
|
|
861
1011
|
cache_read_tokens, cache_write_tokens,
|
|
862
|
-
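warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings)
|
|
863
|
-
VALUES (?, COALESCE((SELECT force_min_layer FROM session_state WHERE session_id = ?), 0), ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)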
|
1012
|
+
warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings,
|
|
1013
|
+
avoided_compactions, avoided_compaction_cost)
|
|
1014
|
+
VALUES (?, COALESCE((SELECT force_min_layer FROM session_state WHERE session_id = ?), 0), ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
864
1015
|
ON CONFLICT(session_id) DO UPDATE SET
|
|
865
1016
|
conversation_cost = excluded.conversation_cost,
|
|
866
1017
|
worker_cost = excluded.worker_cost,
|
|
@@ -872,6 +1023,8 @@ function saveSessionCosts(sessionID, costs) {
|
|
|
872
1023
|
ttl_savings = excluded.ttl_savings,
|
|
873
1024
|
ttl_hits = excluded.ttl_hits,
|
|
874
1025
|
batch_savings = excluded.batch_savings,
|
|
1026
|
+
avoided_compactions = excluded.avoided_compactions,
|
|
1027
|
+
avoided_compaction_cost = excluded.avoided_compaction_cost,
|
|
875
1028
|
updated_at = excluded.updated_at`
|
|
876
1029
|
).run(
|
|
877
1030
|
sessionID,
|
|
@@ -886,14 +1039,17 @@ function saveSessionCosts(sessionID, costs) {
|
|
|
886
1039
|
costs.warmupHits,
|
|
887
1040
|
costs.ttlSavings,
|
|
888
1041
|
costs.ttlHits,
|
|
889
|
-
costs.batchSavings
|
|
1042
|
+
costs.batchSavings,
|
|
1043
|
+
costs.avoidedCompactions,
|
|
1044
|
+
costs.avoidedCompactionCost
|
|
890
1045
|
);
|
|
891
1046
|
}
|
|
892
1047
|
function loadSessionCosts(sessionID) {
|
|
893
1048
|
const row = db().query(
|
|
894
1049
|
`SELECT conversation_cost, worker_cost, conversation_turns,
|
|
895
1050
|
cache_read_tokens, cache_write_tokens,
|
|
896
|
-
warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings
|
|
1051
|
+
warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings,
|
|
1052
|
+
avoided_compactions, avoided_compaction_cost
|
|
897
1053
|
FROM session_state WHERE session_id = ?`
|
|
898
1054
|
).get(sessionID);
|
|
899
1055
|
if (!row) return null;
|
|
@@ -907,14 +1063,17 @@ function loadSessionCosts(sessionID) {
|
|
|
907
1063
|
warmupHits: row.warmup_hits,
|
|
908
1064
|
ttlSavings: row.ttl_savings,
|
|
909
1065
|
ttlHits: row.ttl_hits,
|
|
910
|
-
batchSavings: row.batch_savings
|
|
1066
|
+
batchSavings: row.batch_savings,
|
|
1067
|
+
avoidedCompactions: row.avoided_compactions,
|
|
1068
|
+
avoidedCompactionCost: row.avoided_compaction_cost
|
|
911
1069
|
};
|
|
912
1070
|
}
|
|
913
1071
|
function loadAllSessionCosts() {
|
|
914
1072
|
const rows = db().query(
|
|
915
1073
|
`SELECT session_id, conversation_cost, worker_cost, conversation_turns,
|
|
916
1074
|
cache_read_tokens, cache_write_tokens,
|
|
917
|
-
warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings
|
|
1075
|
+
warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings,
|
|
1076
|
+
avoided_compactions, avoided_compaction_cost
|
|
918
1077
|
FROM session_state
|
|
919
1078
|
WHERE conversation_turns > 0 OR warmup_savings > 0 OR ttl_savings > 0 OR batch_savings > 0`
|
|
920
1079
|
).all();
|
|
@@ -930,11 +1089,160 @@ function loadAllSessionCosts() {
|
|
|
930
1089
|
warmupHits: row.warmup_hits,
|
|
931
1090
|
ttlSavings: row.ttl_savings,
|
|
932
1091
|
ttlHits: row.ttl_hits,
|
|
933
|
-
batchSavings: row.batch_savings
|
|
1092
|
+
batchSavings: row.batch_savings,
|
|
1093
|
+
avoidedCompactions: row.avoided_compactions,
|
|
1094
|
+
avoidedCompactionCost: row.avoided_compaction_cost
|
|
934
1095
|
});
|
|
935
1096
|
}
|
|
936
1097
|
return result;
|
|
937
1098
|
}
|
|
1099
|
+
function saveSessionTracking(sessionID, state) {
|
|
1100
|
+
const now = Date.now();
|
|
1101
|
+
db().query(
|
|
1102
|
+
"INSERT OR IGNORE INTO session_state (session_id, force_min_layer, updated_at) VALUES (?, 0, ?)"
|
|
1103
|
+
).run(sessionID, now);
|
|
1104
|
+
const sets = ["updated_at = ?"];
|
|
1105
|
+
const vals = [now];
|
|
1106
|
+
if (state.lastCuratedAt !== void 0) {
|
|
1107
|
+
sets.push("last_curated_at = ?");
|
|
1108
|
+
vals.push(state.lastCuratedAt);
|
|
1109
|
+
}
|
|
1110
|
+
if (state.messageCount !== void 0) {
|
|
1111
|
+
sets.push("message_count = ?");
|
|
1112
|
+
vals.push(state.messageCount);
|
|
1113
|
+
}
|
|
1114
|
+
if (state.turnsSinceCuration !== void 0) {
|
|
1115
|
+
sets.push("turns_since_curation = ?");
|
|
1116
|
+
vals.push(state.turnsSinceCuration);
|
|
1117
|
+
}
|
|
1118
|
+
if (state.consecutiveTextOnlyTurns !== void 0) {
|
|
1119
|
+
sets.push("consecutive_text_only_turns = ?");
|
|
1120
|
+
vals.push(state.consecutiveTextOnlyTurns);
|
|
1121
|
+
}
|
|
1122
|
+
if (state.ltmCacheText !== void 0) {
|
|
1123
|
+
sets.push("ltm_cache_text = ?");
|
|
1124
|
+
vals.push(state.ltmCacheText);
|
|
1125
|
+
}
|
|
1126
|
+
if (state.ltmCacheTokens !== void 0) {
|
|
1127
|
+
sets.push("ltm_cache_tokens = ?");
|
|
1128
|
+
vals.push(state.ltmCacheTokens);
|
|
1129
|
+
}
|
|
1130
|
+
if (state.ltmPinText !== void 0) {
|
|
1131
|
+
sets.push("ltm_pin_text = ?");
|
|
1132
|
+
vals.push(state.ltmPinText);
|
|
1133
|
+
}
|
|
1134
|
+
if (state.ltmPinTokens !== void 0) {
|
|
1135
|
+
sets.push("ltm_pin_tokens = ?");
|
|
1136
|
+
vals.push(state.ltmPinTokens);
|
|
1137
|
+
}
|
|
1138
|
+
if (state.fingerprint !== void 0) {
|
|
1139
|
+
sets.push("fingerprint = ?");
|
|
1140
|
+
vals.push(state.fingerprint);
|
|
1141
|
+
}
|
|
1142
|
+
if (state.headerSessionId !== void 0) {
|
|
1143
|
+
sets.push("header_session_id = ?");
|
|
1144
|
+
vals.push(state.headerSessionId);
|
|
1145
|
+
}
|
|
1146
|
+
if (state.headerName !== void 0) {
|
|
1147
|
+
sets.push("header_name = ?");
|
|
1148
|
+
vals.push(state.headerName);
|
|
1149
|
+
}
|
|
1150
|
+
if (state.resolvedConversationTTL !== void 0) {
|
|
1151
|
+
sets.push("resolved_conversation_ttl = ?");
|
|
1152
|
+
vals.push(state.resolvedConversationTTL);
|
|
1153
|
+
}
|
|
1154
|
+
if (state.warmupState !== void 0) {
|
|
1155
|
+
sets.push("warmup_state = ?");
|
|
1156
|
+
vals.push(state.warmupState);
|
|
1157
|
+
}
|
|
1158
|
+
if (state.dynamicContextCap !== void 0) {
|
|
1159
|
+
sets.push("dynamic_context_cap = ?");
|
|
1160
|
+
vals.push(state.dynamicContextCap);
|
|
1161
|
+
}
|
|
1162
|
+
if (state.bustRateEMA !== void 0) {
|
|
1163
|
+
sets.push("bust_rate_ema = ?");
|
|
1164
|
+
vals.push(state.bustRateEMA);
|
|
1165
|
+
}
|
|
1166
|
+
if (state.interBustIntervalEMA !== void 0) {
|
|
1167
|
+
sets.push("inter_bust_interval_ema = ?");
|
|
1168
|
+
vals.push(state.interBustIntervalEMA);
|
|
1169
|
+
}
|
|
1170
|
+
if (state.lastLayer !== void 0) {
|
|
1171
|
+
sets.push("last_layer = ?");
|
|
1172
|
+
vals.push(state.lastLayer);
|
|
1173
|
+
}
|
|
1174
|
+
if (state.lastKnownInput !== void 0) {
|
|
1175
|
+
sets.push("last_known_input = ?");
|
|
1176
|
+
vals.push(state.lastKnownInput);
|
|
1177
|
+
}
|
|
1178
|
+
if (state.lastTurnAt !== void 0) {
|
|
1179
|
+
sets.push("last_turn_at = ?");
|
|
1180
|
+
vals.push(state.lastTurnAt);
|
|
1181
|
+
}
|
|
1182
|
+
if (state.lastBustAt !== void 0) {
|
|
1183
|
+
sets.push("last_bust_at = ?");
|
|
1184
|
+
vals.push(state.lastBustAt);
|
|
1185
|
+
}
|
|
1186
|
+
db().query(
|
|
1187
|
+
"UPDATE session_state SET " + sets.join(", ") + " WHERE session_id = ?"
|
|
1188
|
+
).run(...vals, sessionID);
|
|
1189
|
+
}
|
|
1190
|
+
function loadSessionTracking(sessionID) {
|
|
1191
|
+
const row = db().query(
|
|
1192
|
+
`SELECT last_curated_at, message_count, turns_since_curation,
|
|
1193
|
+
consecutive_text_only_turns,
|
|
1194
|
+
ltm_cache_text, ltm_cache_tokens, ltm_pin_text, ltm_pin_tokens,
|
|
1195
|
+
fingerprint, header_session_id, header_name,
|
|
1196
|
+
resolved_conversation_ttl, warmup_state,
|
|
1197
|
+
dynamic_context_cap, bust_rate_ema, inter_bust_interval_ema,
|
|
1198
|
+
last_layer, last_known_input, last_turn_at, last_bust_at
|
|
1199
|
+
FROM session_state WHERE session_id = ?`
|
|
1200
|
+
).get(sessionID);
|
|
1201
|
+
if (!row) return null;
|
|
1202
|
+
return {
|
|
1203
|
+
lastCuratedAt: row.last_curated_at,
|
|
1204
|
+
messageCount: row.message_count,
|
|
1205
|
+
turnsSinceCuration: row.turns_since_curation,
|
|
1206
|
+
consecutiveTextOnlyTurns: row.consecutive_text_only_turns,
|
|
1207
|
+
ltmCacheText: row.ltm_cache_text,
|
|
1208
|
+
ltmCacheTokens: row.ltm_cache_tokens,
|
|
1209
|
+
ltmPinText: row.ltm_pin_text,
|
|
1210
|
+
ltmPinTokens: row.ltm_pin_tokens,
|
|
1211
|
+
fingerprint: row.fingerprint,
|
|
1212
|
+
headerSessionId: row.header_session_id,
|
|
1213
|
+
headerName: row.header_name,
|
|
1214
|
+
resolvedConversationTTL: row.resolved_conversation_ttl,
|
|
1215
|
+
warmupState: row.warmup_state,
|
|
1216
|
+
dynamicContextCap: row.dynamic_context_cap,
|
|
1217
|
+
bustRateEMA: row.bust_rate_ema,
|
|
1218
|
+
interBustIntervalEMA: row.inter_bust_interval_ema,
|
|
1219
|
+
lastLayer: row.last_layer,
|
|
1220
|
+
lastKnownInput: row.last_known_input,
|
|
1221
|
+
lastTurnAt: row.last_turn_at,
|
|
1222
|
+
lastBustAt: row.last_bust_at
|
|
1223
|
+
};
|
|
1224
|
+
}
|
|
1225
|
+
/**
 * List every session that has both a header session id and a header name.
 *
 * @returns {Array<{sessionId: *, headerSessionId: *, headerName: *}>} One
 *   entry per `session_state` row where `header_session_id` and `header_name`
 *   are both non-NULL, in the order the query returns them.
 */
function loadHeaderSessionIndex() {
  const sql = [
    "SELECT session_id, header_session_id, header_name",
    "FROM session_state",
    "WHERE header_session_id IS NOT NULL AND header_name IS NOT NULL"
  ].join("\n");
  const index = [];
  for (const entry of db().query(sql).all()) {
    index.push({
      sessionId: entry.session_id,
      headerSessionId: entry.header_session_id,
      headerName: entry.header_name
    });
  }
  return index;
}
|
|
1237
|
+
/**
 * Look up a value in the `kv_meta` table.
 *
 * @param key - Value bound to the `key = ?` placeholder.
 * @returns The stored `value`, or `null` when there is no matching row or the
 *   stored value is null/undefined.
 */
function getKV(key) {
  const hit = db().query("SELECT value FROM kv_meta WHERE key = ?").get(key);
  if (hit === null || hit === void 0) return null;
  return hit.value ?? null;
}
|
|
1241
|
+
/**
 * Insert or overwrite a value in the `kv_meta` table.
 *
 * Uses a single upsert statement: a new row is inserted, and when a row with
 * the same `key` already exists its `value` is replaced instead.
 *
 * @param key - Key of the entry to write.
 * @param value - Value to store under `key`.
 */
function setKV(key, value) {
  // `excluded.value` refers to the value from the attempted INSERT, so each
  // parameter is bound exactly once instead of passing `value` twice.
  db().query(
    "INSERT INTO kv_meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value = excluded.value"
  ).run(key, value);
}
|
|
938
1246
|
function getMeta(key) {
|
|
939
1247
|
const row = db().query("SELECT value FROM metadata WHERE key = ?").get(key);
|
|
940
1248
|
return row?.value ?? null;
|
|
@@ -9753,7 +10061,7 @@ var handle = {
|
|
|
9753
10061
|
};
|
|
9754
10062
|
|
|
9755
10063
|
// ../../node_modules/.bun/mdast-util-to-markdown@2.1.2/node_modules/mdast-util-to-markdown/lib/join.js
|
|
9756
|
-
var
|
|
10064
|
+
var join3 = [joinDefaults];
|
|
9757
10065
|
function joinDefaults(left, right, parent, state) {
|
|
9758
10066
|
if (right.type === "code" && formatCodeAsIndented(right, state) && (left.type === "list" || left.type === right.type && formatCodeAsIndented(left, state))) {
|
|
9759
10067
|
return false;
|
|
@@ -10173,7 +10481,7 @@ function toMarkdown(tree, options) {
|
|
|
10173
10481
|
handle: void 0,
|
|
10174
10482
|
indentLines,
|
|
10175
10483
|
indexStack: [],
|
|
10176
|
-
join: [...
|
|
10484
|
+
join: [...join3],
|
|
10177
10485
|
options: {},
|
|
10178
10486
|
safe: safeBound,
|
|
10179
10487
|
stack: [],
|
|
@@ -11898,6 +12206,10 @@ Focus ONLY on knowledge that helps a coding agent work effectively on THIS codeb
|
|
|
11898
12206
|
- Environment/tooling setup details that affect development
|
|
11899
12207
|
- Important relationships between components that aren't obvious from reading the code
|
|
11900
12208
|
- User preferences and working style specific to how they use this project
|
|
12209
|
+
- Repeated user instructions \u2014 when the user says things like "always", "never",
|
|
12210
|
+
"make sure to", "don't forget to", these are high-value preference candidates.
|
|
12211
|
+
If you see instruction-like language, prioritize extracting it as a "preference" entry.
|
|
12212
|
+
These instructions represent how the user wants to work and should persist across sessions.
|
|
11901
12213
|
|
|
11902
12214
|
Do NOT extract:
|
|
11903
12215
|
- Task-specific details (file currently being edited, current bug being fixed)
|
|
@@ -11982,7 +12294,9 @@ IMPORTANT:
|
|
|
11982
12294
|
2. When updating, REPLACE the content with a complete rewrite \u2014 never append.
|
|
11983
12295
|
3. If entries cover the same system from different angles, merge them: update one, delete the rest.
|
|
11984
12296
|
4. Only create a new entry for genuinely distinct knowledge with no existing home.
|
|
11985
|
-
5. Keep all entries under 150 words. If an existing entry is too long, use an update op to trim it
|
|
12297
|
+
5. Keep all entries under 150 words. If an existing entry is too long, use an update op to trim it.
|
|
12298
|
+
6. Pay special attention to user instructions ("always do X", "never do Y", "make sure to X").
|
|
12299
|
+
These are strong signals for "preference" entries with high confidence.`;
|
|
11986
12300
|
}
|
|
11987
12301
|
var CONSOLIDATION_SYSTEM = `You are a long-term memory curator performing a consolidation pass. The knowledge base has grown too large and needs to be trimmed.
|
|
11988
12302
|
|
|
@@ -12146,9 +12460,12 @@ var log_exports = {};
|
|
|
12146
12460
|
__export(log_exports, {
|
|
12147
12461
|
error: () => error,
|
|
12148
12462
|
info: () => info,
|
|
12463
|
+
logFilePath: () => logFilePath,
|
|
12149
12464
|
registerSink: () => registerSink,
|
|
12150
12465
|
warn: () => warn
|
|
12151
12466
|
});
|
|
12467
|
+
import { appendFileSync, renameSync as renameSync2, statSync, mkdirSync as mkdirSync2 } from "node:fs";
|
|
12468
|
+
import { join as join4 } from "node:path";
|
|
12152
12469
|
var sink = null;
|
|
12153
12470
|
function registerSink(s) {
|
|
12154
12471
|
sink = s;
|
|
@@ -12163,17 +12480,71 @@ function findError(args) {
|
|
|
12163
12480
|
}
|
|
12164
12481
|
return void 0;
|
|
12165
12482
|
}
|
|
12483
|
+
// Size threshold (5 MiB) above which maybeRotate() renames the log file.
var LOG_MAX_BYTES = 5 * 1024 * 1024;
// writeToFile() consults maybeRotate() based on this write-count interval.
var ROTATION_CHECK_INTERVAL = 1e3;
// Cached result of resolveLogPath(); undefined means file logging is off.
var logPath;
// True once logFilePath() has attempted resolution (successful or not).
var logPathResolved = false;
// Number of writeToFile() calls that reached the write stage in this process.
var writeCount = 0;
/**
 * Work out where the log file lives, creating its directory if needed.
 *
 * @returns {string | undefined} `<dataDir()>/lore.log`, or `undefined` when
 *   `NODE_ENV` is "test" or when resolving/creating the directory throws.
 */
function resolveLogPath() {
  if (process.env.NODE_ENV === "test") return void 0;
  let resolved;
  try {
    const directory = dataDir();
    mkdirSync2(directory, { recursive: true });
    resolved = join4(directory, "lore.log");
  } catch {
    // Any failure here simply disables file logging.
    resolved = void 0;
  }
  return resolved;
}
|
|
12498
|
+
/**
 * Return the log file path, resolving it on first use.
 *
 * The outcome of the first resolveLogPath() call is cached in `logPath`, and
 * `logPathResolved` prevents a retry even when that outcome was `undefined`.
 *
 * @returns {string | undefined} The cached log file path, or `undefined` when
 *   file logging is unavailable.
 */
function logFilePath() {
  if (logPathResolved) return logPath;
  logPath = resolveLogPath();
  logPathResolved = true;
  return logPath;
}
|
|
12505
|
+
/**
 * Rename the log file to `<logPath>.1` once it exceeds LOG_MAX_BYTES.
 *
 * Does nothing when no log path has been resolved. Errors from the stat or
 * rename call are swallowed so that logging never throws into the caller.
 */
function maybeRotate() {
  if (!logPath) return;
  try {
    const { size } = statSync(logPath);
    if (size <= LOG_MAX_BYTES) return;
    renameSync2(logPath, `${logPath}.1`);
  } catch {
    // Best-effort: a failed stat/rename leaves the current log file in place.
  }
}
|
|
12515
|
+
/**
 * Append one timestamped line to the log file.
 *
 * The line has the form `<ISO timestamp> [<LEVEL>] <message>` followed by a
 * newline; newlines inside `message` are written as the two characters `\n`
 * so each entry stays on a single line. Nothing is written when no log file
 * path is available, and append failures are swallowed.
 *
 * @param {string} level - Severity label; upper-cased and padded to 5 chars.
 * @param {string} message - Already-formatted message text.
 */
function writeToFile(level, message) {
  const path = logFilePath();
  if (!path) return;
  // Post-increment: the size check runs on the FIRST write of a process and
  // then every ROTATION_CHECK_INTERVAL writes. Checking only on the Nth write
  // meant processes that log fewer than N lines never rotated the file, so it
  // could grow past LOG_MAX_BYTES across many short runs.
  if (writeCount++ % ROTATION_CHECK_INTERVAL === 0) {
    maybeRotate();
  }
  const ts = (/* @__PURE__ */ new Date()).toISOString();
  const tag = level.toUpperCase().padEnd(5);
  const flat = message.replace(/\n/g, "\\n");
  const line = `${ts} [${tag}] ${flat}\n`;
  try {
    appendFileSync(path, line);
  } catch {
    // Best-effort: file logging must never throw into the caller.
  }
}
|
|
12166
12531
|
/**
 * Log an informational message.
 *
 * Echoes the raw arguments to stderr only when `isDebug` is set, then forwards
 * the formatted text to the registered sink (if any) and to the log file.
 *
 * @param {...*} args - Values to log.
 */
function info(...args) {
  if (isDebug) {
    console.error("[lore]", ...args);
  }
  const text = formatArgs(args);
  if (sink != null) sink.info(text);
  writeToFile("info", text);
}
|
|
12170
12537
|
/**
 * Log a warning.
 *
 * Echoes the raw arguments to stderr only when `isDebug` is set, then forwards
 * the formatted text to the registered sink (if any) and to the log file.
 *
 * @param {...*} args - Values to log.
 */
function warn(...args) {
  if (isDebug) {
    console.error("[lore] WARN:", ...args);
  }
  const text = formatArgs(args);
  if (sink != null) sink.warn(text);
  writeToFile("warn", text);
}
|
|
12174
12543
|
/**
 * Log an error.
 *
 * Always echoes the raw arguments to stderr, forwards the formatted text to
 * the registered sink (if any) and to the log file, and finally hands the
 * value returned by findError(args), when truthy, to the sink's
 * `captureException`.
 *
 * @param {...*} args - Values to log.
 */
function error(...args) {
  console.error("[lore]", ...args);
  const text = formatArgs(args);
  if (sink != null) sink.error(text);
  writeToFile("error", text);
  const failure = findError(args);
  if (!failure) return;
  if (sink != null) sink.captureException(failure);
}
|
|
@@ -12333,10 +12704,11 @@ function extractTopTerms(text4, limit = 40) {
|
|
|
12333
12704
|
function reciprocalRankFusion(lists, k = 60) {
|
|
12334
12705
|
const scores = /* @__PURE__ */ new Map();
|
|
12335
12706
|
for (const list4 of lists) {
|
|
12707
|
+
const w = list4.weight ?? 1;
|
|
12336
12708
|
for (let rank = 0; rank < list4.items.length; rank++) {
|
|
12337
12709
|
const item = list4.items[rank];
|
|
12338
12710
|
const id = list4.key(item);
|
|
12339
|
-
const rrfScore =
|
|
12711
|
+
const rrfScore = w / (k + rank);
|
|
12340
12712
|
const existing = scores.get(id);
|
|
12341
12713
|
if (existing) {
|
|
12342
12714
|
existing.score += rrfScore;
|
|
@@ -12390,8 +12762,8 @@ async function expandQuery(llm, query, model, sessionID) {
|
|
|
12390
12762
|
var embedding_exports = {};
|
|
12391
12763
|
__export(embedding_exports, {
|
|
12392
12764
|
LocalProviderUnavailableError: () => LocalProviderUnavailableError,
|
|
12393
|
-
|
|
12394
|
-
|
|
12765
|
+
_markLocalProviderUnavailable: () => _markLocalProviderUnavailable,
|
|
12766
|
+
_resetLocalProviderProbe: () => _resetLocalProviderProbe,
|
|
12395
12767
|
_restoreProvider: () => _restoreProvider,
|
|
12396
12768
|
_saveAndClearProvider: () => _saveAndClearProvider,
|
|
12397
12769
|
_shutdownAndDisable: () => _shutdownAndDisable,
|
|
@@ -12410,6 +12782,7 @@ __export(embedding_exports, {
|
|
|
12410
12782
|
runStartupBackfill: () => runStartupBackfill,
|
|
12411
12783
|
toBlob: () => toBlob,
|
|
12412
12784
|
vectorSearch: () => vectorSearch,
|
|
12785
|
+
vectorSearchAllDistillations: () => vectorSearchAllDistillations,
|
|
12413
12786
|
vectorSearchDistillations: () => vectorSearchDistillations,
|
|
12414
12787
|
vectorSearchTemporal: () => vectorSearchTemporal
|
|
12415
12788
|
});
|
|
@@ -26183,8 +26556,8 @@ function date4(params) {
|
|
|
26183
26556
|
config(en_default());
|
|
26184
26557
|
|
|
26185
26558
|
// src/config.ts
|
|
26186
|
-
import { existsSync, readFileSync } from "node:fs";
|
|
26187
|
-
import { join as
|
|
26559
|
+
import { existsSync as existsSync2, readFileSync } from "node:fs";
|
|
26560
|
+
import { join as join5 } from "node:path";
|
|
26188
26561
|
var LoreConfig = external_exports.object({
|
|
26189
26562
|
model: external_exports.object({
|
|
26190
26563
|
providerID: external_exports.string(),
|
|
@@ -26301,15 +26674,25 @@ var LoreConfig = external_exports.object({
|
|
|
26301
26674
|
}).default({ title: 6, content: 2, category: 3 }),
|
|
26302
26675
|
/** Max results per source in recall tool before fusion. Default: 10. */
|
|
26303
26676
|
recallLimit: external_exports.number().min(1).max(50).default(10),
|
|
26304
|
-
/** Enable LLM-based query expansion for the recall tool. Default:
|
|
26305
|
-
*
|
|
26306
|
-
*
|
|
26307
|
-
|
|
26677
|
+
/** Enable LLM-based query expansion for the recall tool. Default: true.
|
|
26678
|
+
* The configured model generates 2–3 alternative query phrasings before
|
|
26679
|
+
* search, improving recall for ambiguous queries. Guarded by a 3-second
|
|
26680
|
+
* timeout — if expansion fails or times out, the original query is used. */
|
|
26681
|
+
queryExpansion: external_exports.boolean().default(true),
|
|
26682
|
+
/** RRF weight multiplier for vector search lists. Applied when the query
|
|
26683
|
+
* has >= `vectorBoostMinTerms` meaningful terms (after stopword removal).
|
|
26684
|
+
* Boosts semantic/vector results relative to keyword-based BM25 lists.
|
|
26685
|
+
* Default: 1.5. Set to 1.0 to disable. */
|
|
26686
|
+
vectorBoostWeight: external_exports.number().min(1).max(5).default(1.5),
|
|
26687
|
+
/** Minimum meaningful query terms (after stopword removal) to activate
|
|
26688
|
+
* vector boost. Short keyword queries (1-2 terms) are left unweighted
|
|
26689
|
+
* since BM25 excels there. Default: 3. */
|
|
26690
|
+
vectorBoostMinTerms: external_exports.number().min(1).max(10).default(3),
|
|
26308
26691
|
/** Vector embedding search.
|
|
26309
26692
|
* Supports multiple providers:
|
|
26310
|
-
* - "local" (default):
|
|
26311
|
-
*
|
|
26312
|
-
* cached
|
|
26693
|
+
* - "local" (default): @huggingface/transformers + nomic-embed-text-v1.5, no API key needed.
|
|
26694
|
+
* 768 dims (Matryoshka-capable: 64–768). Model downloaded on first use (~137MB INT8),
|
|
26695
|
+
* cached locally. Uses task instruction prefixes (search_document: / search_query:).
|
|
26313
26696
|
* - "voyage": Voyage AI (VOYAGE_API_KEY, voyage-code-3, 1024 dims)
|
|
26314
26697
|
* - "openai": OpenAI (OPENAI_API_KEY, text-embedding-3-small, 1536 dims)
|
|
26315
26698
|
* Set enabled: false to explicitly disable even with a provider available. */
|
|
@@ -26318,19 +26701,20 @@ var LoreConfig = external_exports.object({
|
|
|
26318
26701
|
* Set to false to explicitly disable. */
|
|
26319
26702
|
enabled: external_exports.boolean().default(true),
|
|
26320
26703
|
/** Embedding provider. Default: "local".
|
|
26321
|
-
* - "local":
|
|
26704
|
+
* - "local": @huggingface/transformers, no API key (default model: nomic-embed-text-v1.5, 768 dims)
|
|
26322
26705
|
* - "voyage": VOYAGE_API_KEY (default model: voyage-code-3, 1024 dims)
|
|
26323
26706
|
* - "openai": OPENAI_API_KEY (default model: text-embedding-3-small, 1536 dims) */
|
|
26324
26707
|
provider: external_exports.enum(["local", "voyage", "openai"]).default("local"),
|
|
26325
26708
|
/** Model ID for the embedding provider. Default depends on provider. */
|
|
26326
|
-
model: external_exports.string().default("
|
|
26327
|
-
/** Embedding dimensions. Default:
|
|
26328
|
-
|
|
26709
|
+
model: external_exports.string().default("nomic-ai/nomic-embed-text-v1.5"),
|
|
26710
|
+
/** Embedding dimensions. Default: 768 (local) / 1024 (voyage) / 1536 (openai).
|
|
26711
|
+
* For the local Nomic v1.5 model, supports Matryoshka dimensions: 64, 128, 256, 512, 768. */
|
|
26712
|
+
dimensions: external_exports.number().min(64).max(2048).default(768)
|
|
26329
26713
|
}).default({
|
|
26330
26714
|
enabled: true,
|
|
26331
26715
|
provider: "local",
|
|
26332
|
-
model: "
|
|
26333
|
-
dimensions:
|
|
26716
|
+
model: "nomic-ai/nomic-embed-text-v1.5",
|
|
26717
|
+
dimensions: 768
|
|
26334
26718
|
}),
|
|
26335
26719
|
/** Recall output formatting — controls how search results are presented to the agent. */
|
|
26336
26720
|
recall: external_exports.object({
|
|
@@ -26347,8 +26731,10 @@ var LoreConfig = external_exports.object({
|
|
|
26347
26731
|
}).default({
|
|
26348
26732
|
ftsWeights: { title: 6, content: 2, category: 3 },
|
|
26349
26733
|
recallLimit: 10,
|
|
26350
|
-
queryExpansion:
|
|
26351
|
-
|
|
26734
|
+
queryExpansion: true,
|
|
26735
|
+
vectorBoostWeight: 1.5,
|
|
26736
|
+
vectorBoostMinTerms: 3,
|
|
26737
|
+
embeddings: { enabled: true, provider: "local", model: "nomic-ai/nomic-embed-text-v1.5", dimensions: 768 },
|
|
26352
26738
|
recall: { charBudget: 8e3, relevanceFloor: 0.15, maxResults: 15 }
|
|
26353
26739
|
}),
|
|
26354
26740
|
cache: external_exports.object({
|
|
@@ -26366,9 +26752,9 @@ var LoreConfig = external_exports.object({
|
|
|
26366
26752
|
warming: external_exports.object({
|
|
26367
26753
|
/** Enable cache warming. Default: true. */
|
|
26368
26754
|
enabled: external_exports.boolean().default(true),
|
|
26369
|
-
/** Override the
|
|
26370
|
-
* skipped. Default: auto-derived from
|
|
26371
|
-
* (~0.
|
|
26755
|
+
/** Override the return probability threshold below which warming is
|
|
26756
|
+
* skipped. Default: auto-derived from corrected cost ratio
|
|
26757
|
+
* read/(write-read) (~0.087 for 5m TTL, ~0.042 for 1h TTL). */
|
|
26372
26758
|
minReturnProbability: external_exports.number().min(0).max(1).optional()
|
|
26373
26759
|
}).default({ enabled: true })
|
|
26374
26760
|
}).default({
|
|
@@ -26388,8 +26774,8 @@ function config2() {
|
|
|
26388
26774
|
return current;
|
|
26389
26775
|
}
|
|
26390
26776
|
async function load(directory) {
|
|
26391
|
-
const path =
|
|
26392
|
-
if (
|
|
26777
|
+
const path = join5(directory, ".lore.json");
|
|
26778
|
+
if (existsSync2(path)) {
|
|
26393
26779
|
const raw = JSON.parse(readFileSync(path, "utf8"));
|
|
26394
26780
|
current = LoreConfig.parse(raw);
|
|
26395
26781
|
return current;
|
|
@@ -26420,8 +26806,7 @@ function vendorModelInfo() {
|
|
|
26420
26806
|
const reg = getRegistration();
|
|
26421
26807
|
if (!reg) return null;
|
|
26422
26808
|
return {
|
|
26423
|
-
|
|
26424
|
-
modelName: reg.modelName
|
|
26809
|
+
localModelPath: reg.localModelPath
|
|
26425
26810
|
};
|
|
26426
26811
|
}
|
|
26427
26812
|
function isVendoredBinary() {
|
|
@@ -26508,62 +26893,31 @@ var OpenAIProvider = class {
|
|
|
26508
26893
|
/**
 * Error raised when the local embedding provider cannot be used.
 *
 * The message is fixed and tells the user how to switch to a remote provider;
 * the optional constructor argument is attached as `cause`.
 */
var LocalProviderUnavailableError = class extends Error {
  /**
   * @param {*} [cause] - Underlying failure. Stored on `this.cause` unless it
   *   is `undefined`.
   */
  constructor(cause) {
    const message = "Local embedding provider unavailable: '@huggingface/transformers' failed to initialize. Configure search.embeddings.provider to 'voyage' or 'openai', or set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback.";
    super(message);
    this.name = "LocalProviderUnavailableError";
    if (cause === void 0) return;
    this.cause = cause;
  }
};
|
|
26517
|
-
var
|
|
26518
|
-
var
|
|
26519
|
-
|
|
26520
|
-
|
|
26521
|
-
|
|
26522
|
-
fastembedModule = null;
|
|
26523
|
-
fastembedProbed = false;
|
|
26524
|
-
fastembedAvailable = false;
|
|
26525
|
-
fastembedLogged = false;
|
|
26526
|
-
}
|
|
26527
|
-
function _markFastembedUnavailable() {
|
|
26528
|
-
fastembedModule = null;
|
|
26529
|
-
fastembedProbed = true;
|
|
26530
|
-
fastembedAvailable = false;
|
|
26531
|
-
fastembedLogged = true;
|
|
26532
|
-
}
|
|
26533
|
-
async function tryLoadFastembed() {
|
|
26534
|
-
if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
|
|
26535
|
-
try {
|
|
26536
|
-
const mod = await loadFastembedModule();
|
|
26537
|
-
if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
|
|
26538
|
-
fastembedModule = mod;
|
|
26539
|
-
fastembedAvailable = true;
|
|
26540
|
-
} catch (err) {
|
|
26541
|
-
if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
|
|
26542
|
-
fastembedAvailable = false;
|
|
26543
|
-
if (!fastembedLogged) {
|
|
26544
|
-
fastembedLogged = true;
|
|
26545
|
-
const msg = err instanceof Error ? err.message : String(err);
|
|
26546
|
-
const remediation = isVendoredBinary() ? "this is a bug in the lore binary; please file an issue. Set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback in the meantime" : "set search.embeddings.provider to 'voyage' or 'openai', set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback, or reinstall fastembed with ONNXRUNTIME_NODE_INSTALL_CUDA=skip";
|
|
26547
|
-
info(
|
|
26548
|
-
`local embedding provider unavailable (fastembed not installed: ${msg}) \u2014 ${remediation}`
|
|
26549
|
-
);
|
|
26550
|
-
}
|
|
26551
|
-
} finally {
|
|
26552
|
-
fastembedProbed = true;
|
|
26553
|
-
}
|
|
26554
|
-
return fastembedAvailable ? fastembedModule : null;
|
|
26902
|
+
var localProviderKnownBroken = false;
|
|
26903
|
+
var localProviderErrorLogged = false;
|
|
26904
|
+
/**
 * Clear both local-provider flags: the "known broken" marker and the
 * "error already logged" marker. The leading underscore suggests an
 * internal/test hook (assumption; callers are not visible here).
 */
function _resetLocalProviderProbe() {
  localProviderErrorLogged = false;
  localProviderKnownBroken = false;
}
|
|
26556
|
-
|
|
26557
|
-
|
|
26908
|
+
/**
 * Force the local provider into the "known broken" state and mark its error
 * as already logged, so no further init-failure message is emitted for it.
 */
function _markLocalProviderUnavailable() {
  localProviderErrorLogged = true;
  localProviderKnownBroken = true;
}
|
|
26559
|
-
function
|
|
26560
|
-
return
|
|
26912
|
+
/**
 * Report whether the local embedding provider has been flagged as broken.
 *
 * @returns {boolean} Current value of the `localProviderKnownBroken` flag.
 */
function localProviderKnownUnavailable() {
  return localProviderKnownBroken;
}
|
|
26562
26915
|
var LocalProvider = class {
|
|
26563
26916
|
// With inference off the main thread, large batches no longer block
|
|
26564
26917
|
// the event loop. 256 maximises throughput per round-trip to the
|
|
26565
|
-
// worker. Backfill callers use
|
|
26566
|
-
// the worker's priority queue breathing room
|
|
26918
|
+
// worker. Backfill callers use token-budget-based batching (see
|
|
26919
|
+
// nextBatch) to give the worker's priority queue breathing room
|
|
26920
|
+
// for recall queries and prevent OOM on long texts.
|
|
26567
26921
|
maxBatchSize = 256;
|
|
26568
26922
|
worker = null;
|
|
26569
26923
|
workerReady = false;
|
|
@@ -26571,14 +26925,14 @@ var LocalProvider = class {
|
|
|
26571
26925
|
pendingRequests = /* @__PURE__ */ new Map();
|
|
26572
26926
|
nextRequestId = 0;
|
|
26573
26927
|
initPromise = null;
|
|
26574
|
-
|
|
26575
|
-
|
|
26576
|
-
|
|
26928
|
+
modelId;
|
|
26929
|
+
dimensions;
|
|
26930
|
+
constructor(modelId, dimensions) {
|
|
26931
|
+
this.modelId = modelId;
|
|
26932
|
+
this.dimensions = dimensions;
|
|
26577
26933
|
}
|
|
26578
26934
|
/**
|
|
26579
|
-
* Ensure the worker thread is running.
|
|
26580
|
-
* thread first (fast, cached) as a fast-fail gate — the worker is only
|
|
26581
|
-
* spawned if the module is known-loadable. Worker startup failure is
|
|
26935
|
+
* Ensure the worker thread is running. Worker startup failure is
|
|
26582
26936
|
* surfaced as `LocalProviderUnavailableError` to trigger the existing
|
|
26583
26937
|
* auto-fallback to remote providers.
|
|
26584
26938
|
*/
|
|
@@ -26587,8 +26941,7 @@ var LocalProvider = class {
|
|
|
26587
26941
|
if (this.workerInitError) throw new LocalProviderUnavailableError(this.workerInitError);
|
|
26588
26942
|
if (this.initPromise) return this.initPromise;
|
|
26589
26943
|
this.initPromise = (async () => {
|
|
26590
|
-
|
|
26591
|
-
if (!fastembed) throw new LocalProviderUnavailableError();
|
|
26944
|
+
if (localProviderKnownBroken) throw new LocalProviderUnavailableError();
|
|
26592
26945
|
const { Worker } = await import("node:worker_threads");
|
|
26593
26946
|
const vendorWorkerUrl = globalThis.__LORE_VENDOR_WORKER_URL__;
|
|
26594
26947
|
let workerUrl;
|
|
@@ -26602,12 +26955,22 @@ var LocalProvider = class {
|
|
|
26602
26955
|
workerUrl = vendorWorkerUrl;
|
|
26603
26956
|
}
|
|
26604
26957
|
} else {
|
|
26605
|
-
|
|
26958
|
+
const selfUrl = typeof import.meta.url === "string" ? import.meta.url : void 0;
|
|
26959
|
+
if (selfUrl) {
|
|
26960
|
+
workerUrl = new URL(
|
|
26961
|
+
`./embedding-worker${selfUrl.endsWith(".ts") ? ".ts" : ".js"}`,
|
|
26962
|
+
selfUrl
|
|
26963
|
+
);
|
|
26964
|
+
} else {
|
|
26965
|
+
const { pathToFileURL } = await import("node:url");
|
|
26966
|
+
workerUrl = new URL("./embedding-worker.cjs", pathToFileURL(__filename));
|
|
26967
|
+
}
|
|
26606
26968
|
}
|
|
26607
26969
|
const vendor = vendorModelInfo();
|
|
26608
26970
|
const workerInitData = {
|
|
26609
|
-
|
|
26610
|
-
|
|
26971
|
+
modelId: this.modelId,
|
|
26972
|
+
dimensions: this.dimensions,
|
|
26973
|
+
vendorModel: vendor ? { localModelPath: vendor.localModelPath } : null
|
|
26611
26974
|
};
|
|
26612
26975
|
this.worker = new Worker(workerUrl, { workerData: workerInitData });
|
|
26613
26976
|
this.worker.unref();
|
|
@@ -26634,6 +26997,13 @@ var LocalProvider = class {
|
|
|
26634
26997
|
case "init-error": {
|
|
26635
26998
|
this.workerInitError = msg.error;
|
|
26636
26999
|
this.workerReady = false;
|
|
27000
|
+
localProviderKnownBroken = true;
|
|
27001
|
+
if (!localProviderErrorLogged) {
|
|
27002
|
+
localProviderErrorLogged = true;
|
|
27003
|
+
info(
|
|
27004
|
+
`local embedding provider failed to init: ${msg.error}. Set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback.`
|
|
27005
|
+
);
|
|
27006
|
+
}
|
|
26637
27007
|
for (const [, p2] of this.pendingRequests) {
|
|
26638
27008
|
p2.reject(new LocalProviderUnavailableError(msg.error));
|
|
26639
27009
|
}
|
|
@@ -26685,6 +27055,8 @@ var LocalProvider = class {
|
|
|
26685
27055
|
}
|
|
26686
27056
|
async embed(texts, inputType) {
|
|
26687
27057
|
await this.ensureWorker();
|
|
27058
|
+
const prefix = inputType === "document" ? "search_document: " : "search_query: ";
|
|
27059
|
+
const prefixed = texts.map((t2) => prefix + t2);
|
|
26688
27060
|
const id = this.nextRequestId++;
|
|
26689
27061
|
const priority = inputType === "query" && texts.length === 1 ? "high" : "normal";
|
|
26690
27062
|
return new Promise((resolve, reject) => {
|
|
@@ -26693,7 +27065,7 @@ var LocalProvider = class {
|
|
|
26693
27065
|
this.worker.postMessage({
|
|
26694
27066
|
type: "embed",
|
|
26695
27067
|
id,
|
|
26696
|
-
texts,
|
|
27068
|
+
texts: prefixed,
|
|
26697
27069
|
inputType,
|
|
26698
27070
|
priority
|
|
26699
27071
|
});
|
|
@@ -26701,8 +27073,6 @@ var LocalProvider = class {
|
|
|
26701
27073
|
}
|
|
26702
27074
|
/** Shut down the worker thread. Called by `resetProvider()` on config change.
|
|
26703
27075
|
* Sends a shutdown message so the worker calls `process.exit(0)` internally.
|
|
26704
|
-
* We avoid `worker.terminate()` because Bun's forced termination triggers a
|
|
26705
|
-
* NAPI fatal error when tearing down onnxruntime's native bindings.
|
|
26706
27076
|
*
|
|
26707
27077
|
* Returns a promise that resolves once the worker has fully exited. Callers
|
|
26708
27078
|
* that need a clean teardown (tests, config change) should await the result.
|
|
@@ -26725,7 +27095,7 @@ var LocalProvider = class {
|
|
|
26725
27095
|
}
|
|
26726
27096
|
};
|
|
26727
27097
|
var PROVIDER_DEFAULTS = {
|
|
26728
|
-
local: { model: "
|
|
27098
|
+
local: { model: "nomic-ai/nomic-embed-text-v1.5", dimensions: 768 },
|
|
26729
27099
|
voyage: { model: "voyage-code-3", dimensions: 1024 },
|
|
26730
27100
|
openai: { model: "text-embedding-3-small", dimensions: 1536 }
|
|
26731
27101
|
};
|
|
@@ -26749,7 +27119,7 @@ function getProvider() {
|
|
|
26749
27119
|
const model = cfg.model;
|
|
26750
27120
|
switch (providerName) {
|
|
26751
27121
|
case "local": {
|
|
26752
|
-
cachedProvider = new LocalProvider(model);
|
|
27122
|
+
cachedProvider = new LocalProvider(model, cfg.dimensions);
|
|
26753
27123
|
break;
|
|
26754
27124
|
}
|
|
26755
27125
|
case "voyage": {
|
|
@@ -26826,7 +27196,7 @@ function pickRemoteFallback() {
|
|
|
26826
27196
|
/**
 * Report whether an embedding provider can currently be used.
 *
 * @returns {boolean} `false` when getProvider() yields nothing, or when the
 *   provider is a LocalProvider that is flagged as known-unavailable;
 *   `true` otherwise.
 */
function isAvailable() {
  const active = getProvider();
  if (!active) return false;
  // The flag is only consulted for LocalProvider instances (short-circuit).
  const localIsBroken = active instanceof LocalProvider && localProviderKnownUnavailable();
  return !localIsBroken;
}
|
|
26832
27202
|
async function embed(texts, inputType) {
|
|
@@ -26841,7 +27211,7 @@ async function embed(texts, inputType) {
|
|
|
26841
27211
|
if (!remoteFallbackLogged) {
|
|
26842
27212
|
remoteFallbackLogged = true;
|
|
26843
27213
|
info(
|
|
26844
|
-
`
|
|
27214
|
+
`local embedding provider unavailable; auto-switching to ${fallback.name} (set search.embeddings.provider in .lore.json to silence this)`
|
|
26845
27215
|
);
|
|
26846
27216
|
}
|
|
26847
27217
|
cachedProvider = fallback.provider;
|
|
@@ -26869,8 +27239,14 @@ function fromBlob(blob) {
|
|
|
26869
27239
|
const bytes = new Uint8Array(blob);
|
|
26870
27240
|
return new Float32Array(bytes.buffer, bytes.byteOffset, bytes.byteLength / 4);
|
|
26871
27241
|
}
|
|
26872
|
-
function vectorSearch(queryEmbedding, limit = 10) {
|
|
26873
|
-
|
|
27242
|
+
function vectorSearch(queryEmbedding, limit = 10, excludeCategories) {
|
|
27243
|
+
let sql = "SELECT id, embedding FROM knowledge WHERE embedding IS NOT NULL AND confidence > 0.2";
|
|
27244
|
+
const params = [];
|
|
27245
|
+
if (excludeCategories?.length) {
|
|
27246
|
+
sql += ` AND category NOT IN (${excludeCategories.map(() => "?").join(",")})`;
|
|
27247
|
+
params.push(...excludeCategories);
|
|
27248
|
+
}
|
|
27249
|
+
const rows = db().query(sql).all(...params);
|
|
26874
27250
|
const scored = [];
|
|
26875
27251
|
for (const row of rows) {
|
|
26876
27252
|
const vec = fromBlob(row.embedding);
|
|
@@ -26893,6 +27269,20 @@ function vectorSearchDistillations(queryEmbedding, limit = 10) {
|
|
|
26893
27269
|
scored.sort((a, b) => b.similarity - a.similarity);
|
|
26894
27270
|
return scored.slice(0, limit);
|
|
26895
27271
|
}
|
|
27272
|
+
// Upper bound on how many distillation rows are loaded and scored per search.
var MAX_DISTILLATION_VECTOR_ROWS = 500;
/**
 * Rank a project's distillations by similarity to a query embedding.
 *
 * Loads at most MAX_DISTILLATION_VECTOR_ROWS rows for the project (newest
 * `created_at` first, only rows that have an embedding), scores each with
 * cosineSimilarity(), and returns the best matches.
 *
 * @param queryEmbedding - Query vector passed to cosineSimilarity().
 * @param projectId2 - Value bound to the `project_id = ?` placeholder.
 * @param {number} [limit=20] - Maximum number of results to return.
 * @returns {Array<{id: *, session_id: *, similarity: *}>} Matches sorted by
 *   descending similarity, truncated to `limit`.
 */
function vectorSearchAllDistillations(queryEmbedding, projectId2, limit = 20) {
  const candidates = db().query(
    "SELECT id, session_id, embedding FROM distillations WHERE embedding IS NOT NULL AND project_id = ? ORDER BY created_at DESC LIMIT ?"
  ).all(projectId2, MAX_DISTILLATION_VECTOR_ROWS);
  const ranked = candidates.map((candidate) => ({
    id: candidate.id,
    session_id: candidate.session_id,
    similarity: cosineSimilarity(queryEmbedding, fromBlob(candidate.embedding))
  }));
  ranked.sort((a, b) => b.similarity - a.similarity);
  return ranked.slice(0, limit);
}
|
|
26896
27286
|
function embedKnowledgeEntry(id, title, content3) {
|
|
26897
27287
|
const text4 = `${title}
|
|
26898
27288
|
${content3}`;
|
|
@@ -26994,20 +27384,37 @@ async function runStartupBackfill() {
|
|
|
26994
27384
|
);
|
|
26995
27385
|
info(`embedding startup: ${parts.join("; ")}`);
|
|
26996
27386
|
}
|
|
26997
|
-
var
|
|
27387
|
+
// Default cap on the number of rows in one backfill batch.
var MAX_BACKFILL_CHUNK = 8;
// Default budget for (rows in batch) x (estimated tokens of the longest row).
var MAX_BATCH_TOKEN_AREA = 4096;
// Characters-per-token ratio used for the length-based token estimate.
var CHARS_PER_TOKEN = 4;
/**
 * Take the next batch of rows, starting at `start`, that fits the batch limits.
 *
 * Rows are taken in order. A batch is closed when it holds `maxItems` rows or
 * when adding the next row would push `rowCount * longestEstimatedTokens`
 * above `maxTokenArea`. The first row is always accepted, even if it alone
 * exceeds the budget, so a caller that advances by `batch.length` makes
 * progress whenever rows remain.
 *
 * @param {Array<{text: string}>} rows - Candidate rows; only `text.length` is read.
 * @param {number} start - Index of the first row to consider.
 * @param {number} [maxItems=MAX_BACKFILL_CHUNK] - Maximum rows per batch;
 *   values below 1 are treated as 1.
 * @param {number} [maxTokenArea=MAX_BATCH_TOKEN_AREA] - Budget for
 *   rows x longest estimated token count.
 * @returns {Array<{text: string}>} The selected rows (same object references);
 *   empty only when `start` is at or past the end of `rows`.
 */
function nextBatch(rows, start, maxItems = MAX_BACKFILL_CHUNK, maxTokenArea = MAX_BATCH_TOKEN_AREA) {
  // Clamp so a remaining row is always consumed; an empty batch would stall
  // callers that loop with `i += batch.length`.
  const itemCap = Math.max(1, maxItems);
  const batch = [];
  let maxTokens = 0;
  for (let i = start; i < rows.length && batch.length < itemCap; i++) {
    const estTokens = Math.ceil(rows[i].text.length / CHARS_PER_TOKEN);
    const newMax = Math.max(maxTokens, estTokens);
    // Area is sized by the longest row in the batch, not the sum of lengths.
    const newArea = (batch.length + 1) * newMax;
    if (batch.length > 0 && newArea > maxTokenArea) break;
    batch.push(rows[i]);
    maxTokens = newMax;
  }
  return batch;
}
|
|
26998
27403
|
async function backfillEmbeddings() {
|
|
26999
27404
|
checkConfigChange();
|
|
27000
27405
|
const provider = getProvider();
|
|
27001
27406
|
if (!provider) return 0;
|
|
27002
27407
|
const rows = db().query("SELECT id, title, content FROM knowledge WHERE embedding IS NULL AND confidence > 0.2").all();
|
|
27003
27408
|
if (!rows.length) return 0;
|
|
27409
|
+
const items = rows.map((r) => ({ ...r, text: `${r.title}
|
|
27410
|
+
${r.content}` }));
|
|
27004
27411
|
let embedded = 0;
|
|
27005
|
-
|
|
27006
|
-
|
|
27007
|
-
const
|
|
27008
|
-
|
|
27412
|
+
let i = 0;
|
|
27413
|
+
while (i < items.length) {
|
|
27414
|
+
const batch = nextBatch(items, i);
|
|
27415
|
+
i += batch.length;
|
|
27009
27416
|
try {
|
|
27010
|
-
const vectors = await embed(
|
|
27417
|
+
const vectors = await embed(batch.map((b) => b.text), "document");
|
|
27011
27418
|
const update2 = db().prepare(
|
|
27012
27419
|
"UPDATE knowledge SET embedding = ? WHERE id = ?"
|
|
27013
27420
|
);
|
|
@@ -27016,7 +27423,7 @@ ${r.content}`);
|
|
|
27016
27423
|
embedded++;
|
|
27017
27424
|
}
|
|
27018
27425
|
} catch (err) {
|
|
27019
|
-
|
|
27426
|
+
error(`embedding backfill batch failed (${batch.length} items):`, err);
|
|
27020
27427
|
}
|
|
27021
27428
|
}
|
|
27022
27429
|
if (embedded > 0) {
|
|
@@ -27034,11 +27441,13 @@ async function backfillDistillationEmbeddings() {
|
|
|
27034
27441
|
let embedded = 0;
|
|
27035
27442
|
const PROGRESS_INTERVAL = 256;
|
|
27036
27443
|
let nextProgressAt = PROGRESS_INTERVAL;
|
|
27037
|
-
|
|
27038
|
-
|
|
27039
|
-
|
|
27444
|
+
const items = rows.map((r) => ({ ...r, text: r.observations }));
|
|
27445
|
+
let i = 0;
|
|
27446
|
+
while (i < items.length) {
|
|
27447
|
+
const batch = nextBatch(items, i);
|
|
27448
|
+
i += batch.length;
|
|
27040
27449
|
try {
|
|
27041
|
-
const vectors = await embed(
|
|
27450
|
+
const vectors = await embed(batch.map((b) => b.text), "document");
|
|
27042
27451
|
const update2 = db().prepare(
|
|
27043
27452
|
"UPDATE distillations SET embedding = ? WHERE id = ?"
|
|
27044
27453
|
);
|
|
@@ -27047,7 +27456,7 @@ async function backfillDistillationEmbeddings() {
|
|
|
27047
27456
|
embedded++;
|
|
27048
27457
|
}
|
|
27049
27458
|
} catch (err) {
|
|
27050
|
-
|
|
27459
|
+
error(`distillation embedding backfill batch failed (${batch.length} items):`, err);
|
|
27051
27460
|
}
|
|
27052
27461
|
if (embedded >= nextProgressAt) {
|
|
27053
27462
|
info(`embedding distillations: ${embedded}/${rows.length}\u2026`);
|
|
@@ -27161,7 +27570,7 @@ function searchLike(input) {
|
|
|
27161
27570
|
if (!terms.length) return [];
|
|
27162
27571
|
const conditions = terms.map(() => "LOWER(content) LIKE ?").join(" AND ");
|
|
27163
27572
|
const likeParams = terms.map((t2) => `%${t2}%`);
|
|
27164
|
-
const query = input.sessionID ? `SELECT * FROM temporal_messages WHERE project_id = ? AND session_id = ? AND ${conditions} ORDER BY created_at DESC LIMIT ?` : `SELECT * FROM temporal_messages WHERE project_id = ? AND ${conditions} ORDER BY created_at DESC LIMIT ?`;
|
|
27573
|
+
const query = input.sessionID ? `SELECT * FROM temporal_messages WHERE project_id = ? AND session_id = ? AND distilled = 0 AND ${conditions} ORDER BY created_at DESC LIMIT ?` : `SELECT * FROM temporal_messages WHERE project_id = ? AND distilled = 0 AND ${conditions} ORDER BY created_at DESC LIMIT ?`;
|
|
27165
27574
|
const params = input.sessionID ? [input.pid, input.sessionID, ...likeParams, input.limit] : [input.pid, ...likeParams, input.limit];
|
|
27166
27575
|
return db().query(query).all(...params);
|
|
27167
27576
|
}
|
|
@@ -27170,10 +27579,10 @@ function search2(input) {
|
|
|
27170
27579
|
const limit = input.limit ?? 20;
|
|
27171
27580
|
const ftsSQL = input.sessionID ? `SELECT m.* FROM temporal_fts f
|
|
27172
27581
|
CROSS JOIN temporal_messages m ON m.rowid = f.rowid
|
|
27173
|
-
WHERE f.content MATCH ? AND m.project_id = ? AND m.session_id = ?
|
|
27582
|
+
WHERE f.content MATCH ? AND m.project_id = ? AND m.session_id = ? AND m.distilled = 0
|
|
27174
27583
|
ORDER BY rank LIMIT ?` : `SELECT m.* FROM temporal_fts f
|
|
27175
27584
|
CROSS JOIN temporal_messages m ON m.rowid = f.rowid
|
|
27176
|
-
WHERE f.content MATCH ? AND m.project_id = ?
|
|
27585
|
+
WHERE f.content MATCH ? AND m.project_id = ? AND m.distilled = 0
|
|
27177
27586
|
ORDER BY rank LIMIT ?`;
|
|
27178
27587
|
try {
|
|
27179
27588
|
return runRelaxedSearch(input.query, (matchExpr) => {
|
|
@@ -27194,10 +27603,10 @@ function searchScored(input) {
|
|
|
27194
27603
|
const limit = input.limit ?? 20;
|
|
27195
27604
|
const ftsSQL = input.sessionID ? `SELECT m.*, rank FROM temporal_fts f
|
|
27196
27605
|
CROSS JOIN temporal_messages m ON m.rowid = f.rowid
|
|
27197
|
-
WHERE f.content MATCH ? AND m.project_id = ? AND m.session_id = ?
|
|
27606
|
+
WHERE f.content MATCH ? AND m.project_id = ? AND m.session_id = ? AND m.distilled = 0
|
|
27198
27607
|
ORDER BY rank LIMIT ?` : `SELECT m.*, rank FROM temporal_fts f
|
|
27199
27608
|
CROSS JOIN temporal_messages m ON m.rowid = f.rowid
|
|
27200
|
-
WHERE f.content MATCH ? AND m.project_id = ?
|
|
27609
|
+
WHERE f.content MATCH ? AND m.project_id = ? AND m.distilled = 0
|
|
27201
27610
|
ORDER BY rank LIMIT ?`;
|
|
27202
27611
|
try {
|
|
27203
27612
|
return runRelaxedSearch(input.query, (matchExpr) => {
|
|
@@ -27226,6 +27635,12 @@ function count(projectPath, sessionID) {
|
|
|
27226
27635
|
const params = sessionID ? [pid, sessionID] : [pid];
|
|
27227
27636
|
return db().query(query).get(...params).count;
|
|
27228
27637
|
}
|
|
27638
|
+
function hasMessages(projectPath, sessionID) {
|
|
27639
|
+
const pid = ensureProject(projectPath);
|
|
27640
|
+
return !!db().query(
|
|
27641
|
+
"SELECT 1 FROM temporal_messages WHERE project_id = ? AND session_id = ? LIMIT 1"
|
|
27642
|
+
).get(pid, sessionID);
|
|
27643
|
+
}
|
|
27229
27644
|
function undistilledCount(projectPath, sessionID) {
|
|
27230
27645
|
const pid = ensureProject(projectPath);
|
|
27231
27646
|
const query = sessionID ? "SELECT COUNT(*) as count FROM temporal_messages WHERE project_id = ? AND session_id = ? AND distilled = 0" : "SELECT COUNT(*) as count FROM temporal_messages WHERE project_id = ? AND distilled = 0";
|
|
@@ -27284,17 +27699,31 @@ function prune(input) {
|
|
|
27284
27699
|
var ltm_exports = {};
|
|
27285
27700
|
__export(ltm_exports, {
|
|
27286
27701
|
all: () => all2,
|
|
27702
|
+
calibrateDedupThreshold: () => calibrateDedupThreshold,
|
|
27287
27703
|
cascadeRefReplace: () => cascadeRefReplace,
|
|
27288
27704
|
check: () => check2,
|
|
27289
27705
|
cleanDeadRefs: () => cleanDeadRefs,
|
|
27290
27706
|
create: () => create,
|
|
27707
|
+
crossProject: () => crossProject,
|
|
27708
|
+
dedupPairKey: () => dedupPairKey,
|
|
27709
|
+
deduplicate: () => deduplicate,
|
|
27710
|
+
deduplicateGlobal: () => deduplicateGlobal,
|
|
27291
27711
|
extractRefs: () => extractRefs,
|
|
27712
|
+
findFuzzyDuplicate: () => findFuzzyDuplicate,
|
|
27292
27713
|
forProject: () => forProject,
|
|
27293
27714
|
forSession: () => forSession,
|
|
27294
27715
|
get: () => get,
|
|
27716
|
+
getDedupFeedback: () => getDedupFeedback,
|
|
27717
|
+
getDedupFeedbackCount: () => getDedupFeedbackCount,
|
|
27718
|
+
loadCalibratedThreshold: () => loadCalibratedThreshold,
|
|
27719
|
+
pruneDedupFeedback: () => pruneDedupFeedback,
|
|
27295
27720
|
pruneOversized: () => pruneOversized,
|
|
27721
|
+
recordAutoSignals: () => recordAutoSignals,
|
|
27722
|
+
recordDedupFeedback: () => recordDedupFeedback,
|
|
27723
|
+
recordDedupResultFeedback: () => recordDedupResultFeedback,
|
|
27296
27724
|
remove: () => remove,
|
|
27297
27725
|
resolveRef: () => resolveRef2,
|
|
27726
|
+
saveCalibratedThreshold: () => saveCalibratedThreshold,
|
|
27298
27727
|
search: () => search3,
|
|
27299
27728
|
searchScored: () => searchScored3,
|
|
27300
27729
|
searchScoredOtherProjects: () => searchScoredOtherProjects,
|
|
@@ -27647,8 +28076,8 @@ __export(lat_reader_exports, {
|
|
|
27647
28076
|
scoreForSession: () => scoreForSession,
|
|
27648
28077
|
searchScored: () => searchScored2
|
|
27649
28078
|
});
|
|
27650
|
-
import { readdirSync, readFileSync as readFileSync2, existsSync as
|
|
27651
|
-
import { join as
|
|
28079
|
+
import { readdirSync, readFileSync as readFileSync2, existsSync as existsSync3, statSync as statSync2 } from "fs";
|
|
28080
|
+
import { join as join6, relative } from "path";
|
|
27652
28081
|
var processor2 = remark();
|
|
27653
28082
|
function estimateTokens2(text4) {
|
|
27654
28083
|
return Math.ceil(text4.length / 3);
|
|
@@ -27726,7 +28155,7 @@ function listMarkdownFiles(dir) {
|
|
|
27726
28155
|
try {
|
|
27727
28156
|
const entries = readdirSync(dir, { withFileTypes: true });
|
|
27728
28157
|
for (const entry of entries) {
|
|
27729
|
-
const fullPath =
|
|
28158
|
+
const fullPath = join6(dir, entry.name);
|
|
27730
28159
|
if (entry.isDirectory() && !entry.name.startsWith(".")) {
|
|
27731
28160
|
results.push(...listMarkdownFiles(fullPath));
|
|
27732
28161
|
} else if (entry.isFile() && entry.name.endsWith(".md")) {
|
|
@@ -27741,12 +28170,12 @@ function contentHash(content3) {
|
|
|
27741
28170
|
return sha256(content3);
|
|
27742
28171
|
}
|
|
27743
28172
|
function hasLatDir(projectPath) {
|
|
27744
|
-
const latDir =
|
|
27745
|
-
return
|
|
28173
|
+
const latDir = join6(projectPath, "lat.md");
|
|
28174
|
+
return existsSync3(latDir) && statSync2(latDir).isDirectory();
|
|
27746
28175
|
}
|
|
27747
28176
|
function refresh(projectPath) {
|
|
27748
|
-
const latDir =
|
|
27749
|
-
if (!
|
|
28177
|
+
const latDir = join6(projectPath, "lat.md");
|
|
28178
|
+
if (!existsSync3(latDir) || !statSync2(latDir).isDirectory()) return 0;
|
|
27750
28179
|
const pid = ensureProject(projectPath);
|
|
27751
28180
|
const files = listMarkdownFiles(latDir);
|
|
27752
28181
|
let upserted = 0;
|
|
@@ -27868,6 +28297,7 @@ var KNOWLEDGE_COLS = "id, project_id, category, title, content, source_session,
|
|
|
27868
28297
|
var KNOWLEDGE_COLS_K = "k.id, k.project_id, k.category, k.title, k.content, k.source_session, k.cross_project, k.confidence, k.created_at, k.updated_at, k.metadata";
|
|
27869
28298
|
function create(input) {
|
|
27870
28299
|
const pid = input.scope === "project" && input.projectPath ? ensureProject(input.projectPath) : null;
|
|
28300
|
+
const crossProject2 = pid === null ? true : input.crossProject ?? false;
|
|
27871
28301
|
if (!input.id) {
|
|
27872
28302
|
const existing = pid !== null ? db().query(
|
|
27873
28303
|
"SELECT id FROM knowledge WHERE project_id = ? AND LOWER(title) = LOWER(?) AND confidence > 0 LIMIT 1"
|
|
@@ -27885,6 +28315,11 @@ function create(input) {
|
|
|
27885
28315
|
update(crossExisting.id, { content: input.content });
|
|
27886
28316
|
return crossExisting.id;
|
|
27887
28317
|
}
|
|
28318
|
+
const fuzzyMatch = findFuzzyDuplicate({ title: input.title, projectId: pid });
|
|
28319
|
+
if (fuzzyMatch) {
|
|
28320
|
+
update(fuzzyMatch.id, { content: input.content });
|
|
28321
|
+
return fuzzyMatch.id;
|
|
28322
|
+
}
|
|
27888
28323
|
}
|
|
27889
28324
|
const id = input.id ?? uuidv72();
|
|
27890
28325
|
const now = Date.now();
|
|
@@ -27898,7 +28333,7 @@ function create(input) {
|
|
|
27898
28333
|
input.title,
|
|
27899
28334
|
input.content,
|
|
27900
28335
|
input.session ?? null,
|
|
27901
|
-
|
|
28336
|
+
crossProject2 ? 1 : 0,
|
|
27902
28337
|
now,
|
|
27903
28338
|
now
|
|
27904
28339
|
);
|
|
@@ -27916,7 +28351,7 @@ function update(id, input) {
|
|
|
27916
28351
|
}
|
|
27917
28352
|
if (input.confidence !== void 0) {
|
|
27918
28353
|
sets.push("confidence = ?");
|
|
27919
|
-
params.push(input.confidence);
|
|
28354
|
+
params.push(Math.max(0, Math.min(1, input.confidence)));
|
|
27920
28355
|
}
|
|
27921
28356
|
sets.push("updated_at = ?");
|
|
27922
28357
|
params.push(Date.now());
|
|
@@ -27932,6 +28367,50 @@ function update(id, input) {
|
|
|
27932
28367
|
function remove(id) {
|
|
27933
28368
|
db().query("DELETE FROM knowledge WHERE id = ?").run(id);
|
|
27934
28369
|
}
|
|
28370
|
+
function titleOverlap(a, b) {
|
|
28371
|
+
const wordsA = new Set(filterTerms(a).map((w) => w.toLowerCase()));
|
|
28372
|
+
const wordsB = new Set(filterTerms(b).map((w) => w.toLowerCase()));
|
|
28373
|
+
if (wordsA.size === 0 || wordsB.size === 0) return { coefficient: 0, intersectionSize: 0 };
|
|
28374
|
+
const intersection2 = [...wordsA].filter((w) => wordsB.has(w));
|
|
28375
|
+
return {
|
|
28376
|
+
coefficient: intersection2.length / Math.min(wordsA.size, wordsB.size),
|
|
28377
|
+
intersectionSize: intersection2.length
|
|
28378
|
+
};
|
|
28379
|
+
}
|
|
28380
|
+
var FUZZY_DEDUP_THRESHOLD = 0.7;
|
|
28381
|
+
var FUZZY_DEDUP_MIN_OVERLAP = 4;
|
|
28382
|
+
var EMBEDDING_DEDUP_THRESHOLD = 0.935;
|
|
28383
|
+
function findFuzzyDuplicate(input) {
|
|
28384
|
+
const q = ftsQueryOr(input.title);
|
|
28385
|
+
if (q === EMPTY_QUERY) return null;
|
|
28386
|
+
const { title: tw, content: cw, category: catw } = config2().search.ftsWeights;
|
|
28387
|
+
try {
|
|
28388
|
+
const excludeClause = input.excludeId ? "AND k.id != ?" : "";
|
|
28389
|
+
const sql = input.projectId !== null ? `SELECT k.id, k.title FROM knowledge_fts f
|
|
28390
|
+
CROSS JOIN knowledge k ON k.rowid = f.rowid
|
|
28391
|
+
WHERE knowledge_fts MATCH ?
|
|
28392
|
+
AND (k.project_id = ? OR k.cross_project = 1)
|
|
28393
|
+
AND k.confidence > 0.2
|
|
28394
|
+
${excludeClause}
|
|
28395
|
+
ORDER BY bm25(knowledge_fts, ?, ?, ?) LIMIT 5` : `SELECT k.id, k.title FROM knowledge_fts f
|
|
28396
|
+
CROSS JOIN knowledge k ON k.rowid = f.rowid
|
|
28397
|
+
WHERE knowledge_fts MATCH ?
|
|
28398
|
+
AND (k.project_id IS NULL OR k.cross_project = 1)
|
|
28399
|
+
AND k.confidence > 0.2
|
|
28400
|
+
${excludeClause}
|
|
28401
|
+
ORDER BY bm25(knowledge_fts, ?, ?, ?) LIMIT 5`;
|
|
28402
|
+
const params = input.projectId !== null ? [q, input.projectId, ...input.excludeId ? [input.excludeId] : [], tw, cw, catw] : [q, ...input.excludeId ? [input.excludeId] : [], tw, cw, catw];
|
|
28403
|
+
const candidates = db().query(sql).all(...params);
|
|
28404
|
+
for (const candidate of candidates) {
|
|
28405
|
+
const { coefficient, intersectionSize } = titleOverlap(input.title, candidate.title);
|
|
28406
|
+
if (coefficient >= FUZZY_DEDUP_THRESHOLD && intersectionSize >= FUZZY_DEDUP_MIN_OVERLAP) {
|
|
28407
|
+
return candidate;
|
|
28408
|
+
}
|
|
28409
|
+
}
|
|
28410
|
+
} catch {
|
|
28411
|
+
}
|
|
28412
|
+
return null;
|
|
28413
|
+
}
|
|
27935
28414
|
function forProject(projectPath, includeCross = true) {
|
|
27936
28415
|
const pid = ensureProject(projectPath);
|
|
27937
28416
|
if (includeCross) {
|
|
@@ -27981,18 +28460,29 @@ function scoreEntriesFTS(sessionContext) {
|
|
|
27981
28460
|
return /* @__PURE__ */ new Map();
|
|
27982
28461
|
}
|
|
27983
28462
|
}
|
|
27984
|
-
function forSession(projectPath, sessionID, maxTokens) {
|
|
28463
|
+
async function forSession(projectPath, sessionID, maxTokens, options) {
|
|
27985
28464
|
const pid = ensureProject(projectPath);
|
|
28465
|
+
const categoryFilter = options?.categories;
|
|
28466
|
+
const excludeFilter = options?.excludeCategories;
|
|
28467
|
+
let categoryClause = "";
|
|
28468
|
+
let categoryParams = [];
|
|
28469
|
+
if (categoryFilter?.length) {
|
|
28470
|
+
categoryClause = ` AND category IN (${categoryFilter.map(() => "?").join(",")})`;
|
|
28471
|
+
categoryParams = categoryFilter;
|
|
28472
|
+
} else if (excludeFilter?.length) {
|
|
28473
|
+
categoryClause = ` AND category NOT IN (${excludeFilter.map(() => "?").join(",")})`;
|
|
28474
|
+
categoryParams = excludeFilter;
|
|
28475
|
+
}
|
|
27986
28476
|
const projectEntries = db().query(
|
|
27987
28477
|
`SELECT ${KNOWLEDGE_COLS} FROM knowledge
|
|
27988
|
-
WHERE project_id = ? AND cross_project = 0 AND confidence > 0.2
|
|
28478
|
+
WHERE project_id = ? AND cross_project = 0 AND confidence > 0.2${categoryClause}
|
|
27989
28479
|
ORDER BY confidence DESC, updated_at DESC`
|
|
27990
|
-
).all(pid);
|
|
28480
|
+
).all(pid, ...categoryParams);
|
|
27991
28481
|
const crossEntries = db().query(
|
|
27992
28482
|
`SELECT ${KNOWLEDGE_COLS} FROM knowledge
|
|
27993
|
-
WHERE (project_id IS NULL OR cross_project = 1) AND confidence > 0.2
|
|
28483
|
+
WHERE (project_id IS NULL OR cross_project = 1) AND confidence > 0.2${categoryClause}
|
|
27994
28484
|
ORDER BY confidence DESC, updated_at DESC`
|
|
27995
|
-
).all();
|
|
28485
|
+
).all(...categoryParams);
|
|
27996
28486
|
if (!crossEntries.length && !projectEntries.length) return [];
|
|
27997
28487
|
let sessionContext = "";
|
|
27998
28488
|
if (sessionID) {
|
|
@@ -28013,22 +28503,52 @@ function forSession(projectPath, sessionID, maxTokens) {
|
|
|
28013
28503
|
sessionContext += recentMsgs.map((m) => m.content).join("\n");
|
|
28014
28504
|
}
|
|
28015
28505
|
}
|
|
28506
|
+
if (!sessionContext.trim() && options?.contextHint) {
|
|
28507
|
+
sessionContext = options.contextHint;
|
|
28508
|
+
}
|
|
28016
28509
|
let scoredProject;
|
|
28017
28510
|
let scoredCross;
|
|
28018
|
-
if (sessionContext.trim().length > 20) {
|
|
28511
|
+
if (sessionContext.trim().length > 20 && isAvailable()) {
|
|
28512
|
+
let vectorScores;
|
|
28513
|
+
try {
|
|
28514
|
+
const [contextVec] = await embed([sessionContext], "query");
|
|
28515
|
+
const hits = vectorSearch(contextVec, 50, excludeFilter);
|
|
28516
|
+
vectorScores = new Map(hits.map((h3) => [h3.id, h3.similarity]));
|
|
28517
|
+
} catch (err) {
|
|
28518
|
+
warn("Vector scoring failed, falling back to FTS5:", err);
|
|
28519
|
+
vectorScores = /* @__PURE__ */ new Map();
|
|
28520
|
+
}
|
|
28521
|
+
if (vectorScores.size > 0) {
|
|
28522
|
+
const ftsScores = scoreEntriesFTS(sessionContext);
|
|
28523
|
+
const rawScored = projectEntries.map((entry) => {
|
|
28524
|
+
const vecScore = vectorScores.get(entry.id);
|
|
28525
|
+
const score = vecScore != null ? vecScore * entry.confidence : (ftsScores.get(entry.id) ?? 0) * entry.confidence;
|
|
28526
|
+
return { entry, score };
|
|
28527
|
+
});
|
|
28528
|
+
const matched = rawScored.filter((s) => s.score > 0);
|
|
28529
|
+
const matchedIds = new Set(matched.map((s) => s.entry.id));
|
|
28530
|
+
const safetyNet = projectEntries.filter((e) => !matchedIds.has(e.id)).slice(0, PROJECT_SAFETY_NET).map((e) => ({ entry: e, score: 1e-3 * e.confidence }));
|
|
28531
|
+
scoredProject = [...matched, ...safetyNet];
|
|
28532
|
+
scoredCross = crossEntries.filter((e) => vectorScores.has(e.id) || ftsScores.has(e.id)).map((e) => {
|
|
28533
|
+
const vecScore = vectorScores.get(e.id);
|
|
28534
|
+
const score = vecScore != null ? vecScore * e.confidence : (ftsScores.get(e.id) ?? 0) * e.confidence;
|
|
28535
|
+
return { entry: e, score };
|
|
28536
|
+
});
|
|
28537
|
+
} else {
|
|
28538
|
+
const ftsScores = scoreEntriesFTS(sessionContext);
|
|
28539
|
+
({ scoredProject, scoredCross } = scoreFTS(
|
|
28540
|
+
projectEntries,
|
|
28541
|
+
crossEntries,
|
|
28542
|
+
ftsScores
|
|
28543
|
+
));
|
|
28544
|
+
}
|
|
28545
|
+
} else if (sessionContext.trim().length > 20) {
|
|
28019
28546
|
const ftsScores = scoreEntriesFTS(sessionContext);
|
|
28020
|
-
|
|
28021
|
-
|
|
28022
|
-
|
|
28023
|
-
|
|
28024
|
-
|
|
28025
|
-
const matchedIds = new Set(matched.map((s) => s.entry.id));
|
|
28026
|
-
const safetyNet = projectEntries.filter((e) => !matchedIds.has(e.id)).slice(0, PROJECT_SAFETY_NET).map((e) => ({ entry: e, score: 1e-3 * e.confidence }));
|
|
28027
|
-
scoredProject = [...matched, ...safetyNet];
|
|
28028
|
-
scoredCross = crossEntries.filter((e) => ftsScores.has(e.id)).map((e) => ({
|
|
28029
|
-
entry: e,
|
|
28030
|
-
score: (ftsScores.get(e.id) ?? 0) * e.confidence
|
|
28031
|
-
}));
|
|
28547
|
+
({ scoredProject, scoredCross } = scoreFTS(
|
|
28548
|
+
projectEntries,
|
|
28549
|
+
crossEntries,
|
|
28550
|
+
ftsScores
|
|
28551
|
+
));
|
|
28032
28552
|
} else {
|
|
28033
28553
|
scoredProject = projectEntries.slice(0, NO_CONTEXT_FALLBACK_CAP).map((entry) => ({ entry, score: entry.confidence }));
|
|
28034
28554
|
scoredCross = crossEntries.slice(0, NO_CONTEXT_FALLBACK_CAP).map((entry) => ({ entry, score: entry.confidence }));
|
|
@@ -28074,11 +28594,33 @@ function forSession(projectPath, sessionID, maxTokens) {
|
|
|
28074
28594
|
}
|
|
28075
28595
|
return result;
|
|
28076
28596
|
}
|
|
28597
|
+
function scoreFTS(projectEntries, crossEntries, ftsScores) {
|
|
28598
|
+
const rawScored = projectEntries.map((entry) => ({
|
|
28599
|
+
entry,
|
|
28600
|
+
score: (ftsScores.get(entry.id) ?? 0) * entry.confidence
|
|
28601
|
+
}));
|
|
28602
|
+
const matched = rawScored.filter((s) => s.score > 0);
|
|
28603
|
+
const matchedIds = new Set(matched.map((s) => s.entry.id));
|
|
28604
|
+
const safetyNet = projectEntries.filter((e) => !matchedIds.has(e.id)).slice(0, PROJECT_SAFETY_NET).map((e) => ({ entry: e, score: 1e-3 * e.confidence }));
|
|
28605
|
+
const scoredProject = [...matched, ...safetyNet];
|
|
28606
|
+
const scoredCross = crossEntries.filter((e) => ftsScores.has(e.id)).map((e) => ({
|
|
28607
|
+
entry: e,
|
|
28608
|
+
score: (ftsScores.get(e.id) ?? 0) * e.confidence
|
|
28609
|
+
}));
|
|
28610
|
+
return { scoredProject, scoredCross };
|
|
28611
|
+
}
|
|
28077
28612
|
function all2() {
|
|
28078
28613
|
return db().query(
|
|
28079
28614
|
`SELECT ${KNOWLEDGE_COLS} FROM knowledge WHERE confidence > 0.2 ORDER BY confidence DESC, updated_at DESC`
|
|
28080
28615
|
).all();
|
|
28081
28616
|
}
|
|
28617
|
+
function crossProject() {
|
|
28618
|
+
return db().query(
|
|
28619
|
+
`SELECT ${KNOWLEDGE_COLS} FROM knowledge
|
|
28620
|
+
WHERE (project_id IS NULL OR cross_project = 1) AND confidence > 0.2
|
|
28621
|
+
ORDER BY confidence DESC, updated_at DESC`
|
|
28622
|
+
).all();
|
|
28623
|
+
}
|
|
28082
28624
|
function searchLike2(input) {
|
|
28083
28625
|
const terms = input.query.toLowerCase().split(/\s+/).filter((t2) => t2.length > 2);
|
|
28084
28626
|
if (!terms.length) return [];
|
|
@@ -28310,6 +28852,270 @@ function check2(projectPath) {
|
|
|
28310
28852
|
}
|
|
28311
28853
|
return issues;
|
|
28312
28854
|
}
|
|
28855
|
+
function dedupPairKey(idA, idB) {
|
|
28856
|
+
return idA < idB ? `${idA}:${idB}` : `${idB}:${idA}`;
|
|
28857
|
+
}
|
|
28858
|
+
function _dedup(entries, dryRun, embeddingThreshold = EMBEDDING_DEDUP_THRESHOLD) {
|
|
28859
|
+
if (entries.length < 2) return { clusters: [], totalRemoved: 0, pairSimilarities: /* @__PURE__ */ new Map(), entryTitles: /* @__PURE__ */ new Map() };
|
|
28860
|
+
const embeddingMap = /* @__PURE__ */ new Map();
|
|
28861
|
+
{
|
|
28862
|
+
const entryIds = entries.map((e) => e.id);
|
|
28863
|
+
const placeholders = entryIds.map(() => "?").join(",");
|
|
28864
|
+
const rows = db().query(`SELECT id, embedding FROM knowledge WHERE embedding IS NOT NULL AND id IN (${placeholders})`).all(...entryIds);
|
|
28865
|
+
for (const row of rows) {
|
|
28866
|
+
try {
|
|
28867
|
+
embeddingMap.set(row.id, fromBlob(row.embedding));
|
|
28868
|
+
} catch {
|
|
28869
|
+
info(`skipping corrupted embedding for entry ${row.id}`);
|
|
28870
|
+
}
|
|
28871
|
+
}
|
|
28872
|
+
}
|
|
28873
|
+
const neighborMap = /* @__PURE__ */ new Map();
|
|
28874
|
+
const pairSimilarities = /* @__PURE__ */ new Map();
|
|
28875
|
+
for (const entry of entries) {
|
|
28876
|
+
const neighbors = [];
|
|
28877
|
+
const entryVec = embeddingMap.get(entry.id);
|
|
28878
|
+
for (const other of entries) {
|
|
28879
|
+
if (other.id === entry.id) continue;
|
|
28880
|
+
const { coefficient, intersectionSize } = titleOverlap(entry.title, other.title);
|
|
28881
|
+
const titleMatch = coefficient >= FUZZY_DEDUP_THRESHOLD && intersectionSize >= FUZZY_DEDUP_MIN_OVERLAP;
|
|
28882
|
+
let embeddingMatch = false;
|
|
28883
|
+
let similarity = 0;
|
|
28884
|
+
if (entryVec) {
|
|
28885
|
+
const otherVec = embeddingMap.get(other.id);
|
|
28886
|
+
if (otherVec && entryVec.length === otherVec.length) {
|
|
28887
|
+
similarity = cosineSimilarity(entryVec, otherVec);
|
|
28888
|
+
embeddingMatch = similarity >= embeddingThreshold;
|
|
28889
|
+
}
|
|
28890
|
+
}
|
|
28891
|
+
if (similarity > 0) {
|
|
28892
|
+
const pk = dedupPairKey(entry.id, other.id);
|
|
28893
|
+
if (!pairSimilarities.has(pk)) {
|
|
28894
|
+
pairSimilarities.set(pk, similarity);
|
|
28895
|
+
}
|
|
28896
|
+
}
|
|
28897
|
+
if (titleMatch || embeddingMatch) {
|
|
28898
|
+
neighbors.push({ id: other.id, score: Math.max(coefficient, similarity) });
|
|
28899
|
+
}
|
|
28900
|
+
}
|
|
28901
|
+
neighbors.sort((a, b) => b.score - a.score);
|
|
28902
|
+
neighborMap.set(entry.id, neighbors);
|
|
28903
|
+
}
|
|
28904
|
+
const claimed = /* @__PURE__ */ new Set();
|
|
28905
|
+
const rawClusters = /* @__PURE__ */ new Map();
|
|
28906
|
+
const sortedIds = [...neighborMap.keys()].sort(
|
|
28907
|
+
(a, b) => neighborMap.get(b).length - neighborMap.get(a).length
|
|
28908
|
+
);
|
|
28909
|
+
for (const centerId of sortedIds) {
|
|
28910
|
+
if (claimed.has(centerId)) continue;
|
|
28911
|
+
claimed.add(centerId);
|
|
28912
|
+
const members = [centerId];
|
|
28913
|
+
for (const { id: neighborId } of neighborMap.get(centerId)) {
|
|
28914
|
+
if (claimed.has(neighborId)) continue;
|
|
28915
|
+
claimed.add(neighborId);
|
|
28916
|
+
members.push(neighborId);
|
|
28917
|
+
}
|
|
28918
|
+
if (members.length > 1) {
|
|
28919
|
+
rawClusters.set(centerId, members);
|
|
28920
|
+
}
|
|
28921
|
+
}
|
|
28922
|
+
const entryById = new Map(entries.map((e) => [e.id, e]));
|
|
28923
|
+
const result = [];
|
|
28924
|
+
let totalRemoved = 0;
|
|
28925
|
+
for (const members of rawClusters.values()) {
|
|
28926
|
+
if (members.length < 2) continue;
|
|
28927
|
+
const sorted = members.map((id) => entryById.get(id)).filter(Boolean).sort((a, b) => {
|
|
28928
|
+
if (b.confidence !== a.confidence) return b.confidence - a.confidence;
|
|
28929
|
+
if (b.updated_at !== a.updated_at) return b.updated_at - a.updated_at;
|
|
28930
|
+
return a.title.length - b.title.length;
|
|
28931
|
+
});
|
|
28932
|
+
const survivor = sorted[0];
|
|
28933
|
+
const merged = sorted.slice(1);
|
|
28934
|
+
result.push({
|
|
28935
|
+
surviving: { id: survivor.id, title: survivor.title },
|
|
28936
|
+
merged: merged.map((e) => ({ id: e.id, title: e.title }))
|
|
28937
|
+
});
|
|
28938
|
+
if (!dryRun) {
|
|
28939
|
+
for (const entry of merged) {
|
|
28940
|
+
remove(entry.id);
|
|
28941
|
+
}
|
|
28942
|
+
}
|
|
28943
|
+
totalRemoved += merged.length;
|
|
28944
|
+
}
|
|
28945
|
+
result.sort((a, b) => b.merged.length - a.merged.length);
|
|
28946
|
+
const entryTitles = new Map(entries.map((e) => [e.id, e.title]));
|
|
28947
|
+
return { clusters: result, totalRemoved, pairSimilarities, entryTitles };
|
|
28948
|
+
}
|
|
28949
|
+
async function deduplicate(projectPath, opts) {
|
|
28950
|
+
const pid = ensureProject(projectPath);
|
|
28951
|
+
const threshold = loadCalibratedThreshold(pid) ?? EMBEDDING_DEDUP_THRESHOLD;
|
|
28952
|
+
const entries = forProject(projectPath, false);
|
|
28953
|
+
return _dedup(entries, opts?.dryRun ?? true, threshold);
|
|
28954
|
+
}
|
|
28955
|
+
async function deduplicateGlobal(opts) {
|
|
28956
|
+
const threshold = loadCalibratedThreshold(null) ?? EMBEDDING_DEDUP_THRESHOLD;
|
|
28957
|
+
const entries = db().query(
|
|
28958
|
+
`SELECT ${KNOWLEDGE_COLS} FROM knowledge
|
|
28959
|
+
WHERE project_id IS NULL
|
|
28960
|
+
AND confidence > 0.2
|
|
28961
|
+
ORDER BY confidence DESC, updated_at DESC`
|
|
28962
|
+
).all();
|
|
28963
|
+
return _dedup(entries, opts?.dryRun ?? true, threshold);
|
|
28964
|
+
}
|
|
28965
|
+
var MIN_CALIBRATION_SAMPLES = 20;
|
|
28966
|
+
var DEFAULT_EMBEDDING_DEDUP_THRESHOLD = EMBEDDING_DEDUP_THRESHOLD;
|
|
28967
|
+
var AUTO_SIGNAL_MIN_SIMILARITY = 0.8;
|
|
28968
|
+
var AUTO_SIGNAL_MAX_PAIRS = 50;
|
|
28969
|
+
function recordDedupFeedback(input) {
|
|
28970
|
+
db().query(
|
|
28971
|
+
`INSERT INTO dedup_feedback
|
|
28972
|
+
(project_id, entry_a_title, entry_b_title, similarity, accepted, source, created_at)
|
|
28973
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)`
|
|
28974
|
+
).run(
|
|
28975
|
+
input.projectId,
|
|
28976
|
+
input.entryATitle,
|
|
28977
|
+
input.entryBTitle,
|
|
28978
|
+
input.similarity,
|
|
28979
|
+
input.accepted ? 1 : 0,
|
|
28980
|
+
input.source,
|
|
28981
|
+
Date.now()
|
|
28982
|
+
);
|
|
28983
|
+
}
|
|
28984
|
+
function recordDedupResultFeedback(projectId2, result, accepted, source) {
|
|
28985
|
+
for (const cluster of result.clusters) {
|
|
28986
|
+
for (const merged of cluster.merged) {
|
|
28987
|
+
const pk = dedupPairKey(cluster.surviving.id, merged.id);
|
|
28988
|
+
const similarity = result.pairSimilarities.get(pk);
|
|
28989
|
+
if (similarity != null && similarity > 0) {
|
|
28990
|
+
recordDedupFeedback({
|
|
28991
|
+
projectId: projectId2,
|
|
28992
|
+
entryATitle: cluster.surviving.title,
|
|
28993
|
+
entryBTitle: merged.title,
|
|
28994
|
+
similarity,
|
|
28995
|
+
accepted,
|
|
28996
|
+
source
|
|
28997
|
+
});
|
|
28998
|
+
}
|
|
28999
|
+
}
|
|
29000
|
+
}
|
|
29001
|
+
}
|
|
29002
|
+
function recordAutoSignals(projectId2, result) {
|
|
29003
|
+
const mergedPairs = /* @__PURE__ */ new Set();
|
|
29004
|
+
for (const cluster of result.clusters) {
|
|
29005
|
+
for (const merged of cluster.merged) {
|
|
29006
|
+
mergedPairs.add(dedupPairKey(cluster.surviving.id, merged.id));
|
|
29007
|
+
}
|
|
29008
|
+
}
|
|
29009
|
+
const titleMap = new Map(result.entryTitles);
|
|
29010
|
+
for (const cluster of result.clusters) {
|
|
29011
|
+
if (!titleMap.has(cluster.surviving.id)) {
|
|
29012
|
+
titleMap.set(cluster.surviving.id, cluster.surviving.title);
|
|
29013
|
+
}
|
|
29014
|
+
for (const m of cluster.merged) {
|
|
29015
|
+
if (!titleMap.has(m.id)) titleMap.set(m.id, m.title);
|
|
29016
|
+
}
|
|
29017
|
+
}
|
|
29018
|
+
const signals = [];
|
|
29019
|
+
for (const [pk, sim] of result.pairSimilarities) {
|
|
29020
|
+
if (sim < AUTO_SIGNAL_MIN_SIMILARITY) continue;
|
|
29021
|
+
if (mergedPairs.has(pk)) continue;
|
|
29022
|
+
const [idA, idB] = pk.split(":");
|
|
29023
|
+
const titleA = titleMap.get(idA);
|
|
29024
|
+
const titleB = titleMap.get(idB);
|
|
29025
|
+
if (!titleA || !titleB) continue;
|
|
29026
|
+
signals.push({ entryATitle: titleA, entryBTitle: titleB, similarity: sim });
|
|
29027
|
+
}
|
|
29028
|
+
const currentThreshold = loadCalibratedThreshold(projectId2) ?? DEFAULT_EMBEDDING_DEDUP_THRESHOLD;
|
|
29029
|
+
signals.sort((a, b) => Math.abs(a.similarity - currentThreshold) - Math.abs(b.similarity - currentThreshold));
|
|
29030
|
+
const capped = signals.slice(0, AUTO_SIGNAL_MAX_PAIRS);
|
|
29031
|
+
pruneDedupFeedback(projectId2);
|
|
29032
|
+
for (const s of capped) {
|
|
29033
|
+
recordDedupFeedback({
|
|
29034
|
+
projectId: projectId2,
|
|
29035
|
+
entryATitle: s.entryATitle,
|
|
29036
|
+
entryBTitle: s.entryBTitle,
|
|
29037
|
+
similarity: s.similarity,
|
|
29038
|
+
accepted: false,
|
|
29039
|
+
source: "auto_dedup"
|
|
29040
|
+
});
|
|
29041
|
+
}
|
|
29042
|
+
}
|
|
29043
|
+
function getDedupFeedback(projectId2) {
|
|
29044
|
+
const rows = projectId2 !== null ? db().query(
|
|
29045
|
+
"SELECT similarity, accepted, source FROM dedup_feedback WHERE project_id = ? ORDER BY similarity"
|
|
29046
|
+
).all(projectId2) : db().query(
|
|
29047
|
+
"SELECT similarity, accepted, source FROM dedup_feedback WHERE project_id IS NULL ORDER BY similarity"
|
|
29048
|
+
).all();
|
|
29049
|
+
return rows.map((r) => ({ similarity: r.similarity, accepted: r.accepted === 1, source: r.source }));
|
|
29050
|
+
}
|
|
29051
|
+
function getDedupFeedbackCount(projectId2) {
|
|
29052
|
+
const row = projectId2 !== null ? db().query("SELECT COUNT(*) as cnt FROM dedup_feedback WHERE project_id = ?").get(projectId2) : db().query("SELECT COUNT(*) as cnt FROM dedup_feedback WHERE project_id IS NULL").get();
|
|
29053
|
+
return row?.cnt ?? 0;
|
|
29054
|
+
}
|
|
29055
|
+
var MAX_FEEDBACK_ROWS_PER_PROJECT = 500;
|
|
29056
|
+
function pruneDedupFeedback(projectId2) {
|
|
29057
|
+
const count3 = getDedupFeedbackCount(projectId2);
|
|
29058
|
+
if (count3 <= MAX_FEEDBACK_ROWS_PER_PROJECT) return;
|
|
29059
|
+
const excess = count3 - MAX_FEEDBACK_ROWS_PER_PROJECT;
|
|
29060
|
+
if (projectId2 !== null) {
|
|
29061
|
+
db().query(
|
|
29062
|
+
`DELETE FROM dedup_feedback WHERE id IN (
|
|
29063
|
+
SELECT id FROM dedup_feedback WHERE project_id = ?
|
|
29064
|
+
ORDER BY created_at ASC LIMIT ?
|
|
29065
|
+
)`
|
|
29066
|
+
).run(projectId2, excess);
|
|
29067
|
+
} else {
|
|
29068
|
+
db().query(
|
|
29069
|
+
`DELETE FROM dedup_feedback WHERE id IN (
|
|
29070
|
+
SELECT id FROM dedup_feedback WHERE project_id IS NULL
|
|
29071
|
+
ORDER BY created_at ASC LIMIT ?
|
|
29072
|
+
)`
|
|
29073
|
+
).run(excess);
|
|
29074
|
+
}
|
|
29075
|
+
}
|
|
29076
|
+
function calibrateDedupThreshold(projectId2) {
|
|
29077
|
+
const feedback = getDedupFeedback(projectId2);
|
|
29078
|
+
if (feedback.length < MIN_CALIBRATION_SAMPLES) return null;
|
|
29079
|
+
const accepted = feedback.filter((f) => f.accepted);
|
|
29080
|
+
const rejected = feedback.filter((f) => !f.accepted);
|
|
29081
|
+
if (rejected.length === 0) {
|
|
29082
|
+
const minAccepted = Math.min(...accepted.map((f) => f.similarity));
|
|
29083
|
+
return Math.max(0.85, minAccepted - 5e-3);
|
|
29084
|
+
}
|
|
29085
|
+
if (accepted.length === 0) {
|
|
29086
|
+
warn("dedup calibration: all feedback is reject \u2014 keeping default threshold");
|
|
29087
|
+
return null;
|
|
29088
|
+
}
|
|
29089
|
+
const allSims = [...new Set(feedback.map((f) => f.similarity))].sort((a, b) => a - b);
|
|
29090
|
+
let bestThreshold = DEFAULT_EMBEDDING_DEDUP_THRESHOLD;
|
|
29091
|
+
let bestAccuracy = -1;
|
|
29092
|
+
for (let i = 0; i < allSims.length - 1; i++) {
|
|
29093
|
+
const candidate = (allSims[i] + allSims[i + 1]) / 2;
|
|
29094
|
+
const correctAccepted = accepted.filter((f) => f.similarity >= candidate).length;
|
|
29095
|
+
const correctRejected = rejected.filter((f) => f.similarity < candidate).length;
|
|
29096
|
+
const accuracy = (correctAccepted + correctRejected) / feedback.length;
|
|
29097
|
+
if (accuracy > bestAccuracy || accuracy === bestAccuracy && candidate > bestThreshold) {
|
|
29098
|
+
bestAccuracy = accuracy;
|
|
29099
|
+
bestThreshold = candidate;
|
|
29100
|
+
}
|
|
29101
|
+
}
|
|
29102
|
+
return Math.max(0.85, Math.min(0.98, bestThreshold));
|
|
29103
|
+
}
|
|
29104
|
+
/**
 * Store a calibrated threshold under `dedup_threshold:<project>` via `setKV`.
 * A nullish `projectId2` is stored under the `"global"` key. The stored value
 * is a JSON string holding the threshold, the sample size and the current
 * `Date.now()` timestamp.
 */
function saveCalibratedThreshold(projectId2, threshold, sampleSize) {
  const scope = projectId2 ?? "global";
  const record = { threshold, sampleSize, calibratedAt: Date.now() };
  setKV(`dedup_threshold:${scope}`, JSON.stringify(record));
}
|
|
29108
|
+
/**
 * Read back a threshold stored under `dedup_threshold:<project>` via `getKV`.
 * A nullish `projectId2` reads the `"global"` key.
 *
 * @returns The stored numeric threshold, or `null` when nothing is stored,
 *   the stored text cannot be parsed, or its `threshold` is not a number.
 */
function loadCalibratedThreshold(projectId2) {
  const stored = getKV(`dedup_threshold:${projectId2 ?? "global"}`);
  if (!stored) {
    return null;
  }
  try {
    // Destructuring stays inside the try: a stored JSON `null` throws here
    // and falls through to the `null` result.
    const { threshold } = JSON.parse(stored);
    return typeof threshold === "number" ? threshold : null;
  } catch {
    return null;
  }
}
|
|
28313
29119
|
|
|
28314
29120
|
// src/data.ts
|
|
28315
29121
|
var data_exports = {};
|
|
@@ -28334,11 +29140,11 @@ __export(data_exports, {
|
|
|
28334
29140
|
resolveId: () => resolveId,
|
|
28335
29141
|
wipeDatabase: () => wipeDatabase
|
|
28336
29142
|
});
|
|
28337
|
-
import { statSync as
|
|
29143
|
+
import { statSync as statSync4, unlinkSync, existsSync as existsSync5 } from "fs";
|
|
28338
29144
|
|
|
28339
29145
|
// src/agents-file.ts
|
|
28340
|
-
import { existsSync as
|
|
28341
|
-
import { dirname as dirname2, join as
|
|
29146
|
+
import { existsSync as existsSync4, readFileSync as readFileSync3, writeFileSync, mkdirSync as mkdirSync3, statSync as statSync3 } from "fs";
|
|
29147
|
+
import { dirname as dirname2, join as join7 } from "path";
|
|
28342
29148
|
var LORE_SECTION_START = "<!-- This section is maintained by the coding agent via lore (https://github.com/BYK/loreai) -->";
|
|
28343
29149
|
var LORE_SECTION_END = "<!-- End lore-managed section -->";
|
|
28344
29150
|
var ALL_START_MARKERS = [
|
|
@@ -28369,7 +29175,7 @@ function setCache(fp, entry) {
|
|
|
28369
29175
|
).run(key, value, value);
|
|
28370
29176
|
}
|
|
28371
29177
|
function clearLoreFileCache(projectPath) {
|
|
28372
|
-
db().query("DELETE FROM kv_meta WHERE key = ?").run(CACHE_PREFIX +
|
|
29178
|
+
db().query("DELETE FROM kv_meta WHERE key = ?").run(CACHE_PREFIX + join7(projectPath, LORE_FILE));
|
|
28373
29179
|
}
|
|
28374
29180
|
function splitFile(fileContent) {
|
|
28375
29181
|
const spans = [];
|
|
@@ -28482,7 +29288,7 @@ function exportToFile(input) {
|
|
|
28482
29288
|
const pointerBody = "\n## Long-term Knowledge\n\nFor long-term knowledge entries managed by [lore](https://github.com/BYK/loreai) (gotchas, patterns, decisions, architecture), see [`.lore.md`](.lore.md) in the project root.\n";
|
|
28483
29289
|
const newSection = LORE_SECTION_START + pointerBody + LORE_SECTION_END + "\n";
|
|
28484
29290
|
let fileContent = "";
|
|
28485
|
-
if (
|
|
29291
|
+
if (existsSync4(input.filePath)) {
|
|
28486
29292
|
fileContent = readFileSync3(input.filePath, "utf8");
|
|
28487
29293
|
}
|
|
28488
29294
|
const { before, after } = splitFile(fileContent);
|
|
@@ -28491,11 +29297,11 @@ function exportToFile(input) {
|
|
|
28491
29297
|
const suffix = after.trimStart();
|
|
28492
29298
|
const suffixWithSep = suffix.length > 0 ? "\n" + suffix : "";
|
|
28493
29299
|
const result = prefixWithSep + newSection + suffixWithSep;
|
|
28494
|
-
|
|
29300
|
+
mkdirSync3(dirname2(input.filePath), { recursive: true });
|
|
28495
29301
|
writeFileSync(input.filePath, result, "utf8");
|
|
28496
29302
|
}
|
|
28497
29303
|
function shouldImport(input) {
|
|
28498
|
-
if (!
|
|
29304
|
+
if (!existsSync4(input.filePath)) return false;
|
|
28499
29305
|
const fileContent = readFileSync3(input.filePath, "utf8");
|
|
28500
29306
|
const { section } = splitFile(fileContent);
|
|
28501
29307
|
if (section === null) {
|
|
@@ -28516,18 +29322,26 @@ function _importEntries(entries, projectPath) {
|
|
|
28516
29322
|
update(entry.id, { content: entry.content });
|
|
28517
29323
|
}
|
|
28518
29324
|
} else {
|
|
28519
|
-
|
|
28520
|
-
|
|
28521
|
-
|
|
28522
|
-
title
|
|
28523
|
-
|
|
28524
|
-
|
|
28525
|
-
|
|
28526
|
-
|
|
28527
|
-
|
|
29325
|
+
const pid = ensureProject(projectPath);
|
|
29326
|
+
const fuzzyMatch = findFuzzyDuplicate({ title: entry.title, projectId: pid });
|
|
29327
|
+
if (fuzzyMatch) {
|
|
29328
|
+
if (fuzzyMatch.title !== entry.title || get(fuzzyMatch.id)?.content !== entry.content) {
|
|
29329
|
+
update(fuzzyMatch.id, { content: entry.content });
|
|
29330
|
+
}
|
|
29331
|
+
} else {
|
|
29332
|
+
create({
|
|
29333
|
+
projectPath,
|
|
29334
|
+
category: entry.category,
|
|
29335
|
+
title: entry.title,
|
|
29336
|
+
content: entry.content,
|
|
29337
|
+
scope: "project",
|
|
29338
|
+
crossProject: false,
|
|
29339
|
+
id: entry.id
|
|
29340
|
+
});
|
|
29341
|
+
}
|
|
28528
29342
|
}
|
|
28529
29343
|
} else {
|
|
28530
|
-
const existing = forProject(projectPath,
|
|
29344
|
+
const existing = forProject(projectPath, false);
|
|
28531
29345
|
const titleMatch = existing.find(
|
|
28532
29346
|
(e) => e.title.toLowerCase() === entry.title.toLowerCase()
|
|
28533
29347
|
);
|
|
@@ -28545,7 +29359,7 @@ function _importEntries(entries, projectPath) {
|
|
|
28545
29359
|
}
|
|
28546
29360
|
}
|
|
28547
29361
|
function importFromFile(input) {
|
|
28548
|
-
if (!
|
|
29362
|
+
if (!existsSync4(input.filePath)) return;
|
|
28549
29363
|
const fileContent = readFileSync3(input.filePath, "utf8");
|
|
28550
29364
|
const { section } = splitFile(fileContent);
|
|
28551
29365
|
const textToParse = section ?? fileContent;
|
|
@@ -28554,25 +29368,25 @@ function importFromFile(input) {
|
|
|
28554
29368
|
_importEntries(fileEntries, input.projectPath);
|
|
28555
29369
|
}
|
|
28556
29370
|
function loreFileExists(projectPath) {
|
|
28557
|
-
return
|
|
29371
|
+
return existsSync4(join7(projectPath, LORE_FILE));
|
|
28558
29372
|
}
|
|
28559
29373
|
function exportLoreFile(projectPath) {
|
|
28560
29374
|
const sectionBody = buildSection(projectPath);
|
|
28561
29375
|
const content3 = LORE_FILE_HEADER + "\n" + sectionBody;
|
|
28562
29376
|
const contentHash2 = hashSection(content3);
|
|
28563
|
-
const fp =
|
|
29377
|
+
const fp = join7(projectPath, LORE_FILE);
|
|
28564
29378
|
const cached2 = getCache(fp);
|
|
28565
29379
|
if (cached2 && cached2.hash === contentHash2) {
|
|
28566
29380
|
return;
|
|
28567
29381
|
}
|
|
28568
29382
|
writeFileSync(fp, content3, "utf8");
|
|
28569
|
-
const { mtimeMs } =
|
|
29383
|
+
const { mtimeMs } = statSync3(fp);
|
|
28570
29384
|
setCache(fp, { mtimeMs, hash: contentHash2 });
|
|
28571
29385
|
}
|
|
28572
29386
|
function shouldImportLoreFile(projectPath) {
|
|
28573
|
-
const fp =
|
|
28574
|
-
if (!
|
|
28575
|
-
const { mtimeMs } =
|
|
29387
|
+
const fp = join7(projectPath, LORE_FILE);
|
|
29388
|
+
if (!existsSync4(fp)) return false;
|
|
29389
|
+
const { mtimeMs } = statSync3(fp);
|
|
28576
29390
|
const cached2 = getCache(fp);
|
|
28577
29391
|
if (cached2 && cached2.mtimeMs === mtimeMs) {
|
|
28578
29392
|
return false;
|
|
@@ -28588,12 +29402,17 @@ function shouldImportLoreFile(projectPath) {
|
|
|
28588
29402
|
return true;
|
|
28589
29403
|
}
|
|
28590
29404
|
function importLoreFile(projectPath) {
|
|
28591
|
-
const fp =
|
|
28592
|
-
if (!
|
|
29405
|
+
const fp = join7(projectPath, LORE_FILE);
|
|
29406
|
+
if (!existsSync4(fp)) return;
|
|
28593
29407
|
const fileContent = readFileSync3(fp, "utf8");
|
|
28594
29408
|
const fileEntries = parseEntriesFromSection(fileContent);
|
|
28595
29409
|
if (!fileEntries.length) return;
|
|
28596
29410
|
_importEntries(fileEntries, projectPath);
|
|
29411
|
+
try {
|
|
29412
|
+
const { mtimeMs } = statSync3(fp);
|
|
29413
|
+
setCache(fp, { mtimeMs, hash: hashSection(fileContent) });
|
|
29414
|
+
} catch {
|
|
29415
|
+
}
|
|
28597
29416
|
}
|
|
28598
29417
|
|
|
28599
29418
|
// src/data.ts
|
|
@@ -28668,10 +29487,10 @@ function globalStats() {
|
|
|
28668
29487
|
let db_size_bytes = 0;
|
|
28669
29488
|
try {
|
|
28670
29489
|
const p2 = dbPath();
|
|
28671
|
-
db_size_bytes =
|
|
29490
|
+
db_size_bytes = statSync4(p2).size;
|
|
28672
29491
|
const walPath = p2 + "-wal";
|
|
28673
|
-
if (
|
|
28674
|
-
db_size_bytes +=
|
|
29492
|
+
if (existsSync5(walPath)) {
|
|
29493
|
+
db_size_bytes += statSync4(walPath).size;
|
|
28675
29494
|
}
|
|
28676
29495
|
} catch {
|
|
28677
29496
|
}
|
|
@@ -28722,7 +29541,7 @@ function clearProject(projectPath) {
|
|
|
28722
29541
|
database.exec("ROLLBACK");
|
|
28723
29542
|
throw e;
|
|
28724
29543
|
}
|
|
28725
|
-
if (
|
|
29544
|
+
if (existsSync5(projectPath)) {
|
|
28726
29545
|
try {
|
|
28727
29546
|
exportLoreFile(projectPath);
|
|
28728
29547
|
} catch {
|
|
@@ -28793,7 +29612,7 @@ function clearKnowledge(projectPath) {
|
|
|
28793
29612
|
"SELECT COUNT(*) as c FROM knowledge WHERE project_id = ?"
|
|
28794
29613
|
).get(pid).c;
|
|
28795
29614
|
db().query("DELETE FROM knowledge WHERE project_id = ?").run(pid);
|
|
28796
|
-
if (
|
|
29615
|
+
if (existsSync5(projectPath)) {
|
|
28797
29616
|
try {
|
|
28798
29617
|
exportLoreFile(projectPath);
|
|
28799
29618
|
} catch {
|
|
@@ -28852,7 +29671,7 @@ function wipeDatabase() {
|
|
|
28852
29671
|
close();
|
|
28853
29672
|
for (const suffix of ["", "-wal", "-shm"]) {
|
|
28854
29673
|
const fp = p2 + suffix;
|
|
28855
|
-
if (
|
|
29674
|
+
if (existsSync5(fp)) {
|
|
28856
29675
|
try {
|
|
28857
29676
|
unlinkSync(fp);
|
|
28858
29677
|
} catch {
|
|
@@ -28893,7 +29712,7 @@ function backfillGitRemotes() {
|
|
|
28893
29712
|
for (const project of projects) {
|
|
28894
29713
|
let gitRemote = project.git_remote;
|
|
28895
29714
|
if (!gitRemote) {
|
|
28896
|
-
if (!
|
|
29715
|
+
if (!existsSync5(project.path)) continue;
|
|
28897
29716
|
gitRemote = getGitRemote(project.path);
|
|
28898
29717
|
if (!gitRemote) continue;
|
|
28899
29718
|
const existing = db().query(
|
|
@@ -28992,6 +29811,32 @@ var PATTERNS = [
|
|
|
28992
29811
|
regex: /(?:user |team |we )(?:always |usually |typically )(?:use|prefer|go with) (.+?)(?:\.|,|$)/gi,
|
|
28993
29812
|
category: "preference",
|
|
28994
29813
|
titleFn: (m) => `Typically uses ${m[1].trim()}`
|
|
29814
|
+
},
|
|
29815
|
+
// Process instruction patterns — match distilled observations recording
|
|
29816
|
+
// user assertions about workflow/process rules. The distillation observer
|
|
29817
|
+
// normalizes user instructions into "User stated always X" phrasing.
|
|
29818
|
+
// These require "stated/asserted/said" to avoid overlapping with the
|
|
29819
|
+
// existing "typically uses" pattern above (which already handles
|
|
29820
|
+
// "user always use/prefer/go with X").
|
|
29821
|
+
{
|
|
29822
|
+
regex: /(?:user |team |we )(?:stated |asserted |said )(?:to )?always (.+?)(?:\.|,|$)/gi,
|
|
29823
|
+
category: "preference",
|
|
29824
|
+
titleFn: (m) => `Always ${m[1].trim()}`
|
|
29825
|
+
},
|
|
29826
|
+
{
|
|
29827
|
+
regex: /(?:user |team |we )(?:stated |asserted |said )(?:to )?never (.+?)(?:\.|,|$)/gi,
|
|
29828
|
+
category: "preference",
|
|
29829
|
+
titleFn: (m) => `Never ${m[1].trim()}`
|
|
29830
|
+
},
|
|
29831
|
+
{
|
|
29832
|
+
regex: /(?:user |team |we )(?:stated |asserted |said )(?:to )?make sure to (.+?)(?:\.|,|$)/gi,
|
|
29833
|
+
category: "preference",
|
|
29834
|
+
titleFn: (m) => `Make sure to ${m[1].trim()}`
|
|
29835
|
+
},
|
|
29836
|
+
{
|
|
29837
|
+
regex: /(?:user |team |we )(?:stated |asserted |said )(?:to )?(?:don't|do not) forget (?:to )?(.+?)(?:\.|,|$)/gi,
|
|
29838
|
+
category: "preference",
|
|
29839
|
+
titleFn: (m) => `Always ${m[1].trim()}`
|
|
28995
29840
|
}
|
|
28996
29841
|
];
|
|
28997
29842
|
function extractPatterns(observations) {
|
|
@@ -29001,6 +29846,8 @@ function extractPatterns(observations) {
|
|
|
29001
29846
|
regex.lastIndex = 0;
|
|
29002
29847
|
let match;
|
|
29003
29848
|
while ((match = regex.exec(observations)) !== null) {
|
|
29849
|
+
const captures = match.slice(1);
|
|
29850
|
+
if (captures.some((c) => c && (c.trim().length <= 2 || /["\u201C\u201D`\u2018\u2019]/.test(c)))) continue;
|
|
29004
29851
|
const title = titleFn(match);
|
|
29005
29852
|
const key = title.toLowerCase();
|
|
29006
29853
|
if (seen.has(key)) continue;
|
|
@@ -29116,11 +29963,21 @@ function getSessionState(sessionID) {
|
|
|
29116
29963
|
if (!state) {
|
|
29117
29964
|
state = makeSessionState();
|
|
29118
29965
|
state.forceMinLayer = loadForceMinLayer(sessionID);
|
|
29966
|
+
const persisted = loadSessionTracking(sessionID);
|
|
29967
|
+
if (persisted && persisted.lastTurnAt > 0) {
|
|
29968
|
+
state.dynamicContextCap = persisted.dynamicContextCap;
|
|
29969
|
+
state.bustRateEMA = persisted.bustRateEMA;
|
|
29970
|
+
state.interBustIntervalEMA = persisted.interBustIntervalEMA;
|
|
29971
|
+
state.lastLayer = persisted.lastLayer;
|
|
29972
|
+
state.lastKnownInput = persisted.lastKnownInput;
|
|
29973
|
+
state.lastTurnAt = persisted.lastTurnAt;
|
|
29974
|
+
state.lastBustAt = persisted.lastBustAt;
|
|
29975
|
+
}
|
|
29119
29976
|
sessionStates.set(sessionID, state);
|
|
29120
29977
|
}
|
|
29121
29978
|
return state;
|
|
29122
29979
|
}
|
|
29123
|
-
function onIdleResume(sessionID, thresholdMs, now = Date.now()) {
|
|
29980
|
+
function onIdleResume(sessionID, thresholdMs, now = Date.now(), skipCompact = false) {
|
|
29124
29981
|
if (thresholdMs <= 0) return { triggered: false };
|
|
29125
29982
|
const state = getSessionState(sessionID);
|
|
29126
29983
|
if (state.lastTurnAt === 0) return { triggered: false };
|
|
@@ -29130,7 +29987,7 @@ function onIdleResume(sessionID, thresholdMs, now = Date.now()) {
|
|
|
29130
29987
|
state.rawWindowCache = null;
|
|
29131
29988
|
state.distillationSnapshot = null;
|
|
29132
29989
|
state.cameOutOfIdle = true;
|
|
29133
|
-
state.postIdleCompact =
|
|
29990
|
+
state.postIdleCompact = !skipCompact;
|
|
29134
29991
|
return { triggered: true, idleMs };
|
|
29135
29992
|
}
|
|
29136
29993
|
function getLastTurnAt(sessionID) {
|
|
@@ -29221,6 +30078,19 @@ function inspectSessionState(sessionID) {
|
|
|
29221
30078
|
function setLastTurnAtForTest(sessionID, ms) {
|
|
29222
30079
|
getSessionState(sessionID).lastTurnAt = ms;
|
|
29223
30080
|
}
|
|
30081
|
+
/**
 * Hand the tracking fields of a session's in-memory state to
 * `saveSessionTracking`. Does nothing when `sessionStates` holds no entry
 * for `sessionID`.
 */
function saveGradientState(sessionID) {
  const tracked = sessionStates.get(sessionID);
  if (!tracked) {
    return;
  }
  // Only these fields are forwarded; everything else on the state is left out.
  const {
    dynamicContextCap,
    bustRateEMA,
    interBustIntervalEMA,
    lastLayer,
    lastKnownInput,
    lastTurnAt,
    lastBustAt
  } = tracked;
  saveSessionTracking(sessionID, {
    dynamicContextCap,
    bustRateEMA,
    interBustIntervalEMA,
    lastLayer,
    lastKnownInput,
    lastTurnAt,
    lastBustAt
  });
}
|
|
29224
30094
|
function loadDistillations(projectPath, sessionID) {
|
|
29225
30095
|
const pid = ensureProject(projectPath);
|
|
29226
30096
|
const query = sessionID ? "SELECT id, observations, generation, token_count, created_at, session_id FROM distillations WHERE project_id = ? AND session_id = ? AND archived = 0 ORDER BY created_at ASC" : "SELECT id, observations, generation, token_count, created_at, session_id FROM distillations WHERE project_id = ? AND archived = 0 ORDER BY created_at ASC";
|
|
@@ -29505,6 +30375,26 @@ function buildPrefixMessages(formatted) {
|
|
|
29505
30375
|
}
|
|
29506
30376
|
];
|
|
29507
30377
|
}
|
|
30378
|
+
// Keyword detectors applied to a distillation's observation text.
var DECISION_RE = /\b(?:decision|decided|chose|chosen|agreed)\b/i;
var GOTCHA_RE = /\b(?:gotcha|(?:critical|known|subtle)\s+bug|broken|crash(?:ed|es)?|regression)\b/i;
var ARCH_RE = /\b(?:architecture|design.(?:decision|pattern)|system.design)\b/i;

/**
 * Score how important a distillation looks, from 0 up to at most 1.
 *
 * Each keyword family found in `d.observations` adds its weight, and a
 * `generation` of 1 or more adds a further 0.2.
 */
function importanceBonus(d) {
  const keywordWeights = [
    [DECISION_RE, 0.3],
    [GOTCHA_RE, 0.2],
    [ARCH_RE, 0.1]
  ];
  let total = 0;
  for (const [pattern, weight] of keywordWeights) {
    if (pattern.test(d.observations)) {
      total += weight;
    }
  }
  if (d.generation >= 1) {
    total += 0.2;
  }
  return Math.min(total, 1);
}
|
|
30389
|
+
/**
 * Pick at most `limit` distillations, favouring later positions and
 * higher `importanceBonus`.
 *
 * When the input already fits, the input array itself is returned.
 * Otherwise each entry is scored as 70% position (index / last index) plus
 * 30% importance bonus, the top `limit` are kept, and the survivors are
 * returned ordered by ascending `created_at`.
 */
function selectDistillations(all3, limit) {
  if (all3.length <= limit) {
    return all3;
  }
  const lastIndex = all3.length - 1;
  const ranked = all3.map((d, i) => {
    const position = lastIndex > 0 ? i / lastIndex : 1;
    return { d, score: position * 0.7 + importanceBonus(d) * 0.3 };
  });
  ranked.sort((left, right) => right.score - left.score);
  const kept = ranked.slice(0, limit).map((entry) => entry.d);
  kept.sort((left, right) => left.created_at - right.created_at);
  return kept;
}
|
|
29508
30398
|
function distilledPrefix(distillations) {
|
|
29509
30399
|
if (!distillations.length) return [];
|
|
29510
30400
|
const formatted = formatDistillations(distillations);
|
|
@@ -29622,6 +30512,11 @@ function tryFitStable(input) {
|
|
|
29622
30512
|
}
|
|
29623
30513
|
return result;
|
|
29624
30514
|
}
|
|
30515
|
+
// Compression settings tried in order; the entry at index i is reported as layer i + 1.
// `rawFrac` / `distFrac` are fractions of the usable budget; `null` means
// "use the budget already computed by the caller".
var COMPRESSION_STAGES = [
  {
    strip: "none",
    rawFrac: null,
    distFrac: null,
    distLimit: Infinity,
    protectedTurns: 0,
    useStableWindow: true
  },
  {
    strip: "old-tools",
    rawFrac: 0.5,
    distFrac: null,
    distLimit: Infinity,
    protectedTurns: 2,
    useStableWindow: false
  },
  {
    strip: "all-tools",
    rawFrac: 0.55,
    distFrac: 0.15,
    distLimit: 5,
    protectedTurns: 0,
    useStableWindow: false
  }
];
|
|
29625
30520
|
var urgentDistillationMap = /* @__PURE__ */ new Map();
|
|
29626
30521
|
function needsUrgentDistillation(sessionID) {
|
|
29627
30522
|
const v = urgentDistillationMap.get(sessionID) ?? false;
|
|
@@ -29653,7 +30548,7 @@ function transformInner(input) {
|
|
|
29653
30548
|
if (calibrated) return true;
|
|
29654
30549
|
return result.totalTokens * UNCALIBRATED_SAFETY <= maxInput;
|
|
29655
30550
|
}
|
|
29656
|
-
if (calibrated && sessState.lastLayer >= 1 && input.messages.length >= sessState.lastKnownMessageCount) {
|
|
30551
|
+
if (calibrated && sessState.lastLayer >= 1 && sessState.lastLayer <= 3 && input.messages.length >= sessState.lastKnownMessageCount) {
|
|
29657
30552
|
effectiveMinLayer = Math.max(effectiveMinLayer, sessState.lastLayer);
|
|
29658
30553
|
}
|
|
29659
30554
|
const postIdleCompact = sessState.postIdleCompact;
|
|
@@ -29691,7 +30586,8 @@ function transformInner(input) {
|
|
|
29691
30586
|
totalTokens: Math.max(0, messageTokens),
|
|
29692
30587
|
usable,
|
|
29693
30588
|
distilledBudget,
|
|
29694
|
-
rawBudget
|
|
30589
|
+
rawBudget,
|
|
30590
|
+
refreshLtm: false
|
|
29695
30591
|
};
|
|
29696
30592
|
}
|
|
29697
30593
|
const turnStart = currentTurnStart(input.messages);
|
|
@@ -29701,67 +30597,52 @@ function transformInner(input) {
|
|
|
29701
30597
|
const msgs = distilledPrefix(distillations);
|
|
29702
30598
|
return { messages: msgs, tokens: msgs.reduce((sum, m) => sum + estimateMessage(m), 0) };
|
|
29703
30599
|
})();
|
|
29704
|
-
|
|
29705
|
-
const
|
|
29706
|
-
|
|
29707
|
-
|
|
29708
|
-
|
|
29709
|
-
|
|
29710
|
-
|
|
29711
|
-
|
|
29712
|
-
|
|
29713
|
-
|
|
29714
|
-
|
|
29715
|
-
|
|
29716
|
-
|
|
29717
|
-
|
|
29718
|
-
|
|
29719
|
-
|
|
29720
|
-
|
|
29721
|
-
|
|
29722
|
-
|
|
30600
|
+
for (let s = 0; s < COMPRESSION_STAGES.length; s++) {
|
|
30601
|
+
const stageLayer = s + 1;
|
|
30602
|
+
if (effectiveMinLayer > stageLayer) continue;
|
|
30603
|
+
const stage = COMPRESSION_STAGES[s];
|
|
30604
|
+
const stageRawBudget = stage.rawFrac !== null ? Math.floor(usable * stage.rawFrac) : rawBudget;
|
|
30605
|
+
const stageDistBudget = stage.distFrac !== null ? Math.floor(usable * stage.distFrac) : distilledBudget;
|
|
30606
|
+
let stagePrefix = cached2.messages;
|
|
30607
|
+
let stagePrefixTokens = cached2.tokens;
|
|
30608
|
+
if (stage.distLimit !== Infinity && distillations.length > stage.distLimit) {
|
|
30609
|
+
const trimmed = selectDistillations(distillations, stage.distLimit);
|
|
30610
|
+
stagePrefix = distilledPrefix(trimmed);
|
|
30611
|
+
stagePrefixTokens = stagePrefix.reduce((sum, m) => sum + estimateMessage(m), 0);
|
|
30612
|
+
}
|
|
30613
|
+
let result;
|
|
30614
|
+
if (stage.useStableWindow && sid) {
|
|
30615
|
+
result = tryFitStable({
|
|
30616
|
+
messages: dedupMessages,
|
|
30617
|
+
prefix: stagePrefix,
|
|
30618
|
+
prefixTokens: stagePrefixTokens,
|
|
30619
|
+
distilledBudget: stageDistBudget,
|
|
30620
|
+
rawBudget: stageRawBudget,
|
|
30621
|
+
sessionID: sid,
|
|
30622
|
+
sessState
|
|
30623
|
+
});
|
|
30624
|
+
} else {
|
|
30625
|
+
sessState.rawWindowCache = null;
|
|
30626
|
+
result = tryFit({
|
|
30627
|
+
messages: dedupMessages,
|
|
30628
|
+
prefix: stagePrefix,
|
|
30629
|
+
prefixTokens: stagePrefixTokens,
|
|
30630
|
+
distilledBudget: stageDistBudget,
|
|
30631
|
+
rawBudget: stageRawBudget,
|
|
30632
|
+
strip: stage.strip,
|
|
30633
|
+
protectedTurns: stage.protectedTurns
|
|
30634
|
+
});
|
|
30635
|
+
}
|
|
30636
|
+
if (fitsWithSafetyMargin(result)) {
|
|
30637
|
+
if (sid && (s > 0 || cached2.tokens === 0)) {
|
|
29723
30638
|
urgentDistillationMap.set(sid, true);
|
|
29724
30639
|
}
|
|
29725
|
-
return { ...
|
|
30640
|
+
return { ...result, layer: stageLayer, usable, distilledBudget, rawBudget, refreshLtm: false };
|
|
29726
30641
|
}
|
|
29727
30642
|
}
|
|
29728
30643
|
sessState.rawWindowCache = null;
|
|
29729
|
-
if (effectiveMinLayer <= 2) {
|
|
29730
|
-
const layer2 = tryFit({
|
|
29731
|
-
messages: dedupMessages,
|
|
29732
|
-
prefix: cached2.messages,
|
|
29733
|
-
prefixTokens: cached2.tokens,
|
|
29734
|
-
distilledBudget,
|
|
29735
|
-
rawBudget: Math.floor(usable * 0.5),
|
|
29736
|
-
// give raw more room
|
|
29737
|
-
strip: "old-tools",
|
|
29738
|
-
protectedTurns: 2
|
|
29739
|
-
});
|
|
29740
|
-
if (fitsWithSafetyMargin(layer2)) {
|
|
29741
|
-
if (sid) urgentDistillationMap.set(sid, true);
|
|
29742
|
-
return { ...layer2, layer: 2, usable, distilledBudget, rawBudget };
|
|
29743
|
-
}
|
|
29744
|
-
}
|
|
29745
|
-
const trimmedDistillations = distillations.slice(-5);
|
|
29746
|
-
const trimmedPrefix = distilledPrefix(trimmedDistillations);
|
|
29747
|
-
const trimmedPrefixTokens = trimmedPrefix.reduce(
|
|
29748
|
-
(sum, m) => sum + estimateMessage(m),
|
|
29749
|
-
0
|
|
29750
|
-
);
|
|
29751
|
-
const layer3 = tryFit({
|
|
29752
|
-
messages: dedupMessages,
|
|
29753
|
-
prefix: trimmedPrefix,
|
|
29754
|
-
prefixTokens: trimmedPrefixTokens,
|
|
29755
|
-
distilledBudget: Math.floor(usable * 0.15),
|
|
29756
|
-
rawBudget: Math.floor(usable * 0.55),
|
|
29757
|
-
strip: "all-tools"
|
|
29758
|
-
});
|
|
29759
|
-
if (fitsWithSafetyMargin(layer3)) {
|
|
29760
|
-
if (sid) urgentDistillationMap.set(sid, true);
|
|
29761
|
-
return { ...layer3, layer: 3, usable, distilledBudget, rawBudget };
|
|
29762
|
-
}
|
|
29763
30644
|
if (sid) urgentDistillationMap.set(sid, true);
|
|
29764
|
-
const nuclearDistillations = distillations
|
|
30645
|
+
const nuclearDistillations = selectDistillations(distillations, 2);
|
|
29765
30646
|
const nuclearPrefix = distilledPrefix(nuclearDistillations);
|
|
29766
30647
|
const nuclearPrefixTokens = nuclearPrefix.reduce(
|
|
29767
30648
|
(sum, m) => sum + estimateMessage(m),
|
|
@@ -29800,7 +30681,8 @@ function transformInner(input) {
|
|
|
29800
30681
|
totalTokens: nuclearPrefixTokens + nuclearRawTokens,
|
|
29801
30682
|
usable,
|
|
29802
30683
|
distilledBudget,
|
|
29803
|
-
rawBudget
|
|
30684
|
+
rawBudget,
|
|
30685
|
+
refreshLtm: true
|
|
29804
30686
|
};
|
|
29805
30687
|
}
|
|
29806
30688
|
function transform2(input) {
|
|
@@ -29907,10 +30789,189 @@ function isWorkerSession(sessionID) {
|
|
|
29907
30789
|
return workerSessionIDs.has(sessionID);
|
|
29908
30790
|
}
|
|
29909
30791
|
|
|
29910
|
-
//
|
|
29911
|
-
|
|
29912
|
-
|
|
29913
|
-
|
|
30792
|
+
// ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
|
|
30793
|
+
// Singly linked list cell: a payload plus a link to the following cell.
var Node = class {
  value;
  next;
  constructor(payload) {
    this.value = payload;
  }
};
|
|
30800
|
+
/**
 * FIFO queue backed by a singly linked list of `Node` cells.
 * `enqueue`, `dequeue`, `peek` and `size` touch only the head or tail.
 */
var Queue = class {
  #head;
  #tail;
  #size;
  constructor() {
    this.clear();
  }
  /** Append `value` at the tail. */
  enqueue(value) {
    const fresh = new Node(value);
    if (this.#head) {
      this.#tail.next = fresh;
    } else {
      this.#head = fresh;
    }
    this.#tail = fresh;
    this.#size++;
  }
  /** Remove and return the head value; `undefined` when empty. */
  dequeue() {
    const front = this.#head;
    if (!front) {
      return;
    }
    this.#head = front.next;
    this.#size--;
    if (!this.#head) {
      // Queue became empty: drop the stale tail reference too.
      this.#tail = void 0;
    }
    return front.value;
  }
  /** Return the head value without removing it; `undefined` when empty. */
  peek() {
    return this.#head ? this.#head.value : void 0;
  }
  /** Remove every entry. */
  clear() {
    this.#head = void 0;
    this.#tail = void 0;
    this.#size = 0;
  }
  get size() {
    return this.#size;
  }
  /** Iterate head-to-tail without consuming entries. */
  *[Symbol.iterator]() {
    for (let cursor = this.#head; cursor; cursor = cursor.next) {
      yield cursor.value;
    }
  }
  /** Iterate head-to-tail, dequeuing each entry as it is yielded. */
  *drain() {
    while (this.#head) {
      yield this.dequeue();
    }
  }
};
|
|
30857
|
+
|
|
30858
|
+
// ../../node_modules/.bun/p-limit@7.3.0/node_modules/p-limit/index.js
|
|
30859
|
+
/**
 * Build a limiter function that runs at most `concurrency` tasks at once.
 *
 * @param concurrency A positive integer or Infinity, or an options object
 *   `{ concurrency, rejectOnClear }`.
 * @returns A function `(fn, ...args) => Promise` that settles with the
 *   outcome of `fn(...args)` once a slot has been available. It also carries
 *   `activeCount`, `pendingCount`, `clearQueue()`, a settable `concurrency`,
 *   and `map(iterable, fn)`.
 * @throws {TypeError} When `concurrency` fails validation or `rejectOnClear`
 *   is not a boolean.
 */
function pLimit(concurrency) {
  let rejectOnClear = false;
  if (typeof concurrency === "object") {
    // Options-object form.
    ({ concurrency, rejectOnClear = false } = concurrency);
  }
  validateConcurrency(concurrency);
  if (typeof rejectOnClear !== "boolean") {
    throw new TypeError("Expected `rejectOnClear` to be a boolean");
  }

  const waiting = new Queue();
  let running = 0;

  // Start the oldest waiting task when a slot is free.
  const startNextIfPossible = () => {
    const slotFree = running < concurrency;
    if (!slotFree || waiting.size <= 0) {
      return;
    }
    running++;
    waiting.dequeue().run();
  };

  // A task finished: free its slot and let the next one in.
  const releaseSlot = () => {
    running--;
    startNextIfPossible();
  };

  // Run one task, forward its outcome to the caller, then free the slot.
  const execute = async (task, settle, taskArgs) => {
    // Wrapping in an async IIFE turns a synchronous throw into a rejection.
    const outcome = (async () => task(...taskArgs))();
    settle(outcome);
    try {
      await outcome;
    } catch {
      // The caller's promise already carries the rejection; only the slot
      // bookkeeping matters here.
    }
    releaseSlot();
  };

  // Park a task until `run` is invoked on its queue entry.
  const schedule = (task, settle, reject, taskArgs) => {
    const entry = { reject };
    const started = new Promise((signalStart) => {
      entry.run = signalStart;
      waiting.enqueue(entry);
    });
    started.then(() => execute(task, settle, taskArgs));
    if (running < concurrency) {
      startNextIfPossible();
    }
  };

  const generator = (function_, ...arguments_) => new Promise((resolve, reject) => {
    schedule(function_, resolve, reject, arguments_);
  });

  Object.defineProperties(generator, {
    activeCount: {
      get: () => running
    },
    pendingCount: {
      get: () => waiting.size
    },
    clearQueue: {
      value() {
        if (rejectOnClear) {
          // Reject every parked task with an abort reason.
          const abortError = AbortSignal.abort().reason;
          while (waiting.size > 0) {
            waiting.dequeue().reject(abortError);
          }
          return;
        }
        // Otherwise parked tasks are simply dropped.
        waiting.clear();
      }
    },
    concurrency: {
      get: () => concurrency,
      set(newConcurrency) {
        validateConcurrency(newConcurrency);
        concurrency = newConcurrency;
        // Fill any newly opened slots on the next microtask.
        queueMicrotask(() => {
          while (running < concurrency && waiting.size > 0) {
            startNextIfPossible();
          }
        });
      }
    },
    map: {
      async value(iterable, function_) {
        // `this` is the limiter itself when invoked as `limit.map(...)`.
        const pending = Array.from(iterable, (item, position) => this(function_, item, position));
        return Promise.all(pending);
      }
    }
  });
  return generator;
}
|
|
30942
|
+
/**
 * Guard for limiter concurrency values.
 *
 * @throws {TypeError} Unless `concurrency` is a positive integer or
 *   positive infinity.
 */
function validateConcurrency(concurrency) {
  const wholeOrUnbounded = Number.isInteger(concurrency) || concurrency === Number.POSITIVE_INFINITY;
  if (wholeOrUnbounded && concurrency > 0) {
    return;
  }
  throw new TypeError("Expected `concurrency` to be a number from 1 and up");
}
|
|
30947
|
+
|
|
30948
|
+
// src/session-limiter.ts
|
|
30949
|
+
/**
 * Create a keyed pool of single-slot limiters (`pLimit(1)`), one per key,
 * created lazily on first request.
 *
 * @returns `{ get, isBusy, clear }`:
 *   - `get(key)` returns the key's limiter, creating it if needed;
 *   - `isBusy(key)` is true when that limiter has active or pending work
 *     (false when no limiter exists for the key);
 *   - `clear()` forgets every limiter.
 */
function createLimiterPool() {
  const limiters = /* @__PURE__ */ new Map();
  function get2(key) {
    const existing = limiters.get(key);
    if (existing) {
      return existing;
    }
    const created = pLimit(1);
    limiters.set(key, created);
    return created;
  }
  function isBusy(key) {
    const limiter = limiters.get(key);
    if (!limiter) {
      return false;
    }
    return limiter.activeCount + limiter.pendingCount > 0;
  }
  function clear() {
    limiters.clear();
  }
  return { get: get2, isBusy, clear };
}
|
|
30968
|
+
var distillLimiter = createLimiterPool();
|
|
30969
|
+
var curatorLimiter = createLimiterPool();
|
|
30970
|
+
|
|
30971
|
+
// src/distillation.ts
|
|
30972
|
+
/**
 * Ratio of distilled size to the square root of the source size.
 * A source size of zero or less yields 0.
 */
function compressionRatio(distilledTokens, sourceTokens) {
  return sourceTokens <= 0 ? 0 : distilledTokens / Math.sqrt(sourceTokens);
}
|
|
29915
30976
|
function maxAllowedExpansion(sourceTokens) {
|
|
29916
30977
|
if (sourceTokens < 100) return sourceTokens * 5;
|
|
@@ -30151,6 +31212,9 @@ function resetOrphans(projectPath, sessionID) {
|
|
|
30151
31212
|
return orphans.length;
|
|
30152
31213
|
}
|
|
30153
31214
|
/**
 * Public entry point: routes the call to `runInner` through the limiter
 * that `distillLimiter` returns for `input.sessionID`, and resolves with
 * whatever that limiter call yields.
 */
async function run(input) {
  const limiter = distillLimiter.get(input.sessionID);
  return limiter(() => runInner(input));
}
|
|
31217
|
+
async function runInner(input) {
|
|
30154
31218
|
const orphans = resetOrphans(input.projectPath, input.sessionID);
|
|
30155
31219
|
if (orphans > 0) {
|
|
30156
31220
|
info(
|
|
@@ -30194,7 +31258,7 @@ async function run(input) {
|
|
|
30194
31258
|
}
|
|
30195
31259
|
}
|
|
30196
31260
|
if (!input.skipMeta && gen0Count(input.projectPath, input.sessionID) >= cfg.distillation.metaThreshold) {
|
|
30197
|
-
await
|
|
31261
|
+
await metaDistillInner({
|
|
30198
31262
|
llm: input.llm,
|
|
30199
31263
|
projectPath: input.projectPath,
|
|
30200
31264
|
sessionID: input.sessionID,
|
|
@@ -30244,17 +31308,25 @@ async function distillSegment(input) {
|
|
|
30244
31308
|
);
|
|
30245
31309
|
return null;
|
|
30246
31310
|
}
|
|
30247
|
-
|
|
30248
|
-
|
|
30249
|
-
|
|
30250
|
-
|
|
30251
|
-
|
|
30252
|
-
|
|
30253
|
-
|
|
30254
|
-
|
|
30255
|
-
|
|
30256
|
-
|
|
30257
|
-
|
|
31311
|
+
let distillId;
|
|
31312
|
+
db().exec("BEGIN IMMEDIATE");
|
|
31313
|
+
try {
|
|
31314
|
+
distillId = storeDistillation({
|
|
31315
|
+
projectPath: input.projectPath,
|
|
31316
|
+
sessionID: input.sessionID,
|
|
31317
|
+
observations: result.observations,
|
|
31318
|
+
sourceIDs: input.messages.map((m) => m.id),
|
|
31319
|
+
generation: 0,
|
|
31320
|
+
rCompression: rComp,
|
|
31321
|
+
cNorm,
|
|
31322
|
+
callType: input.callType
|
|
31323
|
+
});
|
|
31324
|
+
markDistilled(input.messages.map((m) => m.id));
|
|
31325
|
+
db().exec("COMMIT");
|
|
31326
|
+
} catch (e) {
|
|
31327
|
+
db().exec("ROLLBACK");
|
|
31328
|
+
throw e;
|
|
31329
|
+
}
|
|
30258
31330
|
info(
|
|
30259
31331
|
`distill segment: ${input.messages.length} msgs, ${sourceTokens}\u2192${distilledTokens} tokens, R=${rComp.toFixed(2)}, C_norm=${cNorm.toFixed(3)}`
|
|
30260
31332
|
);
|
|
@@ -30267,7 +31339,8 @@ async function distillSegment(input) {
|
|
|
30267
31339
|
embedDistillation(distillId, result.observations);
|
|
30268
31340
|
}
|
|
30269
31341
|
if (config2().knowledge.enabled) {
|
|
30270
|
-
|
|
31342
|
+
const patterns = extractPatterns(result.observations);
|
|
31343
|
+
for (const pat of patterns) {
|
|
30271
31344
|
try {
|
|
30272
31345
|
create({
|
|
30273
31346
|
projectPath: input.projectPath,
|
|
@@ -30280,10 +31353,16 @@ async function distillSegment(input) {
|
|
|
30280
31353
|
} catch {
|
|
30281
31354
|
}
|
|
30282
31355
|
}
|
|
31356
|
+
if (patterns.length > 0) {
|
|
31357
|
+
info(`pattern extraction: ${patterns.length} entries from distillation`);
|
|
31358
|
+
}
|
|
30283
31359
|
}
|
|
30284
31360
|
return result;
|
|
30285
31361
|
}
|
|
30286
31362
|
/**
 * Public meta-distillation entry point.
 *
 * Looks up the limiter slot for `input.sessionID` on `distillLimiter` and hands
 * it a task that invokes `metaDistillInner(input)`.
 *
 * @param {object} input - Meta-distillation request; `input.sessionID` selects the limiter slot.
 * @returns {Promise<*>} Whatever the limiter slot returns for the scheduled task.
 */
async function metaDistill(input) {
  const schedule = distillLimiter.get(input.sessionID);
  const task = () => metaDistillInner(input);
  return schedule(task);
}
|
|
31365
|
+
async function metaDistillInner(input) {
|
|
30287
31366
|
const existing = loadGen0(input.projectPath, input.sessionID);
|
|
30288
31367
|
const priorMeta = latestMeta(input.projectPath, input.sessionID);
|
|
30289
31368
|
if (priorMeta) {
|
|
@@ -30325,196 +31404,1801 @@ async function metaDistill(input) {
|
|
|
30325
31404
|
db().exec("ROLLBACK");
|
|
30326
31405
|
throw e;
|
|
30327
31406
|
}
|
|
30328
|
-
if (isAvailable()) {
|
|
30329
|
-
embedDistillation(metaId, result.observations);
|
|
31407
|
+
if (isAvailable()) {
|
|
31408
|
+
embedDistillation(metaId, result.observations);
|
|
31409
|
+
}
|
|
31410
|
+
if (config2().knowledge.enabled) {
|
|
31411
|
+
const patterns = extractPatterns(result.observations);
|
|
31412
|
+
for (const pat of patterns) {
|
|
31413
|
+
try {
|
|
31414
|
+
create({
|
|
31415
|
+
projectPath: input.projectPath,
|
|
31416
|
+
category: pat.category,
|
|
31417
|
+
title: pat.title,
|
|
31418
|
+
content: pat.content,
|
|
31419
|
+
session: input.sessionID,
|
|
31420
|
+
scope: "project"
|
|
31421
|
+
});
|
|
31422
|
+
} catch {
|
|
31423
|
+
}
|
|
31424
|
+
}
|
|
31425
|
+
if (patterns.length > 0) {
|
|
31426
|
+
info(`pattern extraction: ${patterns.length} entries from meta-distillation`);
|
|
31427
|
+
}
|
|
31428
|
+
}
|
|
31429
|
+
return result;
|
|
31430
|
+
}
|
|
31431
|
+
/**
 * Fills in `r_compression` and `c_norm` for distillation rows where
 * `r_compression` is still NULL.
 *
 * For each such row the source message ids are parsed from `source_ids`, the
 * matching `temporal_messages` rows are loaded, and the two metrics are
 * computed via `compressionRatio` and `temporalCnorm`. Rows with no source ids
 * or no surviving source messages are left untouched.
 *
 * @returns {number} Number of distillation rows that were updated.
 */
function backfillMetrics() {
  const pending = db()
    .query("SELECT id, source_ids, token_count FROM distillations WHERE r_compression IS NULL")
    .all();
  if (pending.length === 0) return 0;

  const writeMetrics = db().prepare(
    "UPDATE distillations SET r_compression = ?, c_norm = ? WHERE id = ?"
  );

  let filled = 0;
  for (const row of pending) {
    const ids = parseSourceIds(row.source_ids);
    if (ids.length === 0) continue;

    // One "?" placeholder per id so the ids are bound, never interpolated.
    const marks = ids.map(() => "?").join(",");
    const origin = db()
      .query(`SELECT tokens, created_at FROM temporal_messages WHERE id IN (${marks})`)
      .all(...ids);
    if (origin.length === 0) continue;

    let originTokens = 0;
    const createdAts = [];
    for (const message of origin) {
      originTokens = originTokens + message.tokens;
      createdAts.push(message.created_at);
    }

    const ratio = compressionRatio(row.token_count, originTokens);
    const norm = temporalCnorm(createdAts);
    writeMetrics.run(ratio, norm, row.id);
    filled += 1;
  }

  if (filled > 0) {
    info(
      `backfilled metrics for ${filled} distillations (${pending.length - filled} skipped \u2014 missing sources)`
    );
  }
  return filled;
}
|
|
31462
|
+
|
|
31463
|
+
// src/curator.ts
|
|
31464
|
+
var curator_exports = {};
|
|
31465
|
+
__export(curator_exports, {
|
|
31466
|
+
MAX_ENTRY_CONTENT_LENGTH: () => MAX_ENTRY_CONTENT_LENGTH,
|
|
31467
|
+
applyOps: () => applyOps,
|
|
31468
|
+
consolidate: () => consolidate,
|
|
31469
|
+
parseOps: () => parseOps,
|
|
31470
|
+
resetCurationTracker: () => resetCurationTracker,
|
|
31471
|
+
run: () => run2
|
|
31472
|
+
});
|
|
31473
|
+
|
|
31474
|
+
// src/instruction-detect.ts
|
|
31475
|
+
var instruction_detect_exports = {};
|
|
31476
|
+
__export(instruction_detect_exports, {
|
|
31477
|
+
detectAndFormat: () => detectAndFormat,
|
|
31478
|
+
extractInstructionCandidates: () => extractInstructionCandidates,
|
|
31479
|
+
findRepeatedInstructions: () => findRepeatedInstructions,
|
|
31480
|
+
formatForCurator: () => formatForCurator
|
|
31481
|
+
});
|
|
31482
|
+
// Default number of distinct prior sessions a candidate must appear in
// (used by findRepeatedInstructions when no threshold is passed).
var DEFAULT_REPETITION_THRESHOLD = 2;
// Minimum vector-search similarity for a hit to count (used by findRepeatedInstructions).
var VECTOR_SIMILARITY_THRESHOLD = 0.5;
// Hard cap on how many candidates extractInstructionCandidates returns.
var MAX_CANDIDATES = 5;
// Phrasings treated as instructions. Capture group 1 is the instruction body
// (10-80 characters, lazily matched up to ".", ",", "!" or end of input).
var INSTRUCTION_PATTERNS = [
  /\balways\b (.{10,80}?)(?:\.|,|!|$)/gi,
  /\bnever\b (.{10,80}?)(?:\.|,|!|$)/gi,
  /\bmake sure to (.{10,80}?)(?:\.|,|!|$)/gi,
  /\bdon'?t forget (?:to )?(.{10,80}?)(?:\.|,|!|$)/gi,
  /\bplease (?:always |make sure (?:to )?)(.{10,80}?)(?:\.|,|!|$)/gi,
  /\bI (?:want|need|prefer|expect) (?:you to )?(.{10,80}?)(?:\.|,|!|$)/gi
];

/**
 * Scans user messages for instruction-like phrases.
 *
 * Only messages whose `role` is "user" are inspected. Every pattern in
 * INSTRUCTION_PATTERNS is run over the message content; captured phrases are
 * trimmed, de-duplicated case-insensitively, and collected until
 * MAX_CANDIDATES is reached.
 *
 * @param {Array<{role: string, content: string, session_id: *}>} messages
 * @returns {Array<{text: string, sessionID: *}>} Candidates in discovery order.
 */
function extractInstructionCandidates(messages) {
  const picked = [];
  const dedupe = new Set();
  for (const msg of messages) {
    if (msg.role !== "user") continue;
    for (const pattern of INSTRUCTION_PATTERNS) {
      // The patterns are shared /g regexes, so rewind before each message.
      pattern.lastIndex = 0;
      for (
        let hit = pattern.exec(msg.content);
        hit !== null;
        hit = pattern.exec(msg.content)
      ) {
        const phrase = hit[1]?.trim();
        if (!phrase || phrase.length < 10) continue;
        const key = phrase.toLowerCase();
        if (dedupe.has(key)) continue;
        dedupe.add(key);
        picked.push({ text: phrase, sessionID: msg.session_id });
        if (picked.length >= MAX_CANDIDATES) return picked;
      }
    }
  }
  return picked;
}
|
|
31517
|
+
/**
 * Determines which instruction candidates also show up in other sessions.
 *
 * Each candidate is looked up two ways: a vector search over distillations
 * (only when embeddings are available and the batch embed succeeded) and an
 * FTS search built from up to five filtered terms (only when at least two
 * terms remain). Hits from the current session are ignored. A candidate is
 * reported when the number of distinct other sessions reaches the threshold.
 *
 * @param {object} input
 * @param {string} input.projectPath
 * @param {*} input.currentSessionID - Session to exclude from the counts.
 * @param {Array<{text: string}>} input.candidates
 * @param {number} [input.threshold] - Defaults to DEFAULT_REPETITION_THRESHOLD.
 * @returns {Promise<Array<{instruction: string, priorSessionCount: number}>>}
 */
async function findRepeatedInstructions(input) {
  const minSessions = input.threshold ?? DEFAULT_REPETITION_THRESHOLD;
  if (input.candidates.length === 0) return [];

  const pid = ensureProject(input.projectPath);

  let vectors = [];
  if (isAvailable()) {
    try {
      const texts = input.candidates.map((c) => c.text);
      vectors = await embed(texts, "query");
    } catch (err) {
      // Embedding is optional; the FTS lookup below still runs.
      warn("instruction-detect: batch embedding failed:", err);
    }
  }

  const repeated = [];
  for (let idx = 0; idx < input.candidates.length; idx++) {
    const candidate = input.candidates[idx];
    const otherSessions = new Set();
    const remember = (hit) => {
      if (hit.session_id !== input.currentSessionID) {
        otherSessions.add(hit.session_id);
      }
    };

    if (idx < vectors.length) {
      for (const hit of vectorSearchAllDistillations(vectors[idx], pid, 20)) {
        if (hit.similarity >= VECTOR_SIMILARITY_THRESHOLD) remember(hit);
      }
    }

    const terms = filterTerms(candidate.text);
    if (terms.length >= 2) {
      const searchText = terms.slice(0, 5).join(" ");
      for (const hit of searchDistillationsFTS(pid, searchText)) {
        remember(hit);
      }
    }

    if (otherSessions.size >= minSessions) {
      repeated.push({
        instruction: candidate.text,
        priorSessionCount: otherSessions.size
      });
    }
  }
  return repeated;
}
|
|
31563
|
+
/**
 * Full-text search over a project's distillations.
 *
 * The raw query is converted with `ftsQueryOr`; when that yields EMPTY_QUERY
 * nothing is searched. Query failures are logged with `warn` and reported as
 * an empty result rather than thrown.
 *
 * @param {*} projectId2 - Project id bound to `d.project_id`.
 * @param {string} rawQuery - Free-text query.
 * @returns {Array<{id: *, session_id: *}>} Up to 30 rows ordered by FTS rank.
 */
function searchDistillationsFTS(projectId2, rawQuery) {
  const matchExpr = ftsQueryOr(rawQuery);
  if (matchExpr === EMPTY_QUERY) return [];

  const sql = [
    "SELECT d.id, d.session_id",
    "FROM distillation_fts f",
    "CROSS JOIN distillations d ON d.rowid = f.rowid",
    "WHERE distillation_fts MATCH ?",
    "AND d.project_id = ?",
    "ORDER BY rank LIMIT 30"
  ].join("\n");

  let rows;
  try {
    rows = db().query(sql).all(matchExpr, projectId2);
  } catch (err) {
    warn("instruction-detect: FTS search failed:", err);
    rows = [];
  }
  return rows;
}
|
|
31579
|
+
/**
 * Renders repeated instructions as a text section appended to the curator prompt.
 *
 * @param {Array<{instruction: string, priorSessionCount: number}>} instructions
 * @returns {string} Empty string when there is nothing to report; otherwise two
 *   leading newlines, a `---` rule, two header lines and one bullet per instruction.
 */
function formatForCurator(instructions) {
  if (instructions.length === 0) return "";

  const bullets = [];
  for (const item of instructions) {
    const plural = item.priorSessionCount !== 1 ? "s" : "";
    bullets.push(
      `- "${item.instruction}" (seen in ${item.priorSessionCount} prior session${plural})`
    );
  }

  // The two empty strings produce the blank lines that precede the "---" rule.
  const header = [
    "",
    "",
    "---",
    "CROSS-SESSION REPEATED INSTRUCTIONS (high-confidence preference candidates):",
    "The following user instructions have appeared in multiple prior sessions. These are strong candidates for \"preference\" entries:"
  ];
  return header.concat(bullets).join("\n");
}
|
|
31591
|
+
/**
 * End-to-end cross-session instruction detection for one session.
 *
 * Loads the session's messages, extracts instruction candidates, checks which
 * of them recur in other sessions, logs how many were found, and returns the
 * curator-prompt section produced by `formatForCurator`.
 *
 * @param {object} input
 * @param {string} input.projectPath
 * @param {*} input.sessionID - The current session (excluded from the recurrence check).
 * @param {number} [input.threshold] - Passed through to findRepeatedInstructions.
 * @returns {Promise<string>} Prompt section, or "" when there are no candidates.
 */
async function detectAndFormat(input) {
  const { projectPath, sessionID } = input;
  const candidates = extractInstructionCandidates(bySession(projectPath, sessionID));
  if (candidates.length === 0) return "";

  const repeated = await findRepeatedInstructions({
    projectPath,
    currentSessionID: sessionID,
    candidates,
    threshold: input.threshold
  });
  if (repeated.length > 0) {
    info(
      `instruction-detect: ${repeated.length} repeated instruction(s) found across sessions`
    );
  }
  return formatForCurator(repeated);
}
|
|
31608
|
+
|
|
31609
|
+
// src/curator.ts
|
|
31610
|
+
// Maximum number of characters of entry content that applyOps keeps; longer
// content is cut to this length and a "[truncated ...]" marker is appended.
var MAX_ENTRY_CONTENT_LENGTH = 1200;
|
|
31611
|
+
/**
 * Parses the curator model's reply into a list of operations.
 *
 * An optional Markdown code fence around the JSON is removed first. Both
 * "```json" and a bare "```" opening fence are accepted (the previous pattern
 * `json?` only matched "```jso"/"```json", so bare fences made valid replies
 * parse to an empty list).
 *
 * @param {string} text4 - Raw model output.
 * @returns {Array<object>} Array elements that are non-null objects with a
 *   string `op` property; `[]` when the text is not a JSON array.
 */
function parseOps(text4) {
  const cleaned = text4
    .trim()
    .replace(/^```(?:json)?\s*/i, "")
    .replace(/\s*```$/i, "");
  let parsed;
  try {
    parsed = JSON.parse(cleaned);
  } catch {
    // Unparseable model output is treated as "no operations".
    return [];
  }
  if (!Array.isArray(parsed)) return [];
  return parsed.filter(
    (op) => typeof op === "object" && op !== null && "op" in op && typeof op.op === "string"
  );
}
|
|
31623
|
+
/**
 * Applies curator operations (create / update / delete) to the knowledge store.
 *
 * Operations come from model output and are only shape-checked for a string
 * `op`, so `content` is validated here: a `create` without string content is
 * skipped (with a warning) instead of throwing on `.length` and aborting the
 * rest of the batch, and an `update` whose `content` is not a string leaves
 * the stored content unchanged. Content longer than MAX_ENTRY_CONTENT_LENGTH
 * is truncated and marked.
 *
 * @param {Array<object>} ops - Operations as returned by parseOps.
 * @param {object} input
 * @param {string} input.projectPath - Used for entries whose scope is "project".
 * @param {*} input.sessionID - Recorded as the creating session.
 * @param {boolean} [input.skipCreate] - When truthy, `create` ops are ignored.
 * @returns {{created: number, updated: number, deleted: number}} Counts of applied ops.
 */
function applyOps(ops, input) {
  const clampContent = (content) =>
    content.length > MAX_ENTRY_CONTENT_LENGTH
      ? content.slice(0, MAX_ENTRY_CONTENT_LENGTH) + " [truncated \u2014 entry too long]"
      : content;

  let created = 0;
  let updated = 0;
  let deleted = 0;
  // Entries whose content changed; their refs are synced after all ops ran.
  const idsToSync = [];

  for (const op of ops) {
    if (op.op === "create") {
      if (input.skipCreate) continue;
      if (typeof op.content !== "string") {
        warn("curator: skipping create op without string content");
        continue;
      }
      const id = create({
        projectPath: op.scope === "project" ? input.projectPath : void 0,
        category: op.category,
        title: op.title,
        content: clampContent(op.content),
        session: input.sessionID,
        scope: op.scope,
        crossProject: op.crossProject ?? true
      });
      idsToSync.push(id);
      created++;
    } else if (op.op === "update") {
      if (!get(op.id)) continue;
      const hasContent = typeof op.content === "string";
      const content3 = hasContent ? clampContent(op.content) : void 0;
      update(op.id, { content: content3, confidence: op.confidence });
      if (hasContent) idsToSync.push(op.id);
      updated++;
    } else if (op.op === "delete") {
      if (!get(op.id)) continue;
      remove(op.id);
      deleted++;
    }
  }

  for (const id of idsToSync) {
    syncRefs(id);
  }
  return { created, updated, deleted };
}
|
|
31664
|
+
// In-memory cache: sessionID -> timestamp of that session's last curation run.
var lastCuratedAt = /* @__PURE__ */ new Map();

/**
 * Returns the timestamp of the last curation run for a session.
 *
 * The in-memory cache is consulted first; on a miss the value is read via
 * `loadSessionTracking` (falling back to 0) and cached.
 *
 * @param {*} sessionID
 * @returns {number} Cached or persisted timestamp, or 0 when none is recorded.
 */
function getLastCuratedAt(sessionID) {
  const known = lastCuratedAt.get(sessionID);
  if (known === void 0) {
    const stamp = loadSessionTracking(sessionID)?.lastCuratedAt ?? 0;
    lastCuratedAt.set(sessionID, stamp);
    return stamp;
  }
  return known;
}
|
|
31673
|
+
/**
 * Public curation entry point.
 *
 * Returns zero counts when the curator is disabled in config or when
 * `curatorLimiter` reports the session as busy (the latter is logged);
 * otherwise schedules `runInner2(input)` on the session's limiter slot.
 *
 * @param {object} input - Curation request; `input.sessionID` selects the limiter slot.
 * @returns {Promise<{created: number, updated: number, deleted: number}>}
 */
async function run2(input) {
  const nothingDone = () => ({ created: 0, updated: 0, deleted: 0 });

  if (!config2().curator.enabled) return nothingDone();

  const { sessionID } = input;
  if (curatorLimiter.isBusy(sessionID)) {
    info(`curation skipped: already running for session ${sessionID.slice(0, 16)}`);
    return nothingDone();
  }

  const schedule = curatorLimiter.get(sessionID);
  return schedule(() => runInner2(input));
}
|
|
31682
|
+
/**
 * Performs one curation pass for a session.
 *
 * Steps, in order:
 *  1. Collect the session's messages created after the last curation run;
 *     stop with zero counts when fewer than three are new.
 *  2. Build the curator prompt from those messages and the project's existing
 *     entries, plus the cross-session instruction section (best-effort).
 *  3. Ask the model, parse its operations and apply them.
 *  4. If anything was created, deduplicate and recalibrate the dedup
 *     threshold (best-effort; failures are only logged).
 *  5. Record "now" as the session's last curation time, in memory and persisted.
 *
 * @param {object} input
 * @param {string} input.projectPath
 * @param {*} input.sessionID
 * @param {object} input.llm - Object exposing `prompt(system, user, options)`.
 * @param {*} [input.model] - Overrides the configured model.
 * @returns {Promise<{created: number, updated: number, deleted: number}>}
 */
async function runInner2(input) {
  const { projectPath, sessionID } = input;
  const cfg = config2();

  const history = bySession(projectPath, sessionID);
  const since = getLastCuratedAt(sessionID);
  const fresh = history.filter((m) => m.created_at > since);
  if (fresh.length < 3) return { created: 0, updated: 0, deleted: 0 };

  const transcript = fresh.map((m) => `[${m.role}] ${m.content}`).join("\n\n");
  const knownEntries = forProject(projectPath, false).map((entry) => ({
    id: entry.id,
    category: entry.category,
    title: entry.title,
    content: entry.content
  }));
  const basePrompt = curatorUser({
    messages: transcript,
    existing: knownEntries
  });

  let repeatedSection = "";
  try {
    repeatedSection = await detectAndFormat({ projectPath, sessionID });
  } catch (err) {
    warn("instruction-detect failed (non-fatal):", err);
  }

  const responseText = await input.llm.prompt(
    CURATOR_SYSTEM,
    basePrompt + repeatedSection,
    {
      model: input.model ?? cfg.model,
      workerID: "lore-curator",
      thinking: false,
      sessionID,
      maxTokens: 2048
    }
  );
  if (!responseText) return { created: 0, updated: 0, deleted: 0 };

  const result = applyOps(parseOps(responseText), { projectPath, sessionID });

  if (result.created > 0) {
    try {
      const dupes = await deduplicate(projectPath, { dryRun: false });
      if (dupes.totalRemoved > 0) {
        info(`post-curation dedup: merged ${dupes.totalRemoved} duplicate entries`);
        result.deleted += dupes.totalRemoved;
      }
      if (dupes.pairSimilarities.size > 0) {
        const pid = ensureProject(projectPath);
        recordAutoSignals(pid, dupes);
        const recalibrated = calibrateDedupThreshold(pid);
        if (recalibrated !== null) {
          saveCalibratedThreshold(pid, recalibrated, getDedupFeedbackCount(pid));
        }
      }
    } catch (err) {
      warn("post-curation dedup failed (non-fatal):", err);
    }
  }

  const now = Date.now();
  lastCuratedAt.set(sessionID, now);
  saveSessionTracking(sessionID, { lastCuratedAt: now });
  return result;
}
|
|
31747
|
+
/**
 * Drops cached "last curated at" timestamps.
 *
 * @param {*} [sessionID] - When truthy, only that session's cache entry is
 *   removed; otherwise the whole cache is cleared.
 * @returns {void}
 */
function resetCurationTracker(sessionID) {
  if (!sessionID) {
    lastCuratedAt.clear();
    return;
  }
  lastCuratedAt.delete(sessionID);
}
|
|
31754
|
+
/**
 * Asks the model to shrink a project's knowledge base when it has more
 * entries than `cfg.curator.maxEntries`.
 *
 * Nothing happens when the curator is disabled, when the entry count is within
 * the limit, or when the model returns an empty reply. Operations are applied
 * with `skipCreate: true`, so consolidation can only update or delete.
 *
 * @param {object} input
 * @param {string} input.projectPath
 * @param {*} input.sessionID
 * @param {object} input.llm - Object exposing `prompt(system, user, options)`.
 * @param {*} [input.model] - Overrides the configured model.
 * @returns {Promise<{updated: number, deleted: number}>}
 */
async function consolidate(input) {
  const cfg = config2();
  if (!cfg.curator.enabled) return { updated: 0, deleted: 0 };

  const entries = forProject(input.projectPath, false);
  if (entries.length <= cfg.curator.maxEntries) return { updated: 0, deleted: 0 };

  const prompt = consolidationUser({
    entries: entries.map((entry) => ({
      id: entry.id,
      category: entry.category,
      title: entry.title,
      content: entry.content
    })),
    targetMax: cfg.curator.maxEntries
  });

  const responseText = await input.llm.prompt(CONSOLIDATION_SYSTEM, prompt, {
    model: input.model ?? cfg.model,
    workerID: "lore-curator",
    thinking: false,
    sessionID: input.sessionID,
    maxTokens: 4096
  });
  if (!responseText) return { updated: 0, deleted: 0 };

  const applied = applyOps(parseOps(responseText), {
    projectPath: input.projectPath,
    sessionID: input.sessionID,
    // Consolidation must not add entries.
    skipCreate: true
  });
  return { updated: applied.updated, deleted: applied.deleted };
}
|
|
31785
|
+
|
|
31786
|
+
// src/import/index.ts
|
|
31787
|
+
var import_exports = {};
|
|
31788
|
+
__export(import_exports, {
|
|
31789
|
+
clearProviders: () => clearProviders,
|
|
31790
|
+
computeHash: () => computeHash,
|
|
31791
|
+
detectAll: () => detectAll,
|
|
31792
|
+
extractKnowledge: () => extractKnowledge,
|
|
31793
|
+
getProvider: () => getProvider2,
|
|
31794
|
+
getProviders: () => getProviders,
|
|
31795
|
+
isImported: () => isImported,
|
|
31796
|
+
listImports: () => listImports,
|
|
31797
|
+
recordImport: () => recordImport,
|
|
31798
|
+
registerProvider: () => registerProvider
|
|
31799
|
+
});
|
|
31800
|
+
|
|
31801
|
+
// src/import/providers/index.ts
|
|
31802
|
+
// Registry of import providers, in registration order.
var providers = [];

/**
 * Adds a provider to the end of the registry.
 * @param {{name: string}} provider
 * @returns {void}
 */
function registerProvider(provider) {
  providers[providers.length] = provider;
}

/**
 * @returns {Array<object>} The live registry array (not a copy).
 */
function getProviders() {
  return providers;
}

/**
 * Looks up a provider by its `name`.
 * @param {string} name
 * @returns {object|undefined} First provider with that name, if any.
 */
function getProvider2(name) {
  for (const candidate of providers) {
    if (candidate.name === name) return candidate;
  }
  return void 0;
}

/**
 * Empties the registry in place, so references handed out by getProviders
 * observe the change.
 * @returns {void}
 */
function clearProviders() {
  providers.splice(0, providers.length);
}
|
|
31815
|
+
|
|
31816
|
+
// src/import/detect.ts
|
|
31817
|
+
/**
 * Asks every registered provider which sessions it can find for a project.
 *
 * Providers that find nothing are omitted. Any exception raised while querying
 * or summarising a provider is swallowed and that provider is skipped
 * (NOTE(review): no logging happens here — confirm that is intended).
 *
 * @param {string} projectPath
 * @returns {Array<{agentName: *, agentDisplayName: *, sessions: Array, totalTokens: number, totalMessages: number}>}
 *   One summary per provider with sessions, sorted by `totalMessages` descending.
 */
function detectAll(projectPath) {
  const summaries = [];
  for (const provider of getProviders()) {
    try {
      const sessions = provider.detect(projectPath);
      if (sessions.length <= 0) continue;
      const totalTokens = sessions.reduce((sum, session) => sum + session.estimatedTokens, 0);
      const totalMessages = sessions.reduce((sum, session) => sum + session.messageCount, 0);
      summaries.push({
        agentName: provider.name,
        agentDisplayName: provider.displayName,
        sessions,
        totalTokens,
        totalMessages
      });
    } catch (err) {
    }
  }
  summaries.sort((left, right) => right.totalMessages - left.totalMessages);
  return summaries;
}
|
|
31836
|
+
|
|
31837
|
+
// src/import/extract.ts
|
|
31838
|
+
var IMPORT_CURATOR_SYSTEM = `${CURATOR_SYSTEM}
|
|
31839
|
+
|
|
31840
|
+
ADDITIONAL CONTEXT: You are extracting knowledge from HISTORICAL conversations with a different AI coding agent. Focus on durable insights that are still relevant:
|
|
31841
|
+
- Architecture decisions, design patterns, and project conventions
|
|
31842
|
+
- Gotchas, non-obvious bugs, and their fixes
|
|
31843
|
+
- Developer preferences and workflow patterns
|
|
31844
|
+
- Key technical choices and their rationale
|
|
31845
|
+
|
|
31846
|
+
Ignore:
|
|
31847
|
+
- References to the other agent's specific capabilities or limitations
|
|
31848
|
+
- Task-specific state that is no longer current (e.g. "currently debugging X")
|
|
31849
|
+
- Debugging steps for issues that were already resolved
|
|
31850
|
+
- Transient conversation artifacts (greetings, acknowledgments, status updates)`;
|
|
31851
|
+
/**
 * Runs the import curator over conversation chunks, oldest first.
 *
 * Chunks are processed one at a time so that each prompt sees the entries
 * created by earlier chunks. A chunk whose model call or op application throws
 * is counted in `chunksFailed`; every other chunk (including one with an empty
 * model reply) is counted in `chunksProcessed`. `onProgress`, when provided, is
 * called after every chunk.
 *
 * @param {object} input
 * @param {string} input.projectPath
 * @param {*} input.sessionID
 * @param {*} input.model
 * @param {object} input.llm - Object exposing `prompt(system, user, options)`.
 * @param {Array<{text: string, timestamp: number}>} input.chunks
 * @param {(p: {current: number, total: number, created: number, updated: number}) => void} [input.onProgress]
 * @returns {Promise<{created: number, updated: number, deleted: number, chunksProcessed: number, chunksFailed: number}>}
 */
async function extractKnowledge(input) {
  const totals = {
    created: 0,
    updated: 0,
    deleted: 0,
    chunksProcessed: 0,
    chunksFailed: 0
  };
  const ordered = input.chunks.slice().sort((a, b) => a.timestamp - b.timestamp);

  let position = 0;
  for (const chunk of ordered) {
    position += 1;

    // Re-read existing entries for every chunk: earlier chunks may have added some.
    const known = forProject(input.projectPath, false).map((entry) => ({
      id: entry.id,
      category: entry.category,
      title: entry.title,
      content: entry.content
    }));
    const prompt = curatorUser({
      messages: chunk.text,
      existing: known
    });

    try {
      const reply = await input.llm.prompt(IMPORT_CURATOR_SYSTEM, prompt, {
        model: input.model,
        workerID: "lore-import",
        thinking: false,
        maxTokens: 4096,
        sessionID: input.sessionID
      });
      if (reply) {
        const applied = applyOps(parseOps(reply), {
          projectPath: input.projectPath,
          sessionID: input.sessionID
        });
        totals.created += applied.created;
        totals.updated += applied.updated;
        totals.deleted += applied.deleted;
      }
      totals.chunksProcessed++;
    } catch {
      totals.chunksFailed++;
    }

    input.onProgress?.({
      current: position,
      total: ordered.length,
      created: totals.created,
      updated: totals.updated
    });
  }
  return totals;
}
|
|
31908
|
+
|
|
31909
|
+
// src/import/history.ts
|
|
31910
|
+
/**
 * Checks whether a source was already imported in its current state.
 *
 * @param {string} projectPath
 * @param {string} agentName
 * @param {string} sourceId
 * @param {string} sourceHash - Fingerprint of the source as it is now.
 * @returns {object|null} The `import_history` row when one exists for this
 *   project/agent/source AND its stored hash equals `sourceHash`; otherwise null.
 */
function isImported(projectPath, agentName, sourceId, sourceHash) {
  const sql = [
    "SELECT * FROM import_history",
    "WHERE project_id = ? AND agent_name = ? AND source_id = ?"
  ].join("\n");
  const previous = db().query(sql).get(ensureProject(projectPath), agentName, sourceId);
  const unchanged = Boolean(previous) && previous.source_hash === sourceHash;
  return unchanged ? previous : null;
}
|
|
31920
|
+
/**
 * Writes an `import_history` row for a completed import.
 *
 * The row gets a fresh random UUID as `id` and the current time as
 * `imported_at`; the statement is `INSERT OR REPLACE`.
 *
 * @param {string} projectPath
 * @param {string} agentName
 * @param {string} sourceId
 * @param {string} sourceHash
 * @param {{created: number, updated: number}} stats - Stored as entries_created / entries_updated.
 * @returns {void}
 */
function recordImport(projectPath, agentName, sourceId, sourceHash, stats) {
  const projectId2 = ensureProject(projectPath);
  const statement = db().query(
    [
      "INSERT OR REPLACE INTO import_history",
      "(id, project_id, agent_name, source_id, source_hash, entries_created, entries_updated, imported_at)",
      "VALUES (?, ?, ?, ?, ?, ?, ?, ?)"
    ].join("\n")
  );
  const values = [
    crypto.randomUUID(),
    projectId2,
    agentName,
    sourceId,
    sourceHash,
    stats.created,
    stats.updated,
    Date.now()
  ];
  statement.run(...values);
}
|
|
31937
|
+
/**
 * Lists a project's import history, newest first.
 *
 * Rows whose `source_id` is the '__declined__' marker are excluded.
 *
 * @param {string} projectPath
 * @returns {Array<object>} `import_history` rows ordered by `imported_at` descending.
 */
function listImports(projectPath) {
  const sql = [
    "SELECT * FROM import_history",
    "WHERE project_id = ? AND source_id != '__declined__'",
    "ORDER BY imported_at DESC"
  ].join("\n");
  const projectId2 = ensureProject(projectPath);
  return db().query(sql).all(projectId2);
}
|
|
31945
|
+
/**
 * Builds a cheap change fingerprint for an import source.
 *
 * @param {{size?: *, messageCount?: *, lastTimestamp?: *}} parts - Missing
 *   (null/undefined) fields count as 0.
 * @returns {string} `"<size>:<messageCount>:<lastTimestamp>"`.
 */
function computeHash(parts) {
  const size = parts.size ?? 0;
  const messageCount = parts.messageCount ?? 0;
  const lastTimestamp = parts.lastTimestamp ?? 0;
  return [size, messageCount, lastTimestamp].join(":");
}
|
|
31948
|
+
|
|
31949
|
+
// src/import/providers/claude-code.ts
|
|
31950
|
+
import { readdirSync as readdirSync2, readFileSync as readFileSync4, statSync as statSync5 } from "fs";
|
|
31951
|
+
import { join as join8 } from "path";
|
|
31952
|
+
import { homedir as homedir2 } from "os";
|
|
31953
|
+
// <home>/.claude/projects — claudeCodeProvider.detect looks inside this directory
// for a sub-folder named manglePath(projectPath).
var CLAUDE_DIR = join8(homedir2(), ".claude", "projects");
|
|
31954
|
+
// Character limit blockToText passes to truncate() for tool inputs and tool results.
var MAX_TOOL_OUTPUT_CHARS = 500;
|
|
31955
|
+
// Default per-chunk token budget for claudeCodeProvider.readChunks (its `maxTokens` default).
var DEFAULT_MAX_TOKENS = 12288;
|
|
31956
|
+
/**
 * Converts a project path into the folder name looked up under CLAUDE_DIR by
 * replacing every "/" with "-".
 *
 * @param {string} projectPath
 * @returns {string} The path with all forward slashes replaced by dashes.
 */
function manglePath(projectPath) {
  const segments = projectPath.split("/");
  return segments.join("-");
}
|
|
31959
|
+
/**
 * Rough token estimate: one token per three characters, rounded up.
 *
 * @param {string} text4
 * @returns {number} `ceil(text4.length / 3)`.
 */
function estimateTokens4(text4) {
  const charCount = text4.length;
  return Math.ceil(charCount / 3);
}
|
|
31962
|
+
/**
 * Shortens text to at most `max` characters, marking the cut with "...".
 *
 * @param {string} text4
 * @param {number} max - Maximum number of characters kept from `text4`.
 * @returns {string} `text4` unchanged when it fits; otherwise its first `max`
 *   characters followed by "...".
 */
function truncate(text4, max) {
  return text4.length > max ? text4.slice(0, max) + "..." : text4;
}
|
|
31966
|
+
/**
 * Renders one transcript content block as plain text.
 *
 * - "text"        -> the block's text.
 * - "tool_use"    -> "[tool: <name>] <JSON of input>", JSON truncated to
 *                    MAX_TOOL_OUTPUT_CHARS. A block without `input` yields an
 *                    empty summary instead of crashing (JSON.stringify returns
 *                    undefined for undefined).
 * - "tool_result" -> "[tool_result] <text>", truncated; string content is used
 *                    as-is, array content contributes its text items joined by
 *                    newlines. Null or non-text items are ignored.
 * - anything else (including "thinking" and non-object blocks) -> null.
 *
 * @param {object|null|undefined} block - A parsed content block.
 * @returns {string|null|undefined} Rendered text, or a falsy value when the
 *   block contributes nothing.
 */
function blockToText(block) {
  if (block === null || typeof block !== "object") return null;
  switch (block.type) {
    case "text":
      return block.text;
    case "tool_use": {
      const inputSummary = truncate(JSON.stringify(block.input) ?? "", MAX_TOOL_OUTPUT_CHARS);
      return `[tool: ${block.name}] ${inputSummary}`;
    }
    case "tool_result": {
      let content3 = "";
      if (typeof block.content === "string") {
        content3 = block.content;
      } else if (Array.isArray(block.content)) {
        content3 = block.content
          .map((item) => (item?.type === "text" ? item.text : ""))
          .filter(Boolean)
          .join("\n");
      }
      return content3 ? `[tool_result] ${truncate(content3, MAX_TOOL_OUTPUT_CHARS)}` : null;
    }
    case "thinking":
      return null;
    default:
      return null;
  }
}
|
|
31996
|
+
/**
 * Renders one parsed transcript line as "[user] ..." or "[assistant] ...".
 *
 * User lines may carry string content (used verbatim) or an array of content
 * blocks; assistant lines must carry an array. Blocks are rendered with
 * `blockToText` and joined by newlines. Lines of any other type, lines without
 * a usable `message.content`, and lines whose blocks all render empty yield
 * null. (The user branch previously called `.map` on whatever `content` was,
 * so a line with missing or non-array content threw instead of being skipped.)
 *
 * @param {object|null|undefined} parsed - One parsed JSONL record.
 * @returns {string|null} Rendered line, or null when it contributes nothing.
 */
function lineToText(parsed) {
  const role = parsed?.type;
  if (role !== "user" && role !== "assistant") return null;

  const content3 = parsed.message?.content;
  // Only user lines are allowed plain-string content.
  if (role === "user" && typeof content3 === "string") {
    return `[user] ${content3}`;
  }
  if (!Array.isArray(content3)) return null;

  const parts = content3.map(blockToText).filter(Boolean);
  return parts.length > 0 ? `[${role}] ${parts.join("\n")}` : null;
}
|
|
32015
|
+
/**
 * Reads a JSON-Lines file into an array of parsed records.
 *
 * Blank lines are skipped, and lines that are not valid JSON are ignored
 * rather than reported. Read errors (e.g. a missing file) propagate.
 *
 * @param {string} filePath
 * @returns {Array<*>} Parsed values in file order.
 */
function parseJSONL(filePath) {
  const records = [];
  readFileSync4(filePath, "utf-8")
    .split("\n")
    .forEach((line) => {
      if (!line.trim()) return;
      try {
        records.push(JSON.parse(line));
      } catch {
      }
    });
  return records;
}
|
|
32027
|
+
/**
 * Summarises a session transcript file without rendering its messages.
 *
 * Every non-blank line is parsed as JSON; lines that fail to parse (or whose
 * inspection throws) are skipped. The first truthy `sessionId` wins, valid
 * `timestamp` values feed the earliest/latest bounds, and lines typed "user"
 * or "assistant" are counted as messages.
 *
 * @param {string} filePath
 * @returns {{sessionId: *, startedAt: number, lastActivityAt: number, messageCount: number, estimatedTokens: number}|null}
 *   null when the file cannot be read, has no non-blank lines, no session id,
 *   or no messages. `startedAt` falls back to the current time when no valid
 *   timestamp was seen; `estimatedTokens` is `ceil(characters / 5)`.
 */
function getSessionMetadata(filePath) {
  let raw;
  try {
    raw = readFileSync4(filePath, "utf-8");
  } catch {
    return null;
  }

  const lines = raw.split("\n").filter((l) => l.trim());
  if (lines.length === 0) return null;

  let sessionId;
  let earliest = Infinity;
  let latest = 0;
  let messageCount = 0;

  for (const line of lines) {
    try {
      const record = JSON.parse(line);
      if (!sessionId && record.sessionId) sessionId = record.sessionId;
      if (record.timestamp) {
        const ts = new Date(record.timestamp).getTime();
        if (!Number.isNaN(ts)) {
          earliest = Math.min(earliest, ts);
          latest = Math.max(latest, ts);
        }
      }
      const isMessage = record.type === "user" || record.type === "assistant";
      if (isMessage) messageCount += 1;
    } catch {
    }
  }

  if (!sessionId || messageCount === 0) return null;

  return {
    sessionId,
    startedAt: earliest === Infinity ? Date.now() : earliest,
    lastActivityAt: latest,
    messageCount,
    estimatedTokens: Math.ceil(raw.length / 5)
  };
}
|
|
32068
|
+
/**
 * Import provider for Claude Code transcripts stored as `.jsonl` files under
 * CLAUDE_DIR/<mangled project path>.
 */
var claudeCodeProvider = {
  name: "claude-code",
  displayName: "Claude Code",

  /**
   * Lists importable sessions for a project.
   *
   * Only regular `.jsonl` files with readable metadata and at least three
   * messages are reported. The session `id` is the transcript's file path.
   *
   * @param {string} projectPath
   * @returns {Array<{id: string, label: string, startedAt: number, lastActivityAt: number, estimatedTokens: number, messageCount: number}>}
   *   Sessions sorted by `lastActivityAt` descending; `[]` when the project
   *   folder cannot be read.
   */
  detect(projectPath) {
    const dir = join8(CLAUDE_DIR, manglePath(projectPath));
    let entries;
    try {
      entries = readdirSync2(dir);
    } catch {
      return [];
    }
    const sessions = [];
    for (const entry of entries) {
      if (!entry.endsWith(".jsonl")) continue;
      const filePath = join8(dir, entry);
      try {
        if (!statSync5(filePath).isFile()) continue;
      } catch {
        continue;
      }
      const meta3 = getSessionMetadata(filePath);
      if (!meta3) continue;
      if (meta3.messageCount < 3) continue;
      const dateStr = new Date(meta3.startedAt).toISOString().slice(0, 10);
      sessions.push({
        id: filePath,
        label: `${dateStr} (${meta3.messageCount} messages)`,
        startedAt: meta3.startedAt,
        lastActivityAt: meta3.lastActivityAt,
        estimatedTokens: meta3.estimatedTokens,
        messageCount: meta3.messageCount
      });
    }
    return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
  },

  /**
   * Reads the given transcripts and groups their rendered messages into chunks
   * of roughly `maxTokens` estimated tokens each.
   *
   * A chunk is closed before a message that would push it over the budget; a
   * single message larger than the budget still forms its own chunk. Each
   * chunk's `timestamp` is that of its first message. Messages whose
   * `timestamp` is missing OR unparseable fall back to the current time — an
   * unparseable value used to become NaN and make `toISOString()` throw while
   * building the chunk label.
   *
   * @param {string} _projectPath - Unused.
   * @param {Iterable<string>} sessionIds - Transcript file paths (session ids from `detect`).
   * @param {number} [maxTokens=DEFAULT_MAX_TOKENS] - Per-chunk token budget.
   * @returns {Array<{label: string, text: string, estimatedTokens: number, timestamp: number}>}
   */
  readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS) {
    const chunks = [];
    for (const filePath of sessionIds) {
      const messages = [];
      for (const line of parseJSONL(filePath)) {
        const text4 = lineToText(line);
        if (!text4) continue;
        const parsedTs = "timestamp" in line && line.timestamp ? new Date(line.timestamp).getTime() : NaN;
        const ts = Number.isNaN(parsedTs) ? Date.now() : parsedTs;
        messages.push({ text: text4, timestamp: ts });
      }
      if (messages.length === 0) continue;

      let currentTexts = [];
      let currentTokens = 0;
      let chunkStart = messages[0].timestamp;
      let chunkIndex = 0;
      const flushChunk = () => {
        if (currentTexts.length === 0) return;
        chunkIndex++;
        const text4 = currentTexts.join("\n\n");
        chunks.push({
          label: `Claude Code ${new Date(chunkStart).toISOString().slice(0, 10)} (${chunkIndex})`,
          text: text4,
          estimatedTokens: estimateTokens4(text4),
          timestamp: chunkStart
        });
        currentTexts = [];
        currentTokens = 0;
      };

      for (const msg of messages) {
        const msgTokens = estimateTokens4(msg.text);
        // Close the running chunk first if this message would overflow it.
        if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
          flushChunk();
          chunkStart = msg.timestamp;
        }
        currentTexts.push(msg.text);
        currentTokens += msgTokens;
      }
      flushChunk();
    }
    return chunks;
  }
};
|
|
32148
|
+
registerProvider(claudeCodeProvider);
|
|
32149
|
+
|
|
32150
|
+
// src/import/providers/codex.ts
|
|
32151
|
+
import { readdirSync as readdirSync3, readFileSync as readFileSync5, statSync as statSync6, existsSync as existsSync6 } from "fs";
|
|
32152
|
+
import { join as join9 } from "path";
|
|
32153
|
+
import { homedir as homedir3 } from "os";
|
|
32154
|
+
var CODEX_DIR = join9(homedir3(), ".codex");
|
|
32155
|
+
var SESSIONS_DIR = join9(CODEX_DIR, "sessions");
|
|
32156
|
+
var ARCHIVED_DIR = join9(CODEX_DIR, "archived_sessions");
|
|
32157
|
+
var MAX_TOOL_OUTPUT_CHARS2 = 500;
|
|
32158
|
+
var DEFAULT_MAX_TOKENS2 = 12288;
|
|
32159
|
+
/**
 * Rough token estimate: one token per three characters, rounded up.
 *
 * @param text4 Text to measure.
 * @returns Estimated token count.
 */
function estimateTokens5(text4) {
  const charCount = text4.length;
  return Math.ceil(charCount / 3);
}
|
|
32162
|
+
/**
 * Shorten `text4` to at most `max` UTF-16 code units and append "...".
 * Text that already fits is returned unchanged.
 *
 * @param text4 Text to shorten.
 * @param max Maximum number of code units kept from the original text.
 * @returns The original or shortened text.
 */
function truncate2(text4, max) {
  if (text4.length <= max) return text4;
  let end = max;
  if (end > 0) {
    // Do not cut between the two halves of a surrogate pair: a lone high
    // surrogate is not valid text once encoded.
    const lastUnit = text4.charCodeAt(end - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
  }
  return text4.slice(0, end) + "...";
}
|
|
32166
|
+
/**
 * Recursively collect the paths of all regular files ending in ".jsonl"
 * under `dir`. Directories or entries that cannot be read are skipped.
 *
 * @param dir Root directory to search.
 * @returns Array of file paths; empty when `dir` does not exist.
 */
function findJsonlFiles(dir) {
  const results = [];
  if (!existsSync6(dir)) return results;
  function visit(current) {
    let names;
    try {
      names = readdirSync3(current);
    } catch {
      // Unreadable directory: nothing to collect from it.
      return;
    }
    for (const name of names) {
      const candidate = join9(current, name);
      try {
        const info = statSync6(candidate);
        if (info.isDirectory()) {
          visit(candidate);
        } else if (info.isFile() && name.endsWith(".jsonl")) {
          results.push(candidate);
        }
      } catch {
        // Entries that cannot be inspected are deliberately ignored.
      }
    }
  }
  visit(dir);
  return results;
}
|
|
32189
|
+
/**
 * Convert one Codex response item into a single line of conversation text.
 *
 * - `message` items become `[role] text`.
 * - `function_call` items become `[tool: name] args` (args truncated).
 * - `function_call_output` items become `[tool_result] output` (truncated).
 *
 * @param item Parsed response item (may be null/undefined).
 * @returns The formatted text, or null when the item carries nothing usable.
 */
function responseItemToText(item) {
  if (!item) return null;
  // Tool payloads are normally strings, but the data comes from files on disk;
  // anything else is serialised so `truncate2` never receives a non-string.
  const asText = (value) => typeof value === "string" ? value : JSON.stringify(value) ?? "";
  if (item.type === "message" && item.role && item.content) {
    const text4 = extractContent(item.content);
    if (text4) return `[${item.role}] ${text4}`;
  }
  if (item.type === "function_call" && item.name) {
    const args = item.arguments ? truncate2(asText(item.arguments), MAX_TOOL_OUTPUT_CHARS2) : "";
    return `[tool: ${item.name}] ${args}`;
  }
  if (item.type === "function_call_output" && item.output) {
    return `[tool_result] ${truncate2(asText(item.output), MAX_TOOL_OUTPUT_CHARS2)}`;
  }
  return null;
}
|
|
32204
|
+
/**
 * Extract plain text from a message `content` value.
 *
 * @param content3 Either a string or an array of content parts.
 * @returns The string itself, the `text` fields of all parts joined by
 *   newlines, or null when no text is present.
 */
function extractContent(content3) {
  if (typeof content3 === "string") return content3;
  if (!Array.isArray(content3)) return null;
  const parts = [];
  for (const part of content3) {
    // The `in` operator throws on null and primitives, so guard first.
    if (part === null || typeof part !== "object") continue;
    if ("text" in part && typeof part.text === "string") {
      parts.push(part.text);
    }
  }
  return parts.length > 0 ? parts.join("\n") : null;
}
|
|
32215
|
+
/**
 * Read a JSON-Lines file and return every line that parses as JSON.
 * Blank lines and lines that fail to parse are skipped; an unreadable file
 * yields an empty array.
 *
 * @param filePath Path of the `.jsonl` file.
 * @returns Array of parsed values, in file order.
 */
function parseJSONL2(filePath) {
  let contents;
  try {
    contents = readFileSync5(filePath, "utf-8");
  } catch {
    return [];
  }
  const records = [];
  contents.split("\n").forEach((rawLine) => {
    if (rawLine.trim() === "") return;
    try {
      records.push(JSON.parse(rawLine));
    } catch {
      // Malformed lines are ignored on purpose.
    }
  });
  return records;
}
|
|
32232
|
+
/**
 * Read summary information about a Codex session file.
 *
 * The first non-blank line must be a `session_meta` record. Lines of type
 * `response_item` or `event_msg` are counted as messages.
 *
 * @param filePath Path of the session `.jsonl` file.
 * @returns `{ id, cwd, timestamp, messageCount, fileSize }`, or null when the
 *   file is unreadable, empty, or does not start with usable session metadata.
 */
function getSessionMeta(filePath) {
  let raw;
  try {
    raw = readFileSync5(filePath, "utf-8");
  } catch {
    return null;
  }
  const lines = raw.split("\n").filter((l) => l.trim());
  if (lines.length === 0) return null;
  let meta3;
  try {
    meta3 = JSON.parse(lines[0]);
  } catch {
    return null;
  }
  // `meta3` may be null (a literal "null" line), hence the optional chaining.
  if (meta3?.type !== "session_meta") return null;
  const payload = meta3.payload;
  // NOTE(review): some Codex rollout files appear to store id/cwd/timestamp
  // directly on `payload` rather than under `payload.meta` - confirm against
  // real session files. Both layouts are accepted here.
  const info = payload?.meta ?? payload;
  if (info === null || typeof info !== "object") return null;
  let messageCount = 0;
  for (const line of lines) {
    try {
      const parsed = JSON.parse(line);
      if (parsed.type === "response_item" || parsed.type === "event_msg") {
        messageCount++;
      }
    } catch {
      // Unparseable lines simply do not count as messages.
    }
  }
  return {
    id: info.id,
    cwd: info.cwd,
    timestamp: info.timestamp,
    messageCount,
    // Length of the decoded text, used by callers as a size approximation.
    fileSize: raw.length
  };
}
|
|
32267
|
+
/**
 * Import provider for Codex session files found under SESSIONS_DIR and
 * ARCHIVED_DIR.
 */
var codexProvider = {
  name: "codex",
  displayName: "Codex",
  /**
   * List Codex sessions whose recorded working directory equals `projectPath`
   * and that contain at least three messages, newest first.
   *
   * @param projectPath Project directory to match against each session's cwd.
   * @returns Array of session descriptors.
   */
  detect(projectPath) {
    const sessions = [];
    const allFiles = [
      ...findJsonlFiles(SESSIONS_DIR),
      ...findJsonlFiles(ARCHIVED_DIR)
    ];
    for (const filePath of allFiles) {
      const meta3 = getSessionMeta(filePath);
      if (!meta3) continue;
      if (meta3.cwd !== projectPath) continue;
      if (meta3.messageCount < 3) continue;
      // An unparseable timestamp would make toISOString() throw and abort the
      // whole scan; fall back to "now", the same fallback readChunks uses.
      const parsedTs = new Date(meta3.timestamp).getTime();
      const ts = Number.isNaN(parsedTs) ? Date.now() : parsedTs;
      const estimatedTokens = Math.ceil(meta3.fileSize / 5);
      const dateStr = new Date(ts).toISOString().slice(0, 10);
      sessions.push({
        id: filePath,
        label: `${dateStr} (${meta3.messageCount} messages)`,
        startedAt: ts,
        // Best approximation without reading all lines
        lastActivityAt: ts,
        estimatedTokens,
        messageCount: meta3.messageCount
      });
    }
    return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
  },
  /**
   * Read the given session files and pack their messages into chunks of at
   * most roughly `maxTokens` estimated tokens. All chunks of a session carry
   * the session's start timestamp.
   *
   * @param _projectPath Unused.
   * @param sessionIds Session file paths, as returned in `detect()` ids.
   * @param maxTokens Soft per-chunk budget in estimated tokens.
   * @returns Array of `{ label, text, estimatedTokens, timestamp }` objects.
   */
  readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS2) {
    const chunks = [];
    for (const filePath of sessionIds) {
      const lines = parseJSONL2(filePath);
      const messages = [];
      let sessionTimestamp = Date.now();
      const firstLine = lines[0];
      if (firstLine?.type === "session_meta") {
        // NOTE(review): accepts the timestamp under payload.meta or directly
        // on payload - confirm which layout real session files use.
        const info = firstLine.payload?.meta ?? firstLine.payload;
        const ts = new Date(info?.timestamp).getTime();
        if (!Number.isNaN(ts)) sessionTimestamp = ts;
      }
      for (const line of lines) {
        // parseJSONL2 returns any JSON value, including null and primitives.
        if (line === null || typeof line !== "object") continue;
        const payload = line.payload;
        if (line.type === "response_item") {
          const text4 = responseItemToText(payload);
          if (text4) {
            messages.push({ text: text4, timestamp: sessionTimestamp });
          }
        } else if (line.type === "event_msg") {
          const output = payload?.output;
          if (typeof output === "string" && output) {
            messages.push({
              text: `[exec] ${truncate2(output, MAX_TOOL_OUTPUT_CHARS2)}`,
              timestamp: sessionTimestamp
            });
          }
        } else if (line.type === "compacted") {
          const history = payload?.replacement_history;
          if (Array.isArray(history)) {
            for (const item of history) {
              const text4 = responseItemToText(item);
              if (text4) {
                messages.push({ text: text4, timestamp: sessionTimestamp });
              }
            }
          }
        }
      }
      if (messages.length === 0) continue;
      let currentTexts = [];
      let currentTokens = 0;
      let chunkIndex = 0;
      // Emits the pending texts as one chunk and resets the accumulator.
      const flushChunk = () => {
        if (currentTexts.length === 0) return;
        chunkIndex++;
        const text4 = currentTexts.join("\n\n");
        chunks.push({
          label: `Codex ${new Date(sessionTimestamp).toISOString().slice(0, 10)} (${chunkIndex})`,
          text: text4,
          estimatedTokens: estimateTokens5(text4),
          timestamp: sessionTimestamp
        });
        currentTexts = [];
        currentTokens = 0;
      };
      for (const msg of messages) {
        const msgTokens = estimateTokens5(msg.text);
        if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
          flushChunk();
        }
        currentTexts.push(msg.text);
        currentTokens += msgTokens;
      }
      flushChunk();
    }
    return chunks;
  }
};
|
|
32365
|
+
registerProvider(codexProvider);
|
|
32366
|
+
|
|
32367
|
+
// src/import/providers/opencode.ts
|
|
32368
|
+
import { existsSync as existsSync7 } from "fs";
|
|
32369
|
+
import { join as join10 } from "path";
|
|
32370
|
+
import { homedir as homedir4 } from "os";
|
|
32371
|
+
var OPENCODE_DB_PATH = join10(
|
|
32372
|
+
process.env.XDG_DATA_HOME || join10(homedir4(), ".local", "share"),
|
|
32373
|
+
"opencode",
|
|
32374
|
+
"opencode.db"
|
|
32375
|
+
);
|
|
32376
|
+
var MAX_TOOL_OUTPUT_CHARS3 = 500;
|
|
32377
|
+
var DEFAULT_MAX_TOKENS3 = 12288;
|
|
32378
|
+
/**
 * Approximate the number of tokens in `text4` as ceil(length / 3).
 *
 * @param text4 Text to measure.
 * @returns Estimated token count.
 */
function estimateTokens6(text4) {
  const { length } = text4;
  return Math.ceil(length / 3);
}
|
|
32381
|
+
/**
 * Shorten `text4` to at most `max` UTF-16 code units and append "...".
 * Text that already fits is returned unchanged.
 *
 * @param text4 Text to shorten.
 * @param max Maximum number of code units kept from the original text.
 * @returns The original or shortened text.
 */
function truncate3(text4, max) {
  if (text4.length <= max) return text4;
  let end = max;
  if (end > 0) {
    // Do not cut between the two halves of a surrogate pair.
    const lastUnit = text4.charCodeAt(end - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
  }
  return text4.slice(0, end) + "...";
}
|
|
32385
|
+
/**
 * Open the OpenCode database at OPENCODE_DB_PATH with read-only options.
 *
 * @returns A `Database` handle, or null when the file does not exist or the
 *   database cannot be opened.
 */
function openDB() {
  const dbFileIsPresent = existsSync7(OPENCODE_DB_PATH);
  if (dbFileIsPresent) {
    try {
      const handle = new Database(OPENCODE_DB_PATH, { readonly: true, readOnly: true });
      return handle;
    } catch {
      // Opening failed; report "no database" to the caller.
    }
  }
  return null;
}
|
|
32393
|
+
/**
 * Check whether a table with the given name is listed in `sqlite_master`.
 *
 * @param database Open database handle exposing `query(sql).get(...)`.
 * @param table Table name to look up.
 * @returns True when a matching row is found.
 */
function tableExists(database, table) {
  const lookup = database.query("SELECT name FROM sqlite_master WHERE type='table' AND name=?");
  const match = lookup.get(table);
  return match !== null && match !== undefined;
}
|
|
32397
|
+
/**
 * Flatten the parts of one message into newline-separated text.
 *
 * `text` parts contribute their text; completed `tool` parts contribute
 * `[tool: name] output` with the output truncated. Everything else is ignored.
 *
 * @param parts Parsed part objects (entries that are not objects are skipped).
 * @returns The joined text; an empty string when nothing is usable.
 */
function partsToConversationText(parts) {
  const segments = [];
  for (const part of parts) {
    // Parts come from JSON stored in the database and may be null/primitive.
    if (part === null || typeof part !== "object") continue;
    if (part.type === "text" && part.text) {
      segments.push(part.text);
    } else if (
      part.type === "tool" &&
      part.tool &&
      part.state?.status === "completed" &&
      typeof part.state.output === "string" &&
      part.state.output
    ) {
      // Only string output is truncated; other shapes would crash truncate3.
      segments.push(`[tool: ${part.tool}] ${truncate3(part.state.output, MAX_TOOL_OUTPUT_CHARS3)}`);
    }
  }
  return segments.join("\n");
}
|
|
32408
|
+
/**
 * Import provider that reads sessions from the OpenCode SQLite database.
 */
var opencodeProvider = {
  name: "opencode",
  displayName: "OpenCode",
  /**
   * List top-level sessions (no parent) of the project whose worktree equals
   * `projectPath` and that have at least three messages, most recently
   * updated first.
   *
   * @param projectPath Project worktree path to look up.
   * @returns Array of session descriptors; empty when the database, its
   *   tables, or the project are missing.
   */
  detect(projectPath) {
    const database = openDB();
    if (!database) return [];
    try {
      if (!tableExists(database, "project") || !tableExists(database, "session") || !tableExists(database, "message")) {
        return [];
      }
      const project = database.query("SELECT id FROM project WHERE worktree = ?").get(projectPath);
      if (!project) return [];
      const sessions = database.query(
        `SELECT s.id, s.title, s.time_created, s.time_updated,
(SELECT COUNT(*) FROM message m WHERE m.session_id = s.id) as msg_count
FROM session s
WHERE s.project_id = ? AND s.parent_id IS NULL
ORDER BY s.time_updated DESC`
      ).all(project.id);
      const results = [];
      for (const sess of sessions) {
        if (sess.msg_count < 3) continue;
        // Flat per-message estimate; message bodies are not read here.
        const estimatedTokens = sess.msg_count * 500;
        const dateStr = new Date(sess.time_created).toISOString().slice(0, 10);
        const label = sess.title ? `${dateStr} - ${sess.title} (${sess.msg_count} messages)` : `${dateStr} (${sess.msg_count} messages)`;
        results.push({
          id: sess.id,
          label,
          startedAt: sess.time_created,
          lastActivityAt: sess.time_updated,
          estimatedTokens,
          messageCount: sess.msg_count
        });
      }
      return results;
    } finally {
      database.close();
    }
  },
  /**
   * Read the messages of the given sessions and pack them into chunks of at
   * most roughly `maxTokens` estimated tokens. Message text is taken from
   * the `part` table; when that table is absent no text is available and no
   * chunks are produced.
   *
   * @param _projectPath Unused.
   * @param sessionIds Session ids, as returned in `detect()` ids.
   * @param maxTokens Soft per-chunk budget in estimated tokens.
   * @returns Array of `{ label, text, estimatedTokens, timestamp }` objects.
   */
  readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS3) {
    const database = openDB();
    if (!database) return [];
    const chunks = [];
    try {
      // detect() already guards on this table; do the same here so a database
      // without it yields "no chunks" instead of a query error.
      if (!tableExists(database, "message")) return chunks;
      const hasParts = tableExists(database, "part");
      // Build each query once instead of once per session / per message.
      const messageQuery = database.query(
        `SELECT id, data, time_created FROM message
WHERE session_id = ?
ORDER BY time_created ASC`
      );
      const partQuery = hasParts ? database.query(
        `SELECT data FROM part
WHERE message_id = ?
ORDER BY time_created ASC`
      ) : null;
      for (const sessionId of sessionIds) {
        const messages = messageQuery.all(sessionId);
        if (messages.length === 0) continue;
        const textMessages = [];
        for (const msg of messages) {
          let msgData;
          try {
            msgData = JSON.parse(msg.data);
          } catch {
            continue;
          }
          // `msgData` can legitimately parse to null; treat it as "no role".
          const role = msgData?.role ?? "unknown";
          let contentText = "";
          if (partQuery) {
            const parts = partQuery.all(msg.id);
            const parsedParts = [];
            for (const p2 of parts) {
              try {
                parsedParts.push(JSON.parse(p2.data));
              } catch {
                // Skip parts whose stored JSON is malformed.
              }
            }
            contentText = partsToConversationText(parsedParts);
          }
          if (!contentText.trim()) continue;
          textMessages.push({
            text: `[${role}] ${contentText}`,
            timestamp: msg.time_created
          });
        }
        if (textMessages.length === 0) continue;
        let currentTexts = [];
        let currentTokens = 0;
        let chunkStart = textMessages[0].timestamp;
        let chunkIndex = 0;
        // Emits the pending texts as one chunk and resets the accumulator.
        const flushChunk = () => {
          if (currentTexts.length === 0) return;
          chunkIndex++;
          const text4 = currentTexts.join("\n\n");
          chunks.push({
            label: `OpenCode ${new Date(chunkStart).toISOString().slice(0, 10)} (${chunkIndex})`,
            text: text4,
            estimatedTokens: estimateTokens6(text4),
            timestamp: chunkStart
          });
          currentTexts = [];
          currentTokens = 0;
        };
        for (const msg of textMessages) {
          const msgTokens = estimateTokens6(msg.text);
          if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
            flushChunk();
            // The next chunk is dated by the first message it contains.
            chunkStart = msg.timestamp;
          }
          currentTexts.push(msg.text);
          currentTokens += msgTokens;
        }
        flushChunk();
      }
    } finally {
      database.close();
    }
    return chunks;
  }
};
|
|
32526
|
+
registerProvider(opencodeProvider);
|
|
32527
|
+
|
|
32528
|
+
// src/import/providers/cline.ts
|
|
32529
|
+
import { readFileSync as readFileSync6, existsSync as existsSync8, statSync as statSync7 } from "fs";
|
|
32530
|
+
import { join as join11 } from "path";
|
|
32531
|
+
import { homedir as homedir5 } from "os";
|
|
32532
|
+
var MAX_TOOL_OUTPUT_CHARS4 = 500;
|
|
32533
|
+
var DEFAULT_MAX_TOKENS4 = 12288;
|
|
32534
|
+
var EXTENSION_IDS = [
|
|
32535
|
+
"saoudrizwan.claude-dev",
|
|
32536
|
+
"cline.cline"
|
|
32537
|
+
];
|
|
32538
|
+
/**
 * Cheap token estimate: the text length divided by three, rounded up.
 *
 * @param text4 Text to measure.
 * @returns Estimated token count.
 */
function estimateTokens7(text4) {
  const thirds = text4.length / 3;
  return Math.ceil(thirds);
}
|
|
32541
|
+
/**
 * Shorten `text4` to at most `max` UTF-16 code units and append "...".
 * Text that already fits is returned unchanged.
 *
 * @param text4 Text to shorten.
 * @param max Maximum number of code units kept from the original text.
 * @returns The original or shortened text.
 */
function truncate4(text4, max) {
  if (text4.length <= max) return text4;
  let end = max;
  if (end > 0) {
    // Do not cut between the two halves of a surrogate pair.
    const lastUnit = text4.charCodeAt(end - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
  }
  return text4.slice(0, end) + "...";
}
|
|
32545
|
+
/**
 * Locate existing global-storage directories of the extensions listed in
 * EXTENSION_IDS for the Code, Code - Insiders and VSCodium editor variants.
 *
 * Candidate roots depend on `process.platform`: the user Library folder on
 * darwin, APPDATA on win32, and XDG_CONFIG_HOME (or ~/.config) plus the
 * ~/.vscode and ~/.vscode-insiders data folders elsewhere.
 *
 * @returns Paths of the candidate directories that exist on disk.
 */
function findGlobalStorageDirs() {
  const home = homedir5();
  const editorNames = ["Code", "Code - Insiders", "VSCodium"];
  const storageUnder = (root) => editorNames.map((editor) => join11(root, editor, "User", "globalStorage"));
  let basePaths;
  switch (process.platform) {
    case "darwin":
      basePaths = storageUnder(join11(home, "Library", "Application Support"));
      break;
    case "win32":
      basePaths = storageUnder(process.env.APPDATA || join11(home, "AppData", "Roaming"));
      break;
    default:
      basePaths = [
        ...storageUnder(process.env.XDG_CONFIG_HOME || join11(home, ".config")),
        join11(home, ".vscode", "data", "User", "globalStorage"),
        join11(home, ".vscode-insiders", "data", "User", "globalStorage")
      ];
  }
  return basePaths
    .flatMap((base) => EXTENSION_IDS.map((extId) => join11(base, extId)))
    .filter((dir) => existsSync8(dir));
}
|
|
32583
|
+
/**
 * Load the task history stored in `storageDir` and keep the entries whose
 * `cwdOnTaskInitialization` equals `projectPath`.
 *
 * Two locations are tried in order: `state/taskHistory.json`, then
 * `taskHistory.json`. The first one that parses to an array wins.
 *
 * @param storageDir Extension global-storage directory.
 * @param projectPath Project directory to filter by.
 * @returns Matching history entries; empty when no usable file is found.
 */
function loadTaskHistory(storageDir, projectPath) {
  const candidates = [
    join11(storageDir, "state", "taskHistory.json"),
    join11(storageDir, "taskHistory.json")
  ];
  for (const candidate of candidates) {
    if (existsSync8(candidate)) {
      try {
        const parsed = JSON.parse(readFileSync6(candidate, "utf-8"));
        if (Array.isArray(parsed)) {
          return parsed.filter((entry) => entry.cwdOnTaskInitialization === projectPath);
        }
      } catch {
        // Unreadable or malformed file: fall through to the next candidate.
      }
    }
  }
  return [];
}
|
|
32603
|
+
/**
 * Load `api_conversation_history.json` from a task directory.
 *
 * @param taskDir Directory of a single task.
 * @returns The parsed array of messages, or an empty array when the file is
 *   missing, unreadable, malformed, or does not contain an array.
 */
function readConversation(taskDir) {
  const historyFile = join11(taskDir, "api_conversation_history.json");
  if (existsSync8(historyFile)) {
    try {
      const parsed = JSON.parse(readFileSync6(historyFile, "utf-8"));
      if (Array.isArray(parsed)) return parsed;
    } catch {
      // Treated the same as an empty conversation.
    }
  }
  return [];
}
|
|
32614
|
+
/**
 * Convert one content block of a conversation message into text.
 *
 * - `text` blocks yield their text.
 * - `tool_use` blocks yield `[tool: name] <JSON input>` (truncated).
 * - `tool_result` blocks yield `[tool_result] <text>` (truncated), or null
 *   when they carry no text.
 *
 * @param block Parsed content block (may be null or a primitive).
 * @returns The text for the block, or null when there is nothing to show.
 */
function blockToText2(block) {
  // Blocks come from a JSON file on disk; ignore anything that is not an object.
  if (block === null || typeof block !== "object") return null;
  switch (block.type) {
    case "text":
      return block.text;
    case "tool_use": {
      // JSON.stringify(undefined) returns undefined, which truncate4 cannot
      // handle; a missing input is rendered as an empty object.
      const serializedInput = JSON.stringify(block.input) ?? "{}";
      return `[tool: ${block.name}] ${truncate4(serializedInput, MAX_TOOL_OUTPUT_CHARS4)}`;
    }
    case "tool_result": {
      let content3;
      if (typeof block.content === "string") {
        content3 = block.content;
      } else if (Array.isArray(block.content)) {
        content3 = block.content.filter((b) => b?.type === "text").map((b) => b.text).join("\n");
      } else {
        content3 = "";
      }
      return content3 ? `[tool_result] ${truncate4(content3, MAX_TOOL_OUTPUT_CHARS4)}` : null;
    }
    default:
      return null;
  }
}
|
|
32638
|
+
/**
 * Render one conversation message as `[role] text`.
 *
 * @param msg Message with a `role` and a `content` that is either a string
 *   or an array of content blocks.
 * @returns The rendered text, or null when the message has no usable content.
 */
function messageToText(msg) {
  if (msg === null || msg === undefined) return null;
  if (typeof msg.content === "string") {
    return msg.content ? `[${msg.role}] ${msg.content}` : null;
  }
  // Anything other than a string or an array (e.g. a missing `content`)
  // carries no text; calling .map on it would throw.
  if (!Array.isArray(msg.content)) return null;
  const parts = msg.content.map((block) => blockToText2(block)).filter(Boolean);
  return parts.length > 0 ? `[${msg.role}] ${parts.join("\n")}` : null;
}
|
|
32645
|
+
/**
 * Import provider for Cline task conversations stored in editor global storage.
 */
var clineProvider = {
  name: "cline",
  displayName: "Cline",
  /**
   * List tasks recorded for `projectPath` that have a task directory on disk
   * and at least three conversation messages, newest first.
   *
   * @param projectPath Project directory to match.
   * @returns Array of session descriptors whose `id` is the task directory.
   */
  detect(projectPath) {
    const sessions = [];
    const storageDirs = findGlobalStorageDirs();
    for (const storageDir of storageDirs) {
      const tasks = loadTaskHistory(storageDir, projectPath);
      for (const task of tasks) {
        // History entries come from a JSON file; skip ones without an id.
        if (task === null || typeof task !== "object" || task.id === null || task.id === undefined) continue;
        const taskDir = join11(storageDir, "tasks", String(task.id));
        if (!existsSync8(taskDir)) continue;
        const messages = readConversation(taskDir);
        if (messages.length < 3) continue;
        const historyFile = join11(taskDir, "api_conversation_history.json");
        let estimatedTokens = messages.length * 500;
        let fallbackTs = Date.now();
        try {
          const stat = statSync7(historyFile);
          estimatedTokens = Math.ceil(stat.size / 5);
          fallbackTs = stat.mtimeMs;
        } catch {
          // Keep the per-message estimate and the "now" fallback.
        }
        // An invalid `ts` would make toISOString() throw and abort the whole
        // scan; use the history file's modification time instead.
        const parsedTs = new Date(task.ts).getTime();
        const ts = Number.isNaN(parsedTs) ? fallbackTs : parsedTs;
        const dateStr = new Date(ts).toISOString().slice(0, 10);
        const title = typeof task.task === "string" ? task.task : "";
        const label = title ? `${dateStr} - ${truncate4(title, 60)} (${messages.length} messages)` : `${dateStr} (${messages.length} messages)`;
        sessions.push({
          id: taskDir,
          label,
          startedAt: ts,
          lastActivityAt: ts,
          estimatedTokens,
          messageCount: messages.length
        });
      }
    }
    return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
  },
  /**
   * Read the conversations in the given task directories and pack them into
   * chunks of at most roughly `maxTokens` estimated tokens. Every chunk of a
   * task is stamped with the task directory's modification time (or "now"
   * when it cannot be read).
   *
   * @param _projectPath Unused.
   * @param sessionIds Task directories, as returned in `detect()` ids.
   * @param maxTokens Soft per-chunk budget in estimated tokens.
   * @returns Array of `{ label, text, estimatedTokens, timestamp }` objects.
   */
  readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS4) {
    const chunks = [];
    for (const taskDir of sessionIds) {
      const messages = readConversation(taskDir);
      if (messages.length === 0) continue;
      let sessionTimestamp;
      try {
        sessionTimestamp = statSync7(taskDir).mtimeMs;
      } catch {
        sessionTimestamp = Date.now();
      }
      const textMessages = [];
      for (const msg of messages) {
        // Null or primitive entries cannot be rendered.
        if (msg === null || typeof msg !== "object") continue;
        const text4 = messageToText(msg);
        if (text4) textMessages.push({ text: text4 });
      }
      if (textMessages.length === 0) continue;
      let currentTexts = [];
      let currentTokens = 0;
      let chunkIndex = 0;
      // Emits the pending texts as one chunk and resets the accumulator.
      const flushChunk = () => {
        if (currentTexts.length === 0) return;
        chunkIndex++;
        const text4 = currentTexts.join("\n\n");
        chunks.push({
          label: `Cline ${new Date(sessionTimestamp).toISOString().slice(0, 10)} (${chunkIndex})`,
          text: text4,
          estimatedTokens: estimateTokens7(text4),
          timestamp: sessionTimestamp
        });
        currentTexts = [];
        currentTokens = 0;
      };
      for (const msg of textMessages) {
        const msgTokens = estimateTokens7(msg.text);
        if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
          flushChunk();
        }
        currentTexts.push(msg.text);
        currentTokens += msgTokens;
      }
      flushChunk();
    }
    return chunks;
  }
};
|
|
32725
|
+
registerProvider(clineProvider);
|
|
32726
|
+
|
|
32727
|
+
// src/import/providers/continue.ts
|
|
32728
|
+
import { readdirSync as readdirSync5, readFileSync as readFileSync7, existsSync as existsSync9 } from "fs";
|
|
32729
|
+
import { join as join12 } from "path";
|
|
32730
|
+
import { homedir as homedir6 } from "os";
|
|
32731
|
+
var MAX_TOOL_OUTPUT_CHARS5 = 500;
|
|
32732
|
+
var DEFAULT_MAX_TOKENS5 = 12288;
|
|
32733
|
+
/**
 * Estimate tokens as the text length divided by three, rounded up.
 *
 * @param text4 Text to measure.
 * @returns Estimated token count.
 */
function estimateTokens8(text4) {
  const CHARS_PER_TOKEN = 3;
  return Math.ceil(text4.length / CHARS_PER_TOKEN);
}
|
|
32736
|
+
/**
 * Shorten `text4` to at most `max` UTF-16 code units and append "...".
 * Text that already fits is returned unchanged.
 *
 * @param text4 Text to shorten.
 * @param max Maximum number of code units kept from the original text.
 * @returns The original or shortened text.
 */
function truncate5(text4, max) {
  if (text4.length <= max) return text4;
  let end = max;
  if (end > 0) {
    // Do not cut between the two halves of a surrogate pair.
    const lastUnit = text4.charCodeAt(end - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
  }
  return text4.slice(0, end) + "...";
}
|
|
32740
|
+
/**
 * Resolve the Continue data directory: the CONTINUE_GLOBAL_DIR environment
 * variable when set to a non-empty value, otherwise `.continue` in the home
 * directory.
 *
 * @returns Directory path.
 */
function continueDir() {
  const override = process.env.CONTINUE_GLOBAL_DIR;
  if (override) return override;
  return join12(homedir6(), ".continue");
}
|
|
32743
|
+
/**
 * Load the session index file `sessions/sessions.json` from the Continue
 * data directory.
 *
 * @returns The parsed array, or an empty array when the file is missing,
 *   unreadable, malformed, or does not contain an array.
 */
function loadSessionIndex() {
  const indexFile = join12(continueDir(), "sessions", "sessions.json");
  if (existsSync9(indexFile)) {
    try {
      const entries = JSON.parse(readFileSync7(indexFile, "utf-8"));
      if (Array.isArray(entries)) return entries;
    } catch {
      // Treated the same as an empty index.
    }
  }
  return [];
}
|
|
32754
|
+
/**
 * Load one session file, `sessions/<sessionId>.json`, from the Continue data
 * directory.
 *
 * @param sessionId Session identifier used as the file name stem.
 * @returns The parsed JSON value, or null when the file is missing,
 *   unreadable, or malformed.
 */
function loadSession(sessionId) {
  const sessionFile = join12(continueDir(), "sessions", `${sessionId}.json`);
  if (existsSync9(sessionFile)) {
    try {
      const contents = readFileSync7(sessionFile, "utf-8");
      return JSON.parse(contents);
    } catch {
      // Fall through to the "not available" result.
    }
  }
  return null;
}
|
|
32764
|
+
/**
 * Extract plain text from a message `content` value.
 *
 * @param content3 Either a string or an array of content parts.
 * @returns The string itself, the text of all `type: "text"` parts joined by
 *   newlines, or an empty string for any other input.
 */
function extractMessageContent(content3) {
  if (typeof content3 === "string") return content3;
  if (!Array.isArray(content3)) return "";
  return content3.filter(
    // `part?.` guards against null entries, which would otherwise throw.
    (part) => part?.type === "text" && typeof part.text === "string"
  ).map((part) => part.text).join("\n");
}
|
|
32771
|
+
/**
 * Render one session history item as `[role] text`.
 *
 * The text combines the message content, one `[tool: name] args` line per
 * tool call, and one `[tool_result] output` line per tool-call state whose
 * status is "done". System messages are skipped, and the role "tool" is
 * shown as "tool_result".
 *
 * @param item History item with a `message` and optional `toolCallStates`.
 * @returns The rendered text, or null when the item has nothing to show.
 */
function historyItemToText(item) {
  const msg = item?.message;
  if (!msg) return null;
  if (msg.role === "system") return null;
  // Values are normally strings, but the data comes from JSON on disk; other
  // shapes are serialised instead of crashing or printing "[object Object]".
  const asText = (value) => typeof value === "string" ? value : JSON.stringify(value) ?? "";
  const parts = [];
  const content3 = extractMessageContent(msg.content);
  if (content3) parts.push(content3);
  if (Array.isArray(msg.toolCalls)) {
    for (const call of msg.toolCalls) {
      const fn = call?.function;
      if (fn) {
        const args = truncate5(fn.arguments ? asText(fn.arguments) : "{}", MAX_TOOL_OUTPUT_CHARS5);
        parts.push(`[tool: ${fn.name}] ${args}`);
      }
    }
  }
  if (Array.isArray(item.toolCallStates)) {
    for (const state of item.toolCallStates) {
      if (state?.output && state.status === "done") {
        parts.push(`[tool_result] ${truncate5(asText(state.output), MAX_TOOL_OUTPUT_CHARS5)}`);
      }
    }
  }
  if (parts.length === 0) return null;
  const role = msg.role === "tool" ? "tool_result" : msg.role;
  return `[${role}] ${parts.join("\n")}`;
}
|
|
32797
|
+
/**
 * Import provider for Continue sessions stored under the Continue data
 * directory.
 */
var continueProvider = {
  name: "continue",
  displayName: "Continue",
  /**
   * List sessions for `projectPath` with at least three history items,
   * newest first. Sessions come from the session index first; session files
   * missing from the index are then picked up by scanning the sessions
   * directory (those are stamped with the current time).
   *
   * @param projectPath Workspace directory to match.
   * @returns Array of session descriptors.
   */
  detect(projectPath) {
    // Converts an index `dateCreated` value to epoch milliseconds.
    // NOTE(review): Continue appears to write dateCreated as a digit-only
    // epoch-millisecond string, which Date cannot parse directly - confirm.
    // Unparseable values fall back to "now" so toISOString() cannot throw.
    const toTimestamp = (value) => {
      const candidate = typeof value === "string" && /^\d{10,}$/.test(value) ? Number(value) : value;
      const ms = new Date(candidate).getTime();
      return Number.isNaN(ms) ? Date.now() : ms;
    };
    const sessions = [];
    const index2 = loadSessionIndex();
    for (const meta3 of index2) {
      if (meta3 === null || typeof meta3 !== "object") continue;
      if (meta3.workspaceDirectory !== projectPath) continue;
      const session = loadSession(meta3.sessionId);
      if (!session || !Array.isArray(session.history) || session.history.length < 3) continue;
      const ts = toTimestamp(meta3.dateCreated);
      const dateStr = new Date(ts).toISOString().slice(0, 10);
      const messageCount = session.history.length;
      const label = meta3.title ? `${dateStr} - ${truncate5(meta3.title, 60)} (${messageCount} messages)` : `${dateStr} (${messageCount} messages)`;
      const estimatedTokens = messageCount * 500;
      sessions.push({
        id: meta3.sessionId,
        label,
        startedAt: ts,
        lastActivityAt: ts,
        estimatedTokens,
        messageCount
      });
    }
    const sessionsDir = join12(continueDir(), "sessions");
    if (existsSync9(sessionsDir)) {
      const existingIds = new Set(sessions.map((s) => s.id));
      let entries;
      try {
        entries = readdirSync5(sessionsDir);
      } catch {
        entries = [];
      }
      for (const entry of entries) {
        if (!entry.endsWith(".json") || entry === "sessions.json") continue;
        // Strip only the trailing extension; replace() would remove the first
        // ".json" anywhere in the name.
        const sessionId = entry.slice(0, -".json".length);
        if (existingIds.has(sessionId)) continue;
        const session = loadSession(sessionId);
        if (!session) continue;
        if (session.workspaceDirectory !== projectPath) continue;
        if (!Array.isArray(session.history) || session.history.length < 3) continue;
        // No creation date is available here, so the label uses the title
        // (or an id prefix) in place of a date.
        const labelPrefix = session.title ? truncate5(session.title, 60) : sessionId.slice(0, 8);
        sessions.push({
          id: sessionId,
          label: `${labelPrefix} (${session.history.length} messages)`,
          startedAt: Date.now(),
          lastActivityAt: Date.now(),
          estimatedTokens: session.history.length * 500,
          messageCount: session.history.length
        });
      }
    }
    return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
  },
  /**
   * Read the given sessions and pack their history into chunks of at most
   * roughly `maxTokens` estimated tokens. Chunks are stamped with the time
   * of the call.
   *
   * @param _projectPath Unused.
   * @param sessionIds Session ids, as returned in `detect()` ids.
   * @param maxTokens Soft per-chunk budget in estimated tokens.
   * @returns Array of `{ label, text, estimatedTokens, timestamp }` objects.
   */
  readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS5) {
    const chunks = [];
    for (const sessionId of sessionIds) {
      const session = loadSession(sessionId);
      // A history that is not an array cannot be iterated item by item.
      if (!session || !Array.isArray(session.history)) continue;
      const textMessages = [];
      for (const item of session.history) {
        const text4 = historyItemToText(item);
        if (text4) textMessages.push({ text: text4 });
      }
      if (textMessages.length === 0) continue;
      const sessionTimestamp = Date.now();
      let currentTexts = [];
      let currentTokens = 0;
      let chunkIndex = 0;
      // Emits the pending texts as one chunk and resets the accumulator.
      const flushChunk = () => {
        if (currentTexts.length === 0) return;
        chunkIndex++;
        const text4 = currentTexts.join("\n\n");
        chunks.push({
          label: `Continue ${session.title || sessionId.slice(0, 8)} (${chunkIndex})`,
          text: text4,
          estimatedTokens: estimateTokens8(text4),
          timestamp: sessionTimestamp
        });
        currentTexts = [];
        currentTokens = 0;
      };
      for (const msg of textMessages) {
        const msgTokens = estimateTokens8(msg.text);
        if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
          flushChunk();
        }
        currentTexts.push(msg.text);
        currentTokens += msgTokens;
      }
      flushChunk();
    }
    return chunks;
  }
};
|
|
32892
|
+
registerProvider(continueProvider);
|
|
32893
|
+
|
|
32894
|
+
// src/import/providers/pi.ts
|
|
32895
|
+
import { readdirSync as readdirSync6, readFileSync as readFileSync8, statSync as statSync8 } from "fs";
|
|
32896
|
+
import { join as join13 } from "path";
|
|
32897
|
+
import { homedir as homedir7 } from "os";
|
|
32898
|
+
// Directory under the user's home that piProvider.detect joins with the encoded project path.
var PI_DIR = join13(homedir7(), ".pi", "agent", "sessions");
|
|
32899
|
+
// Base character cap; piProvider.readChunks doubles it to bound compaction summaries.
var MAX_TOOL_OUTPUT_CHARS6 = 500;
|
|
32900
|
+
// Default per-chunk budget (in estimated tokens) for piProvider.readChunks.
var DEFAULT_MAX_TOKENS6 = 12288;
|
|
32901
|
+
/**
 * Cheap token estimate: one token per three characters, rounded up.
 *
 * @param {string} text4 - Text to measure.
 * @returns {number} Estimated token count (0 for an empty string).
 */
function estimateTokens9(text4) {
  const charCount = text4.length;
  return Math.ceil(charCount / 3);
}
|
|
30348
|
-
function
|
|
30349
|
-
|
|
30350
|
-
|
|
30351
|
-
).all();
|
|
30352
|
-
if (!rows.length) return 0;
|
|
30353
|
-
const update2 = db().prepare(
|
|
30354
|
-
"UPDATE distillations SET r_compression = ?, c_norm = ? WHERE id = ?"
|
|
30355
|
-
);
|
|
30356
|
-
let updated = 0;
|
|
30357
|
-
for (const row of rows) {
|
|
30358
|
-
const sourceIds = parseSourceIds(row.source_ids);
|
|
30359
|
-
if (!sourceIds.length) continue;
|
|
30360
|
-
const placeholders = sourceIds.map(() => "?").join(",");
|
|
30361
|
-
const sources = db().query(
|
|
30362
|
-
`SELECT tokens, created_at FROM temporal_messages WHERE id IN (${placeholders})`
|
|
30363
|
-
).all(...sourceIds);
|
|
30364
|
-
if (!sources.length) continue;
|
|
30365
|
-
const sourceTokens = sources.reduce((sum, s) => sum + s.tokens, 0);
|
|
30366
|
-
const timestamps = sources.map((s) => s.created_at);
|
|
30367
|
-
const rComp = compressionRatio(row.token_count, sourceTokens);
|
|
30368
|
-
const cNorm = temporalCnorm(timestamps);
|
|
30369
|
-
update2.run(rComp, cNorm, row.id);
|
|
30370
|
-
updated++;
|
|
30371
|
-
}
|
|
30372
|
-
if (updated > 0) {
|
|
30373
|
-
info(
|
|
30374
|
-
`backfilled metrics for ${updated} distillations (${rows.length - updated} skipped \u2014 missing sources)`
|
|
30375
|
-
);
|
|
30376
|
-
}
|
|
30377
|
-
return updated;
|
|
32904
|
+
/**
 * Cap a string at `max` characters, marking any cut with a trailing "...".
 *
 * The ellipsis is appended after the cut, so a truncated result is
 * `max + 3` characters long.
 *
 * @param {string} text4 - Text to shorten.
 * @param {number} max - Maximum number of characters kept from `text4`.
 * @returns {string} `text4` unchanged when it fits, otherwise its prefix plus "...".
 */
function truncate6(text4, max) {
  const fits = text4.length <= max;
  return fits ? text4 : `${text4.slice(0, max)}...`;
}
|
|
30379
|
-
|
|
30380
|
-
|
|
30381
|
-
|
|
30382
|
-
|
|
30383
|
-
|
|
30384
|
-
|
|
30385
|
-
run: () => run2
|
|
30386
|
-
});
|
|
30387
|
-
var MAX_ENTRY_CONTENT_LENGTH = 1200;
|
|
30388
|
-
function parseOps(text4) {
|
|
30389
|
-
const cleaned = text4.trim().replace(/^```json?\s*/i, "").replace(/\s*```$/i, "");
|
|
32908
|
+
/**
 * Encode a working directory into the folder name used under `PI_DIR`.
 *
 * One leading path separator is dropped, every remaining separator becomes
 * "-", and the result is wrapped in "--". Backslashes and drive-letter colons
 * are treated like "/", so Windows paths produce a valid single folder name
 * instead of one that still contains separators.
 *
 * NOTE(review): the "\" and ":" handling is meant to match how Pi itself
 * names its per-project session folders; confirm against Pi's session manager.
 *
 * @param {string} cwd - Absolute project path.
 * @returns {string} Folder name such as "--home-me-proj--".
 */
function encodeCwd(cwd) {
  const encoded = cwd.replace(/^[/\\]/, "").replace(/[/\\:]/g, "-");
  return `--${encoded}--`;
}
|
|
32912
|
+
/**
 * Read a JSONL file and return its entries in file order.
 *
 * This is a best-effort reader: an unreadable file yields an empty list, and
 * blank or malformed lines are skipped so one bad line does not discard the
 * rest of the session.
 *
 * Lines that parse to something other than a plain object (`null`, numbers,
 * strings, arrays) are skipped as well. Callers read `.type` and `.id` on
 * every entry, which would throw on `null`.
 *
 * @param {string} filePath - Path of the JSONL file.
 * @returns {object[]} Parsed entries; empty when the file cannot be read.
 */
function parseJSONL3(filePath) {
  let raw;
  try {
    raw = readFileSync8(filePath, "utf-8");
  } catch {
    // Missing or unreadable file: treated as "no entries" by design.
    return [];
  }
  const lines = [];
  for (const line of raw.split("\n")) {
    if (!line.trim()) continue;
    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch {
      // Malformed line (for example a partially written tail): skip it.
      continue;
    }
    if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
      lines.push(parsed);
    }
  }
  return lines;
}
|
|
30400
|
-
|
|
30401
|
-
|
|
30402
|
-
const
|
|
30403
|
-
|
|
30404
|
-
|
|
30405
|
-
const
|
|
30406
|
-
|
|
30407
|
-
|
|
30408
|
-
|
|
30409
|
-
|
|
30410
|
-
|
|
30411
|
-
|
|
30412
|
-
|
|
30413
|
-
|
|
30414
|
-
|
|
30415
|
-
|
|
30416
|
-
|
|
30417
|
-
|
|
30418
|
-
|
|
30419
|
-
|
|
30420
|
-
const
|
|
30421
|
-
|
|
30422
|
-
|
|
30423
|
-
|
|
30424
|
-
|
|
30425
|
-
|
|
30426
|
-
|
|
30427
|
-
|
|
30428
|
-
|
|
30429
|
-
|
|
30430
|
-
|
|
30431
|
-
|
|
30432
|
-
|
|
30433
|
-
|
|
30434
|
-
|
|
30435
|
-
|
|
30436
|
-
|
|
30437
|
-
|
|
30438
|
-
|
|
30439
|
-
|
|
30440
|
-
|
|
30441
|
-
|
|
30442
|
-
|
|
32929
|
+
/**
 * Flatten a session's entries into the single branch that should be read.
 *
 * Entries carrying an `id` are grouped by `parentId`. The walk starts at the
 * children of the "session" header and always follows the LAST child, i.e.
 * the most recently appended branch. The header itself is never returned.
 *
 * Fallbacks, in order:
 *  - no header (or a header without `id`): all "message" entries in file order;
 *  - nothing is parented to the header: start from the last entry that has an
 *    `id` but no `parentId`;
 *  - still nothing: all "message" entries in file order, so a session with no
 *    usable links is not silently dropped.
 *
 * NOTE(review): the second fallback assumes some session files mark their
 * first entry with an empty `parentId` rather than the header id; confirm
 * against the Pi session format.
 *
 * @param {object[]} lines - Parsed entries in file order.
 * @returns {object[]} Entries of the selected branch, oldest first.
 */
function linearize(lines) {
  if (lines.length === 0) return [];
  const children = /* @__PURE__ */ new Map();
  const roots = [];
  let rootLine = null;
  for (const line of lines) {
    if (line.type === "session") {
      rootLine = line;
      continue;
    }
    if (!line.id) continue;
    const pid = line.parentId;
    if (pid) {
      const siblings = children.get(pid) ?? [];
      siblings.push(line);
      children.set(pid, siblings);
    } else {
      roots.push(line);
    }
  }
  const messagesInFileOrder = () => lines.filter((l) => l.type === "message");
  if (!rootLine || !rootLine.id) return messagesInFileOrder();

  const result = [];
  // Guards against duplicate or self-referencing ids, which would otherwise
  // make the walk revisit the same entry forever.
  const visited = /* @__PURE__ */ new Set();
  let kids = children.get(rootLine.id);
  if (!kids || kids.length === 0) kids = roots;
  while (kids && kids.length > 0) {
    const next = kids[kids.length - 1];
    if (visited.has(next.id)) break;
    visited.add(next.id);
    result.push(next);
    kids = children.get(next.id);
  }
  return result.length > 0 ? result : messagesInFileOrder();
}
|
|
32960
|
+
/**
 * Summarise one session file for listing purposes.
 *
 * The file must start with a "session" header entry; otherwise (or when the
 * file has no parseable entries) `null` is returned.
 *
 * @param {string} filePath - Path of the session JSONL file.
 * @returns {{id: *, cwd: *, timestamp: number, messageCount: number, fileSize: number} | null}
 *   `timestamp` is the header timestamp in epoch milliseconds, or the current
 *   time when the header value does not parse as a date. `fileSize` is 0 when
 *   the file cannot be stat'ed.
 */
function getSessionMeta2(filePath) {
  const entries = parseJSONL3(filePath);
  if (entries.length === 0) return null;

  const header = entries[0];
  if (header.type !== "session") return null;

  let messageCount = 0;
  for (const entry of entries) {
    if (entry.type === "message") messageCount += 1;
  }

  let fileSize = 0;
  try {
    fileSize = statSync8(filePath).size;
  } catch {
    // The size only feeds a rough token estimate; fall back to 0.
    fileSize = 0;
  }

  const parsedTs = new Date(header.timestamp).getTime();
  const timestamp = Number.isNaN(parsedTs) ? Date.now() : parsedTs;
  return { id: header.id, cwd: header.cwd, timestamp, messageCount, fileSize };
}
|
|
32982
|
+
/**
 * Render the `content` of a Pi message as plain text.
 *
 * Strings are returned unchanged. For arrays, the string `text` fields of the
 * parts are joined with newlines and every other part is ignored. Any other
 * value is converted with `String()`.
 *
 * NOTE(review): assumes structured content is an array of parts whose textual
 * ones expose a string `text` field; confirm against the Pi session format.
 *
 * @param {*} content3 - Raw `message.content` value.
 * @returns {string} Text to show for the message (may be empty).
 */
function piContentToText(content3) {
  if (typeof content3 === "string") return content3;
  if (Array.isArray(content3)) {
    return content3
      .filter((part) => part !== null && typeof part === "object" && typeof part.text === "string")
      .map((part) => part.text)
      .join("\n");
  }
  return String(content3);
}

/**
 * Import provider for Pi sessions stored as JSONL files under `PI_DIR`.
 */
var piProvider = {
  name: "pi",
  displayName: "Pi",
  /**
   * List the importable Pi sessions recorded for a project.
   *
   * Reads the `.jsonl` files in the project's encoded folder under `PI_DIR`
   * and skips files without a valid header or with fewer than 3 messages.
   *
   * @param {string} projectPath - Project directory the sessions belong to.
   * @returns {object[]} Session descriptors, most recent first; empty when
   *   the folder cannot be read. Each `id` is the session file path.
   */
  detect(projectPath) {
    const encoded = encodeCwd(projectPath);
    const dir = join13(PI_DIR, encoded);
    let entries;
    try {
      entries = readdirSync6(dir);
    } catch {
      // No session folder for this project.
      return [];
    }
    const sessions = [];
    for (const entry of entries) {
      if (!entry.endsWith(".jsonl")) continue;
      const filePath = join13(dir, entry);
      const meta3 = getSessionMeta2(filePath);
      if (!meta3) continue;
      if (meta3.messageCount < 3) continue;
      const dateStr = new Date(meta3.timestamp).toISOString().slice(0, 10);
      // Rough size-based estimate: one token per five bytes of file.
      const estimatedTokens = Math.ceil(meta3.fileSize / 5);
      sessions.push({
        id: filePath,
        label: `${dateStr} (${meta3.messageCount} messages)`,
        startedAt: meta3.timestamp,
        lastActivityAt: meta3.timestamp,
        estimatedTokens,
        messageCount: meta3.messageCount
      });
    }
    return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
  },
  /**
   * Turn the selected session files into text chunks bounded by a token budget.
   *
   * Messages are rendered as "[role] text" and compaction entries as
   * "[summary] …" (capped at twice `MAX_TOOL_OUTPUT_CHARS6` characters).
   * Structured message content is reduced to its text parts, and message
   * entries without content are skipped. Chunks are filled greedily; a single
   * message larger than the budget still becomes its own chunk.
   *
   * @param {string} _projectPath - Unused by this provider.
   * @param {Iterable<string>} sessionIds - Session file paths from `detect`.
   * @param {number} [maxTokens] - Per-chunk budget in estimated tokens.
   * @returns {Array<{label: string, text: string, estimatedTokens: number, timestamp: number}>}
   */
  readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS6) {
    const chunks = [];
    for (const filePath of sessionIds) {
      const allLines = parseJSONL3(filePath);
      const linearLines = linearize(allLines);

      // Chunks are stamped with the header time, or "now" when it is missing/invalid.
      let sessionTimestamp = Date.now();
      const header = allLines.find((l) => l.type === "session");
      if (header) {
        const ts = new Date(header.timestamp).getTime();
        if (!Number.isNaN(ts)) sessionTimestamp = ts;
      }

      const messages = [];
      for (const line of linearLines) {
        if (line.type === "message") {
          const content3 = line.message?.content;
          if (!content3) continue;
          const body = piContentToText(content3);
          if (!body) continue;
          messages.push(`[${line.message.role}] ${body}`);
        } else if (line.type === "compaction") {
          if (line.summary) {
            messages.push(`[summary] ${truncate6(line.summary, MAX_TOOL_OUTPUT_CHARS6 * 2)}`);
          }
        }
      }
      if (messages.length === 0) continue;

      let currentTexts = [];
      let currentTokens = 0;
      let chunkIndex = 0;
      const flushChunk = () => {
        if (currentTexts.length === 0) return;
        chunkIndex++;
        const text4 = currentTexts.join("\n\n");
        chunks.push({
          label: `Pi ${new Date(sessionTimestamp).toISOString().slice(0, 10)} (${chunkIndex})`,
          text: text4,
          estimatedTokens: estimateTokens9(text4),
          timestamp: sessionTimestamp
        });
        currentTexts = [];
        currentTokens = 0;
      };
      for (const text of messages) {
        const msgTokens = estimateTokens9(text);
        if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
          flushChunk();
        }
        currentTexts.push(text);
        currentTokens += msgTokens;
      }
      flushChunk();
    }
    return chunks;
  }
};
|
|
33077
|
+
registerProvider(piProvider);
|
|
33078
|
+
|
|
33079
|
+
// src/import/providers/aider.ts
|
|
33080
|
+
import { existsSync as existsSync11, readFileSync as readFileSync9, statSync as statSync9 } from "fs";
|
|
33081
|
+
import { join as join14 } from "path";
|
|
33082
|
+
// File name that aiderProvider.detect looks for directly inside the project directory.
var HISTORY_FILE = ".aider.chat.history.md";
|
|
33083
|
+
// Default per-chunk budget (in estimated tokens) for aiderProvider.readChunks.
var DEFAULT_MAX_TOKENS7 = 12288;
|
|
33084
|
+
// Matches a line consisting only of "#### <role>" (user, assistant or system, case-insensitive); group 1 is the role.
var ROLE_HEADER_RE = /^####\s+(user|assistant|system)\s*$/i;
|
|
33085
|
+
/**
 * Approximate the token count of a string as ceil(length / 3).
 *
 * @param {string} text4 - Text to measure.
 * @returns {number} Estimated number of tokens.
 */
function estimateTokens10(text4) {
  const { length } = text4;
  return Math.ceil(length / 3);
}
|
|
30468
|
-
function
|
|
30469
|
-
|
|
30470
|
-
|
|
30471
|
-
|
|
30472
|
-
|
|
33088
|
+
/**
 * Split a chat-history markdown document into role-tagged messages.
 *
 * A line matching `ROLE_HEADER_RE` ("#### user", "#### assistant",
 * "#### system") opens a new message with that lower-cased role. A line that
 * is exactly "---" (ignoring surrounding whitespace) closes the current
 * message, and the lines after it are ignored until the next role header.
 * Text before the first header is ignored, and messages whose body is empty
 * after trimming are dropped.
 *
 * NOTE(review): only the header layout above is recognised; verify that the
 * history files written by Aider actually use it.
 *
 * @param {string} content3 - Full text of the history file.
 * @returns {Array<{role: string, text: string}>} Messages in document order.
 */
function parseAiderHistory(content3) {
  const messages = [];
  let role = null;
  let body = [];

  // Emit the open message (if it has text), then switch to `nextRole`.
  const closeSection = (nextRole) => {
    const text = body.join("\n").trim();
    if (role && text) {
      messages.push({ role, text });
    }
    body = [];
    role = nextRole;
  };

  for (const line of content3.split("\n")) {
    const header = ROLE_HEADER_RE.exec(line);
    if (header) {
      closeSection(header[1].toLowerCase());
    } else if (line.trim() === "---") {
      closeSection(null);
    } else if (role) {
      body.push(line);
    }
  }
  closeSection(null);
  return messages;
}
|
|
30475
|
-
|
|
30476
|
-
|
|
30477
|
-
|
|
30478
|
-
|
|
30479
|
-
|
|
30480
|
-
|
|
30481
|
-
|
|
30482
|
-
|
|
30483
|
-
|
|
30484
|
-
|
|
30485
|
-
|
|
30486
|
-
|
|
30487
|
-
|
|
30488
|
-
|
|
30489
|
-
|
|
30490
|
-
|
|
30491
|
-
|
|
30492
|
-
|
|
30493
|
-
|
|
30494
|
-
|
|
30495
|
-
|
|
30496
|
-
|
|
30497
|
-
|
|
30498
|
-
|
|
30499
|
-
|
|
30500
|
-
|
|
30501
|
-
|
|
30502
|
-
|
|
30503
|
-
|
|
30504
|
-
|
|
30505
|
-
|
|
30506
|
-
|
|
33121
|
+
/**
 * Import provider for an Aider chat-history file kept in the project directory.
 */
var aiderProvider = {
  name: "aider",
  displayName: "Aider",
  /**
   * Report the project's history file as a single importable session.
   *
   * @param {string} projectPath - Project directory to inspect.
   * @returns {object[]} One descriptor whose `id` is the history file path,
   *   or an empty list when the file is missing, empty, unreadable, not a
   *   regular file, or holds fewer than 3 parsed messages.
   */
  detect(projectPath) {
    const filePath = join14(projectPath, HISTORY_FILE);
    if (!existsSync11(filePath)) return [];

    let stat;
    let content3;
    try {
      stat = statSync9(filePath);
      if (!stat.isFile() || stat.size === 0) return [];
      content3 = readFileSync9(filePath, "utf-8");
    } catch {
      // The file vanished or is unreadable: nothing to offer.
      return [];
    }

    const messageCount = parseAiderHistory(content3).length;
    if (messageCount < 3) return [];

    const sizeKb = Math.round(stat.size / 1024);
    return [
      {
        id: filePath,
        label: `Chat history (${messageCount} messages, ${sizeKb}KB)`,
        // Creation time when the platform reports one, else the change time.
        startedAt: stat.birthtimeMs || stat.ctimeMs,
        lastActivityAt: stat.mtimeMs,
        estimatedTokens: estimateTokens10(content3),
        messageCount
      }
    ];
  },
  /**
   * Turn the given history files into text chunks bounded by a token budget.
   *
   * Each parsed message is rendered as "[role] text" and packed greedily; a
   * chunk is closed once adding the next message would exceed `maxTokens`.
   * Unreadable files and files without messages are skipped.
   *
   * @param {string} projectPath - Not used by the body.
   * @param {Iterable<string>} sessionIds - History file paths from `detect`.
   * @param {number} [maxTokens] - Per-chunk budget in estimated tokens.
   * @returns {Array<{label: string, text: string, estimatedTokens: number, timestamp: number}>}
   */
  readChunks(projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS7) {
    const chunks = [];
    for (const filePath of sessionIds) {
      let content3;
      try {
        content3 = readFileSync9(filePath, "utf-8");
      } catch {
        continue;
      }
      const parsed = parseAiderHistory(content3);
      if (parsed.length === 0) continue;

      // Chunks carry the file's modification time, or "now" if stat fails.
      let fileTimestamp;
      try {
        fileTimestamp = statSync9(filePath).mtimeMs;
      } catch {
        fileTimestamp = Date.now();
      }

      let pending = [];
      let pendingTokens = 0;
      let emitted = 0;
      const emit = () => {
        if (pending.length === 0) return;
        emitted += 1;
        const body = pending.join("\n\n");
        chunks.push({
          label: `Aider history (${emitted})`,
          text: body,
          estimatedTokens: estimateTokens10(body),
          timestamp: fileTimestamp
        });
        pending = [];
        pendingTokens = 0;
      };

      for (const { role, text } of parsed) {
        const formatted = `[${role}] ${text}`;
        const cost = estimateTokens10(formatted);
        const wouldOverflow = pendingTokens > 0 && pendingTokens + cost > maxTokens;
        if (wouldOverflow) emit();
        pending.push(formatted);
        pendingTokens += cost;
      }
      emit();
    }
    return chunks;
  }
};
|
|
33201
|
+
registerProvider(aiderProvider);
|
|
30518
33202
|
|
|
30519
33203
|
// src/recall.ts
|
|
30520
33204
|
function getTaggedText(tagged) {
|
|
@@ -30780,7 +33464,10 @@ async function searchRecall(input) {
|
|
|
30780
33464
|
info("recall: query expansion failed, using original:", err);
|
|
30781
33465
|
}
|
|
30782
33466
|
}
|
|
33467
|
+
const queryTermCount = filterTerms(query).length;
|
|
33468
|
+
const vectorWeight = queryTermCount >= (searchConfig?.vectorBoostMinTerms ?? 3) ? searchConfig?.vectorBoostWeight ?? 1.5 : 1;
|
|
30783
33469
|
const allRrfLists = [];
|
|
33470
|
+
let primaryListEnd = 0;
|
|
30784
33471
|
for (const q of queries) {
|
|
30785
33472
|
const knowledgeResults = [];
|
|
30786
33473
|
if (knowledgeEnabled && scope !== "session") {
|
|
@@ -30857,7 +33544,11 @@ async function searchRecall(input) {
|
|
|
30857
33544
|
key: (r) => `t:${r.item.id}`
|
|
30858
33545
|
});
|
|
30859
33546
|
}
|
|
33547
|
+
if (primaryListEnd === 0) {
|
|
33548
|
+
primaryListEnd = allRrfLists.length;
|
|
33549
|
+
}
|
|
30860
33550
|
}
|
|
33551
|
+
const perQueryListEnd = allRrfLists.length;
|
|
30861
33552
|
if (isAvailable() && scope !== "session") {
|
|
30862
33553
|
try {
|
|
30863
33554
|
const [queryVec] = await embed([query], "query");
|
|
@@ -30876,7 +33567,8 @@ async function searchRecall(input) {
|
|
|
30876
33567
|
if (vectorTagged.length) {
|
|
30877
33568
|
allRrfLists.push({
|
|
30878
33569
|
items: vectorTagged,
|
|
30879
|
-
key: (r) => `k:${r.item.id}
|
|
33570
|
+
key: (r) => `k:${r.item.id}`,
|
|
33571
|
+
weight: vectorWeight
|
|
30880
33572
|
});
|
|
30881
33573
|
}
|
|
30882
33574
|
}
|
|
@@ -30895,7 +33587,8 @@ async function searchRecall(input) {
|
|
|
30895
33587
|
if (distVectorTagged.length) {
|
|
30896
33588
|
allRrfLists.push({
|
|
30897
33589
|
items: distVectorTagged,
|
|
30898
|
-
key: (r) => `d:${r.item.id}
|
|
33590
|
+
key: (r) => `d:${r.item.id}`,
|
|
33591
|
+
weight: vectorWeight
|
|
30899
33592
|
});
|
|
30900
33593
|
}
|
|
30901
33594
|
}
|
|
@@ -30919,7 +33612,8 @@ async function searchRecall(input) {
|
|
|
30919
33612
|
if (temporalVectorTagged.length) {
|
|
30920
33613
|
allRrfLists.push({
|
|
30921
33614
|
items: temporalVectorTagged,
|
|
30922
|
-
key: (r) => `t:${r.item.id}
|
|
33615
|
+
key: (r) => `t:${r.item.id}`,
|
|
33616
|
+
weight: vectorWeight
|
|
30923
33617
|
});
|
|
30924
33618
|
}
|
|
30925
33619
|
}
|
|
@@ -31022,6 +33716,15 @@ async function searchRecall(input) {
|
|
|
31022
33716
|
});
|
|
31023
33717
|
}
|
|
31024
33718
|
}
|
|
33719
|
+
const MAX_RRF_LISTS = 10;
|
|
33720
|
+
if (allRrfLists.length > MAX_RRF_LISTS) {
|
|
33721
|
+
const primary = allRrfLists.slice(0, primaryListEnd);
|
|
33722
|
+
const expanded = allRrfLists.slice(primaryListEnd, perQueryListEnd);
|
|
33723
|
+
const supplemental = allRrfLists.slice(perQueryListEnd);
|
|
33724
|
+
const budget = Math.max(0, MAX_RRF_LISTS - primary.length - supplemental.length);
|
|
33725
|
+
allRrfLists.length = 0;
|
|
33726
|
+
allRrfLists.push(...primary, ...expanded.slice(0, budget), ...supplemental);
|
|
33727
|
+
}
|
|
31025
33728
|
const fused = reciprocalRankFusion(allRrfLists);
|
|
31026
33729
|
const maxResults = limit * 3;
|
|
31027
33730
|
return fused.slice(0, maxResults);
|
|
@@ -31091,9 +33794,6 @@ async function runRecall(input) {
|
|
|
31091
33794
|
if (input.id) {
|
|
31092
33795
|
return recallById(input.id);
|
|
31093
33796
|
}
|
|
31094
|
-
if (ftsQuery(input.query) === EMPTY_QUERY) {
|
|
31095
|
-
return "Query too vague \u2014 try using specific keywords, file names, or technical terms.";
|
|
31096
|
-
}
|
|
31097
33797
|
const fused = await searchRecall(input);
|
|
31098
33798
|
const recallCfg = input.searchConfig?.recall;
|
|
31099
33799
|
return formatFusedResults(fused, {
|
|
@@ -31140,9 +33840,11 @@ export {
|
|
|
31140
33840
|
config2 as config,
|
|
31141
33841
|
consolidationUser,
|
|
31142
33842
|
consumeCameOutOfIdle,
|
|
33843
|
+
import_exports as conversationImport,
|
|
31143
33844
|
curator_exports as curator,
|
|
31144
33845
|
curatorUser,
|
|
31145
33846
|
data_exports as data,
|
|
33847
|
+
dataDir,
|
|
31146
33848
|
db,
|
|
31147
33849
|
dbPath,
|
|
31148
33850
|
distillation_exports as distillation,
|
|
@@ -31162,6 +33864,8 @@ export {
|
|
|
31162
33864
|
ftsQueryRelaxed,
|
|
31163
33865
|
getGitRemote,
|
|
31164
33866
|
getInstanceId,
|
|
33867
|
+
getKV,
|
|
33868
|
+
getLastImportAt,
|
|
31165
33869
|
getLastTransformEstimate,
|
|
31166
33870
|
getLastTransformedCount,
|
|
31167
33871
|
getLastTurnAt,
|
|
@@ -31174,6 +33878,7 @@ export {
|
|
|
31174
33878
|
importLoreFile,
|
|
31175
33879
|
inline,
|
|
31176
33880
|
inspectSessionState,
|
|
33881
|
+
instruction_detect_exports as instructionDetect,
|
|
31177
33882
|
isFirstRun,
|
|
31178
33883
|
isReasoningPart,
|
|
31179
33884
|
isTextPart,
|
|
@@ -31185,7 +33890,9 @@ export {
|
|
|
31185
33890
|
load,
|
|
31186
33891
|
loadAllSessionCosts,
|
|
31187
33892
|
loadForceMinLayer,
|
|
33893
|
+
loadHeaderSessionIndex,
|
|
31188
33894
|
loadSessionCosts,
|
|
33895
|
+
loadSessionTracking,
|
|
31189
33896
|
log_exports as log,
|
|
31190
33897
|
loreFileExists,
|
|
31191
33898
|
ltm_exports as ltm,
|
|
@@ -31206,10 +33913,14 @@ export {
|
|
|
31206
33913
|
runRecall,
|
|
31207
33914
|
sanitizeSurrogates,
|
|
31208
33915
|
saveForceMinLayer,
|
|
33916
|
+
saveGradientState,
|
|
31209
33917
|
saveSessionCosts,
|
|
33918
|
+
saveSessionTracking,
|
|
31210
33919
|
searchRecall,
|
|
31211
33920
|
serialize,
|
|
31212
33921
|
setForceMinLayer,
|
|
33922
|
+
setKV,
|
|
33923
|
+
setLastImportAt,
|
|
31213
33924
|
setLastTurnAtForTest,
|
|
31214
33925
|
setLtmTokens,
|
|
31215
33926
|
setMaxContextTokens,
|