@loreai/core 0.17.1 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bun/agents-file.d.ts +4 -0
- package/dist/bun/agents-file.d.ts.map +1 -1
- package/dist/bun/config.d.ts +2 -0
- package/dist/bun/config.d.ts.map +1 -1
- package/dist/bun/curator.d.ts +45 -0
- package/dist/bun/curator.d.ts.map +1 -1
- package/dist/bun/data-dir.d.ts +18 -0
- package/dist/bun/data-dir.d.ts.map +1 -0
- package/dist/bun/db.d.ts +85 -0
- package/dist/bun/db.d.ts.map +1 -1
- package/dist/bun/distillation.d.ts +2 -13
- package/dist/bun/distillation.d.ts.map +1 -1
- package/dist/bun/embedding-vendor.d.ts +22 -38
- package/dist/bun/embedding-vendor.d.ts.map +1 -1
- package/dist/bun/embedding-worker-types.d.ts +17 -12
- package/dist/bun/embedding-worker-types.d.ts.map +1 -1
- package/dist/bun/embedding-worker.d.ts +9 -2
- package/dist/bun/embedding-worker.d.ts.map +1 -1
- package/dist/bun/embedding-worker.js +38864 -33
- package/dist/bun/embedding-worker.js.map +4 -4
- package/dist/bun/embedding.d.ts +35 -23
- package/dist/bun/embedding.d.ts.map +1 -1
- package/dist/bun/gradient.d.ts +17 -1
- package/dist/bun/gradient.d.ts.map +1 -1
- package/dist/bun/import/detect.d.ts +14 -0
- package/dist/bun/import/detect.d.ts.map +1 -0
- package/dist/bun/import/extract.d.ts +43 -0
- package/dist/bun/import/extract.d.ts.map +1 -0
- package/dist/bun/import/history.d.ts +40 -0
- package/dist/bun/import/history.d.ts.map +1 -0
- package/dist/bun/import/index.d.ts +17 -0
- package/dist/bun/import/index.d.ts.map +1 -0
- package/dist/bun/import/providers/aider.d.ts +2 -0
- package/dist/bun/import/providers/aider.d.ts.map +1 -0
- package/dist/bun/import/providers/claude-code.d.ts +2 -0
- package/dist/bun/import/providers/claude-code.d.ts.map +1 -0
- package/dist/bun/import/providers/cline.d.ts +2 -0
- package/dist/bun/import/providers/cline.d.ts.map +1 -0
- package/dist/bun/import/providers/codex.d.ts +2 -0
- package/dist/bun/import/providers/codex.d.ts.map +1 -0
- package/dist/bun/import/providers/continue.d.ts +2 -0
- package/dist/bun/import/providers/continue.d.ts.map +1 -0
- package/dist/bun/import/providers/index.d.ts +19 -0
- package/dist/bun/import/providers/index.d.ts.map +1 -0
- package/dist/bun/import/providers/opencode.d.ts +2 -0
- package/dist/bun/import/providers/opencode.d.ts.map +1 -0
- package/dist/bun/import/providers/pi.d.ts +2 -0
- package/dist/bun/import/providers/pi.d.ts.map +1 -0
- package/dist/bun/import/types.d.ts +82 -0
- package/dist/bun/import/types.d.ts.map +1 -0
- package/dist/bun/index.d.ts +5 -2
- package/dist/bun/index.d.ts.map +1 -1
- package/dist/bun/index.js +3150 -439
- package/dist/bun/index.js.map +4 -4
- package/dist/bun/instruction-detect.d.ts +66 -0
- package/dist/bun/instruction-detect.d.ts.map +1 -0
- package/dist/bun/log.d.ts +9 -0
- package/dist/bun/log.d.ts.map +1 -1
- package/dist/bun/ltm.d.ts +139 -5
- package/dist/bun/ltm.d.ts.map +1 -1
- package/dist/bun/pattern-extract.d.ts +7 -0
- package/dist/bun/pattern-extract.d.ts.map +1 -1
- package/dist/bun/prompt.d.ts +1 -1
- package/dist/bun/prompt.d.ts.map +1 -1
- package/dist/bun/recall.d.ts.map +1 -1
- package/dist/bun/search.d.ts +5 -3
- package/dist/bun/search.d.ts.map +1 -1
- package/dist/bun/session-limiter.d.ts +26 -0
- package/dist/bun/session-limiter.d.ts.map +1 -0
- package/dist/bun/temporal.d.ts +2 -0
- package/dist/bun/temporal.d.ts.map +1 -1
- package/dist/bun/types.d.ts +1 -1
- package/dist/node/agents-file.d.ts +4 -0
- package/dist/node/agents-file.d.ts.map +1 -1
- package/dist/node/config.d.ts +2 -0
- package/dist/node/config.d.ts.map +1 -1
- package/dist/node/curator.d.ts +45 -0
- package/dist/node/curator.d.ts.map +1 -1
- package/dist/node/data-dir.d.ts +18 -0
- package/dist/node/data-dir.d.ts.map +1 -0
- package/dist/node/db.d.ts +85 -0
- package/dist/node/db.d.ts.map +1 -1
- package/dist/node/distillation.d.ts +2 -13
- package/dist/node/distillation.d.ts.map +1 -1
- package/dist/node/embedding-vendor.d.ts +22 -38
- package/dist/node/embedding-vendor.d.ts.map +1 -1
- package/dist/node/embedding-worker-types.d.ts +17 -12
- package/dist/node/embedding-worker-types.d.ts.map +1 -1
- package/dist/node/embedding-worker.d.ts +9 -2
- package/dist/node/embedding-worker.d.ts.map +1 -1
- package/dist/node/embedding-worker.js +38864 -33
- package/dist/node/embedding-worker.js.map +4 -4
- package/dist/node/embedding.d.ts +35 -23
- package/dist/node/embedding.d.ts.map +1 -1
- package/dist/node/gradient.d.ts +17 -1
- package/dist/node/gradient.d.ts.map +1 -1
- package/dist/node/import/detect.d.ts +14 -0
- package/dist/node/import/detect.d.ts.map +1 -0
- package/dist/node/import/extract.d.ts +43 -0
- package/dist/node/import/extract.d.ts.map +1 -0
- package/dist/node/import/history.d.ts +40 -0
- package/dist/node/import/history.d.ts.map +1 -0
- package/dist/node/import/index.d.ts +17 -0
- package/dist/node/import/index.d.ts.map +1 -0
- package/dist/node/import/providers/aider.d.ts +2 -0
- package/dist/node/import/providers/aider.d.ts.map +1 -0
- package/dist/node/import/providers/claude-code.d.ts +2 -0
- package/dist/node/import/providers/claude-code.d.ts.map +1 -0
- package/dist/node/import/providers/cline.d.ts +2 -0
- package/dist/node/import/providers/cline.d.ts.map +1 -0
- package/dist/node/import/providers/codex.d.ts +2 -0
- package/dist/node/import/providers/codex.d.ts.map +1 -0
- package/dist/node/import/providers/continue.d.ts +2 -0
- package/dist/node/import/providers/continue.d.ts.map +1 -0
- package/dist/node/import/providers/index.d.ts +19 -0
- package/dist/node/import/providers/index.d.ts.map +1 -0
- package/dist/node/import/providers/opencode.d.ts +2 -0
- package/dist/node/import/providers/opencode.d.ts.map +1 -0
- package/dist/node/import/providers/pi.d.ts +2 -0
- package/dist/node/import/providers/pi.d.ts.map +1 -0
- package/dist/node/import/types.d.ts +82 -0
- package/dist/node/import/types.d.ts.map +1 -0
- package/dist/node/index.d.ts +5 -2
- package/dist/node/index.d.ts.map +1 -1
- package/dist/node/index.js +3150 -439
- package/dist/node/index.js.map +4 -4
- package/dist/node/instruction-detect.d.ts +66 -0
- package/dist/node/instruction-detect.d.ts.map +1 -0
- package/dist/node/log.d.ts +9 -0
- package/dist/node/log.d.ts.map +1 -1
- package/dist/node/ltm.d.ts +139 -5
- package/dist/node/ltm.d.ts.map +1 -1
- package/dist/node/pattern-extract.d.ts +7 -0
- package/dist/node/pattern-extract.d.ts.map +1 -1
- package/dist/node/prompt.d.ts +1 -1
- package/dist/node/prompt.d.ts.map +1 -1
- package/dist/node/recall.d.ts.map +1 -1
- package/dist/node/search.d.ts +5 -3
- package/dist/node/search.d.ts.map +1 -1
- package/dist/node/session-limiter.d.ts +26 -0
- package/dist/node/session-limiter.d.ts.map +1 -0
- package/dist/node/temporal.d.ts +2 -0
- package/dist/node/temporal.d.ts.map +1 -1
- package/dist/node/types.d.ts +1 -1
- package/dist/types/agents-file.d.ts +4 -0
- package/dist/types/agents-file.d.ts.map +1 -1
- package/dist/types/config.d.ts +2 -0
- package/dist/types/config.d.ts.map +1 -1
- package/dist/types/curator.d.ts +45 -0
- package/dist/types/curator.d.ts.map +1 -1
- package/dist/types/data-dir.d.ts +18 -0
- package/dist/types/data-dir.d.ts.map +1 -0
- package/dist/types/db.d.ts +85 -0
- package/dist/types/db.d.ts.map +1 -1
- package/dist/types/distillation.d.ts +2 -13
- package/dist/types/distillation.d.ts.map +1 -1
- package/dist/types/embedding-vendor.d.ts +22 -38
- package/dist/types/embedding-vendor.d.ts.map +1 -1
- package/dist/types/embedding-worker-types.d.ts +17 -12
- package/dist/types/embedding-worker-types.d.ts.map +1 -1
- package/dist/types/embedding-worker.d.ts +9 -2
- package/dist/types/embedding-worker.d.ts.map +1 -1
- package/dist/types/embedding.d.ts +35 -23
- package/dist/types/embedding.d.ts.map +1 -1
- package/dist/types/gradient.d.ts +17 -1
- package/dist/types/gradient.d.ts.map +1 -1
- package/dist/types/import/detect.d.ts +14 -0
- package/dist/types/import/detect.d.ts.map +1 -0
- package/dist/types/import/extract.d.ts +43 -0
- package/dist/types/import/extract.d.ts.map +1 -0
- package/dist/types/import/history.d.ts +40 -0
- package/dist/types/import/history.d.ts.map +1 -0
- package/dist/types/import/index.d.ts +17 -0
- package/dist/types/import/index.d.ts.map +1 -0
- package/dist/types/import/providers/aider.d.ts +2 -0
- package/dist/types/import/providers/aider.d.ts.map +1 -0
- package/dist/types/import/providers/claude-code.d.ts +2 -0
- package/dist/types/import/providers/claude-code.d.ts.map +1 -0
- package/dist/types/import/providers/cline.d.ts +2 -0
- package/dist/types/import/providers/cline.d.ts.map +1 -0
- package/dist/types/import/providers/codex.d.ts +2 -0
- package/dist/types/import/providers/codex.d.ts.map +1 -0
- package/dist/types/import/providers/continue.d.ts +2 -0
- package/dist/types/import/providers/continue.d.ts.map +1 -0
- package/dist/types/import/providers/index.d.ts +19 -0
- package/dist/types/import/providers/index.d.ts.map +1 -0
- package/dist/types/import/providers/opencode.d.ts +2 -0
- package/dist/types/import/providers/opencode.d.ts.map +1 -0
- package/dist/types/import/providers/pi.d.ts +2 -0
- package/dist/types/import/providers/pi.d.ts.map +1 -0
- package/dist/types/import/types.d.ts +82 -0
- package/dist/types/import/types.d.ts.map +1 -0
- package/dist/types/index.d.ts +5 -2
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/instruction-detect.d.ts +66 -0
- package/dist/types/instruction-detect.d.ts.map +1 -0
- package/dist/types/log.d.ts +9 -0
- package/dist/types/log.d.ts.map +1 -1
- package/dist/types/ltm.d.ts +139 -5
- package/dist/types/ltm.d.ts.map +1 -1
- package/dist/types/pattern-extract.d.ts +7 -0
- package/dist/types/pattern-extract.d.ts.map +1 -1
- package/dist/types/prompt.d.ts +1 -1
- package/dist/types/prompt.d.ts.map +1 -1
- package/dist/types/recall.d.ts.map +1 -1
- package/dist/types/search.d.ts +5 -3
- package/dist/types/search.d.ts.map +1 -1
- package/dist/types/session-limiter.d.ts +26 -0
- package/dist/types/session-limiter.d.ts.map +1 -0
- package/dist/types/temporal.d.ts +2 -0
- package/dist/types/temporal.d.ts.map +1 -1
- package/dist/types/types.d.ts +1 -1
- package/package.json +3 -4
- package/src/agents-file.ts +41 -13
- package/src/config.ts +31 -18
- package/src/curator.ts +163 -75
- package/src/data-dir.ts +76 -0
- package/src/db.ts +457 -11
- package/src/distillation.ts +65 -16
- package/src/embedding-vendor.ts +23 -40
- package/src/embedding-worker-types.ts +19 -11
- package/src/embedding-worker.ts +111 -47
- package/src/embedding.ts +224 -174
- package/src/gradient.ts +192 -75
- package/src/import/detect.ts +37 -0
- package/src/import/extract.ts +137 -0
- package/src/import/history.ts +99 -0
- package/src/import/index.ts +45 -0
- package/src/import/providers/aider.ts +207 -0
- package/src/import/providers/claude-code.ts +339 -0
- package/src/import/providers/cline.ts +324 -0
- package/src/import/providers/codex.ts +369 -0
- package/src/import/providers/continue.ts +304 -0
- package/src/import/providers/index.ts +32 -0
- package/src/import/providers/opencode.ts +272 -0
- package/src/import/providers/pi.ts +332 -0
- package/src/import/types.ts +91 -0
- package/src/index.ts +13 -0
- package/src/instruction-detect.ts +275 -0
- package/src/log.ts +91 -3
- package/src/ltm.ts +789 -41
- package/src/pattern-extract.ts +41 -0
- package/src/prompt.ts +7 -1
- package/src/recall.ts +43 -5
- package/src/search.ts +7 -5
- package/src/session-limiter.ts +47 -0
- package/src/temporal.ts +18 -6
- package/src/types.ts +1 -1
package/dist/node/index.js
CHANGED
|
@@ -125,6 +125,7 @@ __export(temporal_exports, {
|
|
|
125
125
|
CHUNK_TERMINATOR: () => CHUNK_TERMINATOR,
|
|
126
126
|
bySession: () => bySession,
|
|
127
127
|
count: () => count,
|
|
128
|
+
hasMessages: () => hasMessages,
|
|
128
129
|
markDistilled: () => markDistilled,
|
|
129
130
|
partsToText: () => partsToText,
|
|
130
131
|
prune: () => prune,
|
|
@@ -162,9 +163,8 @@ function sha256(input) {
|
|
|
162
163
|
}
|
|
163
164
|
|
|
164
165
|
// src/db.ts
|
|
165
|
-
import { join, dirname } from "path";
|
|
166
|
+
import { join as join2, dirname } from "path";
|
|
166
167
|
import { mkdirSync } from "fs";
|
|
167
|
-
import { homedir } from "os";
|
|
168
168
|
|
|
169
169
|
// src/git.ts
|
|
170
170
|
import { execSync } from "child_process";
|
|
@@ -227,6 +227,36 @@ function getGitRemote(path) {
|
|
|
227
227
|
}
|
|
228
228
|
}
|
|
229
229
|
|
|
230
|
+
// src/data-dir.ts
|
|
231
|
+
import { existsSync, renameSync } from "node:fs";
|
|
232
|
+
import { join } from "node:path";
|
|
233
|
+
import { homedir } from "node:os";
|
|
234
|
+
var OLD_DIR_NAME = "opencode-lore";
|
|
235
|
+
var NEW_DIR_NAME = "lore";
|
|
236
|
+
var migrationAttempted = false;
|
|
237
|
+
function baseDir() {
|
|
238
|
+
return process.env.XDG_DATA_HOME || join(homedir(), ".local", "share");
|
|
239
|
+
}
|
|
240
|
+
function migrateDataDir() {
|
|
241
|
+
if (migrationAttempted) return;
|
|
242
|
+
migrationAttempted = true;
|
|
243
|
+
if (process.env.NODE_ENV === "test") return;
|
|
244
|
+
const base = baseDir();
|
|
245
|
+
const oldDir = join(base, OLD_DIR_NAME);
|
|
246
|
+
const newDir = join(base, NEW_DIR_NAME);
|
|
247
|
+
try {
|
|
248
|
+
if (existsSync(oldDir) && !existsSync(newDir)) {
|
|
249
|
+
renameSync(oldDir, newDir);
|
|
250
|
+
console.error(`[lore] migrated data directory: ${oldDir} \u2192 ${newDir}`);
|
|
251
|
+
}
|
|
252
|
+
} catch {
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
function dataDir() {
|
|
256
|
+
migrateDataDir();
|
|
257
|
+
return join(baseDir(), NEW_DIR_NAME);
|
|
258
|
+
}
|
|
259
|
+
|
|
230
260
|
// src/db.ts
|
|
231
261
|
function repoNameFromRemote(remote) {
|
|
232
262
|
if (!remote) return null;
|
|
@@ -663,17 +693,123 @@ var MIGRATIONS = [
|
|
|
663
693
|
ALTER TABLE session_state ADD COLUMN ttl_savings REAL NOT NULL DEFAULT 0;
|
|
664
694
|
ALTER TABLE session_state ADD COLUMN ttl_hits INTEGER NOT NULL DEFAULT 0;
|
|
665
695
|
ALTER TABLE session_state ADD COLUMN batch_savings REAL NOT NULL DEFAULT 0;
|
|
696
|
+
`,
|
|
697
|
+
`
|
|
698
|
+
-- Version 19: Import history for conversation import idempotency.
|
|
699
|
+
-- Tracks which external agent sessions have been imported to prevent
|
|
700
|
+
-- re-importing unchanged sources and to record user-declined imports.
|
|
701
|
+
CREATE TABLE IF NOT EXISTS import_history (
|
|
702
|
+
id TEXT PRIMARY KEY,
|
|
703
|
+
project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
|
|
704
|
+
agent_name TEXT NOT NULL,
|
|
705
|
+
source_id TEXT NOT NULL,
|
|
706
|
+
source_hash TEXT NOT NULL,
|
|
707
|
+
entries_created INTEGER NOT NULL DEFAULT 0,
|
|
708
|
+
entries_updated INTEGER NOT NULL DEFAULT 0,
|
|
709
|
+
imported_at INTEGER NOT NULL,
|
|
710
|
+
UNIQUE(project_id, agent_name, source_id)
|
|
711
|
+
);
|
|
712
|
+
CREATE INDEX IF NOT EXISTS idx_import_history_project ON import_history(project_id);
|
|
713
|
+
`,
|
|
714
|
+
`
|
|
715
|
+
-- Version 20: Purge worker boilerplate from temporal messages.
|
|
716
|
+
-- Legacy gateway/plugin worker calls (distillation observer, curator,
|
|
717
|
+
-- consolidation, reflector, eval) stored their full system prompts
|
|
718
|
+
-- (containing entire conversation transcripts, up to 1.6MB each) as
|
|
719
|
+
-- temporal messages. These pollute FTS search results by matching
|
|
720
|
+
-- virtually any domain keyword. Safe to delete: their actual output
|
|
721
|
+
-- (distillations, knowledge entries) is stored in dedicated tables.
|
|
722
|
+
DELETE FROM temporal_messages WHERE content LIKE '%You are a memory observer.%'
|
|
723
|
+
OR content LIKE '%You are a long-term memory curator.%'
|
|
724
|
+
OR content LIKE '%You are a long-term memory curator performing a consolidation pass.%'
|
|
725
|
+
OR content LIKE '%You are a memory reflector.%'
|
|
726
|
+
OR content LIKE '%You are evaluating distillation quality.%';
|
|
727
|
+
`,
|
|
728
|
+
`
|
|
729
|
+
-- Version 21: Persist avoided compaction data from live sessions.
|
|
730
|
+
-- Historical estimates previously re-simulated avoided compactions from
|
|
731
|
+
-- temporal message token estimates (chars/3), missing system prompt and
|
|
732
|
+
-- tool definition overhead. Persisting the live session's real shadow
|
|
733
|
+
-- context tracking (from actual API-reported total input tokens) gives
|
|
734
|
+
-- accurate post-restart historical estimates.
|
|
735
|
+
ALTER TABLE session_state ADD COLUMN avoided_compactions INTEGER NOT NULL DEFAULT 0;
|
|
736
|
+
ALTER TABLE session_state ADD COLUMN avoided_compaction_cost REAL NOT NULL DEFAULT 0;
|
|
737
|
+
`,
|
|
738
|
+
`
|
|
739
|
+
-- Version 22: Track when conversation import was last offered/run.
|
|
740
|
+
-- NULL means import has never been offered for this project.
|
|
741
|
+
-- Used by auto-import to avoid re-prompting, and by explicit
|
|
742
|
+
-- \`lore import\` for incremental imports (only newer conversations).
|
|
743
|
+
ALTER TABLE projects ADD COLUMN last_import_at INTEGER;
|
|
744
|
+
|
|
745
|
+
-- Backfill: migrate legacy __declined__ sentinel rows so existing
|
|
746
|
+
-- users who previously declined are not re-prompted after upgrading.
|
|
747
|
+
UPDATE projects SET last_import_at = (
|
|
748
|
+
SELECT ih.imported_at FROM import_history ih
|
|
749
|
+
WHERE ih.project_id = projects.id
|
|
750
|
+
AND ih.source_id = '__declined__'
|
|
751
|
+
LIMIT 1
|
|
752
|
+
)
|
|
753
|
+
WHERE EXISTS (
|
|
754
|
+
SELECT 1 FROM import_history ih
|
|
755
|
+
WHERE ih.project_id = projects.id
|
|
756
|
+
AND ih.source_id = '__declined__'
|
|
757
|
+
);
|
|
758
|
+
`,
|
|
759
|
+
`
|
|
760
|
+
-- Version 23: Persist volatile session tracking state across restarts.
|
|
761
|
+
-- Previously these were in-memory only, causing duplicate processing,
|
|
762
|
+
-- false compaction detection, and expensive prompt cache busts on restart.
|
|
763
|
+
ALTER TABLE session_state ADD COLUMN last_curated_at INTEGER NOT NULL DEFAULT 0;
|
|
764
|
+
ALTER TABLE session_state ADD COLUMN message_count INTEGER NOT NULL DEFAULT 0;
|
|
765
|
+
ALTER TABLE session_state ADD COLUMN turns_since_curation INTEGER NOT NULL DEFAULT 0;
|
|
766
|
+
ALTER TABLE session_state ADD COLUMN ltm_cache_text TEXT;
|
|
767
|
+
ALTER TABLE session_state ADD COLUMN ltm_cache_tokens INTEGER;
|
|
768
|
+
ALTER TABLE session_state ADD COLUMN ltm_pin_text TEXT;
|
|
769
|
+
ALTER TABLE session_state ADD COLUMN ltm_pin_tokens INTEGER;
|
|
770
|
+
ALTER TABLE session_state ADD COLUMN consecutive_text_only_turns INTEGER NOT NULL DEFAULT 0;
|
|
771
|
+
`,
|
|
772
|
+
`
|
|
773
|
+
-- Version 24: Persist remaining volatile session state across restarts.
|
|
774
|
+
-- Session identity (Tier 1/2/3 session correlation)
|
|
775
|
+
ALTER TABLE session_state ADD COLUMN fingerprint TEXT NOT NULL DEFAULT '';
|
|
776
|
+
ALTER TABLE session_state ADD COLUMN header_session_id TEXT;
|
|
777
|
+
ALTER TABLE session_state ADD COLUMN header_name TEXT;
|
|
778
|
+
-- Cache warming state
|
|
779
|
+
ALTER TABLE session_state ADD COLUMN resolved_conversation_ttl TEXT NOT NULL DEFAULT '5m';
|
|
780
|
+
ALTER TABLE session_state ADD COLUMN warmup_state TEXT;
|
|
781
|
+
-- Gradient calibration state (survives restarts to avoid uncalibrated busts)
|
|
782
|
+
ALTER TABLE session_state ADD COLUMN dynamic_context_cap REAL NOT NULL DEFAULT 0;
|
|
783
|
+
ALTER TABLE session_state ADD COLUMN bust_rate_ema REAL NOT NULL DEFAULT -1;
|
|
784
|
+
ALTER TABLE session_state ADD COLUMN inter_bust_interval_ema REAL NOT NULL DEFAULT -1;
|
|
785
|
+
ALTER TABLE session_state ADD COLUMN last_layer INTEGER NOT NULL DEFAULT 0;
|
|
786
|
+
ALTER TABLE session_state ADD COLUMN last_known_input INTEGER NOT NULL DEFAULT 0;
|
|
787
|
+
ALTER TABLE session_state ADD COLUMN last_turn_at INTEGER NOT NULL DEFAULT 0;
|
|
788
|
+
ALTER TABLE session_state ADD COLUMN last_bust_at INTEGER NOT NULL DEFAULT 0;
|
|
789
|
+
`,
|
|
790
|
+
`
|
|
791
|
+
-- Version 25: Adaptive dedup threshold \u2014 store accept/reject feedback
|
|
792
|
+
-- on embedding-based duplicate pairs for per-project threshold calibration.
|
|
793
|
+
-- Titles stored instead of FK IDs because entries are deleted during dedup;
|
|
794
|
+
-- the similarity float is the actual calibration input.
|
|
795
|
+
CREATE TABLE IF NOT EXISTS dedup_feedback (
|
|
796
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
797
|
+
project_id TEXT,
|
|
798
|
+
entry_a_title TEXT NOT NULL,
|
|
799
|
+
entry_b_title TEXT NOT NULL,
|
|
800
|
+
similarity REAL NOT NULL,
|
|
801
|
+
accepted INTEGER NOT NULL,
|
|
802
|
+
source TEXT NOT NULL DEFAULT 'manual',
|
|
803
|
+
created_at INTEGER NOT NULL
|
|
804
|
+
);
|
|
805
|
+
CREATE INDEX IF NOT EXISTS idx_dedup_feedback_project
|
|
806
|
+
ON dedup_feedback(project_id);
|
|
666
807
|
`
|
|
667
808
|
];
|
|
668
|
-
function dataDir() {
|
|
669
|
-
const xdg = process.env.XDG_DATA_HOME;
|
|
670
|
-
const base = xdg || join(homedir(), ".local", "share");
|
|
671
|
-
return join(base, "opencode-lore");
|
|
672
|
-
}
|
|
673
809
|
function dbPath() {
|
|
674
810
|
const envPath = process.env.LORE_DB_PATH;
|
|
675
811
|
if (envPath) return envPath;
|
|
676
|
-
return join(dataDir(), "lore.db");
|
|
812
|
+
return join2(dataDir(), "lore.db");
|
|
677
813
|
}
|
|
678
814
|
var instance;
|
|
679
815
|
function db() {
|
|
@@ -691,7 +827,7 @@ function db() {
|
|
|
691
827
|
}
|
|
692
828
|
const dir = dataDir();
|
|
693
829
|
mkdirSync(dir, { recursive: true });
|
|
694
|
-
path = join(dir, "lore.db");
|
|
830
|
+
path = join2(dir, "lore.db");
|
|
695
831
|
}
|
|
696
832
|
const database = new Database(path);
|
|
697
833
|
database.exec("PRAGMA journal_mode = WAL");
|
|
@@ -804,6 +940,11 @@ function close() {
|
|
|
804
940
|
}
|
|
805
941
|
}
|
|
806
942
|
function ensureProject(path, name) {
|
|
943
|
+
if (!process.env.LORE_DB_PATH && /^\/test\//.test(path)) {
|
|
944
|
+
throw new Error(
|
|
945
|
+
`Refusing to create project with test path "${path}" in the production DB. Set LORE_DB_PATH to a temp path, or run tests via \`bun test\` from the repo root.`
|
|
946
|
+
);
|
|
947
|
+
}
|
|
807
948
|
const existing = db().query("SELECT id, git_remote FROM projects WHERE path = ?").get(path);
|
|
808
949
|
if (existing) {
|
|
809
950
|
if (!existing.git_remote) {
|
|
@@ -858,6 +999,15 @@ function isFirstRun() {
|
|
|
858
999
|
const row = db().query("SELECT COUNT(*) as count FROM projects").get();
|
|
859
1000
|
return row.count === 0;
|
|
860
1001
|
}
|
|
1002
|
+
function getLastImportAt(projectPath) {
|
|
1003
|
+
const id = ensureProject(projectPath);
|
|
1004
|
+
const row = db().query("SELECT last_import_at FROM projects WHERE id = ?").get(id);
|
|
1005
|
+
return row?.last_import_at ?? null;
|
|
1006
|
+
}
|
|
1007
|
+
function setLastImportAt(projectPath, timestamp) {
|
|
1008
|
+
const id = ensureProject(projectPath);
|
|
1009
|
+
db().query("UPDATE projects SET last_import_at = ? WHERE id = ?").run(timestamp, id);
|
|
1010
|
+
}
|
|
861
1011
|
function loadForceMinLayer(sessionID) {
|
|
862
1012
|
const row = db().query("SELECT force_min_layer FROM session_state WHERE session_id = ?").get(sessionID);
|
|
863
1013
|
return row?.force_min_layer ?? 0;
|
|
@@ -876,8 +1026,9 @@ function saveSessionCosts(sessionID, costs) {
|
|
|
876
1026
|
`INSERT INTO session_state (session_id, force_min_layer, updated_at,
|
|
877
1027
|
conversation_cost, worker_cost, conversation_turns,
|
|
878
1028
|
cache_read_tokens, cache_write_tokens,
|
|
879
|
-
warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings
|
|
880
|
-
|
|
1029
|
+
warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings,
|
|
1030
|
+
avoided_compactions, avoided_compaction_cost)
|
|
1031
|
+
VALUES (?, COALESCE((SELECT force_min_layer FROM session_state WHERE session_id = ?), 0), ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
881
1032
|
ON CONFLICT(session_id) DO UPDATE SET
|
|
882
1033
|
conversation_cost = excluded.conversation_cost,
|
|
883
1034
|
worker_cost = excluded.worker_cost,
|
|
@@ -889,6 +1040,8 @@ function saveSessionCosts(sessionID, costs) {
|
|
|
889
1040
|
ttl_savings = excluded.ttl_savings,
|
|
890
1041
|
ttl_hits = excluded.ttl_hits,
|
|
891
1042
|
batch_savings = excluded.batch_savings,
|
|
1043
|
+
avoided_compactions = excluded.avoided_compactions,
|
|
1044
|
+
avoided_compaction_cost = excluded.avoided_compaction_cost,
|
|
892
1045
|
updated_at = excluded.updated_at`
|
|
893
1046
|
).run(
|
|
894
1047
|
sessionID,
|
|
@@ -903,14 +1056,17 @@ function saveSessionCosts(sessionID, costs) {
|
|
|
903
1056
|
costs.warmupHits,
|
|
904
1057
|
costs.ttlSavings,
|
|
905
1058
|
costs.ttlHits,
|
|
906
|
-
costs.batchSavings
|
|
1059
|
+
costs.batchSavings,
|
|
1060
|
+
costs.avoidedCompactions,
|
|
1061
|
+
costs.avoidedCompactionCost
|
|
907
1062
|
);
|
|
908
1063
|
}
|
|
909
1064
|
function loadSessionCosts(sessionID) {
|
|
910
1065
|
const row = db().query(
|
|
911
1066
|
`SELECT conversation_cost, worker_cost, conversation_turns,
|
|
912
1067
|
cache_read_tokens, cache_write_tokens,
|
|
913
|
-
warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings
|
|
1068
|
+
warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings,
|
|
1069
|
+
avoided_compactions, avoided_compaction_cost
|
|
914
1070
|
FROM session_state WHERE session_id = ?`
|
|
915
1071
|
).get(sessionID);
|
|
916
1072
|
if (!row) return null;
|
|
@@ -924,14 +1080,17 @@ function loadSessionCosts(sessionID) {
|
|
|
924
1080
|
warmupHits: row.warmup_hits,
|
|
925
1081
|
ttlSavings: row.ttl_savings,
|
|
926
1082
|
ttlHits: row.ttl_hits,
|
|
927
|
-
batchSavings: row.batch_savings
|
|
1083
|
+
batchSavings: row.batch_savings,
|
|
1084
|
+
avoidedCompactions: row.avoided_compactions,
|
|
1085
|
+
avoidedCompactionCost: row.avoided_compaction_cost
|
|
928
1086
|
};
|
|
929
1087
|
}
|
|
930
1088
|
function loadAllSessionCosts() {
|
|
931
1089
|
const rows = db().query(
|
|
932
1090
|
`SELECT session_id, conversation_cost, worker_cost, conversation_turns,
|
|
933
1091
|
cache_read_tokens, cache_write_tokens,
|
|
934
|
-
warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings
|
|
1092
|
+
warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings,
|
|
1093
|
+
avoided_compactions, avoided_compaction_cost
|
|
935
1094
|
FROM session_state
|
|
936
1095
|
WHERE conversation_turns > 0 OR warmup_savings > 0 OR ttl_savings > 0 OR batch_savings > 0`
|
|
937
1096
|
).all();
|
|
@@ -947,11 +1106,160 @@ function loadAllSessionCosts() {
|
|
|
947
1106
|
warmupHits: row.warmup_hits,
|
|
948
1107
|
ttlSavings: row.ttl_savings,
|
|
949
1108
|
ttlHits: row.ttl_hits,
|
|
950
|
-
batchSavings: row.batch_savings
|
|
1109
|
+
batchSavings: row.batch_savings,
|
|
1110
|
+
avoidedCompactions: row.avoided_compactions,
|
|
1111
|
+
avoidedCompactionCost: row.avoided_compaction_cost
|
|
951
1112
|
});
|
|
952
1113
|
}
|
|
953
1114
|
return result;
|
|
954
1115
|
}
|
|
1116
|
+
function saveSessionTracking(sessionID, state) {
|
|
1117
|
+
const now = Date.now();
|
|
1118
|
+
db().query(
|
|
1119
|
+
"INSERT OR IGNORE INTO session_state (session_id, force_min_layer, updated_at) VALUES (?, 0, ?)"
|
|
1120
|
+
).run(sessionID, now);
|
|
1121
|
+
const sets = ["updated_at = ?"];
|
|
1122
|
+
const vals = [now];
|
|
1123
|
+
if (state.lastCuratedAt !== void 0) {
|
|
1124
|
+
sets.push("last_curated_at = ?");
|
|
1125
|
+
vals.push(state.lastCuratedAt);
|
|
1126
|
+
}
|
|
1127
|
+
if (state.messageCount !== void 0) {
|
|
1128
|
+
sets.push("message_count = ?");
|
|
1129
|
+
vals.push(state.messageCount);
|
|
1130
|
+
}
|
|
1131
|
+
if (state.turnsSinceCuration !== void 0) {
|
|
1132
|
+
sets.push("turns_since_curation = ?");
|
|
1133
|
+
vals.push(state.turnsSinceCuration);
|
|
1134
|
+
}
|
|
1135
|
+
if (state.consecutiveTextOnlyTurns !== void 0) {
|
|
1136
|
+
sets.push("consecutive_text_only_turns = ?");
|
|
1137
|
+
vals.push(state.consecutiveTextOnlyTurns);
|
|
1138
|
+
}
|
|
1139
|
+
if (state.ltmCacheText !== void 0) {
|
|
1140
|
+
sets.push("ltm_cache_text = ?");
|
|
1141
|
+
vals.push(state.ltmCacheText);
|
|
1142
|
+
}
|
|
1143
|
+
if (state.ltmCacheTokens !== void 0) {
|
|
1144
|
+
sets.push("ltm_cache_tokens = ?");
|
|
1145
|
+
vals.push(state.ltmCacheTokens);
|
|
1146
|
+
}
|
|
1147
|
+
if (state.ltmPinText !== void 0) {
|
|
1148
|
+
sets.push("ltm_pin_text = ?");
|
|
1149
|
+
vals.push(state.ltmPinText);
|
|
1150
|
+
}
|
|
1151
|
+
if (state.ltmPinTokens !== void 0) {
|
|
1152
|
+
sets.push("ltm_pin_tokens = ?");
|
|
1153
|
+
vals.push(state.ltmPinTokens);
|
|
1154
|
+
}
|
|
1155
|
+
if (state.fingerprint !== void 0) {
|
|
1156
|
+
sets.push("fingerprint = ?");
|
|
1157
|
+
vals.push(state.fingerprint);
|
|
1158
|
+
}
|
|
1159
|
+
if (state.headerSessionId !== void 0) {
|
|
1160
|
+
sets.push("header_session_id = ?");
|
|
1161
|
+
vals.push(state.headerSessionId);
|
|
1162
|
+
}
|
|
1163
|
+
if (state.headerName !== void 0) {
|
|
1164
|
+
sets.push("header_name = ?");
|
|
1165
|
+
vals.push(state.headerName);
|
|
1166
|
+
}
|
|
1167
|
+
if (state.resolvedConversationTTL !== void 0) {
|
|
1168
|
+
sets.push("resolved_conversation_ttl = ?");
|
|
1169
|
+
vals.push(state.resolvedConversationTTL);
|
|
1170
|
+
}
|
|
1171
|
+
if (state.warmupState !== void 0) {
|
|
1172
|
+
sets.push("warmup_state = ?");
|
|
1173
|
+
vals.push(state.warmupState);
|
|
1174
|
+
}
|
|
1175
|
+
if (state.dynamicContextCap !== void 0) {
|
|
1176
|
+
sets.push("dynamic_context_cap = ?");
|
|
1177
|
+
vals.push(state.dynamicContextCap);
|
|
1178
|
+
}
|
|
1179
|
+
if (state.bustRateEMA !== void 0) {
|
|
1180
|
+
sets.push("bust_rate_ema = ?");
|
|
1181
|
+
vals.push(state.bustRateEMA);
|
|
1182
|
+
}
|
|
1183
|
+
if (state.interBustIntervalEMA !== void 0) {
|
|
1184
|
+
sets.push("inter_bust_interval_ema = ?");
|
|
1185
|
+
vals.push(state.interBustIntervalEMA);
|
|
1186
|
+
}
|
|
1187
|
+
if (state.lastLayer !== void 0) {
|
|
1188
|
+
sets.push("last_layer = ?");
|
|
1189
|
+
vals.push(state.lastLayer);
|
|
1190
|
+
}
|
|
1191
|
+
if (state.lastKnownInput !== void 0) {
|
|
1192
|
+
sets.push("last_known_input = ?");
|
|
1193
|
+
vals.push(state.lastKnownInput);
|
|
1194
|
+
}
|
|
1195
|
+
if (state.lastTurnAt !== void 0) {
|
|
1196
|
+
sets.push("last_turn_at = ?");
|
|
1197
|
+
vals.push(state.lastTurnAt);
|
|
1198
|
+
}
|
|
1199
|
+
if (state.lastBustAt !== void 0) {
|
|
1200
|
+
sets.push("last_bust_at = ?");
|
|
1201
|
+
vals.push(state.lastBustAt);
|
|
1202
|
+
}
|
|
1203
|
+
db().query(
|
|
1204
|
+
"UPDATE session_state SET " + sets.join(", ") + " WHERE session_id = ?"
|
|
1205
|
+
).run(...vals, sessionID);
|
|
1206
|
+
}
|
|
1207
|
+
function loadSessionTracking(sessionID) {
|
|
1208
|
+
const row = db().query(
|
|
1209
|
+
`SELECT last_curated_at, message_count, turns_since_curation,
|
|
1210
|
+
consecutive_text_only_turns,
|
|
1211
|
+
ltm_cache_text, ltm_cache_tokens, ltm_pin_text, ltm_pin_tokens,
|
|
1212
|
+
fingerprint, header_session_id, header_name,
|
|
1213
|
+
resolved_conversation_ttl, warmup_state,
|
|
1214
|
+
dynamic_context_cap, bust_rate_ema, inter_bust_interval_ema,
|
|
1215
|
+
last_layer, last_known_input, last_turn_at, last_bust_at
|
|
1216
|
+
FROM session_state WHERE session_id = ?`
|
|
1217
|
+
).get(sessionID);
|
|
1218
|
+
if (!row) return null;
|
|
1219
|
+
return {
|
|
1220
|
+
lastCuratedAt: row.last_curated_at,
|
|
1221
|
+
messageCount: row.message_count,
|
|
1222
|
+
turnsSinceCuration: row.turns_since_curation,
|
|
1223
|
+
consecutiveTextOnlyTurns: row.consecutive_text_only_turns,
|
|
1224
|
+
ltmCacheText: row.ltm_cache_text,
|
|
1225
|
+
ltmCacheTokens: row.ltm_cache_tokens,
|
|
1226
|
+
ltmPinText: row.ltm_pin_text,
|
|
1227
|
+
ltmPinTokens: row.ltm_pin_tokens,
|
|
1228
|
+
fingerprint: row.fingerprint,
|
|
1229
|
+
headerSessionId: row.header_session_id,
|
|
1230
|
+
headerName: row.header_name,
|
|
1231
|
+
resolvedConversationTTL: row.resolved_conversation_ttl,
|
|
1232
|
+
warmupState: row.warmup_state,
|
|
1233
|
+
dynamicContextCap: row.dynamic_context_cap,
|
|
1234
|
+
bustRateEMA: row.bust_rate_ema,
|
|
1235
|
+
interBustIntervalEMA: row.inter_bust_interval_ema,
|
|
1236
|
+
lastLayer: row.last_layer,
|
|
1237
|
+
lastKnownInput: row.last_known_input,
|
|
1238
|
+
lastTurnAt: row.last_turn_at,
|
|
1239
|
+
lastBustAt: row.last_bust_at
|
|
1240
|
+
};
|
|
1241
|
+
}
|
|
1242
|
+
/**
 * List every session whose header identity is fully recorded.
 *
 * Only rows where both `header_session_id` and `header_name` are non-NULL are
 * selected.
 *
 * @returns {Array<{sessionId: *, headerSessionId: *, headerName: *}>} One
 *   entry per matching `session_state` row, in the order the query returns.
 */
function loadHeaderSessionIndex() {
  // Fragments are joined with newlines, which SQL treats as plain whitespace.
  const selectSql = [
    "SELECT session_id, header_session_id, header_name",
    "FROM session_state",
    "WHERE header_session_id IS NOT NULL AND header_name IS NOT NULL"
  ].join("\n");
  const index = [];
  for (const record of db().query(selectSql).all()) {
    index.push({
      sessionId: record.session_id,
      headerSessionId: record.header_session_id,
      headerName: record.header_name
    });
  }
  return index;
}
|
|
1254
|
+
/**
 * Read a value from the `kv_meta` table.
 *
 * @param {string} key - Key to look up.
 * @returns {* | null} The stored value, or `null` when there is no row for
 *   `key` or the stored value is itself null/undefined.
 */
function getKV(key) {
  const hit = db().query("SELECT value FROM kv_meta WHERE key = ?").get(key);
  if (hit === null || hit === void 0) return null;
  return hit.value ?? null;
}
|
|
1258
|
+
/**
 * Insert or overwrite a value in the `kv_meta` table.
 *
 * @param {string} key - Key to write.
 * @param {*} value - Value stored under `key`.
 */
function setKV(key, value) {
  const upsert = db().query(
    "INSERT INTO kv_meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value = ?"
  );
  // `value` is bound twice: once for the INSERT and once for the
  // ON CONFLICT ... DO UPDATE branch.
  upsert.run(key, value, value);
}
|
|
955
1263
|
function getMeta(key) {
|
|
956
1264
|
const row = db().query("SELECT value FROM metadata WHERE key = ?").get(key);
|
|
957
1265
|
return row?.value ?? null;
|
|
@@ -9770,7 +10078,7 @@ var handle = {
|
|
|
9770
10078
|
};
|
|
9771
10079
|
|
|
9772
10080
|
// ../../node_modules/.bun/mdast-util-to-markdown@2.1.2/node_modules/mdast-util-to-markdown/lib/join.js
|
|
9773
|
-
var
|
|
10081
|
+
var join3 = [joinDefaults];
|
|
9774
10082
|
function joinDefaults(left, right, parent, state) {
|
|
9775
10083
|
if (right.type === "code" && formatCodeAsIndented(right, state) && (left.type === "list" || left.type === right.type && formatCodeAsIndented(left, state))) {
|
|
9776
10084
|
return false;
|
|
@@ -10190,7 +10498,7 @@ function toMarkdown(tree, options) {
|
|
|
10190
10498
|
handle: void 0,
|
|
10191
10499
|
indentLines,
|
|
10192
10500
|
indexStack: [],
|
|
10193
|
-
join: [...
|
|
10501
|
+
join: [...join3],
|
|
10194
10502
|
options: {},
|
|
10195
10503
|
safe: safeBound,
|
|
10196
10504
|
stack: [],
|
|
@@ -11915,6 +12223,10 @@ Focus ONLY on knowledge that helps a coding agent work effectively on THIS codeb
|
|
|
11915
12223
|
- Environment/tooling setup details that affect development
|
|
11916
12224
|
- Important relationships between components that aren't obvious from reading the code
|
|
11917
12225
|
- User preferences and working style specific to how they use this project
|
|
12226
|
+
- Repeated user instructions \u2014 when the user says things like "always", "never",
|
|
12227
|
+
"make sure to", "don't forget to", these are high-value preference candidates.
|
|
12228
|
+
If you see instruction-like language, prioritize extracting it as a "preference" entry.
|
|
12229
|
+
These instructions represent how the user wants to work and should persist across sessions.
|
|
11918
12230
|
|
|
11919
12231
|
Do NOT extract:
|
|
11920
12232
|
- Task-specific details (file currently being edited, current bug being fixed)
|
|
@@ -11999,7 +12311,9 @@ IMPORTANT:
|
|
|
11999
12311
|
2. When updating, REPLACE the content with a complete rewrite \u2014 never append.
|
|
12000
12312
|
3. If entries cover the same system from different angles, merge them: update one, delete the rest.
|
|
12001
12313
|
4. Only create a new entry for genuinely distinct knowledge with no existing home.
|
|
12002
|
-
5. Keep all entries under 150 words. If an existing entry is too long, use an update op to trim it
|
|
12314
|
+
5. Keep all entries under 150 words. If an existing entry is too long, use an update op to trim it.
|
|
12315
|
+
6. Pay special attention to user instructions ("always do X", "never do Y", "make sure to X").
|
|
12316
|
+
These are strong signals for "preference" entries with high confidence.`;
|
|
12003
12317
|
}
|
|
12004
12318
|
var CONSOLIDATION_SYSTEM = `You are a long-term memory curator performing a consolidation pass. The knowledge base has grown too large and needs to be trimmed.
|
|
12005
12319
|
|
|
@@ -12163,9 +12477,12 @@ var log_exports = {};
|
|
|
12163
12477
|
__export(log_exports, {
|
|
12164
12478
|
error: () => error,
|
|
12165
12479
|
info: () => info,
|
|
12480
|
+
logFilePath: () => logFilePath,
|
|
12166
12481
|
registerSink: () => registerSink,
|
|
12167
12482
|
warn: () => warn
|
|
12168
12483
|
});
|
|
12484
|
+
import { appendFileSync, renameSync as renameSync2, statSync, mkdirSync as mkdirSync2 } from "node:fs";
|
|
12485
|
+
import { join as join4 } from "node:path";
|
|
12169
12486
|
var sink = null;
|
|
12170
12487
|
function registerSink(s) {
|
|
12171
12488
|
sink = s;
|
|
@@ -12180,17 +12497,71 @@ function findError(args) {
|
|
|
12180
12497
|
}
|
|
12181
12498
|
return void 0;
|
|
12182
12499
|
}
|
|
12500
|
+
var LOG_MAX_BYTES = 5 * 1024 * 1024;
|
|
12501
|
+
var ROTATION_CHECK_INTERVAL = 1e3;
|
|
12502
|
+
var logPath;
|
|
12503
|
+
var logPathResolved = false;
|
|
12504
|
+
var writeCount = 0;
|
|
12505
|
+
/**
 * Work out where the log file lives, creating its directory if needed.
 *
 * @returns {string | undefined} Path to `lore.log` inside the data directory,
 *   or `undefined` when `NODE_ENV` is "test" or the directory cannot be
 *   resolved/created.
 */
function resolveLogPath() {
  const underTest = process.env.NODE_ENV === "test";
  if (underTest) return void 0;

  let resolved;
  try {
    const baseDir = dataDir();
    mkdirSync2(baseDir, { recursive: true });
    resolved = join4(baseDir, "lore.log");
  } catch {
    // Any failure here simply disables file logging.
    resolved = void 0;
  }
  return resolved;
}
|
|
12515
|
+
/**
 * Return the log file path, resolving it at most once.
 *
 * The outcome of the first resolution (including `undefined`) is cached in
 * the module-level `logPath` / `logPathResolved` pair.
 *
 * @returns {string | undefined} The cached log file path, if any.
 */
function logFilePath() {
  if (logPathResolved) return logPath;
  logPath = resolveLogPath();
  logPathResolved = true;
  return logPath;
}
|
|
12522
|
+
/**
 * Rotate the log file once it has grown past `LOG_MAX_BYTES`.
 *
 * The current file is renamed to `<logPath>.1`. Does nothing when no log path
 * has been resolved; filesystem errors are ignored.
 */
function maybeRotate() {
  if (!logPath) return;
  try {
    const { size } = statSync(logPath);
    const oversized = size > LOG_MAX_BYTES;
    if (oversized) {
      renameSync2(logPath, `${logPath}.1`);
    }
  } catch {
    // Best effort: a failed stat or rename leaves the current log in place.
  }
}
|
|
12532
|
+
/**
 * Append one formatted entry to the on-disk log file.
 *
 * Each entry is a single line: `<ISO timestamp> [<LEVEL>] <message>`, with the
 * level upper-cased and padded to five characters. Does nothing when no log
 * path is available.
 *
 * @param {string} level - Severity label, e.g. "info", "warn", "error".
 * @param {string} message - Already-formatted message text.
 */
function writeToFile(level, message) {
  const target = logFilePath();
  if (!target) return;
  // Check the size on the first write of the process and then once every
  // ROTATION_CHECK_INTERVAL writes. Checking only on every Nth write would
  // never rotate for processes that log fewer than N lines per run.
  if (writeCount++ % ROTATION_CHECK_INTERVAL === 0) {
    maybeRotate();
  }
  const ts = (/* @__PURE__ */ new Date()).toISOString();
  const tag = level.toUpperCase().padEnd(5);
  // Escape every line terminator (CRLF, lone CR, LF) so an entry can never
  // span or overwrite more than one physical line.
  const flat = message.replace(/\r\n|\r|\n/g, "\\n");
  try {
    appendFileSync(target, `${ts} [${tag}] ${flat}\n`);
  } catch {
    // File logging is best effort; a failed append must not break the caller.
  }
}
|
|
12183
12548
|
/**
 * Log at info level.
 *
 * Echoes to stderr only when `isDebug` is set, then forwards the formatted
 * message to the registered sink (if any) and to the log file.
 *
 * @param {...*} args - Values to log; formatted via `formatArgs`.
 */
function info(...args) {
  if (isDebug) {
    console.error("[lore]", ...args);
  }
  const text = formatArgs(args);
  if (sink !== null && sink !== void 0) {
    sink.info(text);
  }
  writeToFile("info", text);
}
|
|
12187
12554
|
/**
 * Log at warn level.
 *
 * Echoes to stderr only when `isDebug` is set, then forwards the formatted
 * message to the registered sink (if any) and to the log file.
 *
 * @param {...*} args - Values to log; formatted via `formatArgs`.
 */
function warn(...args) {
  if (isDebug) {
    console.error("[lore] WARN:", ...args);
  }
  const text = formatArgs(args);
  if (sink !== null && sink !== void 0) {
    sink.warn(text);
  }
  writeToFile("warn", text);
}
|
|
12191
12560
|
/**
 * Log at error level.
 *
 * Always echoes to stderr, forwards the formatted message to the registered
 * sink (if any) and to the log file, and finally reports the value picked out
 * by `findError` (when there is one) through the sink's `captureException`.
 *
 * @param {...*} args - Values to log; formatted via `formatArgs`.
 */
function error(...args) {
  console.error("[lore]", ...args);
  const text = formatArgs(args);
  if (sink !== null && sink !== void 0) {
    sink.error(text);
  }
  writeToFile("error", text);
  const failure = findError(args);
  if (!failure) return;
  if (sink !== null && sink !== void 0) {
    sink.captureException(failure);
  }
}
|
|
@@ -12350,10 +12721,11 @@ function extractTopTerms(text4, limit = 40) {
|
|
|
12350
12721
|
function reciprocalRankFusion(lists, k = 60) {
|
|
12351
12722
|
const scores = /* @__PURE__ */ new Map();
|
|
12352
12723
|
for (const list4 of lists) {
|
|
12724
|
+
const w = list4.weight ?? 1;
|
|
12353
12725
|
for (let rank = 0; rank < list4.items.length; rank++) {
|
|
12354
12726
|
const item = list4.items[rank];
|
|
12355
12727
|
const id = list4.key(item);
|
|
12356
|
-
const rrfScore =
|
|
12728
|
+
const rrfScore = w / (k + rank);
|
|
12357
12729
|
const existing = scores.get(id);
|
|
12358
12730
|
if (existing) {
|
|
12359
12731
|
existing.score += rrfScore;
|
|
@@ -12407,8 +12779,8 @@ async function expandQuery(llm, query, model, sessionID) {
|
|
|
12407
12779
|
var embedding_exports = {};
|
|
12408
12780
|
__export(embedding_exports, {
|
|
12409
12781
|
LocalProviderUnavailableError: () => LocalProviderUnavailableError,
|
|
12410
|
-
|
|
12411
|
-
|
|
12782
|
+
_markLocalProviderUnavailable: () => _markLocalProviderUnavailable,
|
|
12783
|
+
_resetLocalProviderProbe: () => _resetLocalProviderProbe,
|
|
12412
12784
|
_restoreProvider: () => _restoreProvider,
|
|
12413
12785
|
_saveAndClearProvider: () => _saveAndClearProvider,
|
|
12414
12786
|
_shutdownAndDisable: () => _shutdownAndDisable,
|
|
@@ -12427,6 +12799,7 @@ __export(embedding_exports, {
|
|
|
12427
12799
|
runStartupBackfill: () => runStartupBackfill,
|
|
12428
12800
|
toBlob: () => toBlob,
|
|
12429
12801
|
vectorSearch: () => vectorSearch,
|
|
12802
|
+
vectorSearchAllDistillations: () => vectorSearchAllDistillations,
|
|
12430
12803
|
vectorSearchDistillations: () => vectorSearchDistillations,
|
|
12431
12804
|
vectorSearchTemporal: () => vectorSearchTemporal
|
|
12432
12805
|
});
|
|
@@ -26200,8 +26573,8 @@ function date4(params) {
|
|
|
26200
26573
|
config(en_default());
|
|
26201
26574
|
|
|
26202
26575
|
// src/config.ts
|
|
26203
|
-
import { existsSync, readFileSync } from "node:fs";
|
|
26204
|
-
import { join as
|
|
26576
|
+
import { existsSync as existsSync2, readFileSync } from "node:fs";
|
|
26577
|
+
import { join as join5 } from "node:path";
|
|
26205
26578
|
var LoreConfig = external_exports.object({
|
|
26206
26579
|
model: external_exports.object({
|
|
26207
26580
|
providerID: external_exports.string(),
|
|
@@ -26318,15 +26691,25 @@ var LoreConfig = external_exports.object({
|
|
|
26318
26691
|
}).default({ title: 6, content: 2, category: 3 }),
|
|
26319
26692
|
/** Max results per source in recall tool before fusion. Default: 10. */
|
|
26320
26693
|
recallLimit: external_exports.number().min(1).max(50).default(10),
|
|
26321
|
-
/** Enable LLM-based query expansion for the recall tool. Default:
|
|
26322
|
-
*
|
|
26323
|
-
*
|
|
26324
|
-
|
|
26694
|
+
/** Enable LLM-based query expansion for the recall tool. Default: true.
|
|
26695
|
+
* The configured model generates 2–3 alternative query phrasings before
|
|
26696
|
+
* search, improving recall for ambiguous queries. Guarded by a 3-second
|
|
26697
|
+
* timeout — if expansion fails or times out, the original query is used. */
|
|
26698
|
+
queryExpansion: external_exports.boolean().default(true),
|
|
26699
|
+
/** RRF weight multiplier for vector search lists. Applied when the query
|
|
26700
|
+
* has >= `vectorBoostMinTerms` meaningful terms (after stopword removal).
|
|
26701
|
+
* Boosts semantic/vector results relative to keyword-based BM25 lists.
|
|
26702
|
+
* Default: 1.5. Set to 1.0 to disable. */
|
|
26703
|
+
vectorBoostWeight: external_exports.number().min(1).max(5).default(1.5),
|
|
26704
|
+
/** Minimum meaningful query terms (after stopword removal) to activate
|
|
26705
|
+
* vector boost. Short keyword queries (1-2 terms) are left unweighted
|
|
26706
|
+
* since BM25 excels there. Default: 3. */
|
|
26707
|
+
vectorBoostMinTerms: external_exports.number().min(1).max(10).default(3),
|
|
26325
26708
|
/** Vector embedding search.
|
|
26326
26709
|
* Supports multiple providers:
|
|
26327
|
-
* - "local" (default):
|
|
26328
|
-
*
|
|
26329
|
-
* cached
|
|
26710
|
+
* - "local" (default): @huggingface/transformers + nomic-embed-text-v1.5, no API key needed.
|
|
26711
|
+
* 768 dims (Matryoshka-capable: 64–768). Model downloaded on first use (~137MB INT8),
|
|
26712
|
+
* cached locally. Uses task instruction prefixes (search_document: / search_query:).
|
|
26330
26713
|
* - "voyage": Voyage AI (VOYAGE_API_KEY, voyage-code-3, 1024 dims)
|
|
26331
26714
|
* - "openai": OpenAI (OPENAI_API_KEY, text-embedding-3-small, 1536 dims)
|
|
26332
26715
|
* Set enabled: false to explicitly disable even with a provider available. */
|
|
@@ -26335,19 +26718,20 @@ var LoreConfig = external_exports.object({
|
|
|
26335
26718
|
* Set to false to explicitly disable. */
|
|
26336
26719
|
enabled: external_exports.boolean().default(true),
|
|
26337
26720
|
/** Embedding provider. Default: "local".
|
|
26338
|
-
* - "local":
|
|
26721
|
+
* - "local": @huggingface/transformers, no API key (default model: nomic-embed-text-v1.5, 768 dims)
|
|
26339
26722
|
* - "voyage": VOYAGE_API_KEY (default model: voyage-code-3, 1024 dims)
|
|
26340
26723
|
* - "openai": OPENAI_API_KEY (default model: text-embedding-3-small, 1536 dims) */
|
|
26341
26724
|
provider: external_exports.enum(["local", "voyage", "openai"]).default("local"),
|
|
26342
26725
|
/** Model ID for the embedding provider. Default depends on provider. */
|
|
26343
|
-
model: external_exports.string().default("
|
|
26344
|
-
/** Embedding dimensions. Default:
|
|
26345
|
-
|
|
26726
|
+
model: external_exports.string().default("nomic-ai/nomic-embed-text-v1.5"),
|
|
26727
|
+
/** Embedding dimensions. Default: 768 (local) / 1024 (voyage) / 1536 (openai).
|
|
26728
|
+
* For the local Nomic v1.5 model, supports Matryoshka dimensions: 64, 128, 256, 512, 768. */
|
|
26729
|
+
dimensions: external_exports.number().min(64).max(2048).default(768)
|
|
26346
26730
|
}).default({
|
|
26347
26731
|
enabled: true,
|
|
26348
26732
|
provider: "local",
|
|
26349
|
-
model: "
|
|
26350
|
-
dimensions:
|
|
26733
|
+
model: "nomic-ai/nomic-embed-text-v1.5",
|
|
26734
|
+
dimensions: 768
|
|
26351
26735
|
}),
|
|
26352
26736
|
/** Recall output formatting — controls how search results are presented to the agent. */
|
|
26353
26737
|
recall: external_exports.object({
|
|
@@ -26364,8 +26748,10 @@ var LoreConfig = external_exports.object({
|
|
|
26364
26748
|
}).default({
|
|
26365
26749
|
ftsWeights: { title: 6, content: 2, category: 3 },
|
|
26366
26750
|
recallLimit: 10,
|
|
26367
|
-
queryExpansion:
|
|
26368
|
-
|
|
26751
|
+
queryExpansion: true,
|
|
26752
|
+
vectorBoostWeight: 1.5,
|
|
26753
|
+
vectorBoostMinTerms: 3,
|
|
26754
|
+
embeddings: { enabled: true, provider: "local", model: "nomic-ai/nomic-embed-text-v1.5", dimensions: 768 },
|
|
26369
26755
|
recall: { charBudget: 8e3, relevanceFloor: 0.15, maxResults: 15 }
|
|
26370
26756
|
}),
|
|
26371
26757
|
cache: external_exports.object({
|
|
@@ -26383,9 +26769,9 @@ var LoreConfig = external_exports.object({
|
|
|
26383
26769
|
warming: external_exports.object({
|
|
26384
26770
|
/** Enable cache warming. Default: true. */
|
|
26385
26771
|
enabled: external_exports.boolean().default(true),
|
|
26386
|
-
/** Override the
|
|
26387
|
-
* skipped. Default: auto-derived from
|
|
26388
|
-
* (~0.
|
|
26772
|
+
/** Override the return probability threshold below which warming is
|
|
26773
|
+
* skipped. Default: auto-derived from corrected cost ratio
|
|
26774
|
+
* read/(write-read) (~0.087 for 5m TTL, ~0.042 for 1h TTL). */
|
|
26389
26775
|
minReturnProbability: external_exports.number().min(0).max(1).optional()
|
|
26390
26776
|
}).default({ enabled: true })
|
|
26391
26777
|
}).default({
|
|
@@ -26405,8 +26791,8 @@ function config2() {
|
|
|
26405
26791
|
return current;
|
|
26406
26792
|
}
|
|
26407
26793
|
async function load(directory) {
|
|
26408
|
-
const path =
|
|
26409
|
-
if (
|
|
26794
|
+
const path = join5(directory, ".lore.json");
|
|
26795
|
+
if (existsSync2(path)) {
|
|
26410
26796
|
const raw = JSON.parse(readFileSync(path, "utf8"));
|
|
26411
26797
|
current = LoreConfig.parse(raw);
|
|
26412
26798
|
return current;
|
|
@@ -26437,8 +26823,7 @@ function vendorModelInfo() {
|
|
|
26437
26823
|
const reg = getRegistration();
|
|
26438
26824
|
if (!reg) return null;
|
|
26439
26825
|
return {
|
|
26440
|
-
|
|
26441
|
-
modelName: reg.modelName
|
|
26826
|
+
localModelPath: reg.localModelPath
|
|
26442
26827
|
};
|
|
26443
26828
|
}
|
|
26444
26829
|
function isVendoredBinary() {
|
|
@@ -26525,62 +26910,31 @@ var OpenAIProvider = class {
|
|
|
26525
26910
|
/**
 * Raised when the local embedding provider cannot be used.
 *
 * Carries a fixed, user-facing remediation message. The optional `cause` is
 * attached as an own property whenever it is anything other than `undefined`.
 */
var LocalProviderUnavailableError = class extends Error {
  /**
   * @param {*} [cause] - Underlying failure (error object or message), if known.
   */
  constructor(cause) {
    const explanation = "Local embedding provider unavailable: '@huggingface/transformers' failed to initialize. Configure search.embeddings.provider to 'voyage' or 'openai', or set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback.";
    super(explanation);
    this.name = "LocalProviderUnavailableError";
    if (cause === void 0) return;
    this.cause = cause;
  }
};
|
|
26534
|
-
var
|
|
26535
|
-
var
|
|
26536
|
-
|
|
26537
|
-
|
|
26538
|
-
|
|
26539
|
-
fastembedModule = null;
|
|
26540
|
-
fastembedProbed = false;
|
|
26541
|
-
fastembedAvailable = false;
|
|
26542
|
-
fastembedLogged = false;
|
|
26543
|
-
}
|
|
26544
|
-
function _markFastembedUnavailable() {
|
|
26545
|
-
fastembedModule = null;
|
|
26546
|
-
fastembedProbed = true;
|
|
26547
|
-
fastembedAvailable = false;
|
|
26548
|
-
fastembedLogged = true;
|
|
26549
|
-
}
|
|
26550
|
-
async function tryLoadFastembed() {
|
|
26551
|
-
if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
|
|
26552
|
-
try {
|
|
26553
|
-
const mod = await loadFastembedModule();
|
|
26554
|
-
if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
|
|
26555
|
-
fastembedModule = mod;
|
|
26556
|
-
fastembedAvailable = true;
|
|
26557
|
-
} catch (err) {
|
|
26558
|
-
if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
|
|
26559
|
-
fastembedAvailable = false;
|
|
26560
|
-
if (!fastembedLogged) {
|
|
26561
|
-
fastembedLogged = true;
|
|
26562
|
-
const msg = err instanceof Error ? err.message : String(err);
|
|
26563
|
-
const remediation = isVendoredBinary() ? "this is a bug in the lore binary; please file an issue. Set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback in the meantime" : "set search.embeddings.provider to 'voyage' or 'openai', set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback, or reinstall fastembed with ONNXRUNTIME_NODE_INSTALL_CUDA=skip";
|
|
26564
|
-
info(
|
|
26565
|
-
`local embedding provider unavailable (fastembed not installed: ${msg}) \u2014 ${remediation}`
|
|
26566
|
-
);
|
|
26567
|
-
}
|
|
26568
|
-
} finally {
|
|
26569
|
-
fastembedProbed = true;
|
|
26570
|
-
}
|
|
26571
|
-
return fastembedAvailable ? fastembedModule : null;
|
|
26919
|
+
var localProviderKnownBroken = false;
|
|
26920
|
+
var localProviderErrorLogged = false;
|
|
26921
|
+
/**
 * Clear the module-level "local provider is broken" state.
 *
 * Resets both the broken flag and the already-logged flag to `false`.
 */
function _resetLocalProviderProbe() {
  localProviderKnownBroken = localProviderErrorLogged = false;
}
|
|
26573
|
-
|
|
26574
|
-
|
|
26925
|
+
/**
 * Force the module-level state to "local provider is broken".
 *
 * Also sets the already-logged flag, so code that checks it before logging
 * the failure will stay quiet.
 */
function _markLocalProviderUnavailable() {
  localProviderKnownBroken = localProviderErrorLogged = true;
}
|
|
26576
|
-
function
|
|
26577
|
-
return
|
|
26929
|
+
/**
 * Report whether the local embedding provider has been flagged as broken.
 *
 * @returns {boolean} Current value of the module-level broken flag.
 */
function localProviderKnownUnavailable() {
  const flaggedBroken = localProviderKnownBroken;
  return flaggedBroken;
}
|
|
26579
26932
|
var LocalProvider = class {
|
|
26580
26933
|
// With inference off the main thread, large batches no longer block
|
|
26581
26934
|
// the event loop. 256 maximises throughput per round-trip to the
|
|
26582
|
-
// worker. Backfill callers use
|
|
26583
|
-
// the worker's priority queue breathing room
|
|
26935
|
+
// worker. Backfill callers use token-budget-based batching (see
|
|
26936
|
+
// nextBatch) to give the worker's priority queue breathing room
|
|
26937
|
+
// for recall queries and prevent OOM on long texts.
|
|
26584
26938
|
maxBatchSize = 256;
|
|
26585
26939
|
worker = null;
|
|
26586
26940
|
workerReady = false;
|
|
@@ -26588,14 +26942,14 @@ var LocalProvider = class {
|
|
|
26588
26942
|
pendingRequests = /* @__PURE__ */ new Map();
|
|
26589
26943
|
nextRequestId = 0;
|
|
26590
26944
|
initPromise = null;
|
|
26591
|
-
|
|
26592
|
-
|
|
26593
|
-
|
|
26945
|
+
modelId;
|
|
26946
|
+
dimensions;
|
|
26947
|
+
constructor(modelId, dimensions) {
|
|
26948
|
+
this.modelId = modelId;
|
|
26949
|
+
this.dimensions = dimensions;
|
|
26594
26950
|
}
|
|
26595
26951
|
/**
|
|
26596
|
-
* Ensure the worker thread is running.
|
|
26597
|
-
* thread first (fast, cached) as a fast-fail gate — the worker is only
|
|
26598
|
-
* spawned if the module is known-loadable. Worker startup failure is
|
|
26952
|
+
* Ensure the worker thread is running. Worker startup failure is
|
|
26599
26953
|
* surfaced as `LocalProviderUnavailableError` to trigger the existing
|
|
26600
26954
|
* auto-fallback to remote providers.
|
|
26601
26955
|
*/
|
|
@@ -26604,8 +26958,7 @@ var LocalProvider = class {
|
|
|
26604
26958
|
if (this.workerInitError) throw new LocalProviderUnavailableError(this.workerInitError);
|
|
26605
26959
|
if (this.initPromise) return this.initPromise;
|
|
26606
26960
|
this.initPromise = (async () => {
|
|
26607
|
-
|
|
26608
|
-
if (!fastembed) throw new LocalProviderUnavailableError();
|
|
26961
|
+
if (localProviderKnownBroken) throw new LocalProviderUnavailableError();
|
|
26609
26962
|
const { Worker } = await import("node:worker_threads");
|
|
26610
26963
|
const vendorWorkerUrl = globalThis.__LORE_VENDOR_WORKER_URL__;
|
|
26611
26964
|
let workerUrl;
|
|
@@ -26619,12 +26972,22 @@ var LocalProvider = class {
|
|
|
26619
26972
|
workerUrl = vendorWorkerUrl;
|
|
26620
26973
|
}
|
|
26621
26974
|
} else {
|
|
26622
|
-
|
|
26975
|
+
const selfUrl = typeof import.meta.url === "string" ? import.meta.url : void 0;
|
|
26976
|
+
if (selfUrl) {
|
|
26977
|
+
workerUrl = new URL(
|
|
26978
|
+
`./embedding-worker${selfUrl.endsWith(".ts") ? ".ts" : ".js"}`,
|
|
26979
|
+
selfUrl
|
|
26980
|
+
);
|
|
26981
|
+
} else {
|
|
26982
|
+
const { pathToFileURL } = await import("node:url");
|
|
26983
|
+
workerUrl = new URL("./embedding-worker.cjs", pathToFileURL(__filename));
|
|
26984
|
+
}
|
|
26623
26985
|
}
|
|
26624
26986
|
const vendor = vendorModelInfo();
|
|
26625
26987
|
const workerInitData = {
|
|
26626
|
-
|
|
26627
|
-
|
|
26988
|
+
modelId: this.modelId,
|
|
26989
|
+
dimensions: this.dimensions,
|
|
26990
|
+
vendorModel: vendor ? { localModelPath: vendor.localModelPath } : null
|
|
26628
26991
|
};
|
|
26629
26992
|
this.worker = new Worker(workerUrl, { workerData: workerInitData });
|
|
26630
26993
|
this.worker.unref();
|
|
@@ -26651,6 +27014,13 @@ var LocalProvider = class {
|
|
|
26651
27014
|
case "init-error": {
|
|
26652
27015
|
this.workerInitError = msg.error;
|
|
26653
27016
|
this.workerReady = false;
|
|
27017
|
+
localProviderKnownBroken = true;
|
|
27018
|
+
if (!localProviderErrorLogged) {
|
|
27019
|
+
localProviderErrorLogged = true;
|
|
27020
|
+
info(
|
|
27021
|
+
`local embedding provider failed to init: ${msg.error}. Set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback.`
|
|
27022
|
+
);
|
|
27023
|
+
}
|
|
26654
27024
|
for (const [, p2] of this.pendingRequests) {
|
|
26655
27025
|
p2.reject(new LocalProviderUnavailableError(msg.error));
|
|
26656
27026
|
}
|
|
@@ -26702,6 +27072,8 @@ var LocalProvider = class {
|
|
|
26702
27072
|
}
|
|
26703
27073
|
async embed(texts, inputType) {
|
|
26704
27074
|
await this.ensureWorker();
|
|
27075
|
+
const prefix = inputType === "document" ? "search_document: " : "search_query: ";
|
|
27076
|
+
const prefixed = texts.map((t2) => prefix + t2);
|
|
26705
27077
|
const id = this.nextRequestId++;
|
|
26706
27078
|
const priority = inputType === "query" && texts.length === 1 ? "high" : "normal";
|
|
26707
27079
|
return new Promise((resolve, reject) => {
|
|
@@ -26710,7 +27082,7 @@ var LocalProvider = class {
|
|
|
26710
27082
|
this.worker.postMessage({
|
|
26711
27083
|
type: "embed",
|
|
26712
27084
|
id,
|
|
26713
|
-
texts,
|
|
27085
|
+
texts: prefixed,
|
|
26714
27086
|
inputType,
|
|
26715
27087
|
priority
|
|
26716
27088
|
});
|
|
@@ -26718,8 +27090,6 @@ var LocalProvider = class {
|
|
|
26718
27090
|
}
|
|
26719
27091
|
/** Shut down the worker thread. Called by `resetProvider()` on config change.
|
|
26720
27092
|
* Sends a shutdown message so the worker calls `process.exit(0)` internally.
|
|
26721
|
-
* We avoid `worker.terminate()` because Bun's forced termination triggers a
|
|
26722
|
-
* NAPI fatal error when tearing down onnxruntime's native bindings.
|
|
26723
27093
|
*
|
|
26724
27094
|
* Returns a promise that resolves once the worker has fully exited. Callers
|
|
26725
27095
|
* that need a clean teardown (tests, config change) should await the result.
|
|
@@ -26742,7 +27112,7 @@ var LocalProvider = class {
|
|
|
26742
27112
|
}
|
|
26743
27113
|
};
|
|
26744
27114
|
var PROVIDER_DEFAULTS = {
|
|
26745
|
-
local: { model: "
|
|
27115
|
+
local: { model: "nomic-ai/nomic-embed-text-v1.5", dimensions: 768 },
|
|
26746
27116
|
voyage: { model: "voyage-code-3", dimensions: 1024 },
|
|
26747
27117
|
openai: { model: "text-embedding-3-small", dimensions: 1536 }
|
|
26748
27118
|
};
|
|
@@ -26766,7 +27136,7 @@ function getProvider() {
|
|
|
26766
27136
|
const model = cfg.model;
|
|
26767
27137
|
switch (providerName) {
|
|
26768
27138
|
case "local": {
|
|
26769
|
-
cachedProvider = new LocalProvider(model);
|
|
27139
|
+
cachedProvider = new LocalProvider(model, cfg.dimensions);
|
|
26770
27140
|
break;
|
|
26771
27141
|
}
|
|
26772
27142
|
case "voyage": {
|
|
@@ -26843,7 +27213,7 @@ function pickRemoteFallback() {
|
|
|
26843
27213
|
function isAvailable() {
|
|
26844
27214
|
const provider = getProvider();
|
|
26845
27215
|
if (!provider) return false;
|
|
26846
|
-
if (provider instanceof LocalProvider &&
|
|
27216
|
+
if (provider instanceof LocalProvider && localProviderKnownUnavailable()) return false;
|
|
26847
27217
|
return true;
|
|
26848
27218
|
}
|
|
26849
27219
|
async function embed(texts, inputType) {
|
|
@@ -26858,7 +27228,7 @@ async function embed(texts, inputType) {
|
|
|
26858
27228
|
if (!remoteFallbackLogged) {
|
|
26859
27229
|
remoteFallbackLogged = true;
|
|
26860
27230
|
info(
|
|
26861
|
-
`
|
|
27231
|
+
`local embedding provider unavailable; auto-switching to ${fallback.name} (set search.embeddings.provider in .lore.json to silence this)`
|
|
26862
27232
|
);
|
|
26863
27233
|
}
|
|
26864
27234
|
cachedProvider = fallback.provider;
|
|
@@ -26886,8 +27256,14 @@ function fromBlob(blob) {
|
|
|
26886
27256
|
const bytes = new Uint8Array(blob);
|
|
26887
27257
|
return new Float32Array(bytes.buffer, bytes.byteOffset, bytes.byteLength / 4);
|
|
26888
27258
|
}
|
|
26889
|
-
function vectorSearch(queryEmbedding, limit = 10) {
|
|
26890
|
-
|
|
27259
|
+
function vectorSearch(queryEmbedding, limit = 10, excludeCategories) {
|
|
27260
|
+
let sql = "SELECT id, embedding FROM knowledge WHERE embedding IS NOT NULL AND confidence > 0.2";
|
|
27261
|
+
const params = [];
|
|
27262
|
+
if (excludeCategories?.length) {
|
|
27263
|
+
sql += ` AND category NOT IN (${excludeCategories.map(() => "?").join(",")})`;
|
|
27264
|
+
params.push(...excludeCategories);
|
|
27265
|
+
}
|
|
27266
|
+
const rows = db().query(sql).all(...params);
|
|
26891
27267
|
const scored = [];
|
|
26892
27268
|
for (const row of rows) {
|
|
26893
27269
|
const vec = fromBlob(row.embedding);
|
|
@@ -26910,6 +27286,20 @@ function vectorSearchDistillations(queryEmbedding, limit = 10) {
|
|
|
26910
27286
|
scored.sort((a, b) => b.similarity - a.similarity);
|
|
26911
27287
|
return scored.slice(0, limit);
|
|
26912
27288
|
}
|
|
27289
|
+
var MAX_DISTILLATION_VECTOR_ROWS = 500;
|
|
27290
|
+
/**
 * Rank a project's distillations by cosine similarity to a query embedding.
 *
 * Only distillations that have an embedding are considered, and only the
 * `MAX_DISTILLATION_VECTOR_ROWS` most recently created ones are scored.
 *
 * @param {*} queryEmbedding - Query vector passed to `cosineSimilarity`.
 * @param {*} projectId2 - Bound to the `project_id = ?` placeholder.
 * @param {number} [limit=20] - Maximum number of results returned.
 * @returns {Array<{id: *, session_id: *, similarity: number}>} Matches sorted
 *   by descending similarity.
 */
function vectorSearchAllDistillations(queryEmbedding, projectId2, limit = 20) {
  const candidates = db().query(
    "SELECT id, session_id, embedding FROM distillations WHERE embedding IS NOT NULL AND project_id = ? ORDER BY created_at DESC LIMIT ?"
  ).all(projectId2, MAX_DISTILLATION_VECTOR_ROWS);

  const ranked = candidates.map((candidate) => {
    const stored = fromBlob(candidate.embedding);
    return {
      id: candidate.id,
      session_id: candidate.session_id,
      similarity: cosineSimilarity(queryEmbedding, stored)
    };
  });
  ranked.sort((left, right) => right.similarity - left.similarity);
  return ranked.slice(0, limit);
}
|
|
26913
27303
|
function embedKnowledgeEntry(id, title, content3) {
|
|
26914
27304
|
const text4 = `${title}
|
|
26915
27305
|
${content3}`;
|
|
@@ -27011,20 +27401,37 @@ async function runStartupBackfill() {
|
|
|
27011
27401
|
);
|
|
27012
27402
|
info(`embedding startup: ${parts.join("; ")}`);
|
|
27013
27403
|
}
|
|
27014
|
-
var
|
|
27404
|
+
var MAX_BACKFILL_CHUNK = 8;
|
|
27405
|
+
var MAX_BATCH_TOKEN_AREA = 4096;
|
|
27406
|
+
var CHARS_PER_TOKEN = 4;
|
|
27407
|
+
/**
 * Take the next backfill batch from `rows`, beginning at index `start`.
 *
 * A batch is capped in two ways:
 *   - at most `MAX_BACKFILL_CHUNK` rows, and
 *   - row count multiplied by the longest row's estimated token count must not
 *     exceed `MAX_BATCH_TOKEN_AREA`.
 * Token counts are estimated as `text.length / CHARS_PER_TOKEN`, rounded up.
 * The first row is always accepted, so a single oversized row still makes
 * progress as a batch of one.
 *
 * @param {Array<{text: (string|null|undefined)}>} rows - Candidate rows.
 * @param {number} start - Index of the first row to consider.
 * @returns {Array<object>} Consecutive rows from `start`; empty when `start`
 *   is at or past the end of `rows`.
 */
function nextBatch(rows, start) {
  const batch = [];
  let maxTokens = 0;
  for (let i = start; i < rows.length && batch.length < MAX_BACKFILL_CHUNK; i++) {
    // A row without text is sized as zero tokens rather than throwing on
    // `.length`, so one bad row cannot abort the caller's whole backfill loop.
    const text = rows[i].text ?? "";
    const estTokens = Math.ceil(text.length / CHARS_PER_TOKEN);
    const newMax = Math.max(maxTokens, estTokens);
    const newArea = (batch.length + 1) * newMax;
    if (batch.length > 0 && newArea > MAX_BATCH_TOKEN_AREA) break;
    batch.push(rows[i]);
    maxTokens = newMax;
  }
  return batch;
}
|
|
27015
27420
|
async function backfillEmbeddings() {
|
|
27016
27421
|
checkConfigChange();
|
|
27017
27422
|
const provider = getProvider();
|
|
27018
27423
|
if (!provider) return 0;
|
|
27019
27424
|
const rows = db().query("SELECT id, title, content FROM knowledge WHERE embedding IS NULL AND confidence > 0.2").all();
|
|
27020
27425
|
if (!rows.length) return 0;
|
|
27426
|
+
const items = rows.map((r) => ({ ...r, text: `${r.title}
|
|
27427
|
+
${r.content}` }));
|
|
27021
27428
|
let embedded = 0;
|
|
27022
|
-
|
|
27023
|
-
|
|
27024
|
-
const
|
|
27025
|
-
|
|
27429
|
+
let i = 0;
|
|
27430
|
+
while (i < items.length) {
|
|
27431
|
+
const batch = nextBatch(items, i);
|
|
27432
|
+
i += batch.length;
|
|
27026
27433
|
try {
|
|
27027
|
-
const vectors = await embed(
|
|
27434
|
+
const vectors = await embed(batch.map((b) => b.text), "document");
|
|
27028
27435
|
const update2 = db().prepare(
|
|
27029
27436
|
"UPDATE knowledge SET embedding = ? WHERE id = ?"
|
|
27030
27437
|
);
|
|
@@ -27033,7 +27440,7 @@ ${r.content}`);
|
|
|
27033
27440
|
embedded++;
|
|
27034
27441
|
}
|
|
27035
27442
|
} catch (err) {
|
|
27036
|
-
|
|
27443
|
+
error(`embedding backfill batch failed (${batch.length} items):`, err);
|
|
27037
27444
|
}
|
|
27038
27445
|
}
|
|
27039
27446
|
if (embedded > 0) {
|
|
@@ -27051,11 +27458,13 @@ async function backfillDistillationEmbeddings() {
|
|
|
27051
27458
|
let embedded = 0;
|
|
27052
27459
|
const PROGRESS_INTERVAL = 256;
|
|
27053
27460
|
let nextProgressAt = PROGRESS_INTERVAL;
|
|
27054
|
-
|
|
27055
|
-
|
|
27056
|
-
|
|
27461
|
+
const items = rows.map((r) => ({ ...r, text: r.observations }));
|
|
27462
|
+
let i = 0;
|
|
27463
|
+
while (i < items.length) {
|
|
27464
|
+
const batch = nextBatch(items, i);
|
|
27465
|
+
i += batch.length;
|
|
27057
27466
|
try {
|
|
27058
|
-
const vectors = await embed(
|
|
27467
|
+
const vectors = await embed(batch.map((b) => b.text), "document");
|
|
27059
27468
|
const update2 = db().prepare(
|
|
27060
27469
|
"UPDATE distillations SET embedding = ? WHERE id = ?"
|
|
27061
27470
|
);
|
|
@@ -27064,7 +27473,7 @@ async function backfillDistillationEmbeddings() {
|
|
|
27064
27473
|
embedded++;
|
|
27065
27474
|
}
|
|
27066
27475
|
} catch (err) {
|
|
27067
|
-
|
|
27476
|
+
error(`distillation embedding backfill batch failed (${batch.length} items):`, err);
|
|
27068
27477
|
}
|
|
27069
27478
|
if (embedded >= nextProgressAt) {
|
|
27070
27479
|
info(`embedding distillations: ${embedded}/${rows.length}\u2026`);
|
|
@@ -27178,7 +27587,7 @@ function searchLike(input) {
|
|
|
27178
27587
|
if (!terms.length) return [];
|
|
27179
27588
|
const conditions = terms.map(() => "LOWER(content) LIKE ?").join(" AND ");
|
|
27180
27589
|
const likeParams = terms.map((t2) => `%${t2}%`);
|
|
27181
|
-
const query = input.sessionID ? `SELECT * FROM temporal_messages WHERE project_id = ? AND session_id = ? AND ${conditions} ORDER BY created_at DESC LIMIT ?` : `SELECT * FROM temporal_messages WHERE project_id = ? AND ${conditions} ORDER BY created_at DESC LIMIT ?`;
|
|
27590
|
+
const query = input.sessionID ? `SELECT * FROM temporal_messages WHERE project_id = ? AND session_id = ? AND distilled = 0 AND ${conditions} ORDER BY created_at DESC LIMIT ?` : `SELECT * FROM temporal_messages WHERE project_id = ? AND distilled = 0 AND ${conditions} ORDER BY created_at DESC LIMIT ?`;
|
|
27182
27591
|
const params = input.sessionID ? [input.pid, input.sessionID, ...likeParams, input.limit] : [input.pid, ...likeParams, input.limit];
|
|
27183
27592
|
return db().query(query).all(...params);
|
|
27184
27593
|
}
|
|
@@ -27187,10 +27596,10 @@ function search2(input) {
|
|
|
27187
27596
|
const limit = input.limit ?? 20;
|
|
27188
27597
|
const ftsSQL = input.sessionID ? `SELECT m.* FROM temporal_fts f
|
|
27189
27598
|
CROSS JOIN temporal_messages m ON m.rowid = f.rowid
|
|
27190
|
-
WHERE f.content MATCH ? AND m.project_id = ? AND m.session_id = ?
|
|
27599
|
+
WHERE f.content MATCH ? AND m.project_id = ? AND m.session_id = ? AND m.distilled = 0
|
|
27191
27600
|
ORDER BY rank LIMIT ?` : `SELECT m.* FROM temporal_fts f
|
|
27192
27601
|
CROSS JOIN temporal_messages m ON m.rowid = f.rowid
|
|
27193
|
-
WHERE f.content MATCH ? AND m.project_id = ?
|
|
27602
|
+
WHERE f.content MATCH ? AND m.project_id = ? AND m.distilled = 0
|
|
27194
27603
|
ORDER BY rank LIMIT ?`;
|
|
27195
27604
|
try {
|
|
27196
27605
|
return runRelaxedSearch(input.query, (matchExpr) => {
|
|
@@ -27211,10 +27620,10 @@ function searchScored(input) {
|
|
|
27211
27620
|
const limit = input.limit ?? 20;
|
|
27212
27621
|
const ftsSQL = input.sessionID ? `SELECT m.*, rank FROM temporal_fts f
|
|
27213
27622
|
CROSS JOIN temporal_messages m ON m.rowid = f.rowid
|
|
27214
|
-
WHERE f.content MATCH ? AND m.project_id = ? AND m.session_id = ?
|
|
27623
|
+
WHERE f.content MATCH ? AND m.project_id = ? AND m.session_id = ? AND m.distilled = 0
|
|
27215
27624
|
ORDER BY rank LIMIT ?` : `SELECT m.*, rank FROM temporal_fts f
|
|
27216
27625
|
CROSS JOIN temporal_messages m ON m.rowid = f.rowid
|
|
27217
|
-
WHERE f.content MATCH ? AND m.project_id = ?
|
|
27626
|
+
WHERE f.content MATCH ? AND m.project_id = ? AND m.distilled = 0
|
|
27218
27627
|
ORDER BY rank LIMIT ?`;
|
|
27219
27628
|
try {
|
|
27220
27629
|
return runRelaxedSearch(input.query, (matchExpr) => {
|
|
@@ -27243,6 +27652,12 @@ function count(projectPath, sessionID) {
|
|
|
27243
27652
|
const params = sessionID ? [pid, sessionID] : [pid];
|
|
27244
27653
|
return db().query(query).get(...params).count;
|
|
27245
27654
|
}
|
|
27655
|
+
function hasMessages(projectPath, sessionID) {
|
|
27656
|
+
const pid = ensureProject(projectPath);
|
|
27657
|
+
return !!db().query(
|
|
27658
|
+
"SELECT 1 FROM temporal_messages WHERE project_id = ? AND session_id = ? LIMIT 1"
|
|
27659
|
+
).get(pid, sessionID);
|
|
27660
|
+
}
|
|
27246
27661
|
function undistilledCount(projectPath, sessionID) {
|
|
27247
27662
|
const pid = ensureProject(projectPath);
|
|
27248
27663
|
const query = sessionID ? "SELECT COUNT(*) as count FROM temporal_messages WHERE project_id = ? AND session_id = ? AND distilled = 0" : "SELECT COUNT(*) as count FROM temporal_messages WHERE project_id = ? AND distilled = 0";
|
|
@@ -27301,17 +27716,31 @@ function prune(input) {
|
|
|
27301
27716
|
var ltm_exports = {};
|
|
27302
27717
|
__export(ltm_exports, {
|
|
27303
27718
|
all: () => all2,
|
|
27719
|
+
calibrateDedupThreshold: () => calibrateDedupThreshold,
|
|
27304
27720
|
cascadeRefReplace: () => cascadeRefReplace,
|
|
27305
27721
|
check: () => check2,
|
|
27306
27722
|
cleanDeadRefs: () => cleanDeadRefs,
|
|
27307
27723
|
create: () => create,
|
|
27724
|
+
crossProject: () => crossProject,
|
|
27725
|
+
dedupPairKey: () => dedupPairKey,
|
|
27726
|
+
deduplicate: () => deduplicate,
|
|
27727
|
+
deduplicateGlobal: () => deduplicateGlobal,
|
|
27308
27728
|
extractRefs: () => extractRefs,
|
|
27729
|
+
findFuzzyDuplicate: () => findFuzzyDuplicate,
|
|
27309
27730
|
forProject: () => forProject,
|
|
27310
27731
|
forSession: () => forSession,
|
|
27311
27732
|
get: () => get,
|
|
27733
|
+
getDedupFeedback: () => getDedupFeedback,
|
|
27734
|
+
getDedupFeedbackCount: () => getDedupFeedbackCount,
|
|
27735
|
+
loadCalibratedThreshold: () => loadCalibratedThreshold,
|
|
27736
|
+
pruneDedupFeedback: () => pruneDedupFeedback,
|
|
27312
27737
|
pruneOversized: () => pruneOversized,
|
|
27738
|
+
recordAutoSignals: () => recordAutoSignals,
|
|
27739
|
+
recordDedupFeedback: () => recordDedupFeedback,
|
|
27740
|
+
recordDedupResultFeedback: () => recordDedupResultFeedback,
|
|
27313
27741
|
remove: () => remove,
|
|
27314
27742
|
resolveRef: () => resolveRef2,
|
|
27743
|
+
saveCalibratedThreshold: () => saveCalibratedThreshold,
|
|
27315
27744
|
search: () => search3,
|
|
27316
27745
|
searchScored: () => searchScored3,
|
|
27317
27746
|
searchScoredOtherProjects: () => searchScoredOtherProjects,
|
|
@@ -27664,8 +28093,8 @@ __export(lat_reader_exports, {
|
|
|
27664
28093
|
scoreForSession: () => scoreForSession,
|
|
27665
28094
|
searchScored: () => searchScored2
|
|
27666
28095
|
});
|
|
27667
|
-
import { readdirSync, readFileSync as readFileSync2, existsSync as
|
|
27668
|
-
import { join as
|
|
28096
|
+
import { readdirSync, readFileSync as readFileSync2, existsSync as existsSync3, statSync as statSync2 } from "fs";
|
|
28097
|
+
import { join as join6, relative } from "path";
|
|
27669
28098
|
var processor2 = remark();
|
|
27670
28099
|
function estimateTokens2(text4) {
|
|
27671
28100
|
return Math.ceil(text4.length / 3);
|
|
@@ -27743,7 +28172,7 @@ function listMarkdownFiles(dir) {
|
|
|
27743
28172
|
try {
|
|
27744
28173
|
const entries = readdirSync(dir, { withFileTypes: true });
|
|
27745
28174
|
for (const entry of entries) {
|
|
27746
|
-
const fullPath =
|
|
28175
|
+
const fullPath = join6(dir, entry.name);
|
|
27747
28176
|
if (entry.isDirectory() && !entry.name.startsWith(".")) {
|
|
27748
28177
|
results.push(...listMarkdownFiles(fullPath));
|
|
27749
28178
|
} else if (entry.isFile() && entry.name.endsWith(".md")) {
|
|
@@ -27758,12 +28187,12 @@ function contentHash(content3) {
|
|
|
27758
28187
|
return sha256(content3);
|
|
27759
28188
|
}
|
|
27760
28189
|
function hasLatDir(projectPath) {
|
|
27761
|
-
const latDir =
|
|
27762
|
-
return
|
|
28190
|
+
const latDir = join6(projectPath, "lat.md");
|
|
28191
|
+
return existsSync3(latDir) && statSync2(latDir).isDirectory();
|
|
27763
28192
|
}
|
|
27764
28193
|
function refresh(projectPath) {
|
|
27765
|
-
const latDir =
|
|
27766
|
-
if (!
|
|
28194
|
+
const latDir = join6(projectPath, "lat.md");
|
|
28195
|
+
if (!existsSync3(latDir) || !statSync2(latDir).isDirectory()) return 0;
|
|
27767
28196
|
const pid = ensureProject(projectPath);
|
|
27768
28197
|
const files = listMarkdownFiles(latDir);
|
|
27769
28198
|
let upserted = 0;
|
|
@@ -27885,6 +28314,7 @@ var KNOWLEDGE_COLS = "id, project_id, category, title, content, source_session,
|
|
|
27885
28314
|
var KNOWLEDGE_COLS_K = "k.id, k.project_id, k.category, k.title, k.content, k.source_session, k.cross_project, k.confidence, k.created_at, k.updated_at, k.metadata";
|
|
27886
28315
|
function create(input) {
|
|
27887
28316
|
const pid = input.scope === "project" && input.projectPath ? ensureProject(input.projectPath) : null;
|
|
28317
|
+
const crossProject2 = pid === null ? true : input.crossProject ?? false;
|
|
27888
28318
|
if (!input.id) {
|
|
27889
28319
|
const existing = pid !== null ? db().query(
|
|
27890
28320
|
"SELECT id FROM knowledge WHERE project_id = ? AND LOWER(title) = LOWER(?) AND confidence > 0 LIMIT 1"
|
|
@@ -27902,6 +28332,11 @@ function create(input) {
|
|
|
27902
28332
|
update(crossExisting.id, { content: input.content });
|
|
27903
28333
|
return crossExisting.id;
|
|
27904
28334
|
}
|
|
28335
|
+
const fuzzyMatch = findFuzzyDuplicate({ title: input.title, projectId: pid });
|
|
28336
|
+
if (fuzzyMatch) {
|
|
28337
|
+
update(fuzzyMatch.id, { content: input.content });
|
|
28338
|
+
return fuzzyMatch.id;
|
|
28339
|
+
}
|
|
27905
28340
|
}
|
|
27906
28341
|
const id = input.id ?? uuidv72();
|
|
27907
28342
|
const now = Date.now();
|
|
@@ -27915,7 +28350,7 @@ function create(input) {
|
|
|
27915
28350
|
input.title,
|
|
27916
28351
|
input.content,
|
|
27917
28352
|
input.session ?? null,
|
|
27918
|
-
|
|
28353
|
+
crossProject2 ? 1 : 0,
|
|
27919
28354
|
now,
|
|
27920
28355
|
now
|
|
27921
28356
|
);
|
|
@@ -27933,7 +28368,7 @@ function update(id, input) {
|
|
|
27933
28368
|
}
|
|
27934
28369
|
if (input.confidence !== void 0) {
|
|
27935
28370
|
sets.push("confidence = ?");
|
|
27936
|
-
params.push(input.confidence);
|
|
28371
|
+
params.push(Math.max(0, Math.min(1, input.confidence)));
|
|
27937
28372
|
}
|
|
27938
28373
|
sets.push("updated_at = ?");
|
|
27939
28374
|
params.push(Date.now());
|
|
@@ -27949,6 +28384,50 @@ function update(id, input) {
|
|
|
27949
28384
|
function remove(id) {
|
|
27950
28385
|
db().query("DELETE FROM knowledge WHERE id = ?").run(id);
|
|
27951
28386
|
}
|
|
28387
|
+
function titleOverlap(a, b) {
|
|
28388
|
+
const wordsA = new Set(filterTerms(a).map((w) => w.toLowerCase()));
|
|
28389
|
+
const wordsB = new Set(filterTerms(b).map((w) => w.toLowerCase()));
|
|
28390
|
+
if (wordsA.size === 0 || wordsB.size === 0) return { coefficient: 0, intersectionSize: 0 };
|
|
28391
|
+
const intersection2 = [...wordsA].filter((w) => wordsB.has(w));
|
|
28392
|
+
return {
|
|
28393
|
+
coefficient: intersection2.length / Math.min(wordsA.size, wordsB.size),
|
|
28394
|
+
intersectionSize: intersection2.length
|
|
28395
|
+
};
|
|
28396
|
+
}
|
|
28397
|
+
var FUZZY_DEDUP_THRESHOLD = 0.7;
|
|
28398
|
+
var FUZZY_DEDUP_MIN_OVERLAP = 4;
|
|
28399
|
+
var EMBEDDING_DEDUP_THRESHOLD = 0.935;
|
|
28400
|
+
function findFuzzyDuplicate(input) {
|
|
28401
|
+
const q = ftsQueryOr(input.title);
|
|
28402
|
+
if (q === EMPTY_QUERY) return null;
|
|
28403
|
+
const { title: tw, content: cw, category: catw } = config2().search.ftsWeights;
|
|
28404
|
+
try {
|
|
28405
|
+
const excludeClause = input.excludeId ? "AND k.id != ?" : "";
|
|
28406
|
+
const sql = input.projectId !== null ? `SELECT k.id, k.title FROM knowledge_fts f
|
|
28407
|
+
CROSS JOIN knowledge k ON k.rowid = f.rowid
|
|
28408
|
+
WHERE knowledge_fts MATCH ?
|
|
28409
|
+
AND (k.project_id = ? OR k.cross_project = 1)
|
|
28410
|
+
AND k.confidence > 0.2
|
|
28411
|
+
${excludeClause}
|
|
28412
|
+
ORDER BY bm25(knowledge_fts, ?, ?, ?) LIMIT 5` : `SELECT k.id, k.title FROM knowledge_fts f
|
|
28413
|
+
CROSS JOIN knowledge k ON k.rowid = f.rowid
|
|
28414
|
+
WHERE knowledge_fts MATCH ?
|
|
28415
|
+
AND (k.project_id IS NULL OR k.cross_project = 1)
|
|
28416
|
+
AND k.confidence > 0.2
|
|
28417
|
+
${excludeClause}
|
|
28418
|
+
ORDER BY bm25(knowledge_fts, ?, ?, ?) LIMIT 5`;
|
|
28419
|
+
const params = input.projectId !== null ? [q, input.projectId, ...input.excludeId ? [input.excludeId] : [], tw, cw, catw] : [q, ...input.excludeId ? [input.excludeId] : [], tw, cw, catw];
|
|
28420
|
+
const candidates = db().query(sql).all(...params);
|
|
28421
|
+
for (const candidate of candidates) {
|
|
28422
|
+
const { coefficient, intersectionSize } = titleOverlap(input.title, candidate.title);
|
|
28423
|
+
if (coefficient >= FUZZY_DEDUP_THRESHOLD && intersectionSize >= FUZZY_DEDUP_MIN_OVERLAP) {
|
|
28424
|
+
return candidate;
|
|
28425
|
+
}
|
|
28426
|
+
}
|
|
28427
|
+
} catch {
|
|
28428
|
+
}
|
|
28429
|
+
return null;
|
|
28430
|
+
}
|
|
27952
28431
|
function forProject(projectPath, includeCross = true) {
|
|
27953
28432
|
const pid = ensureProject(projectPath);
|
|
27954
28433
|
if (includeCross) {
|
|
@@ -27998,18 +28477,29 @@ function scoreEntriesFTS(sessionContext) {
|
|
|
27998
28477
|
return /* @__PURE__ */ new Map();
|
|
27999
28478
|
}
|
|
28000
28479
|
}
|
|
28001
|
-
function forSession(projectPath, sessionID, maxTokens) {
|
|
28480
|
+
async function forSession(projectPath, sessionID, maxTokens, options) {
|
|
28002
28481
|
const pid = ensureProject(projectPath);
|
|
28482
|
+
const categoryFilter = options?.categories;
|
|
28483
|
+
const excludeFilter = options?.excludeCategories;
|
|
28484
|
+
let categoryClause = "";
|
|
28485
|
+
let categoryParams = [];
|
|
28486
|
+
if (categoryFilter?.length) {
|
|
28487
|
+
categoryClause = ` AND category IN (${categoryFilter.map(() => "?").join(",")})`;
|
|
28488
|
+
categoryParams = categoryFilter;
|
|
28489
|
+
} else if (excludeFilter?.length) {
|
|
28490
|
+
categoryClause = ` AND category NOT IN (${excludeFilter.map(() => "?").join(",")})`;
|
|
28491
|
+
categoryParams = excludeFilter;
|
|
28492
|
+
}
|
|
28003
28493
|
const projectEntries = db().query(
|
|
28004
28494
|
`SELECT ${KNOWLEDGE_COLS} FROM knowledge
|
|
28005
|
-
WHERE project_id = ? AND cross_project = 0 AND confidence > 0.2
|
|
28495
|
+
WHERE project_id = ? AND cross_project = 0 AND confidence > 0.2${categoryClause}
|
|
28006
28496
|
ORDER BY confidence DESC, updated_at DESC`
|
|
28007
|
-
).all(pid);
|
|
28497
|
+
).all(pid, ...categoryParams);
|
|
28008
28498
|
const crossEntries = db().query(
|
|
28009
28499
|
`SELECT ${KNOWLEDGE_COLS} FROM knowledge
|
|
28010
|
-
WHERE (project_id IS NULL OR cross_project = 1) AND confidence > 0.2
|
|
28500
|
+
WHERE (project_id IS NULL OR cross_project = 1) AND confidence > 0.2${categoryClause}
|
|
28011
28501
|
ORDER BY confidence DESC, updated_at DESC`
|
|
28012
|
-
).all();
|
|
28502
|
+
).all(...categoryParams);
|
|
28013
28503
|
if (!crossEntries.length && !projectEntries.length) return [];
|
|
28014
28504
|
let sessionContext = "";
|
|
28015
28505
|
if (sessionID) {
|
|
@@ -28030,22 +28520,52 @@ function forSession(projectPath, sessionID, maxTokens) {
|
|
|
28030
28520
|
sessionContext += recentMsgs.map((m) => m.content).join("\n");
|
|
28031
28521
|
}
|
|
28032
28522
|
}
|
|
28523
|
+
if (!sessionContext.trim() && options?.contextHint) {
|
|
28524
|
+
sessionContext = options.contextHint;
|
|
28525
|
+
}
|
|
28033
28526
|
let scoredProject;
|
|
28034
28527
|
let scoredCross;
|
|
28035
|
-
if (sessionContext.trim().length > 20) {
|
|
28528
|
+
if (sessionContext.trim().length > 20 && isAvailable()) {
|
|
28529
|
+
let vectorScores;
|
|
28530
|
+
try {
|
|
28531
|
+
const [contextVec] = await embed([sessionContext], "query");
|
|
28532
|
+
const hits = vectorSearch(contextVec, 50, excludeFilter);
|
|
28533
|
+
vectorScores = new Map(hits.map((h3) => [h3.id, h3.similarity]));
|
|
28534
|
+
} catch (err) {
|
|
28535
|
+
warn("Vector scoring failed, falling back to FTS5:", err);
|
|
28536
|
+
vectorScores = /* @__PURE__ */ new Map();
|
|
28537
|
+
}
|
|
28538
|
+
if (vectorScores.size > 0) {
|
|
28539
|
+
const ftsScores = scoreEntriesFTS(sessionContext);
|
|
28540
|
+
const rawScored = projectEntries.map((entry) => {
|
|
28541
|
+
const vecScore = vectorScores.get(entry.id);
|
|
28542
|
+
const score = vecScore != null ? vecScore * entry.confidence : (ftsScores.get(entry.id) ?? 0) * entry.confidence;
|
|
28543
|
+
return { entry, score };
|
|
28544
|
+
});
|
|
28545
|
+
const matched = rawScored.filter((s) => s.score > 0);
|
|
28546
|
+
const matchedIds = new Set(matched.map((s) => s.entry.id));
|
|
28547
|
+
const safetyNet = projectEntries.filter((e) => !matchedIds.has(e.id)).slice(0, PROJECT_SAFETY_NET).map((e) => ({ entry: e, score: 1e-3 * e.confidence }));
|
|
28548
|
+
scoredProject = [...matched, ...safetyNet];
|
|
28549
|
+
scoredCross = crossEntries.filter((e) => vectorScores.has(e.id) || ftsScores.has(e.id)).map((e) => {
|
|
28550
|
+
const vecScore = vectorScores.get(e.id);
|
|
28551
|
+
const score = vecScore != null ? vecScore * e.confidence : (ftsScores.get(e.id) ?? 0) * e.confidence;
|
|
28552
|
+
return { entry: e, score };
|
|
28553
|
+
});
|
|
28554
|
+
} else {
|
|
28555
|
+
const ftsScores = scoreEntriesFTS(sessionContext);
|
|
28556
|
+
({ scoredProject, scoredCross } = scoreFTS(
|
|
28557
|
+
projectEntries,
|
|
28558
|
+
crossEntries,
|
|
28559
|
+
ftsScores
|
|
28560
|
+
));
|
|
28561
|
+
}
|
|
28562
|
+
} else if (sessionContext.trim().length > 20) {
|
|
28036
28563
|
const ftsScores = scoreEntriesFTS(sessionContext);
|
|
28037
|
-
|
|
28038
|
-
|
|
28039
|
-
|
|
28040
|
-
|
|
28041
|
-
|
|
28042
|
-
const matchedIds = new Set(matched.map((s) => s.entry.id));
|
|
28043
|
-
const safetyNet = projectEntries.filter((e) => !matchedIds.has(e.id)).slice(0, PROJECT_SAFETY_NET).map((e) => ({ entry: e, score: 1e-3 * e.confidence }));
|
|
28044
|
-
scoredProject = [...matched, ...safetyNet];
|
|
28045
|
-
scoredCross = crossEntries.filter((e) => ftsScores.has(e.id)).map((e) => ({
|
|
28046
|
-
entry: e,
|
|
28047
|
-
score: (ftsScores.get(e.id) ?? 0) * e.confidence
|
|
28048
|
-
}));
|
|
28564
|
+
({ scoredProject, scoredCross } = scoreFTS(
|
|
28565
|
+
projectEntries,
|
|
28566
|
+
crossEntries,
|
|
28567
|
+
ftsScores
|
|
28568
|
+
));
|
|
28049
28569
|
} else {
|
|
28050
28570
|
scoredProject = projectEntries.slice(0, NO_CONTEXT_FALLBACK_CAP).map((entry) => ({ entry, score: entry.confidence }));
|
|
28051
28571
|
scoredCross = crossEntries.slice(0, NO_CONTEXT_FALLBACK_CAP).map((entry) => ({ entry, score: entry.confidence }));
|
|
@@ -28091,11 +28611,33 @@ function forSession(projectPath, sessionID, maxTokens) {
|
|
|
28091
28611
|
}
|
|
28092
28612
|
return result;
|
|
28093
28613
|
}
|
|
28614
|
+
function scoreFTS(projectEntries, crossEntries, ftsScores) {
|
|
28615
|
+
const rawScored = projectEntries.map((entry) => ({
|
|
28616
|
+
entry,
|
|
28617
|
+
score: (ftsScores.get(entry.id) ?? 0) * entry.confidence
|
|
28618
|
+
}));
|
|
28619
|
+
const matched = rawScored.filter((s) => s.score > 0);
|
|
28620
|
+
const matchedIds = new Set(matched.map((s) => s.entry.id));
|
|
28621
|
+
const safetyNet = projectEntries.filter((e) => !matchedIds.has(e.id)).slice(0, PROJECT_SAFETY_NET).map((e) => ({ entry: e, score: 1e-3 * e.confidence }));
|
|
28622
|
+
const scoredProject = [...matched, ...safetyNet];
|
|
28623
|
+
const scoredCross = crossEntries.filter((e) => ftsScores.has(e.id)).map((e) => ({
|
|
28624
|
+
entry: e,
|
|
28625
|
+
score: (ftsScores.get(e.id) ?? 0) * e.confidence
|
|
28626
|
+
}));
|
|
28627
|
+
return { scoredProject, scoredCross };
|
|
28628
|
+
}
|
|
28094
28629
|
function all2() {
|
|
28095
28630
|
return db().query(
|
|
28096
28631
|
`SELECT ${KNOWLEDGE_COLS} FROM knowledge WHERE confidence > 0.2 ORDER BY confidence DESC, updated_at DESC`
|
|
28097
28632
|
).all();
|
|
28098
28633
|
}
|
|
28634
|
+
function crossProject() {
|
|
28635
|
+
return db().query(
|
|
28636
|
+
`SELECT ${KNOWLEDGE_COLS} FROM knowledge
|
|
28637
|
+
WHERE (project_id IS NULL OR cross_project = 1) AND confidence > 0.2
|
|
28638
|
+
ORDER BY confidence DESC, updated_at DESC`
|
|
28639
|
+
).all();
|
|
28640
|
+
}
|
|
28099
28641
|
function searchLike2(input) {
|
|
28100
28642
|
const terms = input.query.toLowerCase().split(/\s+/).filter((t2) => t2.length > 2);
|
|
28101
28643
|
if (!terms.length) return [];
|
|
@@ -28327,6 +28869,270 @@ function check2(projectPath) {
|
|
|
28327
28869
|
}
|
|
28328
28870
|
return issues;
|
|
28329
28871
|
}
|
|
28872
|
+
function dedupPairKey(idA, idB) {
|
|
28873
|
+
return idA < idB ? `${idA}:${idB}` : `${idB}:${idA}`;
|
|
28874
|
+
}
|
|
28875
|
+
function _dedup(entries, dryRun, embeddingThreshold = EMBEDDING_DEDUP_THRESHOLD) {
|
|
28876
|
+
if (entries.length < 2) return { clusters: [], totalRemoved: 0, pairSimilarities: /* @__PURE__ */ new Map(), entryTitles: /* @__PURE__ */ new Map() };
|
|
28877
|
+
const embeddingMap = /* @__PURE__ */ new Map();
|
|
28878
|
+
{
|
|
28879
|
+
const entryIds = entries.map((e) => e.id);
|
|
28880
|
+
const placeholders = entryIds.map(() => "?").join(",");
|
|
28881
|
+
const rows = db().query(`SELECT id, embedding FROM knowledge WHERE embedding IS NOT NULL AND id IN (${placeholders})`).all(...entryIds);
|
|
28882
|
+
for (const row of rows) {
|
|
28883
|
+
try {
|
|
28884
|
+
embeddingMap.set(row.id, fromBlob(row.embedding));
|
|
28885
|
+
} catch {
|
|
28886
|
+
info(`skipping corrupted embedding for entry ${row.id}`);
|
|
28887
|
+
}
|
|
28888
|
+
}
|
|
28889
|
+
}
|
|
28890
|
+
const neighborMap = /* @__PURE__ */ new Map();
|
|
28891
|
+
const pairSimilarities = /* @__PURE__ */ new Map();
|
|
28892
|
+
for (const entry of entries) {
|
|
28893
|
+
const neighbors = [];
|
|
28894
|
+
const entryVec = embeddingMap.get(entry.id);
|
|
28895
|
+
for (const other of entries) {
|
|
28896
|
+
if (other.id === entry.id) continue;
|
|
28897
|
+
const { coefficient, intersectionSize } = titleOverlap(entry.title, other.title);
|
|
28898
|
+
const titleMatch = coefficient >= FUZZY_DEDUP_THRESHOLD && intersectionSize >= FUZZY_DEDUP_MIN_OVERLAP;
|
|
28899
|
+
let embeddingMatch = false;
|
|
28900
|
+
let similarity = 0;
|
|
28901
|
+
if (entryVec) {
|
|
28902
|
+
const otherVec = embeddingMap.get(other.id);
|
|
28903
|
+
if (otherVec && entryVec.length === otherVec.length) {
|
|
28904
|
+
similarity = cosineSimilarity(entryVec, otherVec);
|
|
28905
|
+
embeddingMatch = similarity >= embeddingThreshold;
|
|
28906
|
+
}
|
|
28907
|
+
}
|
|
28908
|
+
if (similarity > 0) {
|
|
28909
|
+
const pk = dedupPairKey(entry.id, other.id);
|
|
28910
|
+
if (!pairSimilarities.has(pk)) {
|
|
28911
|
+
pairSimilarities.set(pk, similarity);
|
|
28912
|
+
}
|
|
28913
|
+
}
|
|
28914
|
+
if (titleMatch || embeddingMatch) {
|
|
28915
|
+
neighbors.push({ id: other.id, score: Math.max(coefficient, similarity) });
|
|
28916
|
+
}
|
|
28917
|
+
}
|
|
28918
|
+
neighbors.sort((a, b) => b.score - a.score);
|
|
28919
|
+
neighborMap.set(entry.id, neighbors);
|
|
28920
|
+
}
|
|
28921
|
+
const claimed = /* @__PURE__ */ new Set();
|
|
28922
|
+
const rawClusters = /* @__PURE__ */ new Map();
|
|
28923
|
+
const sortedIds = [...neighborMap.keys()].sort(
|
|
28924
|
+
(a, b) => neighborMap.get(b).length - neighborMap.get(a).length
|
|
28925
|
+
);
|
|
28926
|
+
for (const centerId of sortedIds) {
|
|
28927
|
+
if (claimed.has(centerId)) continue;
|
|
28928
|
+
claimed.add(centerId);
|
|
28929
|
+
const members = [centerId];
|
|
28930
|
+
for (const { id: neighborId } of neighborMap.get(centerId)) {
|
|
28931
|
+
if (claimed.has(neighborId)) continue;
|
|
28932
|
+
claimed.add(neighborId);
|
|
28933
|
+
members.push(neighborId);
|
|
28934
|
+
}
|
|
28935
|
+
if (members.length > 1) {
|
|
28936
|
+
rawClusters.set(centerId, members);
|
|
28937
|
+
}
|
|
28938
|
+
}
|
|
28939
|
+
const entryById = new Map(entries.map((e) => [e.id, e]));
|
|
28940
|
+
const result = [];
|
|
28941
|
+
let totalRemoved = 0;
|
|
28942
|
+
for (const members of rawClusters.values()) {
|
|
28943
|
+
if (members.length < 2) continue;
|
|
28944
|
+
const sorted = members.map((id) => entryById.get(id)).filter(Boolean).sort((a, b) => {
|
|
28945
|
+
if (b.confidence !== a.confidence) return b.confidence - a.confidence;
|
|
28946
|
+
if (b.updated_at !== a.updated_at) return b.updated_at - a.updated_at;
|
|
28947
|
+
return a.title.length - b.title.length;
|
|
28948
|
+
});
|
|
28949
|
+
const survivor = sorted[0];
|
|
28950
|
+
const merged = sorted.slice(1);
|
|
28951
|
+
result.push({
|
|
28952
|
+
surviving: { id: survivor.id, title: survivor.title },
|
|
28953
|
+
merged: merged.map((e) => ({ id: e.id, title: e.title }))
|
|
28954
|
+
});
|
|
28955
|
+
if (!dryRun) {
|
|
28956
|
+
for (const entry of merged) {
|
|
28957
|
+
remove(entry.id);
|
|
28958
|
+
}
|
|
28959
|
+
}
|
|
28960
|
+
totalRemoved += merged.length;
|
|
28961
|
+
}
|
|
28962
|
+
result.sort((a, b) => b.merged.length - a.merged.length);
|
|
28963
|
+
const entryTitles = new Map(entries.map((e) => [e.id, e.title]));
|
|
28964
|
+
return { clusters: result, totalRemoved, pairSimilarities, entryTitles };
|
|
28965
|
+
}
|
|
28966
|
+
async function deduplicate(projectPath, opts) {
|
|
28967
|
+
const pid = ensureProject(projectPath);
|
|
28968
|
+
const threshold = loadCalibratedThreshold(pid) ?? EMBEDDING_DEDUP_THRESHOLD;
|
|
28969
|
+
const entries = forProject(projectPath, false);
|
|
28970
|
+
return _dedup(entries, opts?.dryRun ?? true, threshold);
|
|
28971
|
+
}
|
|
28972
|
+
async function deduplicateGlobal(opts) {
|
|
28973
|
+
const threshold = loadCalibratedThreshold(null) ?? EMBEDDING_DEDUP_THRESHOLD;
|
|
28974
|
+
const entries = db().query(
|
|
28975
|
+
`SELECT ${KNOWLEDGE_COLS} FROM knowledge
|
|
28976
|
+
WHERE project_id IS NULL
|
|
28977
|
+
AND confidence > 0.2
|
|
28978
|
+
ORDER BY confidence DESC, updated_at DESC`
|
|
28979
|
+
).all();
|
|
28980
|
+
return _dedup(entries, opts?.dryRun ?? true, threshold);
|
|
28981
|
+
}
|
|
28982
|
+
var MIN_CALIBRATION_SAMPLES = 20;
|
|
28983
|
+
var DEFAULT_EMBEDDING_DEDUP_THRESHOLD = EMBEDDING_DEDUP_THRESHOLD;
|
|
28984
|
+
var AUTO_SIGNAL_MIN_SIMILARITY = 0.8;
|
|
28985
|
+
var AUTO_SIGNAL_MAX_PAIRS = 50;
|
|
28986
|
+
function recordDedupFeedback(input) {
|
|
28987
|
+
db().query(
|
|
28988
|
+
`INSERT INTO dedup_feedback
|
|
28989
|
+
(project_id, entry_a_title, entry_b_title, similarity, accepted, source, created_at)
|
|
28990
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)`
|
|
28991
|
+
).run(
|
|
28992
|
+
input.projectId,
|
|
28993
|
+
input.entryATitle,
|
|
28994
|
+
input.entryBTitle,
|
|
28995
|
+
input.similarity,
|
|
28996
|
+
input.accepted ? 1 : 0,
|
|
28997
|
+
input.source,
|
|
28998
|
+
Date.now()
|
|
28999
|
+
);
|
|
29000
|
+
}
|
|
29001
|
+
function recordDedupResultFeedback(projectId2, result, accepted, source) {
|
|
29002
|
+
for (const cluster of result.clusters) {
|
|
29003
|
+
for (const merged of cluster.merged) {
|
|
29004
|
+
const pk = dedupPairKey(cluster.surviving.id, merged.id);
|
|
29005
|
+
const similarity = result.pairSimilarities.get(pk);
|
|
29006
|
+
if (similarity != null && similarity > 0) {
|
|
29007
|
+
recordDedupFeedback({
|
|
29008
|
+
projectId: projectId2,
|
|
29009
|
+
entryATitle: cluster.surviving.title,
|
|
29010
|
+
entryBTitle: merged.title,
|
|
29011
|
+
similarity,
|
|
29012
|
+
accepted,
|
|
29013
|
+
source
|
|
29014
|
+
});
|
|
29015
|
+
}
|
|
29016
|
+
}
|
|
29017
|
+
}
|
|
29018
|
+
}
|
|
29019
|
+
function recordAutoSignals(projectId2, result) {
|
|
29020
|
+
const mergedPairs = /* @__PURE__ */ new Set();
|
|
29021
|
+
for (const cluster of result.clusters) {
|
|
29022
|
+
for (const merged of cluster.merged) {
|
|
29023
|
+
mergedPairs.add(dedupPairKey(cluster.surviving.id, merged.id));
|
|
29024
|
+
}
|
|
29025
|
+
}
|
|
29026
|
+
const titleMap = new Map(result.entryTitles);
|
|
29027
|
+
for (const cluster of result.clusters) {
|
|
29028
|
+
if (!titleMap.has(cluster.surviving.id)) {
|
|
29029
|
+
titleMap.set(cluster.surviving.id, cluster.surviving.title);
|
|
29030
|
+
}
|
|
29031
|
+
for (const m of cluster.merged) {
|
|
29032
|
+
if (!titleMap.has(m.id)) titleMap.set(m.id, m.title);
|
|
29033
|
+
}
|
|
29034
|
+
}
|
|
29035
|
+
const signals = [];
|
|
29036
|
+
for (const [pk, sim] of result.pairSimilarities) {
|
|
29037
|
+
if (sim < AUTO_SIGNAL_MIN_SIMILARITY) continue;
|
|
29038
|
+
if (mergedPairs.has(pk)) continue;
|
|
29039
|
+
const [idA, idB] = pk.split(":");
|
|
29040
|
+
const titleA = titleMap.get(idA);
|
|
29041
|
+
const titleB = titleMap.get(idB);
|
|
29042
|
+
if (!titleA || !titleB) continue;
|
|
29043
|
+
signals.push({ entryATitle: titleA, entryBTitle: titleB, similarity: sim });
|
|
29044
|
+
}
|
|
29045
|
+
const currentThreshold = loadCalibratedThreshold(projectId2) ?? DEFAULT_EMBEDDING_DEDUP_THRESHOLD;
|
|
29046
|
+
signals.sort((a, b) => Math.abs(a.similarity - currentThreshold) - Math.abs(b.similarity - currentThreshold));
|
|
29047
|
+
const capped = signals.slice(0, AUTO_SIGNAL_MAX_PAIRS);
|
|
29048
|
+
pruneDedupFeedback(projectId2);
|
|
29049
|
+
for (const s of capped) {
|
|
29050
|
+
recordDedupFeedback({
|
|
29051
|
+
projectId: projectId2,
|
|
29052
|
+
entryATitle: s.entryATitle,
|
|
29053
|
+
entryBTitle: s.entryBTitle,
|
|
29054
|
+
similarity: s.similarity,
|
|
29055
|
+
accepted: false,
|
|
29056
|
+
source: "auto_dedup"
|
|
29057
|
+
});
|
|
29058
|
+
}
|
|
29059
|
+
}
|
|
29060
|
+
function getDedupFeedback(projectId2) {
|
|
29061
|
+
const rows = projectId2 !== null ? db().query(
|
|
29062
|
+
"SELECT similarity, accepted, source FROM dedup_feedback WHERE project_id = ? ORDER BY similarity"
|
|
29063
|
+
).all(projectId2) : db().query(
|
|
29064
|
+
"SELECT similarity, accepted, source FROM dedup_feedback WHERE project_id IS NULL ORDER BY similarity"
|
|
29065
|
+
).all();
|
|
29066
|
+
return rows.map((r) => ({ similarity: r.similarity, accepted: r.accepted === 1, source: r.source }));
|
|
29067
|
+
}
|
|
29068
|
+
function getDedupFeedbackCount(projectId2) {
|
|
29069
|
+
const row = projectId2 !== null ? db().query("SELECT COUNT(*) as cnt FROM dedup_feedback WHERE project_id = ?").get(projectId2) : db().query("SELECT COUNT(*) as cnt FROM dedup_feedback WHERE project_id IS NULL").get();
|
|
29070
|
+
return row?.cnt ?? 0;
|
|
29071
|
+
}
|
|
29072
|
+
var MAX_FEEDBACK_ROWS_PER_PROJECT = 500;
|
|
29073
|
+
function pruneDedupFeedback(projectId2) {
|
|
29074
|
+
const count3 = getDedupFeedbackCount(projectId2);
|
|
29075
|
+
if (count3 <= MAX_FEEDBACK_ROWS_PER_PROJECT) return;
|
|
29076
|
+
const excess = count3 - MAX_FEEDBACK_ROWS_PER_PROJECT;
|
|
29077
|
+
if (projectId2 !== null) {
|
|
29078
|
+
db().query(
|
|
29079
|
+
`DELETE FROM dedup_feedback WHERE id IN (
|
|
29080
|
+
SELECT id FROM dedup_feedback WHERE project_id = ?
|
|
29081
|
+
ORDER BY created_at ASC LIMIT ?
|
|
29082
|
+
)`
|
|
29083
|
+
).run(projectId2, excess);
|
|
29084
|
+
} else {
|
|
29085
|
+
db().query(
|
|
29086
|
+
`DELETE FROM dedup_feedback WHERE id IN (
|
|
29087
|
+
SELECT id FROM dedup_feedback WHERE project_id IS NULL
|
|
29088
|
+
ORDER BY created_at ASC LIMIT ?
|
|
29089
|
+
)`
|
|
29090
|
+
).run(excess);
|
|
29091
|
+
}
|
|
29092
|
+
}
|
|
29093
|
+
function calibrateDedupThreshold(projectId2) {
|
|
29094
|
+
const feedback = getDedupFeedback(projectId2);
|
|
29095
|
+
if (feedback.length < MIN_CALIBRATION_SAMPLES) return null;
|
|
29096
|
+
const accepted = feedback.filter((f) => f.accepted);
|
|
29097
|
+
const rejected = feedback.filter((f) => !f.accepted);
|
|
29098
|
+
if (rejected.length === 0) {
|
|
29099
|
+
const minAccepted = Math.min(...accepted.map((f) => f.similarity));
|
|
29100
|
+
return Math.max(0.85, minAccepted - 5e-3);
|
|
29101
|
+
}
|
|
29102
|
+
if (accepted.length === 0) {
|
|
29103
|
+
warn("dedup calibration: all feedback is reject \u2014 keeping default threshold");
|
|
29104
|
+
return null;
|
|
29105
|
+
}
|
|
29106
|
+
const allSims = [...new Set(feedback.map((f) => f.similarity))].sort((a, b) => a - b);
|
|
29107
|
+
let bestThreshold = DEFAULT_EMBEDDING_DEDUP_THRESHOLD;
|
|
29108
|
+
let bestAccuracy = -1;
|
|
29109
|
+
for (let i = 0; i < allSims.length - 1; i++) {
|
|
29110
|
+
const candidate = (allSims[i] + allSims[i + 1]) / 2;
|
|
29111
|
+
const correctAccepted = accepted.filter((f) => f.similarity >= candidate).length;
|
|
29112
|
+
const correctRejected = rejected.filter((f) => f.similarity < candidate).length;
|
|
29113
|
+
const accuracy = (correctAccepted + correctRejected) / feedback.length;
|
|
29114
|
+
if (accuracy > bestAccuracy || accuracy === bestAccuracy && candidate > bestThreshold) {
|
|
29115
|
+
bestAccuracy = accuracy;
|
|
29116
|
+
bestThreshold = candidate;
|
|
29117
|
+
}
|
|
29118
|
+
}
|
|
29119
|
+
return Math.max(0.85, Math.min(0.98, bestThreshold));
|
|
29120
|
+
}
|
|
29121
|
+
/**
 * Stores a calibrated dedup threshold in the KV store as JSON.
 *
 * @param projectId2 Project id; `null`/`undefined` is stored under the "global" key.
 * @param threshold Threshold value to persist.
 * @param sampleSize Number of feedback samples behind the threshold.
 */
function saveCalibratedThreshold(projectId2, threshold, sampleSize) {
  const scope = projectId2 ?? "global";
  const record = { threshold, sampleSize, calibratedAt: Date.now() };
  setKV(`dedup_threshold:${scope}`, JSON.stringify(record));
}
|
|
29125
|
+
/**
 * Reads a previously saved dedup threshold from the KV store.
 *
 * @param projectId2 Project id; `null`/`undefined` reads the "global" key.
 * @returns The stored numeric threshold, or `null` when the key is absent,
 *   the value is not valid JSON, or `threshold` is not a number.
 */
function loadCalibratedThreshold(projectId2) {
  const stored = getKV(`dedup_threshold:${projectId2 ?? "global"}`);
  if (!stored) return null;

  let record;
  try {
    record = JSON.parse(stored);
  } catch {
    // Unparseable value: treat as "no calibration".
    return null;
  }
  // JSON such as `null` parses fine but carries no threshold.
  const value = record?.threshold;
  return typeof value === "number" ? value : null;
}
|
|
28330
29136
|
|
|
28331
29137
|
// src/data.ts
|
|
28332
29138
|
var data_exports = {};
|
|
@@ -28351,11 +29157,11 @@ __export(data_exports, {
|
|
|
28351
29157
|
resolveId: () => resolveId,
|
|
28352
29158
|
wipeDatabase: () => wipeDatabase
|
|
28353
29159
|
});
|
|
28354
|
-
import { statSync as
|
|
29160
|
+
import { statSync as statSync4, unlinkSync, existsSync as existsSync5 } from "fs";
|
|
28355
29161
|
|
|
28356
29162
|
// src/agents-file.ts
|
|
28357
|
-
import { existsSync as
|
|
28358
|
-
import { dirname as dirname2, join as
|
|
29163
|
+
import { existsSync as existsSync4, readFileSync as readFileSync3, writeFileSync, mkdirSync as mkdirSync3, statSync as statSync3 } from "fs";
|
|
29164
|
+
import { dirname as dirname2, join as join7 } from "path";
|
|
28359
29165
|
var LORE_SECTION_START = "<!-- This section is maintained by the coding agent via lore (https://github.com/BYK/loreai) -->";
|
|
28360
29166
|
var LORE_SECTION_END = "<!-- End lore-managed section -->";
|
|
28361
29167
|
var ALL_START_MARKERS = [
|
|
@@ -28386,7 +29192,7 @@ function setCache(fp, entry) {
|
|
|
28386
29192
|
).run(key, value, value);
|
|
28387
29193
|
}
|
|
28388
29194
|
function clearLoreFileCache(projectPath) {
|
|
28389
|
-
db().query("DELETE FROM kv_meta WHERE key = ?").run(CACHE_PREFIX +
|
|
29195
|
+
db().query("DELETE FROM kv_meta WHERE key = ?").run(CACHE_PREFIX + join7(projectPath, LORE_FILE));
|
|
28390
29196
|
}
|
|
28391
29197
|
function splitFile(fileContent) {
|
|
28392
29198
|
const spans = [];
|
|
@@ -28499,7 +29305,7 @@ function exportToFile(input) {
|
|
|
28499
29305
|
const pointerBody = "\n## Long-term Knowledge\n\nFor long-term knowledge entries managed by [lore](https://github.com/BYK/loreai) (gotchas, patterns, decisions, architecture), see [`.lore.md`](.lore.md) in the project root.\n";
|
|
28500
29306
|
const newSection = LORE_SECTION_START + pointerBody + LORE_SECTION_END + "\n";
|
|
28501
29307
|
let fileContent = "";
|
|
28502
|
-
if (
|
|
29308
|
+
if (existsSync4(input.filePath)) {
|
|
28503
29309
|
fileContent = readFileSync3(input.filePath, "utf8");
|
|
28504
29310
|
}
|
|
28505
29311
|
const { before, after } = splitFile(fileContent);
|
|
@@ -28508,11 +29314,11 @@ function exportToFile(input) {
|
|
|
28508
29314
|
const suffix = after.trimStart();
|
|
28509
29315
|
const suffixWithSep = suffix.length > 0 ? "\n" + suffix : "";
|
|
28510
29316
|
const result = prefixWithSep + newSection + suffixWithSep;
|
|
28511
|
-
|
|
29317
|
+
mkdirSync3(dirname2(input.filePath), { recursive: true });
|
|
28512
29318
|
writeFileSync(input.filePath, result, "utf8");
|
|
28513
29319
|
}
|
|
28514
29320
|
function shouldImport(input) {
|
|
28515
|
-
if (!
|
|
29321
|
+
if (!existsSync4(input.filePath)) return false;
|
|
28516
29322
|
const fileContent = readFileSync3(input.filePath, "utf8");
|
|
28517
29323
|
const { section } = splitFile(fileContent);
|
|
28518
29324
|
if (section === null) {
|
|
@@ -28533,18 +29339,26 @@ function _importEntries(entries, projectPath) {
|
|
|
28533
29339
|
update(entry.id, { content: entry.content });
|
|
28534
29340
|
}
|
|
28535
29341
|
} else {
|
|
28536
|
-
|
|
28537
|
-
|
|
28538
|
-
|
|
28539
|
-
title
|
|
28540
|
-
|
|
28541
|
-
|
|
28542
|
-
|
|
28543
|
-
|
|
28544
|
-
|
|
29342
|
+
const pid = ensureProject(projectPath);
|
|
29343
|
+
const fuzzyMatch = findFuzzyDuplicate({ title: entry.title, projectId: pid });
|
|
29344
|
+
if (fuzzyMatch) {
|
|
29345
|
+
if (fuzzyMatch.title !== entry.title || get(fuzzyMatch.id)?.content !== entry.content) {
|
|
29346
|
+
update(fuzzyMatch.id, { content: entry.content });
|
|
29347
|
+
}
|
|
29348
|
+
} else {
|
|
29349
|
+
create({
|
|
29350
|
+
projectPath,
|
|
29351
|
+
category: entry.category,
|
|
29352
|
+
title: entry.title,
|
|
29353
|
+
content: entry.content,
|
|
29354
|
+
scope: "project",
|
|
29355
|
+
crossProject: false,
|
|
29356
|
+
id: entry.id
|
|
29357
|
+
});
|
|
29358
|
+
}
|
|
28545
29359
|
}
|
|
28546
29360
|
} else {
|
|
28547
|
-
const existing = forProject(projectPath,
|
|
29361
|
+
const existing = forProject(projectPath, false);
|
|
28548
29362
|
const titleMatch = existing.find(
|
|
28549
29363
|
(e) => e.title.toLowerCase() === entry.title.toLowerCase()
|
|
28550
29364
|
);
|
|
@@ -28562,7 +29376,7 @@ function _importEntries(entries, projectPath) {
|
|
|
28562
29376
|
}
|
|
28563
29377
|
}
|
|
28564
29378
|
function importFromFile(input) {
|
|
28565
|
-
if (!
|
|
29379
|
+
if (!existsSync4(input.filePath)) return;
|
|
28566
29380
|
const fileContent = readFileSync3(input.filePath, "utf8");
|
|
28567
29381
|
const { section } = splitFile(fileContent);
|
|
28568
29382
|
const textToParse = section ?? fileContent;
|
|
@@ -28571,25 +29385,25 @@ function importFromFile(input) {
|
|
|
28571
29385
|
_importEntries(fileEntries, input.projectPath);
|
|
28572
29386
|
}
|
|
28573
29387
|
function loreFileExists(projectPath) {
|
|
28574
|
-
return
|
|
29388
|
+
return existsSync4(join7(projectPath, LORE_FILE));
|
|
28575
29389
|
}
|
|
28576
29390
|
function exportLoreFile(projectPath) {
|
|
28577
29391
|
const sectionBody = buildSection(projectPath);
|
|
28578
29392
|
const content3 = LORE_FILE_HEADER + "\n" + sectionBody;
|
|
28579
29393
|
const contentHash2 = hashSection(content3);
|
|
28580
|
-
const fp =
|
|
29394
|
+
const fp = join7(projectPath, LORE_FILE);
|
|
28581
29395
|
const cached2 = getCache(fp);
|
|
28582
29396
|
if (cached2 && cached2.hash === contentHash2) {
|
|
28583
29397
|
return;
|
|
28584
29398
|
}
|
|
28585
29399
|
writeFileSync(fp, content3, "utf8");
|
|
28586
|
-
const { mtimeMs } =
|
|
29400
|
+
const { mtimeMs } = statSync3(fp);
|
|
28587
29401
|
setCache(fp, { mtimeMs, hash: contentHash2 });
|
|
28588
29402
|
}
|
|
28589
29403
|
function shouldImportLoreFile(projectPath) {
|
|
28590
|
-
const fp =
|
|
28591
|
-
if (!
|
|
28592
|
-
const { mtimeMs } =
|
|
29404
|
+
const fp = join7(projectPath, LORE_FILE);
|
|
29405
|
+
if (!existsSync4(fp)) return false;
|
|
29406
|
+
const { mtimeMs } = statSync3(fp);
|
|
28593
29407
|
const cached2 = getCache(fp);
|
|
28594
29408
|
if (cached2 && cached2.mtimeMs === mtimeMs) {
|
|
28595
29409
|
return false;
|
|
@@ -28605,12 +29419,17 @@ function shouldImportLoreFile(projectPath) {
|
|
|
28605
29419
|
return true;
|
|
28606
29420
|
}
|
|
28607
29421
|
function importLoreFile(projectPath) {
|
|
28608
|
-
const fp =
|
|
28609
|
-
if (!
|
|
29422
|
+
const fp = join7(projectPath, LORE_FILE);
|
|
29423
|
+
if (!existsSync4(fp)) return;
|
|
28610
29424
|
const fileContent = readFileSync3(fp, "utf8");
|
|
28611
29425
|
const fileEntries = parseEntriesFromSection(fileContent);
|
|
28612
29426
|
if (!fileEntries.length) return;
|
|
28613
29427
|
_importEntries(fileEntries, projectPath);
|
|
29428
|
+
try {
|
|
29429
|
+
const { mtimeMs } = statSync3(fp);
|
|
29430
|
+
setCache(fp, { mtimeMs, hash: hashSection(fileContent) });
|
|
29431
|
+
} catch {
|
|
29432
|
+
}
|
|
28614
29433
|
}
|
|
28615
29434
|
|
|
28616
29435
|
// src/data.ts
|
|
@@ -28685,10 +29504,10 @@ function globalStats() {
|
|
|
28685
29504
|
let db_size_bytes = 0;
|
|
28686
29505
|
try {
|
|
28687
29506
|
const p2 = dbPath();
|
|
28688
|
-
db_size_bytes =
|
|
29507
|
+
db_size_bytes = statSync4(p2).size;
|
|
28689
29508
|
const walPath = p2 + "-wal";
|
|
28690
|
-
if (
|
|
28691
|
-
db_size_bytes +=
|
|
29509
|
+
if (existsSync5(walPath)) {
|
|
29510
|
+
db_size_bytes += statSync4(walPath).size;
|
|
28692
29511
|
}
|
|
28693
29512
|
} catch {
|
|
28694
29513
|
}
|
|
@@ -28739,7 +29558,7 @@ function clearProject(projectPath) {
|
|
|
28739
29558
|
database.exec("ROLLBACK");
|
|
28740
29559
|
throw e;
|
|
28741
29560
|
}
|
|
28742
|
-
if (
|
|
29561
|
+
if (existsSync5(projectPath)) {
|
|
28743
29562
|
try {
|
|
28744
29563
|
exportLoreFile(projectPath);
|
|
28745
29564
|
} catch {
|
|
@@ -28810,7 +29629,7 @@ function clearKnowledge(projectPath) {
|
|
|
28810
29629
|
"SELECT COUNT(*) as c FROM knowledge WHERE project_id = ?"
|
|
28811
29630
|
).get(pid).c;
|
|
28812
29631
|
db().query("DELETE FROM knowledge WHERE project_id = ?").run(pid);
|
|
28813
|
-
if (
|
|
29632
|
+
if (existsSync5(projectPath)) {
|
|
28814
29633
|
try {
|
|
28815
29634
|
exportLoreFile(projectPath);
|
|
28816
29635
|
} catch {
|
|
@@ -28869,7 +29688,7 @@ function wipeDatabase() {
|
|
|
28869
29688
|
close();
|
|
28870
29689
|
for (const suffix of ["", "-wal", "-shm"]) {
|
|
28871
29690
|
const fp = p2 + suffix;
|
|
28872
|
-
if (
|
|
29691
|
+
if (existsSync5(fp)) {
|
|
28873
29692
|
try {
|
|
28874
29693
|
unlinkSync(fp);
|
|
28875
29694
|
} catch {
|
|
@@ -28910,7 +29729,7 @@ function backfillGitRemotes() {
|
|
|
28910
29729
|
for (const project of projects) {
|
|
28911
29730
|
let gitRemote = project.git_remote;
|
|
28912
29731
|
if (!gitRemote) {
|
|
28913
|
-
if (!
|
|
29732
|
+
if (!existsSync5(project.path)) continue;
|
|
28914
29733
|
gitRemote = getGitRemote(project.path);
|
|
28915
29734
|
if (!gitRemote) continue;
|
|
28916
29735
|
const existing = db().query(
|
|
@@ -29009,6 +29828,32 @@ var PATTERNS = [
|
|
|
29009
29828
|
regex: /(?:user |team |we )(?:always |usually |typically )(?:use|prefer|go with) (.+?)(?:\.|,|$)/gi,
|
|
29010
29829
|
category: "preference",
|
|
29011
29830
|
titleFn: (m) => `Typically uses ${m[1].trim()}`
|
|
29831
|
+
},
|
|
29832
|
+
// Process instruction patterns — match distilled observations recording
|
|
29833
|
+
// user assertions about workflow/process rules. The distillation observer
|
|
29834
|
+
// normalizes user instructions into "User stated always X" phrasing.
|
|
29835
|
+
// These require "stated/asserted/said" to avoid overlapping with the
|
|
29836
|
+
// existing "typically uses" pattern above (which already handles
|
|
29837
|
+
// "user always use/prefer/go with X").
|
|
29838
|
+
{
|
|
29839
|
+
regex: /(?:user |team |we )(?:stated |asserted |said )(?:to )?always (.+?)(?:\.|,|$)/gi,
|
|
29840
|
+
category: "preference",
|
|
29841
|
+
titleFn: (m) => `Always ${m[1].trim()}`
|
|
29842
|
+
},
|
|
29843
|
+
{
|
|
29844
|
+
regex: /(?:user |team |we )(?:stated |asserted |said )(?:to )?never (.+?)(?:\.|,|$)/gi,
|
|
29845
|
+
category: "preference",
|
|
29846
|
+
titleFn: (m) => `Never ${m[1].trim()}`
|
|
29847
|
+
},
|
|
29848
|
+
{
|
|
29849
|
+
regex: /(?:user |team |we )(?:stated |asserted |said )(?:to )?make sure to (.+?)(?:\.|,|$)/gi,
|
|
29850
|
+
category: "preference",
|
|
29851
|
+
titleFn: (m) => `Make sure to ${m[1].trim()}`
|
|
29852
|
+
},
|
|
29853
|
+
{
|
|
29854
|
+
regex: /(?:user |team |we )(?:stated |asserted |said )(?:to )?(?:don't|do not) forget (?:to )?(.+?)(?:\.|,|$)/gi,
|
|
29855
|
+
category: "preference",
|
|
29856
|
+
titleFn: (m) => `Always ${m[1].trim()}`
|
|
29012
29857
|
}
|
|
29013
29858
|
];
|
|
29014
29859
|
function extractPatterns(observations) {
|
|
@@ -29018,6 +29863,8 @@ function extractPatterns(observations) {
|
|
|
29018
29863
|
regex.lastIndex = 0;
|
|
29019
29864
|
let match;
|
|
29020
29865
|
while ((match = regex.exec(observations)) !== null) {
|
|
29866
|
+
const captures = match.slice(1);
|
|
29867
|
+
if (captures.some((c) => c && (c.trim().length <= 2 || /["\u201C\u201D`\u2018\u2019]/.test(c)))) continue;
|
|
29021
29868
|
const title = titleFn(match);
|
|
29022
29869
|
const key = title.toLowerCase();
|
|
29023
29870
|
if (seen.has(key)) continue;
|
|
@@ -29133,11 +29980,21 @@ function getSessionState(sessionID) {
|
|
|
29133
29980
|
if (!state) {
|
|
29134
29981
|
state = makeSessionState();
|
|
29135
29982
|
state.forceMinLayer = loadForceMinLayer(sessionID);
|
|
29983
|
+
const persisted = loadSessionTracking(sessionID);
|
|
29984
|
+
if (persisted && persisted.lastTurnAt > 0) {
|
|
29985
|
+
state.dynamicContextCap = persisted.dynamicContextCap;
|
|
29986
|
+
state.bustRateEMA = persisted.bustRateEMA;
|
|
29987
|
+
state.interBustIntervalEMA = persisted.interBustIntervalEMA;
|
|
29988
|
+
state.lastLayer = persisted.lastLayer;
|
|
29989
|
+
state.lastKnownInput = persisted.lastKnownInput;
|
|
29990
|
+
state.lastTurnAt = persisted.lastTurnAt;
|
|
29991
|
+
state.lastBustAt = persisted.lastBustAt;
|
|
29992
|
+
}
|
|
29136
29993
|
sessionStates.set(sessionID, state);
|
|
29137
29994
|
}
|
|
29138
29995
|
return state;
|
|
29139
29996
|
}
|
|
29140
|
-
function onIdleResume(sessionID, thresholdMs, now = Date.now()) {
|
|
29997
|
+
function onIdleResume(sessionID, thresholdMs, now = Date.now(), skipCompact = false) {
|
|
29141
29998
|
if (thresholdMs <= 0) return { triggered: false };
|
|
29142
29999
|
const state = getSessionState(sessionID);
|
|
29143
30000
|
if (state.lastTurnAt === 0) return { triggered: false };
|
|
@@ -29147,7 +30004,7 @@ function onIdleResume(sessionID, thresholdMs, now = Date.now()) {
|
|
|
29147
30004
|
state.rawWindowCache = null;
|
|
29148
30005
|
state.distillationSnapshot = null;
|
|
29149
30006
|
state.cameOutOfIdle = true;
|
|
29150
|
-
state.postIdleCompact =
|
|
30007
|
+
state.postIdleCompact = !skipCompact;
|
|
29151
30008
|
return { triggered: true, idleMs };
|
|
29152
30009
|
}
|
|
29153
30010
|
function getLastTurnAt(sessionID) {
|
|
@@ -29238,6 +30095,19 @@ function inspectSessionState(sessionID) {
|
|
|
29238
30095
|
function setLastTurnAtForTest(sessionID, ms) {
|
|
29239
30096
|
getSessionState(sessionID).lastTurnAt = ms;
|
|
29240
30097
|
}
|
|
30098
|
+
/**
 * Persists the tracking fields of an in-memory session state via
 * `saveSessionTracking`. Does nothing when the session has no state in
 * `sessionStates`.
 *
 * @param sessionID Session whose state should be saved.
 */
function saveGradientState(sessionID) {
  const current = sessionStates.get(sessionID);
  if (!current) return;

  // Same fields that getSessionState restores from loadSessionTracking.
  const {
    dynamicContextCap,
    bustRateEMA,
    interBustIntervalEMA,
    lastLayer,
    lastKnownInput,
    lastTurnAt,
    lastBustAt
  } = current;
  saveSessionTracking(sessionID, {
    dynamicContextCap,
    bustRateEMA,
    interBustIntervalEMA,
    lastLayer,
    lastKnownInput,
    lastTurnAt,
    lastBustAt
  });
}
|
|
29241
30111
|
function loadDistillations(projectPath, sessionID) {
|
|
29242
30112
|
const pid = ensureProject(projectPath);
|
|
29243
30113
|
const query = sessionID ? "SELECT id, observations, generation, token_count, created_at, session_id FROM distillations WHERE project_id = ? AND session_id = ? AND archived = 0 ORDER BY created_at ASC" : "SELECT id, observations, generation, token_count, created_at, session_id FROM distillations WHERE project_id = ? AND archived = 0 ORDER BY created_at ASC";
|
|
@@ -29522,6 +30392,26 @@ function buildPrefixMessages(formatted) {
|
|
|
29522
30392
|
}
|
|
29523
30393
|
];
|
|
29524
30394
|
}
|
|
30395
|
+
// Keyword detectors applied to a distillation's `observations` text.
var DECISION_RE = /\b(?:decision|decided|chose|chosen|agreed)\b/i;
var GOTCHA_RE = /\b(?:gotcha|(?:critical|known|subtle)\s+bug|broken|crash(?:ed|es)?|regression)\b/i;
var ARCH_RE = /\b(?:architecture|design.(?:decision|pattern)|system.design)\b/i;

/**
 * Scores how important a distillation looks from its text and generation.
 *
 * @param d Distillation with `observations` (text) and `generation` (number).
 * @returns Sum of the matching weights, capped at 1.
 */
function importanceBonus(d) {
  // Weights are added in this fixed order.
  const keywordWeights = [
    [DECISION_RE, 0.3],
    [GOTCHA_RE, 0.2],
    [ARCH_RE, 0.1]
  ];
  let total = 0;
  for (const [pattern, weight] of keywordWeights) {
    if (pattern.test(d.observations)) total += weight;
  }
  // Generation 1 and above earns an extra 0.2.
  if (d.generation >= 1) total += 0.2;
  return Math.min(total, 1);
}
|
|
30406
|
+
/**
 * Picks at most `limit` distillations, favouring later entries and those with
 * a higher `importanceBonus`, and returns them ordered by `created_at`.
 *
 * @param all3 Candidate distillations; position in the array is used as recency.
 * @param limit Maximum number of entries to keep.
 * @returns `all3` itself when it already fits, otherwise a new array.
 */
function selectDistillations(all3, limit) {
  if (all3.length <= limit) return all3;

  const lastIndex = all3.length - 1;
  const ranked = [];
  all3.forEach((entry, position) => {
    // Recency runs from 0 (first) to 1 (last); 70% recency, 30% importance.
    const recency = lastIndex > 0 ? position / lastIndex : 1;
    ranked.push({ entry, score: recency * 0.7 + importanceBonus(entry) * 0.3 });
  });

  ranked.sort((left, right) => right.score - left.score);
  const kept = ranked.slice(0, limit).map((item) => item.entry);
  // Restore chronological order for the survivors.
  kept.sort((left, right) => left.created_at - right.created_at);
  return kept;
}
|
|
29525
30415
|
function distilledPrefix(distillations) {
|
|
29526
30416
|
if (!distillations.length) return [];
|
|
29527
30417
|
const formatted = formatDistillations(distillations);
|
|
@@ -29639,6 +30529,11 @@ function tryFitStable(input) {
|
|
|
29639
30529
|
}
|
|
29640
30530
|
return result;
|
|
29641
30531
|
}
|
|
30532
|
+
// Compression stages tried in order by transformInner; stage index + 1 is the
// reported layer. A null `rawFrac`/`distFrac` means "use the budget already
// computed"; otherwise the budget is that fraction of `usable`. `distLimit`
// caps how many distillations go into the prefix.
var COMPRESSION_STAGES = [
  {
    strip: "none",
    rawFrac: null,
    distFrac: null,
    distLimit: Infinity,
    protectedTurns: 0,
    useStableWindow: true
  },
  {
    strip: "old-tools",
    rawFrac: 0.5,
    distFrac: null,
    distLimit: Infinity,
    protectedTurns: 2,
    useStableWindow: false
  },
  {
    strip: "all-tools",
    rawFrac: 0.55,
    distFrac: 0.15,
    distLimit: 5,
    protectedTurns: 0,
    useStableWindow: false
  }
];
|
|
29642
30537
|
var urgentDistillationMap = /* @__PURE__ */ new Map();
|
|
29643
30538
|
function needsUrgentDistillation(sessionID) {
|
|
29644
30539
|
const v = urgentDistillationMap.get(sessionID) ?? false;
|
|
@@ -29670,7 +30565,7 @@ function transformInner(input) {
|
|
|
29670
30565
|
if (calibrated) return true;
|
|
29671
30566
|
return result.totalTokens * UNCALIBRATED_SAFETY <= maxInput;
|
|
29672
30567
|
}
|
|
29673
|
-
if (calibrated && sessState.lastLayer >= 1 && input.messages.length >= sessState.lastKnownMessageCount) {
|
|
30568
|
+
if (calibrated && sessState.lastLayer >= 1 && sessState.lastLayer <= 3 && input.messages.length >= sessState.lastKnownMessageCount) {
|
|
29674
30569
|
effectiveMinLayer = Math.max(effectiveMinLayer, sessState.lastLayer);
|
|
29675
30570
|
}
|
|
29676
30571
|
const postIdleCompact = sessState.postIdleCompact;
|
|
@@ -29708,7 +30603,8 @@ function transformInner(input) {
|
|
|
29708
30603
|
totalTokens: Math.max(0, messageTokens),
|
|
29709
30604
|
usable,
|
|
29710
30605
|
distilledBudget,
|
|
29711
|
-
rawBudget
|
|
30606
|
+
rawBudget,
|
|
30607
|
+
refreshLtm: false
|
|
29712
30608
|
};
|
|
29713
30609
|
}
|
|
29714
30610
|
const turnStart = currentTurnStart(input.messages);
|
|
@@ -29718,67 +30614,52 @@ function transformInner(input) {
|
|
|
29718
30614
|
const msgs = distilledPrefix(distillations);
|
|
29719
30615
|
return { messages: msgs, tokens: msgs.reduce((sum, m) => sum + estimateMessage(m), 0) };
|
|
29720
30616
|
})();
|
|
29721
|
-
|
|
29722
|
-
const
|
|
29723
|
-
|
|
29724
|
-
|
|
29725
|
-
|
|
29726
|
-
|
|
29727
|
-
|
|
29728
|
-
|
|
29729
|
-
|
|
29730
|
-
|
|
29731
|
-
|
|
29732
|
-
|
|
29733
|
-
|
|
29734
|
-
|
|
29735
|
-
|
|
29736
|
-
|
|
29737
|
-
|
|
29738
|
-
|
|
29739
|
-
|
|
30617
|
+
for (let s = 0; s < COMPRESSION_STAGES.length; s++) {
|
|
30618
|
+
const stageLayer = s + 1;
|
|
30619
|
+
if (effectiveMinLayer > stageLayer) continue;
|
|
30620
|
+
const stage = COMPRESSION_STAGES[s];
|
|
30621
|
+
const stageRawBudget = stage.rawFrac !== null ? Math.floor(usable * stage.rawFrac) : rawBudget;
|
|
30622
|
+
const stageDistBudget = stage.distFrac !== null ? Math.floor(usable * stage.distFrac) : distilledBudget;
|
|
30623
|
+
let stagePrefix = cached2.messages;
|
|
30624
|
+
let stagePrefixTokens = cached2.tokens;
|
|
30625
|
+
if (stage.distLimit !== Infinity && distillations.length > stage.distLimit) {
|
|
30626
|
+
const trimmed = selectDistillations(distillations, stage.distLimit);
|
|
30627
|
+
stagePrefix = distilledPrefix(trimmed);
|
|
30628
|
+
stagePrefixTokens = stagePrefix.reduce((sum, m) => sum + estimateMessage(m), 0);
|
|
30629
|
+
}
|
|
30630
|
+
let result;
|
|
30631
|
+
if (stage.useStableWindow && sid) {
|
|
30632
|
+
result = tryFitStable({
|
|
30633
|
+
messages: dedupMessages,
|
|
30634
|
+
prefix: stagePrefix,
|
|
30635
|
+
prefixTokens: stagePrefixTokens,
|
|
30636
|
+
distilledBudget: stageDistBudget,
|
|
30637
|
+
rawBudget: stageRawBudget,
|
|
30638
|
+
sessionID: sid,
|
|
30639
|
+
sessState
|
|
30640
|
+
});
|
|
30641
|
+
} else {
|
|
30642
|
+
sessState.rawWindowCache = null;
|
|
30643
|
+
result = tryFit({
|
|
30644
|
+
messages: dedupMessages,
|
|
30645
|
+
prefix: stagePrefix,
|
|
30646
|
+
prefixTokens: stagePrefixTokens,
|
|
30647
|
+
distilledBudget: stageDistBudget,
|
|
30648
|
+
rawBudget: stageRawBudget,
|
|
30649
|
+
strip: stage.strip,
|
|
30650
|
+
protectedTurns: stage.protectedTurns
|
|
30651
|
+
});
|
|
30652
|
+
}
|
|
30653
|
+
if (fitsWithSafetyMargin(result)) {
|
|
30654
|
+
if (sid && (s > 0 || cached2.tokens === 0)) {
|
|
29740
30655
|
urgentDistillationMap.set(sid, true);
|
|
29741
30656
|
}
|
|
29742
|
-
return { ...
|
|
30657
|
+
return { ...result, layer: stageLayer, usable, distilledBudget, rawBudget, refreshLtm: false };
|
|
29743
30658
|
}
|
|
29744
30659
|
}
|
|
29745
30660
|
sessState.rawWindowCache = null;
|
|
29746
|
-
if (effectiveMinLayer <= 2) {
|
|
29747
|
-
const layer2 = tryFit({
|
|
29748
|
-
messages: dedupMessages,
|
|
29749
|
-
prefix: cached2.messages,
|
|
29750
|
-
prefixTokens: cached2.tokens,
|
|
29751
|
-
distilledBudget,
|
|
29752
|
-
rawBudget: Math.floor(usable * 0.5),
|
|
29753
|
-
// give raw more room
|
|
29754
|
-
strip: "old-tools",
|
|
29755
|
-
protectedTurns: 2
|
|
29756
|
-
});
|
|
29757
|
-
if (fitsWithSafetyMargin(layer2)) {
|
|
29758
|
-
if (sid) urgentDistillationMap.set(sid, true);
|
|
29759
|
-
return { ...layer2, layer: 2, usable, distilledBudget, rawBudget };
|
|
29760
|
-
}
|
|
29761
|
-
}
|
|
29762
|
-
const trimmedDistillations = distillations.slice(-5);
|
|
29763
|
-
const trimmedPrefix = distilledPrefix(trimmedDistillations);
|
|
29764
|
-
const trimmedPrefixTokens = trimmedPrefix.reduce(
|
|
29765
|
-
(sum, m) => sum + estimateMessage(m),
|
|
29766
|
-
0
|
|
29767
|
-
);
|
|
29768
|
-
const layer3 = tryFit({
|
|
29769
|
-
messages: dedupMessages,
|
|
29770
|
-
prefix: trimmedPrefix,
|
|
29771
|
-
prefixTokens: trimmedPrefixTokens,
|
|
29772
|
-
distilledBudget: Math.floor(usable * 0.15),
|
|
29773
|
-
rawBudget: Math.floor(usable * 0.55),
|
|
29774
|
-
strip: "all-tools"
|
|
29775
|
-
});
|
|
29776
|
-
if (fitsWithSafetyMargin(layer3)) {
|
|
29777
|
-
if (sid) urgentDistillationMap.set(sid, true);
|
|
29778
|
-
return { ...layer3, layer: 3, usable, distilledBudget, rawBudget };
|
|
29779
|
-
}
|
|
29780
30661
|
if (sid) urgentDistillationMap.set(sid, true);
|
|
29781
|
-
const nuclearDistillations = distillations
|
|
30662
|
+
const nuclearDistillations = selectDistillations(distillations, 2);
|
|
29782
30663
|
const nuclearPrefix = distilledPrefix(nuclearDistillations);
|
|
29783
30664
|
const nuclearPrefixTokens = nuclearPrefix.reduce(
|
|
29784
30665
|
(sum, m) => sum + estimateMessage(m),
|
|
@@ -29817,7 +30698,8 @@ function transformInner(input) {
|
|
|
29817
30698
|
totalTokens: nuclearPrefixTokens + nuclearRawTokens,
|
|
29818
30699
|
usable,
|
|
29819
30700
|
distilledBudget,
|
|
29820
|
-
rawBudget
|
|
30701
|
+
rawBudget,
|
|
30702
|
+
refreshLtm: true
|
|
29821
30703
|
};
|
|
29822
30704
|
}
|
|
29823
30705
|
function transform2(input) {
|
|
@@ -29924,10 +30806,189 @@ function isWorkerSession(sessionID) {
|
|
|
29924
30806
|
return workerSessionIDs.has(sessionID);
|
|
29925
30807
|
}
|
|
29926
30808
|
|
|
29927
|
-
//
|
|
29928
|
-
|
|
29929
|
-
|
|
29930
|
-
|
|
30809
|
+
// ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
|
|
30810
|
+
// Singly linked list cell used by Queue: a payload plus a link to the next cell.
var Node = class {
  value = undefined;
  next = undefined;
  constructor(payload) {
    this.value = payload;
  }
};
|
|
30817
|
+
/**
 * FIFO queue backed by a singly linked list of `Node` cells
 * (vendored from yocto-queue).
 */
var Queue = class {
  #head;
  #tail;
  #size;

  constructor() {
    this.clear();
  }

  /** Appends `value` at the back of the queue. */
  enqueue(value) {
    const cell = new Node(value);
    if (this.#head) {
      this.#tail.next = cell;
    } else {
      this.#head = cell;
    }
    this.#tail = cell;
    this.#size++;
  }

  /** Removes and returns the front value, or `undefined` when empty. */
  dequeue() {
    const front = this.#head;
    if (!front) {
      return undefined;
    }
    this.#head = front.next;
    this.#size--;
    // The last cell was removed: drop the stale tail reference too.
    if (!this.#head) {
      this.#tail = undefined;
    }
    return front.value;
  }

  /** Returns the front value without removing it, or `undefined` when empty. */
  peek() {
    return this.#head ? this.#head.value : undefined;
  }

  /** Empties the queue. */
  clear() {
    this.#head = undefined;
    this.#tail = undefined;
    this.#size = 0;
  }

  /** Number of queued values. */
  get size() {
    return this.#size;
  }

  /** Yields values front to back without removing them. */
  *[Symbol.iterator]() {
    for (let cell = this.#head; cell; cell = cell.next) {
      yield cell.value;
    }
  }

  /** Yields values front to back, dequeuing each one as it goes. */
  *drain() {
    while (this.#head) {
      yield this.dequeue();
    }
  }
};
|
|
30874
|
+
|
|
30875
|
+
// ../../node_modules/.bun/p-limit@7.3.0/node_modules/p-limit/index.js
|
|
30876
|
+
/**
 * Creates a concurrency limiter (vendored from p-limit).
 *
 * @param concurrency Maximum number of tasks running at once, or an options
 *   object `{ concurrency, rejectOnClear }`.
 * @returns A function `limit(fn, ...args)` that returns a promise for `fn`'s
 *   result. It also exposes `activeCount`, `pendingCount`, `clearQueue()`,
 *   a settable `concurrency`, and `map(iterable, fn)`.
 * @throws {TypeError} When `concurrency` fails validation or `rejectOnClear`
 *   is not a boolean.
 */
function pLimit(concurrency) {
  let rejectOnClear = false;
  if (typeof concurrency === "object") {
    ({ concurrency, rejectOnClear = false } = concurrency);
  }
  validateConcurrency(concurrency);
  if (typeof rejectOnClear !== "boolean") {
    throw new TypeError("Expected `rejectOnClear` to be a boolean");
  }

  const waiting = new Queue();
  let running = 0;

  // Admit the next waiting task if a slot is free.
  const admitNext = () => {
    const hasCapacity = running < concurrency && waiting.size > 0;
    if (hasCapacity) {
      running++;
      waiting.dequeue().run();
    }
  };

  // Called when a task settles: free its slot and admit the next task.
  const release = () => {
    running--;
    admitNext();
  };

  const execute = async (function_, resolve, arguments_) => {
    // Wrapping in an async IIFE turns a synchronous throw into a rejection.
    const outcome = (async () => function_(...arguments_))();
    resolve(outcome);
    try {
      await outcome;
    } catch {
      // The caller sees the rejection through `resolve(outcome)`; here we
      // only need to know the task has settled.
    }
    release();
  };

  const schedule = (function_, resolve, reject, arguments_) => {
    const ticket = { reject };
    // The task starts only once `ticket.run` is called by admitNext.
    new Promise((admit) => {
      ticket.run = admit;
      waiting.enqueue(ticket);
    }).then(() => execute(function_, resolve, arguments_));
    if (running < concurrency) {
      admitNext();
    }
  };

  const generator = (function_, ...arguments_) => new Promise((resolve, reject) => {
    schedule(function_, resolve, reject, arguments_);
  });

  Object.defineProperties(generator, {
    activeCount: {
      get: () => running
    },
    pendingCount: {
      get: () => waiting.size
    },
    clearQueue: {
      value() {
        if (rejectOnClear) {
          // Reject every waiting task with an abort reason.
          const abortError = AbortSignal.abort().reason;
          while (waiting.size > 0) {
            waiting.dequeue().reject(abortError);
          }
        } else {
          waiting.clear();
        }
      }
    },
    concurrency: {
      get: () => concurrency,
      set(newConcurrency) {
        validateConcurrency(newConcurrency);
        concurrency = newConcurrency;
        // Fill any newly available slots on the next microtask.
        queueMicrotask(() => {
          while (running < concurrency && waiting.size > 0) {
            admitNext();
          }
        });
      }
    },
    map: {
      async value(iterable, function_) {
        const promises = Array.from(iterable, (value, index2) => this(function_, value, index2));
        return Promise.all(promises);
      }
    }
  });
  return generator;
}
|
|
30959
|
+
/**
 * Checks that `concurrency` is a positive integer or positive infinity.
 *
 * @throws {TypeError} For any other value.
 */
function validateConcurrency(concurrency) {
  const isWholeOrUnbounded = Number.isInteger(concurrency) || concurrency === Number.POSITIVE_INFINITY;
  if (isWholeOrUnbounded && concurrency > 0) {
    return;
  }
  throw new TypeError("Expected `concurrency` to be a number from 1 and up");
}
|
|
30964
|
+
|
|
30965
|
+
// src/session-limiter.ts
|
|
30966
|
+
/**
 * Creates a pool of per-key limiters, each built with `pLimit(1)` so work
 * submitted under the same key runs one task at a time.
 *
 * @returns `{ get, isBusy, clear }`:
 *   `get(key)` returns the key's limiter, creating it on first use;
 *   `isBusy(key)` is true when that limiter has active or pending work;
 *   `clear()` forgets every limiter.
 */
function createLimiterPool() {
  const byKey = /* @__PURE__ */ new Map();

  const get2 = (key) => {
    const existing = byKey.get(key);
    if (existing) return existing;
    const created = pLimit(1);
    byKey.set(key, created);
    return created;
  };

  const isBusy = (key) => {
    const limiter = byKey.get(key);
    if (!limiter) return false;
    return limiter.activeCount + limiter.pendingCount > 0;
  };

  const clear = () => {
    byKey.clear();
  };

  return { get: get2, isBusy, clear };
}

// Separate pools for distillation and curator work, keyed by the caller.
var distillLimiter = createLimiterPool();
var curatorLimiter = createLimiterPool();
|
|
30987
|
+
|
|
30988
|
+
// src/distillation.ts
|
|
30989
|
+
/**
 * Ratio of distilled size to the square root of the source size.
 *
 * @param distilledTokens Token count of the distilled output.
 * @param sourceTokens Token count of the source material.
 * @returns `distilledTokens / sqrt(sourceTokens)`, or 0 when `sourceTokens`
 *   is zero or negative.
 */
function compressionRatio(distilledTokens, sourceTokens) {
  return sourceTokens <= 0 ? 0 : distilledTokens / Math.sqrt(sourceTokens);
}
|
|
29932
30993
|
function maxAllowedExpansion(sourceTokens) {
|
|
29933
30994
|
if (sourceTokens < 100) return sourceTokens * 5;
|
|
@@ -30168,6 +31229,9 @@ function resetOrphans(projectPath, sessionID) {
|
|
|
30168
31229
|
return orphans.length;
|
|
30169
31230
|
}
|
|
30170
31231
|
/**
 * Public entry point for a distillation run. The work is routed through the
 * session's limiter from `distillLimiter`, then delegated to `runInner`.
 *
 * @param input Run options; `input.sessionID` selects the limiter.
 * @returns Whatever `runInner(input)` resolves to.
 */
async function run(input) {
  const serialize = distillLimiter.get(input.sessionID);
  return serialize(() => runInner(input));
}
|
|
31234
|
+
async function runInner(input) {
|
|
30171
31235
|
const orphans = resetOrphans(input.projectPath, input.sessionID);
|
|
30172
31236
|
if (orphans > 0) {
|
|
30173
31237
|
info(
|
|
@@ -30211,7 +31275,7 @@ async function run(input) {
|
|
|
30211
31275
|
}
|
|
30212
31276
|
}
|
|
30213
31277
|
if (!input.skipMeta && gen0Count(input.projectPath, input.sessionID) >= cfg.distillation.metaThreshold) {
|
|
30214
|
-
await
|
|
31278
|
+
await metaDistillInner({
|
|
30215
31279
|
llm: input.llm,
|
|
30216
31280
|
projectPath: input.projectPath,
|
|
30217
31281
|
sessionID: input.sessionID,
|
|
@@ -30261,17 +31325,25 @@ async function distillSegment(input) {
|
|
|
30261
31325
|
);
|
|
30262
31326
|
return null;
|
|
30263
31327
|
}
|
|
30264
|
-
|
|
30265
|
-
|
|
30266
|
-
|
|
30267
|
-
|
|
30268
|
-
|
|
30269
|
-
|
|
30270
|
-
|
|
30271
|
-
|
|
30272
|
-
|
|
30273
|
-
|
|
30274
|
-
|
|
31328
|
+
let distillId;
|
|
31329
|
+
db().exec("BEGIN IMMEDIATE");
|
|
31330
|
+
try {
|
|
31331
|
+
distillId = storeDistillation({
|
|
31332
|
+
projectPath: input.projectPath,
|
|
31333
|
+
sessionID: input.sessionID,
|
|
31334
|
+
observations: result.observations,
|
|
31335
|
+
sourceIDs: input.messages.map((m) => m.id),
|
|
31336
|
+
generation: 0,
|
|
31337
|
+
rCompression: rComp,
|
|
31338
|
+
cNorm,
|
|
31339
|
+
callType: input.callType
|
|
31340
|
+
});
|
|
31341
|
+
markDistilled(input.messages.map((m) => m.id));
|
|
31342
|
+
db().exec("COMMIT");
|
|
31343
|
+
} catch (e) {
|
|
31344
|
+
db().exec("ROLLBACK");
|
|
31345
|
+
throw e;
|
|
31346
|
+
}
|
|
30275
31347
|
info(
|
|
30276
31348
|
`distill segment: ${input.messages.length} msgs, ${sourceTokens}\u2192${distilledTokens} tokens, R=${rComp.toFixed(2)}, C_norm=${cNorm.toFixed(3)}`
|
|
30277
31349
|
);
|
|
@@ -30284,7 +31356,8 @@ async function distillSegment(input) {
|
|
|
30284
31356
|
embedDistillation(distillId, result.observations);
|
|
30285
31357
|
}
|
|
30286
31358
|
if (config2().knowledge.enabled) {
|
|
30287
|
-
|
|
31359
|
+
const patterns = extractPatterns(result.observations);
|
|
31360
|
+
for (const pat of patterns) {
|
|
30288
31361
|
try {
|
|
30289
31362
|
create({
|
|
30290
31363
|
projectPath: input.projectPath,
|
|
@@ -30297,10 +31370,16 @@ async function distillSegment(input) {
|
|
|
30297
31370
|
} catch {
|
|
30298
31371
|
}
|
|
30299
31372
|
}
|
|
31373
|
+
if (patterns.length > 0) {
|
|
31374
|
+
info(`pattern extraction: ${patterns.length} entries from distillation`);
|
|
31375
|
+
}
|
|
30300
31376
|
}
|
|
30301
31377
|
return result;
|
|
30302
31378
|
}
|
|
30303
31379
|
/**
 * Public entry point for meta-distillation.
 *
 * Delegates to `metaDistillInner`, invoked through the function returned by
 * `distillLimiter.get` for the session.
 *
 * @param {object} input - Meta-distillation request; `input.sessionID` selects the limiter.
 * @returns {Promise<*>} Whatever the limited `metaDistillInner(input)` call produces.
 */
async function metaDistill(input) {
  const limit = distillLimiter.get(input.sessionID);
  return limit(() => metaDistillInner(input));
}
|
|
31382
|
+
async function metaDistillInner(input) {
|
|
30304
31383
|
const existing = loadGen0(input.projectPath, input.sessionID);
|
|
30305
31384
|
const priorMeta = latestMeta(input.projectPath, input.sessionID);
|
|
30306
31385
|
if (priorMeta) {
|
|
@@ -30342,196 +31421,1801 @@ async function metaDistill(input) {
|
|
|
30342
31421
|
db().exec("ROLLBACK");
|
|
30343
31422
|
throw e;
|
|
30344
31423
|
}
|
|
30345
|
-
if (isAvailable()) {
|
|
30346
|
-
embedDistillation(metaId, result.observations);
|
|
31424
|
+
if (isAvailable()) {
|
|
31425
|
+
embedDistillation(metaId, result.observations);
|
|
31426
|
+
}
|
|
31427
|
+
if (config2().knowledge.enabled) {
|
|
31428
|
+
const patterns = extractPatterns(result.observations);
|
|
31429
|
+
for (const pat of patterns) {
|
|
31430
|
+
try {
|
|
31431
|
+
create({
|
|
31432
|
+
projectPath: input.projectPath,
|
|
31433
|
+
category: pat.category,
|
|
31434
|
+
title: pat.title,
|
|
31435
|
+
content: pat.content,
|
|
31436
|
+
session: input.sessionID,
|
|
31437
|
+
scope: "project"
|
|
31438
|
+
});
|
|
31439
|
+
} catch {
|
|
31440
|
+
}
|
|
31441
|
+
}
|
|
31442
|
+
if (patterns.length > 0) {
|
|
31443
|
+
info(`pattern extraction: ${patterns.length} entries from meta-distillation`);
|
|
31444
|
+
}
|
|
31445
|
+
}
|
|
31446
|
+
return result;
|
|
31447
|
+
}
|
|
31448
|
+
/**
 * Fills in `r_compression` and `c_norm` for every distillation row whose
 * `r_compression` is still NULL.
 *
 * A row is left untouched (and counted as skipped in the log line) when its
 * parsed source-ID list is empty or when none of those IDs are found in
 * `temporal_messages`.
 *
 * @returns {number} Number of distillation rows that were updated.
 */
function backfillMetrics() {
  const pending = db().query(
    "SELECT id, source_ids, token_count FROM distillations WHERE r_compression IS NULL"
  ).all();
  if (pending.length === 0) return 0;

  const writeMetrics = db().prepare(
    "UPDATE distillations SET r_compression = ?, c_norm = ? WHERE id = ?"
  );
  let updated = 0;
  for (const distillation of pending) {
    const ids = parseSourceIds(distillation.source_ids);
    if (ids.length === 0) continue;

    // One "?" per source ID so the IN (...) list stays fully parameterized.
    const marks = Array.from(ids, () => "?").join(",");
    const messages = db().query(
      `SELECT tokens, created_at FROM temporal_messages WHERE id IN (${marks})`
    ).all(...ids);
    if (messages.length === 0) continue;

    let sourceTokens = 0;
    const timestamps = [];
    for (const message of messages) {
      sourceTokens += message.tokens;
      timestamps.push(message.created_at);
    }
    const rComp = compressionRatio(distillation.token_count, sourceTokens);
    const cNorm = temporalCnorm(timestamps);
    writeMetrics.run(rComp, cNorm, distillation.id);
    updated += 1;
  }

  if (updated > 0) {
    info(
      `backfilled metrics for ${updated} distillations (${pending.length - updated} skipped \u2014 missing sources)`
    );
  }
  return updated;
}
|
|
31479
|
+
|
|
31480
|
+
// src/curator.ts
|
|
31481
|
+
var curator_exports = {};
|
|
31482
|
+
__export(curator_exports, {
|
|
31483
|
+
MAX_ENTRY_CONTENT_LENGTH: () => MAX_ENTRY_CONTENT_LENGTH,
|
|
31484
|
+
applyOps: () => applyOps,
|
|
31485
|
+
consolidate: () => consolidate,
|
|
31486
|
+
parseOps: () => parseOps,
|
|
31487
|
+
resetCurationTracker: () => resetCurationTracker,
|
|
31488
|
+
run: () => run2
|
|
31489
|
+
});
|
|
31490
|
+
|
|
31491
|
+
// src/instruction-detect.ts
|
|
31492
|
+
var instruction_detect_exports = {};
|
|
31493
|
+
__export(instruction_detect_exports, {
|
|
31494
|
+
detectAndFormat: () => detectAndFormat,
|
|
31495
|
+
extractInstructionCandidates: () => extractInstructionCandidates,
|
|
31496
|
+
findRepeatedInstructions: () => findRepeatedInstructions,
|
|
31497
|
+
formatForCurator: () => formatForCurator
|
|
31498
|
+
});
|
|
31499
|
+
// Minimum number of distinct prior sessions for an instruction to count as repeated.
var DEFAULT_REPETITION_THRESHOLD = 2;
// Minimum similarity for a vector hit to count as a prior occurrence.
var VECTOR_SIMILARITY_THRESHOLD = 0.5;
// Upper bound on candidates returned by extractInstructionCandidates.
var MAX_CANDIDATES = 5;
// Each pattern captures 10-80 characters following a trigger phrase, up to the
// first ".", ",", "!" or end of line. The `m` flag matters: without it `$`
// only anchors at the very end of the message, so an unpunctuated instruction
// sitting on its own line inside a multi-line message would never match.
var INSTRUCTION_PATTERNS = [
  /\balways\b (.{10,80}?)(?:\.|,|!|$)/gim,
  /\bnever\b (.{10,80}?)(?:\.|,|!|$)/gim,
  /\bmake sure to (.{10,80}?)(?:\.|,|!|$)/gim,
  /\bdon'?t forget (?:to )?(.{10,80}?)(?:\.|,|!|$)/gim,
  /\bplease (?:always |make sure (?:to )?)(.{10,80}?)(?:\.|,|!|$)/gim,
  /\bI (?:want|need|prefer|expect) (?:you to )?(.{10,80}?)(?:\.|,|!|$)/gim
];

/**
 * Scans user messages for instruction-like phrases ("always ...", "never ...",
 * "make sure to ...", etc.) and returns the captured instruction texts.
 *
 * Only messages with `role === "user"` and string content are inspected.
 * Candidates are de-duplicated case-insensitively and the scan stops as soon
 * as `MAX_CANDIDATES` have been collected.
 *
 * @param {Array<{role: string, content: string, session_id: string}>} messages
 * @returns {Array<{text: string, sessionID: string}>} Candidates in discovery order.
 */
function extractInstructionCandidates(messages) {
  const candidates = [];
  const seen = /* @__PURE__ */ new Set();
  for (const msg of messages) {
    if (msg.role !== "user" || typeof msg.content !== "string") continue;
    for (const pattern of INSTRUCTION_PATTERNS) {
      // matchAll iterates over a clone of the regex, so the shared /g patterns
      // never carry a stale lastIndex from one message to the next.
      for (const match of msg.content.matchAll(pattern)) {
        const text4 = match[1]?.trim();
        // Trimming can shrink the capture below the 10-character minimum.
        if (!text4 || text4.length < 10) continue;
        const key = text4.toLowerCase();
        if (seen.has(key)) continue;
        seen.add(key);
        candidates.push({ text: text4, sessionID: msg.session_id });
        if (candidates.length >= MAX_CANDIDATES) return candidates;
      }
    }
  }
  return candidates;
}
|
|
31534
|
+
/**
 * Determines which instruction candidates also show up in distillations from
 * other sessions of the same project.
 *
 * For each candidate, prior session IDs are gathered from two sources:
 * vector search over distillations (only when an embedding was obtained for
 * that candidate) and an FTS query built from up to five filtered terms
 * (only when at least two terms remain). Hits from `input.currentSessionID`
 * are ignored. A candidate is reported when the number of distinct prior
 * sessions reaches the threshold.
 *
 * @param {object} input
 * @param {string} input.projectPath
 * @param {string} input.currentSessionID
 * @param {Array<{text: string}>} input.candidates
 * @param {number} [input.threshold] - Defaults to DEFAULT_REPETITION_THRESHOLD.
 * @returns {Promise<Array<{instruction: string, priorSessionCount: number}>>}
 */
async function findRepeatedInstructions(input) {
  const threshold = input.threshold ?? DEFAULT_REPETITION_THRESHOLD;
  if (input.candidates.length === 0) return [];

  const pid = ensureProject(input.projectPath);

  // Embedding is best-effort: on failure we fall back to FTS-only matching.
  let embeddings = [];
  if (isAvailable()) {
    try {
      const texts = input.candidates.map((c) => c.text);
      embeddings = await embed(texts, "query");
    } catch (err) {
      warn("instruction-detect: batch embedding failed:", err);
    }
  }

  const repeated = [];
  for (const [index, candidate] of input.candidates.entries()) {
    const priorSessions = /* @__PURE__ */ new Set();

    if (index < embeddings.length) {
      for (const hit of vectorSearchAllDistillations(embeddings[index], pid, 20)) {
        const similarEnough = hit.similarity >= VECTOR_SIMILARITY_THRESHOLD;
        if (similarEnough && hit.session_id !== input.currentSessionID) {
          priorSessions.add(hit.session_id);
        }
      }
    }

    const terms = filterTerms(candidate.text);
    if (terms.length >= 2) {
      const query = terms.slice(0, 5).join(" ");
      for (const hit of searchDistillationsFTS(pid, query)) {
        if (hit.session_id !== input.currentSessionID) {
          priorSessions.add(hit.session_id);
        }
      }
    }

    if (priorSessions.size >= threshold) {
      repeated.push({
        instruction: candidate.text,
        priorSessionCount: priorSessions.size
      });
    }
  }
  return repeated;
}
|
|
31580
|
+
/**
 * Full-text search over a project's distillations.
 *
 * The raw query is converted with `ftsQueryOr`; if that yields `EMPTY_QUERY`
 * no database call is made. Any error raised while querying is logged via
 * `warn` and treated as "no hits".
 *
 * @param {*} projectId2 - Project identifier bound to `d.project_id`.
 * @param {string} rawQuery - Unescaped search text.
 * @returns {Array<{id: *, session_id: *}>} Up to 30 rows ordered by FTS rank.
 */
function searchDistillationsFTS(projectId2, rawQuery) {
  const matchExpr = ftsQueryOr(rawQuery);
  if (matchExpr === EMPTY_QUERY) {
    return [];
  }
  try {
    const statement = db().query(`SELECT d.id, d.session_id
FROM distillation_fts f
CROSS JOIN distillations d ON d.rowid = f.rowid
WHERE distillation_fts MATCH ?
AND d.project_id = ?
ORDER BY rank LIMIT 30`);
    return statement.all(matchExpr, projectId2);
  } catch (err) {
    warn("instruction-detect: FTS search failed:", err);
    return [];
  }
}
|
|
31596
|
+
/**
 * Renders repeated instructions as a text section to append to the curator
 * prompt. Returns an empty string when there is nothing to report.
 *
 * @param {Array<{instruction: string, priorSessionCount: number}>} instructions
 * @returns {string} The formatted section, starting with two newlines and a `---` rule.
 */
function formatForCurator(instructions) {
  if (instructions.length === 0) return "";

  const bullets = [];
  for (const { instruction, priorSessionCount } of instructions) {
    const plural = priorSessionCount !== 1 ? "s" : "";
    bullets.push(`- "${instruction}" (seen in ${priorSessionCount} prior session${plural})`);
  }
  return `

---
CROSS-SESSION REPEATED INSTRUCTIONS (high-confidence preference candidates):
The following user instructions have appeared in multiple prior sessions. These are strong candidates for "preference" entries:
${bullets.join("\n")}`;
}
|
|
31608
|
+
/**
 * End-to-end instruction detection for one session: load the session's
 * messages, extract instruction candidates, look for them in other sessions,
 * and format the repeated ones for the curator prompt.
 *
 * @param {object} input
 * @param {string} input.projectPath
 * @param {string} input.sessionID
 * @param {number} [input.threshold] - Forwarded to findRepeatedInstructions.
 * @returns {Promise<string>} Formatted section, or "" when nothing qualifies.
 */
async function detectAndFormat(input) {
  const { projectPath, sessionID, threshold } = input;

  const candidates = extractInstructionCandidates(bySession(projectPath, sessionID));
  if (candidates.length === 0) return "";

  const repeated = await findRepeatedInstructions({
    projectPath,
    currentSessionID: sessionID,
    candidates,
    threshold
  });
  if (repeated.length > 0) {
    info(
      `instruction-detect: ${repeated.length} repeated instruction(s) found across sessions`
    );
  }
  return formatForCurator(repeated);
}
|
|
31625
|
+
|
|
31626
|
+
// src/curator.ts
|
|
31627
|
+
var MAX_ENTRY_CONTENT_LENGTH = 1200;
|
|
31628
|
+
/**
 * Parses the curator model's response into a list of operations.
 *
 * The response is expected to be a JSON array, optionally wrapped in a
 * Markdown code fence. Both "```json" and a bare "```" opening fence are
 * stripped (the previous pattern required the language tag, so a bare fence
 * caused valid JSON to be discarded). Anything that is not valid JSON, not an
 * array, or an element without a string `op` field is dropped.
 *
 * @param {string} text4 - Raw model output.
 * @returns {Array<object>} Elements that are objects with a string `op`; `[]` on any parse failure.
 */
function parseOps(text4) {
  const cleaned = text4
    .trim()
    .replace(/^```(?:json)?\s*/i, "")
    .replace(/\s*```$/i, "");

  let parsed;
  try {
    parsed = JSON.parse(cleaned);
  } catch {
    // Malformed model output is expected occasionally; treat it as "no ops".
    return [];
  }
  if (!Array.isArray(parsed)) return [];
  return parsed.filter(
    (op) => typeof op === "object" && op !== null && typeof op.op === "string"
  );
}
|
|
31640
|
+
/**
 * Applies curator operations (create / update / delete) to the knowledge store.
 *
 * Ops come from model output that `parseOps` only validates for a string `op`
 * field, so the remaining fields are untrusted:
 * - a `create` op without string `content` is skipped with a warning instead
 *   of throwing part-way through the batch (earlier ops are already applied
 *   by then and their `syncRefs` calls would be lost);
 * - an `update` op with non-string `content` leaves the content unchanged and
 *   only forwards `confidence`.
 * Content longer than MAX_ENTRY_CONTENT_LENGTH is cut and marked as truncated.
 * `update`/`delete` ops whose entry does not exist are ignored.
 *
 * @param {Array<object>} ops - Operations as returned by parseOps.
 * @param {object} input
 * @param {string} [input.projectPath] - Used for project-scoped creates.
 * @param {string} [input.sessionID] - Recorded as the creating session.
 * @param {boolean} [input.skipCreate] - When true, `create` ops are ignored.
 * @returns {{created: number, updated: number, deleted: number}} Counts of applied ops.
 */
function applyOps(ops, input) {
  // Shared by create and update so both enforce the same length cap.
  const clamp = (content) => content.length > MAX_ENTRY_CONTENT_LENGTH
    ? content.slice(0, MAX_ENTRY_CONTENT_LENGTH) + " [truncated \u2014 entry too long]"
    : content;

  const tally = { created: 0, updated: 0, deleted: 0 };
  const idsToSync = [];

  for (const op of ops) {
    if (op.op === "create") {
      if (input.skipCreate) continue;
      if (typeof op.content !== "string") {
        warn("curator: skipping create op without string content");
        continue;
      }
      const id = create({
        projectPath: op.scope === "project" ? input.projectPath : void 0,
        category: op.category,
        title: op.title,
        content: clamp(op.content),
        session: input.sessionID,
        scope: op.scope,
        crossProject: op.crossProject ?? true
      });
      idsToSync.push(id);
      tally.created++;
    } else if (op.op === "update") {
      if (!get(op.id)) continue;
      const content = typeof op.content === "string" ? clamp(op.content) : void 0;
      update(op.id, { content, confidence: op.confidence });
      // References only need re-syncing when the content actually changed.
      if (content !== void 0) idsToSync.push(op.id);
      tally.updated++;
    } else if (op.op === "delete") {
      if (!get(op.id)) continue;
      remove(op.id);
      tally.deleted++;
    }
  }

  // Sync after all ops so references reflect the final state of the batch.
  for (const id of idsToSync) {
    syncRefs(id);
  }
  return tally;
}
|
|
31681
|
+
// In-memory cache: session ID -> timestamp of that session's last curation.
var lastCuratedAt = /* @__PURE__ */ new Map();

/**
 * Returns the timestamp of the last curation for a session.
 *
 * The in-memory cache is consulted first; on a miss the value is read from
 * persisted session tracking (falling back to 0) and cached.
 *
 * @param {string} sessionID
 * @returns {number} Last-curated timestamp, or 0 when none is recorded.
 */
function getLastCuratedAt(sessionID) {
  let ts = lastCuratedAt.get(sessionID);
  if (ts === void 0) {
    ts = loadSessionTracking(sessionID)?.lastCuratedAt ?? 0;
    lastCuratedAt.set(sessionID, ts);
  }
  return ts;
}
|
|
31690
|
+
/**
 * Public entry point for a curation pass.
 *
 * Returns an all-zero result without doing any work when the curator is
 * disabled in config or when `curatorLimiter` reports the session as busy.
 * Otherwise `runInner2` is invoked through the session's limiter.
 *
 * @param {object} input - Curation request; `input.sessionID` selects the limiter.
 * @returns {Promise<{created: number, updated: number, deleted: number}>}
 */
async function run2(input) {
  const nothingDone = () => ({ created: 0, updated: 0, deleted: 0 });

  if (!config2().curator.enabled) {
    return nothingDone();
  }
  const { sessionID } = input;
  if (curatorLimiter.isBusy(sessionID)) {
    info(`curation skipped: already running for session ${sessionID.slice(0, 16)}`);
    return nothingDone();
  }
  const limit = curatorLimiter.get(sessionID);
  return limit(() => runInner2(input));
}
|
|
31699
|
+
/**
 * Performs one curation pass for a session.
 *
 * Steps, in order:
 * 1. Select the session's messages created after the last curation; bail out
 *    with zeros when fewer than three are new.
 * 2. Build the curator prompt from those messages and the project's existing
 *    entries, then append any cross-session repeated-instruction section
 *    (a failure in that detection is logged and ignored).
 * 3. Ask the LLM for operations, parse and apply them.
 * 4. If entries were created, run de-duplication and, when pair similarities
 *    were produced, record signals and recalibrate the dedup threshold
 *    (failures here are logged and ignored).
 * 5. Record "now" as the session's last-curated timestamp, in memory and persisted.
 *
 * @param {object} input
 * @param {string} input.projectPath
 * @param {string} input.sessionID
 * @param {object} input.llm - Must expose `prompt(system, user, options)`.
 * @param {*} [input.model] - Overrides `cfg.model` when not nullish.
 * @returns {Promise<{created: number, updated: number, deleted: number}>}
 */
async function runInner2(input) {
  const { projectPath, sessionID } = input;
  const nothingDone = () => ({ created: 0, updated: 0, deleted: 0 });
  const cfg = config2();

  const sessionMessages = bySession(projectPath, sessionID);
  const curatedAt = getLastCuratedAt(sessionID);
  const fresh = sessionMessages.filter((m) => m.created_at > curatedAt);
  if (fresh.length < 3) return nothingDone();

  const transcript = fresh.map((m) => `[${m.role}] ${m.content}`).join("\n\n");
  const knownEntries = forProject(projectPath, false).map((e) => ({
    id: e.id,
    category: e.category,
    title: e.title,
    content: e.content
  }));
  const basePrompt = curatorUser({
    messages: transcript,
    existing: knownEntries
  });

  let crossSessionContext = "";
  try {
    crossSessionContext = await detectAndFormat({ projectPath, sessionID });
  } catch (err) {
    warn("instruction-detect failed (non-fatal):", err);
  }

  const model = input.model ?? cfg.model;
  const responseText = await input.llm.prompt(
    CURATOR_SYSTEM,
    basePrompt + crossSessionContext,
    { model, workerID: "lore-curator", thinking: false, sessionID, maxTokens: 2048 }
  );
  if (!responseText) return nothingDone();

  const result = applyOps(parseOps(responseText), { projectPath, sessionID });

  if (result.created > 0) {
    try {
      const dupes = await deduplicate(projectPath, { dryRun: false });
      if (dupes.totalRemoved > 0) {
        info(`post-curation dedup: merged ${dupes.totalRemoved} duplicate entries`);
        result.deleted += dupes.totalRemoved;
      }
      if (dupes.pairSimilarities.size > 0) {
        const pid = ensureProject(projectPath);
        recordAutoSignals(pid, dupes);
        const calibrated = calibrateDedupThreshold(pid);
        if (calibrated !== null) {
          saveCalibratedThreshold(pid, calibrated, getDedupFeedbackCount(pid));
        }
      }
    } catch (err) {
      warn("post-curation dedup failed (non-fatal):", err);
    }
  }

  const now = Date.now();
  lastCuratedAt.set(sessionID, now);
  saveSessionTracking(sessionID, { lastCuratedAt: now });
  return result;
}
|
|
31764
|
+
/**
 * Clears the in-memory last-curated cache.
 *
 * @param {string} [sessionID] - When truthy, only that session's entry is
 *   removed; otherwise every entry is cleared.
 */
function resetCurationTracker(sessionID) {
  if (!sessionID) {
    lastCuratedAt.clear();
    return;
  }
  lastCuratedAt.delete(sessionID);
}
|
|
31771
|
+
/**
 * Asks the LLM to consolidate a project's knowledge entries once their count
 * exceeds `cfg.curator.maxEntries`.
 *
 * No work is done (zeros returned) when the curator is disabled, when the
 * entry count is within the limit, or when the LLM returns an empty response.
 * The resulting ops are applied with creation disabled.
 *
 * @param {object} input
 * @param {string} input.projectPath
 * @param {string} input.sessionID
 * @param {object} input.llm - Must expose `prompt(system, user, options)`.
 * @param {*} [input.model] - Overrides `cfg.model` when not nullish.
 * @returns {Promise<{updated: number, deleted: number}>}
 */
async function consolidate(input) {
  const nothingDone = () => ({ updated: 0, deleted: 0 });
  const cfg = config2();
  if (!cfg.curator.enabled) return nothingDone();

  const entries = forProject(input.projectPath, false);
  const limit = cfg.curator.maxEntries;
  if (entries.length <= limit) return nothingDone();

  const userContent = consolidationUser({
    entries: entries.map(({ id, category, title, content }) => ({ id, category, title, content })),
    targetMax: cfg.curator.maxEntries
  });
  const model = input.model ?? cfg.model;
  const responseText = await input.llm.prompt(
    CONSOLIDATION_SYSTEM,
    userContent,
    { model, workerID: "lore-curator", thinking: false, sessionID: input.sessionID, maxTokens: 4096 }
  );
  if (!responseText) return nothingDone();

  const { updated, deleted } = applyOps(parseOps(responseText), {
    projectPath: input.projectPath,
    sessionID: input.sessionID,
    skipCreate: true
    // Consolidation must not add entries.
  });
  return { updated, deleted };
}
|
|
31802
|
+
|
|
31803
|
+
// src/import/index.ts
|
|
31804
|
+
var import_exports = {};
|
|
31805
|
+
__export(import_exports, {
|
|
31806
|
+
clearProviders: () => clearProviders,
|
|
31807
|
+
computeHash: () => computeHash,
|
|
31808
|
+
detectAll: () => detectAll,
|
|
31809
|
+
extractKnowledge: () => extractKnowledge,
|
|
31810
|
+
getProvider: () => getProvider2,
|
|
31811
|
+
getProviders: () => getProviders,
|
|
31812
|
+
isImported: () => isImported,
|
|
31813
|
+
listImports: () => listImports,
|
|
31814
|
+
recordImport: () => recordImport,
|
|
31815
|
+
registerProvider: () => registerProvider
|
|
31816
|
+
});
|
|
31817
|
+
|
|
31818
|
+
// src/import/providers/index.ts
|
|
31819
|
+
// Registry of import providers, in registration order.
var providers = [];

/**
 * Adds a provider to the end of the registry.
 * @param {{name: string}} provider
 */
function registerProvider(provider) {
  providers[providers.length] = provider;
}

/**
 * Returns the live registry array (not a copy).
 * @returns {Array<object>}
 */
function getProviders() {
  return providers;
}

/**
 * Looks up the first registered provider with the given name.
 * @param {string} name
 * @returns {object|undefined} The provider, or undefined when none matches.
 */
function getProvider2(name) {
  for (const candidate of providers) {
    if (candidate.name === name) return candidate;
  }
  return void 0;
}

/**
 * Empties the registry in place, so references obtained through
 * getProviders() observe the change.
 */
function clearProviders() {
  providers.splice(0, providers.length);
}
|
|
31832
|
+
|
|
31833
|
+
// src/import/detect.ts
|
|
31834
|
+
/**
 * Runs every registered provider's `detect` for a project and summarizes the
 * sessions each one found.
 *
 * Providers that find no sessions are omitted. A provider whose detection
 * throws is skipped so the remaining providers still run.
 *
 * @param {string} projectPath
 * @returns {Array<{agentName: string, agentDisplayName: string, sessions: Array<object>, totalTokens: number, totalMessages: number}>}
 *   One summary per provider with sessions, sorted by totalMessages descending.
 */
function detectAll(projectPath) {
  const summaries = [];
  for (const provider of getProviders()) {
    try {
      const sessions = provider.detect(projectPath);
      if (!(sessions.length > 0)) continue;

      let totalTokens = 0;
      let totalMessages = 0;
      for (const session of sessions) {
        totalTokens += session.estimatedTokens;
        totalMessages += session.messageCount;
      }
      summaries.push({
        agentName: provider.name,
        agentDisplayName: provider.displayName,
        sessions,
        totalTokens,
        totalMessages
      });
    } catch {
      // Best-effort: a failing provider is skipped without aborting the others.
    }
  }
  summaries.sort((a, b) => b.totalMessages - a.totalMessages);
  return summaries;
}
|
|
31853
|
+
|
|
31854
|
+
// src/import/extract.ts
|
|
31855
|
+
var IMPORT_CURATOR_SYSTEM = `${CURATOR_SYSTEM}
|
|
31856
|
+
|
|
31857
|
+
ADDITIONAL CONTEXT: You are extracting knowledge from HISTORICAL conversations with a different AI coding agent. Focus on durable insights that are still relevant:
|
|
31858
|
+
- Architecture decisions, design patterns, and project conventions
|
|
31859
|
+
- Gotchas, non-obvious bugs, and their fixes
|
|
31860
|
+
- Developer preferences and workflow patterns
|
|
31861
|
+
- Key technical choices and their rationale
|
|
31862
|
+
|
|
31863
|
+
Ignore:
|
|
31864
|
+
- References to the other agent's specific capabilities or limitations
|
|
31865
|
+
- Task-specific state that is no longer current (e.g. "currently debugging X")
|
|
31866
|
+
- Debugging steps for issues that were already resolved
|
|
31867
|
+
- Transient conversation artifacts (greetings, acknowledgments, status updates)`;
|
|
31868
|
+
/**
 * Extracts knowledge entries from imported conversation chunks.
 *
 * Chunks are processed one at a time in ascending timestamp order. For each
 * chunk the project's current entries are re-read, the import curator prompt
 * is sent to the LLM, and the returned ops are applied. An error from the LLM
 * call or from applying ops marks the chunk as failed and processing moves on.
 * `input.onProgress`, when provided, is called after every chunk.
 *
 * @param {object} input
 * @param {Array<{text: string, timestamp: number}>} input.chunks
 * @param {string} input.projectPath
 * @param {string} input.sessionID
 * @param {object} input.llm - Must expose `prompt(system, user, options)`.
 * @param {*} [input.model]
 * @param {function} [input.onProgress] - Receives `{current, total, created, updated}`.
 * @returns {Promise<{created: number, updated: number, deleted: number, chunksProcessed: number, chunksFailed: number}>}
 */
async function extractKnowledge(input) {
  const totals = {
    created: 0,
    updated: 0,
    deleted: 0,
    chunksProcessed: 0,
    chunksFailed: 0
  };
  const ordered = [...input.chunks].sort((a, b) => a.timestamp - b.timestamp);

  for (const [index, chunk] of ordered.entries()) {
    // Re-read entries every iteration so ops applied for earlier chunks are visible.
    const knownEntries = forProject(input.projectPath, false).map((e) => ({
      id: e.id,
      category: e.category,
      title: e.title,
      content: e.content
    }));
    const userContent = curatorUser({
      messages: chunk.text,
      existing: knownEntries
    });

    try {
      const response = await input.llm.prompt(
        IMPORT_CURATOR_SYSTEM,
        userContent,
        {
          model: input.model,
          workerID: "lore-import",
          thinking: false,
          maxTokens: 4096,
          sessionID: input.sessionID
        }
      );
      if (response) {
        const applied = applyOps(parseOps(response), {
          projectPath: input.projectPath,
          sessionID: input.sessionID
        });
        totals.created += applied.created;
        totals.updated += applied.updated;
        totals.deleted += applied.deleted;
      }
      totals.chunksProcessed++;
    } catch {
      totals.chunksFailed++;
    }

    input.onProgress?.({
      current: index + 1,
      total: ordered.length,
      created: totals.created,
      updated: totals.updated
    });
  }
  return totals;
}
|
|
31925
|
+
|
|
31926
|
+
// src/import/history.ts
|
|
31927
|
+
/**
 * Checks whether a source was already imported with the same content hash.
 *
 * @param {string} projectPath
 * @param {string} agentName
 * @param {string} sourceId
 * @param {string} sourceHash - Hash to compare against the stored `source_hash`.
 * @returns {object|null} The `import_history` row when one exists for the
 *   source and its hash matches; null when there is no row or the hash differs.
 */
function isImported(projectPath, agentName, sourceId, sourceHash) {
  const projectId2 = ensureProject(projectPath);
  const lookup = db().query(
    `SELECT * FROM import_history
WHERE project_id = ? AND agent_name = ? AND source_id = ?`
  );
  const record = lookup.get(projectId2, agentName, sourceId);
  // A differing hash means the source changed since it was imported.
  return record && record.source_hash === sourceHash ? record : null;
}
|
|
31937
|
+
/**
 * Writes an `import_history` row for a source, stamped with a fresh UUID and
 * the current time.
 *
 * @param {string} projectPath
 * @param {string} agentName
 * @param {string} sourceId
 * @param {string} sourceHash
 * @param {{created: number, updated: number}} stats - Entry counts from the import.
 */
function recordImport(projectPath, agentName, sourceId, sourceHash, stats) {
  const projectId2 = ensureProject(projectPath);
  const insert = db().query(
    `INSERT OR REPLACE INTO import_history
(id, project_id, agent_name, source_id, source_hash, entries_created, entries_updated, imported_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)`
  );
  // Values in the same order as the column list above.
  const values = [
    crypto.randomUUID(),
    projectId2,
    agentName,
    sourceId,
    sourceHash,
    stats.created,
    stats.updated,
    Date.now()
  ];
  insert.run(...values);
}
|
|
31954
|
+
/**
 * Lists a project's import history, newest first, leaving out rows whose
 * `source_id` is the `'__declined__'` marker.
 *
 * @param {string} projectPath
 * @returns {Array<object>} `import_history` rows ordered by `imported_at` descending.
 */
function listImports(projectPath) {
  const projectId2 = ensureProject(projectPath);
  const history = db().query(
    `SELECT * FROM import_history
WHERE project_id = ? AND source_id != '__declined__'
ORDER BY imported_at DESC`
  );
  return history.all(projectId2);
}
|
|
31962
|
+
/**
 * Builds a change-detection key of the form `size:messageCount:lastTimestamp`.
 * Any part that is null or undefined is rendered as 0.
 *
 * @param {{size?: number, messageCount?: number, lastTimestamp?: number}} parts
 * @returns {string}
 */
function computeHash(parts) {
  const { size, messageCount, lastTimestamp } = parts;
  return [size ?? 0, messageCount ?? 0, lastTimestamp ?? 0].join(":");
}
|
|
31965
|
+
|
|
31966
|
+
// src/import/providers/claude-code.ts
|
|
31967
|
+
import { readdirSync as readdirSync2, readFileSync as readFileSync4, statSync as statSync5 } from "fs";
|
|
31968
|
+
import { join as join8 } from "path";
|
|
31969
|
+
import { homedir as homedir2 } from "os";
|
|
31970
|
+
var CLAUDE_DIR = join8(homedir2(), ".claude", "projects");
|
|
31971
|
+
var MAX_TOOL_OUTPUT_CHARS = 500;
|
|
31972
|
+
var DEFAULT_MAX_TOKENS = 12288;
|
|
31973
|
+
/**
 * Converts a project path into the directory name looked up under
 * CLAUDE_DIR by replacing every "/" with "-".
 *
 * @param {string} projectPath
 * @returns {string}
 */
function manglePath(projectPath) {
  return projectPath.split("/").join("-");
}
|
|
31976
|
+
/**
 * Rough token estimate: one token per three characters, rounded up.
 *
 * @param {string} text4
 * @returns {number}
 */
function estimateTokens4(text4) {
  const charsPerToken = 3;
  return Math.ceil(text4.length / charsPerToken);
}
|
|
31979
|
+
/**
 * Shortens text to at most `max` characters, appending "..." when it was cut.
 *
 * @param {string} text4
 * @param {number} max - Maximum number of characters kept from the input.
 * @returns {string} The input unchanged when it already fits.
 */
function truncate(text4, max) {
  return text4.length > max ? `${text4.slice(0, max)}...` : text4;
}
|
|
31983
|
+
/**
 * Renders a single message content block as plain text.
 *
 * - "text" blocks yield their text as-is.
 * - "tool_use" blocks yield `[tool: <name>] <JSON input>`, with the JSON
 *   truncated to MAX_TOOL_OUTPUT_CHARS.
 * - "tool_result" blocks yield `[tool_result] <content>` (truncated the same
 *   way); array content keeps only its non-empty text parts, and blocks with
 *   no textual content yield null.
 * - "thinking" and any other block type yield null.
 *
 * @param {{type: string}} block
 * @returns {string|null}
 */
function blockToText(block) {
  const kind = block.type;

  if (kind === "text") {
    return block.text;
  }

  if (kind === "tool_use") {
    const inputSummary = truncate(JSON.stringify(block.input), MAX_TOOL_OUTPUT_CHARS);
    return `[tool: ${block.name}] ${inputSummary}`;
  }

  if (kind === "tool_result") {
    const raw = block.content;
    let body = "";
    if (typeof raw === "string") {
      body = raw;
    } else if (Array.isArray(raw)) {
      const pieces = [];
      for (const part of raw) {
        if (part.type === "text" && part.text) pieces.push(part.text);
      }
      body = pieces.join("\n");
    }
    if (!body) return null;
    return `[tool_result] ${truncate(body, MAX_TOOL_OUTPUT_CHARS)}`;
  }

  // "thinking" blocks and unknown types are intentionally dropped.
  return null;
}
|
|
32013
|
+
/**
 * Converts one parsed session-log line into a `[role] text` string.
 *
 * Only "user" and "assistant" lines are rendered. User content may be a plain
 * string or an array of content blocks; assistant content must be an array of
 * blocks. Lines of any other type, lines with missing or unexpectedly shaped
 * `message.content`, and lines whose blocks all render to nothing yield null.
 * Malformed lines must not throw here: the caller converts every line of a
 * file in one loop, so a single bad line would otherwise abort the whole file.
 *
 * @param {*} parsed - One JSON-decoded line (may be any JSON value).
 * @returns {string|null}
 */
function lineToText(parsed) {
  const role = parsed?.type;
  if (role !== "user" && role !== "assistant") return null;

  const content = parsed.message?.content;
  // Only user messages are allowed to carry plain-string content.
  if (role === "user" && typeof content === "string") {
    return `[user] ${content}`;
  }
  if (!Array.isArray(content)) return null;

  const parts = content.map(blockToText).filter(Boolean);
  return parts.length > 0 ? `[${role}] ${parts.join("\n")}` : null;
}
|
|
32032
|
+
/**
 * Reads a JSON Lines file and returns the decoded value of every line that
 * parses. Blank lines and lines that are not valid JSON are skipped.
 *
 * @param {string} filePath
 * @returns {Array<*>} Decoded lines in file order.
 * @throws If the file cannot be read.
 */
function parseJSONL(filePath) {
  return readFileSync4(filePath, "utf-8")
    .split("\n")
    .flatMap((line) => {
      if (!line.trim()) return [];
      try {
        // Wrapped in an array so flatMap keeps array-valued lines intact.
        return [JSON.parse(line)];
      } catch {
        return [];
      }
    });
}
|
|
32044
|
+
/**
 * Summarizes a session log file without rendering its messages.
 *
 * Every non-blank line is JSON-decoded (undecodable lines are ignored) to
 * collect: the first `sessionId` seen, the earliest and latest valid
 * `timestamp`, and the number of "user"/"assistant" lines. The token estimate
 * is the file's character count divided by five, rounded up.
 *
 * @param {string} filePath
 * @returns {{sessionId: string, startedAt: number, lastActivityAt: number, messageCount: number, estimatedTokens: number}|null}
 *   null when the file cannot be read, is empty, has no session ID, or
 *   contains no user/assistant lines. `startedAt` falls back to the current
 *   time when no valid timestamp was found; `lastActivityAt` stays 0 in that case.
 */
function getSessionMetadata(filePath) {
  let raw;
  try {
    raw = readFileSync4(filePath, "utf-8");
  } catch {
    return null;
  }

  const records = raw.split("\n").filter((l) => l.trim());
  if (records.length === 0) return null;

  let sessionId;
  let earliest = Infinity;
  let latest = 0;
  let messageCount = 0;
  for (const record of records) {
    try {
      const parsed = JSON.parse(record);
      if (!sessionId && parsed.sessionId) {
        sessionId = parsed.sessionId;
      }
      if (parsed.timestamp) {
        const ts = new Date(parsed.timestamp).getTime();
        if (!Number.isNaN(ts)) {
          earliest = Math.min(earliest, ts);
          latest = Math.max(latest, ts);
        }
      }
      const isMessage = parsed.type === "user" || parsed.type === "assistant";
      if (isMessage) messageCount += 1;
    } catch {
      // Undecodable (or non-object) lines contribute nothing.
    }
  }
  if (!sessionId || messageCount === 0) return null;

  return {
    sessionId,
    startedAt: earliest === Infinity ? Date.now() : earliest,
    lastActivityAt: latest,
    messageCount,
    estimatedTokens: Math.ceil(raw.length / 5)
  };
}
|
|
32085
|
+
/**
 * Import provider for Claude Code session logs stored as `.jsonl` files under
 * `CLAUDE_DIR/<mangled project path>`.
 */
var claudeCodeProvider = {
  name: "claude-code",
  displayName: "Claude Code",

  /**
   * Lists importable sessions for a project.
   *
   * Only regular `.jsonl` files with usable metadata and at least three
   * user/assistant messages are reported. An unreadable project directory
   * yields an empty list.
   *
   * @param {string} projectPath
   * @returns {Array<{id: string, label: string, startedAt: number, lastActivityAt: number, estimatedTokens: number, messageCount: number}>}
   *   Sessions sorted by last activity, newest first; `id` is the file path.
   */
  detect(projectPath) {
    const dir = join8(CLAUDE_DIR, manglePath(projectPath));
    let entries;
    try {
      entries = readdirSync2(dir);
    } catch {
      return [];
    }

    const sessions = [];
    for (const entry of entries) {
      if (!entry.endsWith(".jsonl")) continue;
      const filePath = join8(dir, entry);
      try {
        if (!statSync5(filePath).isFile()) continue;
      } catch {
        continue;
      }
      const meta3 = getSessionMetadata(filePath);
      if (!meta3 || meta3.messageCount < 3) continue;

      const dateStr = new Date(meta3.startedAt).toISOString().slice(0, 10);
      sessions.push({
        id: filePath,
        label: `${dateStr} (${meta3.messageCount} messages)`,
        startedAt: meta3.startedAt,
        lastActivityAt: meta3.lastActivityAt,
        estimatedTokens: meta3.estimatedTokens,
        messageCount: meta3.messageCount
      });
    }
    return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
  },

  /**
   * Reads the given session files and groups their rendered messages into
   * chunks of roughly `maxTokens` estimated tokens each.
   *
   * A chunk is closed before a message that would push it over the budget; a
   * single message larger than the budget still forms its own chunk. Each
   * chunk carries the timestamp of its first message. A message whose
   * timestamp is missing or unparseable is stamped with the current time,
   * because a NaN chunk timestamp would make the label's `toISOString()`
   * call throw.
   *
   * @param {string} _projectPath - Unused; session IDs are already file paths.
   * @param {Array<string>} sessionIds - File paths as returned by `detect`.
   * @param {number} [maxTokens=DEFAULT_MAX_TOKENS]
   * @returns {Array<{label: string, text: string, estimatedTokens: number, timestamp: number}>}
   */
  readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS) {
    const chunks = [];
    for (const filePath of sessionIds) {
      const messages = [];
      for (const line of parseJSONL(filePath)) {
        const text4 = lineToText(line);
        if (!text4) continue;
        const parsedTs = line.timestamp ? new Date(line.timestamp).getTime() : NaN;
        messages.push({
          text: text4,
          timestamp: Number.isNaN(parsedTs) ? Date.now() : parsedTs
        });
      }
      if (messages.length === 0) continue;

      let currentTexts = [];
      let currentTokens = 0;
      let chunkStart = messages[0].timestamp;
      let chunkIndex = 0;
      const flushChunk = () => {
        if (currentTexts.length === 0) return;
        chunkIndex++;
        const text4 = currentTexts.join("\n\n");
        chunks.push({
          label: `Claude Code ${new Date(chunkStart).toISOString().slice(0, 10)} (${chunkIndex})`,
          text: text4,
          estimatedTokens: estimateTokens4(text4),
          timestamp: chunkStart
        });
        currentTexts = [];
        currentTokens = 0;
      };

      for (const msg of messages) {
        const msgTokens = estimateTokens4(msg.text);
        // Never flush an empty chunk: an oversized message stays on its own.
        if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
          flushChunk();
          chunkStart = msg.timestamp;
        }
        currentTexts.push(msg.text);
        currentTokens += msgTokens;
      }
      flushChunk();
    }
    return chunks;
  }
};
|
|
32165
|
+
registerProvider(claudeCodeProvider);
|
|
32166
|
+
|
|
32167
|
+
// src/import/providers/codex.ts
|
|
32168
|
+
import { readdirSync as readdirSync3, readFileSync as readFileSync5, statSync as statSync6, existsSync as existsSync6 } from "fs";
|
|
32169
|
+
import { join as join9 } from "path";
|
|
32170
|
+
import { homedir as homedir3 } from "os";
|
|
32171
|
+
var CODEX_DIR = join9(homedir3(), ".codex");
|
|
32172
|
+
var SESSIONS_DIR = join9(CODEX_DIR, "sessions");
|
|
32173
|
+
var ARCHIVED_DIR = join9(CODEX_DIR, "archived_sessions");
|
|
32174
|
+
var MAX_TOOL_OUTPUT_CHARS2 = 500;
|
|
32175
|
+
var DEFAULT_MAX_TOKENS2 = 12288;
|
|
32176
|
+
/**
 * Rough token estimate: one token per three characters, rounded up.
 *
 * @param {string} text4 - Text to measure.
 * @returns {number} Estimated token count.
 */
function estimateTokens5(text4) {
  const charCount = text4.length;
  return Math.ceil(charCount / 3);
}
|
|
32179
|
+
/**
 * Cap `text4` at `max` characters, appending "..." when anything was cut.
 *
 * @param {string} text4 - Text to shorten.
 * @param {number} max - Maximum number of characters kept.
 * @returns {string} The original text, or its first `max` characters plus "...".
 */
function truncate2(text4, max) {
  const fits = text4.length <= max;
  if (fits) {
    return text4;
  }
  const head = text4.slice(0, max);
  return head + "...";
}
|
|
32183
|
+
/**
 * Recursively collect the paths of all `.jsonl` files below `dir`.
 * Unreadable directories and entries that cannot be stat-ed are skipped.
 *
 * @param {string} dir - Root directory to search.
 * @returns {string[]} Full paths of the matching files (empty if `dir` is missing).
 */
function findJsonlFiles(dir) {
  const found = [];
  if (!existsSync6(dir)) return found;

  const visit = (current) => {
    let names;
    try {
      names = readdirSync3(current);
    } catch {
      // Directory could not be listed: nothing to add from this subtree.
      return;
    }
    for (const name of names) {
      const candidate = join9(current, name);
      try {
        const info = statSync6(candidate);
        if (info.isDirectory()) {
          visit(candidate);
          continue;
        }
        if (info.isFile() && name.endsWith(".jsonl")) {
          found.push(candidate);
        }
      } catch {
        // Entry could not be inspected: ignore it and keep going.
      }
    }
  };

  visit(dir);
  return found;
}
|
|
32206
|
+
/**
 * Render one Codex response item as a single tagged line of text.
 *
 * - `message` items become `[role] text`
 * - `function_call` items become `[tool: name] args` (args truncated)
 * - `function_call_output` items become `[tool_result] output` (truncated)
 *
 * @param {object|null|undefined} item - Response item payload.
 * @returns {string|null} The rendered text, or null when nothing is renderable.
 */
function responseItemToText(item) {
  if (!item) return null;
  const kind = item.type;

  if (kind === "message") {
    if (!item.role || !item.content) return null;
    const body = extractContent(item.content);
    return body ? `[${item.role}] ${body}` : null;
  }

  if (kind === "function_call") {
    if (!item.name) return null;
    let args = "";
    if (item.arguments) {
      args = truncate2(item.arguments, MAX_TOOL_OUTPUT_CHARS2);
    }
    return `[tool: ${item.name}] ${args}`;
  }

  if (kind === "function_call_output") {
    if (!item.output) return null;
    return `[tool_result] ${truncate2(item.output, MAX_TOOL_OUTPUT_CHARS2)}`;
  }

  return null;
}
|
|
32221
|
+
/**
 * Flatten message content into plain text.
 *
 * @param {string|Array<object>|unknown} content3 - Either a string or an
 *   array of parts; parts with a string `text` property contribute a line.
 * @returns {string|null} The string itself, the parts' texts joined by
 *   newlines, or null when there is no text (or the shape is unsupported).
 */
function extractContent(content3) {
  if (typeof content3 === "string") return content3;
  if (!Array.isArray(content3)) return null;
  const texts = [];
  for (const part of content3) {
    // The `in` operator throws on primitives and null, so only inspect
    // real objects; anything else simply has no text to contribute.
    if (part === null || typeof part !== "object") continue;
    if (typeof part.text === "string") {
      texts.push(part.text);
    }
  }
  return texts.length > 0 ? texts.join("\n") : null;
}
|
|
32232
|
+
/**
 * Read a JSON-lines file and return the successfully parsed records.
 * Blank lines and lines that are not valid JSON are skipped; an unreadable
 * file yields an empty array.
 *
 * @param {string} filePath - Path of the `.jsonl` file.
 * @returns {unknown[]} Parsed values in file order.
 */
function parseJSONL2(filePath) {
  let contents;
  try {
    contents = readFileSync5(filePath, "utf-8");
  } catch {
    return [];
  }

  const records = [];
  const rows = contents.split("\n").filter((row) => row.trim() !== "");
  for (const row of rows) {
    try {
      records.push(JSON.parse(row));
    } catch {
      // Malformed line: drop it and carry on with the rest.
    }
  }
  return records;
}
|
|
32249
|
+
/**
 * Read the header of a Codex session file and summarise the session.
 *
 * The first non-blank line must be a JSON object of type `session_meta`
 * carrying a `payload.meta` object; otherwise the file is not treated as a
 * session and null is returned instead of throwing.
 *
 * @param {string} filePath - Path of the session `.jsonl` file.
 * @returns {{id: *, cwd: *, timestamp: *, messageCount: number, fileSize: number}|null}
 *   `messageCount` counts `response_item` and `event_msg` lines; `fileSize`
 *   is the length of the decoded file text. Null when the file is
 *   unreadable, empty, or has no usable header.
 */
function getSessionMeta(filePath) {
  let raw;
  try {
    raw = readFileSync5(filePath, "utf-8");
  } catch {
    return null;
  }
  const lines = raw.split("\n").filter((l) => l.trim());
  if (lines.length === 0) return null;

  let header;
  try {
    header = JSON.parse(lines[0]);
  } catch {
    return null;
  }
  // `JSON.parse` may legitimately return null or a primitive.
  if (header?.type !== "session_meta") return null;
  const info = header.payload?.meta;
  if (info === null || typeof info !== "object") return null;

  let messageCount = 0;
  // The header line is a session_meta record, so it can never be counted.
  for (const line of lines.slice(1)) {
    try {
      const parsed = JSON.parse(line);
      if (parsed?.type === "response_item" || parsed?.type === "event_msg") {
        messageCount++;
      }
    } catch {
      // Malformed line: ignore it for counting purposes.
    }
  }

  return {
    id: info.id,
    cwd: info.cwd,
    timestamp: info.timestamp,
    messageCount,
    fileSize: raw.length
  };
}
|
|
32284
|
+
/**
 * Import provider for Codex sessions stored as `.jsonl` files under
 * `SESSIONS_DIR` and `ARCHIVED_DIR`.
 */
var codexProvider = {
  name: "codex",
  displayName: "Codex",

  /**
   * List sessions whose recorded working directory equals `projectPath`
   * and that contain at least three message lines.
   *
   * @param {string} projectPath - Project directory to match against `cwd`.
   * @returns {object[]} Session descriptors, most recent first.
   */
  detect(projectPath) {
    const sessions = [];
    const allFiles = [
      ...findJsonlFiles(SESSIONS_DIR),
      ...findJsonlFiles(ARCHIVED_DIR)
    ];
    for (const filePath of allFiles) {
      const meta3 = getSessionMeta(filePath);
      if (!meta3) continue;
      if (meta3.cwd !== projectPath) continue;
      if (meta3.messageCount < 3) continue;
      const parsedTs = new Date(meta3.timestamp).getTime();
      // An unparseable timestamp would make toISOString() throw below; fall
      // back to "now", the same fallback readChunks uses.
      const ts = Number.isNaN(parsedTs) ? Date.now() : parsedTs;
      const estimatedTokens = Math.ceil(meta3.fileSize / 5);
      const dateStr = new Date(ts).toISOString().slice(0, 10);
      sessions.push({
        id: filePath,
        label: `${dateStr} (${meta3.messageCount} messages)`,
        startedAt: ts,
        lastActivityAt: ts,
        // Best approximation without reading all lines
        estimatedTokens,
        messageCount: meta3.messageCount
      });
    }
    return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
  },

  /**
   * Read the given session files and pack their renderable records into
   * chunks of roughly `maxTokens` estimated tokens each.
   *
   * @param {string} _projectPath - Unused; kept for the provider interface.
   * @param {string[]} sessionIds - Session file paths (the `id`s from `detect`).
   * @param {number} [maxTokens=DEFAULT_MAX_TOKENS2] - Soft token budget per chunk.
   * @returns {{label: string, text: string, estimatedTokens: number, timestamp: number}[]}
   */
  readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS2) {
    const chunks = [];
    for (const filePath of sessionIds) {
      const lines = parseJSONL2(filePath);
      const messages = [];

      // Every message of a session shares the header timestamp.
      let sessionTimestamp = Date.now();
      const firstLine = lines[0];
      if (firstLine?.type === "session_meta") {
        const ts = new Date(firstLine.payload?.meta?.timestamp).getTime();
        if (!Number.isNaN(ts)) sessionTimestamp = ts;
      }

      for (const line of lines) {
        // Parsed JSONL values may be null or primitives; skip those.
        if (line === null || typeof line !== "object") continue;
        if (line.type === "response_item") {
          const text4 = responseItemToText(line.payload);
          if (text4) {
            messages.push({ text: text4, timestamp: sessionTimestamp });
          }
        } else if (line.type === "event_msg") {
          const output = line.payload?.output;
          if (output) {
            messages.push({
              text: `[exec] ${truncate2(output, MAX_TOOL_OUTPUT_CHARS2)}`,
              timestamp: sessionTimestamp
            });
          }
        } else if (line.type === "compacted") {
          const history = line.payload?.replacement_history;
          if (Array.isArray(history)) {
            for (const item of history) {
              const text4 = responseItemToText(item);
              if (text4) {
                messages.push({ text: text4, timestamp: sessionTimestamp });
              }
            }
          }
        }
      }
      if (messages.length === 0) continue;

      let currentTexts = [];
      let currentTokens = 0;
      let chunkIndex = 0;

      // Emit the accumulated messages as one chunk and reset the accumulator.
      const flushChunk = () => {
        if (currentTexts.length === 0) return;
        chunkIndex++;
        const text4 = currentTexts.join("\n\n");
        chunks.push({
          label: `Codex ${new Date(sessionTimestamp).toISOString().slice(0, 10)} (${chunkIndex})`,
          text: text4,
          estimatedTokens: estimateTokens5(text4),
          timestamp: sessionTimestamp
        });
        currentTexts = [];
        currentTokens = 0;
      };

      for (const msg of messages) {
        const msgTokens = estimateTokens5(msg.text);
        // Only split a non-empty chunk, so one over-budget message still
        // ends up in a chunk of its own.
        if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
          flushChunk();
        }
        currentTexts.push(msg.text);
        currentTokens += msgTokens;
      }
      flushChunk();
    }
    return chunks;
  }
};
|
|
32382
|
+
registerProvider(codexProvider);
|
|
32383
|
+
|
|
32384
|
+
// src/import/providers/opencode.ts
|
|
32385
|
+
import { existsSync as existsSync7 } from "fs";
|
|
32386
|
+
import { join as join10 } from "path";
|
|
32387
|
+
import { homedir as homedir4 } from "os";
|
|
32388
|
+
var OPENCODE_DB_PATH = join10(
|
|
32389
|
+
process.env.XDG_DATA_HOME || join10(homedir4(), ".local", "share"),
|
|
32390
|
+
"opencode",
|
|
32391
|
+
"opencode.db"
|
|
32392
|
+
);
|
|
32393
|
+
var MAX_TOOL_OUTPUT_CHARS3 = 500;
|
|
32394
|
+
var DEFAULT_MAX_TOKENS3 = 12288;
|
|
32395
|
+
/**
 * Approximate the token count of a text as ceil(length / 3).
 *
 * @param {string} text4 - Text to measure.
 * @returns {number} Estimated token count.
 */
function estimateTokens6(text4) {
  const charsPerToken = 3;
  return Math.ceil(text4.length / charsPerToken);
}
|
|
32398
|
+
/**
 * Shorten `text4` to at most `max` characters; a shortened result is
 * suffixed with "...".
 *
 * @param {string} text4 - Text to shorten.
 * @param {number} max - Maximum number of characters kept.
 * @returns {string} The possibly shortened text.
 */
function truncate3(text4, max) {
  if (text4.length > max) {
    const kept = text4.slice(0, max);
    return kept + "...";
  }
  return text4;
}
|
|
32402
|
+
/**
 * Open the OpenCode database at `OPENCODE_DB_PATH` for reading.
 *
 * @returns {object|null} A `Database` handle, or null when the file does
 *   not exist or cannot be opened.
 */
function openDB() {
  if (existsSync7(OPENCODE_DB_PATH)) {
    try {
      // Both spellings of the read-only flag are passed, presumably so the
      // option is honoured by whichever SQLite binding backs `Database`.
      const options = { readonly: true, readOnly: true };
      return new Database(OPENCODE_DB_PATH, options);
    } catch {
      // Opening failed: treated the same as "no database".
    }
  }
  return null;
}
|
|
32410
|
+
/**
 * Check whether a table with the given name is listed in `sqlite_master`.
 *
 * @param {object} database - Handle exposing `query(sql).get(...params)`.
 * @param {string} table - Table name to look up.
 * @returns {boolean} True when the lookup returned a row.
 */
function tableExists(database, table) {
  const lookup = database.query("SELECT name FROM sqlite_master WHERE type='table' AND name=?");
  const match = lookup.get(table);
  return match !== null && match !== undefined;
}
|
|
32414
|
+
/**
 * Render a message's parts as newline-separated text.
 *
 * Text parts contribute their text; completed tool parts with output
 * contribute `[tool: name] output` (output truncated). Everything else is
 * dropped.
 *
 * @param {object[]} parts - Parsed part records.
 * @returns {string} The joined text (empty when nothing was renderable).
 */
function partsToConversationText(parts) {
  const render = (part) => {
    if (part.type === "text") {
      return part.text || null;
    }
    const isFinishedTool =
      part.type === "tool" && part.tool && part.state?.status === "completed" && part.state.output;
    if (isFinishedTool) {
      return `[tool: ${part.tool}] ${truncate3(part.state.output, MAX_TOOL_OUTPUT_CHARS3)}`;
    }
    return null;
  };

  const segments = [];
  for (const part of parts) {
    const rendered = render(part);
    if (rendered !== null) segments.push(rendered);
  }
  return segments.join("\n");
}
|
|
32425
|
+
/**
 * Import provider for OpenCode sessions stored in its SQLite database.
 */
var opencodeProvider = {
  name: "opencode",
  displayName: "OpenCode",

  /**
   * List top-level sessions of the project whose worktree equals
   * `projectPath` and that have at least three messages.
   *
   * @param {string} projectPath - Worktree path to look up in `project`.
   * @returns {object[]} Session descriptors ordered by `time_updated` descending.
   */
  detect(projectPath) {
    const database = openDB();
    if (!database) return [];
    try {
      if (!tableExists(database, "project") || !tableExists(database, "session") || !tableExists(database, "message")) {
        return [];
      }
      const project = database.query("SELECT id FROM project WHERE worktree = ?").get(projectPath);
      if (!project) return [];
      const sessions = database.query(
        `SELECT s.id, s.title, s.time_created, s.time_updated,
                (SELECT COUNT(*) FROM message m WHERE m.session_id = s.id) as msg_count
         FROM session s
         WHERE s.project_id = ? AND s.parent_id IS NULL
         ORDER BY s.time_updated DESC`
      ).all(project.id);
      const results = [];
      for (const sess of sessions) {
        if (sess.msg_count < 3) continue;
        // Flat per-message guess; message bodies are not read here.
        const estimatedTokens = sess.msg_count * 500;
        const dateStr = new Date(sess.time_created).toISOString().slice(0, 10);
        const label = sess.title ? `${dateStr} - ${sess.title} (${sess.msg_count} messages)` : `${dateStr} (${sess.msg_count} messages)`;
        results.push({
          id: sess.id,
          label,
          startedAt: sess.time_created,
          lastActivityAt: sess.time_updated,
          estimatedTokens,
          messageCount: sess.msg_count
        });
      }
      return results;
    } finally {
      database.close();
    }
  },

  /**
   * Read the given sessions and pack their messages into chunks of roughly
   * `maxTokens` estimated tokens each.
   *
   * @param {string} _projectPath - Unused; kept for the provider interface.
   * @param {string[]} sessionIds - Session ids (the `id`s from `detect`).
   * @param {number} [maxTokens=DEFAULT_MAX_TOKENS3] - Soft token budget per chunk.
   * @returns {{label: string, text: string, estimatedTokens: number, timestamp: number}[]}
   */
  readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS3) {
    const database = openDB();
    if (!database) return [];
    const chunks = [];
    try {
      // Querying a missing table would throw; treat it as "nothing to read".
      if (!tableExists(database, "message")) return chunks;
      const hasParts = tableExists(database, "part");
      for (const sessionId of sessionIds) {
        const messages = database.query(
          `SELECT id, data, time_created FROM message
           WHERE session_id = ?
           ORDER BY time_created ASC`
        ).all(sessionId);
        if (messages.length === 0) continue;

        const textMessages = [];
        for (const msg of messages) {
          let msgData;
          try {
            msgData = JSON.parse(msg.data);
          } catch {
            continue;
          }
          // `JSON.parse` can return null, hence the optional access.
          const role = msgData?.role ?? "unknown";
          let contentText = "";
          if (hasParts) {
            const parts = database.query(
              `SELECT data FROM part
               WHERE message_id = ?
               ORDER BY time_created ASC`
            ).all(msg.id);
            const parsedParts = [];
            for (const p2 of parts) {
              try {
                const parsed = JSON.parse(p2.data);
                if (parsed !== null && typeof parsed === "object") parsedParts.push(parsed);
              } catch {
                // Malformed part payload: skip it.
              }
            }
            contentText = partsToConversationText(parsedParts);
          }
          if (!contentText.trim()) continue;
          textMessages.push({
            text: `[${role}] ${contentText}`,
            timestamp: msg.time_created
          });
        }
        if (textMessages.length === 0) continue;

        let currentTexts = [];
        let currentTokens = 0;
        let chunkStart = textMessages[0].timestamp;
        let chunkIndex = 0;

        // Emit the accumulated messages as one chunk and reset the accumulator.
        const flushChunk = () => {
          if (currentTexts.length === 0) return;
          chunkIndex++;
          const text4 = currentTexts.join("\n\n");
          chunks.push({
            label: `OpenCode ${new Date(chunkStart).toISOString().slice(0, 10)} (${chunkIndex})`,
            text: text4,
            estimatedTokens: estimateTokens6(text4),
            timestamp: chunkStart
          });
          currentTexts = [];
          currentTokens = 0;
        };

        for (const msg of textMessages) {
          const msgTokens = estimateTokens6(msg.text);
          // Only split a non-empty chunk, so one over-budget message still
          // ends up in a chunk of its own.
          if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
            flushChunk();
            chunkStart = msg.timestamp;
          }
          currentTexts.push(msg.text);
          currentTokens += msgTokens;
        }
        flushChunk();
      }
    } finally {
      database.close();
    }
    return chunks;
  }
};
|
|
32543
|
+
registerProvider(opencodeProvider);
|
|
32544
|
+
|
|
32545
|
+
// src/import/providers/cline.ts
|
|
32546
|
+
import { readFileSync as readFileSync6, existsSync as existsSync8, statSync as statSync7 } from "fs";
|
|
32547
|
+
import { join as join11 } from "path";
|
|
32548
|
+
import { homedir as homedir5 } from "os";
|
|
32549
|
+
var MAX_TOOL_OUTPUT_CHARS4 = 500;
|
|
32550
|
+
var DEFAULT_MAX_TOKENS4 = 12288;
|
|
32551
|
+
var EXTENSION_IDS = [
|
|
32552
|
+
"saoudrizwan.claude-dev",
|
|
32553
|
+
"cline.cline"
|
|
32554
|
+
];
|
|
32555
|
+
/**
 * Estimate tokens as the text length divided by three, rounded up.
 *
 * @param {string} text4 - Text to measure.
 * @returns {number} Estimated token count.
 */
function estimateTokens7(text4) {
  const { length } = text4;
  return Math.ceil(length / 3);
}
|
|
32558
|
+
/**
 * Return `text4` unchanged when it fits in `max` characters; otherwise
 * return its first `max` characters followed by "...".
 *
 * @param {string} text4 - Text to shorten.
 * @param {number} max - Maximum number of characters kept.
 * @returns {string} The possibly shortened text.
 */
function truncate4(text4, max) {
  const tooLong = !(text4.length <= max);
  return tooLong ? text4.slice(0, max) + "..." : text4;
}
|
|
32562
|
+
function findGlobalStorageDirs() {
|
|
32563
|
+
const home = homedir5();
|
|
32564
|
+
const dirs = [];
|
|
32565
|
+
const basePaths = [];
|
|
32566
|
+
const platform = process.platform;
|
|
32567
|
+
if (platform === "darwin") {
|
|
32568
|
+
basePaths.push(
|
|
32569
|
+
join11(home, "Library", "Application Support", "Code", "User", "globalStorage"),
|
|
32570
|
+
join11(home, "Library", "Application Support", "Code - Insiders", "User", "globalStorage"),
|
|
32571
|
+
join11(home, "Library", "Application Support", "VSCodium", "User", "globalStorage")
|
|
32572
|
+
);
|
|
32573
|
+
} else if (platform === "win32") {
|
|
32574
|
+
const appdata = process.env.APPDATA || join11(home, "AppData", "Roaming");
|
|
32575
|
+
basePaths.push(
|
|
32576
|
+
join11(appdata, "Code", "User", "globalStorage"),
|
|
32577
|
+
join11(appdata, "Code - Insiders", "User", "globalStorage"),
|
|
32578
|
+
join11(appdata, "VSCodium", "User", "globalStorage")
|
|
32579
|
+
);
|
|
32580
|
+
} else {
|
|
32581
|
+
const configHome = process.env.XDG_CONFIG_HOME || join11(home, ".config");
|
|
32582
|
+
basePaths.push(
|
|
32583
|
+
join11(configHome, "Code", "User", "globalStorage"),
|
|
32584
|
+
join11(configHome, "Code - Insiders", "User", "globalStorage"),
|
|
32585
|
+
join11(configHome, "VSCodium", "User", "globalStorage")
|
|
32586
|
+
);
|
|
32587
|
+
basePaths.push(
|
|
32588
|
+
join11(home, ".vscode", "data", "User", "globalStorage"),
|
|
32589
|
+
join11(home, ".vscode-insiders", "data", "User", "globalStorage")
|
|
32590
|
+
);
|
|
32591
|
+
}
|
|
32592
|
+
for (const base of basePaths) {
|
|
32593
|
+
for (const extId of EXTENSION_IDS) {
|
|
32594
|
+
const dir = join11(base, extId);
|
|
32595
|
+
if (existsSync8(dir)) dirs.push(dir);
|
|
32596
|
+
}
|
|
32597
|
+
}
|
|
32598
|
+
return dirs;
|
|
32599
|
+
}
|
|
32600
|
+
/**
 * Load the task history of one storage directory and keep the tasks that
 * were started in `projectPath`.
 *
 * `state/taskHistory.json` is tried first, then `taskHistory.json`; a file
 * that is missing, unreadable, not valid JSON, or not an array is passed over.
 *
 * @param {string} storageDir - Extension global-storage directory.
 * @param {string} projectPath - Value to match against `cwdOnTaskInitialization`.
 * @returns {object[]} Matching history items from the first usable file.
 */
function loadTaskHistory(storageDir, projectPath) {
  const candidates = [
    join11(storageDir, "state", "taskHistory.json"),
    join11(storageDir, "taskHistory.json")
  ];
  for (const candidate of candidates) {
    if (existsSync8(candidate)) {
      try {
        const parsed = JSON.parse(readFileSync6(candidate, "utf-8"));
        if (Array.isArray(parsed)) {
          return parsed.filter((entry) => entry.cwdOnTaskInitialization === projectPath);
        }
      } catch {
        // Unusable file: try the next candidate location.
      }
    }
  }
  return [];
}
|
|
32620
|
+
/**
 * Load `api_conversation_history.json` from a task directory.
 *
 * @param {string} taskDir - Directory of one task.
 * @returns {object[]} The stored message array, or an empty array when the
 *   file is missing, unreadable, invalid JSON, or not an array.
 */
function readConversation(taskDir) {
  const historyFile = join11(taskDir, "api_conversation_history.json");
  if (existsSync8(historyFile)) {
    try {
      const parsed = JSON.parse(readFileSync6(historyFile, "utf-8"));
      if (Array.isArray(parsed)) {
        return parsed;
      }
    } catch {
      // Unreadable or malformed file: reported as an empty conversation.
    }
  }
  return [];
}
|
|
32631
|
+
/**
 * Render one content block of a conversation message as text.
 *
 * - `text` blocks yield their text
 * - `tool_use` blocks yield `[tool: name] <JSON input>` (input truncated)
 * - `tool_result` blocks yield `[tool_result] <text>` (truncated), taking
 *   either a string content or the text entries of an array content
 *
 * @param {object|null|undefined} block - Content block.
 * @returns {string|null|undefined} Rendered text; null for unsupported,
 *   malformed, or empty blocks (a `text` block returns its `text` as-is).
 */
function blockToText2(block) {
  // Stored histories are untrusted JSON; a null/primitive entry has no type.
  if (block === null || typeof block !== "object") return null;
  switch (block.type) {
    case "text":
      return block.text;
    case "tool_use": {
      // JSON.stringify(undefined) is undefined, which truncate4 cannot handle.
      const serialized = JSON.stringify(block.input) ?? "";
      return `[tool: ${block.name}] ${truncate4(serialized, MAX_TOOL_OUTPUT_CHARS4)}`;
    }
    case "tool_result": {
      let content3 = "";
      if (typeof block.content === "string") {
        content3 = block.content;
      } else if (Array.isArray(block.content)) {
        content3 = block.content
          .filter((b) => b?.type === "text")
          .map((b) => b.text)
          .join("\n");
      }
      return content3 ? `[tool_result] ${truncate4(content3, MAX_TOOL_OUTPUT_CHARS4)}` : null;
    }
    default:
      return null;
  }
}
|
|
32655
|
+
/**
 * Render one conversation message as `[role] text`.
 *
 * @param {{role: string, content: string|object[]}} msg - Stored message.
 * @returns {string|null} The rendered message, or null when it has no
 *   renderable content (empty string, no usable blocks, or a content value
 *   that is neither a string nor an array).
 */
function messageToText(msg) {
  const { role, content } = msg;
  if (typeof content === "string") {
    return content ? `[${role}] ${content}` : null;
  }
  // Missing or malformed content would otherwise throw on `.map`.
  if (!Array.isArray(content)) return null;
  const parts = content.map((block) => blockToText2(block)).filter(Boolean);
  return parts.length > 0 ? `[${role}] ${parts.join("\n")}` : null;
}
|
|
32662
|
+
/**
 * Import provider for Cline tasks stored in the editor's extension
 * global-storage directories.
 */
var clineProvider = {
  name: "cline",
  displayName: "Cline",

  /**
   * List tasks started in `projectPath` that have a task directory and at
   * least three stored messages.
   *
   * @param {string} projectPath - Project directory to match.
   * @returns {object[]} Session descriptors, most recent first.
   */
  detect(projectPath) {
    const sessions = [];
    for (const storageDir of findGlobalStorageDirs()) {
      for (const task of loadTaskHistory(storageDir, projectPath)) {
        // path.join throws on non-string segments; skip id-less entries and
        // stringify the rest.
        if (task.id === null || task.id === undefined) continue;
        const taskDir = join11(storageDir, "tasks", String(task.id));
        if (!existsSync8(taskDir)) continue;
        const messages = readConversation(taskDir);
        if (messages.length < 3) continue;

        let historyStat = null;
        try {
          historyStat = statSync7(join11(taskDir, "api_conversation_history.json"));
        } catch {
          // No stat available: the per-message estimate below is used.
        }
        const estimatedTokens = historyStat ? Math.ceil(historyStat.size / 5) : messages.length * 500;

        // A missing/invalid `ts` would make toISOString() throw; fall back
        // to the history file's mtime, then to "now".
        const parsedTs = new Date(task.ts).getTime();
        const ts = Number.isNaN(parsedTs) ? (historyStat?.mtimeMs ?? Date.now()) : parsedTs;
        const dateStr = new Date(ts).toISOString().slice(0, 10);
        const title = typeof task.task === "string" ? task.task : "";
        const label = title ? `${dateStr} - ${truncate4(title, 60)} (${messages.length} messages)` : `${dateStr} (${messages.length} messages)`;
        sessions.push({
          id: taskDir,
          label,
          startedAt: ts,
          lastActivityAt: ts,
          estimatedTokens,
          messageCount: messages.length
        });
      }
    }
    return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
  },

  /**
   * Read the given task directories and pack their messages into chunks of
   * roughly `maxTokens` estimated tokens each.
   *
   * @param {string} _projectPath - Unused; kept for the provider interface.
   * @param {string[]} sessionIds - Task directories (the `id`s from `detect`).
   * @param {number} [maxTokens=DEFAULT_MAX_TOKENS4] - Soft token budget per chunk.
   * @returns {{label: string, text: string, estimatedTokens: number, timestamp: number}[]}
   */
  readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS4) {
    const chunks = [];
    for (const taskDir of sessionIds) {
      const messages = readConversation(taskDir);
      if (messages.length === 0) continue;

      // Messages are not timestamped here; the task directory's mtime
      // stands in for the whole session.
      let sessionTimestamp;
      try {
        sessionTimestamp = statSync7(taskDir).mtimeMs;
      } catch {
        sessionTimestamp = Date.now();
      }

      const textMessages = [];
      for (const msg of messages) {
        const text4 = messageToText(msg);
        if (text4) textMessages.push({ text: text4 });
      }
      if (textMessages.length === 0) continue;

      let currentTexts = [];
      let currentTokens = 0;
      let chunkIndex = 0;

      // Emit the accumulated messages as one chunk and reset the accumulator.
      const flushChunk = () => {
        if (currentTexts.length === 0) return;
        chunkIndex++;
        const text4 = currentTexts.join("\n\n");
        chunks.push({
          label: `Cline ${new Date(sessionTimestamp).toISOString().slice(0, 10)} (${chunkIndex})`,
          text: text4,
          estimatedTokens: estimateTokens7(text4),
          timestamp: sessionTimestamp
        });
        currentTexts = [];
        currentTokens = 0;
      };

      for (const msg of textMessages) {
        const msgTokens = estimateTokens7(msg.text);
        // Only split a non-empty chunk, so one over-budget message still
        // ends up in a chunk of its own.
        if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
          flushChunk();
        }
        currentTexts.push(msg.text);
        currentTokens += msgTokens;
      }
      flushChunk();
    }
    return chunks;
  }
};
|
|
32742
|
+
registerProvider(clineProvider);
|
|
32743
|
+
|
|
32744
|
+
// src/import/providers/continue.ts
|
|
32745
|
+
import { readdirSync as readdirSync5, readFileSync as readFileSync7, existsSync as existsSync9 } from "fs";
|
|
32746
|
+
import { join as join12 } from "path";
|
|
32747
|
+
import { homedir as homedir6 } from "os";
|
|
32748
|
+
var MAX_TOOL_OUTPUT_CHARS5 = 500;
|
|
32749
|
+
var DEFAULT_MAX_TOKENS5 = 12288;
|
|
32750
|
+
/**
 * Cheap token estimate: text length over three, rounded up.
 *
 * @param {string} text4 - Text to measure.
 * @returns {number} Estimated token count.
 */
function estimateTokens8(text4) {
  const approxTokens = text4.length / 3;
  return Math.ceil(approxTokens);
}
|
|
32753
|
+
/**
 * Limit `text4` to `max` characters, marking a cut with a trailing "...".
 *
 * @param {string} text4 - Text to shorten.
 * @param {number} max - Maximum number of characters kept.
 * @returns {string} The possibly shortened text.
 */
function truncate5(text4, max) {
  if (text4.length <= max) {
    return text4;
  }
  const prefix = text4.slice(0, max);
  return prefix + "...";
}
|
|
32757
|
+
/**
 * Resolve the Continue data directory: the `CONTINUE_GLOBAL_DIR`
 * environment variable when set (and non-empty), otherwise `~/.continue`.
 *
 * @returns {string} Directory path.
 */
function continueDir() {
  const override = process.env.CONTINUE_GLOBAL_DIR;
  if (override) {
    return override;
  }
  return join12(homedir6(), ".continue");
}
|
|
32760
|
+
/**
 * Load the session index file `sessions/sessions.json` from the Continue
 * data directory.
 *
 * @returns {object[]} The parsed index, or an empty array when the file is
 *   missing, unreadable, invalid JSON, or not an array.
 */
function loadSessionIndex() {
  const indexPath = join12(continueDir(), "sessions", "sessions.json");
  if (existsSync9(indexPath)) {
    try {
      const parsed = JSON.parse(readFileSync7(indexPath, "utf-8"));
      if (Array.isArray(parsed)) {
        return parsed;
      }
    } catch {
      // Unreadable or malformed index: behave as if there were none.
    }
  }
  return [];
}
|
|
32771
|
+
/**
 * Load one session file `sessions/<sessionId>.json` from the Continue data
 * directory.
 *
 * @param {string} sessionId - Session identifier (file name without `.json`).
 * @returns {*} The parsed JSON value, or null when the file is missing,
 *   unreadable, or not valid JSON.
 */
function loadSession(sessionId) {
  const sessionFile = join12(continueDir(), "sessions", `${sessionId}.json`);
  if (existsSync9(sessionFile)) {
    try {
      const contents = readFileSync7(sessionFile, "utf-8");
      return JSON.parse(contents);
    } catch {
      // Unreadable or malformed session file: reported as absent.
    }
  }
  return null;
}
|
|
32781
|
+
/**
 * Flatten message content into plain text.
 *
 * @param {string|object[]|unknown} content3 - A string, or an array of
 *   parts of which only `{ type: "text", text: string }` entries are used.
 * @returns {string} The string itself, the text parts joined by newlines,
 *   or "" for any other shape.
 */
function extractMessageContent(content3) {
  if (typeof content3 === "string") return content3;
  if (!Array.isArray(content3)) return "";
  return content3
    // Optional access: a null/primitive entry must be skipped, not crash.
    .filter((part) => part?.type === "text" && typeof part.text === "string")
    .map((part) => part.text)
    .join("\n");
}
|
|
32788
|
+
/**
 * Render one session history item as `[role] text`.
 *
 * The text is built from the message content, one `[tool: name] args` line
 * per tool call, and one `[tool_result] output` line per tool-call state
 * with status "done". System messages are dropped, and the role "tool" is
 * reported as "tool_result".
 *
 * @param {{message?: object, toolCallStates?: object[]}} item - History item.
 * @returns {string|null} Rendered text, or null when nothing is renderable.
 */
function historyItemToText(item) {
  const msg = item.message;
  if (!msg) return null;
  if (msg.role === "system") return null;

  const parts = [];
  const content3 = extractMessageContent(msg.content);
  if (content3) parts.push(content3);

  for (const call of msg.toolCalls || []) {
    if (call?.function) {
      const args = truncate5(call.function.arguments || "{}", MAX_TOOL_OUTPUT_CHARS5);
      parts.push(`[tool: ${call.function.name}] ${args}`);
    }
  }

  for (const state of item.toolCallStates || []) {
    if (state?.status !== "done") continue;
    // NOTE(review): output is handled as a plain string; an array of
    // entries is reduced to its string entries / string `content` fields —
    // confirm against the actual session schema. Other shapes are skipped
    // because truncate5 only works on strings.
    let output = state.output;
    if (Array.isArray(output)) {
      output = output
        .map((entry) => (typeof entry === "string" ? entry : entry?.content))
        .filter((text) => typeof text === "string")
        .join("\n");
    }
    if (typeof output === "string" && output) {
      parts.push(`[tool_result] ${truncate5(output, MAX_TOOL_OUTPUT_CHARS5)}`);
    }
  }

  if (parts.length === 0) return null;
  const role = msg.role === "tool" ? "tool_result" : msg.role;
  return `[${role}] ${parts.join("\n")}`;
}
|
|
32814
|
+
/**
 * Import provider for Continue sessions stored as JSON files in the
 * `sessions` folder of the Continue data directory.
 */
var continueProvider = {
  name: "continue",
  displayName: "Continue",

  /**
   * List sessions recorded for `projectPath` with at least three history
   * items: first those listed in the session index, then any session file
   * on disk that the index did not already yield.
   *
   * @param {string} projectPath - Value to match against `workspaceDirectory`.
   * @returns {object[]} Session descriptors, most recent first.
   */
  detect(projectPath) {
    const sessions = [];

    // `dateCreated` is not guaranteed to be in a format Date can parse
    // (e.g. epoch milliseconds stored as a string), and an invalid Date
    // makes toISOString() throw. Try it as-is, then as a number, then give
    // up and use "now".
    const toEpochMs = (value) => {
      const candidates = [new Date(value).getTime(), new Date(Number(value)).getTime()];
      const valid = candidates.find((candidate) => !Number.isNaN(candidate));
      return valid === undefined ? Date.now() : valid;
    };

    for (const meta3 of loadSessionIndex()) {
      if (meta3?.workspaceDirectory !== projectPath) continue;
      const session = loadSession(meta3.sessionId);
      if (!session || !session.history || session.history.length < 3) continue;
      const ts = toEpochMs(meta3.dateCreated);
      const dateStr = new Date(ts).toISOString().slice(0, 10);
      const messageCount = session.history.length;
      const label = meta3.title ? `${dateStr} - ${truncate5(meta3.title, 60)} (${messageCount} messages)` : `${dateStr} (${messageCount} messages)`;
      // Flat per-message guess; message bodies are not measured here.
      const estimatedTokens = messageCount * 500;
      sessions.push({
        id: meta3.sessionId,
        label,
        startedAt: ts,
        lastActivityAt: ts,
        estimatedTokens,
        messageCount
      });
    }

    const sessionsDir = join12(continueDir(), "sessions");
    if (existsSync9(sessionsDir)) {
      const existingIds = new Set(sessions.map((s) => s.id));
      let entries;
      try {
        entries = readdirSync5(sessionsDir);
      } catch {
        entries = [];
      }
      for (const entry of entries) {
        if (!entry.endsWith(".json") || entry === "sessions.json") continue;
        // Strip the suffix only; replace() would remove the first ".json"
        // occurrence anywhere in the name.
        const sessionId = entry.slice(0, -".json".length);
        if (existingIds.has(sessionId)) continue;
        const session = loadSession(sessionId);
        if (!session) continue;
        if (session.workspaceDirectory !== projectPath) continue;
        if (!session.history || session.history.length < 3) continue;
        // No creation date is read from the session file, so the label
        // uses the title (or a short id) and the timestamps use "now".
        const shortName = session.title ? truncate5(session.title, 60) : sessionId.slice(0, 8);
        sessions.push({
          id: sessionId,
          label: `${shortName} (${session.history.length} messages)`,
          startedAt: Date.now(),
          lastActivityAt: Date.now(),
          estimatedTokens: session.history.length * 500,
          messageCount: session.history.length
        });
      }
    }
    return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
  },

  /**
   * Read the given sessions and pack their history into chunks of roughly
   * `maxTokens` estimated tokens each.
   *
   * @param {string} _projectPath - Unused; kept for the provider interface.
   * @param {string[]} sessionIds - Session ids (the `id`s from `detect`).
   * @param {number} [maxTokens=DEFAULT_MAX_TOKENS5] - Soft token budget per chunk.
   * @returns {{label: string, text: string, estimatedTokens: number, timestamp: number}[]}
   */
  readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS5) {
    const chunks = [];
    for (const sessionId of sessionIds) {
      const session = loadSession(sessionId);
      if (!session || !session.history) continue;

      const textMessages = [];
      for (const item of session.history) {
        const text4 = historyItemToText(item);
        if (text4) textMessages.push({ text: text4 });
      }
      if (textMessages.length === 0) continue;

      // The read time is used as the timestamp of every chunk.
      const sessionTimestamp = Date.now();
      let currentTexts = [];
      let currentTokens = 0;
      let chunkIndex = 0;

      // Emit the accumulated messages as one chunk and reset the accumulator.
      const flushChunk = () => {
        if (currentTexts.length === 0) return;
        chunkIndex++;
        const text4 = currentTexts.join("\n\n");
        chunks.push({
          label: `Continue ${session.title || sessionId.slice(0, 8)} (${chunkIndex})`,
          text: text4,
          estimatedTokens: estimateTokens8(text4),
          timestamp: sessionTimestamp
        });
        currentTexts = [];
        currentTokens = 0;
      };

      for (const msg of textMessages) {
        const msgTokens = estimateTokens8(msg.text);
        // Only split a non-empty chunk, so one over-budget message still
        // ends up in a chunk of its own.
        if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
          flushChunk();
        }
        currentTexts.push(msg.text);
        currentTokens += msgTokens;
      }
      flushChunk();
    }
    return chunks;
  }
};
|
|
32909
|
+
// Hand the Continue provider object to registerProvider (defined elsewhere in this bundle).
registerProvider(continueProvider);
|
|
32910
|
+
|
|
32911
|
+
// src/import/providers/pi.ts
|
|
32912
|
+
import { readdirSync as readdirSync6, readFileSync as readFileSync8, statSync as statSync8 } from "fs";
|
|
32913
|
+
import { join as join13 } from "path";
|
|
32914
|
+
import { homedir as homedir7 } from "os";
|
|
32915
|
+
// Root folder (~/.pi/agent/sessions) under which piProvider.detect looks for a per-project session directory.
var PI_DIR = join13(homedir7(), ".pi", "agent", "sessions");
|
|
32916
|
+
// Character budget; piProvider.readChunks caps compaction summaries at twice this value.
var MAX_TOOL_OUTPUT_CHARS6 = 500;
|
|
32917
|
+
// Default `maxTokens` for piProvider.readChunks: the soft per-chunk estimated-token budget.
var DEFAULT_MAX_TOKENS6 = 12288;
|
|
32918
|
+
/**
 * Cheap token estimate for a piece of text: one token per three characters
 * (UTF-16 code units), rounded up.
 *
 * @param text4 Text to measure.
 * @returns Estimated token count (0 for an empty string).
 */
function estimateTokens9(text4) {
  const CHARS_PER_TOKEN = 3;
  const { length } = text4;
  return Math.ceil(length / CHARS_PER_TOKEN);
}
|
|
30365
|
-
function
|
|
30366
|
-
|
|
30367
|
-
|
|
30368
|
-
).all();
|
|
30369
|
-
if (!rows.length) return 0;
|
|
30370
|
-
const update2 = db().prepare(
|
|
30371
|
-
"UPDATE distillations SET r_compression = ?, c_norm = ? WHERE id = ?"
|
|
30372
|
-
);
|
|
30373
|
-
let updated = 0;
|
|
30374
|
-
for (const row of rows) {
|
|
30375
|
-
const sourceIds = parseSourceIds(row.source_ids);
|
|
30376
|
-
if (!sourceIds.length) continue;
|
|
30377
|
-
const placeholders = sourceIds.map(() => "?").join(",");
|
|
30378
|
-
const sources = db().query(
|
|
30379
|
-
`SELECT tokens, created_at FROM temporal_messages WHERE id IN (${placeholders})`
|
|
30380
|
-
).all(...sourceIds);
|
|
30381
|
-
if (!sources.length) continue;
|
|
30382
|
-
const sourceTokens = sources.reduce((sum, s) => sum + s.tokens, 0);
|
|
30383
|
-
const timestamps = sources.map((s) => s.created_at);
|
|
30384
|
-
const rComp = compressionRatio(row.token_count, sourceTokens);
|
|
30385
|
-
const cNorm = temporalCnorm(timestamps);
|
|
30386
|
-
update2.run(rComp, cNorm, row.id);
|
|
30387
|
-
updated++;
|
|
30388
|
-
}
|
|
30389
|
-
if (updated > 0) {
|
|
30390
|
-
info(
|
|
30391
|
-
`backfilled metrics for ${updated} distillations (${rows.length - updated} skipped \u2014 missing sources)`
|
|
30392
|
-
);
|
|
30393
|
-
}
|
|
30394
|
-
return updated;
|
|
32921
|
+
/**
 * Shorten `text4` to at most `max` characters, appending "..." when anything
 * was cut off. Text that already fits is returned unchanged.
 *
 * @param text4 Text to shorten.
 * @param max Maximum number of characters kept from the original text.
 * @returns The original text, or its first `max` characters plus "...".
 */
function truncate6(text4, max) {
  const fits = text4.length <= max;
  return fits ? text4 : `${text4.slice(0, max)}...`;
}
|
|
30396
|
-
|
|
30397
|
-
|
|
30398
|
-
|
|
30399
|
-
|
|
30400
|
-
|
|
30401
|
-
|
|
30402
|
-
run: () => run2
|
|
30403
|
-
});
|
|
30404
|
-
var MAX_ENTRY_CONTENT_LENGTH = 1200;
|
|
30405
|
-
function parseOps(text4) {
|
|
30406
|
-
const cleaned = text4.trim().replace(/^```json?\s*/i, "").replace(/\s*```$/i, "");
|
|
32925
|
+
/**
 * Turn a working-directory path into the directory name used for that
 * project's sessions under PI_DIR.
 *
 * One leading path separator is dropped, every remaining separator becomes
 * "-", and the result is wrapped in "--" on both sides, e.g.
 * "/home/u/proj" -> "--home-u-proj--".
 *
 * Backslashes and drive-letter colons are mapped to "-" as well, so a
 * Windows path such as "C:\Users\me\proj" produces a single valid directory
 * name instead of a string that still contains separators.
 * NOTE(review): the Windows mapping is assumed to match how Pi itself names
 * its session directories; confirm against Pi's session storage code.
 *
 * @param cwd Project working directory path.
 * @returns Encoded directory name of the form "--<encoded>--".
 */
function encodeCwd(cwd) {
  const withoutLeadingSeparator = cwd.replace(/^[\/\\]/, "");
  const encoded = withoutLeadingSeparator.replace(/[\/\\:]/g, "-");
  return `--${encoded}--`;
}
|
|
32929
|
+
/**
 * Read a JSON Lines file and return its entries.
 *
 * The read is best-effort: an unreadable file yields an empty array, and
 * blank or malformed lines are skipped. Lines that hold valid JSON which is
 * not an object (`null`, numbers, strings, booleans) are skipped too,
 * because callers read properties such as `.type` from every entry and
 * would throw on `null`.
 *
 * @param filePath Path of the .jsonl file.
 * @returns Array of parsed entries in file order (possibly empty).
 */
function parseJSONL3(filePath) {
  let raw;
  try {
    raw = readFileSync8(filePath, "utf-8");
  } catch {
    // Missing or unreadable file: treat as "no entries".
    return [];
  }
  const entries = [];
  for (const line of raw.split("\n")) {
    if (!line.trim()) continue;
    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch {
      // Deliberately tolerate corrupt or partially written lines.
      continue;
    }
    if (typeof parsed === "object" && parsed !== null) {
      entries.push(parsed);
    }
  }
  return entries;
}
|
|
30417
|
-
|
|
30418
|
-
|
|
30419
|
-
const
|
|
30420
|
-
|
|
30421
|
-
|
|
30422
|
-
const
|
|
30423
|
-
|
|
30424
|
-
|
|
30425
|
-
|
|
30426
|
-
|
|
30427
|
-
|
|
30428
|
-
|
|
30429
|
-
|
|
30430
|
-
|
|
30431
|
-
|
|
30432
|
-
|
|
30433
|
-
|
|
30434
|
-
|
|
30435
|
-
|
|
30436
|
-
|
|
30437
|
-
const
|
|
30438
|
-
|
|
30439
|
-
|
|
30440
|
-
|
|
30441
|
-
|
|
30442
|
-
|
|
30443
|
-
|
|
30444
|
-
|
|
30445
|
-
|
|
30446
|
-
|
|
30447
|
-
|
|
30448
|
-
|
|
30449
|
-
|
|
30450
|
-
|
|
30451
|
-
|
|
30452
|
-
|
|
30453
|
-
|
|
30454
|
-
|
|
30455
|
-
|
|
30456
|
-
|
|
30457
|
-
|
|
30458
|
-
|
|
30459
|
-
|
|
32946
|
+
/**
 * Reduce a tree of session entries to a single linear path.
 *
 * Entries are linked through `id` / `parentId`. Starting at the entry whose
 * `type` is "session", the walk repeatedly follows the LAST child recorded
 * for the current id (i.e. the child that appears latest in `lines`) until
 * an entry has no children.
 *
 * If there is no "session" entry, or it has no `id`, there is nothing to
 * anchor the walk, so all entries of type "message" are returned in their
 * original order instead.
 *
 * The walk stops as soon as it would revisit an id, so duplicated or
 * self-referential ids in a corrupt file cannot cause an endless loop.
 *
 * @param lines Parsed session entries.
 * @returns Entries on the selected path, excluding the "session" entry.
 */
function linearize(lines) {
  if (lines.length === 0) return [];

  // parentId -> children, in file order.
  const children = new Map();
  let rootLine = null;
  for (const line of lines) {
    if (line.type === "session") {
      rootLine = line;
      continue;
    }
    if (!line.id) continue;
    const pid = line.parentId;
    if (pid) {
      const siblings = children.get(pid) ?? [];
      siblings.push(line);
      children.set(pid, siblings);
    }
  }

  if (!rootLine || !rootLine.id) return lines.filter((l) => l.type === "message");

  const result = [];
  const visited = new Set([rootLine.id]);
  let currentId = rootLine.id;
  while (currentId) {
    const kids = children.get(currentId);
    if (!kids || kids.length === 0) break;
    const next = kids[kids.length - 1];
    // Cycle guard: never step onto an id that is already on the path.
    if (visited.has(next.id)) break;
    visited.add(next.id);
    result.push(next);
    currentId = next.id;
  }
  return result;
}
|
|
32977
|
+
/**
 * Summarise one session file.
 *
 * The file is parsed with `parseJSONL3`; its first entry must have
 * `type === "session"`, otherwise the file is not treated as a session.
 *
 * @param filePath Path of the session .jsonl file.
 * @returns `null` when the file has no entries or no leading session
 *   header; otherwise `{ id, cwd, timestamp, messageCount, fileSize }`,
 *   where `timestamp` is the header timestamp in epoch milliseconds (the
 *   current time if it cannot be parsed) and `fileSize` is the size in bytes
 *   (0 if the file cannot be stat'ed).
 */
function getSessionMeta2(filePath) {
  const entries = parseJSONL3(filePath);
  if (entries.length === 0) return null;

  const [header] = entries;
  if (header.type !== "session") return null;

  // Count conversation entries only; other entry types are ignored here.
  let messageCount = 0;
  for (const entry of entries) {
    if (entry.type === "message") messageCount += 1;
  }

  let fileSize = 0;
  try {
    fileSize = statSync8(filePath).size;
  } catch {
    fileSize = 0;
  }

  const parsedTime = new Date(header.timestamp).getTime();
  const meta = {
    id: header.id,
    cwd: header.cwd,
    timestamp: Number.isNaN(parsedTime) ? Date.now() : parsedTime,
    messageCount,
    fileSize
  };
  return meta;
}
|
|
32999
|
+
/**
 * Render the `content` value of a Pi message entry as plain text.
 *
 * Strings are returned unchanged. For an array, the string elements and the
 * string `text` fields of object elements are joined with newlines; elements
 * with neither are skipped. A lone object contributes its string `text`
 * field, if any. Without this, interpolating structured content into a
 * template literal would produce "[object Object]".
 * NOTE(review): assumes structured content carries its text in a `text`
 * field; confirm against Pi's session format.
 *
 * @param content3 Raw `message.content` value.
 * @returns Extracted text, or "" when there is none.
 */
function piContentToText(content3) {
  if (content3 == null) return "";
  if (typeof content3 === "string") return content3;
  if (Array.isArray(content3)) {
    const pieces = [];
    for (const part of content3) {
      if (typeof part === "string") {
        if (part.length > 0) pieces.push(part);
      } else if (typeof part?.text === "string" && part.text.length > 0) {
        pieces.push(part.text);
      }
    }
    return pieces.join("\n");
  }
  if (typeof content3 === "object") {
    return typeof content3.text === "string" ? content3.text : "";
  }
  return String(content3);
}

/**
 * Import provider for Pi session files stored under PI_DIR.
 */
var piProvider = {
  name: "pi",
  displayName: "Pi",

  /**
   * List the importable sessions for a project.
   *
   * Looks in `PI_DIR/<encodeCwd(projectPath)>` for `.jsonl` files, keeps
   * those with a valid session header and at least 3 message entries, and
   * returns them sorted by `lastActivityAt`, newest first. The session `id`
   * is the file path; `estimatedTokens` is the file size divided by 5.
   *
   * @param projectPath Project working directory.
   * @returns Array of session descriptors (empty if the directory is unreadable).
   */
  detect(projectPath) {
    const encoded = encodeCwd(projectPath);
    const dir = join13(PI_DIR, encoded);
    let entries;
    try {
      entries = readdirSync6(dir);
    } catch {
      // No session directory for this project.
      return [];
    }
    const sessions = [];
    for (const entry of entries) {
      if (!entry.endsWith(".jsonl")) continue;
      const filePath = join13(dir, entry);
      const meta3 = getSessionMeta2(filePath);
      if (!meta3) continue;
      if (meta3.messageCount < 3) continue;
      const dateStr = new Date(meta3.timestamp).toISOString().slice(0, 10);
      const estimatedTokens = Math.ceil(meta3.fileSize / 5);
      sessions.push({
        id: filePath,
        label: `${dateStr} (${meta3.messageCount} messages)`,
        startedAt: meta3.timestamp,
        lastActivityAt: meta3.timestamp,
        estimatedTokens,
        messageCount: meta3.messageCount
      });
    }
    return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
  },

  /**
   * Build text chunks for the requested session files.
   *
   * Each file is parsed, reduced to one linear path with `linearize`, and
   * its "message" entries (as "[role] text") and "compaction" summaries (as
   * "[summary] ...", capped at MAX_TOOL_OUTPUT_CHARS6 * 2 characters) are
   * packed greedily into chunks whose estimated token total stays within
   * `maxTokens`. A single oversized message still gets its own chunk.
   *
   * Message entries without a `message` object, or whose content yields no
   * text, are skipped instead of aborting the import.
   *
   * @param _projectPath Unused by this provider.
   * @param sessionIds Session file paths, as returned in `detect()[i].id`.
   * @param maxTokens Soft per-chunk token budget.
   * @returns Array of `{ label, text, estimatedTokens, timestamp }` chunks.
   */
  readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS6) {
    const chunks = [];
    for (const filePath of sessionIds) {
      const allLines = parseJSONL3(filePath);
      const linearLines = linearize(allLines);

      // Prefer the header timestamp; fall back to "now" if absent or invalid.
      let sessionTimestamp = Date.now();
      const header = allLines.find((l) => l.type === "session");
      if (header?.type === "session") {
        const ts = new Date(header.timestamp).getTime();
        if (!Number.isNaN(ts)) sessionTimestamp = ts;
      }

      const messages = [];
      for (const line of linearLines) {
        if (line.type === "message") {
          const content3 = line.message?.content;
          if (!content3) continue;
          const body = piContentToText(content3);
          if (!body) continue;
          const ts = new Date(line.timestamp).getTime();
          messages.push({
            text: `[${line.message.role}] ${body}`,
            // Carried per message; chunks below are stamped with the session timestamp.
            timestamp: Number.isNaN(ts) ? sessionTimestamp : ts
          });
        } else if (line.type === "compaction") {
          if (line.summary) {
            messages.push({
              text: `[summary] ${truncate6(line.summary, MAX_TOOL_OUTPUT_CHARS6 * 2)}`,
              timestamp: sessionTimestamp
            });
          }
        }
      }
      if (messages.length === 0) continue;

      let currentTexts = [];
      let currentTokens = 0;
      let chunkIndex = 0;

      // Emit the accumulated messages as one chunk and reset the accumulator.
      const flushChunk = () => {
        if (currentTexts.length === 0) return;
        chunkIndex++;
        const text4 = currentTexts.join("\n\n");
        chunks.push({
          label: `Pi ${new Date(sessionTimestamp).toISOString().slice(0, 10)} (${chunkIndex})`,
          text: text4,
          estimatedTokens: estimateTokens9(text4),
          timestamp: sessionTimestamp
        });
        currentTexts = [];
        currentTokens = 0;
      };

      for (const msg of messages) {
        const msgTokens = estimateTokens9(msg.text);
        // Close the current chunk first if this message would overflow it.
        if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
          flushChunk();
        }
        currentTexts.push(msg.text);
        currentTokens += msgTokens;
      }
      flushChunk();
    }
    return chunks;
  }
};
|
|
33094
|
+
// Hand the Pi provider object to registerProvider (defined elsewhere in this bundle).
registerProvider(piProvider);
|
|
33095
|
+
|
|
33096
|
+
// src/import/providers/aider.ts
|
|
33097
|
+
import { existsSync as existsSync11, readFileSync as readFileSync9, statSync as statSync9 } from "fs";
|
|
33098
|
+
import { join as join14 } from "path";
|
|
33099
|
+
// File name, relative to the project directory, that aiderProvider.detect looks for.
var HISTORY_FILE = ".aider.chat.history.md";
|
|
33100
|
+
// Default `maxTokens` for aiderProvider.readChunks: the soft per-chunk estimated-token budget.
var DEFAULT_MAX_TOKENS7 = 12288;
|
|
33101
|
+
// Matches a whole line of the form "#### <role>" where role is user, assistant or system
// (case-insensitive, trailing whitespace allowed). Capture group 1 is the role.
var ROLE_HEADER_RE = /^####\s+(user|assistant|system)\s*$/i;
|
|
33102
|
+
/**
 * Cheap token estimate for a piece of text: one token per three characters
 * (UTF-16 code units), rounded up.
 *
 * @param text4 Text to measure.
 * @returns Estimated token count (0 for an empty string).
 */
function estimateTokens10(text4) {
  const CHARS_PER_TOKEN = 3;
  const { length } = text4;
  return Math.ceil(length / CHARS_PER_TOKEN);
}
|
|
30485
|
-
function
|
|
30486
|
-
|
|
30487
|
-
|
|
30488
|
-
|
|
30489
|
-
|
|
33105
|
+
/**
 * Split the text of a chat-history markdown file into role-tagged messages.
 *
 * A line matching ROLE_HEADER_RE opens a new section for that role
 * (lower-cased). A line that is exactly "---" after trimming closes the
 * current section. Lines outside any section are discarded. A section
 * becomes a message only if its trimmed text is non-empty.
 *
 * @param content3 Full text of the history file.
 * @returns Array of `{ role, text }` in document order.
 */
function parseAiderHistory(content3) {
  const parsed = [];
  let activeRole = null;
  let buffer = [];

  // Close the section being collected (if any) and switch to `nextRole`.
  const switchTo = (nextRole) => {
    const body = buffer.join("\n").trim();
    if (activeRole && body) {
      parsed.push({ role: activeRole, text: body });
    }
    buffer = [];
    activeRole = nextRole;
  };

  for (const rawLine of content3.split("\n")) {
    const header = ROLE_HEADER_RE.exec(rawLine);
    if (header) {
      switchTo(header[1].toLowerCase());
    } else if (rawLine.trim() === "---") {
      switchTo(null);
    } else if (activeRole) {
      buffer.push(rawLine);
    }
  }

  // Flush whatever section was still open at end of input.
  switchTo(null);
  return parsed;
}
|
|
30492
|
-
|
|
30493
|
-
|
|
30494
|
-
|
|
30495
|
-
|
|
30496
|
-
|
|
30497
|
-
|
|
30498
|
-
|
|
30499
|
-
|
|
30500
|
-
|
|
30501
|
-
|
|
30502
|
-
|
|
30503
|
-
|
|
30504
|
-
|
|
30505
|
-
|
|
30506
|
-
|
|
30507
|
-
|
|
30508
|
-
|
|
30509
|
-
|
|
30510
|
-
|
|
30511
|
-
|
|
30512
|
-
|
|
30513
|
-
|
|
30514
|
-
|
|
30515
|
-
|
|
30516
|
-
|
|
30517
|
-
|
|
30518
|
-
|
|
30519
|
-
|
|
30520
|
-
|
|
30521
|
-
|
|
30522
|
-
|
|
30523
|
-
|
|
33138
|
+
/**
 * Import provider for the chat-history file named by HISTORY_FILE in the
 * project directory.
 */
var aiderProvider = {
  name: "aider",
  displayName: "Aider",

  /**
   * Report the project's history file as a single importable session.
   *
   * @param projectPath Project directory that may contain HISTORY_FILE.
   * @returns A one-element array describing the file, or an empty array
   *   when the file is missing, unreadable, not a regular file, empty, or
   *   parses to fewer than 3 messages.
   */
  detect(projectPath) {
    const historyPath = join14(projectPath, HISTORY_FILE);
    if (!existsSync11(historyPath)) return [];

    let info;
    try {
      info = statSync9(historyPath);
    } catch {
      return [];
    }
    if (!(info.isFile() && info.size !== 0)) return [];

    let raw;
    try {
      raw = readFileSync9(historyPath, "utf-8");
    } catch {
      return [];
    }

    const parsed = parseAiderHistory(raw);
    if (parsed.length < 3) return [];

    const tokenEstimate = estimateTokens10(raw);
    const sizeKb = Math.round(info.size / 1024);
    const descriptor = {
      id: historyPath,
      label: `Chat history (${parsed.length} messages, ${sizeKb}KB)`,
      // Fall back to ctime where the birth time is reported as 0.
      startedAt: info.birthtimeMs || info.ctimeMs,
      lastActivityAt: info.mtimeMs,
      estimatedTokens: tokenEstimate,
      messageCount: parsed.length
    };
    return [descriptor];
  },

  /**
   * Build text chunks from the given history files.
   *
   * Every parsed message is rendered as "[role] text" and packed greedily
   * into chunks whose estimated token total stays within `maxTokens` (a
   * single oversized message still gets its own chunk). Chunks are stamped
   * with the file's mtime, or the current time if it cannot be stat'ed.
   *
   * @param projectPath Unused by this method.
   * @param sessionIds History file paths, as returned in `detect()[i].id`.
   * @param maxTokens Soft per-chunk token budget.
   * @returns Array of `{ label, text, estimatedTokens, timestamp }` chunks.
   */
  readChunks(projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS7) {
    const output = [];
    for (const historyPath of sessionIds) {
      let raw;
      try {
        raw = readFileSync9(historyPath, "utf-8");
      } catch {
        continue;
      }

      const parsed = parseAiderHistory(raw);
      if (parsed.length === 0) continue;

      let stamp;
      try {
        stamp = statSync9(historyPath).mtimeMs;
      } catch {
        stamp = Date.now();
      }

      let pending = [];
      let pendingTokens = 0;
      let emitted = 0;

      // Emit the pending messages as one chunk and reset the accumulator.
      const emit = () => {
        if (pending.length === 0) return;
        emitted += 1;
        const body = pending.join("\n\n");
        output.push({
          label: `Aider history (${emitted})`,
          text: body,
          estimatedTokens: estimateTokens10(body),
          timestamp: stamp
        });
        pending = [];
        pendingTokens = 0;
      };

      for (const entry of parsed) {
        const rendered = `[${entry.role}] ${entry.text}`;
        const cost = estimateTokens10(rendered);
        // Close the current chunk first if this message would overflow it.
        const wouldOverflow = pendingTokens > 0 && pendingTokens + cost > maxTokens;
        if (wouldOverflow) emit();
        pending.push(rendered);
        pendingTokens += cost;
      }
      emit();
    }
    return output;
  }
};
|
|
33218
|
+
// Hand the Aider provider object to registerProvider (defined elsewhere in this bundle).
registerProvider(aiderProvider);
|
|
30535
33219
|
|
|
30536
33220
|
// src/recall.ts
|
|
30537
33221
|
function getTaggedText(tagged) {
|
|
@@ -30797,7 +33481,10 @@ async function searchRecall(input) {
|
|
|
30797
33481
|
info("recall: query expansion failed, using original:", err);
|
|
30798
33482
|
}
|
|
30799
33483
|
}
|
|
33484
|
+
const queryTermCount = filterTerms(query).length;
|
|
33485
|
+
const vectorWeight = queryTermCount >= (searchConfig?.vectorBoostMinTerms ?? 3) ? searchConfig?.vectorBoostWeight ?? 1.5 : 1;
|
|
30800
33486
|
const allRrfLists = [];
|
|
33487
|
+
let primaryListEnd = 0;
|
|
30801
33488
|
for (const q of queries) {
|
|
30802
33489
|
const knowledgeResults = [];
|
|
30803
33490
|
if (knowledgeEnabled && scope !== "session") {
|
|
@@ -30874,7 +33561,11 @@ async function searchRecall(input) {
|
|
|
30874
33561
|
key: (r) => `t:${r.item.id}`
|
|
30875
33562
|
});
|
|
30876
33563
|
}
|
|
33564
|
+
if (primaryListEnd === 0) {
|
|
33565
|
+
primaryListEnd = allRrfLists.length;
|
|
33566
|
+
}
|
|
30877
33567
|
}
|
|
33568
|
+
const perQueryListEnd = allRrfLists.length;
|
|
30878
33569
|
if (isAvailable() && scope !== "session") {
|
|
30879
33570
|
try {
|
|
30880
33571
|
const [queryVec] = await embed([query], "query");
|
|
@@ -30893,7 +33584,8 @@ async function searchRecall(input) {
|
|
|
30893
33584
|
if (vectorTagged.length) {
|
|
30894
33585
|
allRrfLists.push({
|
|
30895
33586
|
items: vectorTagged,
|
|
30896
|
-
key: (r) => `k:${r.item.id}
|
|
33587
|
+
key: (r) => `k:${r.item.id}`,
|
|
33588
|
+
weight: vectorWeight
|
|
30897
33589
|
});
|
|
30898
33590
|
}
|
|
30899
33591
|
}
|
|
@@ -30912,7 +33604,8 @@ async function searchRecall(input) {
|
|
|
30912
33604
|
if (distVectorTagged.length) {
|
|
30913
33605
|
allRrfLists.push({
|
|
30914
33606
|
items: distVectorTagged,
|
|
30915
|
-
key: (r) => `d:${r.item.id}
|
|
33607
|
+
key: (r) => `d:${r.item.id}`,
|
|
33608
|
+
weight: vectorWeight
|
|
30916
33609
|
});
|
|
30917
33610
|
}
|
|
30918
33611
|
}
|
|
@@ -30936,7 +33629,8 @@ async function searchRecall(input) {
|
|
|
30936
33629
|
if (temporalVectorTagged.length) {
|
|
30937
33630
|
allRrfLists.push({
|
|
30938
33631
|
items: temporalVectorTagged,
|
|
30939
|
-
key: (r) => `t:${r.item.id}
|
|
33632
|
+
key: (r) => `t:${r.item.id}`,
|
|
33633
|
+
weight: vectorWeight
|
|
30940
33634
|
});
|
|
30941
33635
|
}
|
|
30942
33636
|
}
|
|
@@ -31039,6 +33733,15 @@ async function searchRecall(input) {
|
|
|
31039
33733
|
});
|
|
31040
33734
|
}
|
|
31041
33735
|
}
|
|
33736
|
+
const MAX_RRF_LISTS = 10;
|
|
33737
|
+
if (allRrfLists.length > MAX_RRF_LISTS) {
|
|
33738
|
+
const primary = allRrfLists.slice(0, primaryListEnd);
|
|
33739
|
+
const expanded = allRrfLists.slice(primaryListEnd, perQueryListEnd);
|
|
33740
|
+
const supplemental = allRrfLists.slice(perQueryListEnd);
|
|
33741
|
+
const budget = Math.max(0, MAX_RRF_LISTS - primary.length - supplemental.length);
|
|
33742
|
+
allRrfLists.length = 0;
|
|
33743
|
+
allRrfLists.push(...primary, ...expanded.slice(0, budget), ...supplemental);
|
|
33744
|
+
}
|
|
31042
33745
|
const fused = reciprocalRankFusion(allRrfLists);
|
|
31043
33746
|
const maxResults = limit * 3;
|
|
31044
33747
|
return fused.slice(0, maxResults);
|
|
@@ -31108,9 +33811,6 @@ async function runRecall(input) {
|
|
|
31108
33811
|
if (input.id) {
|
|
31109
33812
|
return recallById(input.id);
|
|
31110
33813
|
}
|
|
31111
|
-
if (ftsQuery(input.query) === EMPTY_QUERY) {
|
|
31112
|
-
return "Query too vague \u2014 try using specific keywords, file names, or technical terms.";
|
|
31113
|
-
}
|
|
31114
33814
|
const fused = await searchRecall(input);
|
|
31115
33815
|
const recallCfg = input.searchConfig?.recall;
|
|
31116
33816
|
return formatFusedResults(fused, {
|
|
@@ -31157,9 +33857,11 @@ export {
|
|
|
31157
33857
|
config2 as config,
|
|
31158
33858
|
consolidationUser,
|
|
31159
33859
|
consumeCameOutOfIdle,
|
|
33860
|
+
import_exports as conversationImport,
|
|
31160
33861
|
curator_exports as curator,
|
|
31161
33862
|
curatorUser,
|
|
31162
33863
|
data_exports as data,
|
|
33864
|
+
dataDir,
|
|
31163
33865
|
db,
|
|
31164
33866
|
dbPath,
|
|
31165
33867
|
distillation_exports as distillation,
|
|
@@ -31179,6 +33881,8 @@ export {
|
|
|
31179
33881
|
ftsQueryRelaxed,
|
|
31180
33882
|
getGitRemote,
|
|
31181
33883
|
getInstanceId,
|
|
33884
|
+
getKV,
|
|
33885
|
+
getLastImportAt,
|
|
31182
33886
|
getLastTransformEstimate,
|
|
31183
33887
|
getLastTransformedCount,
|
|
31184
33888
|
getLastTurnAt,
|
|
@@ -31191,6 +33895,7 @@ export {
|
|
|
31191
33895
|
importLoreFile,
|
|
31192
33896
|
inline,
|
|
31193
33897
|
inspectSessionState,
|
|
33898
|
+
instruction_detect_exports as instructionDetect,
|
|
31194
33899
|
isFirstRun,
|
|
31195
33900
|
isReasoningPart,
|
|
31196
33901
|
isTextPart,
|
|
@@ -31202,7 +33907,9 @@ export {
|
|
|
31202
33907
|
load,
|
|
31203
33908
|
loadAllSessionCosts,
|
|
31204
33909
|
loadForceMinLayer,
|
|
33910
|
+
loadHeaderSessionIndex,
|
|
31205
33911
|
loadSessionCosts,
|
|
33912
|
+
loadSessionTracking,
|
|
31206
33913
|
log_exports as log,
|
|
31207
33914
|
loreFileExists,
|
|
31208
33915
|
ltm_exports as ltm,
|
|
@@ -31223,10 +33930,14 @@ export {
|
|
|
31223
33930
|
runRecall,
|
|
31224
33931
|
sanitizeSurrogates,
|
|
31225
33932
|
saveForceMinLayer,
|
|
33933
|
+
saveGradientState,
|
|
31226
33934
|
saveSessionCosts,
|
|
33935
|
+
saveSessionTracking,
|
|
31227
33936
|
searchRecall,
|
|
31228
33937
|
serialize,
|
|
31229
33938
|
setForceMinLayer,
|
|
33939
|
+
setKV,
|
|
33940
|
+
setLastImportAt,
|
|
31230
33941
|
setLastTurnAtForTest,
|
|
31231
33942
|
setLtmTokens,
|
|
31232
33943
|
setMaxContextTokens,
|