@loreai/core 0.17.1 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235) hide show
  1. package/dist/bun/agents-file.d.ts +4 -0
  2. package/dist/bun/agents-file.d.ts.map +1 -1
  3. package/dist/bun/config.d.ts +2 -0
  4. package/dist/bun/config.d.ts.map +1 -1
  5. package/dist/bun/curator.d.ts +45 -0
  6. package/dist/bun/curator.d.ts.map +1 -1
  7. package/dist/bun/data-dir.d.ts +18 -0
  8. package/dist/bun/data-dir.d.ts.map +1 -0
  9. package/dist/bun/db.d.ts +12 -0
  10. package/dist/bun/db.d.ts.map +1 -1
  11. package/dist/bun/distillation.d.ts.map +1 -1
  12. package/dist/bun/embedding-vendor.d.ts +22 -38
  13. package/dist/bun/embedding-vendor.d.ts.map +1 -1
  14. package/dist/bun/embedding-worker-types.d.ts +17 -12
  15. package/dist/bun/embedding-worker-types.d.ts.map +1 -1
  16. package/dist/bun/embedding-worker.d.ts +9 -2
  17. package/dist/bun/embedding-worker.d.ts.map +1 -1
  18. package/dist/bun/embedding-worker.js +38864 -33
  19. package/dist/bun/embedding-worker.js.map +4 -4
  20. package/dist/bun/embedding.d.ts +30 -22
  21. package/dist/bun/embedding.d.ts.map +1 -1
  22. package/dist/bun/gradient.d.ts +8 -1
  23. package/dist/bun/gradient.d.ts.map +1 -1
  24. package/dist/bun/import/detect.d.ts +14 -0
  25. package/dist/bun/import/detect.d.ts.map +1 -0
  26. package/dist/bun/import/extract.d.ts +43 -0
  27. package/dist/bun/import/extract.d.ts.map +1 -0
  28. package/dist/bun/import/history.d.ts +40 -0
  29. package/dist/bun/import/history.d.ts.map +1 -0
  30. package/dist/bun/import/index.d.ts +17 -0
  31. package/dist/bun/import/index.d.ts.map +1 -0
  32. package/dist/bun/import/providers/aider.d.ts +2 -0
  33. package/dist/bun/import/providers/aider.d.ts.map +1 -0
  34. package/dist/bun/import/providers/claude-code.d.ts +2 -0
  35. package/dist/bun/import/providers/claude-code.d.ts.map +1 -0
  36. package/dist/bun/import/providers/cline.d.ts +2 -0
  37. package/dist/bun/import/providers/cline.d.ts.map +1 -0
  38. package/dist/bun/import/providers/codex.d.ts +2 -0
  39. package/dist/bun/import/providers/codex.d.ts.map +1 -0
  40. package/dist/bun/import/providers/continue.d.ts +2 -0
  41. package/dist/bun/import/providers/continue.d.ts.map +1 -0
  42. package/dist/bun/import/providers/index.d.ts +19 -0
  43. package/dist/bun/import/providers/index.d.ts.map +1 -0
  44. package/dist/bun/import/providers/opencode.d.ts +2 -0
  45. package/dist/bun/import/providers/opencode.d.ts.map +1 -0
  46. package/dist/bun/import/providers/pi.d.ts +2 -0
  47. package/dist/bun/import/providers/pi.d.ts.map +1 -0
  48. package/dist/bun/import/types.d.ts +82 -0
  49. package/dist/bun/import/types.d.ts.map +1 -0
  50. package/dist/bun/index.d.ts +4 -1
  51. package/dist/bun/index.d.ts.map +1 -1
  52. package/dist/bun/index.js +2217 -224
  53. package/dist/bun/index.js.map +4 -4
  54. package/dist/bun/instruction-detect.d.ts +66 -0
  55. package/dist/bun/instruction-detect.d.ts.map +1 -0
  56. package/dist/bun/log.d.ts +9 -0
  57. package/dist/bun/log.d.ts.map +1 -1
  58. package/dist/bun/ltm.d.ts +40 -0
  59. package/dist/bun/ltm.d.ts.map +1 -1
  60. package/dist/bun/pattern-extract.d.ts +7 -0
  61. package/dist/bun/pattern-extract.d.ts.map +1 -1
  62. package/dist/bun/prompt.d.ts +1 -1
  63. package/dist/bun/prompt.d.ts.map +1 -1
  64. package/dist/bun/recall.d.ts.map +1 -1
  65. package/dist/bun/search.d.ts +5 -3
  66. package/dist/bun/search.d.ts.map +1 -1
  67. package/dist/bun/temporal.d.ts.map +1 -1
  68. package/dist/bun/types.d.ts +1 -1
  69. package/dist/node/agents-file.d.ts +4 -0
  70. package/dist/node/agents-file.d.ts.map +1 -1
  71. package/dist/node/config.d.ts +2 -0
  72. package/dist/node/config.d.ts.map +1 -1
  73. package/dist/node/curator.d.ts +45 -0
  74. package/dist/node/curator.d.ts.map +1 -1
  75. package/dist/node/data-dir.d.ts +18 -0
  76. package/dist/node/data-dir.d.ts.map +1 -0
  77. package/dist/node/db.d.ts +12 -0
  78. package/dist/node/db.d.ts.map +1 -1
  79. package/dist/node/distillation.d.ts.map +1 -1
  80. package/dist/node/embedding-vendor.d.ts +22 -38
  81. package/dist/node/embedding-vendor.d.ts.map +1 -1
  82. package/dist/node/embedding-worker-types.d.ts +17 -12
  83. package/dist/node/embedding-worker-types.d.ts.map +1 -1
  84. package/dist/node/embedding-worker.d.ts +9 -2
  85. package/dist/node/embedding-worker.d.ts.map +1 -1
  86. package/dist/node/embedding-worker.js +38864 -33
  87. package/dist/node/embedding-worker.js.map +4 -4
  88. package/dist/node/embedding.d.ts +30 -22
  89. package/dist/node/embedding.d.ts.map +1 -1
  90. package/dist/node/gradient.d.ts +8 -1
  91. package/dist/node/gradient.d.ts.map +1 -1
  92. package/dist/node/import/detect.d.ts +14 -0
  93. package/dist/node/import/detect.d.ts.map +1 -0
  94. package/dist/node/import/extract.d.ts +43 -0
  95. package/dist/node/import/extract.d.ts.map +1 -0
  96. package/dist/node/import/history.d.ts +40 -0
  97. package/dist/node/import/history.d.ts.map +1 -0
  98. package/dist/node/import/index.d.ts +17 -0
  99. package/dist/node/import/index.d.ts.map +1 -0
  100. package/dist/node/import/providers/aider.d.ts +2 -0
  101. package/dist/node/import/providers/aider.d.ts.map +1 -0
  102. package/dist/node/import/providers/claude-code.d.ts +2 -0
  103. package/dist/node/import/providers/claude-code.d.ts.map +1 -0
  104. package/dist/node/import/providers/cline.d.ts +2 -0
  105. package/dist/node/import/providers/cline.d.ts.map +1 -0
  106. package/dist/node/import/providers/codex.d.ts +2 -0
  107. package/dist/node/import/providers/codex.d.ts.map +1 -0
  108. package/dist/node/import/providers/continue.d.ts +2 -0
  109. package/dist/node/import/providers/continue.d.ts.map +1 -0
  110. package/dist/node/import/providers/index.d.ts +19 -0
  111. package/dist/node/import/providers/index.d.ts.map +1 -0
  112. package/dist/node/import/providers/opencode.d.ts +2 -0
  113. package/dist/node/import/providers/opencode.d.ts.map +1 -0
  114. package/dist/node/import/providers/pi.d.ts +2 -0
  115. package/dist/node/import/providers/pi.d.ts.map +1 -0
  116. package/dist/node/import/types.d.ts +82 -0
  117. package/dist/node/import/types.d.ts.map +1 -0
  118. package/dist/node/index.d.ts +4 -1
  119. package/dist/node/index.d.ts.map +1 -1
  120. package/dist/node/index.js +2217 -224
  121. package/dist/node/index.js.map +4 -4
  122. package/dist/node/instruction-detect.d.ts +66 -0
  123. package/dist/node/instruction-detect.d.ts.map +1 -0
  124. package/dist/node/log.d.ts +9 -0
  125. package/dist/node/log.d.ts.map +1 -1
  126. package/dist/node/ltm.d.ts +40 -0
  127. package/dist/node/ltm.d.ts.map +1 -1
  128. package/dist/node/pattern-extract.d.ts +7 -0
  129. package/dist/node/pattern-extract.d.ts.map +1 -1
  130. package/dist/node/prompt.d.ts +1 -1
  131. package/dist/node/prompt.d.ts.map +1 -1
  132. package/dist/node/recall.d.ts.map +1 -1
  133. package/dist/node/search.d.ts +5 -3
  134. package/dist/node/search.d.ts.map +1 -1
  135. package/dist/node/temporal.d.ts.map +1 -1
  136. package/dist/node/types.d.ts +1 -1
  137. package/dist/types/agents-file.d.ts +4 -0
  138. package/dist/types/agents-file.d.ts.map +1 -1
  139. package/dist/types/config.d.ts +2 -0
  140. package/dist/types/config.d.ts.map +1 -1
  141. package/dist/types/curator.d.ts +45 -0
  142. package/dist/types/curator.d.ts.map +1 -1
  143. package/dist/types/data-dir.d.ts +18 -0
  144. package/dist/types/data-dir.d.ts.map +1 -0
  145. package/dist/types/db.d.ts +12 -0
  146. package/dist/types/db.d.ts.map +1 -1
  147. package/dist/types/distillation.d.ts.map +1 -1
  148. package/dist/types/embedding-vendor.d.ts +22 -38
  149. package/dist/types/embedding-vendor.d.ts.map +1 -1
  150. package/dist/types/embedding-worker-types.d.ts +17 -12
  151. package/dist/types/embedding-worker-types.d.ts.map +1 -1
  152. package/dist/types/embedding-worker.d.ts +9 -2
  153. package/dist/types/embedding-worker.d.ts.map +1 -1
  154. package/dist/types/embedding.d.ts +30 -22
  155. package/dist/types/embedding.d.ts.map +1 -1
  156. package/dist/types/gradient.d.ts +8 -1
  157. package/dist/types/gradient.d.ts.map +1 -1
  158. package/dist/types/import/detect.d.ts +14 -0
  159. package/dist/types/import/detect.d.ts.map +1 -0
  160. package/dist/types/import/extract.d.ts +43 -0
  161. package/dist/types/import/extract.d.ts.map +1 -0
  162. package/dist/types/import/history.d.ts +40 -0
  163. package/dist/types/import/history.d.ts.map +1 -0
  164. package/dist/types/import/index.d.ts +17 -0
  165. package/dist/types/import/index.d.ts.map +1 -0
  166. package/dist/types/import/providers/aider.d.ts +2 -0
  167. package/dist/types/import/providers/aider.d.ts.map +1 -0
  168. package/dist/types/import/providers/claude-code.d.ts +2 -0
  169. package/dist/types/import/providers/claude-code.d.ts.map +1 -0
  170. package/dist/types/import/providers/cline.d.ts +2 -0
  171. package/dist/types/import/providers/cline.d.ts.map +1 -0
  172. package/dist/types/import/providers/codex.d.ts +2 -0
  173. package/dist/types/import/providers/codex.d.ts.map +1 -0
  174. package/dist/types/import/providers/continue.d.ts +2 -0
  175. package/dist/types/import/providers/continue.d.ts.map +1 -0
  176. package/dist/types/import/providers/index.d.ts +19 -0
  177. package/dist/types/import/providers/index.d.ts.map +1 -0
  178. package/dist/types/import/providers/opencode.d.ts +2 -0
  179. package/dist/types/import/providers/opencode.d.ts.map +1 -0
  180. package/dist/types/import/providers/pi.d.ts +2 -0
  181. package/dist/types/import/providers/pi.d.ts.map +1 -0
  182. package/dist/types/import/types.d.ts +82 -0
  183. package/dist/types/import/types.d.ts.map +1 -0
  184. package/dist/types/index.d.ts +4 -1
  185. package/dist/types/index.d.ts.map +1 -1
  186. package/dist/types/instruction-detect.d.ts +66 -0
  187. package/dist/types/instruction-detect.d.ts.map +1 -0
  188. package/dist/types/log.d.ts +9 -0
  189. package/dist/types/log.d.ts.map +1 -1
  190. package/dist/types/ltm.d.ts +40 -0
  191. package/dist/types/ltm.d.ts.map +1 -1
  192. package/dist/types/pattern-extract.d.ts +7 -0
  193. package/dist/types/pattern-extract.d.ts.map +1 -1
  194. package/dist/types/prompt.d.ts +1 -1
  195. package/dist/types/prompt.d.ts.map +1 -1
  196. package/dist/types/recall.d.ts.map +1 -1
  197. package/dist/types/search.d.ts +5 -3
  198. package/dist/types/search.d.ts.map +1 -1
  199. package/dist/types/temporal.d.ts.map +1 -1
  200. package/dist/types/types.d.ts +1 -1
  201. package/package.json +2 -4
  202. package/src/agents-file.ts +41 -13
  203. package/src/config.ts +31 -18
  204. package/src/curator.ts +111 -75
  205. package/src/data-dir.ts +76 -0
  206. package/src/db.ts +110 -11
  207. package/src/distillation.ts +10 -2
  208. package/src/embedding-vendor.ts +23 -40
  209. package/src/embedding-worker-types.ts +19 -11
  210. package/src/embedding-worker.ts +111 -47
  211. package/src/embedding.ts +196 -171
  212. package/src/gradient.ts +9 -1
  213. package/src/import/detect.ts +37 -0
  214. package/src/import/extract.ts +137 -0
  215. package/src/import/history.ts +99 -0
  216. package/src/import/index.ts +45 -0
  217. package/src/import/providers/aider.ts +207 -0
  218. package/src/import/providers/claude-code.ts +339 -0
  219. package/src/import/providers/cline.ts +324 -0
  220. package/src/import/providers/codex.ts +369 -0
  221. package/src/import/providers/continue.ts +304 -0
  222. package/src/import/providers/index.ts +32 -0
  223. package/src/import/providers/opencode.ts +272 -0
  224. package/src/import/providers/pi.ts +332 -0
  225. package/src/import/types.ts +91 -0
  226. package/src/index.ts +5 -0
  227. package/src/instruction-detect.ts +275 -0
  228. package/src/log.ts +91 -3
  229. package/src/ltm.ts +316 -3
  230. package/src/pattern-extract.ts +41 -0
  231. package/src/prompt.ts +7 -1
  232. package/src/recall.ts +43 -5
  233. package/src/search.ts +7 -5
  234. package/src/temporal.ts +8 -6
  235. package/src/types.ts +1 -1
@@ -162,9 +162,8 @@ function sha256(input) {
162
162
  }
163
163
 
164
164
  // src/db.ts
165
- import { join, dirname } from "path";
165
+ import { join as join2, dirname } from "path";
166
166
  import { mkdirSync } from "fs";
167
- import { homedir } from "os";
168
167
 
169
168
  // src/git.ts
170
169
  import { execSync } from "child_process";
@@ -227,6 +226,36 @@ function getGitRemote(path) {
227
226
  }
228
227
  }
229
228
 
229
+ // src/data-dir.ts
230
+ import { existsSync, renameSync } from "node:fs";
231
+ import { join } from "node:path";
232
+ import { homedir } from "node:os";
233
+ var OLD_DIR_NAME = "opencode-lore";
234
+ var NEW_DIR_NAME = "lore";
235
+ var migrationAttempted = false;
236
+ function baseDir() {
237
+ return process.env.XDG_DATA_HOME || join(homedir(), ".local", "share");
238
+ }
239
+ function migrateDataDir() {
240
+ if (migrationAttempted) return;
241
+ migrationAttempted = true;
242
+ if (process.env.NODE_ENV === "test") return;
243
+ const base = baseDir();
244
+ const oldDir = join(base, OLD_DIR_NAME);
245
+ const newDir = join(base, NEW_DIR_NAME);
246
+ try {
247
+ if (existsSync(oldDir) && !existsSync(newDir)) {
248
+ renameSync(oldDir, newDir);
249
+ console.error(`[lore] migrated data directory: ${oldDir} \u2192 ${newDir}`);
250
+ }
251
+ } catch {
252
+ }
253
+ }
254
+ function dataDir() {
255
+ migrateDataDir();
256
+ return join(baseDir(), NEW_DIR_NAME);
257
+ }
258
+
230
259
  // src/db.ts
231
260
  function repoNameFromRemote(remote) {
232
261
  if (!remote) return null;
@@ -663,17 +692,74 @@ var MIGRATIONS = [
663
692
  ALTER TABLE session_state ADD COLUMN ttl_savings REAL NOT NULL DEFAULT 0;
664
693
  ALTER TABLE session_state ADD COLUMN ttl_hits INTEGER NOT NULL DEFAULT 0;
665
694
  ALTER TABLE session_state ADD COLUMN batch_savings REAL NOT NULL DEFAULT 0;
695
+ `,
696
+ `
697
+ -- Version 19: Import history for conversation import idempotency.
698
+ -- Tracks which external agent sessions have been imported to prevent
699
+ -- re-importing unchanged sources and to record user-declined imports.
700
+ CREATE TABLE IF NOT EXISTS import_history (
701
+ id TEXT PRIMARY KEY,
702
+ project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
703
+ agent_name TEXT NOT NULL,
704
+ source_id TEXT NOT NULL,
705
+ source_hash TEXT NOT NULL,
706
+ entries_created INTEGER NOT NULL DEFAULT 0,
707
+ entries_updated INTEGER NOT NULL DEFAULT 0,
708
+ imported_at INTEGER NOT NULL,
709
+ UNIQUE(project_id, agent_name, source_id)
710
+ );
711
+ CREATE INDEX IF NOT EXISTS idx_import_history_project ON import_history(project_id);
712
+ `,
713
+ `
714
+ -- Version 20: Purge worker boilerplate from temporal messages.
715
+ -- Legacy gateway/plugin worker calls (distillation observer, curator,
716
+ -- consolidation, reflector, eval) stored their full system prompts
717
+ -- (containing entire conversation transcripts, up to 1.6MB each) as
718
+ -- temporal messages. These pollute FTS search results by matching
719
+ -- virtually any domain keyword. Safe to delete: their actual output
720
+ -- (distillations, knowledge entries) is stored in dedicated tables.
721
+ DELETE FROM temporal_messages WHERE content LIKE '%You are a memory observer.%'
722
+ OR content LIKE '%You are a long-term memory curator.%'
723
+ OR content LIKE '%You are a long-term memory curator performing a consolidation pass.%'
724
+ OR content LIKE '%You are a memory reflector.%'
725
+ OR content LIKE '%You are evaluating distillation quality.%';
726
+ `,
727
+ `
728
+ -- Version 21: Persist avoided compaction data from live sessions.
729
+ -- Historical estimates previously re-simulated avoided compactions from
730
+ -- temporal message token estimates (chars/3), missing system prompt and
731
+ -- tool definition overhead. Persisting the live session's real shadow
732
+ -- context tracking (from actual API-reported total input tokens) gives
733
+ -- accurate post-restart historical estimates.
734
+ ALTER TABLE session_state ADD COLUMN avoided_compactions INTEGER NOT NULL DEFAULT 0;
735
+ ALTER TABLE session_state ADD COLUMN avoided_compaction_cost REAL NOT NULL DEFAULT 0;
736
+ `,
737
+ `
738
+ -- Version 22: Track when conversation import was last offered/run.
739
+ -- NULL means import has never been offered for this project.
740
+ -- Used by auto-import to avoid re-prompting, and by explicit
741
+ -- \`lore import\` for incremental imports (only newer conversations).
742
+ ALTER TABLE projects ADD COLUMN last_import_at INTEGER;
743
+
744
+ -- Backfill: migrate legacy __declined__ sentinel rows so existing
745
+ -- users who previously declined are not re-prompted after upgrading.
746
+ UPDATE projects SET last_import_at = (
747
+ SELECT ih.imported_at FROM import_history ih
748
+ WHERE ih.project_id = projects.id
749
+ AND ih.source_id = '__declined__'
750
+ LIMIT 1
751
+ )
752
+ WHERE EXISTS (
753
+ SELECT 1 FROM import_history ih
754
+ WHERE ih.project_id = projects.id
755
+ AND ih.source_id = '__declined__'
756
+ );
666
757
  `
667
758
  ];
668
- function dataDir() {
669
- const xdg = process.env.XDG_DATA_HOME;
670
- const base = xdg || join(homedir(), ".local", "share");
671
- return join(base, "opencode-lore");
672
- }
673
759
  function dbPath() {
674
760
  const envPath = process.env.LORE_DB_PATH;
675
761
  if (envPath) return envPath;
676
- return join(dataDir(), "lore.db");
762
+ return join2(dataDir(), "lore.db");
677
763
  }
678
764
  var instance;
679
765
  function db() {
@@ -691,7 +777,7 @@ function db() {
691
777
  }
692
778
  const dir = dataDir();
693
779
  mkdirSync(dir, { recursive: true });
694
- path = join(dir, "lore.db");
780
+ path = join2(dir, "lore.db");
695
781
  }
696
782
  const database = new Database(path);
697
783
  database.exec("PRAGMA journal_mode = WAL");
@@ -858,6 +944,15 @@ function isFirstRun() {
858
944
  const row = db().query("SELECT COUNT(*) as count FROM projects").get();
859
945
  return row.count === 0;
860
946
  }
947
+ function getLastImportAt(projectPath) {
948
+ const id = ensureProject(projectPath);
949
+ const row = db().query("SELECT last_import_at FROM projects WHERE id = ?").get(id);
950
+ return row?.last_import_at ?? null;
951
+ }
952
+ function setLastImportAt(projectPath, timestamp) {
953
+ const id = ensureProject(projectPath);
954
+ db().query("UPDATE projects SET last_import_at = ? WHERE id = ?").run(timestamp, id);
955
+ }
861
956
  function loadForceMinLayer(sessionID) {
862
957
  const row = db().query("SELECT force_min_layer FROM session_state WHERE session_id = ?").get(sessionID);
863
958
  return row?.force_min_layer ?? 0;
@@ -876,8 +971,9 @@ function saveSessionCosts(sessionID, costs) {
876
971
  `INSERT INTO session_state (session_id, force_min_layer, updated_at,
877
972
  conversation_cost, worker_cost, conversation_turns,
878
973
  cache_read_tokens, cache_write_tokens,
879
- warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings)
880
- VALUES (?, COALESCE((SELECT force_min_layer FROM session_state WHERE session_id = ?), 0), ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
974
+ warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings,
975
+ avoided_compactions, avoided_compaction_cost)
976
+ VALUES (?, COALESCE((SELECT force_min_layer FROM session_state WHERE session_id = ?), 0), ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
881
977
  ON CONFLICT(session_id) DO UPDATE SET
882
978
  conversation_cost = excluded.conversation_cost,
883
979
  worker_cost = excluded.worker_cost,
@@ -889,6 +985,8 @@ function saveSessionCosts(sessionID, costs) {
889
985
  ttl_savings = excluded.ttl_savings,
890
986
  ttl_hits = excluded.ttl_hits,
891
987
  batch_savings = excluded.batch_savings,
988
+ avoided_compactions = excluded.avoided_compactions,
989
+ avoided_compaction_cost = excluded.avoided_compaction_cost,
892
990
  updated_at = excluded.updated_at`
893
991
  ).run(
894
992
  sessionID,
@@ -903,14 +1001,17 @@ function saveSessionCosts(sessionID, costs) {
903
1001
  costs.warmupHits,
904
1002
  costs.ttlSavings,
905
1003
  costs.ttlHits,
906
- costs.batchSavings
1004
+ costs.batchSavings,
1005
+ costs.avoidedCompactions,
1006
+ costs.avoidedCompactionCost
907
1007
  );
908
1008
  }
909
1009
  function loadSessionCosts(sessionID) {
910
1010
  const row = db().query(
911
1011
  `SELECT conversation_cost, worker_cost, conversation_turns,
912
1012
  cache_read_tokens, cache_write_tokens,
913
- warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings
1013
+ warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings,
1014
+ avoided_compactions, avoided_compaction_cost
914
1015
  FROM session_state WHERE session_id = ?`
915
1016
  ).get(sessionID);
916
1017
  if (!row) return null;
@@ -924,14 +1025,17 @@ function loadSessionCosts(sessionID) {
924
1025
  warmupHits: row.warmup_hits,
925
1026
  ttlSavings: row.ttl_savings,
926
1027
  ttlHits: row.ttl_hits,
927
- batchSavings: row.batch_savings
1028
+ batchSavings: row.batch_savings,
1029
+ avoidedCompactions: row.avoided_compactions,
1030
+ avoidedCompactionCost: row.avoided_compaction_cost
928
1031
  };
929
1032
  }
930
1033
  function loadAllSessionCosts() {
931
1034
  const rows = db().query(
932
1035
  `SELECT session_id, conversation_cost, worker_cost, conversation_turns,
933
1036
  cache_read_tokens, cache_write_tokens,
934
- warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings
1037
+ warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings,
1038
+ avoided_compactions, avoided_compaction_cost
935
1039
  FROM session_state
936
1040
  WHERE conversation_turns > 0 OR warmup_savings > 0 OR ttl_savings > 0 OR batch_savings > 0`
937
1041
  ).all();
@@ -947,7 +1051,9 @@ function loadAllSessionCosts() {
947
1051
  warmupHits: row.warmup_hits,
948
1052
  ttlSavings: row.ttl_savings,
949
1053
  ttlHits: row.ttl_hits,
950
- batchSavings: row.batch_savings
1054
+ batchSavings: row.batch_savings,
1055
+ avoidedCompactions: row.avoided_compactions,
1056
+ avoidedCompactionCost: row.avoided_compaction_cost
951
1057
  });
952
1058
  }
953
1059
  return result;
@@ -9770,7 +9876,7 @@ var handle = {
9770
9876
  };
9771
9877
 
9772
9878
  // ../../node_modules/.bun/mdast-util-to-markdown@2.1.2/node_modules/mdast-util-to-markdown/lib/join.js
9773
- var join2 = [joinDefaults];
9879
+ var join3 = [joinDefaults];
9774
9880
  function joinDefaults(left, right, parent, state) {
9775
9881
  if (right.type === "code" && formatCodeAsIndented(right, state) && (left.type === "list" || left.type === right.type && formatCodeAsIndented(left, state))) {
9776
9882
  return false;
@@ -10190,7 +10296,7 @@ function toMarkdown(tree, options) {
10190
10296
  handle: void 0,
10191
10297
  indentLines,
10192
10298
  indexStack: [],
10193
- join: [...join2],
10299
+ join: [...join3],
10194
10300
  options: {},
10195
10301
  safe: safeBound,
10196
10302
  stack: [],
@@ -11915,6 +12021,10 @@ Focus ONLY on knowledge that helps a coding agent work effectively on THIS codeb
11915
12021
  - Environment/tooling setup details that affect development
11916
12022
  - Important relationships between components that aren't obvious from reading the code
11917
12023
  - User preferences and working style specific to how they use this project
12024
+ - Repeated user instructions \u2014 when the user says things like "always", "never",
12025
+ "make sure to", "don't forget to", these are high-value preference candidates.
12026
+ If you see instruction-like language, prioritize extracting it as a "preference" entry.
12027
+ These instructions represent how the user wants to work and should persist across sessions.
11918
12028
 
11919
12029
  Do NOT extract:
11920
12030
  - Task-specific details (file currently being edited, current bug being fixed)
@@ -11999,7 +12109,9 @@ IMPORTANT:
11999
12109
  2. When updating, REPLACE the content with a complete rewrite \u2014 never append.
12000
12110
  3. If entries cover the same system from different angles, merge them: update one, delete the rest.
12001
12111
  4. Only create a new entry for genuinely distinct knowledge with no existing home.
12002
- 5. Keep all entries under 150 words. If an existing entry is too long, use an update op to trim it.`;
12112
+ 5. Keep all entries under 150 words. If an existing entry is too long, use an update op to trim it.
12113
+ 6. Pay special attention to user instructions ("always do X", "never do Y", "make sure to X").
12114
+ These are strong signals for "preference" entries with high confidence.`;
12003
12115
  }
12004
12116
  var CONSOLIDATION_SYSTEM = `You are a long-term memory curator performing a consolidation pass. The knowledge base has grown too large and needs to be trimmed.
12005
12117
 
@@ -12163,9 +12275,12 @@ var log_exports = {};
12163
12275
  __export(log_exports, {
12164
12276
  error: () => error,
12165
12277
  info: () => info,
12278
+ logFilePath: () => logFilePath,
12166
12279
  registerSink: () => registerSink,
12167
12280
  warn: () => warn
12168
12281
  });
12282
+ import { appendFileSync, renameSync as renameSync2, statSync, mkdirSync as mkdirSync2 } from "node:fs";
12283
+ import { join as join4 } from "node:path";
12169
12284
  var sink = null;
12170
12285
  function registerSink(s) {
12171
12286
  sink = s;
@@ -12180,17 +12295,71 @@ function findError(args) {
12180
12295
  }
12181
12296
  return void 0;
12182
12297
  }
12298
+ var LOG_MAX_BYTES = 5 * 1024 * 1024;
12299
+ var ROTATION_CHECK_INTERVAL = 1e3;
12300
+ var logPath;
12301
+ var logPathResolved = false;
12302
+ var writeCount = 0;
12303
+ function resolveLogPath() {
12304
+ if (process.env.NODE_ENV === "test") return void 0;
12305
+ try {
12306
+ const dir = dataDir();
12307
+ mkdirSync2(dir, { recursive: true });
12308
+ return join4(dir, "lore.log");
12309
+ } catch {
12310
+ return void 0;
12311
+ }
12312
+ }
12313
+ function logFilePath() {
12314
+ if (!logPathResolved) {
12315
+ logPath = resolveLogPath();
12316
+ logPathResolved = true;
12317
+ }
12318
+ return logPath;
12319
+ }
12320
+ function maybeRotate() {
12321
+ if (!logPath) return;
12322
+ try {
12323
+ const stat = statSync(logPath);
12324
+ if (stat.size > LOG_MAX_BYTES) {
12325
+ renameSync2(logPath, logPath + ".1");
12326
+ }
12327
+ } catch {
12328
+ }
12329
+ }
12330
+ function writeToFile(level, message) {
12331
+ const path = logFilePath();
12332
+ if (!path) return;
12333
+ if (++writeCount % ROTATION_CHECK_INTERVAL === 0) {
12334
+ maybeRotate();
12335
+ }
12336
+ const ts = (/* @__PURE__ */ new Date()).toISOString();
12337
+ const tag = level.toUpperCase().padEnd(5);
12338
+ const flat = message.replace(/\n/g, "\\n");
12339
+ const line = `${ts} [${tag}] ${flat}
12340
+ `;
12341
+ try {
12342
+ appendFileSync(path, line);
12343
+ } catch {
12344
+ }
12345
+ }
12183
12346
  function info(...args) {
12184
12347
  if (isDebug) console.error("[lore]", ...args);
12185
- sink?.info(formatArgs(args));
12348
+ const msg = formatArgs(args);
12349
+ sink?.info(msg);
12350
+ writeToFile("info", msg);
12186
12351
  }
12187
12352
  function warn(...args) {
12188
12353
  if (isDebug) console.error("[lore] WARN:", ...args);
12189
- sink?.warn(formatArgs(args));
12354
+ const msg = formatArgs(args);
12355
+ sink?.warn(msg);
12356
+ writeToFile("warn", msg);
12190
12357
  }
12191
12358
  function error(...args) {
12192
12359
  console.error("[lore]", ...args);
12193
- sink?.error(formatArgs(args));
12360
+ const msg = formatArgs(args);
12361
+ sink?.error(msg);
12362
+ writeToFile("error", msg);
12194
12363
  const err = findError(args);
12195
12364
  if (err) sink?.captureException(err);
12196
12365
  }
@@ -12350,10 +12519,11 @@ function extractTopTerms(text4, limit = 40) {
12350
12519
  function reciprocalRankFusion(lists, k = 60) {
12351
12520
  const scores = /* @__PURE__ */ new Map();
12352
12521
  for (const list4 of lists) {
12522
+ const w = list4.weight ?? 1;
12353
12523
  for (let rank = 0; rank < list4.items.length; rank++) {
12354
12524
  const item = list4.items[rank];
12355
12525
  const id = list4.key(item);
12356
- const rrfScore = 1 / (k + rank);
12526
+ const rrfScore = w / (k + rank);
12357
12527
  const existing = scores.get(id);
12358
12528
  if (existing) {
12359
12529
  existing.score += rrfScore;
@@ -12407,8 +12577,8 @@ async function expandQuery(llm, query, model, sessionID) {
12407
12577
  var embedding_exports = {};
12408
12578
  __export(embedding_exports, {
12409
12579
  LocalProviderUnavailableError: () => LocalProviderUnavailableError,
12410
- _markFastembedUnavailable: () => _markFastembedUnavailable,
12411
- _resetFastembedProbe: () => _resetFastembedProbe,
12580
+ _markLocalProviderUnavailable: () => _markLocalProviderUnavailable,
12581
+ _resetLocalProviderProbe: () => _resetLocalProviderProbe,
12412
12582
  _restoreProvider: () => _restoreProvider,
12413
12583
  _saveAndClearProvider: () => _saveAndClearProvider,
12414
12584
  _shutdownAndDisable: () => _shutdownAndDisable,
@@ -12427,6 +12597,7 @@ __export(embedding_exports, {
12427
12597
  runStartupBackfill: () => runStartupBackfill,
12428
12598
  toBlob: () => toBlob,
12429
12599
  vectorSearch: () => vectorSearch,
12600
+ vectorSearchAllDistillations: () => vectorSearchAllDistillations,
12430
12601
  vectorSearchDistillations: () => vectorSearchDistillations,
12431
12602
  vectorSearchTemporal: () => vectorSearchTemporal
12432
12603
  });
@@ -26200,8 +26371,8 @@ function date4(params) {
26200
26371
  config(en_default());
26201
26372
 
26202
26373
  // src/config.ts
26203
- import { existsSync, readFileSync } from "node:fs";
26204
- import { join as join3 } from "node:path";
26374
+ import { existsSync as existsSync2, readFileSync } from "node:fs";
26375
+ import { join as join5 } from "node:path";
26205
26376
  var LoreConfig = external_exports.object({
26206
26377
  model: external_exports.object({
26207
26378
  providerID: external_exports.string(),
@@ -26318,15 +26489,25 @@ var LoreConfig = external_exports.object({
26318
26489
  }).default({ title: 6, content: 2, category: 3 }),
26319
26490
  /** Max results per source in recall tool before fusion. Default: 10. */
26320
26491
  recallLimit: external_exports.number().min(1).max(50).default(10),
26321
- /** Enable LLM-based query expansion for the recall tool. Default: false.
26322
- * When enabled, the configured model generates 2–3 alternative query phrasings
26323
- * before search, improving recall for ambiguous queries. */
26324
- queryExpansion: external_exports.boolean().default(false),
26492
+ /** Enable LLM-based query expansion for the recall tool. Default: true.
26493
+ * The configured model generates 2–3 alternative query phrasings before
26494
+ * search, improving recall for ambiguous queries. Guarded by a 3-second
26495
+ * timeout — if expansion fails or times out, the original query is used. */
26496
+ queryExpansion: external_exports.boolean().default(true),
26497
+ /** RRF weight multiplier for vector search lists. Applied when the query
26498
+ * has >= `vectorBoostMinTerms` meaningful terms (after stopword removal).
26499
+ * Boosts semantic/vector results relative to keyword-based BM25 lists.
26500
+ * Default: 1.5. Set to 1.0 to disable. */
26501
+ vectorBoostWeight: external_exports.number().min(1).max(5).default(1.5),
26502
+ /** Minimum meaningful query terms (after stopword removal) to activate
26503
+ * vector boost. Short keyword queries (1-2 terms) are left unweighted
26504
+ * since BM25 excels there. Default: 3. */
26505
+ vectorBoostMinTerms: external_exports.number().min(1).max(10).default(3),
26325
26506
  /** Vector embedding search.
26326
26507
  * Supports multiple providers:
26327
- * - "local" (default): fastembed + ONNX Runtime, no API key needed.
26328
- * Uses bge-small-en-v1.5 (384 dims). Model downloaded on first use (~33MB),
26329
- * cached in ~/.cache/fastembed. ~150ms per query embed.
26508
+ * - "local" (default): @huggingface/transformers + nomic-embed-text-v1.5, no API key needed.
26509
+ * 768 dims (Matryoshka-capable: 64–768). Model downloaded on first use (~137MB INT8),
26510
+ * cached locally. Uses task instruction prefixes (search_document: / search_query:).
26330
26511
  * - "voyage": Voyage AI (VOYAGE_API_KEY, voyage-code-3, 1024 dims)
26331
26512
  * - "openai": OpenAI (OPENAI_API_KEY, text-embedding-3-small, 1536 dims)
26332
26513
  * Set enabled: false to explicitly disable even with a provider available. */
@@ -26335,19 +26516,20 @@ var LoreConfig = external_exports.object({
26335
26516
  * Set to false to explicitly disable. */
26336
26517
  enabled: external_exports.boolean().default(true),
26337
26518
  /** Embedding provider. Default: "local".
26338
- * - "local": fastembed + ONNX Runtime, no API key (default model: bge-small-en-v1.5, 384 dims)
26519
+ * - "local": @huggingface/transformers, no API key (default model: nomic-embed-text-v1.5, 768 dims)
26339
26520
  * - "voyage": VOYAGE_API_KEY (default model: voyage-code-3, 1024 dims)
26340
26521
  * - "openai": OPENAI_API_KEY (default model: text-embedding-3-small, 1536 dims) */
26341
26522
  provider: external_exports.enum(["local", "voyage", "openai"]).default("local"),
26342
26523
  /** Model ID for the embedding provider. Default depends on provider. */
26343
- model: external_exports.string().default("BGESmallENV15"),
26344
- /** Embedding dimensions. Default: 384 (local) / 1024 (voyage) / 1536 (openai). */
26345
- dimensions: external_exports.number().min(64).max(2048).default(384)
26524
+ model: external_exports.string().default("nomic-ai/nomic-embed-text-v1.5"),
26525
+ /** Embedding dimensions. Default: 768 (local) / 1024 (voyage) / 1536 (openai).
26526
+ * For the local Nomic v1.5 model, supports Matryoshka dimensions: 64, 128, 256, 512, 768. */
26527
+ dimensions: external_exports.number().min(64).max(2048).default(768)
26346
26528
  }).default({
26347
26529
  enabled: true,
26348
26530
  provider: "local",
26349
- model: "BGESmallENV15",
26350
- dimensions: 384
26531
+ model: "nomic-ai/nomic-embed-text-v1.5",
26532
+ dimensions: 768
26351
26533
  }),
26352
26534
  /** Recall output formatting — controls how search results are presented to the agent. */
26353
26535
  recall: external_exports.object({
@@ -26364,8 +26546,10 @@ var LoreConfig = external_exports.object({
26364
26546
  }).default({
26365
26547
  ftsWeights: { title: 6, content: 2, category: 3 },
26366
26548
  recallLimit: 10,
26367
- queryExpansion: false,
26368
- embeddings: { enabled: true, provider: "local", model: "BGESmallENV15", dimensions: 384 },
26549
+ queryExpansion: true,
26550
+ vectorBoostWeight: 1.5,
26551
+ vectorBoostMinTerms: 3,
26552
+ embeddings: { enabled: true, provider: "local", model: "nomic-ai/nomic-embed-text-v1.5", dimensions: 768 },
26369
26553
  recall: { charBudget: 8e3, relevanceFloor: 0.15, maxResults: 15 }
26370
26554
  }),
26371
26555
  cache: external_exports.object({
@@ -26383,9 +26567,9 @@ var LoreConfig = external_exports.object({
26383
26567
  warming: external_exports.object({
26384
26568
  /** Enable cache warming. Default: true. */
26385
26569
  enabled: external_exports.boolean().default(true),
26386
- /** Override the survival probability threshold below which warming is
26387
- * skipped. Default: auto-derived from cache read/write cost ratio
26388
- * (~0.08 for 5m TTL, ~0.05 for 1h TTL). */
26570
+ /** Override the return probability threshold below which warming is
26571
+ * skipped. Default: auto-derived from corrected cost ratio
26572
+ * read/(write-read) (~0.087 for 5m TTL, ~0.042 for 1h TTL). */
26389
26573
  minReturnProbability: external_exports.number().min(0).max(1).optional()
26390
26574
  }).default({ enabled: true })
26391
26575
  }).default({
@@ -26405,8 +26589,8 @@ function config2() {
26405
26589
  return current;
26406
26590
  }
26407
26591
  async function load(directory) {
26408
- const path = join3(directory, ".lore.json");
26409
- if (existsSync(path)) {
26592
+ const path = join5(directory, ".lore.json");
26593
+ if (existsSync2(path)) {
26410
26594
  const raw = JSON.parse(readFileSync(path, "utf8"));
26411
26595
  current = LoreConfig.parse(raw);
26412
26596
  return current;
@@ -26437,8 +26621,7 @@ function vendorModelInfo() {
26437
26621
  const reg = getRegistration();
26438
26622
  if (!reg) return null;
26439
26623
  return {
26440
- modelAbsoluteDirPath: reg.modelAbsoluteDirPath,
26441
- modelName: reg.modelName
26624
+ localModelPath: reg.localModelPath
26442
26625
  };
26443
26626
  }
26444
26627
  function isVendoredBinary() {
@@ -26525,62 +26708,31 @@ var OpenAIProvider = class {
26525
26708
  var LocalProviderUnavailableError = class extends Error {
26526
26709
  constructor(cause) {
26527
26710
  super(
26528
- "Local embedding provider unavailable: 'fastembed' is not installed. Configure search.embeddings.provider to 'voyage' or 'openai', or reinstall with ONNXRUNTIME_NODE_INSTALL_CUDA=skip to retry the optional fastembed install."
26711
+ "Local embedding provider unavailable: '@huggingface/transformers' failed to initialize. Configure search.embeddings.provider to 'voyage' or 'openai', or set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback."
26529
26712
  );
26530
26713
  this.name = "LocalProviderUnavailableError";
26531
26714
  if (cause !== void 0) this.cause = cause;
26532
26715
  }
26533
26716
  };
26534
- var fastembedModule = null;
26535
- var fastembedProbed = false;
26536
- var fastembedAvailable = false;
26537
- var fastembedLogged = false;
26538
- function _resetFastembedProbe() {
26539
- fastembedModule = null;
26540
- fastembedProbed = false;
26541
- fastembedAvailable = false;
26542
- fastembedLogged = false;
26543
- }
26544
- function _markFastembedUnavailable() {
26545
- fastembedModule = null;
26546
- fastembedProbed = true;
26547
- fastembedAvailable = false;
26548
- fastembedLogged = true;
26549
- }
26550
- async function tryLoadFastembed() {
26551
- if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
26552
- try {
26553
- const mod = await loadFastembedModule();
26554
- if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
26555
- fastembedModule = mod;
26556
- fastembedAvailable = true;
26557
- } catch (err) {
26558
- if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
26559
- fastembedAvailable = false;
26560
- if (!fastembedLogged) {
26561
- fastembedLogged = true;
26562
- const msg = err instanceof Error ? err.message : String(err);
26563
- const remediation = isVendoredBinary() ? "this is a bug in the lore binary; please file an issue. Set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback in the meantime" : "set search.embeddings.provider to 'voyage' or 'openai', set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback, or reinstall fastembed with ONNXRUNTIME_NODE_INSTALL_CUDA=skip";
26564
- info(
26565
- `local embedding provider unavailable (fastembed not installed: ${msg}) \u2014 ${remediation}`
26566
- );
26567
- }
26568
- } finally {
26569
- fastembedProbed = true;
26570
- }
26571
- return fastembedAvailable ? fastembedModule : null;
26717
+ var localProviderKnownBroken = false;
26718
+ var localProviderErrorLogged = false;
26719
+ function _resetLocalProviderProbe() {
26720
+ localProviderKnownBroken = false;
26721
+ localProviderErrorLogged = false;
26572
26722
  }
26573
- async function loadFastembedModule() {
26574
- return await import("fastembed");
26723
+ function _markLocalProviderUnavailable() {
26724
+ localProviderKnownBroken = true;
26725
+ localProviderErrorLogged = true;
26575
26726
  }
26576
- function fastembedKnownUnavailable() {
26577
- return fastembedProbed && !fastembedAvailable;
26727
+ function localProviderKnownUnavailable() {
26728
+ return localProviderKnownBroken;
26578
26729
  }
26579
26730
  var LocalProvider = class {
26580
26731
  // With inference off the main thread, large batches no longer block
26581
26732
  // the event loop. 256 maximises throughput per round-trip to the
26582
- // worker. Backfill callers use a smaller BACKFILL_CHUNK_SIZE to give
26583
- // the worker's priority queue breathing room for recall queries.
26733
+ // worker. Backfill callers use token-budget-based batching (see
26734
+ // nextBatch) to give the worker's priority queue breathing room
26735
+ // for recall queries and prevent OOM on long texts.
26584
26736
  maxBatchSize = 256;
26585
26737
  worker = null;
26586
26738
  workerReady = false;
@@ -26588,14 +26740,14 @@ var LocalProvider = class {
26588
26740
  pendingRequests = /* @__PURE__ */ new Map();
26589
26741
  nextRequestId = 0;
26590
26742
  initPromise = null;
26591
- modelName;
26592
- constructor(modelName) {
26593
- this.modelName = modelName;
26743
+ modelId;
26744
+ dimensions;
26745
+ constructor(modelId, dimensions) {
26746
+ this.modelId = modelId;
26747
+ this.dimensions = dimensions;
26594
26748
  }
26595
26749
  /**
26596
- * Ensure the worker thread is running. Probes fastembed on the main
26597
- * thread first (fast, cached) as a fast-fail gate — the worker is only
26598
- * spawned if the module is known-loadable. Worker startup failure is
26750
+ * Ensure the worker thread is running. Worker startup failure is
26599
26751
  * surfaced as `LocalProviderUnavailableError` to trigger the existing
26600
26752
  * auto-fallback to remote providers.
26601
26753
  */
@@ -26604,8 +26756,7 @@ var LocalProvider = class {
26604
26756
  if (this.workerInitError) throw new LocalProviderUnavailableError(this.workerInitError);
26605
26757
  if (this.initPromise) return this.initPromise;
26606
26758
  this.initPromise = (async () => {
26607
- const fastembed = await tryLoadFastembed();
26608
- if (!fastembed) throw new LocalProviderUnavailableError();
26759
+ if (localProviderKnownBroken) throw new LocalProviderUnavailableError();
26609
26760
  const { Worker } = await import("node:worker_threads");
26610
26761
  const vendorWorkerUrl = globalThis.__LORE_VENDOR_WORKER_URL__;
26611
26762
  let workerUrl;
@@ -26623,8 +26774,9 @@ var LocalProvider = class {
26623
26774
  }
26624
26775
  const vendor = vendorModelInfo();
26625
26776
  const workerInitData = {
26626
- modelName: this.modelName,
26627
- vendorModel: vendor ? { modelAbsoluteDirPath: vendor.modelAbsoluteDirPath, modelName: vendor.modelName } : null
26777
+ modelId: this.modelId,
26778
+ dimensions: this.dimensions,
26779
+ vendorModel: vendor ? { localModelPath: vendor.localModelPath } : null
26628
26780
  };
26629
26781
  this.worker = new Worker(workerUrl, { workerData: workerInitData });
26630
26782
  this.worker.unref();
@@ -26651,6 +26803,13 @@ var LocalProvider = class {
26651
26803
  case "init-error": {
26652
26804
  this.workerInitError = msg.error;
26653
26805
  this.workerReady = false;
26806
+ localProviderKnownBroken = true;
26807
+ if (!localProviderErrorLogged) {
26808
+ localProviderErrorLogged = true;
26809
+ info(
26810
+ `local embedding provider failed to init: ${msg.error}. Set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback.`
26811
+ );
26812
+ }
26654
26813
  for (const [, p2] of this.pendingRequests) {
26655
26814
  p2.reject(new LocalProviderUnavailableError(msg.error));
26656
26815
  }
@@ -26702,6 +26861,8 @@ var LocalProvider = class {
26702
26861
  }
26703
26862
  async embed(texts, inputType) {
26704
26863
  await this.ensureWorker();
26864
+ const prefix = inputType === "document" ? "search_document: " : "search_query: ";
26865
+ const prefixed = texts.map((t2) => prefix + t2);
26705
26866
  const id = this.nextRequestId++;
26706
26867
  const priority = inputType === "query" && texts.length === 1 ? "high" : "normal";
26707
26868
  return new Promise((resolve, reject) => {
@@ -26710,7 +26871,7 @@ var LocalProvider = class {
26710
26871
  this.worker.postMessage({
26711
26872
  type: "embed",
26712
26873
  id,
26713
- texts,
26874
+ texts: prefixed,
26714
26875
  inputType,
26715
26876
  priority
26716
26877
  });
@@ -26718,8 +26879,6 @@ var LocalProvider = class {
26718
26879
  }
26719
26880
  /** Shut down the worker thread. Called by `resetProvider()` on config change.
26720
26881
  * Sends a shutdown message so the worker calls `process.exit(0)` internally.
26721
- * We avoid `worker.terminate()` because Bun's forced termination triggers a
26722
- * NAPI fatal error when tearing down onnxruntime's native bindings.
26723
26882
  *
26724
26883
  * Returns a promise that resolves once the worker has fully exited. Callers
26725
26884
  * that need a clean teardown (tests, config change) should await the result.
@@ -26742,7 +26901,7 @@ var LocalProvider = class {
26742
26901
  }
26743
26902
  };
26744
26903
  var PROVIDER_DEFAULTS = {
26745
- local: { model: "BGESmallENV15", dimensions: 384 },
26904
+ local: { model: "nomic-ai/nomic-embed-text-v1.5", dimensions: 768 },
26746
26905
  voyage: { model: "voyage-code-3", dimensions: 1024 },
26747
26906
  openai: { model: "text-embedding-3-small", dimensions: 1536 }
26748
26907
  };
@@ -26766,7 +26925,7 @@ function getProvider() {
26766
26925
  const model = cfg.model;
26767
26926
  switch (providerName) {
26768
26927
  case "local": {
26769
- cachedProvider = new LocalProvider(model);
26928
+ cachedProvider = new LocalProvider(model, cfg.dimensions);
26770
26929
  break;
26771
26930
  }
26772
26931
  case "voyage": {
@@ -26843,7 +27002,7 @@ function pickRemoteFallback() {
26843
27002
  function isAvailable() {
26844
27003
  const provider = getProvider();
26845
27004
  if (!provider) return false;
26846
- if (provider instanceof LocalProvider && fastembedKnownUnavailable()) return false;
27005
+ if (provider instanceof LocalProvider && localProviderKnownUnavailable()) return false;
26847
27006
  return true;
26848
27007
  }
26849
27008
  async function embed(texts, inputType) {
@@ -26858,7 +27017,7 @@ async function embed(texts, inputType) {
26858
27017
  if (!remoteFallbackLogged) {
26859
27018
  remoteFallbackLogged = true;
26860
27019
  info(
26861
- `fastembed unavailable; auto-switching to ${fallback.name} (set search.embeddings.provider in .lore.json to silence this)`
27020
+ `local embedding provider unavailable; auto-switching to ${fallback.name} (set search.embeddings.provider in .lore.json to silence this)`
26862
27021
  );
26863
27022
  }
26864
27023
  cachedProvider = fallback.provider;
@@ -26910,6 +27069,20 @@ function vectorSearchDistillations(queryEmbedding, limit = 10) {
26910
27069
  scored.sort((a, b) => b.similarity - a.similarity);
26911
27070
  return scored.slice(0, limit);
26912
27071
  }
27072
+ var MAX_DISTILLATION_VECTOR_ROWS = 500;
27073
+ function vectorSearchAllDistillations(queryEmbedding, projectId2, limit = 20) {
27074
+ const rows = db().query(
27075
+ "SELECT id, session_id, embedding FROM distillations WHERE embedding IS NOT NULL AND project_id = ? ORDER BY created_at DESC LIMIT ?"
27076
+ ).all(projectId2, MAX_DISTILLATION_VECTOR_ROWS);
27077
+ const scored = [];
27078
+ for (const row of rows) {
27079
+ const vec = fromBlob(row.embedding);
27080
+ const sim = cosineSimilarity(queryEmbedding, vec);
27081
+ scored.push({ id: row.id, session_id: row.session_id, similarity: sim });
27082
+ }
27083
+ scored.sort((a, b) => b.similarity - a.similarity);
27084
+ return scored.slice(0, limit);
27085
+ }
26913
27086
  function embedKnowledgeEntry(id, title, content3) {
26914
27087
  const text4 = `${title}
26915
27088
  ${content3}`;
@@ -27011,20 +27184,37 @@ async function runStartupBackfill() {
27011
27184
  );
27012
27185
  info(`embedding startup: ${parts.join("; ")}`);
27013
27186
  }
27014
- var BACKFILL_CHUNK_SIZE = 32;
27187
+ var MAX_BACKFILL_CHUNK = 8;
27188
+ var MAX_BATCH_TOKEN_AREA = 4096;
27189
+ var CHARS_PER_TOKEN = 4;
27190
+ function nextBatch(rows, start) {
27191
+ const batch = [];
27192
+ let maxTokens = 0;
27193
+ for (let i = start; i < rows.length && batch.length < MAX_BACKFILL_CHUNK; i++) {
27194
+ const estTokens = Math.ceil(rows[i].text.length / CHARS_PER_TOKEN);
27195
+ const newMax = Math.max(maxTokens, estTokens);
27196
+ const newArea = (batch.length + 1) * newMax;
27197
+ if (batch.length > 0 && newArea > MAX_BATCH_TOKEN_AREA) break;
27198
+ batch.push(rows[i]);
27199
+ maxTokens = newMax;
27200
+ }
27201
+ return batch;
27202
+ }
27015
27203
  async function backfillEmbeddings() {
27016
27204
  checkConfigChange();
27017
27205
  const provider = getProvider();
27018
27206
  if (!provider) return 0;
27019
27207
  const rows = db().query("SELECT id, title, content FROM knowledge WHERE embedding IS NULL AND confidence > 0.2").all();
27020
27208
  if (!rows.length) return 0;
27209
+ const items = rows.map((r) => ({ ...r, text: `${r.title}
27210
+ ${r.content}` }));
27021
27211
  let embedded = 0;
27022
- for (let i = 0; i < rows.length; i += BACKFILL_CHUNK_SIZE) {
27023
- const batch = rows.slice(i, i + BACKFILL_CHUNK_SIZE);
27024
- const texts = batch.map((r) => `${r.title}
27025
- ${r.content}`);
27212
+ let i = 0;
27213
+ while (i < items.length) {
27214
+ const batch = nextBatch(items, i);
27215
+ i += batch.length;
27026
27216
  try {
27027
- const vectors = await embed(texts, "document");
27217
+ const vectors = await embed(batch.map((b) => b.text), "document");
27028
27218
  const update2 = db().prepare(
27029
27219
  "UPDATE knowledge SET embedding = ? WHERE id = ?"
27030
27220
  );
@@ -27033,7 +27223,7 @@ ${r.content}`);
27033
27223
  embedded++;
27034
27224
  }
27035
27225
  } catch (err) {
27036
- info(`embedding backfill batch ${i}-${i + batch.length} failed:`, err);
27226
+ error(`embedding backfill batch failed (${batch.length} items):`, err);
27037
27227
  }
27038
27228
  }
27039
27229
  if (embedded > 0) {
@@ -27051,11 +27241,13 @@ async function backfillDistillationEmbeddings() {
27051
27241
  let embedded = 0;
27052
27242
  const PROGRESS_INTERVAL = 256;
27053
27243
  let nextProgressAt = PROGRESS_INTERVAL;
27054
- for (let i = 0; i < rows.length; i += BACKFILL_CHUNK_SIZE) {
27055
- const batch = rows.slice(i, i + BACKFILL_CHUNK_SIZE);
27056
- const texts = batch.map((r) => r.observations);
27244
+ const items = rows.map((r) => ({ ...r, text: r.observations }));
27245
+ let i = 0;
27246
+ while (i < items.length) {
27247
+ const batch = nextBatch(items, i);
27248
+ i += batch.length;
27057
27249
  try {
27058
- const vectors = await embed(texts, "document");
27250
+ const vectors = await embed(batch.map((b) => b.text), "document");
27059
27251
  const update2 = db().prepare(
27060
27252
  "UPDATE distillations SET embedding = ? WHERE id = ?"
27061
27253
  );
@@ -27064,7 +27256,7 @@ async function backfillDistillationEmbeddings() {
27064
27256
  embedded++;
27065
27257
  }
27066
27258
  } catch (err) {
27067
- info(`distillation embedding backfill batch ${i}-${i + batch.length} failed:`, err);
27259
+ error(`distillation embedding backfill batch failed (${batch.length} items):`, err);
27068
27260
  }
27069
27261
  if (embedded >= nextProgressAt) {
27070
27262
  info(`embedding distillations: ${embedded}/${rows.length}\u2026`);
@@ -27178,7 +27370,7 @@ function searchLike(input) {
27178
27370
  if (!terms.length) return [];
27179
27371
  const conditions = terms.map(() => "LOWER(content) LIKE ?").join(" AND ");
27180
27372
  const likeParams = terms.map((t2) => `%${t2}%`);
27181
- const query = input.sessionID ? `SELECT * FROM temporal_messages WHERE project_id = ? AND session_id = ? AND ${conditions} ORDER BY created_at DESC LIMIT ?` : `SELECT * FROM temporal_messages WHERE project_id = ? AND ${conditions} ORDER BY created_at DESC LIMIT ?`;
27373
+ const query = input.sessionID ? `SELECT * FROM temporal_messages WHERE project_id = ? AND session_id = ? AND distilled = 0 AND ${conditions} ORDER BY created_at DESC LIMIT ?` : `SELECT * FROM temporal_messages WHERE project_id = ? AND distilled = 0 AND ${conditions} ORDER BY created_at DESC LIMIT ?`;
27182
27374
  const params = input.sessionID ? [input.pid, input.sessionID, ...likeParams, input.limit] : [input.pid, ...likeParams, input.limit];
27183
27375
  return db().query(query).all(...params);
27184
27376
  }
@@ -27187,10 +27379,10 @@ function search2(input) {
27187
27379
  const limit = input.limit ?? 20;
27188
27380
  const ftsSQL = input.sessionID ? `SELECT m.* FROM temporal_fts f
27189
27381
  CROSS JOIN temporal_messages m ON m.rowid = f.rowid
27190
- WHERE f.content MATCH ? AND m.project_id = ? AND m.session_id = ?
27382
+ WHERE f.content MATCH ? AND m.project_id = ? AND m.session_id = ? AND m.distilled = 0
27191
27383
  ORDER BY rank LIMIT ?` : `SELECT m.* FROM temporal_fts f
27192
27384
  CROSS JOIN temporal_messages m ON m.rowid = f.rowid
27193
- WHERE f.content MATCH ? AND m.project_id = ?
27385
+ WHERE f.content MATCH ? AND m.project_id = ? AND m.distilled = 0
27194
27386
  ORDER BY rank LIMIT ?`;
27195
27387
  try {
27196
27388
  return runRelaxedSearch(input.query, (matchExpr) => {
@@ -27211,10 +27403,10 @@ function searchScored(input) {
27211
27403
  const limit = input.limit ?? 20;
27212
27404
  const ftsSQL = input.sessionID ? `SELECT m.*, rank FROM temporal_fts f
27213
27405
  CROSS JOIN temporal_messages m ON m.rowid = f.rowid
27214
- WHERE f.content MATCH ? AND m.project_id = ? AND m.session_id = ?
27406
+ WHERE f.content MATCH ? AND m.project_id = ? AND m.session_id = ? AND m.distilled = 0
27215
27407
  ORDER BY rank LIMIT ?` : `SELECT m.*, rank FROM temporal_fts f
27216
27408
  CROSS JOIN temporal_messages m ON m.rowid = f.rowid
27217
- WHERE f.content MATCH ? AND m.project_id = ?
27409
+ WHERE f.content MATCH ? AND m.project_id = ? AND m.distilled = 0
27218
27410
  ORDER BY rank LIMIT ?`;
27219
27411
  try {
27220
27412
  return runRelaxedSearch(input.query, (matchExpr) => {
@@ -27305,7 +27497,11 @@ __export(ltm_exports, {
27305
27497
  check: () => check2,
27306
27498
  cleanDeadRefs: () => cleanDeadRefs,
27307
27499
  create: () => create,
27500
+ crossProject: () => crossProject,
27501
+ deduplicate: () => deduplicate,
27502
+ deduplicateGlobal: () => deduplicateGlobal,
27308
27503
  extractRefs: () => extractRefs,
27504
+ findFuzzyDuplicate: () => findFuzzyDuplicate,
27309
27505
  forProject: () => forProject,
27310
27506
  forSession: () => forSession,
27311
27507
  get: () => get,
@@ -27664,8 +27860,8 @@ __export(lat_reader_exports, {
27664
27860
  scoreForSession: () => scoreForSession,
27665
27861
  searchScored: () => searchScored2
27666
27862
  });
27667
- import { readdirSync, readFileSync as readFileSync2, existsSync as existsSync2, statSync } from "fs";
27668
- import { join as join4, relative } from "path";
27863
+ import { readdirSync, readFileSync as readFileSync2, existsSync as existsSync3, statSync as statSync2 } from "fs";
27864
+ import { join as join6, relative } from "path";
27669
27865
  var processor2 = remark();
27670
27866
  function estimateTokens2(text4) {
27671
27867
  return Math.ceil(text4.length / 3);
@@ -27743,7 +27939,7 @@ function listMarkdownFiles(dir) {
27743
27939
  try {
27744
27940
  const entries = readdirSync(dir, { withFileTypes: true });
27745
27941
  for (const entry of entries) {
27746
- const fullPath = join4(dir, entry.name);
27942
+ const fullPath = join6(dir, entry.name);
27747
27943
  if (entry.isDirectory() && !entry.name.startsWith(".")) {
27748
27944
  results.push(...listMarkdownFiles(fullPath));
27749
27945
  } else if (entry.isFile() && entry.name.endsWith(".md")) {
@@ -27758,12 +27954,12 @@ function contentHash(content3) {
27758
27954
  return sha256(content3);
27759
27955
  }
27760
27956
  function hasLatDir(projectPath) {
27761
- const latDir = join4(projectPath, "lat.md");
27762
- return existsSync2(latDir) && statSync(latDir).isDirectory();
27957
+ const latDir = join6(projectPath, "lat.md");
27958
+ return existsSync3(latDir) && statSync2(latDir).isDirectory();
27763
27959
  }
27764
27960
  function refresh(projectPath) {
27765
- const latDir = join4(projectPath, "lat.md");
27766
- if (!existsSync2(latDir) || !statSync(latDir).isDirectory()) return 0;
27961
+ const latDir = join6(projectPath, "lat.md");
27962
+ if (!existsSync3(latDir) || !statSync2(latDir).isDirectory()) return 0;
27767
27963
  const pid = ensureProject(projectPath);
27768
27964
  const files = listMarkdownFiles(latDir);
27769
27965
  let upserted = 0;
@@ -27885,6 +28081,7 @@ var KNOWLEDGE_COLS = "id, project_id, category, title, content, source_session,
27885
28081
  var KNOWLEDGE_COLS_K = "k.id, k.project_id, k.category, k.title, k.content, k.source_session, k.cross_project, k.confidence, k.created_at, k.updated_at, k.metadata";
27886
28082
  function create(input) {
27887
28083
  const pid = input.scope === "project" && input.projectPath ? ensureProject(input.projectPath) : null;
28084
+ const crossProject2 = pid === null ? true : input.crossProject ?? false;
27888
28085
  if (!input.id) {
27889
28086
  const existing = pid !== null ? db().query(
27890
28087
  "SELECT id FROM knowledge WHERE project_id = ? AND LOWER(title) = LOWER(?) AND confidence > 0 LIMIT 1"
@@ -27902,6 +28099,11 @@ function create(input) {
27902
28099
  update(crossExisting.id, { content: input.content });
27903
28100
  return crossExisting.id;
27904
28101
  }
28102
+ const fuzzyMatch = findFuzzyDuplicate({ title: input.title, projectId: pid });
28103
+ if (fuzzyMatch) {
28104
+ update(fuzzyMatch.id, { content: input.content });
28105
+ return fuzzyMatch.id;
28106
+ }
27905
28107
  }
27906
28108
  const id = input.id ?? uuidv72();
27907
28109
  const now = Date.now();
@@ -27915,7 +28117,7 @@ function create(input) {
27915
28117
  input.title,
27916
28118
  input.content,
27917
28119
  input.session ?? null,
27918
- input.crossProject ?? false ? 1 : 0,
28120
+ crossProject2 ? 1 : 0,
27919
28121
  now,
27920
28122
  now
27921
28123
  );
@@ -27933,7 +28135,7 @@ function update(id, input) {
27933
28135
  }
27934
28136
  if (input.confidence !== void 0) {
27935
28137
  sets.push("confidence = ?");
27936
- params.push(input.confidence);
28138
+ params.push(Math.max(0, Math.min(1, input.confidence)));
27937
28139
  }
27938
28140
  sets.push("updated_at = ?");
27939
28141
  params.push(Date.now());
@@ -27949,6 +28151,50 @@ function update(id, input) {
27949
28151
  function remove(id) {
27950
28152
  db().query("DELETE FROM knowledge WHERE id = ?").run(id);
27951
28153
  }
28154
+ function titleOverlap(a, b) {
28155
+ const wordsA = new Set(filterTerms(a).map((w) => w.toLowerCase()));
28156
+ const wordsB = new Set(filterTerms(b).map((w) => w.toLowerCase()));
28157
+ if (wordsA.size === 0 || wordsB.size === 0) return { coefficient: 0, intersectionSize: 0 };
28158
+ const intersection2 = [...wordsA].filter((w) => wordsB.has(w));
28159
+ return {
28160
+ coefficient: intersection2.length / Math.min(wordsA.size, wordsB.size),
28161
+ intersectionSize: intersection2.length
28162
+ };
28163
+ }
28164
+ var FUZZY_DEDUP_THRESHOLD = 0.7;
28165
+ var FUZZY_DEDUP_MIN_OVERLAP = 4;
28166
+ var EMBEDDING_DEDUP_THRESHOLD = 0.935;
28167
+ function findFuzzyDuplicate(input) {
28168
+ const q = ftsQueryOr(input.title);
28169
+ if (q === EMPTY_QUERY) return null;
28170
+ const { title: tw, content: cw, category: catw } = config2().search.ftsWeights;
28171
+ try {
28172
+ const excludeClause = input.excludeId ? "AND k.id != ?" : "";
28173
+ const sql = input.projectId !== null ? `SELECT k.id, k.title FROM knowledge_fts f
28174
+ CROSS JOIN knowledge k ON k.rowid = f.rowid
28175
+ WHERE knowledge_fts MATCH ?
28176
+ AND (k.project_id = ? OR k.cross_project = 1)
28177
+ AND k.confidence > 0.2
28178
+ ${excludeClause}
28179
+ ORDER BY bm25(knowledge_fts, ?, ?, ?) LIMIT 5` : `SELECT k.id, k.title FROM knowledge_fts f
28180
+ CROSS JOIN knowledge k ON k.rowid = f.rowid
28181
+ WHERE knowledge_fts MATCH ?
28182
+ AND (k.project_id IS NULL OR k.cross_project = 1)
28183
+ AND k.confidence > 0.2
28184
+ ${excludeClause}
28185
+ ORDER BY bm25(knowledge_fts, ?, ?, ?) LIMIT 5`;
28186
+ const params = input.projectId !== null ? [q, input.projectId, ...input.excludeId ? [input.excludeId] : [], tw, cw, catw] : [q, ...input.excludeId ? [input.excludeId] : [], tw, cw, catw];
28187
+ const candidates = db().query(sql).all(...params);
28188
+ for (const candidate of candidates) {
28189
+ const { coefficient, intersectionSize } = titleOverlap(input.title, candidate.title);
28190
+ if (coefficient >= FUZZY_DEDUP_THRESHOLD && intersectionSize >= FUZZY_DEDUP_MIN_OVERLAP) {
28191
+ return candidate;
28192
+ }
28193
+ }
28194
+ } catch {
28195
+ }
28196
+ return null;
28197
+ }
27952
28198
  function forProject(projectPath, includeCross = true) {
27953
28199
  const pid = ensureProject(projectPath);
27954
28200
  if (includeCross) {
@@ -28096,6 +28342,13 @@ function all2() {
28096
28342
  `SELECT ${KNOWLEDGE_COLS} FROM knowledge WHERE confidence > 0.2 ORDER BY confidence DESC, updated_at DESC`
28097
28343
  ).all();
28098
28344
  }
28345
+ function crossProject() {
28346
+ return db().query(
28347
+ `SELECT ${KNOWLEDGE_COLS} FROM knowledge
28348
+ WHERE (project_id IS NULL OR cross_project = 1) AND confidence > 0.2
28349
+ ORDER BY confidence DESC, updated_at DESC`
28350
+ ).all();
28351
+ }
28099
28352
  function searchLike2(input) {
28100
28353
  const terms = input.query.toLowerCase().split(/\s+/).filter((t2) => t2.length > 2);
28101
28354
  if (!terms.length) return [];
@@ -28327,6 +28580,102 @@ function check2(projectPath) {
28327
28580
  }
28328
28581
  return issues;
28329
28582
  }
28583
+ function _dedup(entries, dryRun) {
28584
+ if (entries.length < 2) return { clusters: [], totalRemoved: 0 };
28585
+ const embeddingMap = /* @__PURE__ */ new Map();
28586
+ {
28587
+ const entryIds = entries.map((e) => e.id);
28588
+ const placeholders = entryIds.map(() => "?").join(",");
28589
+ const rows = db().query(`SELECT id, embedding FROM knowledge WHERE embedding IS NOT NULL AND id IN (${placeholders})`).all(...entryIds);
28590
+ for (const row of rows) {
28591
+ try {
28592
+ embeddingMap.set(row.id, fromBlob(row.embedding));
28593
+ } catch {
28594
+ info(`skipping corrupted embedding for entry ${row.id}`);
28595
+ }
28596
+ }
28597
+ }
28598
+ const neighborMap = /* @__PURE__ */ new Map();
28599
+ for (const entry of entries) {
28600
+ const neighbors = [];
28601
+ const entryVec = embeddingMap.get(entry.id);
28602
+ for (const other of entries) {
28603
+ if (other.id === entry.id) continue;
28604
+ const { coefficient, intersectionSize } = titleOverlap(entry.title, other.title);
28605
+ const titleMatch = coefficient >= FUZZY_DEDUP_THRESHOLD && intersectionSize >= FUZZY_DEDUP_MIN_OVERLAP;
28606
+ let embeddingMatch = false;
28607
+ let similarity = 0;
28608
+ if (entryVec) {
28609
+ const otherVec = embeddingMap.get(other.id);
28610
+ if (otherVec && entryVec.length === otherVec.length) {
28611
+ similarity = cosineSimilarity(entryVec, otherVec);
28612
+ embeddingMatch = similarity >= EMBEDDING_DEDUP_THRESHOLD;
28613
+ }
28614
+ }
28615
+ if (titleMatch || embeddingMatch) {
28616
+ neighbors.push({ id: other.id, score: Math.max(coefficient, similarity) });
28617
+ }
28618
+ }
28619
+ neighbors.sort((a, b) => b.score - a.score);
28620
+ neighborMap.set(entry.id, neighbors);
28621
+ }
28622
+ const claimed = /* @__PURE__ */ new Set();
28623
+ const rawClusters = /* @__PURE__ */ new Map();
28624
+ const sortedIds = [...neighborMap.keys()].sort(
28625
+ (a, b) => neighborMap.get(b).length - neighborMap.get(a).length
28626
+ );
28627
+ for (const centerId of sortedIds) {
28628
+ if (claimed.has(centerId)) continue;
28629
+ claimed.add(centerId);
28630
+ const members = [centerId];
28631
+ for (const { id: neighborId } of neighborMap.get(centerId)) {
28632
+ if (claimed.has(neighborId)) continue;
28633
+ claimed.add(neighborId);
28634
+ members.push(neighborId);
28635
+ }
28636
+ if (members.length > 1) {
28637
+ rawClusters.set(centerId, members);
28638
+ }
28639
+ }
28640
+ const entryById = new Map(entries.map((e) => [e.id, e]));
28641
+ const result = [];
28642
+ let totalRemoved = 0;
28643
+ for (const members of rawClusters.values()) {
28644
+ if (members.length < 2) continue;
28645
+ const sorted = members.map((id) => entryById.get(id)).filter(Boolean).sort((a, b) => {
28646
+ if (b.confidence !== a.confidence) return b.confidence - a.confidence;
28647
+ if (b.updated_at !== a.updated_at) return b.updated_at - a.updated_at;
28648
+ return a.title.length - b.title.length;
28649
+ });
28650
+ const survivor = sorted[0];
28651
+ const merged = sorted.slice(1);
28652
+ result.push({
28653
+ surviving: { id: survivor.id, title: survivor.title },
28654
+ merged: merged.map((e) => ({ id: e.id, title: e.title }))
28655
+ });
28656
+ if (!dryRun) {
28657
+ for (const entry of merged) {
28658
+ remove(entry.id);
28659
+ }
28660
+ }
28661
+ totalRemoved += merged.length;
28662
+ }
28663
+ result.sort((a, b) => b.merged.length - a.merged.length);
28664
+ return { clusters: result, totalRemoved };
28665
+ }
28666
+ async function deduplicate(projectPath, opts) {
28667
+ const entries = forProject(projectPath, false);
28668
+ return _dedup(entries, opts?.dryRun ?? true);
28669
+ }
28670
+ async function deduplicateGlobal(opts) {
28671
+ const entries = db().query(
28672
+ `SELECT ${KNOWLEDGE_COLS} FROM knowledge
28673
+ WHERE project_id IS NULL
28674
+ AND confidence > 0.2
28675
+ ORDER BY confidence DESC, updated_at DESC`
28676
+ ).all();
28677
+ return _dedup(entries, opts?.dryRun ?? true);
28678
+ }
28330
28679
 
28331
28680
  // src/data.ts
28332
28681
  var data_exports = {};
@@ -28351,11 +28700,11 @@ __export(data_exports, {
28351
28700
  resolveId: () => resolveId,
28352
28701
  wipeDatabase: () => wipeDatabase
28353
28702
  });
28354
- import { statSync as statSync3, unlinkSync, existsSync as existsSync4 } from "fs";
28703
+ import { statSync as statSync4, unlinkSync, existsSync as existsSync5 } from "fs";
28355
28704
 
28356
28705
  // src/agents-file.ts
28357
- import { existsSync as existsSync3, readFileSync as readFileSync3, writeFileSync, mkdirSync as mkdirSync2, statSync as statSync2 } from "fs";
28358
- import { dirname as dirname2, join as join5 } from "path";
28706
+ import { existsSync as existsSync4, readFileSync as readFileSync3, writeFileSync, mkdirSync as mkdirSync3, statSync as statSync3 } from "fs";
28707
+ import { dirname as dirname2, join as join7 } from "path";
28359
28708
  var LORE_SECTION_START = "<!-- This section is maintained by the coding agent via lore (https://github.com/BYK/loreai) -->";
28360
28709
  var LORE_SECTION_END = "<!-- End lore-managed section -->";
28361
28710
  var ALL_START_MARKERS = [
@@ -28386,7 +28735,7 @@ function setCache(fp, entry) {
28386
28735
  ).run(key, value, value);
28387
28736
  }
28388
28737
  function clearLoreFileCache(projectPath) {
28389
- db().query("DELETE FROM kv_meta WHERE key = ?").run(CACHE_PREFIX + join5(projectPath, LORE_FILE));
28738
+ db().query("DELETE FROM kv_meta WHERE key = ?").run(CACHE_PREFIX + join7(projectPath, LORE_FILE));
28390
28739
  }
28391
28740
  function splitFile(fileContent) {
28392
28741
  const spans = [];
@@ -28499,7 +28848,7 @@ function exportToFile(input) {
28499
28848
  const pointerBody = "\n## Long-term Knowledge\n\nFor long-term knowledge entries managed by [lore](https://github.com/BYK/loreai) (gotchas, patterns, decisions, architecture), see [`.lore.md`](.lore.md) in the project root.\n";
28500
28849
  const newSection = LORE_SECTION_START + pointerBody + LORE_SECTION_END + "\n";
28501
28850
  let fileContent = "";
28502
- if (existsSync3(input.filePath)) {
28851
+ if (existsSync4(input.filePath)) {
28503
28852
  fileContent = readFileSync3(input.filePath, "utf8");
28504
28853
  }
28505
28854
  const { before, after } = splitFile(fileContent);
@@ -28508,11 +28857,11 @@ function exportToFile(input) {
28508
28857
  const suffix = after.trimStart();
28509
28858
  const suffixWithSep = suffix.length > 0 ? "\n" + suffix : "";
28510
28859
  const result = prefixWithSep + newSection + suffixWithSep;
28511
- mkdirSync2(dirname2(input.filePath), { recursive: true });
28860
+ mkdirSync3(dirname2(input.filePath), { recursive: true });
28512
28861
  writeFileSync(input.filePath, result, "utf8");
28513
28862
  }
28514
28863
  function shouldImport(input) {
28515
- if (!existsSync3(input.filePath)) return false;
28864
+ if (!existsSync4(input.filePath)) return false;
28516
28865
  const fileContent = readFileSync3(input.filePath, "utf8");
28517
28866
  const { section } = splitFile(fileContent);
28518
28867
  if (section === null) {
@@ -28533,18 +28882,26 @@ function _importEntries(entries, projectPath) {
28533
28882
  update(entry.id, { content: entry.content });
28534
28883
  }
28535
28884
  } else {
28536
- create({
28537
- projectPath,
28538
- category: entry.category,
28539
- title: entry.title,
28540
- content: entry.content,
28541
- scope: "project",
28542
- crossProject: false,
28543
- id: entry.id
28544
- });
28885
+ const pid = ensureProject(projectPath);
28886
+ const fuzzyMatch = findFuzzyDuplicate({ title: entry.title, projectId: pid });
28887
+ if (fuzzyMatch) {
28888
+ if (fuzzyMatch.title !== entry.title || get(fuzzyMatch.id)?.content !== entry.content) {
28889
+ update(fuzzyMatch.id, { content: entry.content });
28890
+ }
28891
+ } else {
28892
+ create({
28893
+ projectPath,
28894
+ category: entry.category,
28895
+ title: entry.title,
28896
+ content: entry.content,
28897
+ scope: "project",
28898
+ crossProject: false,
28899
+ id: entry.id
28900
+ });
28901
+ }
28545
28902
  }
28546
28903
  } else {
28547
- const existing = forProject(projectPath, true);
28904
+ const existing = forProject(projectPath, false);
28548
28905
  const titleMatch = existing.find(
28549
28906
  (e) => e.title.toLowerCase() === entry.title.toLowerCase()
28550
28907
  );
@@ -28562,7 +28919,7 @@ function _importEntries(entries, projectPath) {
28562
28919
  }
28563
28920
  }
28564
28921
  function importFromFile(input) {
28565
- if (!existsSync3(input.filePath)) return;
28922
+ if (!existsSync4(input.filePath)) return;
28566
28923
  const fileContent = readFileSync3(input.filePath, "utf8");
28567
28924
  const { section } = splitFile(fileContent);
28568
28925
  const textToParse = section ?? fileContent;
@@ -28571,25 +28928,25 @@ function importFromFile(input) {
28571
28928
  _importEntries(fileEntries, input.projectPath);
28572
28929
  }
28573
28930
  function loreFileExists(projectPath) {
28574
- return existsSync3(join5(projectPath, LORE_FILE));
28931
+ return existsSync4(join7(projectPath, LORE_FILE));
28575
28932
  }
28576
28933
  function exportLoreFile(projectPath) {
28577
28934
  const sectionBody = buildSection(projectPath);
28578
28935
  const content3 = LORE_FILE_HEADER + "\n" + sectionBody;
28579
28936
  const contentHash2 = hashSection(content3);
28580
- const fp = join5(projectPath, LORE_FILE);
28937
+ const fp = join7(projectPath, LORE_FILE);
28581
28938
  const cached2 = getCache(fp);
28582
28939
  if (cached2 && cached2.hash === contentHash2) {
28583
28940
  return;
28584
28941
  }
28585
28942
  writeFileSync(fp, content3, "utf8");
28586
- const { mtimeMs } = statSync2(fp);
28943
+ const { mtimeMs } = statSync3(fp);
28587
28944
  setCache(fp, { mtimeMs, hash: contentHash2 });
28588
28945
  }
28589
28946
  function shouldImportLoreFile(projectPath) {
28590
- const fp = join5(projectPath, LORE_FILE);
28591
- if (!existsSync3(fp)) return false;
28592
- const { mtimeMs } = statSync2(fp);
28947
+ const fp = join7(projectPath, LORE_FILE);
28948
+ if (!existsSync4(fp)) return false;
28949
+ const { mtimeMs } = statSync3(fp);
28593
28950
  const cached2 = getCache(fp);
28594
28951
  if (cached2 && cached2.mtimeMs === mtimeMs) {
28595
28952
  return false;
@@ -28605,12 +28962,17 @@ function shouldImportLoreFile(projectPath) {
28605
28962
  return true;
28606
28963
  }
28607
28964
  function importLoreFile(projectPath) {
28608
- const fp = join5(projectPath, LORE_FILE);
28609
- if (!existsSync3(fp)) return;
28965
+ const fp = join7(projectPath, LORE_FILE);
28966
+ if (!existsSync4(fp)) return;
28610
28967
  const fileContent = readFileSync3(fp, "utf8");
28611
28968
  const fileEntries = parseEntriesFromSection(fileContent);
28612
28969
  if (!fileEntries.length) return;
28613
28970
  _importEntries(fileEntries, projectPath);
28971
+ try {
28972
+ const { mtimeMs } = statSync3(fp);
28973
+ setCache(fp, { mtimeMs, hash: hashSection(fileContent) });
28974
+ } catch {
28975
+ }
28614
28976
  }
28615
28977
 
28616
28978
  // src/data.ts
@@ -28685,10 +29047,10 @@ function globalStats() {
28685
29047
  let db_size_bytes = 0;
28686
29048
  try {
28687
29049
  const p2 = dbPath();
28688
- db_size_bytes = statSync3(p2).size;
29050
+ db_size_bytes = statSync4(p2).size;
28689
29051
  const walPath = p2 + "-wal";
28690
- if (existsSync4(walPath)) {
28691
- db_size_bytes += statSync3(walPath).size;
29052
+ if (existsSync5(walPath)) {
29053
+ db_size_bytes += statSync4(walPath).size;
28692
29054
  }
28693
29055
  } catch {
28694
29056
  }
@@ -28739,7 +29101,7 @@ function clearProject(projectPath) {
28739
29101
  database.exec("ROLLBACK");
28740
29102
  throw e;
28741
29103
  }
28742
- if (existsSync4(projectPath)) {
29104
+ if (existsSync5(projectPath)) {
28743
29105
  try {
28744
29106
  exportLoreFile(projectPath);
28745
29107
  } catch {
@@ -28810,7 +29172,7 @@ function clearKnowledge(projectPath) {
28810
29172
  "SELECT COUNT(*) as c FROM knowledge WHERE project_id = ?"
28811
29173
  ).get(pid).c;
28812
29174
  db().query("DELETE FROM knowledge WHERE project_id = ?").run(pid);
28813
- if (existsSync4(projectPath)) {
29175
+ if (existsSync5(projectPath)) {
28814
29176
  try {
28815
29177
  exportLoreFile(projectPath);
28816
29178
  } catch {
@@ -28869,7 +29231,7 @@ function wipeDatabase() {
28869
29231
  close();
28870
29232
  for (const suffix of ["", "-wal", "-shm"]) {
28871
29233
  const fp = p2 + suffix;
28872
- if (existsSync4(fp)) {
29234
+ if (existsSync5(fp)) {
28873
29235
  try {
28874
29236
  unlinkSync(fp);
28875
29237
  } catch {
@@ -28910,7 +29272,7 @@ function backfillGitRemotes() {
28910
29272
  for (const project of projects) {
28911
29273
  let gitRemote = project.git_remote;
28912
29274
  if (!gitRemote) {
28913
- if (!existsSync4(project.path)) continue;
29275
+ if (!existsSync5(project.path)) continue;
28914
29276
  gitRemote = getGitRemote(project.path);
28915
29277
  if (!gitRemote) continue;
28916
29278
  const existing = db().query(
@@ -29009,6 +29371,32 @@ var PATTERNS = [
29009
29371
  regex: /(?:user |team |we )(?:always |usually |typically )(?:use|prefer|go with) (.+?)(?:\.|,|$)/gi,
29010
29372
  category: "preference",
29011
29373
  titleFn: (m) => `Typically uses ${m[1].trim()}`
29374
+ },
29375
+ // Process instruction patterns — match distilled observations recording
29376
+ // user assertions about workflow/process rules. The distillation observer
29377
+ // normalizes user instructions into "User stated always X" phrasing.
29378
+ // These require "stated/asserted/said" to avoid overlapping with the
29379
+ // existing "typically uses" pattern above (which already handles
29380
+ // "user always use/prefer/go with X").
29381
+ {
29382
+ regex: /(?:user |team |we )(?:stated |asserted |said )(?:to )?always (.+?)(?:\.|,|$)/gi,
29383
+ category: "preference",
29384
+ titleFn: (m) => `Always ${m[1].trim()}`
29385
+ },
29386
+ {
29387
+ regex: /(?:user |team |we )(?:stated |asserted |said )(?:to )?never (.+?)(?:\.|,|$)/gi,
29388
+ category: "preference",
29389
+ titleFn: (m) => `Never ${m[1].trim()}`
29390
+ },
29391
+ {
29392
+ regex: /(?:user |team |we )(?:stated |asserted |said )(?:to )?make sure to (.+?)(?:\.|,|$)/gi,
29393
+ category: "preference",
29394
+ titleFn: (m) => `Make sure to ${m[1].trim()}`
29395
+ },
29396
+ {
29397
+ regex: /(?:user |team |we )(?:stated |asserted |said )(?:to )?(?:don't|do not) forget (?:to )?(.+?)(?:\.|,|$)/gi,
29398
+ category: "preference",
29399
+ titleFn: (m) => `Always ${m[1].trim()}`
29012
29400
  }
29013
29401
  ];
29014
29402
  function extractPatterns(observations) {
@@ -29018,6 +29406,8 @@ function extractPatterns(observations) {
29018
29406
  regex.lastIndex = 0;
29019
29407
  let match;
29020
29408
  while ((match = regex.exec(observations)) !== null) {
29409
+ const captures = match.slice(1);
29410
+ if (captures.some((c) => c && (c.trim().length <= 2 || /["\u201C\u201D`\u2018\u2019]/.test(c)))) continue;
29021
29411
  const title = titleFn(match);
29022
29412
  const key = title.toLowerCase();
29023
29413
  if (seen.has(key)) continue;
@@ -29137,7 +29527,7 @@ function getSessionState(sessionID) {
29137
29527
  }
29138
29528
  return state;
29139
29529
  }
29140
- function onIdleResume(sessionID, thresholdMs, now = Date.now()) {
29530
+ function onIdleResume(sessionID, thresholdMs, now = Date.now(), skipCompact = false) {
29141
29531
  if (thresholdMs <= 0) return { triggered: false };
29142
29532
  const state = getSessionState(sessionID);
29143
29533
  if (state.lastTurnAt === 0) return { triggered: false };
@@ -29147,7 +29537,7 @@ function onIdleResume(sessionID, thresholdMs, now = Date.now()) {
29147
29537
  state.rawWindowCache = null;
29148
29538
  state.distillationSnapshot = null;
29149
29539
  state.cameOutOfIdle = true;
29150
- state.postIdleCompact = true;
29540
+ state.postIdleCompact = !skipCompact;
29151
29541
  return { triggered: true, idleMs };
29152
29542
  }
29153
29543
  function getLastTurnAt(sessionID) {
@@ -30284,7 +30674,8 @@ async function distillSegment(input) {
30284
30674
  embedDistillation(distillId, result.observations);
30285
30675
  }
30286
30676
  if (config2().knowledge.enabled) {
30287
- for (const pat of extractPatterns(result.observations)) {
30677
+ const patterns = extractPatterns(result.observations);
30678
+ for (const pat of patterns) {
30288
30679
  try {
30289
30680
  create({
30290
30681
  projectPath: input.projectPath,
@@ -30297,6 +30688,9 @@ async function distillSegment(input) {
30297
30688
  } catch {
30298
30689
  }
30299
30690
  }
30691
+ if (patterns.length > 0) {
30692
+ info(`pattern extraction: ${patterns.length} entries from distillation`);
30693
+ }
30300
30694
  }
30301
30695
  return result;
30302
30696
  }
@@ -30346,7 +30740,8 @@ async function metaDistill(input) {
30346
30740
  embedDistillation(metaId, result.observations);
30347
30741
  }
30348
30742
  if (config2().knowledge.enabled) {
30349
- for (const pat of extractPatterns(result.observations)) {
30743
+ const patterns = extractPatterns(result.observations);
30744
+ for (const pat of patterns) {
30350
30745
  try {
30351
30746
  create({
30352
30747
  projectPath: input.projectPath,
@@ -30359,6 +30754,9 @@ async function metaDistill(input) {
30359
30754
  } catch {
30360
30755
  }
30361
30756
  }
30757
+ if (patterns.length > 0) {
30758
+ info(`pattern extraction: ${patterns.length} entries from meta-distillation`);
30759
+ }
30362
30760
  }
30363
30761
  return result;
30364
30762
  }
@@ -30397,10 +30795,150 @@ function backfillMetrics() {
30397
30795
  // src/curator.ts
30398
30796
  var curator_exports = {};
30399
30797
  __export(curator_exports, {
30798
+ MAX_ENTRY_CONTENT_LENGTH: () => MAX_ENTRY_CONTENT_LENGTH,
30799
+ applyOps: () => applyOps,
30400
30800
  consolidate: () => consolidate,
30801
+ parseOps: () => parseOps,
30401
30802
  resetCurationTracker: () => resetCurationTracker,
30402
30803
  run: () => run2
30403
30804
  });
30805
+
30806
+ // src/instruction-detect.ts
30807
+ var instruction_detect_exports = {};
30808
+ __export(instruction_detect_exports, {
30809
+ detectAndFormat: () => detectAndFormat,
30810
+ extractInstructionCandidates: () => extractInstructionCandidates,
30811
+ findRepeatedInstructions: () => findRepeatedInstructions,
30812
+ formatForCurator: () => formatForCurator
30813
+ });
30814
+ var DEFAULT_REPETITION_THRESHOLD = 2;
30815
+ var VECTOR_SIMILARITY_THRESHOLD = 0.5;
30816
+ var MAX_CANDIDATES = 5;
30817
+ var INSTRUCTION_PATTERNS = [
30818
+ /\balways\b (.{10,80}?)(?:\.|,|!|$)/gi,
30819
+ /\bnever\b (.{10,80}?)(?:\.|,|!|$)/gi,
30820
+ /\bmake sure to (.{10,80}?)(?:\.|,|!|$)/gi,
30821
+ /\bdon'?t forget (?:to )?(.{10,80}?)(?:\.|,|!|$)/gi,
30822
+ /\bplease (?:always |make sure (?:to )?)(.{10,80}?)(?:\.|,|!|$)/gi,
30823
+ /\bI (?:want|need|prefer|expect) (?:you to )?(.{10,80}?)(?:\.|,|!|$)/gi
30824
+ ];
30825
+ function extractInstructionCandidates(messages) {
30826
+ const candidates = [];
30827
+ const seen = /* @__PURE__ */ new Set();
30828
+ for (const msg of messages) {
30829
+ if (msg.role !== "user") continue;
30830
+ for (const pattern of INSTRUCTION_PATTERNS) {
30831
+ pattern.lastIndex = 0;
30832
+ let match;
30833
+ while ((match = pattern.exec(msg.content)) !== null) {
30834
+ const text4 = match[1]?.trim();
30835
+ if (!text4 || text4.length < 10) continue;
30836
+ const key = text4.toLowerCase();
30837
+ if (seen.has(key)) continue;
30838
+ seen.add(key);
30839
+ candidates.push({
30840
+ text: text4,
30841
+ sessionID: msg.session_id
30842
+ });
30843
+ if (candidates.length >= MAX_CANDIDATES) return candidates;
30844
+ }
30845
+ }
30846
+ }
30847
+ return candidates;
30848
+ }
30849
+ async function findRepeatedInstructions(input) {
30850
+ const threshold = input.threshold ?? DEFAULT_REPETITION_THRESHOLD;
30851
+ if (!input.candidates.length) return [];
30852
+ const pid = ensureProject(input.projectPath);
30853
+ let candidateEmbeddings = [];
30854
+ if (isAvailable()) {
30855
+ try {
30856
+ candidateEmbeddings = await embed(
30857
+ input.candidates.map((c) => c.text),
30858
+ "query"
30859
+ );
30860
+ } catch (err) {
30861
+ warn("instruction-detect: batch embedding failed:", err);
30862
+ }
30863
+ }
30864
+ const results = [];
30865
+ for (let i = 0; i < input.candidates.length; i++) {
30866
+ const candidate = input.candidates[i];
30867
+ const sessionIDs = /* @__PURE__ */ new Set();
30868
+ if (candidateEmbeddings.length > i) {
30869
+ const hits = vectorSearchAllDistillations(candidateEmbeddings[i], pid, 20);
30870
+ for (const hit of hits) {
30871
+ if (hit.similarity >= VECTOR_SIMILARITY_THRESHOLD && hit.session_id !== input.currentSessionID) {
30872
+ sessionIDs.add(hit.session_id);
30873
+ }
30874
+ }
30875
+ }
30876
+ const terms = filterTerms(candidate.text);
30877
+ if (terms.length >= 2) {
30878
+ const searchText = terms.slice(0, 5).join(" ");
30879
+ const ftsHits = searchDistillationsFTS(pid, searchText);
30880
+ for (const hit of ftsHits) {
30881
+ if (hit.session_id !== input.currentSessionID) {
30882
+ sessionIDs.add(hit.session_id);
30883
+ }
30884
+ }
30885
+ }
30886
+ if (sessionIDs.size >= threshold) {
30887
+ results.push({
30888
+ instruction: candidate.text,
30889
+ priorSessionCount: sessionIDs.size
30890
+ });
30891
+ }
30892
+ }
30893
+ return results;
30894
+ }
30895
+ function searchDistillationsFTS(projectId2, rawQuery) {
30896
+ const matchExpr = ftsQueryOr(rawQuery);
30897
+ if (matchExpr === EMPTY_QUERY) return [];
30898
+ const sql = `SELECT d.id, d.session_id
30899
+ FROM distillation_fts f
30900
+ CROSS JOIN distillations d ON d.rowid = f.rowid
30901
+ WHERE distillation_fts MATCH ?
30902
+ AND d.project_id = ?
30903
+ ORDER BY rank LIMIT 30`;
30904
+ try {
30905
+ return db().query(sql).all(matchExpr, projectId2);
30906
+ } catch (err) {
30907
+ warn("instruction-detect: FTS search failed:", err);
30908
+ return [];
30909
+ }
30910
+ }
30911
+ function formatForCurator(instructions) {
30912
+ if (!instructions.length) return "";
30913
+ const lines = instructions.map(
30914
+ (i) => `- "${i.instruction}" (seen in ${i.priorSessionCount} prior session${i.priorSessionCount !== 1 ? "s" : ""})`
30915
+ );
30916
+ return `
30917
+
30918
+ ---
30919
+ CROSS-SESSION REPEATED INSTRUCTIONS (high-confidence preference candidates):
30920
+ The following user instructions have appeared in multiple prior sessions. These are strong candidates for "preference" entries:
30921
+ ${lines.join("\n")}`;
30922
+ }
30923
+ async function detectAndFormat(input) {
30924
+ const messages = bySession(input.projectPath, input.sessionID);
30925
+ const candidates = extractInstructionCandidates(messages);
30926
+ if (!candidates.length) return "";
30927
+ const repeated = await findRepeatedInstructions({
30928
+ projectPath: input.projectPath,
30929
+ currentSessionID: input.sessionID,
30930
+ candidates,
30931
+ threshold: input.threshold
30932
+ });
30933
+ if (repeated.length) {
30934
+ info(
30935
+ `instruction-detect: ${repeated.length} repeated instruction(s) found across sessions`
30936
+ );
30937
+ }
30938
+ return formatForCurator(repeated);
30939
+ }
30940
+
30941
+ // src/curator.ts
30404
30942
  var MAX_ENTRY_CONTENT_LENGTH = 1200;
30405
30943
  function parseOps(text4) {
30406
30944
  const cleaned = text4.trim().replace(/^```json?\s*/i, "").replace(/\s*```$/i, "");
@@ -30414,40 +30952,14 @@ function parseOps(text4) {
30414
30952
  return [];
30415
30953
  }
30416
30954
  }
30417
- var lastCuratedAt = /* @__PURE__ */ new Map();
30418
- async function run2(input) {
30419
- const cfg = config2();
30420
- if (!cfg.curator.enabled) return { created: 0, updated: 0, deleted: 0 };
30421
- const all3 = bySession(input.projectPath, input.sessionID);
30422
- const sessionCuratedAt = lastCuratedAt.get(input.sessionID) ?? 0;
30423
- const recent = all3.filter((m) => m.created_at > sessionCuratedAt);
30424
- if (recent.length < 3) return { created: 0, updated: 0, deleted: 0 };
30425
- const text4 = recent.map((m) => `[${m.role}] ${m.content}`).join("\n\n");
30426
- const existing = forProject(input.projectPath, false);
30427
- const existingForPrompt = existing.map((e) => ({
30428
- id: e.id,
30429
- category: e.category,
30430
- title: e.title,
30431
- content: e.content
30432
- }));
30433
- const userContent = curatorUser({
30434
- messages: text4,
30435
- existing: existingForPrompt
30436
- });
30437
- const model = input.model ?? cfg.model;
30438
- const responseText = await input.llm.prompt(
30439
- CURATOR_SYSTEM,
30440
- userContent,
30441
- { model, workerID: "lore-curator", thinking: false, sessionID: input.sessionID, maxTokens: 2048 }
30442
- );
30443
- if (!responseText) return { created: 0, updated: 0, deleted: 0 };
30444
- const ops = parseOps(responseText);
30955
+ function applyOps(ops, input) {
30445
30956
  let created = 0;
30446
30957
  let updated = 0;
30447
30958
  let deleted = 0;
30448
30959
  const idsToSync = [];
30449
30960
  for (const op of ops) {
30450
30961
  if (op.op === "create") {
30962
+ if (input.skipCreate) continue;
30451
30963
  const content3 = op.content.length > MAX_ENTRY_CONTENT_LENGTH ? op.content.slice(0, MAX_ENTRY_CONTENT_LENGTH) + " [truncated \u2014 entry too long]" : op.content;
30452
30964
  const id = create({
30453
30965
  projectPath: op.scope === "project" ? input.projectPath : void 0,
@@ -30479,9 +30991,64 @@ async function run2(input) {
30479
30991
  for (const id of idsToSync) {
30480
30992
  syncRefs(id);
30481
30993
  }
30482
- lastCuratedAt.set(input.sessionID, Date.now());
30483
30994
  return { created, updated, deleted };
30484
30995
  }
30996
+ var lastCuratedAt = /* @__PURE__ */ new Map();
30997
+ async function run2(input) {
30998
+ const cfg = config2();
30999
+ if (!cfg.curator.enabled) return { created: 0, updated: 0, deleted: 0 };
31000
+ const all3 = bySession(input.projectPath, input.sessionID);
31001
+ const sessionCuratedAt = lastCuratedAt.get(input.sessionID) ?? 0;
31002
+ const recent = all3.filter((m) => m.created_at > sessionCuratedAt);
31003
+ if (recent.length < 3) return { created: 0, updated: 0, deleted: 0 };
31004
+ const text4 = recent.map((m) => `[${m.role}] ${m.content}`).join("\n\n");
31005
+ const existing = forProject(input.projectPath, false);
31006
+ const existingForPrompt = existing.map((e) => ({
31007
+ id: e.id,
31008
+ category: e.category,
31009
+ title: e.title,
31010
+ content: e.content
31011
+ }));
31012
+ const baseUserContent = curatorUser({
31013
+ messages: text4,
31014
+ existing: existingForPrompt
31015
+ });
31016
+ let crossSessionContext = "";
31017
+ try {
31018
+ crossSessionContext = await detectAndFormat({
31019
+ projectPath: input.projectPath,
31020
+ sessionID: input.sessionID
31021
+ });
31022
+ } catch (err) {
31023
+ warn("instruction-detect failed (non-fatal):", err);
31024
+ }
31025
+ const userContent = baseUserContent + crossSessionContext;
31026
+ const model = input.model ?? cfg.model;
31027
+ const responseText = await input.llm.prompt(
31028
+ CURATOR_SYSTEM,
31029
+ userContent,
31030
+ { model, workerID: "lore-curator", thinking: false, sessionID: input.sessionID, maxTokens: 2048 }
31031
+ );
31032
+ if (!responseText) return { created: 0, updated: 0, deleted: 0 };
31033
+ const ops = parseOps(responseText);
31034
+ const result = applyOps(ops, {
31035
+ projectPath: input.projectPath,
31036
+ sessionID: input.sessionID
31037
+ });
31038
+ if (result.created > 0) {
31039
+ try {
31040
+ const dupes = await deduplicate(input.projectPath, { dryRun: false });
31041
+ if (dupes.totalRemoved > 0) {
31042
+ info(`post-curation dedup: merged ${dupes.totalRemoved} duplicate entries`);
31043
+ result.deleted += dupes.totalRemoved;
31044
+ }
31045
+ } catch (err) {
31046
+ warn("post-curation dedup failed (non-fatal):", err);
31047
+ }
31048
+ }
31049
+ lastCuratedAt.set(input.sessionID, Date.now());
31050
+ return result;
31051
+ }
30485
31052
  function resetCurationTracker(sessionID) {
30486
31053
  if (sessionID) {
30487
31054
  lastCuratedAt.delete(sessionID);
@@ -30512,26 +31079,1431 @@ async function consolidate(input) {
30512
31079
  );
30513
31080
  if (!responseText) return { updated: 0, deleted: 0 };
30514
31081
  const ops = parseOps(responseText);
30515
- let updated = 0;
30516
- let deleted = 0;
30517
- for (const op of ops) {
30518
- if (op.op === "update") {
30519
- const entry = get(op.id);
30520
- if (entry) {
30521
- const content3 = op.content !== void 0 && op.content.length > MAX_ENTRY_CONTENT_LENGTH ? op.content.slice(0, MAX_ENTRY_CONTENT_LENGTH) + " [truncated \u2014 entry too long]" : op.content;
30522
- update(op.id, { content: content3, confidence: op.confidence });
30523
- updated++;
31082
+ const result = applyOps(ops, {
31083
+ projectPath: input.projectPath,
31084
+ sessionID: input.sessionID,
31085
+ skipCreate: true
31086
+ // Consolidation must not add entries.
31087
+ });
31088
+ return { updated: result.updated, deleted: result.deleted };
31089
+ }
31090
+
31091
+ // src/import/index.ts
31092
+ var import_exports = {};
31093
+ __export(import_exports, {
31094
+ clearProviders: () => clearProviders,
31095
+ computeHash: () => computeHash,
31096
+ detectAll: () => detectAll,
31097
+ extractKnowledge: () => extractKnowledge,
31098
+ getProvider: () => getProvider2,
31099
+ getProviders: () => getProviders,
31100
+ isImported: () => isImported,
31101
+ listImports: () => listImports,
31102
+ recordImport: () => recordImport,
31103
+ registerProvider: () => registerProvider
31104
+ });
31105
+
31106
+ // src/import/providers/index.ts
31107
+ var providers = [];
31108
+ function registerProvider(provider) {
31109
+ providers.push(provider);
31110
+ }
31111
+ function getProviders() {
31112
+ return providers;
31113
+ }
31114
+ function getProvider2(name) {
31115
+ return providers.find((p2) => p2.name === name);
31116
+ }
31117
+ function clearProviders() {
31118
+ providers.length = 0;
31119
+ }
31120
+
31121
+ // src/import/detect.ts
31122
+ function detectAll(projectPath) {
31123
+ const results = [];
31124
+ for (const provider of getProviders()) {
31125
+ try {
31126
+ const sessions = provider.detect(projectPath);
31127
+ if (sessions.length > 0) {
31128
+ results.push({
31129
+ agentName: provider.name,
31130
+ agentDisplayName: provider.displayName,
31131
+ sessions,
31132
+ totalTokens: sessions.reduce((s, sess) => s + sess.estimatedTokens, 0),
31133
+ totalMessages: sessions.reduce((s, sess) => s + sess.messageCount, 0)
31134
+ });
30524
31135
  }
30525
- } else if (op.op === "delete") {
30526
- const entry = get(op.id);
30527
- if (entry) {
30528
- remove(op.id);
30529
- deleted++;
31136
+ } catch (err) {
31137
+ }
31138
+ }
31139
+ return results.sort((a, b) => b.totalMessages - a.totalMessages);
31140
+ }
31141
+
31142
+ // src/import/extract.ts
31143
+ var IMPORT_CURATOR_SYSTEM = `${CURATOR_SYSTEM}
31144
+
31145
+ ADDITIONAL CONTEXT: You are extracting knowledge from HISTORICAL conversations with a different AI coding agent. Focus on durable insights that are still relevant:
31146
+ - Architecture decisions, design patterns, and project conventions
31147
+ - Gotchas, non-obvious bugs, and their fixes
31148
+ - Developer preferences and workflow patterns
31149
+ - Key technical choices and their rationale
31150
+
31151
+ Ignore:
31152
+ - References to the other agent's specific capabilities or limitations
31153
+ - Task-specific state that is no longer current (e.g. "currently debugging X")
31154
+ - Debugging steps for issues that were already resolved
31155
+ - Transient conversation artifacts (greetings, acknowledgments, status updates)`;
31156
+ async function extractKnowledge(input) {
31157
+ const result = {
31158
+ created: 0,
31159
+ updated: 0,
31160
+ deleted: 0,
31161
+ chunksProcessed: 0,
31162
+ chunksFailed: 0
31163
+ };
31164
+ const sorted = [...input.chunks].sort((a, b) => a.timestamp - b.timestamp);
31165
+ for (let i = 0; i < sorted.length; i++) {
31166
+ const chunk = sorted[i];
31167
+ const existing = forProject(input.projectPath, false);
31168
+ const existingForPrompt = existing.map((e) => ({
31169
+ id: e.id,
31170
+ category: e.category,
31171
+ title: e.title,
31172
+ content: e.content
31173
+ }));
31174
+ const userContent = curatorUser({
31175
+ messages: chunk.text,
31176
+ existing: existingForPrompt
31177
+ });
31178
+ try {
31179
+ const response = await input.llm.prompt(
31180
+ IMPORT_CURATOR_SYSTEM,
31181
+ userContent,
31182
+ {
31183
+ model: input.model,
31184
+ workerID: "lore-import",
31185
+ thinking: false,
31186
+ maxTokens: 4096,
31187
+ sessionID: input.sessionID
31188
+ }
31189
+ );
31190
+ if (response) {
31191
+ const ops = parseOps(response);
31192
+ const applied = applyOps(ops, {
31193
+ projectPath: input.projectPath,
31194
+ sessionID: input.sessionID
31195
+ });
31196
+ result.created += applied.created;
31197
+ result.updated += applied.updated;
31198
+ result.deleted += applied.deleted;
31199
+ }
31200
+ result.chunksProcessed++;
31201
+ } catch {
31202
+ result.chunksFailed++;
31203
+ }
31204
+ input.onProgress?.({
31205
+ current: i + 1,
31206
+ total: sorted.length,
31207
+ created: result.created,
31208
+ updated: result.updated
31209
+ });
31210
+ }
31211
+ return result;
31212
+ }
31213
+
31214
+ // src/import/history.ts
31215
+ function isImported(projectPath, agentName, sourceId, sourceHash) {
31216
+ const projectId2 = ensureProject(projectPath);
31217
+ const row = db().query(
31218
+ `SELECT * FROM import_history
31219
+ WHERE project_id = ? AND agent_name = ? AND source_id = ?`
31220
+ ).get(projectId2, agentName, sourceId);
31221
+ if (!row) return null;
31222
+ if (row.source_hash !== sourceHash) return null;
31223
+ return row;
31224
+ }
31225
+ function recordImport(projectPath, agentName, sourceId, sourceHash, stats) {
31226
+ const projectId2 = ensureProject(projectPath);
31227
+ db().query(
31228
+ `INSERT OR REPLACE INTO import_history
31229
+ (id, project_id, agent_name, source_id, source_hash, entries_created, entries_updated, imported_at)
31230
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)`
31231
+ ).run(
31232
+ crypto.randomUUID(),
31233
+ projectId2,
31234
+ agentName,
31235
+ sourceId,
31236
+ sourceHash,
31237
+ stats.created,
31238
+ stats.updated,
31239
+ Date.now()
31240
+ );
31241
+ }
31242
+ function listImports(projectPath) {
31243
+ const projectId2 = ensureProject(projectPath);
31244
+ return db().query(
31245
+ `SELECT * FROM import_history
31246
+ WHERE project_id = ? AND source_id != '__declined__'
31247
+ ORDER BY imported_at DESC`
31248
+ ).all(projectId2);
31249
+ }
31250
+ function computeHash(parts) {
31251
+ return `${parts.size ?? 0}:${parts.messageCount ?? 0}:${parts.lastTimestamp ?? 0}`;
31252
+ }
31253
+
31254
+ // src/import/providers/claude-code.ts
31255
+ import { readdirSync as readdirSync2, readFileSync as readFileSync4, statSync as statSync5 } from "fs";
31256
+ import { join as join8 } from "path";
31257
+ import { homedir as homedir2 } from "os";
31258
+ var CLAUDE_DIR = join8(homedir2(), ".claude", "projects");
31259
+ var MAX_TOOL_OUTPUT_CHARS = 500;
31260
+ var DEFAULT_MAX_TOKENS = 12288;
31261
+ function manglePath(projectPath) {
31262
+ return projectPath.replace(/\//g, "-");
31263
+ }
31264
+ function estimateTokens4(text4) {
31265
+ return Math.ceil(text4.length / 3);
31266
+ }
31267
+ function truncate(text4, max) {
31268
+ if (text4.length <= max) return text4;
31269
+ return text4.slice(0, max) + "...";
31270
+ }
31271
+ function blockToText(block) {
31272
+ switch (block.type) {
31273
+ case "text":
31274
+ return block.text;
31275
+ case "tool_use": {
31276
+ const tu = block;
31277
+ const inputSummary = truncate(JSON.stringify(tu.input), MAX_TOOL_OUTPUT_CHARS);
31278
+ return `[tool: ${tu.name}] ${inputSummary}`;
31279
+ }
31280
+ case "tool_result": {
31281
+ const tr = block;
31282
+ let content3;
31283
+ if (typeof tr.content === "string") {
31284
+ content3 = tr.content;
31285
+ } else if (Array.isArray(tr.content)) {
31286
+ content3 = tr.content.map((b) => {
31287
+ if (b.type === "text") return b.text;
31288
+ return "";
31289
+ }).filter(Boolean).join("\n");
31290
+ } else {
31291
+ content3 = "";
31292
+ }
31293
+ return content3 ? `[tool_result] ${truncate(content3, MAX_TOOL_OUTPUT_CHARS)}` : null;
31294
+ }
31295
+ case "thinking":
31296
+ return null;
31297
+ default:
31298
+ return null;
31299
+ }
31300
+ }
31301
+ function lineToText(parsed) {
31302
+ if (parsed.type === "user") {
31303
+ const msg = parsed;
31304
+ const content3 = msg.message.content;
31305
+ if (typeof content3 === "string") {
31306
+ return `[user] ${content3}`;
31307
+ }
31308
+ const parts = content3.map(blockToText).filter(Boolean);
31309
+ return parts.length > 0 ? `[user] ${parts.join("\n")}` : null;
31310
+ }
31311
+ if (parsed.type === "assistant") {
31312
+ const msg = parsed;
31313
+ const blocks = msg.message.content;
31314
+ if (!Array.isArray(blocks)) return null;
31315
+ const parts = blocks.map(blockToText).filter(Boolean);
31316
+ return parts.length > 0 ? `[assistant] ${parts.join("\n")}` : null;
31317
+ }
31318
+ return null;
31319
+ }
31320
+ function parseJSONL(filePath) {
31321
+ const raw = readFileSync4(filePath, "utf-8");
31322
+ const lines = [];
31323
+ for (const line of raw.split("\n")) {
31324
+ if (!line.trim()) continue;
31325
+ try {
31326
+ lines.push(JSON.parse(line));
31327
+ } catch {
31328
+ }
31329
+ }
31330
+ return lines;
31331
+ }
31332
+ function getSessionMetadata(filePath) {
31333
+ let raw;
31334
+ try {
31335
+ raw = readFileSync4(filePath, "utf-8");
31336
+ } catch {
31337
+ return null;
31338
+ }
31339
+ const lines = raw.split("\n").filter((l) => l.trim());
31340
+ if (lines.length === 0) return null;
31341
+ let sessionId;
31342
+ let startedAt = Infinity;
31343
+ let lastActivityAt = 0;
31344
+ let messageCount = 0;
31345
+ for (const line of lines) {
31346
+ try {
31347
+ const parsed = JSON.parse(line);
31348
+ if (parsed.sessionId && !sessionId) sessionId = parsed.sessionId;
31349
+ if (parsed.timestamp) {
31350
+ const ts = new Date(parsed.timestamp).getTime();
31351
+ if (!Number.isNaN(ts)) {
31352
+ if (ts < startedAt) startedAt = ts;
31353
+ if (ts > lastActivityAt) lastActivityAt = ts;
31354
+ }
31355
+ }
31356
+ if (parsed.type === "user" || parsed.type === "assistant") {
31357
+ messageCount++;
31358
+ }
31359
+ } catch {
31360
+ }
31361
+ }
31362
+ if (!sessionId || messageCount === 0) return null;
31363
+ const fileSize = raw.length;
31364
+ const estimatedTokens = Math.ceil(fileSize / 5);
31365
+ return {
31366
+ sessionId,
31367
+ startedAt: startedAt === Infinity ? Date.now() : startedAt,
31368
+ lastActivityAt,
31369
+ messageCount,
31370
+ estimatedTokens
31371
+ };
31372
+ }
31373
+ var claudeCodeProvider = {
31374
+ name: "claude-code",
31375
+ displayName: "Claude Code",
31376
+ detect(projectPath) {
31377
+ const mangled = manglePath(projectPath);
31378
+ const dir = join8(CLAUDE_DIR, mangled);
31379
+ let entries;
31380
+ try {
31381
+ entries = readdirSync2(dir);
31382
+ } catch {
31383
+ return [];
31384
+ }
31385
+ const sessions = [];
31386
+ for (const entry of entries) {
31387
+ if (!entry.endsWith(".jsonl")) continue;
31388
+ const filePath = join8(dir, entry);
31389
+ try {
31390
+ const stat = statSync5(filePath);
31391
+ if (!stat.isFile()) continue;
31392
+ } catch {
31393
+ continue;
31394
+ }
31395
+ const meta3 = getSessionMetadata(filePath);
31396
+ if (!meta3) continue;
31397
+ if (meta3.messageCount < 3) continue;
31398
+ const dateStr = new Date(meta3.startedAt).toISOString().slice(0, 10);
31399
+ sessions.push({
31400
+ id: filePath,
31401
+ label: `${dateStr} (${meta3.messageCount} messages)`,
31402
+ startedAt: meta3.startedAt,
31403
+ lastActivityAt: meta3.lastActivityAt,
31404
+ estimatedTokens: meta3.estimatedTokens,
31405
+ messageCount: meta3.messageCount
31406
+ });
31407
+ }
31408
+ return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
31409
+ },
31410
+ readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS) {
31411
+ const chunks = [];
31412
+ for (const filePath of sessionIds) {
31413
+ const lines = parseJSONL(filePath);
31414
+ const messages = [];
31415
+ for (const line of lines) {
31416
+ const text4 = lineToText(line);
31417
+ if (!text4) continue;
31418
+ const ts = "timestamp" in line && line.timestamp ? new Date(line.timestamp).getTime() : Date.now();
31419
+ messages.push({ text: text4, timestamp: ts });
31420
+ }
31421
+ if (messages.length === 0) continue;
31422
+ let currentTexts = [];
31423
+ let currentTokens = 0;
31424
+ let chunkStart = messages[0].timestamp;
31425
+ let chunkIndex = 0;
31426
+ const flushChunk = () => {
31427
+ if (currentTexts.length === 0) return;
31428
+ chunkIndex++;
31429
+ const text4 = currentTexts.join("\n\n");
31430
+ chunks.push({
31431
+ label: `Claude Code ${new Date(chunkStart).toISOString().slice(0, 10)} (${chunkIndex})`,
31432
+ text: text4,
31433
+ estimatedTokens: estimateTokens4(text4),
31434
+ timestamp: chunkStart
31435
+ });
31436
+ currentTexts = [];
31437
+ currentTokens = 0;
31438
+ };
31439
+ for (const msg of messages) {
31440
+ const msgTokens = estimateTokens4(msg.text);
31441
+ if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
31442
+ flushChunk();
31443
+ chunkStart = msg.timestamp;
31444
+ }
31445
+ currentTexts.push(msg.text);
31446
+ currentTokens += msgTokens;
31447
+ }
31448
+ flushChunk();
31449
+ }
31450
+ return chunks;
31451
+ }
31452
+ };
31453
+ registerProvider(claudeCodeProvider);
31454
+
31455
+ // src/import/providers/codex.ts
31456
+ import { readdirSync as readdirSync3, readFileSync as readFileSync5, statSync as statSync6, existsSync as existsSync6 } from "fs";
31457
+ import { join as join9 } from "path";
31458
+ import { homedir as homedir3 } from "os";
31459
+ var CODEX_DIR = join9(homedir3(), ".codex");
31460
+ var SESSIONS_DIR = join9(CODEX_DIR, "sessions");
31461
+ var ARCHIVED_DIR = join9(CODEX_DIR, "archived_sessions");
31462
+ var MAX_TOOL_OUTPUT_CHARS2 = 500;
31463
+ var DEFAULT_MAX_TOKENS2 = 12288;
31464
+ function estimateTokens5(text4) {
31465
+ return Math.ceil(text4.length / 3);
31466
+ }
31467
+ function truncate2(text4, max) {
31468
+ if (text4.length <= max) return text4;
31469
+ return text4.slice(0, max) + "...";
31470
+ }
31471
+ function findJsonlFiles(dir) {
31472
+ const results = [];
31473
+ if (!existsSync6(dir)) return results;
31474
+ const walk = (d) => {
31475
+ let entries;
31476
+ try {
31477
+ entries = readdirSync3(d);
31478
+ } catch {
31479
+ return;
31480
+ }
31481
+ for (const entry of entries) {
31482
+ const full = join9(d, entry);
31483
+ try {
31484
+ const stat = statSync6(full);
31485
+ if (stat.isDirectory()) walk(full);
31486
+ else if (stat.isFile() && entry.endsWith(".jsonl")) results.push(full);
31487
+ } catch {
31488
+ }
31489
+ }
31490
+ };
31491
+ walk(dir);
31492
+ return results;
31493
+ }
31494
+ function responseItemToText(item) {
31495
+ if (!item) return null;
31496
+ if (item.type === "message" && item.role && item.content) {
31497
+ const text4 = extractContent(item.content);
31498
+ if (text4) return `[${item.role}] ${text4}`;
31499
+ }
31500
+ if (item.type === "function_call" && item.name) {
31501
+ const args = item.arguments ? truncate2(item.arguments, MAX_TOOL_OUTPUT_CHARS2) : "";
31502
+ return `[tool: ${item.name}] ${args}`;
31503
+ }
31504
+ if (item.type === "function_call_output" && item.output) {
31505
+ return `[tool_result] ${truncate2(item.output, MAX_TOOL_OUTPUT_CHARS2)}`;
31506
+ }
31507
+ return null;
31508
+ }
31509
+ function extractContent(content3) {
31510
+ if (typeof content3 === "string") return content3;
31511
+ if (!Array.isArray(content3)) return null;
31512
+ const parts = [];
31513
+ for (const part of content3) {
31514
+ if ("text" in part && typeof part.text === "string") {
31515
+ parts.push(part.text);
31516
+ }
31517
+ }
31518
+ return parts.length > 0 ? parts.join("\n") : null;
31519
+ }
31520
+ function parseJSONL2(filePath) {
31521
+ let raw;
31522
+ try {
31523
+ raw = readFileSync5(filePath, "utf-8");
31524
+ } catch {
31525
+ return [];
31526
+ }
31527
+ const lines = [];
31528
+ for (const line of raw.split("\n")) {
31529
+ if (!line.trim()) continue;
31530
+ try {
31531
+ lines.push(JSON.parse(line));
31532
+ } catch {
31533
+ }
31534
+ }
31535
+ return lines;
31536
+ }
31537
+ function getSessionMeta(filePath) {
31538
+ let raw;
31539
+ try {
31540
+ raw = readFileSync5(filePath, "utf-8");
31541
+ } catch {
31542
+ return null;
31543
+ }
31544
+ const lines = raw.split("\n").filter((l) => l.trim());
31545
+ if (lines.length === 0) return null;
31546
+ let meta3;
31547
+ try {
31548
+ meta3 = JSON.parse(lines[0]);
31549
+ } catch {
31550
+ return null;
31551
+ }
31552
+ if (meta3.type !== "session_meta") return null;
31553
+ const payload = meta3.payload;
31554
+ let messageCount = 0;
31555
+ for (const line of lines) {
31556
+ try {
31557
+ const parsed = JSON.parse(line);
31558
+ if (parsed.type === "response_item" || parsed.type === "event_msg") {
31559
+ messageCount++;
31560
+ }
31561
+ } catch {
31562
+ }
31563
+ }
31564
+ return {
31565
+ id: payload.meta.id,
31566
+ cwd: payload.meta.cwd,
31567
+ timestamp: payload.meta.timestamp,
31568
+ messageCount,
31569
+ fileSize: raw.length
31570
+ };
31571
+ }
31572
+ var codexProvider = {
31573
+ name: "codex",
31574
+ displayName: "Codex",
31575
+ detect(projectPath) {
31576
+ const sessions = [];
31577
+ const allFiles = [
31578
+ ...findJsonlFiles(SESSIONS_DIR),
31579
+ ...findJsonlFiles(ARCHIVED_DIR)
31580
+ ];
31581
+ for (const filePath of allFiles) {
31582
+ const meta3 = getSessionMeta(filePath);
31583
+ if (!meta3) continue;
31584
+ if (meta3.cwd !== projectPath) continue;
31585
+ if (meta3.messageCount < 3) continue;
31586
+ const ts = new Date(meta3.timestamp).getTime();
31587
+ const estimatedTokens = Math.ceil(meta3.fileSize / 5);
31588
+ const dateStr = new Date(ts).toISOString().slice(0, 10);
31589
+ sessions.push({
31590
+ id: filePath,
31591
+ label: `${dateStr} (${meta3.messageCount} messages)`,
31592
+ startedAt: ts,
31593
+ lastActivityAt: ts,
31594
+ // Best approximation without reading all lines
31595
+ estimatedTokens,
31596
+ messageCount: meta3.messageCount
31597
+ });
31598
+ }
31599
+ return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
31600
+ },
31601
+ readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS2) {
31602
+ const chunks = [];
31603
+ for (const filePath of sessionIds) {
31604
+ const lines = parseJSONL2(filePath);
31605
+ const messages = [];
31606
+ let sessionTimestamp = Date.now();
31607
+ const firstLine = lines[0];
31608
+ if (firstLine?.type === "session_meta") {
31609
+ const meta3 = firstLine;
31610
+ const ts = new Date(meta3.payload.meta.timestamp).getTime();
31611
+ if (!Number.isNaN(ts)) sessionTimestamp = ts;
31612
+ }
31613
+ for (const line of lines) {
31614
+ if (line.type === "response_item") {
31615
+ const ri = line;
31616
+ const text4 = responseItemToText(ri.payload);
31617
+ if (text4) {
31618
+ messages.push({ text: text4, timestamp: sessionTimestamp });
31619
+ }
31620
+ } else if (line.type === "event_msg") {
31621
+ const ev = line;
31622
+ if (ev.payload.output) {
31623
+ messages.push({
31624
+ text: `[exec] ${truncate2(ev.payload.output, MAX_TOOL_OUTPUT_CHARS2)}`,
31625
+ timestamp: sessionTimestamp
31626
+ });
31627
+ }
31628
+ } else if (line.type === "compacted") {
31629
+ const comp = line;
31630
+ if (comp.payload.replacement_history) {
31631
+ for (const item of comp.payload.replacement_history) {
31632
+ const text4 = responseItemToText(item);
31633
+ if (text4) {
31634
+ messages.push({ text: text4, timestamp: sessionTimestamp });
31635
+ }
31636
+ }
31637
+ }
31638
+ }
31639
+ }
31640
+ if (messages.length === 0) continue;
31641
+ let currentTexts = [];
31642
+ let currentTokens = 0;
31643
+ let chunkIndex = 0;
31644
+ const flushChunk = () => {
31645
+ if (currentTexts.length === 0) return;
31646
+ chunkIndex++;
31647
+ const text4 = currentTexts.join("\n\n");
31648
+ chunks.push({
31649
+ label: `Codex ${new Date(sessionTimestamp).toISOString().slice(0, 10)} (${chunkIndex})`,
31650
+ text: text4,
31651
+ estimatedTokens: estimateTokens5(text4),
31652
+ timestamp: sessionTimestamp
31653
+ });
31654
+ currentTexts = [];
31655
+ currentTokens = 0;
31656
+ };
31657
+ for (const msg of messages) {
31658
+ const msgTokens = estimateTokens5(msg.text);
31659
+ if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
31660
+ flushChunk();
31661
+ }
31662
+ currentTexts.push(msg.text);
31663
+ currentTokens += msgTokens;
31664
+ }
31665
+ flushChunk();
31666
+ }
31667
+ return chunks;
31668
+ }
31669
+ };
31670
+ registerProvider(codexProvider);
31671
+
31672
+ // src/import/providers/opencode.ts
31673
+ import { existsSync as existsSync7 } from "fs";
31674
+ import { join as join10 } from "path";
31675
+ import { homedir as homedir4 } from "os";
31676
+ var OPENCODE_DB_PATH = join10(
31677
+ process.env.XDG_DATA_HOME || join10(homedir4(), ".local", "share"),
31678
+ "opencode",
31679
+ "opencode.db"
31680
+ );
31681
+ var MAX_TOOL_OUTPUT_CHARS3 = 500;
31682
+ var DEFAULT_MAX_TOKENS3 = 12288;
31683
+ function estimateTokens6(text4) {
31684
+ return Math.ceil(text4.length / 3);
31685
+ }
31686
+ function truncate3(text4, max) {
31687
+ if (text4.length <= max) return text4;
31688
+ return text4.slice(0, max) + "...";
31689
+ }
31690
+ function openDB() {
31691
+ if (!existsSync7(OPENCODE_DB_PATH)) return null;
31692
+ try {
31693
+ return new Database(OPENCODE_DB_PATH, { readonly: true, readOnly: true });
31694
+ } catch {
31695
+ return null;
31696
+ }
31697
+ }
31698
+ function tableExists(database, table) {
31699
+ const row = database.query("SELECT name FROM sqlite_master WHERE type='table' AND name=?").get(table);
31700
+ return row != null;
31701
+ }
31702
+ function partsToConversationText(parts) {
31703
+ const segments = [];
31704
+ for (const part of parts) {
31705
+ if (part.type === "text" && part.text) {
31706
+ segments.push(part.text);
31707
+ } else if (part.type === "tool" && part.tool && part.state?.status === "completed" && part.state.output) {
31708
+ segments.push(`[tool: ${part.tool}] ${truncate3(part.state.output, MAX_TOOL_OUTPUT_CHARS3)}`);
31709
+ }
31710
+ }
31711
+ return segments.join("\n");
31712
+ }
31713
+ var opencodeProvider = {
31714
+ name: "opencode",
31715
+ displayName: "OpenCode",
31716
+ detect(projectPath) {
31717
+ const database = openDB();
31718
+ if (!database) return [];
31719
+ try {
31720
+ if (!tableExists(database, "project") || !tableExists(database, "session") || !tableExists(database, "message")) {
31721
+ return [];
31722
+ }
31723
+ const project = database.query("SELECT id FROM project WHERE worktree = ?").get(projectPath);
31724
+ if (!project) return [];
31725
+ const sessions = database.query(
31726
+ `SELECT s.id, s.title, s.time_created, s.time_updated,
31727
+ (SELECT COUNT(*) FROM message m WHERE m.session_id = s.id) as msg_count
31728
+ FROM session s
31729
+ WHERE s.project_id = ? AND s.parent_id IS NULL
31730
+ ORDER BY s.time_updated DESC`
31731
+ ).all(project.id);
31732
+ const results = [];
31733
+ for (const sess of sessions) {
31734
+ if (sess.msg_count < 3) continue;
31735
+ const estimatedTokens = sess.msg_count * 500;
31736
+ const dateStr = new Date(sess.time_created).toISOString().slice(0, 10);
31737
+ const label = sess.title ? `${dateStr} - ${sess.title} (${sess.msg_count} messages)` : `${dateStr} (${sess.msg_count} messages)`;
31738
+ results.push({
31739
+ id: sess.id,
31740
+ label,
31741
+ startedAt: sess.time_created,
31742
+ lastActivityAt: sess.time_updated,
31743
+ estimatedTokens,
31744
+ messageCount: sess.msg_count
31745
+ });
30530
31746
  }
31747
+ return results;
31748
+ } finally {
31749
+ database.close();
30531
31750
  }
31751
+ },
31752
+ readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS3) {
31753
+ const database = openDB();
31754
+ if (!database) return [];
31755
+ const chunks = [];
31756
+ try {
31757
+ const hasParts = tableExists(database, "part");
31758
+ for (const sessionId of sessionIds) {
31759
+ const messages = database.query(
31760
+ `SELECT id, data, time_created FROM message
31761
+ WHERE session_id = ?
31762
+ ORDER BY time_created ASC`
31763
+ ).all(sessionId);
31764
+ if (messages.length === 0) continue;
31765
+ const textMessages = [];
31766
+ for (const msg of messages) {
31767
+ let msgData;
31768
+ try {
31769
+ msgData = JSON.parse(msg.data);
31770
+ } catch {
31771
+ continue;
31772
+ }
31773
+ const role = msgData.role ?? "unknown";
31774
+ let contentText = "";
31775
+ if (hasParts) {
31776
+ const parts = database.query(
31777
+ `SELECT data FROM part
31778
+ WHERE message_id = ?
31779
+ ORDER BY time_created ASC`
31780
+ ).all(msg.id);
31781
+ const parsedParts = [];
31782
+ for (const p2 of parts) {
31783
+ try {
31784
+ parsedParts.push(JSON.parse(p2.data));
31785
+ } catch {
31786
+ }
31787
+ }
31788
+ contentText = partsToConversationText(parsedParts);
31789
+ }
31790
+ if (!contentText.trim()) continue;
31791
+ textMessages.push({
31792
+ text: `[${role}] ${contentText}`,
31793
+ timestamp: msg.time_created
31794
+ });
31795
+ }
31796
+ if (textMessages.length === 0) continue;
31797
+ let currentTexts = [];
31798
+ let currentTokens = 0;
31799
+ let chunkStart = textMessages[0].timestamp;
31800
+ let chunkIndex = 0;
31801
+ const flushChunk = () => {
31802
+ if (currentTexts.length === 0) return;
31803
+ chunkIndex++;
31804
+ const text4 = currentTexts.join("\n\n");
31805
+ chunks.push({
31806
+ label: `OpenCode ${new Date(chunkStart).toISOString().slice(0, 10)} (${chunkIndex})`,
31807
+ text: text4,
31808
+ estimatedTokens: estimateTokens6(text4),
31809
+ timestamp: chunkStart
31810
+ });
31811
+ currentTexts = [];
31812
+ currentTokens = 0;
31813
+ };
31814
+ for (const msg of textMessages) {
31815
+ const msgTokens = estimateTokens6(msg.text);
31816
+ if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
31817
+ flushChunk();
31818
+ chunkStart = msg.timestamp;
31819
+ }
31820
+ currentTexts.push(msg.text);
31821
+ currentTokens += msgTokens;
31822
+ }
31823
+ flushChunk();
31824
+ }
31825
+ } finally {
31826
+ database.close();
31827
+ }
31828
+ return chunks;
31829
+ }
31830
+ };
31831
+ registerProvider(opencodeProvider);
31832
+
31833
+ // src/import/providers/cline.ts
31834
+ import { readFileSync as readFileSync6, existsSync as existsSync8, statSync as statSync7 } from "fs";
31835
+ import { join as join11 } from "path";
31836
+ import { homedir as homedir5 } from "os";
31837
+ var MAX_TOOL_OUTPUT_CHARS4 = 500;
31838
+ var DEFAULT_MAX_TOKENS4 = 12288;
31839
+ var EXTENSION_IDS = [
31840
+ "saoudrizwan.claude-dev",
31841
+ "cline.cline"
31842
+ ];
31843
+ function estimateTokens7(text4) {
31844
+ return Math.ceil(text4.length / 3);
31845
+ }
31846
+ function truncate4(text4, max) {
31847
+ if (text4.length <= max) return text4;
31848
+ return text4.slice(0, max) + "...";
31849
+ }
31850
+ function findGlobalStorageDirs() {
31851
+ const home = homedir5();
31852
+ const dirs = [];
31853
+ const basePaths = [];
31854
+ const platform = process.platform;
31855
+ if (platform === "darwin") {
31856
+ basePaths.push(
31857
+ join11(home, "Library", "Application Support", "Code", "User", "globalStorage"),
31858
+ join11(home, "Library", "Application Support", "Code - Insiders", "User", "globalStorage"),
31859
+ join11(home, "Library", "Application Support", "VSCodium", "User", "globalStorage")
31860
+ );
31861
+ } else if (platform === "win32") {
31862
+ const appdata = process.env.APPDATA || join11(home, "AppData", "Roaming");
31863
+ basePaths.push(
31864
+ join11(appdata, "Code", "User", "globalStorage"),
31865
+ join11(appdata, "Code - Insiders", "User", "globalStorage"),
31866
+ join11(appdata, "VSCodium", "User", "globalStorage")
31867
+ );
31868
+ } else {
31869
+ const configHome = process.env.XDG_CONFIG_HOME || join11(home, ".config");
31870
+ basePaths.push(
31871
+ join11(configHome, "Code", "User", "globalStorage"),
31872
+ join11(configHome, "Code - Insiders", "User", "globalStorage"),
31873
+ join11(configHome, "VSCodium", "User", "globalStorage")
31874
+ );
31875
+ basePaths.push(
31876
+ join11(home, ".vscode", "data", "User", "globalStorage"),
31877
+ join11(home, ".vscode-insiders", "data", "User", "globalStorage")
31878
+ );
31879
+ }
31880
+ for (const base of basePaths) {
31881
+ for (const extId of EXTENSION_IDS) {
31882
+ const dir = join11(base, extId);
31883
+ if (existsSync8(dir)) dirs.push(dir);
31884
+ }
31885
+ }
31886
+ return dirs;
31887
+ }
31888
+ function loadTaskHistory(storageDir, projectPath) {
31889
+ const paths = [
31890
+ join11(storageDir, "state", "taskHistory.json"),
31891
+ join11(storageDir, "taskHistory.json")
31892
+ ];
31893
+ for (const historyPath of paths) {
31894
+ if (!existsSync8(historyPath)) continue;
31895
+ try {
31896
+ const raw = readFileSync6(historyPath, "utf-8");
31897
+ const items = JSON.parse(raw);
31898
+ if (!Array.isArray(items)) continue;
31899
+ return items.filter(
31900
+ (item) => item.cwdOnTaskInitialization === projectPath
31901
+ );
31902
+ } catch {
31903
+ continue;
31904
+ }
31905
+ }
31906
+ return [];
31907
+ }
31908
+ function readConversation(taskDir) {
31909
+ const filePath = join11(taskDir, "api_conversation_history.json");
31910
+ if (!existsSync8(filePath)) return [];
31911
+ try {
31912
+ const raw = readFileSync6(filePath, "utf-8");
31913
+ const messages = JSON.parse(raw);
31914
+ return Array.isArray(messages) ? messages : [];
31915
+ } catch {
31916
+ return [];
31917
+ }
31918
+ }
31919
+ function blockToText2(block) {
31920
+ switch (block.type) {
31921
+ case "text":
31922
+ return block.text;
31923
+ case "tool_use": {
31924
+ const tu = block;
31925
+ return `[tool: ${tu.name}] ${truncate4(JSON.stringify(tu.input), MAX_TOOL_OUTPUT_CHARS4)}`;
31926
+ }
31927
+ case "tool_result": {
31928
+ const tr = block;
31929
+ let content3;
31930
+ if (typeof tr.content === "string") {
31931
+ content3 = tr.content;
31932
+ } else if (Array.isArray(tr.content)) {
31933
+ content3 = tr.content.filter((b) => b.type === "text").map((b) => b.text).join("\n");
31934
+ } else {
31935
+ content3 = "";
31936
+ }
31937
+ return content3 ? `[tool_result] ${truncate4(content3, MAX_TOOL_OUTPUT_CHARS4)}` : null;
31938
+ }
31939
+ default:
31940
+ return null;
31941
+ }
31942
+ }
31943
+ function messageToText(msg) {
31944
+ if (typeof msg.content === "string") {
31945
+ return msg.content ? `[${msg.role}] ${msg.content}` : null;
31946
+ }
31947
+ const parts = msg.content.map(blockToText2).filter(Boolean);
31948
+ return parts.length > 0 ? `[${msg.role}] ${parts.join("\n")}` : null;
31949
+ }
31950
+ var clineProvider = {
31951
+ name: "cline",
31952
+ displayName: "Cline",
31953
+ detect(projectPath) {
31954
+ const sessions = [];
31955
+ const storageDirs = findGlobalStorageDirs();
31956
+ for (const storageDir of storageDirs) {
31957
+ const tasks = loadTaskHistory(storageDir, projectPath);
31958
+ for (const task of tasks) {
31959
+ const taskDir = join11(storageDir, "tasks", task.id);
31960
+ if (!existsSync8(taskDir)) continue;
31961
+ const messages = readConversation(taskDir);
31962
+ if (messages.length < 3) continue;
31963
+ const dateStr = new Date(task.ts).toISOString().slice(0, 10);
31964
+ const label = task.task ? `${dateStr} - ${truncate4(task.task, 60)} (${messages.length} messages)` : `${dateStr} (${messages.length} messages)`;
31965
+ const historyFile = join11(taskDir, "api_conversation_history.json");
31966
+ let estimatedTokens = messages.length * 500;
31967
+ try {
31968
+ const stat = statSync7(historyFile);
31969
+ estimatedTokens = Math.ceil(stat.size / 5);
31970
+ } catch {
31971
+ }
31972
+ sessions.push({
31973
+ id: taskDir,
31974
+ label,
31975
+ startedAt: task.ts,
31976
+ lastActivityAt: task.ts,
31977
+ estimatedTokens,
31978
+ messageCount: messages.length
31979
+ });
31980
+ }
31981
+ }
31982
+ return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
31983
+ },
31984
+ readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS4) {
31985
+ const chunks = [];
31986
+ for (const taskDir of sessionIds) {
31987
+ const messages = readConversation(taskDir);
31988
+ if (messages.length === 0) continue;
31989
+ let sessionTimestamp;
31990
+ try {
31991
+ sessionTimestamp = statSync7(taskDir).mtimeMs;
31992
+ } catch {
31993
+ sessionTimestamp = Date.now();
31994
+ }
31995
+ const textMessages = [];
31996
+ for (const msg of messages) {
31997
+ const text4 = messageToText(msg);
31998
+ if (text4) textMessages.push({ text: text4 });
31999
+ }
32000
+ if (textMessages.length === 0) continue;
32001
+ let currentTexts = [];
32002
+ let currentTokens = 0;
32003
+ let chunkIndex = 0;
32004
+ const flushChunk = () => {
32005
+ if (currentTexts.length === 0) return;
32006
+ chunkIndex++;
32007
+ const text4 = currentTexts.join("\n\n");
32008
+ chunks.push({
32009
+ label: `Cline ${new Date(sessionTimestamp).toISOString().slice(0, 10)} (${chunkIndex})`,
32010
+ text: text4,
32011
+ estimatedTokens: estimateTokens7(text4),
32012
+ timestamp: sessionTimestamp
32013
+ });
32014
+ currentTexts = [];
32015
+ currentTokens = 0;
32016
+ };
32017
+ for (const msg of textMessages) {
32018
+ const msgTokens = estimateTokens7(msg.text);
32019
+ if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
32020
+ flushChunk();
32021
+ }
32022
+ currentTexts.push(msg.text);
32023
+ currentTokens += msgTokens;
32024
+ }
32025
+ flushChunk();
32026
+ }
32027
+ return chunks;
30532
32028
  }
30533
- return { updated, deleted };
32029
+ };
32030
+ registerProvider(clineProvider);
32031
+
32032
+ // src/import/providers/continue.ts
32033
+ import { readdirSync as readdirSync5, readFileSync as readFileSync7, existsSync as existsSync9 } from "fs";
32034
+ import { join as join12 } from "path";
32035
+ import { homedir as homedir6 } from "os";
32036
+ var MAX_TOOL_OUTPUT_CHARS5 = 500;
32037
+ var DEFAULT_MAX_TOKENS5 = 12288;
32038
+ function estimateTokens8(text4) {
32039
+ return Math.ceil(text4.length / 3);
32040
+ }
32041
+ function truncate5(text4, max) {
32042
+ if (text4.length <= max) return text4;
32043
+ return text4.slice(0, max) + "...";
30534
32044
  }
32045
+ function continueDir() {
32046
+ return process.env.CONTINUE_GLOBAL_DIR || join12(homedir6(), ".continue");
32047
+ }
32048
+ function loadSessionIndex() {
32049
+ const indexPath = join12(continueDir(), "sessions", "sessions.json");
32050
+ if (!existsSync9(indexPath)) return [];
32051
+ try {
32052
+ const raw = readFileSync7(indexPath, "utf-8");
32053
+ const parsed = JSON.parse(raw);
32054
+ return Array.isArray(parsed) ? parsed : [];
32055
+ } catch {
32056
+ return [];
32057
+ }
32058
+ }
32059
+ function loadSession(sessionId) {
32060
+ const filePath = join12(continueDir(), "sessions", `${sessionId}.json`);
32061
+ if (!existsSync9(filePath)) return null;
32062
+ try {
32063
+ const raw = readFileSync7(filePath, "utf-8");
32064
+ return JSON.parse(raw);
32065
+ } catch {
32066
+ return null;
32067
+ }
32068
+ }
32069
+ function extractMessageContent(content3) {
32070
+ if (typeof content3 === "string") return content3;
32071
+ if (!Array.isArray(content3)) return "";
32072
+ return content3.filter(
32073
+ (part) => part.type === "text" && typeof part.text === "string"
32074
+ ).map((part) => part.text).join("\n");
32075
+ }
32076
+ function historyItemToText(item) {
32077
+ const msg = item.message;
32078
+ if (!msg) return null;
32079
+ if (msg.role === "system") return null;
32080
+ const parts = [];
32081
+ const content3 = extractMessageContent(msg.content);
32082
+ if (content3) parts.push(content3);
32083
+ if (msg.toolCalls) {
32084
+ for (const call of msg.toolCalls) {
32085
+ if (call.function) {
32086
+ const args = truncate5(call.function.arguments || "{}", MAX_TOOL_OUTPUT_CHARS5);
32087
+ parts.push(`[tool: ${call.function.name}] ${args}`);
32088
+ }
32089
+ }
32090
+ }
32091
+ if (item.toolCallStates) {
32092
+ for (const state of item.toolCallStates) {
32093
+ if (state.output && state.status === "done") {
32094
+ parts.push(`[tool_result] ${truncate5(state.output, MAX_TOOL_OUTPUT_CHARS5)}`);
32095
+ }
32096
+ }
32097
+ }
32098
+ if (parts.length === 0) return null;
32099
+ const role = msg.role === "tool" ? "tool_result" : msg.role;
32100
+ return `[${role}] ${parts.join("\n")}`;
32101
+ }
32102
+ var continueProvider = {
32103
+ name: "continue",
32104
+ displayName: "Continue",
32105
+ detect(projectPath) {
32106
+ const sessions = [];
32107
+ const index2 = loadSessionIndex();
32108
+ for (const meta3 of index2) {
32109
+ if (meta3.workspaceDirectory !== projectPath) continue;
32110
+ const session = loadSession(meta3.sessionId);
32111
+ if (!session || !session.history || session.history.length < 3) continue;
32112
+ const ts = new Date(meta3.dateCreated).getTime();
32113
+ const dateStr = new Date(ts).toISOString().slice(0, 10);
32114
+ const messageCount = session.history.length;
32115
+ const label = meta3.title ? `${dateStr} - ${truncate5(meta3.title, 60)} (${messageCount} messages)` : `${dateStr} (${messageCount} messages)`;
32116
+ const estimatedTokens = messageCount * 500;
32117
+ sessions.push({
32118
+ id: meta3.sessionId,
32119
+ label,
32120
+ startedAt: ts,
32121
+ lastActivityAt: ts,
32122
+ estimatedTokens,
32123
+ messageCount
32124
+ });
32125
+ }
32126
+ const sessionsDir = join12(continueDir(), "sessions");
32127
+ if (existsSync9(sessionsDir)) {
32128
+ const existingIds = new Set(sessions.map((s) => s.id));
32129
+ let entries;
32130
+ try {
32131
+ entries = readdirSync5(sessionsDir);
32132
+ } catch {
32133
+ entries = [];
32134
+ }
32135
+ for (const entry of entries) {
32136
+ if (!entry.endsWith(".json") || entry === "sessions.json") continue;
32137
+ const sessionId = entry.replace(".json", "");
32138
+ if (existingIds.has(sessionId)) continue;
32139
+ const session = loadSession(sessionId);
32140
+ if (!session) continue;
32141
+ if (session.workspaceDirectory !== projectPath) continue;
32142
+ if (!session.history || session.history.length < 3) continue;
32143
+ const dateStr = session.title ? truncate5(session.title, 60) : sessionId.slice(0, 8);
32144
+ sessions.push({
32145
+ id: sessionId,
32146
+ label: `${dateStr} (${session.history.length} messages)`,
32147
+ startedAt: Date.now(),
32148
+ lastActivityAt: Date.now(),
32149
+ estimatedTokens: session.history.length * 500,
32150
+ messageCount: session.history.length
32151
+ });
32152
+ }
32153
+ }
32154
+ return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
32155
+ },
32156
  // Read the selected Continue sessions and split each one into
  // token-bounded text chunks suitable for import.
  //
  // `_projectPath` is unused here: the session ids already identify the
  // sessions to load. `maxTokens` caps the estimated size of each chunk.
  readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS5) {
    const chunks = [];
    for (const sessionId of sessionIds) {
      const session = loadSession(sessionId);
      // Skip sessions that fail to load or carry no history.
      if (!session || !session.history) continue;
      const textMessages = [];
      for (const item of session.history) {
        const text4 = historyItemToText(item);
        if (text4) textMessages.push({ text: text4 });
      }
      if (textMessages.length === 0) continue;
      // NOTE(review): no per-session timestamp is read here — chunks are
      // stamped with "now"; confirm Continue sessions expose no better time.
      const sessionTimestamp = Date.now();
      let currentTexts = [];
      let currentTokens = 0;
      let chunkIndex = 0;
      // Emit the buffered messages as one labeled chunk and reset state.
      const flushChunk = () => {
        if (currentTexts.length === 0) return;
        chunkIndex++;
        const text4 = currentTexts.join("\n\n");
        chunks.push({
          label: `Continue ${session.title || sessionId.slice(0, 8)} (${chunkIndex})`,
          text: text4,
          estimatedTokens: estimateTokens8(text4),
          timestamp: sessionTimestamp
        });
        currentTexts = [];
        currentTokens = 0;
      };
      // Greedy packing: flush before adding a message would overflow the
      // budget; a single oversized message still becomes its own chunk.
      for (const msg of textMessages) {
        const msgTokens = estimateTokens8(msg.text);
        if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
          flushChunk();
        }
        currentTexts.push(msg.text);
        currentTokens += msgTokens;
      }
      flushChunk();
    }
    return chunks;
  }
32196
+ };
32197
+ registerProvider(continueProvider);
32198
+
32199
+ // src/import/providers/pi.ts
32200
+ import { readdirSync as readdirSync6, readFileSync as readFileSync8, statSync as statSync8 } from "fs";
32201
+ import { join as join13 } from "path";
32202
+ import { homedir as homedir7 } from "os";
32203
// Root directory where Pi stores per-project session files.
var PI_DIR = join13(homedir7(), ".pi", "agent", "sessions");
// Character cap for compaction summaries (doubled at the call site).
var MAX_TOOL_OUTPUT_CHARS6 = 500;
// Default per-chunk token budget used by readChunks.
var DEFAULT_MAX_TOKENS6 = 12288;
32206
// Rough token-count heuristic: assume about 3 characters per token.
function estimateTokens9(text4) {
  const approxCharsPerToken = 3;
  return Math.ceil(text4.length / approxCharsPerToken);
}
32209
// Clamp `text4` to at most `max` characters, appending "..." when cut.
function truncate6(text4, max) {
  return text4.length > max ? `${text4.slice(0, max)}...` : text4;
}
32213
// Encode an absolute cwd the way Pi names its session directories:
// drop the leading slash, turn remaining separators into dashes, and
// wrap the result in double dashes (e.g. "/a/b" -> "--a-b--").
function encodeCwd(cwd) {
  const withoutLeadingSlash = cwd.replace(/^\//, "");
  const dashed = withoutLeadingSlash.replace(/\//g, "-");
  return `--${dashed}--`;
}
32217
// Read a JSONL file and return one parsed value per non-blank line.
// An unreadable file yields an empty array; malformed lines are skipped.
function parseJSONL3(filePath) {
  let raw;
  try {
    raw = readFileSync8(filePath, "utf-8");
  } catch {
    return [];
  }
  const parsed = [];
  for (const line of raw.split("\n")) {
    if (!line.trim()) continue;
    try {
      parsed.push(JSON.parse(line));
    } catch {
      // Best effort: silently drop lines that are not valid JSON.
    }
  }
  return parsed;
}
32234
// Flatten a Pi session tree into the single "active" conversation path.
//
// Session files store entries as a tree: each line carries an `id` and a
// `parentId`. Starting from the `session` header line, we repeatedly
// descend into the most recent child (the last sibling), which is the
// branch the user actually continued. Files without a usable session root
// fall back to all `message` lines in file order.
//
// Fixes over the previous version: the `byId` map was built but never
// read (dead work, removed), and a malformed file whose parent links form
// a cycle would spin forever — a visited-set guard now terminates the walk.
//
// @param {Array<object>} lines - parsed JSONL entries for one session
// @returns {Array<object>} linearized entries (session header excluded)
function linearize(lines) {
  if (lines.length === 0) return [];
  const children = /* @__PURE__ */ new Map();
  let rootLine = null;
  for (const line of lines) {
    if (line.type === "session") {
      rootLine = line;
      continue;
    }
    if (!line.id) continue;
    const pid = line.parentId;
    if (pid) {
      const siblings = children.get(pid) ?? [];
      siblings.push(line);
      children.set(pid, siblings);
    }
  }
  // No root to walk from: best effort, return plain messages in order.
  if (!rootLine || !rootLine.id) return lines.filter((l) => l.type === "message");
  const result = [];
  const visited = new Set();
  let currentId = rootLine.id;
  while (currentId) {
    // Guard against cyclic parent links in corrupt files.
    if (visited.has(currentId)) break;
    visited.add(currentId);
    const kids = children.get(currentId);
    if (!kids || kids.length === 0) break;
    const next = kids[kids.length - 1];
    result.push(next);
    currentId = next.id;
  }
  return result;
}
32265
// Summarize a Pi session file from its JSONL header line.
// Returns null when the file is unreadable, empty, or does not start with
// a "session" entry.
function getSessionMeta2(filePath) {
  const lines = parseJSONL3(filePath);
  if (lines.length === 0) return null;
  const header = lines[0];
  if (header.type !== "session") return null;
  const session = header;
  let messageCount = 0;
  for (const l of lines) {
    if (l.type === "message") messageCount++;
  }
  let fileSize;
  try {
    fileSize = statSync8(filePath).size;
  } catch {
    // Size is only used for a rough token estimate; 0 is a safe fallback.
    fileSize = 0;
  }
  const ts = new Date(session.timestamp).getTime();
  return {
    id: session.id,
    cwd: session.cwd,
    // Unparseable timestamps fall back to "now".
    timestamp: Number.isNaN(ts) ? Date.now() : ts,
    messageCount,
    fileSize
  };
}
32287
// Conversation-import provider for Pi agent sessions. Pi stores each
// session as a JSONL file under ~/.pi/agent/sessions/<encoded-cwd>/, with
// a "session" header line followed by tree-structured message lines.
var piProvider = {
  name: "pi",
  displayName: "Pi",
  // List importable Pi sessions for this project directory.
  // Unreadable files and sessions with fewer than 3 messages are skipped;
  // results are sorted newest-first by last activity.
  detect(projectPath) {
    const encoded = encodeCwd(projectPath);
    const dir = join13(PI_DIR, encoded);
    let entries;
    try {
      entries = readdirSync6(dir);
    } catch {
      // No session directory for this project.
      return [];
    }
    const sessions = [];
    for (const entry of entries) {
      if (!entry.endsWith(".jsonl")) continue;
      const filePath = join13(dir, entry);
      const meta3 = getSessionMeta2(filePath);
      if (!meta3) continue;
      // Very short sessions are unlikely to be worth importing.
      if (meta3.messageCount < 3) continue;
      const dateStr = new Date(meta3.timestamp).toISOString().slice(0, 10);
      // Rough token estimate from raw file size (~5 bytes per token).
      const estimatedTokens = Math.ceil(meta3.fileSize / 5);
      sessions.push({
        // The session id is the absolute file path, consumed by readChunks.
        id: filePath,
        label: `${dateStr} (${meta3.messageCount} messages)`,
        startedAt: meta3.timestamp,
        lastActivityAt: meta3.timestamp,
        estimatedTokens,
        messageCount: meta3.messageCount
      });
    }
    return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
  },
  // Read the selected session files, linearize each message tree, and pack
  // the messages into token-bounded chunks (greedy split at `maxTokens`).
  readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS6) {
    const chunks = [];
    for (const filePath of sessionIds) {
      const allLines = parseJSONL3(filePath);
      const linearLines = linearize(allLines);
      // Default timestamp when the header is missing or unparseable.
      let sessionTimestamp = Date.now();
      const header = allLines.find((l) => l.type === "session");
      if (header?.type === "session") {
        const session = header;
        const ts = new Date(session.timestamp).getTime();
        if (!Number.isNaN(ts)) sessionTimestamp = ts;
      }
      const messages = [];
      for (const line of linearLines) {
        if (line.type === "message") {
          const msg = line;
          const content3 = msg.message.content;
          if (!content3) continue;
          const ts = new Date(msg.timestamp).getTime();
          messages.push({
            text: `[${msg.message.role}] ${content3}`,
            timestamp: Number.isNaN(ts) ? sessionTimestamp : ts
          });
        } else if (line.type === "compaction") {
          // Compaction lines summarize earlier, pruned history.
          const comp = line;
          if (comp.summary) {
            messages.push({
              text: `[summary] ${truncate6(comp.summary, MAX_TOOL_OUTPUT_CHARS6 * 2)}`,
              timestamp: sessionTimestamp
            });
          }
        }
      }
      if (messages.length === 0) continue;
      let currentTexts = [];
      let currentTokens = 0;
      let chunkIndex = 0;
      // Emit the buffered messages as one labeled chunk and reset state.
      const flushChunk = () => {
        if (currentTexts.length === 0) return;
        chunkIndex++;
        const text4 = currentTexts.join("\n\n");
        chunks.push({
          label: `Pi ${new Date(sessionTimestamp).toISOString().slice(0, 10)} (${chunkIndex})`,
          text: text4,
          estimatedTokens: estimateTokens9(text4),
          timestamp: sessionTimestamp
        });
        currentTexts = [];
        currentTokens = 0;
      };
      for (const msg of messages) {
        const msgTokens = estimateTokens9(msg.text);
        // Flush before overflowing the budget; a single oversized message
        // still becomes its own chunk.
        if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
          flushChunk();
        }
        currentTexts.push(msg.text);
        currentTokens += msgTokens;
      }
      flushChunk();
    }
    return chunks;
  }
};
registerProvider(piProvider);
32383
+
32384
+ // src/import/providers/aider.ts
32385
+ import { existsSync as existsSync11, readFileSync as readFileSync9, statSync as statSync9 } from "fs";
32386
+ import { join as join14 } from "path";
32387
// Aider keeps the whole chat history in one markdown file at project root.
var HISTORY_FILE = ".aider.chat.history.md";
// Default per-chunk token budget when splitting the history for import.
var DEFAULT_MAX_TOKENS7 = 12288;
// Matches "#### <role>" message header lines (case-insensitive).
var ROLE_HEADER_RE = /^####\s+(user|assistant|system)\s*$/i;
32390
// Heuristic token estimate: roughly one token per 3 characters.
function estimateTokens10(text4) {
  const CHARS_PER_TOKEN = 3;
  return Math.ceil(text4.length / CHARS_PER_TOKEN);
}
32393
// Parse an aider markdown chat log (".aider.chat.history.md") into a flat
// list of role-tagged messages.
//
// Format: each message starts with a "#### <role>" header line (role is
// user/assistant/system, case-insensitive); a bare "---" line ends the
// current message without opening a new one; all other lines belong to the
// message opened by the most recent header. Lines seen before any header,
// and messages that trim to nothing, are discarded.
//
// Fix: split on /\r?\n/ instead of "\n" so CRLF (Windows-edited) logs do
// not leak stray "\r" characters into multi-line message bodies.
//
// @param {string} content3 - raw history file contents
// @returns {{role: string, text: string}[]} messages in file order
function parseAiderHistory(content3) {
  // Local copy of the role-header pattern keeps the parser self-contained.
  const roleHeaderRe = /^####\s+(user|assistant|system)\s*$/i;
  const messages = [];
  let currentRole = null;
  let currentLines = [];
  // Close out the in-progress message (if any) and reset the line buffer.
  const flush = () => {
    if (currentRole && currentLines.length > 0) {
      const text4 = currentLines.join("\n").trim();
      if (text4) {
        messages.push({ role: currentRole, text: text4 });
      }
    }
    currentLines = [];
  };
  for (const line of content3.split(/\r?\n/)) {
    const match = roleHeaderRe.exec(line);
    if (match) {
      flush();
      currentRole = match[1].toLowerCase();
      continue;
    }
    if (line.trim() === "---") {
      flush();
      currentRole = null;
      continue;
    }
    if (currentRole) {
      currentLines.push(line);
    }
  }
  flush();
  return messages;
}
32426
// Conversation-import provider for aider, which stores a project's entire
// chat history in a single ".aider.chat.history.md" file at project root.
var aiderProvider = {
  name: "aider",
  displayName: "Aider",
  // Report the project's history file as a single importable "session".
  // Returns [] when the file is missing, empty, unreadable, or holds
  // fewer than 3 parsed messages.
  detect(projectPath) {
    const filePath = join14(projectPath, HISTORY_FILE);
    if (!existsSync11(filePath)) return [];
    let stat;
    try {
      stat = statSync9(filePath);
    } catch {
      return [];
    }
    if (!stat.isFile() || stat.size === 0) return [];
    let content3;
    try {
      content3 = readFileSync9(filePath, "utf-8");
    } catch {
      return [];
    }
    const messages = parseAiderHistory(content3);
    if (messages.length < 3) return [];
    const estimatedTokens = estimateTokens10(content3);
    return [
      {
        // The session id is the absolute file path, consumed by readChunks.
        id: filePath,
        label: `Chat history (${messages.length} messages, ${Math.round(stat.size / 1024)}KB)`,
        // birthtimeMs can be 0 on some filesystems; fall back to ctime.
        startedAt: stat.birthtimeMs || stat.ctimeMs,
        lastActivityAt: stat.mtimeMs,
        estimatedTokens,
        messageCount: messages.length
      }
    ];
  },
  // Re-read the selected history files and pack their messages into
  // token-bounded chunks (greedy split at `maxTokens` estimated tokens).
  readChunks(projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS7) {
    const chunks = [];
    for (const filePath of sessionIds) {
      let content3;
      try {
        content3 = readFileSync9(filePath, "utf-8");
      } catch {
        continue;
      }
      const messages = parseAiderHistory(content3);
      if (messages.length === 0) continue;
      // Stamp chunks with the file's mtime when available.
      let fileTimestamp;
      try {
        fileTimestamp = statSync9(filePath).mtimeMs;
      } catch {
        fileTimestamp = Date.now();
      }
      let currentTexts = [];
      let currentTokens = 0;
      let chunkIndex = 0;
      // Emit the buffered messages as one labeled chunk and reset state.
      const flushChunk = () => {
        if (currentTexts.length === 0) return;
        chunkIndex++;
        const text4 = currentTexts.join("\n\n");
        chunks.push({
          label: `Aider history (${chunkIndex})`,
          text: text4,
          estimatedTokens: estimateTokens10(text4),
          timestamp: fileTimestamp
        });
        currentTexts = [];
        currentTokens = 0;
      };
      for (const msg of messages) {
        const formatted = `[${msg.role}] ${msg.text}`;
        const msgTokens = estimateTokens10(formatted);
        // Flush before overflowing the budget; a single oversized message
        // still becomes its own chunk.
        if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
          flushChunk();
        }
        currentTexts.push(formatted);
        currentTokens += msgTokens;
      }
      flushChunk();
    }
    return chunks;
  }
};
registerProvider(aiderProvider);
30535
32507
 
30536
32508
  // src/recall.ts
30537
32509
  function getTaggedText(tagged) {
@@ -30797,7 +32769,10 @@ async function searchRecall(input) {
30797
32769
  info("recall: query expansion failed, using original:", err);
30798
32770
  }
30799
32771
  }
32772
+ const queryTermCount = filterTerms(query).length;
32773
+ const vectorWeight = queryTermCount >= (searchConfig?.vectorBoostMinTerms ?? 3) ? searchConfig?.vectorBoostWeight ?? 1.5 : 1;
30800
32774
  const allRrfLists = [];
32775
+ let primaryListEnd = 0;
30801
32776
  for (const q of queries) {
30802
32777
  const knowledgeResults = [];
30803
32778
  if (knowledgeEnabled && scope !== "session") {
@@ -30874,7 +32849,11 @@ async function searchRecall(input) {
30874
32849
  key: (r) => `t:${r.item.id}`
30875
32850
  });
30876
32851
  }
32852
+ if (primaryListEnd === 0) {
32853
+ primaryListEnd = allRrfLists.length;
32854
+ }
30877
32855
  }
32856
+ const perQueryListEnd = allRrfLists.length;
30878
32857
  if (isAvailable() && scope !== "session") {
30879
32858
  try {
30880
32859
  const [queryVec] = await embed([query], "query");
@@ -30893,7 +32872,8 @@ async function searchRecall(input) {
30893
32872
  if (vectorTagged.length) {
30894
32873
  allRrfLists.push({
30895
32874
  items: vectorTagged,
30896
- key: (r) => `k:${r.item.id}`
32875
+ key: (r) => `k:${r.item.id}`,
32876
+ weight: vectorWeight
30897
32877
  });
30898
32878
  }
30899
32879
  }
@@ -30912,7 +32892,8 @@ async function searchRecall(input) {
30912
32892
  if (distVectorTagged.length) {
30913
32893
  allRrfLists.push({
30914
32894
  items: distVectorTagged,
30915
- key: (r) => `d:${r.item.id}`
32895
+ key: (r) => `d:${r.item.id}`,
32896
+ weight: vectorWeight
30916
32897
  });
30917
32898
  }
30918
32899
  }
@@ -30936,7 +32917,8 @@ async function searchRecall(input) {
30936
32917
  if (temporalVectorTagged.length) {
30937
32918
  allRrfLists.push({
30938
32919
  items: temporalVectorTagged,
30939
- key: (r) => `t:${r.item.id}`
32920
+ key: (r) => `t:${r.item.id}`,
32921
+ weight: vectorWeight
30940
32922
  });
30941
32923
  }
30942
32924
  }
@@ -31039,6 +33021,15 @@ async function searchRecall(input) {
31039
33021
  });
31040
33022
  }
31041
33023
  }
33024
+ const MAX_RRF_LISTS = 10;
33025
+ if (allRrfLists.length > MAX_RRF_LISTS) {
33026
+ const primary = allRrfLists.slice(0, primaryListEnd);
33027
+ const expanded = allRrfLists.slice(primaryListEnd, perQueryListEnd);
33028
+ const supplemental = allRrfLists.slice(perQueryListEnd);
33029
+ const budget = Math.max(0, MAX_RRF_LISTS - primary.length - supplemental.length);
33030
+ allRrfLists.length = 0;
33031
+ allRrfLists.push(...primary, ...expanded.slice(0, budget), ...supplemental);
33032
+ }
31042
33033
  const fused = reciprocalRankFusion(allRrfLists);
31043
33034
  const maxResults = limit * 3;
31044
33035
  return fused.slice(0, maxResults);
@@ -31108,9 +33099,6 @@ async function runRecall(input) {
31108
33099
  if (input.id) {
31109
33100
  return recallById(input.id);
31110
33101
  }
31111
- if (ftsQuery(input.query) === EMPTY_QUERY) {
31112
- return "Query too vague \u2014 try using specific keywords, file names, or technical terms.";
31113
- }
31114
33102
  const fused = await searchRecall(input);
31115
33103
  const recallCfg = input.searchConfig?.recall;
31116
33104
  return formatFusedResults(fused, {
@@ -31157,9 +33145,11 @@ export {
31157
33145
  config2 as config,
31158
33146
  consolidationUser,
31159
33147
  consumeCameOutOfIdle,
33148
+ import_exports as conversationImport,
31160
33149
  curator_exports as curator,
31161
33150
  curatorUser,
31162
33151
  data_exports as data,
33152
+ dataDir,
31163
33153
  db,
31164
33154
  dbPath,
31165
33155
  distillation_exports as distillation,
@@ -31179,6 +33169,7 @@ export {
31179
33169
  ftsQueryRelaxed,
31180
33170
  getGitRemote,
31181
33171
  getInstanceId,
33172
+ getLastImportAt,
31182
33173
  getLastTransformEstimate,
31183
33174
  getLastTransformedCount,
31184
33175
  getLastTurnAt,
@@ -31191,6 +33182,7 @@ export {
31191
33182
  importLoreFile,
31192
33183
  inline,
31193
33184
  inspectSessionState,
33185
+ instruction_detect_exports as instructionDetect,
31194
33186
  isFirstRun,
31195
33187
  isReasoningPart,
31196
33188
  isTextPart,
@@ -31227,6 +33219,7 @@ export {
31227
33219
  searchRecall,
31228
33220
  serialize,
31229
33221
  setForceMinLayer,
33222
+ setLastImportAt,
31230
33223
  setLastTurnAtForTest,
31231
33224
  setLtmTokens,
31232
33225
  setMaxContextTokens,