@loreai/core 0.17.1 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235) hide show
  1. package/dist/bun/agents-file.d.ts +4 -0
  2. package/dist/bun/agents-file.d.ts.map +1 -1
  3. package/dist/bun/config.d.ts +2 -0
  4. package/dist/bun/config.d.ts.map +1 -1
  5. package/dist/bun/curator.d.ts +45 -0
  6. package/dist/bun/curator.d.ts.map +1 -1
  7. package/dist/bun/data-dir.d.ts +18 -0
  8. package/dist/bun/data-dir.d.ts.map +1 -0
  9. package/dist/bun/db.d.ts +12 -0
  10. package/dist/bun/db.d.ts.map +1 -1
  11. package/dist/bun/distillation.d.ts.map +1 -1
  12. package/dist/bun/embedding-vendor.d.ts +22 -38
  13. package/dist/bun/embedding-vendor.d.ts.map +1 -1
  14. package/dist/bun/embedding-worker-types.d.ts +17 -12
  15. package/dist/bun/embedding-worker-types.d.ts.map +1 -1
  16. package/dist/bun/embedding-worker.d.ts +9 -2
  17. package/dist/bun/embedding-worker.d.ts.map +1 -1
  18. package/dist/bun/embedding-worker.js +38864 -33
  19. package/dist/bun/embedding-worker.js.map +4 -4
  20. package/dist/bun/embedding.d.ts +30 -22
  21. package/dist/bun/embedding.d.ts.map +1 -1
  22. package/dist/bun/gradient.d.ts +8 -1
  23. package/dist/bun/gradient.d.ts.map +1 -1
  24. package/dist/bun/import/detect.d.ts +14 -0
  25. package/dist/bun/import/detect.d.ts.map +1 -0
  26. package/dist/bun/import/extract.d.ts +43 -0
  27. package/dist/bun/import/extract.d.ts.map +1 -0
  28. package/dist/bun/import/history.d.ts +40 -0
  29. package/dist/bun/import/history.d.ts.map +1 -0
  30. package/dist/bun/import/index.d.ts +17 -0
  31. package/dist/bun/import/index.d.ts.map +1 -0
  32. package/dist/bun/import/providers/aider.d.ts +2 -0
  33. package/dist/bun/import/providers/aider.d.ts.map +1 -0
  34. package/dist/bun/import/providers/claude-code.d.ts +2 -0
  35. package/dist/bun/import/providers/claude-code.d.ts.map +1 -0
  36. package/dist/bun/import/providers/cline.d.ts +2 -0
  37. package/dist/bun/import/providers/cline.d.ts.map +1 -0
  38. package/dist/bun/import/providers/codex.d.ts +2 -0
  39. package/dist/bun/import/providers/codex.d.ts.map +1 -0
  40. package/dist/bun/import/providers/continue.d.ts +2 -0
  41. package/dist/bun/import/providers/continue.d.ts.map +1 -0
  42. package/dist/bun/import/providers/index.d.ts +19 -0
  43. package/dist/bun/import/providers/index.d.ts.map +1 -0
  44. package/dist/bun/import/providers/opencode.d.ts +2 -0
  45. package/dist/bun/import/providers/opencode.d.ts.map +1 -0
  46. package/dist/bun/import/providers/pi.d.ts +2 -0
  47. package/dist/bun/import/providers/pi.d.ts.map +1 -0
  48. package/dist/bun/import/types.d.ts +82 -0
  49. package/dist/bun/import/types.d.ts.map +1 -0
  50. package/dist/bun/index.d.ts +4 -1
  51. package/dist/bun/index.d.ts.map +1 -1
  52. package/dist/bun/index.js +2217 -224
  53. package/dist/bun/index.js.map +4 -4
  54. package/dist/bun/instruction-detect.d.ts +66 -0
  55. package/dist/bun/instruction-detect.d.ts.map +1 -0
  56. package/dist/bun/log.d.ts +9 -0
  57. package/dist/bun/log.d.ts.map +1 -1
  58. package/dist/bun/ltm.d.ts +40 -0
  59. package/dist/bun/ltm.d.ts.map +1 -1
  60. package/dist/bun/pattern-extract.d.ts +7 -0
  61. package/dist/bun/pattern-extract.d.ts.map +1 -1
  62. package/dist/bun/prompt.d.ts +1 -1
  63. package/dist/bun/prompt.d.ts.map +1 -1
  64. package/dist/bun/recall.d.ts.map +1 -1
  65. package/dist/bun/search.d.ts +5 -3
  66. package/dist/bun/search.d.ts.map +1 -1
  67. package/dist/bun/temporal.d.ts.map +1 -1
  68. package/dist/bun/types.d.ts +1 -1
  69. package/dist/node/agents-file.d.ts +4 -0
  70. package/dist/node/agents-file.d.ts.map +1 -1
  71. package/dist/node/config.d.ts +2 -0
  72. package/dist/node/config.d.ts.map +1 -1
  73. package/dist/node/curator.d.ts +45 -0
  74. package/dist/node/curator.d.ts.map +1 -1
  75. package/dist/node/data-dir.d.ts +18 -0
  76. package/dist/node/data-dir.d.ts.map +1 -0
  77. package/dist/node/db.d.ts +12 -0
  78. package/dist/node/db.d.ts.map +1 -1
  79. package/dist/node/distillation.d.ts.map +1 -1
  80. package/dist/node/embedding-vendor.d.ts +22 -38
  81. package/dist/node/embedding-vendor.d.ts.map +1 -1
  82. package/dist/node/embedding-worker-types.d.ts +17 -12
  83. package/dist/node/embedding-worker-types.d.ts.map +1 -1
  84. package/dist/node/embedding-worker.d.ts +9 -2
  85. package/dist/node/embedding-worker.d.ts.map +1 -1
  86. package/dist/node/embedding-worker.js +38864 -33
  87. package/dist/node/embedding-worker.js.map +4 -4
  88. package/dist/node/embedding.d.ts +30 -22
  89. package/dist/node/embedding.d.ts.map +1 -1
  90. package/dist/node/gradient.d.ts +8 -1
  91. package/dist/node/gradient.d.ts.map +1 -1
  92. package/dist/node/import/detect.d.ts +14 -0
  93. package/dist/node/import/detect.d.ts.map +1 -0
  94. package/dist/node/import/extract.d.ts +43 -0
  95. package/dist/node/import/extract.d.ts.map +1 -0
  96. package/dist/node/import/history.d.ts +40 -0
  97. package/dist/node/import/history.d.ts.map +1 -0
  98. package/dist/node/import/index.d.ts +17 -0
  99. package/dist/node/import/index.d.ts.map +1 -0
  100. package/dist/node/import/providers/aider.d.ts +2 -0
  101. package/dist/node/import/providers/aider.d.ts.map +1 -0
  102. package/dist/node/import/providers/claude-code.d.ts +2 -0
  103. package/dist/node/import/providers/claude-code.d.ts.map +1 -0
  104. package/dist/node/import/providers/cline.d.ts +2 -0
  105. package/dist/node/import/providers/cline.d.ts.map +1 -0
  106. package/dist/node/import/providers/codex.d.ts +2 -0
  107. package/dist/node/import/providers/codex.d.ts.map +1 -0
  108. package/dist/node/import/providers/continue.d.ts +2 -0
  109. package/dist/node/import/providers/continue.d.ts.map +1 -0
  110. package/dist/node/import/providers/index.d.ts +19 -0
  111. package/dist/node/import/providers/index.d.ts.map +1 -0
  112. package/dist/node/import/providers/opencode.d.ts +2 -0
  113. package/dist/node/import/providers/opencode.d.ts.map +1 -0
  114. package/dist/node/import/providers/pi.d.ts +2 -0
  115. package/dist/node/import/providers/pi.d.ts.map +1 -0
  116. package/dist/node/import/types.d.ts +82 -0
  117. package/dist/node/import/types.d.ts.map +1 -0
  118. package/dist/node/index.d.ts +4 -1
  119. package/dist/node/index.d.ts.map +1 -1
  120. package/dist/node/index.js +2217 -224
  121. package/dist/node/index.js.map +4 -4
  122. package/dist/node/instruction-detect.d.ts +66 -0
  123. package/dist/node/instruction-detect.d.ts.map +1 -0
  124. package/dist/node/log.d.ts +9 -0
  125. package/dist/node/log.d.ts.map +1 -1
  126. package/dist/node/ltm.d.ts +40 -0
  127. package/dist/node/ltm.d.ts.map +1 -1
  128. package/dist/node/pattern-extract.d.ts +7 -0
  129. package/dist/node/pattern-extract.d.ts.map +1 -1
  130. package/dist/node/prompt.d.ts +1 -1
  131. package/dist/node/prompt.d.ts.map +1 -1
  132. package/dist/node/recall.d.ts.map +1 -1
  133. package/dist/node/search.d.ts +5 -3
  134. package/dist/node/search.d.ts.map +1 -1
  135. package/dist/node/temporal.d.ts.map +1 -1
  136. package/dist/node/types.d.ts +1 -1
  137. package/dist/types/agents-file.d.ts +4 -0
  138. package/dist/types/agents-file.d.ts.map +1 -1
  139. package/dist/types/config.d.ts +2 -0
  140. package/dist/types/config.d.ts.map +1 -1
  141. package/dist/types/curator.d.ts +45 -0
  142. package/dist/types/curator.d.ts.map +1 -1
  143. package/dist/types/data-dir.d.ts +18 -0
  144. package/dist/types/data-dir.d.ts.map +1 -0
  145. package/dist/types/db.d.ts +12 -0
  146. package/dist/types/db.d.ts.map +1 -1
  147. package/dist/types/distillation.d.ts.map +1 -1
  148. package/dist/types/embedding-vendor.d.ts +22 -38
  149. package/dist/types/embedding-vendor.d.ts.map +1 -1
  150. package/dist/types/embedding-worker-types.d.ts +17 -12
  151. package/dist/types/embedding-worker-types.d.ts.map +1 -1
  152. package/dist/types/embedding-worker.d.ts +9 -2
  153. package/dist/types/embedding-worker.d.ts.map +1 -1
  154. package/dist/types/embedding.d.ts +30 -22
  155. package/dist/types/embedding.d.ts.map +1 -1
  156. package/dist/types/gradient.d.ts +8 -1
  157. package/dist/types/gradient.d.ts.map +1 -1
  158. package/dist/types/import/detect.d.ts +14 -0
  159. package/dist/types/import/detect.d.ts.map +1 -0
  160. package/dist/types/import/extract.d.ts +43 -0
  161. package/dist/types/import/extract.d.ts.map +1 -0
  162. package/dist/types/import/history.d.ts +40 -0
  163. package/dist/types/import/history.d.ts.map +1 -0
  164. package/dist/types/import/index.d.ts +17 -0
  165. package/dist/types/import/index.d.ts.map +1 -0
  166. package/dist/types/import/providers/aider.d.ts +2 -0
  167. package/dist/types/import/providers/aider.d.ts.map +1 -0
  168. package/dist/types/import/providers/claude-code.d.ts +2 -0
  169. package/dist/types/import/providers/claude-code.d.ts.map +1 -0
  170. package/dist/types/import/providers/cline.d.ts +2 -0
  171. package/dist/types/import/providers/cline.d.ts.map +1 -0
  172. package/dist/types/import/providers/codex.d.ts +2 -0
  173. package/dist/types/import/providers/codex.d.ts.map +1 -0
  174. package/dist/types/import/providers/continue.d.ts +2 -0
  175. package/dist/types/import/providers/continue.d.ts.map +1 -0
  176. package/dist/types/import/providers/index.d.ts +19 -0
  177. package/dist/types/import/providers/index.d.ts.map +1 -0
  178. package/dist/types/import/providers/opencode.d.ts +2 -0
  179. package/dist/types/import/providers/opencode.d.ts.map +1 -0
  180. package/dist/types/import/providers/pi.d.ts +2 -0
  181. package/dist/types/import/providers/pi.d.ts.map +1 -0
  182. package/dist/types/import/types.d.ts +82 -0
  183. package/dist/types/import/types.d.ts.map +1 -0
  184. package/dist/types/index.d.ts +4 -1
  185. package/dist/types/index.d.ts.map +1 -1
  186. package/dist/types/instruction-detect.d.ts +66 -0
  187. package/dist/types/instruction-detect.d.ts.map +1 -0
  188. package/dist/types/log.d.ts +9 -0
  189. package/dist/types/log.d.ts.map +1 -1
  190. package/dist/types/ltm.d.ts +40 -0
  191. package/dist/types/ltm.d.ts.map +1 -1
  192. package/dist/types/pattern-extract.d.ts +7 -0
  193. package/dist/types/pattern-extract.d.ts.map +1 -1
  194. package/dist/types/prompt.d.ts +1 -1
  195. package/dist/types/prompt.d.ts.map +1 -1
  196. package/dist/types/recall.d.ts.map +1 -1
  197. package/dist/types/search.d.ts +5 -3
  198. package/dist/types/search.d.ts.map +1 -1
  199. package/dist/types/temporal.d.ts.map +1 -1
  200. package/dist/types/types.d.ts +1 -1
  201. package/package.json +2 -4
  202. package/src/agents-file.ts +41 -13
  203. package/src/config.ts +31 -18
  204. package/src/curator.ts +111 -75
  205. package/src/data-dir.ts +76 -0
  206. package/src/db.ts +110 -11
  207. package/src/distillation.ts +10 -2
  208. package/src/embedding-vendor.ts +23 -40
  209. package/src/embedding-worker-types.ts +19 -11
  210. package/src/embedding-worker.ts +111 -47
  211. package/src/embedding.ts +196 -171
  212. package/src/gradient.ts +9 -1
  213. package/src/import/detect.ts +37 -0
  214. package/src/import/extract.ts +137 -0
  215. package/src/import/history.ts +99 -0
  216. package/src/import/index.ts +45 -0
  217. package/src/import/providers/aider.ts +207 -0
  218. package/src/import/providers/claude-code.ts +339 -0
  219. package/src/import/providers/cline.ts +324 -0
  220. package/src/import/providers/codex.ts +369 -0
  221. package/src/import/providers/continue.ts +304 -0
  222. package/src/import/providers/index.ts +32 -0
  223. package/src/import/providers/opencode.ts +272 -0
  224. package/src/import/providers/pi.ts +332 -0
  225. package/src/import/types.ts +91 -0
  226. package/src/index.ts +5 -0
  227. package/src/instruction-detect.ts +275 -0
  228. package/src/log.ts +91 -3
  229. package/src/ltm.ts +316 -3
  230. package/src/pattern-extract.ts +41 -0
  231. package/src/prompt.ts +7 -1
  232. package/src/recall.ts +43 -5
  233. package/src/search.ts +7 -5
  234. package/src/temporal.ts +8 -6
  235. package/src/types.ts +1 -1
package/dist/bun/index.js CHANGED
@@ -145,9 +145,8 @@ function sha256(input) {
145
145
  }
146
146
 
147
147
  // src/db.ts
148
- import { join, dirname } from "path";
148
+ import { join as join2, dirname } from "path";
149
149
  import { mkdirSync } from "fs";
150
- import { homedir } from "os";
151
150
 
152
151
  // src/git.ts
153
152
  import { execSync } from "child_process";
@@ -210,6 +209,36 @@ function getGitRemote(path) {
210
209
  }
211
210
  }
212
211
 
212
+ // src/data-dir.ts
213
+ import { existsSync, renameSync } from "node:fs";
214
+ import { join } from "node:path";
215
+ import { homedir } from "node:os";
216
+ var OLD_DIR_NAME = "opencode-lore";
217
+ var NEW_DIR_NAME = "lore";
218
+ var migrationAttempted = false;
219
+ function baseDir() {
220
+ return process.env.XDG_DATA_HOME || join(homedir(), ".local", "share");
221
+ }
222
+ function migrateDataDir() {
223
+ if (migrationAttempted) return;
224
+ migrationAttempted = true;
225
+ if (process.env.NODE_ENV === "test") return;
226
+ const base = baseDir();
227
+ const oldDir = join(base, OLD_DIR_NAME);
228
+ const newDir = join(base, NEW_DIR_NAME);
229
+ try {
230
+ if (existsSync(oldDir) && !existsSync(newDir)) {
231
+ renameSync(oldDir, newDir);
232
+ console.error(`[lore] migrated data directory: ${oldDir} \u2192 ${newDir}`);
233
+ }
234
+ } catch {
235
+ }
236
+ }
237
+ function dataDir() {
238
+ migrateDataDir();
239
+ return join(baseDir(), NEW_DIR_NAME);
240
+ }
241
+
213
242
  // src/db.ts
214
243
  function repoNameFromRemote(remote) {
215
244
  if (!remote) return null;
@@ -646,17 +675,74 @@ var MIGRATIONS = [
646
675
  ALTER TABLE session_state ADD COLUMN ttl_savings REAL NOT NULL DEFAULT 0;
647
676
  ALTER TABLE session_state ADD COLUMN ttl_hits INTEGER NOT NULL DEFAULT 0;
648
677
  ALTER TABLE session_state ADD COLUMN batch_savings REAL NOT NULL DEFAULT 0;
678
+ `,
679
+ `
680
+ -- Version 19: Import history for conversation import idempotency.
681
+ -- Tracks which external agent sessions have been imported to prevent
682
+ -- re-importing unchanged sources and to record user-declined imports.
683
+ CREATE TABLE IF NOT EXISTS import_history (
684
+ id TEXT PRIMARY KEY,
685
+ project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
686
+ agent_name TEXT NOT NULL,
687
+ source_id TEXT NOT NULL,
688
+ source_hash TEXT NOT NULL,
689
+ entries_created INTEGER NOT NULL DEFAULT 0,
690
+ entries_updated INTEGER NOT NULL DEFAULT 0,
691
+ imported_at INTEGER NOT NULL,
692
+ UNIQUE(project_id, agent_name, source_id)
693
+ );
694
+ CREATE INDEX IF NOT EXISTS idx_import_history_project ON import_history(project_id);
695
+ `,
696
+ `
697
+ -- Version 20: Purge worker boilerplate from temporal messages.
698
+ -- Legacy gateway/plugin worker calls (distillation observer, curator,
699
+ -- consolidation, reflector, eval) stored their full system prompts
700
+ -- (containing entire conversation transcripts, up to 1.6MB each) as
701
+ -- temporal messages. These pollute FTS search results by matching
702
+ -- virtually any domain keyword. Safe to delete: their actual output
703
+ -- (distillations, knowledge entries) is stored in dedicated tables.
704
+ DELETE FROM temporal_messages WHERE content LIKE '%You are a memory observer.%'
705
+ OR content LIKE '%You are a long-term memory curator.%'
706
+ OR content LIKE '%You are a long-term memory curator performing a consolidation pass.%'
707
+ OR content LIKE '%You are a memory reflector.%'
708
+ OR content LIKE '%You are evaluating distillation quality.%';
709
+ `,
710
+ `
711
+ -- Version 21: Persist avoided compaction data from live sessions.
712
+ -- Historical estimates previously re-simulated avoided compactions from
713
+ -- temporal message token estimates (chars/3), missing system prompt and
714
+ -- tool definition overhead. Persisting the live session's real shadow
715
+ -- context tracking (from actual API-reported total input tokens) gives
716
+ -- accurate post-restart historical estimates.
717
+ ALTER TABLE session_state ADD COLUMN avoided_compactions INTEGER NOT NULL DEFAULT 0;
718
+ ALTER TABLE session_state ADD COLUMN avoided_compaction_cost REAL NOT NULL DEFAULT 0;
719
+ `,
720
+ `
721
+ -- Version 22: Track when conversation import was last offered/run.
722
+ -- NULL means import has never been offered for this project.
723
+ -- Used by auto-import to avoid re-prompting, and by explicit
724
+ -- \`lore import\` for incremental imports (only newer conversations).
725
+ ALTER TABLE projects ADD COLUMN last_import_at INTEGER;
726
+
727
+ -- Backfill: migrate legacy __declined__ sentinel rows so existing
728
+ -- users who previously declined are not re-prompted after upgrading.
729
+ UPDATE projects SET last_import_at = (
730
+ SELECT ih.imported_at FROM import_history ih
731
+ WHERE ih.project_id = projects.id
732
+ AND ih.source_id = '__declined__'
733
+ LIMIT 1
734
+ )
735
+ WHERE EXISTS (
736
+ SELECT 1 FROM import_history ih
737
+ WHERE ih.project_id = projects.id
738
+ AND ih.source_id = '__declined__'
739
+ );
649
740
  `
650
741
  ];
651
- function dataDir() {
652
- const xdg = process.env.XDG_DATA_HOME;
653
- const base = xdg || join(homedir(), ".local", "share");
654
- return join(base, "opencode-lore");
655
- }
656
742
  function dbPath() {
657
743
  const envPath = process.env.LORE_DB_PATH;
658
744
  if (envPath) return envPath;
659
- return join(dataDir(), "lore.db");
745
+ return join2(dataDir(), "lore.db");
660
746
  }
661
747
  var instance;
662
748
  function db() {
@@ -674,7 +760,7 @@ function db() {
674
760
  }
675
761
  const dir = dataDir();
676
762
  mkdirSync(dir, { recursive: true });
677
- path = join(dir, "lore.db");
763
+ path = join2(dir, "lore.db");
678
764
  }
679
765
  const database = new Database(path);
680
766
  database.exec("PRAGMA journal_mode = WAL");
@@ -841,6 +927,15 @@ function isFirstRun() {
841
927
  const row = db().query("SELECT COUNT(*) as count FROM projects").get();
842
928
  return row.count === 0;
843
929
  }
930
+ function getLastImportAt(projectPath) {
931
+ const id = ensureProject(projectPath);
932
+ const row = db().query("SELECT last_import_at FROM projects WHERE id = ?").get(id);
933
+ return row?.last_import_at ?? null;
934
+ }
935
+ function setLastImportAt(projectPath, timestamp) {
936
+ const id = ensureProject(projectPath);
937
+ db().query("UPDATE projects SET last_import_at = ? WHERE id = ?").run(timestamp, id);
938
+ }
844
939
  function loadForceMinLayer(sessionID) {
845
940
  const row = db().query("SELECT force_min_layer FROM session_state WHERE session_id = ?").get(sessionID);
846
941
  return row?.force_min_layer ?? 0;
@@ -859,8 +954,9 @@ function saveSessionCosts(sessionID, costs) {
859
954
  `INSERT INTO session_state (session_id, force_min_layer, updated_at,
860
955
  conversation_cost, worker_cost, conversation_turns,
861
956
  cache_read_tokens, cache_write_tokens,
862
- warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings)
863
- VALUES (?, COALESCE((SELECT force_min_layer FROM session_state WHERE session_id = ?), 0), ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
957
+ warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings,
958
+ avoided_compactions, avoided_compaction_cost)
959
+ VALUES (?, COALESCE((SELECT force_min_layer FROM session_state WHERE session_id = ?), 0), ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
864
960
  ON CONFLICT(session_id) DO UPDATE SET
865
961
  conversation_cost = excluded.conversation_cost,
866
962
  worker_cost = excluded.worker_cost,
@@ -872,6 +968,8 @@ function saveSessionCosts(sessionID, costs) {
872
968
  ttl_savings = excluded.ttl_savings,
873
969
  ttl_hits = excluded.ttl_hits,
874
970
  batch_savings = excluded.batch_savings,
971
+ avoided_compactions = excluded.avoided_compactions,
972
+ avoided_compaction_cost = excluded.avoided_compaction_cost,
875
973
  updated_at = excluded.updated_at`
876
974
  ).run(
877
975
  sessionID,
@@ -886,14 +984,17 @@ function saveSessionCosts(sessionID, costs) {
886
984
  costs.warmupHits,
887
985
  costs.ttlSavings,
888
986
  costs.ttlHits,
889
- costs.batchSavings
987
+ costs.batchSavings,
988
+ costs.avoidedCompactions,
989
+ costs.avoidedCompactionCost
890
990
  );
891
991
  }
892
992
  function loadSessionCosts(sessionID) {
893
993
  const row = db().query(
894
994
  `SELECT conversation_cost, worker_cost, conversation_turns,
895
995
  cache_read_tokens, cache_write_tokens,
896
- warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings
996
+ warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings,
997
+ avoided_compactions, avoided_compaction_cost
897
998
  FROM session_state WHERE session_id = ?`
898
999
  ).get(sessionID);
899
1000
  if (!row) return null;
@@ -907,14 +1008,17 @@ function loadSessionCosts(sessionID) {
907
1008
  warmupHits: row.warmup_hits,
908
1009
  ttlSavings: row.ttl_savings,
909
1010
  ttlHits: row.ttl_hits,
910
- batchSavings: row.batch_savings
1011
+ batchSavings: row.batch_savings,
1012
+ avoidedCompactions: row.avoided_compactions,
1013
+ avoidedCompactionCost: row.avoided_compaction_cost
911
1014
  };
912
1015
  }
913
1016
  function loadAllSessionCosts() {
914
1017
  const rows = db().query(
915
1018
  `SELECT session_id, conversation_cost, worker_cost, conversation_turns,
916
1019
  cache_read_tokens, cache_write_tokens,
917
- warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings
1020
+ warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings,
1021
+ avoided_compactions, avoided_compaction_cost
918
1022
  FROM session_state
919
1023
  WHERE conversation_turns > 0 OR warmup_savings > 0 OR ttl_savings > 0 OR batch_savings > 0`
920
1024
  ).all();
@@ -930,7 +1034,9 @@ function loadAllSessionCosts() {
930
1034
  warmupHits: row.warmup_hits,
931
1035
  ttlSavings: row.ttl_savings,
932
1036
  ttlHits: row.ttl_hits,
933
- batchSavings: row.batch_savings
1037
+ batchSavings: row.batch_savings,
1038
+ avoidedCompactions: row.avoided_compactions,
1039
+ avoidedCompactionCost: row.avoided_compaction_cost
934
1040
  });
935
1041
  }
936
1042
  return result;
@@ -9753,7 +9859,7 @@ var handle = {
9753
9859
  };
9754
9860
 
9755
9861
  // ../../node_modules/.bun/mdast-util-to-markdown@2.1.2/node_modules/mdast-util-to-markdown/lib/join.js
9756
- var join2 = [joinDefaults];
9862
+ var join3 = [joinDefaults];
9757
9863
  function joinDefaults(left, right, parent, state) {
9758
9864
  if (right.type === "code" && formatCodeAsIndented(right, state) && (left.type === "list" || left.type === right.type && formatCodeAsIndented(left, state))) {
9759
9865
  return false;
@@ -10173,7 +10279,7 @@ function toMarkdown(tree, options) {
10173
10279
  handle: void 0,
10174
10280
  indentLines,
10175
10281
  indexStack: [],
10176
- join: [...join2],
10282
+ join: [...join3],
10177
10283
  options: {},
10178
10284
  safe: safeBound,
10179
10285
  stack: [],
@@ -11898,6 +12004,10 @@ Focus ONLY on knowledge that helps a coding agent work effectively on THIS codeb
11898
12004
  - Environment/tooling setup details that affect development
11899
12005
  - Important relationships between components that aren't obvious from reading the code
11900
12006
  - User preferences and working style specific to how they use this project
12007
+ - Repeated user instructions \u2014 when the user says things like "always", "never",
12008
+ "make sure to", "don't forget to", these are high-value preference candidates.
12009
+ If you see instruction-like language, prioritize extracting it as a "preference" entry.
12010
+ These instructions represent how the user wants to work and should persist across sessions.
11901
12011
 
11902
12012
  Do NOT extract:
11903
12013
  - Task-specific details (file currently being edited, current bug being fixed)
@@ -11982,7 +12092,9 @@ IMPORTANT:
11982
12092
  2. When updating, REPLACE the content with a complete rewrite \u2014 never append.
11983
12093
  3. If entries cover the same system from different angles, merge them: update one, delete the rest.
11984
12094
  4. Only create a new entry for genuinely distinct knowledge with no existing home.
11985
- 5. Keep all entries under 150 words. If an existing entry is too long, use an update op to trim it.`;
12095
+ 5. Keep all entries under 150 words. If an existing entry is too long, use an update op to trim it.
12096
+ 6. Pay special attention to user instructions ("always do X", "never do Y", "make sure to X").
12097
+ These are strong signals for "preference" entries with high confidence.`;
11986
12098
  }
11987
12099
  var CONSOLIDATION_SYSTEM = `You are a long-term memory curator performing a consolidation pass. The knowledge base has grown too large and needs to be trimmed.
11988
12100
 
@@ -12146,9 +12258,12 @@ var log_exports = {};
12146
12258
  __export(log_exports, {
12147
12259
  error: () => error,
12148
12260
  info: () => info,
12261
+ logFilePath: () => logFilePath,
12149
12262
  registerSink: () => registerSink,
12150
12263
  warn: () => warn
12151
12264
  });
12265
+ import { appendFileSync, renameSync as renameSync2, statSync, mkdirSync as mkdirSync2 } from "node:fs";
12266
+ import { join as join4 } from "node:path";
12152
12267
  var sink = null;
12153
12268
  function registerSink(s) {
12154
12269
  sink = s;
@@ -12163,17 +12278,71 @@ function findError(args) {
12163
12278
  }
12164
12279
  return void 0;
12165
12280
  }
12281
+ var LOG_MAX_BYTES = 5 * 1024 * 1024;
12282
+ var ROTATION_CHECK_INTERVAL = 1e3;
12283
+ var logPath;
12284
+ var logPathResolved = false;
12285
+ var writeCount = 0;
12286
+ function resolveLogPath() {
12287
+ if (process.env.NODE_ENV === "test") return void 0;
12288
+ try {
12289
+ const dir = dataDir();
12290
+ mkdirSync2(dir, { recursive: true });
12291
+ return join4(dir, "lore.log");
12292
+ } catch {
12293
+ return void 0;
12294
+ }
12295
+ }
12296
+ function logFilePath() {
12297
+ if (!logPathResolved) {
12298
+ logPath = resolveLogPath();
12299
+ logPathResolved = true;
12300
+ }
12301
+ return logPath;
12302
+ }
12303
+ function maybeRotate() {
12304
+ if (!logPath) return;
12305
+ try {
12306
+ const stat = statSync(logPath);
12307
+ if (stat.size > LOG_MAX_BYTES) {
12308
+ renameSync2(logPath, logPath + ".1");
12309
+ }
12310
+ } catch {
12311
+ }
12312
+ }
12313
+ function writeToFile(level, message) {
12314
+ const path = logFilePath();
12315
+ if (!path) return;
12316
+ if (++writeCount % ROTATION_CHECK_INTERVAL === 0) {
12317
+ maybeRotate();
12318
+ }
12319
+ const ts = (/* @__PURE__ */ new Date()).toISOString();
12320
+ const tag = level.toUpperCase().padEnd(5);
12321
+ const flat = message.replace(/\n/g, "\\n");
12322
+ const line = `${ts} [${tag}] ${flat}
12323
+ `;
12324
+ try {
12325
+ appendFileSync(path, line);
12326
+ } catch {
12327
+ }
12328
+ }
12166
12329
  function info(...args) {
12167
12330
  if (isDebug) console.error("[lore]", ...args);
12168
- sink?.info(formatArgs(args));
12331
+ const msg = formatArgs(args);
12332
+ sink?.info(msg);
12333
+ writeToFile("info", msg);
12169
12334
  }
12170
12335
  function warn(...args) {
12171
12336
  if (isDebug) console.error("[lore] WARN:", ...args);
12172
- sink?.warn(formatArgs(args));
12337
+ const msg = formatArgs(args);
12338
+ sink?.warn(msg);
12339
+ writeToFile("warn", msg);
12173
12340
  }
12174
12341
  function error(...args) {
12175
12342
  console.error("[lore]", ...args);
12176
- sink?.error(formatArgs(args));
12343
+ const msg = formatArgs(args);
12344
+ sink?.error(msg);
12345
+ writeToFile("error", msg);
12177
12346
  const err = findError(args);
12178
12347
  if (err) sink?.captureException(err);
12179
12348
  }
@@ -12333,10 +12502,11 @@ function extractTopTerms(text4, limit = 40) {
12333
12502
  function reciprocalRankFusion(lists, k = 60) {
12334
12503
  const scores = /* @__PURE__ */ new Map();
12335
12504
  for (const list4 of lists) {
12505
+ const w = list4.weight ?? 1;
12336
12506
  for (let rank = 0; rank < list4.items.length; rank++) {
12337
12507
  const item = list4.items[rank];
12338
12508
  const id = list4.key(item);
12339
- const rrfScore = 1 / (k + rank);
12509
+ const rrfScore = w / (k + rank);
12340
12510
  const existing = scores.get(id);
12341
12511
  if (existing) {
12342
12512
  existing.score += rrfScore;
@@ -12390,8 +12560,8 @@ async function expandQuery(llm, query, model, sessionID) {
12390
12560
  var embedding_exports = {};
12391
12561
  __export(embedding_exports, {
12392
12562
  LocalProviderUnavailableError: () => LocalProviderUnavailableError,
12393
- _markFastembedUnavailable: () => _markFastembedUnavailable,
12394
- _resetFastembedProbe: () => _resetFastembedProbe,
12563
+ _markLocalProviderUnavailable: () => _markLocalProviderUnavailable,
12564
+ _resetLocalProviderProbe: () => _resetLocalProviderProbe,
12395
12565
  _restoreProvider: () => _restoreProvider,
12396
12566
  _saveAndClearProvider: () => _saveAndClearProvider,
12397
12567
  _shutdownAndDisable: () => _shutdownAndDisable,
@@ -12410,6 +12580,7 @@ __export(embedding_exports, {
12410
12580
  runStartupBackfill: () => runStartupBackfill,
12411
12581
  toBlob: () => toBlob,
12412
12582
  vectorSearch: () => vectorSearch,
12583
+ vectorSearchAllDistillations: () => vectorSearchAllDistillations,
12413
12584
  vectorSearchDistillations: () => vectorSearchDistillations,
12414
12585
  vectorSearchTemporal: () => vectorSearchTemporal
12415
12586
  });
@@ -26183,8 +26354,8 @@ function date4(params) {
26183
26354
  config(en_default());
26184
26355
 
26185
26356
  // src/config.ts
26186
- import { existsSync, readFileSync } from "node:fs";
26187
- import { join as join3 } from "node:path";
26357
+ import { existsSync as existsSync2, readFileSync } from "node:fs";
26358
+ import { join as join5 } from "node:path";
26188
26359
  var LoreConfig = external_exports.object({
26189
26360
  model: external_exports.object({
26190
26361
  providerID: external_exports.string(),
@@ -26301,15 +26472,25 @@ var LoreConfig = external_exports.object({
26301
26472
  }).default({ title: 6, content: 2, category: 3 }),
26302
26473
  /** Max results per source in recall tool before fusion. Default: 10. */
26303
26474
  recallLimit: external_exports.number().min(1).max(50).default(10),
26304
- /** Enable LLM-based query expansion for the recall tool. Default: false.
26305
- * When enabled, the configured model generates 2–3 alternative query phrasings
26306
- * before search, improving recall for ambiguous queries. */
26307
- queryExpansion: external_exports.boolean().default(false),
26475
+ /** Enable LLM-based query expansion for the recall tool. Default: true.
26476
+ * The configured model generates 2–3 alternative query phrasings before
26477
+ * search, improving recall for ambiguous queries. Guarded by a 3-second
26478
+ * timeout — if expansion fails or times out, the original query is used. */
26479
+ queryExpansion: external_exports.boolean().default(true),
26480
+ /** RRF weight multiplier for vector search lists. Applied when the query
26481
+ * has >= `vectorBoostMinTerms` meaningful terms (after stopword removal).
26482
+ * Boosts semantic/vector results relative to keyword-based BM25 lists.
26483
+ * Default: 1.5. Set to 1.0 to disable. */
26484
+ vectorBoostWeight: external_exports.number().min(1).max(5).default(1.5),
26485
+ /** Minimum meaningful query terms (after stopword removal) to activate
26486
+ * vector boost. Short keyword queries (1-2 terms) are left unweighted
26487
+ * since BM25 excels there. Default: 3. */
26488
+ vectorBoostMinTerms: external_exports.number().min(1).max(10).default(3),
26308
26489
  /** Vector embedding search.
26309
26490
  * Supports multiple providers:
26310
- * - "local" (default): fastembed + ONNX Runtime, no API key needed.
26311
- * Uses bge-small-en-v1.5 (384 dims). Model downloaded on first use (~33MB),
26312
- * cached in ~/.cache/fastembed. ~150ms per query embed.
26491
+ * - "local" (default): @huggingface/transformers + nomic-embed-text-v1.5, no API key needed.
26492
+ * 768 dims (Matryoshka-capable: 64–768). Model downloaded on first use (~137MB INT8),
26493
+ * cached locally. Uses task instruction prefixes (search_document: / search_query:).
26313
26494
  * - "voyage": Voyage AI (VOYAGE_API_KEY, voyage-code-3, 1024 dims)
26314
26495
  * - "openai": OpenAI (OPENAI_API_KEY, text-embedding-3-small, 1536 dims)
26315
26496
  * Set enabled: false to explicitly disable even with a provider available. */
@@ -26318,19 +26499,20 @@ var LoreConfig = external_exports.object({
26318
26499
  * Set to false to explicitly disable. */
26319
26500
  enabled: external_exports.boolean().default(true),
26320
26501
  /** Embedding provider. Default: "local".
26321
- * - "local": fastembed + ONNX Runtime, no API key (default model: bge-small-en-v1.5, 384 dims)
26502
+ * - "local": @huggingface/transformers, no API key (default model: nomic-embed-text-v1.5, 768 dims)
26322
26503
  * - "voyage": VOYAGE_API_KEY (default model: voyage-code-3, 1024 dims)
26323
26504
  * - "openai": OPENAI_API_KEY (default model: text-embedding-3-small, 1536 dims) */
26324
26505
  provider: external_exports.enum(["local", "voyage", "openai"]).default("local"),
26325
26506
  /** Model ID for the embedding provider. Default depends on provider. */
26326
- model: external_exports.string().default("BGESmallENV15"),
26327
- /** Embedding dimensions. Default: 384 (local) / 1024 (voyage) / 1536 (openai). */
26328
- dimensions: external_exports.number().min(64).max(2048).default(384)
26507
+ model: external_exports.string().default("nomic-ai/nomic-embed-text-v1.5"),
26508
+ /** Embedding dimensions. Default: 768 (local) / 1024 (voyage) / 1536 (openai).
26509
+ * For the local Nomic v1.5 model, supports Matryoshka dimensions: 64, 128, 256, 512, 768. */
26510
+ dimensions: external_exports.number().min(64).max(2048).default(768)
26329
26511
  }).default({
26330
26512
  enabled: true,
26331
26513
  provider: "local",
26332
- model: "BGESmallENV15",
26333
- dimensions: 384
26514
+ model: "nomic-ai/nomic-embed-text-v1.5",
26515
+ dimensions: 768
26334
26516
  }),
26335
26517
  /** Recall output formatting — controls how search results are presented to the agent. */
26336
26518
  recall: external_exports.object({
@@ -26347,8 +26529,10 @@ var LoreConfig = external_exports.object({
26347
26529
  }).default({
26348
26530
  ftsWeights: { title: 6, content: 2, category: 3 },
26349
26531
  recallLimit: 10,
26350
- queryExpansion: false,
26351
- embeddings: { enabled: true, provider: "local", model: "BGESmallENV15", dimensions: 384 },
26532
+ queryExpansion: true,
26533
+ vectorBoostWeight: 1.5,
26534
+ vectorBoostMinTerms: 3,
26535
+ embeddings: { enabled: true, provider: "local", model: "nomic-ai/nomic-embed-text-v1.5", dimensions: 768 },
26352
26536
  recall: { charBudget: 8e3, relevanceFloor: 0.15, maxResults: 15 }
26353
26537
  }),
26354
26538
  cache: external_exports.object({
@@ -26366,9 +26550,9 @@ var LoreConfig = external_exports.object({
26366
26550
  warming: external_exports.object({
26367
26551
  /** Enable cache warming. Default: true. */
26368
26552
  enabled: external_exports.boolean().default(true),
26369
- /** Override the survival probability threshold below which warming is
26370
- * skipped. Default: auto-derived from cache read/write cost ratio
26371
- * (~0.08 for 5m TTL, ~0.05 for 1h TTL). */
26553
+ /** Override the return probability threshold below which warming is
26554
+ * skipped. Default: auto-derived from corrected cost ratio
26555
+ * read/(write-read) (~0.087 for 5m TTL, ~0.042 for 1h TTL). */
26372
26556
  minReturnProbability: external_exports.number().min(0).max(1).optional()
26373
26557
  }).default({ enabled: true })
26374
26558
  }).default({
@@ -26388,8 +26572,8 @@ function config2() {
26388
26572
  return current;
26389
26573
  }
26390
26574
  async function load(directory) {
26391
- const path = join3(directory, ".lore.json");
26392
- if (existsSync(path)) {
26575
+ const path = join5(directory, ".lore.json");
26576
+ if (existsSync2(path)) {
26393
26577
  const raw = JSON.parse(readFileSync(path, "utf8"));
26394
26578
  current = LoreConfig.parse(raw);
26395
26579
  return current;
@@ -26420,8 +26604,7 @@ function vendorModelInfo() {
26420
26604
  const reg = getRegistration();
26421
26605
  if (!reg) return null;
26422
26606
  return {
26423
- modelAbsoluteDirPath: reg.modelAbsoluteDirPath,
26424
- modelName: reg.modelName
26607
+ localModelPath: reg.localModelPath
26425
26608
  };
26426
26609
  }
26427
26610
  function isVendoredBinary() {
@@ -26508,62 +26691,31 @@ var OpenAIProvider = class {
26508
26691
  var LocalProviderUnavailableError = class extends Error {
26509
26692
  constructor(cause) {
26510
26693
  super(
26511
- "Local embedding provider unavailable: 'fastembed' is not installed. Configure search.embeddings.provider to 'voyage' or 'openai', or reinstall with ONNXRUNTIME_NODE_INSTALL_CUDA=skip to retry the optional fastembed install."
26694
+ "Local embedding provider unavailable: '@huggingface/transformers' failed to initialize. Configure search.embeddings.provider to 'voyage' or 'openai', or set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback."
26512
26695
  );
26513
26696
  this.name = "LocalProviderUnavailableError";
26514
26697
  if (cause !== void 0) this.cause = cause;
26515
26698
  }
26516
26699
  };
26517
- var fastembedModule = null;
26518
- var fastembedProbed = false;
26519
- var fastembedAvailable = false;
26520
- var fastembedLogged = false;
26521
- function _resetFastembedProbe() {
26522
- fastembedModule = null;
26523
- fastembedProbed = false;
26524
- fastembedAvailable = false;
26525
- fastembedLogged = false;
26526
- }
26527
- function _markFastembedUnavailable() {
26528
- fastembedModule = null;
26529
- fastembedProbed = true;
26530
- fastembedAvailable = false;
26531
- fastembedLogged = true;
26532
- }
26533
- async function tryLoadFastembed() {
26534
- if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
26535
- try {
26536
- const mod = await loadFastembedModule();
26537
- if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
26538
- fastembedModule = mod;
26539
- fastembedAvailable = true;
26540
- } catch (err) {
26541
- if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
26542
- fastembedAvailable = false;
26543
- if (!fastembedLogged) {
26544
- fastembedLogged = true;
26545
- const msg = err instanceof Error ? err.message : String(err);
26546
- const remediation = isVendoredBinary() ? "this is a bug in the lore binary; please file an issue. Set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback in the meantime" : "set search.embeddings.provider to 'voyage' or 'openai', set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback, or reinstall fastembed with ONNXRUNTIME_NODE_INSTALL_CUDA=skip";
26547
- info(
26548
- `local embedding provider unavailable (fastembed not installed: ${msg}) \u2014 ${remediation}`
26549
- );
26550
- }
26551
- } finally {
26552
- fastembedProbed = true;
26553
- }
26554
- return fastembedAvailable ? fastembedModule : null;
26700
+ var localProviderKnownBroken = false;
26701
+ var localProviderErrorLogged = false;
26702
+ function _resetLocalProviderProbe() {
26703
+ localProviderKnownBroken = false;
26704
+ localProviderErrorLogged = false;
26555
26705
  }
26556
- async function loadFastembedModule() {
26557
- return await import("fastembed");
26706
+ function _markLocalProviderUnavailable() {
26707
+ localProviderKnownBroken = true;
26708
+ localProviderErrorLogged = true;
26558
26709
  }
26559
- function fastembedKnownUnavailable() {
26560
- return fastembedProbed && !fastembedAvailable;
26710
+ function localProviderKnownUnavailable() {
26711
+ return localProviderKnownBroken;
26561
26712
  }
26562
26713
  var LocalProvider = class {
26563
26714
  // With inference off the main thread, large batches no longer block
26564
26715
  // the event loop. 256 maximises throughput per round-trip to the
26565
- // worker. Backfill callers use a smaller BACKFILL_CHUNK_SIZE to give
26566
- // the worker's priority queue breathing room for recall queries.
26716
+ // worker. Backfill callers use token-budget-based batching (see
26717
+ // nextBatch) to give the worker's priority queue breathing room
26718
+ // for recall queries and prevent OOM on long texts.
26567
26719
  maxBatchSize = 256;
26568
26720
  worker = null;
26569
26721
  workerReady = false;
@@ -26571,14 +26723,14 @@ var LocalProvider = class {
26571
26723
  pendingRequests = /* @__PURE__ */ new Map();
26572
26724
  nextRequestId = 0;
26573
26725
  initPromise = null;
26574
- modelName;
26575
- constructor(modelName) {
26576
- this.modelName = modelName;
26726
+ modelId;
26727
+ dimensions;
26728
+ constructor(modelId, dimensions) {
26729
+ this.modelId = modelId;
26730
+ this.dimensions = dimensions;
26577
26731
  }
26578
26732
  /**
26579
- * Ensure the worker thread is running. Probes fastembed on the main
26580
- * thread first (fast, cached) as a fast-fail gate — the worker is only
26581
- * spawned if the module is known-loadable. Worker startup failure is
26733
+ * Ensure the worker thread is running. Worker startup failure is
26582
26734
  * surfaced as `LocalProviderUnavailableError` to trigger the existing
26583
26735
  * auto-fallback to remote providers.
26584
26736
  */
@@ -26587,8 +26739,7 @@ var LocalProvider = class {
26587
26739
  if (this.workerInitError) throw new LocalProviderUnavailableError(this.workerInitError);
26588
26740
  if (this.initPromise) return this.initPromise;
26589
26741
  this.initPromise = (async () => {
26590
- const fastembed = await tryLoadFastembed();
26591
- if (!fastembed) throw new LocalProviderUnavailableError();
26742
+ if (localProviderKnownBroken) throw new LocalProviderUnavailableError();
26592
26743
  const { Worker } = await import("node:worker_threads");
26593
26744
  const vendorWorkerUrl = globalThis.__LORE_VENDOR_WORKER_URL__;
26594
26745
  let workerUrl;
@@ -26606,8 +26757,9 @@ var LocalProvider = class {
26606
26757
  }
26607
26758
  const vendor = vendorModelInfo();
26608
26759
  const workerInitData = {
26609
- modelName: this.modelName,
26610
- vendorModel: vendor ? { modelAbsoluteDirPath: vendor.modelAbsoluteDirPath, modelName: vendor.modelName } : null
26760
+ modelId: this.modelId,
26761
+ dimensions: this.dimensions,
26762
+ vendorModel: vendor ? { localModelPath: vendor.localModelPath } : null
26611
26763
  };
26612
26764
  this.worker = new Worker(workerUrl, { workerData: workerInitData });
26613
26765
  this.worker.unref();
@@ -26634,6 +26786,13 @@ var LocalProvider = class {
26634
26786
  case "init-error": {
26635
26787
  this.workerInitError = msg.error;
26636
26788
  this.workerReady = false;
26789
+ localProviderKnownBroken = true;
26790
+ if (!localProviderErrorLogged) {
26791
+ localProviderErrorLogged = true;
26792
+ info(
26793
+ `local embedding provider failed to init: ${msg.error}. Set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback.`
26794
+ );
26795
+ }
26637
26796
  for (const [, p2] of this.pendingRequests) {
26638
26797
  p2.reject(new LocalProviderUnavailableError(msg.error));
26639
26798
  }
@@ -26685,6 +26844,8 @@ var LocalProvider = class {
26685
26844
  }
26686
26845
  async embed(texts, inputType) {
26687
26846
  await this.ensureWorker();
26847
+ const prefix = inputType === "document" ? "search_document: " : "search_query: ";
26848
+ const prefixed = texts.map((t2) => prefix + t2);
26688
26849
  const id = this.nextRequestId++;
26689
26850
  const priority = inputType === "query" && texts.length === 1 ? "high" : "normal";
26690
26851
  return new Promise((resolve, reject) => {
@@ -26693,7 +26854,7 @@ var LocalProvider = class {
26693
26854
  this.worker.postMessage({
26694
26855
  type: "embed",
26695
26856
  id,
26696
- texts,
26857
+ texts: prefixed,
26697
26858
  inputType,
26698
26859
  priority
26699
26860
  });
@@ -26701,8 +26862,6 @@ var LocalProvider = class {
26701
26862
  }
26702
26863
  /** Shut down the worker thread. Called by `resetProvider()` on config change.
26703
26864
  * Sends a shutdown message so the worker calls `process.exit(0)` internally.
26704
- * We avoid `worker.terminate()` because Bun's forced termination triggers a
26705
- * NAPI fatal error when tearing down onnxruntime's native bindings.
26706
26865
  *
26707
26866
  * Returns a promise that resolves once the worker has fully exited. Callers
26708
26867
  * that need a clean teardown (tests, config change) should await the result.
@@ -26725,7 +26884,7 @@ var LocalProvider = class {
26725
26884
  }
26726
26885
  };
26727
26886
  var PROVIDER_DEFAULTS = {
26728
- local: { model: "BGESmallENV15", dimensions: 384 },
26887
+ local: { model: "nomic-ai/nomic-embed-text-v1.5", dimensions: 768 },
26729
26888
  voyage: { model: "voyage-code-3", dimensions: 1024 },
26730
26889
  openai: { model: "text-embedding-3-small", dimensions: 1536 }
26731
26890
  };
@@ -26749,7 +26908,7 @@ function getProvider() {
26749
26908
  const model = cfg.model;
26750
26909
  switch (providerName) {
26751
26910
  case "local": {
26752
- cachedProvider = new LocalProvider(model);
26911
+ cachedProvider = new LocalProvider(model, cfg.dimensions);
26753
26912
  break;
26754
26913
  }
26755
26914
  case "voyage": {
@@ -26826,7 +26985,7 @@ function pickRemoteFallback() {
26826
26985
  function isAvailable() {
26827
26986
  const provider = getProvider();
26828
26987
  if (!provider) return false;
26829
- if (provider instanceof LocalProvider && fastembedKnownUnavailable()) return false;
26988
+ if (provider instanceof LocalProvider && localProviderKnownUnavailable()) return false;
26830
26989
  return true;
26831
26990
  }
26832
26991
  async function embed(texts, inputType) {
@@ -26841,7 +27000,7 @@ async function embed(texts, inputType) {
26841
27000
  if (!remoteFallbackLogged) {
26842
27001
  remoteFallbackLogged = true;
26843
27002
  info(
26844
- `fastembed unavailable; auto-switching to ${fallback.name} (set search.embeddings.provider in .lore.json to silence this)`
27003
+ `local embedding provider unavailable; auto-switching to ${fallback.name} (set search.embeddings.provider in .lore.json to silence this)`
26845
27004
  );
26846
27005
  }
26847
27006
  cachedProvider = fallback.provider;
@@ -26893,6 +27052,20 @@ function vectorSearchDistillations(queryEmbedding, limit = 10) {
26893
27052
  scored.sort((a, b) => b.similarity - a.similarity);
26894
27053
  return scored.slice(0, limit);
26895
27054
  }
27055
+ var MAX_DISTILLATION_VECTOR_ROWS = 500;
27056
+ function vectorSearchAllDistillations(queryEmbedding, projectId2, limit = 20) {
27057
+ const rows = db().query(
27058
+ "SELECT id, session_id, embedding FROM distillations WHERE embedding IS NOT NULL AND project_id = ? ORDER BY created_at DESC LIMIT ?"
27059
+ ).all(projectId2, MAX_DISTILLATION_VECTOR_ROWS);
27060
+ const scored = [];
27061
+ for (const row of rows) {
27062
+ const vec = fromBlob(row.embedding);
27063
+ const sim = cosineSimilarity(queryEmbedding, vec);
27064
+ scored.push({ id: row.id, session_id: row.session_id, similarity: sim });
27065
+ }
27066
+ scored.sort((a, b) => b.similarity - a.similarity);
27067
+ return scored.slice(0, limit);
27068
+ }
26896
27069
  function embedKnowledgeEntry(id, title, content3) {
26897
27070
  const text4 = `${title}
26898
27071
  ${content3}`;
@@ -26994,20 +27167,37 @@ async function runStartupBackfill() {
26994
27167
  );
26995
27168
  info(`embedding startup: ${parts.join("; ")}`);
26996
27169
  }
26997
- var BACKFILL_CHUNK_SIZE = 32;
27170
+ var MAX_BACKFILL_CHUNK = 8;
27171
+ var MAX_BATCH_TOKEN_AREA = 4096;
27172
+ var CHARS_PER_TOKEN = 4;
27173
+ function nextBatch(rows, start) {
27174
+ const batch = [];
27175
+ let maxTokens = 0;
27176
+ for (let i = start; i < rows.length && batch.length < MAX_BACKFILL_CHUNK; i++) {
27177
+ const estTokens = Math.ceil(rows[i].text.length / CHARS_PER_TOKEN);
27178
+ const newMax = Math.max(maxTokens, estTokens);
27179
+ const newArea = (batch.length + 1) * newMax;
27180
+ if (batch.length > 0 && newArea > MAX_BATCH_TOKEN_AREA) break;
27181
+ batch.push(rows[i]);
27182
+ maxTokens = newMax;
27183
+ }
27184
+ return batch;
27185
+ }
26998
27186
  async function backfillEmbeddings() {
26999
27187
  checkConfigChange();
27000
27188
  const provider = getProvider();
27001
27189
  if (!provider) return 0;
27002
27190
  const rows = db().query("SELECT id, title, content FROM knowledge WHERE embedding IS NULL AND confidence > 0.2").all();
27003
27191
  if (!rows.length) return 0;
27192
+ const items = rows.map((r) => ({ ...r, text: `${r.title}
27193
+ ${r.content}` }));
27004
27194
  let embedded = 0;
27005
- for (let i = 0; i < rows.length; i += BACKFILL_CHUNK_SIZE) {
27006
- const batch = rows.slice(i, i + BACKFILL_CHUNK_SIZE);
27007
- const texts = batch.map((r) => `${r.title}
27008
- ${r.content}`);
27195
+ let i = 0;
27196
+ while (i < items.length) {
27197
+ const batch = nextBatch(items, i);
27198
+ i += batch.length;
27009
27199
  try {
27010
- const vectors = await embed(texts, "document");
27200
+ const vectors = await embed(batch.map((b) => b.text), "document");
27011
27201
  const update2 = db().prepare(
27012
27202
  "UPDATE knowledge SET embedding = ? WHERE id = ?"
27013
27203
  );
@@ -27016,7 +27206,7 @@ ${r.content}`);
27016
27206
  embedded++;
27017
27207
  }
27018
27208
  } catch (err) {
27019
- info(`embedding backfill batch ${i}-${i + batch.length} failed:`, err);
27209
+ error(`embedding backfill batch failed (${batch.length} items):`, err);
27020
27210
  }
27021
27211
  }
27022
27212
  if (embedded > 0) {
@@ -27034,11 +27224,13 @@ async function backfillDistillationEmbeddings() {
27034
27224
  let embedded = 0;
27035
27225
  const PROGRESS_INTERVAL = 256;
27036
27226
  let nextProgressAt = PROGRESS_INTERVAL;
27037
- for (let i = 0; i < rows.length; i += BACKFILL_CHUNK_SIZE) {
27038
- const batch = rows.slice(i, i + BACKFILL_CHUNK_SIZE);
27039
- const texts = batch.map((r) => r.observations);
27227
+ const items = rows.map((r) => ({ ...r, text: r.observations }));
27228
+ let i = 0;
27229
+ while (i < items.length) {
27230
+ const batch = nextBatch(items, i);
27231
+ i += batch.length;
27040
27232
  try {
27041
- const vectors = await embed(texts, "document");
27233
+ const vectors = await embed(batch.map((b) => b.text), "document");
27042
27234
  const update2 = db().prepare(
27043
27235
  "UPDATE distillations SET embedding = ? WHERE id = ?"
27044
27236
  );
@@ -27047,7 +27239,7 @@ async function backfillDistillationEmbeddings() {
27047
27239
  embedded++;
27048
27240
  }
27049
27241
  } catch (err) {
27050
- info(`distillation embedding backfill batch ${i}-${i + batch.length} failed:`, err);
27242
+ error(`distillation embedding backfill batch failed (${batch.length} items):`, err);
27051
27243
  }
27052
27244
  if (embedded >= nextProgressAt) {
27053
27245
  info(`embedding distillations: ${embedded}/${rows.length}\u2026`);
@@ -27161,7 +27353,7 @@ function searchLike(input) {
27161
27353
  if (!terms.length) return [];
27162
27354
  const conditions = terms.map(() => "LOWER(content) LIKE ?").join(" AND ");
27163
27355
  const likeParams = terms.map((t2) => `%${t2}%`);
27164
- const query = input.sessionID ? `SELECT * FROM temporal_messages WHERE project_id = ? AND session_id = ? AND ${conditions} ORDER BY created_at DESC LIMIT ?` : `SELECT * FROM temporal_messages WHERE project_id = ? AND ${conditions} ORDER BY created_at DESC LIMIT ?`;
27356
+ const query = input.sessionID ? `SELECT * FROM temporal_messages WHERE project_id = ? AND session_id = ? AND distilled = 0 AND ${conditions} ORDER BY created_at DESC LIMIT ?` : `SELECT * FROM temporal_messages WHERE project_id = ? AND distilled = 0 AND ${conditions} ORDER BY created_at DESC LIMIT ?`;
27165
27357
  const params = input.sessionID ? [input.pid, input.sessionID, ...likeParams, input.limit] : [input.pid, ...likeParams, input.limit];
27166
27358
  return db().query(query).all(...params);
27167
27359
  }
@@ -27170,10 +27362,10 @@ function search2(input) {
27170
27362
  const limit = input.limit ?? 20;
27171
27363
  const ftsSQL = input.sessionID ? `SELECT m.* FROM temporal_fts f
27172
27364
  CROSS JOIN temporal_messages m ON m.rowid = f.rowid
27173
- WHERE f.content MATCH ? AND m.project_id = ? AND m.session_id = ?
27365
+ WHERE f.content MATCH ? AND m.project_id = ? AND m.session_id = ? AND m.distilled = 0
27174
27366
  ORDER BY rank LIMIT ?` : `SELECT m.* FROM temporal_fts f
27175
27367
  CROSS JOIN temporal_messages m ON m.rowid = f.rowid
27176
- WHERE f.content MATCH ? AND m.project_id = ?
27368
+ WHERE f.content MATCH ? AND m.project_id = ? AND m.distilled = 0
27177
27369
  ORDER BY rank LIMIT ?`;
27178
27370
  try {
27179
27371
  return runRelaxedSearch(input.query, (matchExpr) => {
@@ -27194,10 +27386,10 @@ function searchScored(input) {
27194
27386
  const limit = input.limit ?? 20;
27195
27387
  const ftsSQL = input.sessionID ? `SELECT m.*, rank FROM temporal_fts f
27196
27388
  CROSS JOIN temporal_messages m ON m.rowid = f.rowid
27197
- WHERE f.content MATCH ? AND m.project_id = ? AND m.session_id = ?
27389
+ WHERE f.content MATCH ? AND m.project_id = ? AND m.session_id = ? AND m.distilled = 0
27198
27390
  ORDER BY rank LIMIT ?` : `SELECT m.*, rank FROM temporal_fts f
27199
27391
  CROSS JOIN temporal_messages m ON m.rowid = f.rowid
27200
- WHERE f.content MATCH ? AND m.project_id = ?
27392
+ WHERE f.content MATCH ? AND m.project_id = ? AND m.distilled = 0
27201
27393
  ORDER BY rank LIMIT ?`;
27202
27394
  try {
27203
27395
  return runRelaxedSearch(input.query, (matchExpr) => {
@@ -27288,7 +27480,11 @@ __export(ltm_exports, {
27288
27480
  check: () => check2,
27289
27481
  cleanDeadRefs: () => cleanDeadRefs,
27290
27482
  create: () => create,
27483
+ crossProject: () => crossProject,
27484
+ deduplicate: () => deduplicate,
27485
+ deduplicateGlobal: () => deduplicateGlobal,
27291
27486
  extractRefs: () => extractRefs,
27487
+ findFuzzyDuplicate: () => findFuzzyDuplicate,
27292
27488
  forProject: () => forProject,
27293
27489
  forSession: () => forSession,
27294
27490
  get: () => get,
@@ -27647,8 +27843,8 @@ __export(lat_reader_exports, {
27647
27843
  scoreForSession: () => scoreForSession,
27648
27844
  searchScored: () => searchScored2
27649
27845
  });
27650
- import { readdirSync, readFileSync as readFileSync2, existsSync as existsSync2, statSync } from "fs";
27651
- import { join as join4, relative } from "path";
27846
+ import { readdirSync, readFileSync as readFileSync2, existsSync as existsSync3, statSync as statSync2 } from "fs";
27847
+ import { join as join6, relative } from "path";
27652
27848
  var processor2 = remark();
27653
27849
  function estimateTokens2(text4) {
27654
27850
  return Math.ceil(text4.length / 3);
@@ -27726,7 +27922,7 @@ function listMarkdownFiles(dir) {
27726
27922
  try {
27727
27923
  const entries = readdirSync(dir, { withFileTypes: true });
27728
27924
  for (const entry of entries) {
27729
- const fullPath = join4(dir, entry.name);
27925
+ const fullPath = join6(dir, entry.name);
27730
27926
  if (entry.isDirectory() && !entry.name.startsWith(".")) {
27731
27927
  results.push(...listMarkdownFiles(fullPath));
27732
27928
  } else if (entry.isFile() && entry.name.endsWith(".md")) {
@@ -27741,12 +27937,12 @@ function contentHash(content3) {
27741
27937
  return sha256(content3);
27742
27938
  }
27743
27939
  function hasLatDir(projectPath) {
27744
- const latDir = join4(projectPath, "lat.md");
27745
- return existsSync2(latDir) && statSync(latDir).isDirectory();
27940
+ const latDir = join6(projectPath, "lat.md");
27941
+ return existsSync3(latDir) && statSync2(latDir).isDirectory();
27746
27942
  }
27747
27943
  function refresh(projectPath) {
27748
- const latDir = join4(projectPath, "lat.md");
27749
- if (!existsSync2(latDir) || !statSync(latDir).isDirectory()) return 0;
27944
+ const latDir = join6(projectPath, "lat.md");
27945
+ if (!existsSync3(latDir) || !statSync2(latDir).isDirectory()) return 0;
27750
27946
  const pid = ensureProject(projectPath);
27751
27947
  const files = listMarkdownFiles(latDir);
27752
27948
  let upserted = 0;
@@ -27868,6 +28064,7 @@ var KNOWLEDGE_COLS = "id, project_id, category, title, content, source_session,
27868
28064
  var KNOWLEDGE_COLS_K = "k.id, k.project_id, k.category, k.title, k.content, k.source_session, k.cross_project, k.confidence, k.created_at, k.updated_at, k.metadata";
27869
28065
  function create(input) {
27870
28066
  const pid = input.scope === "project" && input.projectPath ? ensureProject(input.projectPath) : null;
28067
+ const crossProject2 = pid === null ? true : input.crossProject ?? false;
27871
28068
  if (!input.id) {
27872
28069
  const existing = pid !== null ? db().query(
27873
28070
  "SELECT id FROM knowledge WHERE project_id = ? AND LOWER(title) = LOWER(?) AND confidence > 0 LIMIT 1"
@@ -27885,6 +28082,11 @@ function create(input) {
27885
28082
  update(crossExisting.id, { content: input.content });
27886
28083
  return crossExisting.id;
27887
28084
  }
28085
+ const fuzzyMatch = findFuzzyDuplicate({ title: input.title, projectId: pid });
28086
+ if (fuzzyMatch) {
28087
+ update(fuzzyMatch.id, { content: input.content });
28088
+ return fuzzyMatch.id;
28089
+ }
27888
28090
  }
27889
28091
  const id = input.id ?? uuidv72();
27890
28092
  const now = Date.now();
@@ -27898,7 +28100,7 @@ function create(input) {
27898
28100
  input.title,
27899
28101
  input.content,
27900
28102
  input.session ?? null,
27901
- input.crossProject ?? false ? 1 : 0,
28103
+ crossProject2 ? 1 : 0,
27902
28104
  now,
27903
28105
  now
27904
28106
  );
@@ -27916,7 +28118,7 @@ function update(id, input) {
27916
28118
  }
27917
28119
  if (input.confidence !== void 0) {
27918
28120
  sets.push("confidence = ?");
27919
- params.push(input.confidence);
28121
+ params.push(Math.max(0, Math.min(1, input.confidence)));
27920
28122
  }
27921
28123
  sets.push("updated_at = ?");
27922
28124
  params.push(Date.now());
@@ -27932,6 +28134,50 @@ function update(id, input) {
27932
28134
  function remove(id) {
27933
28135
  db().query("DELETE FROM knowledge WHERE id = ?").run(id);
27934
28136
  }
28137
+ function titleOverlap(a, b) {
28138
+ const wordsA = new Set(filterTerms(a).map((w) => w.toLowerCase()));
28139
+ const wordsB = new Set(filterTerms(b).map((w) => w.toLowerCase()));
28140
+ if (wordsA.size === 0 || wordsB.size === 0) return { coefficient: 0, intersectionSize: 0 };
28141
+ const intersection2 = [...wordsA].filter((w) => wordsB.has(w));
28142
+ return {
28143
+ coefficient: intersection2.length / Math.min(wordsA.size, wordsB.size),
28144
+ intersectionSize: intersection2.length
28145
+ };
28146
+ }
28147
+ var FUZZY_DEDUP_THRESHOLD = 0.7;
28148
+ var FUZZY_DEDUP_MIN_OVERLAP = 4;
28149
+ var EMBEDDING_DEDUP_THRESHOLD = 0.935;
28150
+ function findFuzzyDuplicate(input) {
28151
+ const q = ftsQueryOr(input.title);
28152
+ if (q === EMPTY_QUERY) return null;
28153
+ const { title: tw, content: cw, category: catw } = config2().search.ftsWeights;
28154
+ try {
28155
+ const excludeClause = input.excludeId ? "AND k.id != ?" : "";
28156
+ const sql = input.projectId !== null ? `SELECT k.id, k.title FROM knowledge_fts f
28157
+ CROSS JOIN knowledge k ON k.rowid = f.rowid
28158
+ WHERE knowledge_fts MATCH ?
28159
+ AND (k.project_id = ? OR k.cross_project = 1)
28160
+ AND k.confidence > 0.2
28161
+ ${excludeClause}
28162
+ ORDER BY bm25(knowledge_fts, ?, ?, ?) LIMIT 5` : `SELECT k.id, k.title FROM knowledge_fts f
28163
+ CROSS JOIN knowledge k ON k.rowid = f.rowid
28164
+ WHERE knowledge_fts MATCH ?
28165
+ AND (k.project_id IS NULL OR k.cross_project = 1)
28166
+ AND k.confidence > 0.2
28167
+ ${excludeClause}
28168
+ ORDER BY bm25(knowledge_fts, ?, ?, ?) LIMIT 5`;
28169
+ const params = input.projectId !== null ? [q, input.projectId, ...input.excludeId ? [input.excludeId] : [], tw, cw, catw] : [q, ...input.excludeId ? [input.excludeId] : [], tw, cw, catw];
28170
+ const candidates = db().query(sql).all(...params);
28171
+ for (const candidate of candidates) {
28172
+ const { coefficient, intersectionSize } = titleOverlap(input.title, candidate.title);
28173
+ if (coefficient >= FUZZY_DEDUP_THRESHOLD && intersectionSize >= FUZZY_DEDUP_MIN_OVERLAP) {
28174
+ return candidate;
28175
+ }
28176
+ }
28177
+ } catch {
28178
+ }
28179
+ return null;
28180
+ }
27935
28181
  function forProject(projectPath, includeCross = true) {
27936
28182
  const pid = ensureProject(projectPath);
27937
28183
  if (includeCross) {
@@ -28079,6 +28325,13 @@ function all2() {
28079
28325
  `SELECT ${KNOWLEDGE_COLS} FROM knowledge WHERE confidence > 0.2 ORDER BY confidence DESC, updated_at DESC`
28080
28326
  ).all();
28081
28327
  }
28328
+ function crossProject() {
28329
+ return db().query(
28330
+ `SELECT ${KNOWLEDGE_COLS} FROM knowledge
28331
+ WHERE (project_id IS NULL OR cross_project = 1) AND confidence > 0.2
28332
+ ORDER BY confidence DESC, updated_at DESC`
28333
+ ).all();
28334
+ }
28082
28335
  function searchLike2(input) {
28083
28336
  const terms = input.query.toLowerCase().split(/\s+/).filter((t2) => t2.length > 2);
28084
28337
  if (!terms.length) return [];
@@ -28310,6 +28563,102 @@ function check2(projectPath) {
28310
28563
  }
28311
28564
  return issues;
28312
28565
  }
28566
+ function _dedup(entries, dryRun) {
28567
+ if (entries.length < 2) return { clusters: [], totalRemoved: 0 };
28568
+ const embeddingMap = /* @__PURE__ */ new Map();
28569
+ {
28570
+ const entryIds = entries.map((e) => e.id);
28571
+ const placeholders = entryIds.map(() => "?").join(",");
28572
+ const rows = db().query(`SELECT id, embedding FROM knowledge WHERE embedding IS NOT NULL AND id IN (${placeholders})`).all(...entryIds);
28573
+ for (const row of rows) {
28574
+ try {
28575
+ embeddingMap.set(row.id, fromBlob(row.embedding));
28576
+ } catch {
28577
+ info(`skipping corrupted embedding for entry ${row.id}`);
28578
+ }
28579
+ }
28580
+ }
28581
+ const neighborMap = /* @__PURE__ */ new Map();
28582
+ for (const entry of entries) {
28583
+ const neighbors = [];
28584
+ const entryVec = embeddingMap.get(entry.id);
28585
+ for (const other of entries) {
28586
+ if (other.id === entry.id) continue;
28587
+ const { coefficient, intersectionSize } = titleOverlap(entry.title, other.title);
28588
+ const titleMatch = coefficient >= FUZZY_DEDUP_THRESHOLD && intersectionSize >= FUZZY_DEDUP_MIN_OVERLAP;
28589
+ let embeddingMatch = false;
28590
+ let similarity = 0;
28591
+ if (entryVec) {
28592
+ const otherVec = embeddingMap.get(other.id);
28593
+ if (otherVec && entryVec.length === otherVec.length) {
28594
+ similarity = cosineSimilarity(entryVec, otherVec);
28595
+ embeddingMatch = similarity >= EMBEDDING_DEDUP_THRESHOLD;
28596
+ }
28597
+ }
28598
+ if (titleMatch || embeddingMatch) {
28599
+ neighbors.push({ id: other.id, score: Math.max(coefficient, similarity) });
28600
+ }
28601
+ }
28602
+ neighbors.sort((a, b) => b.score - a.score);
28603
+ neighborMap.set(entry.id, neighbors);
28604
+ }
28605
+ const claimed = /* @__PURE__ */ new Set();
28606
+ const rawClusters = /* @__PURE__ */ new Map();
28607
+ const sortedIds = [...neighborMap.keys()].sort(
28608
+ (a, b) => neighborMap.get(b).length - neighborMap.get(a).length
28609
+ );
28610
+ for (const centerId of sortedIds) {
28611
+ if (claimed.has(centerId)) continue;
28612
+ claimed.add(centerId);
28613
+ const members = [centerId];
28614
+ for (const { id: neighborId } of neighborMap.get(centerId)) {
28615
+ if (claimed.has(neighborId)) continue;
28616
+ claimed.add(neighborId);
28617
+ members.push(neighborId);
28618
+ }
28619
+ if (members.length > 1) {
28620
+ rawClusters.set(centerId, members);
28621
+ }
28622
+ }
28623
+ const entryById = new Map(entries.map((e) => [e.id, e]));
28624
+ const result = [];
28625
+ let totalRemoved = 0;
28626
+ for (const members of rawClusters.values()) {
28627
+ if (members.length < 2) continue;
28628
+ const sorted = members.map((id) => entryById.get(id)).filter(Boolean).sort((a, b) => {
28629
+ if (b.confidence !== a.confidence) return b.confidence - a.confidence;
28630
+ if (b.updated_at !== a.updated_at) return b.updated_at - a.updated_at;
28631
+ return a.title.length - b.title.length;
28632
+ });
28633
+ const survivor = sorted[0];
28634
+ const merged = sorted.slice(1);
28635
+ result.push({
28636
+ surviving: { id: survivor.id, title: survivor.title },
28637
+ merged: merged.map((e) => ({ id: e.id, title: e.title }))
28638
+ });
28639
+ if (!dryRun) {
28640
+ for (const entry of merged) {
28641
+ remove(entry.id);
28642
+ }
28643
+ }
28644
+ totalRemoved += merged.length;
28645
+ }
28646
+ result.sort((a, b) => b.merged.length - a.merged.length);
28647
+ return { clusters: result, totalRemoved };
28648
+ }
28649
+ async function deduplicate(projectPath, opts) {
28650
+ const entries = forProject(projectPath, false);
28651
+ return _dedup(entries, opts?.dryRun ?? true);
28652
+ }
28653
+ async function deduplicateGlobal(opts) {
28654
+ const entries = db().query(
28655
+ `SELECT ${KNOWLEDGE_COLS} FROM knowledge
28656
+ WHERE project_id IS NULL
28657
+ AND confidence > 0.2
28658
+ ORDER BY confidence DESC, updated_at DESC`
28659
+ ).all();
28660
+ return _dedup(entries, opts?.dryRun ?? true);
28661
+ }
28313
28662
 
28314
28663
  // src/data.ts
28315
28664
  var data_exports = {};
@@ -28334,11 +28683,11 @@ __export(data_exports, {
28334
28683
  resolveId: () => resolveId,
28335
28684
  wipeDatabase: () => wipeDatabase
28336
28685
  });
28337
- import { statSync as statSync3, unlinkSync, existsSync as existsSync4 } from "fs";
28686
+ import { statSync as statSync4, unlinkSync, existsSync as existsSync5 } from "fs";
28338
28687
 
28339
28688
  // src/agents-file.ts
28340
- import { existsSync as existsSync3, readFileSync as readFileSync3, writeFileSync, mkdirSync as mkdirSync2, statSync as statSync2 } from "fs";
28341
- import { dirname as dirname2, join as join5 } from "path";
28689
+ import { existsSync as existsSync4, readFileSync as readFileSync3, writeFileSync, mkdirSync as mkdirSync3, statSync as statSync3 } from "fs";
28690
+ import { dirname as dirname2, join as join7 } from "path";
28342
28691
  var LORE_SECTION_START = "<!-- This section is maintained by the coding agent via lore (https://github.com/BYK/loreai) -->";
28343
28692
  var LORE_SECTION_END = "<!-- End lore-managed section -->";
28344
28693
  var ALL_START_MARKERS = [
@@ -28369,7 +28718,7 @@ function setCache(fp, entry) {
28369
28718
  ).run(key, value, value);
28370
28719
  }
28371
28720
// Drop the cached mtime/hash record for a project's lore file so the next
// export/import pass re-reads it from disk.
function clearLoreFileCache(projectPath) {
  const cacheKey = CACHE_PREFIX + join7(projectPath, LORE_FILE);
  db().query("DELETE FROM kv_meta WHERE key = ?").run(cacheKey);
}
28374
28723
  function splitFile(fileContent) {
28375
28724
  const spans = [];
@@ -28482,7 +28831,7 @@ function exportToFile(input) {
28482
28831
  const pointerBody = "\n## Long-term Knowledge\n\nFor long-term knowledge entries managed by [lore](https://github.com/BYK/loreai) (gotchas, patterns, decisions, architecture), see [`.lore.md`](.lore.md) in the project root.\n";
28483
28832
  const newSection = LORE_SECTION_START + pointerBody + LORE_SECTION_END + "\n";
28484
28833
  let fileContent = "";
28485
- if (existsSync3(input.filePath)) {
28834
+ if (existsSync4(input.filePath)) {
28486
28835
  fileContent = readFileSync3(input.filePath, "utf8");
28487
28836
  }
28488
28837
  const { before, after } = splitFile(fileContent);
@@ -28491,11 +28840,11 @@ function exportToFile(input) {
28491
28840
  const suffix = after.trimStart();
28492
28841
  const suffixWithSep = suffix.length > 0 ? "\n" + suffix : "";
28493
28842
  const result = prefixWithSep + newSection + suffixWithSep;
28494
- mkdirSync2(dirname2(input.filePath), { recursive: true });
28843
+ mkdirSync3(dirname2(input.filePath), { recursive: true });
28495
28844
  writeFileSync(input.filePath, result, "utf8");
28496
28845
  }
28497
28846
  function shouldImport(input) {
28498
- if (!existsSync3(input.filePath)) return false;
28847
+ if (!existsSync4(input.filePath)) return false;
28499
28848
  const fileContent = readFileSync3(input.filePath, "utf8");
28500
28849
  const { section } = splitFile(fileContent);
28501
28850
  if (section === null) {
@@ -28516,18 +28865,26 @@ function _importEntries(entries, projectPath) {
28516
28865
  update(entry.id, { content: entry.content });
28517
28866
  }
28518
28867
  } else {
28519
- create({
28520
- projectPath,
28521
- category: entry.category,
28522
- title: entry.title,
28523
- content: entry.content,
28524
- scope: "project",
28525
- crossProject: false,
28526
- id: entry.id
28527
- });
28868
+ const pid = ensureProject(projectPath);
28869
+ const fuzzyMatch = findFuzzyDuplicate({ title: entry.title, projectId: pid });
28870
+ if (fuzzyMatch) {
28871
+ if (fuzzyMatch.title !== entry.title || get(fuzzyMatch.id)?.content !== entry.content) {
28872
+ update(fuzzyMatch.id, { content: entry.content });
28873
+ }
28874
+ } else {
28875
+ create({
28876
+ projectPath,
28877
+ category: entry.category,
28878
+ title: entry.title,
28879
+ content: entry.content,
28880
+ scope: "project",
28881
+ crossProject: false,
28882
+ id: entry.id
28883
+ });
28884
+ }
28528
28885
  }
28529
28886
  } else {
28530
- const existing = forProject(projectPath, true);
28887
+ const existing = forProject(projectPath, false);
28531
28888
  const titleMatch = existing.find(
28532
28889
  (e) => e.title.toLowerCase() === entry.title.toLowerCase()
28533
28890
  );
@@ -28545,7 +28902,7 @@ function _importEntries(entries, projectPath) {
28545
28902
  }
28546
28903
  }
28547
28904
  function importFromFile(input) {
28548
- if (!existsSync3(input.filePath)) return;
28905
+ if (!existsSync4(input.filePath)) return;
28549
28906
  const fileContent = readFileSync3(input.filePath, "utf8");
28550
28907
  const { section } = splitFile(fileContent);
28551
28908
  const textToParse = section ?? fileContent;
@@ -28554,25 +28911,25 @@ function importFromFile(input) {
28554
28911
  _importEntries(fileEntries, input.projectPath);
28555
28912
  }
28556
28913
  function loreFileExists(projectPath) {
28557
- return existsSync3(join5(projectPath, LORE_FILE));
28914
+ return existsSync4(join7(projectPath, LORE_FILE));
28558
28915
  }
28559
28916
  function exportLoreFile(projectPath) {
28560
28917
  const sectionBody = buildSection(projectPath);
28561
28918
  const content3 = LORE_FILE_HEADER + "\n" + sectionBody;
28562
28919
  const contentHash2 = hashSection(content3);
28563
- const fp = join5(projectPath, LORE_FILE);
28920
+ const fp = join7(projectPath, LORE_FILE);
28564
28921
  const cached2 = getCache(fp);
28565
28922
  if (cached2 && cached2.hash === contentHash2) {
28566
28923
  return;
28567
28924
  }
28568
28925
  writeFileSync(fp, content3, "utf8");
28569
- const { mtimeMs } = statSync2(fp);
28926
+ const { mtimeMs } = statSync3(fp);
28570
28927
  setCache(fp, { mtimeMs, hash: contentHash2 });
28571
28928
  }
28572
28929
  function shouldImportLoreFile(projectPath) {
28573
- const fp = join5(projectPath, LORE_FILE);
28574
- if (!existsSync3(fp)) return false;
28575
- const { mtimeMs } = statSync2(fp);
28930
+ const fp = join7(projectPath, LORE_FILE);
28931
+ if (!existsSync4(fp)) return false;
28932
+ const { mtimeMs } = statSync3(fp);
28576
28933
  const cached2 = getCache(fp);
28577
28934
  if (cached2 && cached2.mtimeMs === mtimeMs) {
28578
28935
  return false;
@@ -28588,12 +28945,17 @@ function shouldImportLoreFile(projectPath) {
28588
28945
  return true;
28589
28946
  }
28590
28947
// Import knowledge entries from the project's lore file, then refresh the
// mtime/hash cache so identical file content is not re-imported.
function importLoreFile(projectPath) {
  const filePath = join7(projectPath, LORE_FILE);
  if (!existsSync4(filePath)) return;
  const raw = readFileSync3(filePath, "utf8");
  const parsedEntries = parseEntriesFromSection(raw);
  if (parsedEntries.length === 0) return;
  _importEntries(parsedEntries, projectPath);
  // Best-effort cache refresh: a failed stat must not fail the import.
  try {
    const stat = statSync3(filePath);
    setCache(filePath, { mtimeMs: stat.mtimeMs, hash: hashSection(raw) });
  } catch {
  }
}
28598
28960
 
28599
28961
  // src/data.ts
@@ -28668,10 +29030,10 @@ function globalStats() {
28668
29030
  let db_size_bytes = 0;
28669
29031
  try {
28670
29032
  const p2 = dbPath();
28671
- db_size_bytes = statSync3(p2).size;
29033
+ db_size_bytes = statSync4(p2).size;
28672
29034
  const walPath = p2 + "-wal";
28673
- if (existsSync4(walPath)) {
28674
- db_size_bytes += statSync3(walPath).size;
29035
+ if (existsSync5(walPath)) {
29036
+ db_size_bytes += statSync4(walPath).size;
28675
29037
  }
28676
29038
  } catch {
28677
29039
  }
@@ -28722,7 +29084,7 @@ function clearProject(projectPath) {
28722
29084
  database.exec("ROLLBACK");
28723
29085
  throw e;
28724
29086
  }
28725
- if (existsSync4(projectPath)) {
29087
+ if (existsSync5(projectPath)) {
28726
29088
  try {
28727
29089
  exportLoreFile(projectPath);
28728
29090
  } catch {
@@ -28793,7 +29155,7 @@ function clearKnowledge(projectPath) {
28793
29155
  "SELECT COUNT(*) as c FROM knowledge WHERE project_id = ?"
28794
29156
  ).get(pid).c;
28795
29157
  db().query("DELETE FROM knowledge WHERE project_id = ?").run(pid);
28796
- if (existsSync4(projectPath)) {
29158
+ if (existsSync5(projectPath)) {
28797
29159
  try {
28798
29160
  exportLoreFile(projectPath);
28799
29161
  } catch {
@@ -28852,7 +29214,7 @@ function wipeDatabase() {
28852
29214
  close();
28853
29215
  for (const suffix of ["", "-wal", "-shm"]) {
28854
29216
  const fp = p2 + suffix;
28855
- if (existsSync4(fp)) {
29217
+ if (existsSync5(fp)) {
28856
29218
  try {
28857
29219
  unlinkSync(fp);
28858
29220
  } catch {
@@ -28893,7 +29255,7 @@ function backfillGitRemotes() {
28893
29255
  for (const project of projects) {
28894
29256
  let gitRemote = project.git_remote;
28895
29257
  if (!gitRemote) {
28896
- if (!existsSync4(project.path)) continue;
29258
+ if (!existsSync5(project.path)) continue;
28897
29259
  gitRemote = getGitRemote(project.path);
28898
29260
  if (!gitRemote) continue;
28899
29261
  const existing = db().query(
@@ -28992,6 +29354,32 @@ var PATTERNS = [
28992
29354
  regex: /(?:user |team |we )(?:always |usually |typically )(?:use|prefer|go with) (.+?)(?:\.|,|$)/gi,
28993
29355
  category: "preference",
28994
29356
  titleFn: (m) => `Typically uses ${m[1].trim()}`
29357
+ },
29358
+ // Process instruction patterns — match distilled observations recording
29359
+ // user assertions about workflow/process rules. The distillation observer
29360
+ // normalizes user instructions into "User stated always X" phrasing.
29361
+ // These require "stated/asserted/said" to avoid overlapping with the
29362
+ // existing "typically uses" pattern above (which already handles
29363
+ // "user always use/prefer/go with X").
29364
+ {
29365
+ regex: /(?:user |team |we )(?:stated |asserted |said )(?:to )?always (.+?)(?:\.|,|$)/gi,
29366
+ category: "preference",
29367
+ titleFn: (m) => `Always ${m[1].trim()}`
29368
+ },
29369
+ {
29370
+ regex: /(?:user |team |we )(?:stated |asserted |said )(?:to )?never (.+?)(?:\.|,|$)/gi,
29371
+ category: "preference",
29372
+ titleFn: (m) => `Never ${m[1].trim()}`
29373
+ },
29374
+ {
29375
+ regex: /(?:user |team |we )(?:stated |asserted |said )(?:to )?make sure to (.+?)(?:\.|,|$)/gi,
29376
+ category: "preference",
29377
+ titleFn: (m) => `Make sure to ${m[1].trim()}`
29378
+ },
29379
+ {
29380
+ regex: /(?:user |team |we )(?:stated |asserted |said )(?:to )?(?:don't|do not) forget (?:to )?(.+?)(?:\.|,|$)/gi,
29381
+ category: "preference",
29382
+ titleFn: (m) => `Always ${m[1].trim()}`
28995
29383
  }
28996
29384
  ];
28997
29385
  function extractPatterns(observations) {
@@ -29001,6 +29389,8 @@ function extractPatterns(observations) {
29001
29389
  regex.lastIndex = 0;
29002
29390
  let match;
29003
29391
  while ((match = regex.exec(observations)) !== null) {
29392
+ const captures = match.slice(1);
29393
+ if (captures.some((c) => c && (c.trim().length <= 2 || /["\u201C\u201D`\u2018\u2019]/.test(c)))) continue;
29004
29394
  const title = titleFn(match);
29005
29395
  const key = title.toLowerCase();
29006
29396
  if (seen.has(key)) continue;
@@ -29120,7 +29510,7 @@ function getSessionState(sessionID) {
29120
29510
  }
29121
29511
  return state;
29122
29512
  }
29123
- function onIdleResume(sessionID, thresholdMs, now = Date.now()) {
29513
+ function onIdleResume(sessionID, thresholdMs, now = Date.now(), skipCompact = false) {
29124
29514
  if (thresholdMs <= 0) return { triggered: false };
29125
29515
  const state = getSessionState(sessionID);
29126
29516
  if (state.lastTurnAt === 0) return { triggered: false };
@@ -29130,7 +29520,7 @@ function onIdleResume(sessionID, thresholdMs, now = Date.now()) {
29130
29520
  state.rawWindowCache = null;
29131
29521
  state.distillationSnapshot = null;
29132
29522
  state.cameOutOfIdle = true;
29133
- state.postIdleCompact = true;
29523
+ state.postIdleCompact = !skipCompact;
29134
29524
  return { triggered: true, idleMs };
29135
29525
  }
29136
29526
  function getLastTurnAt(sessionID) {
@@ -30267,7 +30657,8 @@ async function distillSegment(input) {
30267
30657
  embedDistillation(distillId, result.observations);
30268
30658
  }
30269
30659
  if (config2().knowledge.enabled) {
30270
- for (const pat of extractPatterns(result.observations)) {
30660
+ const patterns = extractPatterns(result.observations);
30661
+ for (const pat of patterns) {
30271
30662
  try {
30272
30663
  create({
30273
30664
  projectPath: input.projectPath,
@@ -30280,6 +30671,9 @@ async function distillSegment(input) {
30280
30671
  } catch {
30281
30672
  }
30282
30673
  }
30674
+ if (patterns.length > 0) {
30675
+ info(`pattern extraction: ${patterns.length} entries from distillation`);
30676
+ }
30283
30677
  }
30284
30678
  return result;
30285
30679
  }
@@ -30329,7 +30723,8 @@ async function metaDistill(input) {
30329
30723
  embedDistillation(metaId, result.observations);
30330
30724
  }
30331
30725
  if (config2().knowledge.enabled) {
30332
- for (const pat of extractPatterns(result.observations)) {
30726
+ const patterns = extractPatterns(result.observations);
30727
+ for (const pat of patterns) {
30333
30728
  try {
30334
30729
  create({
30335
30730
  projectPath: input.projectPath,
@@ -30342,6 +30737,9 @@ async function metaDistill(input) {
30342
30737
  } catch {
30343
30738
  }
30344
30739
  }
30740
+ if (patterns.length > 0) {
30741
+ info(`pattern extraction: ${patterns.length} entries from meta-distillation`);
30742
+ }
30345
30743
  }
30346
30744
  return result;
30347
30745
  }
@@ -30380,10 +30778,150 @@ function backfillMetrics() {
30380
30778
  // src/curator.ts
30381
30779
  var curator_exports = {};
30382
30780
  __export(curator_exports, {
30781
+ MAX_ENTRY_CONTENT_LENGTH: () => MAX_ENTRY_CONTENT_LENGTH,
30782
+ applyOps: () => applyOps,
30383
30783
  consolidate: () => consolidate,
30784
+ parseOps: () => parseOps,
30384
30785
  resetCurationTracker: () => resetCurationTracker,
30385
30786
  run: () => run2
30386
30787
  });
30788
+
30789
+ // src/instruction-detect.ts
30790
+ var instruction_detect_exports = {};
30791
+ __export(instruction_detect_exports, {
30792
+ detectAndFormat: () => detectAndFormat,
30793
+ extractInstructionCandidates: () => extractInstructionCandidates,
30794
+ findRepeatedInstructions: () => findRepeatedInstructions,
30795
+ formatForCurator: () => formatForCurator
30796
+ });
30797
// Minimum number of distinct prior sessions before an instruction counts as repeated.
var DEFAULT_REPETITION_THRESHOLD = 2;
// Cosine-similarity floor for vector hits against prior distillations.
var VECTOR_SIMILARITY_THRESHOLD = 0.5;
// Hard cap on candidates extracted per scan.
var MAX_CANDIDATES = 5;
// Heuristic phrasings that signal a durable user instruction; each captures
// the instruction text (10-80 chars) up to sentence punctuation.
var INSTRUCTION_PATTERNS = [
  /\balways\b (.{10,80}?)(?:\.|,|!|$)/gi,
  /\bnever\b (.{10,80}?)(?:\.|,|!|$)/gi,
  /\bmake sure to (.{10,80}?)(?:\.|,|!|$)/gi,
  /\bdon'?t forget (?:to )?(.{10,80}?)(?:\.|,|!|$)/gi,
  /\bplease (?:always |make sure (?:to )?)(.{10,80}?)(?:\.|,|!|$)/gi,
  /\bI (?:want|need|prefer|expect) (?:you to )?(.{10,80}?)(?:\.|,|!|$)/gi
];
/**
 * Scan user messages for instruction-like phrases. Returns up to
 * MAX_CANDIDATES unique { text, sessionID } candidates, deduplicated
 * case-insensitively on the captured instruction text.
 */
function extractInstructionCandidates(messages) {
  const found = [];
  const seenKeys = new Set();
  for (const message of messages) {
    if (message.role !== "user") continue;
    for (const re of INSTRUCTION_PATTERNS) {
      // Shared /g regexes keep a stateful cursor — reset before each message.
      re.lastIndex = 0;
      for (let m = re.exec(message.content); m !== null; m = re.exec(message.content)) {
        const captured = m[1]?.trim();
        if (!captured || captured.length < 10) continue;
        const key = captured.toLowerCase();
        if (seenKeys.has(key)) continue;
        seenKeys.add(key);
        found.push({ text: captured, sessionID: message.session_id });
        if (found.length >= MAX_CANDIDATES) return found;
      }
    }
  }
  return found;
}
30832
/**
 * Find which candidate instructions also appear in distillations from OTHER
 * sessions of the same project. Two channels feed a per-candidate session
 * set: vector search over embeddings (when available) and keyword FTS.
 * Candidates seen in at least `threshold` distinct prior sessions are
 * returned as { instruction, priorSessionCount }.
 */
async function findRepeatedInstructions(input) {
  const threshold = input.threshold ?? DEFAULT_REPETITION_THRESHOLD;
  if (input.candidates.length === 0) return [];
  const pid = ensureProject(input.projectPath);
  let embeddings = [];
  if (isAvailable()) {
    try {
      embeddings = await embed(input.candidates.map((c) => c.text), "query");
    } catch (err) {
      // Non-fatal: fall through to FTS-only matching.
      warn("instruction-detect: batch embedding failed:", err);
    }
  }
  const repeated = [];
  for (let idx = 0; idx < input.candidates.length; idx++) {
    const candidate = input.candidates[idx];
    const priorSessions = new Set();
    // Vector channel: semantically similar distillations from other sessions.
    if (idx < embeddings.length) {
      const hits = vectorSearchAllDistillations(embeddings[idx], pid, 20);
      for (const hit of hits) {
        if (hit.similarity >= VECTOR_SIMILARITY_THRESHOLD && hit.session_id !== input.currentSessionID) {
          priorSessions.add(hit.session_id);
        }
      }
    }
    // FTS channel: keyword overlap on the candidate's salient terms.
    const terms = filterTerms(candidate.text);
    if (terms.length >= 2) {
      const query = terms.slice(0, 5).join(" ");
      for (const hit of searchDistillationsFTS(pid, query)) {
        if (hit.session_id !== input.currentSessionID) {
          priorSessions.add(hit.session_id);
        }
      }
    }
    if (priorSessions.size >= threshold) {
      repeated.push({
        instruction: candidate.text,
        priorSessionCount: priorSessions.size
      });
    }
  }
  return repeated;
}
30878
// Full-text lookup over a project's distillations. Returns up to 30
// { id, session_id } rows ranked by FTS relevance, or [] (after a warn)
// when the query is empty or the MATCH expression is rejected by SQLite.
function searchDistillationsFTS(projectId2, rawQuery) {
  const matchExpr = ftsQueryOr(rawQuery);
  if (matchExpr === EMPTY_QUERY) return [];
  const sql = [
    "SELECT d.id, d.session_id",
    "FROM distillation_fts f",
    "CROSS JOIN distillations d ON d.rowid = f.rowid",
    "WHERE distillation_fts MATCH ?",
    "AND d.project_id = ?",
    "ORDER BY rank LIMIT 30"
  ].join("\n");
  try {
    return db().query(sql).all(matchExpr, projectId2);
  } catch (err) {
    warn("instruction-detect: FTS search failed:", err);
    return [];
  }
}
30894
// Render repeated instructions as an appendix for the curator prompt.
// Returns "" when there is nothing to report.
function formatForCurator(instructions) {
  if (instructions.length === 0) return "";
  const bulletLines = instructions.map((entry) => {
    const plural = entry.priorSessionCount !== 1 ? "s" : "";
    return `- "${entry.instruction}" (seen in ${entry.priorSessionCount} prior session${plural})`;
  });
  return `

---
CROSS-SESSION REPEATED INSTRUCTIONS (high-confidence preference candidates):
The following user instructions have appeared in multiple prior sessions. These are strong candidates for "preference" entries:
${bulletLines.join("\n")}`;
}
30906
// End-to-end pass: pull this session's messages, find instructions the user
// has repeated across prior sessions, and render them as curator context.
async function detectAndFormat(input) {
  const sessionMessages = bySession(input.projectPath, input.sessionID);
  const candidates = extractInstructionCandidates(sessionMessages);
  if (candidates.length === 0) return "";
  const repeated = await findRepeatedInstructions({
    projectPath: input.projectPath,
    currentSessionID: input.sessionID,
    candidates,
    threshold: input.threshold
  });
  if (repeated.length > 0) {
    info(`instruction-detect: ${repeated.length} repeated instruction(s) found across sessions`);
  }
  return formatForCurator(repeated);
}
30923
+
30924
+ // src/curator.ts
30387
30925
  var MAX_ENTRY_CONTENT_LENGTH = 1200;
30388
30926
  function parseOps(text4) {
30389
30927
  const cleaned = text4.trim().replace(/^```json?\s*/i, "").replace(/\s*```$/i, "");
@@ -30397,40 +30935,14 @@ function parseOps(text4) {
30397
30935
  return [];
30398
30936
  }
30399
30937
  }
30400
- var lastCuratedAt = /* @__PURE__ */ new Map();
30401
- async function run2(input) {
30402
- const cfg = config2();
30403
- if (!cfg.curator.enabled) return { created: 0, updated: 0, deleted: 0 };
30404
- const all3 = bySession(input.projectPath, input.sessionID);
30405
- const sessionCuratedAt = lastCuratedAt.get(input.sessionID) ?? 0;
30406
- const recent = all3.filter((m) => m.created_at > sessionCuratedAt);
30407
- if (recent.length < 3) return { created: 0, updated: 0, deleted: 0 };
30408
- const text4 = recent.map((m) => `[${m.role}] ${m.content}`).join("\n\n");
30409
- const existing = forProject(input.projectPath, false);
30410
- const existingForPrompt = existing.map((e) => ({
30411
- id: e.id,
30412
- category: e.category,
30413
- title: e.title,
30414
- content: e.content
30415
- }));
30416
- const userContent = curatorUser({
30417
- messages: text4,
30418
- existing: existingForPrompt
30419
- });
30420
- const model = input.model ?? cfg.model;
30421
- const responseText = await input.llm.prompt(
30422
- CURATOR_SYSTEM,
30423
- userContent,
30424
- { model, workerID: "lore-curator", thinking: false, sessionID: input.sessionID, maxTokens: 2048 }
30425
- );
30426
- if (!responseText) return { created: 0, updated: 0, deleted: 0 };
30427
- const ops = parseOps(responseText);
30938
+ function applyOps(ops, input) {
30428
30939
  let created = 0;
30429
30940
  let updated = 0;
30430
30941
  let deleted = 0;
30431
30942
  const idsToSync = [];
30432
30943
  for (const op of ops) {
30433
30944
  if (op.op === "create") {
30945
+ if (input.skipCreate) continue;
30434
30946
  const content3 = op.content.length > MAX_ENTRY_CONTENT_LENGTH ? op.content.slice(0, MAX_ENTRY_CONTENT_LENGTH) + " [truncated \u2014 entry too long]" : op.content;
30435
30947
  const id = create({
30436
30948
  projectPath: op.scope === "project" ? input.projectPath : void 0,
@@ -30462,9 +30974,64 @@ async function run2(input) {
30462
30974
  for (const id of idsToSync) {
30463
30975
  syncRefs(id);
30464
30976
  }
30465
- lastCuratedAt.set(input.sessionID, Date.now());
30466
30977
  return { created, updated, deleted };
30467
30978
  }
30979
// Per-session high-water mark (ms epoch) of the last curation pass.
var lastCuratedAt = /* @__PURE__ */ new Map();
/**
 * Run one curation pass over the messages accumulated since the previous
 * pass for this session. Builds the curator prompt (optionally enriched
 * with cross-session repeated-instruction context), applies the returned
 * ops, and — when anything new was created — deduplicates the project's
 * entries, folding merged duplicates into the `deleted` count.
 */
async function run2(input) {
  const cfg = config2();
  if (!cfg.curator.enabled) return { created: 0, updated: 0, deleted: 0 };
  const messages = bySession(input.projectPath, input.sessionID);
  const since = lastCuratedAt.get(input.sessionID) ?? 0;
  const fresh = messages.filter((m) => m.created_at > since);
  // Not enough new material to be worth an LLM call.
  if (fresh.length < 3) return { created: 0, updated: 0, deleted: 0 };
  const transcript = fresh.map((m) => `[${m.role}] ${m.content}`).join("\n\n");
  const existingForPrompt = forProject(input.projectPath, false).map((e) => ({
    id: e.id,
    category: e.category,
    title: e.title,
    content: e.content
  }));
  const baseUserContent = curatorUser({
    messages: transcript,
    existing: existingForPrompt
  });
  let crossSessionContext = "";
  try {
    crossSessionContext = await detectAndFormat({
      projectPath: input.projectPath,
      sessionID: input.sessionID
    });
  } catch (err) {
    warn("instruction-detect failed (non-fatal):", err);
  }
  const responseText = await input.llm.prompt(
    CURATOR_SYSTEM,
    baseUserContent + crossSessionContext,
    {
      model: input.model ?? cfg.model,
      workerID: "lore-curator",
      thinking: false,
      sessionID: input.sessionID,
      maxTokens: 2048
    }
  );
  if (!responseText) return { created: 0, updated: 0, deleted: 0 };
  const result = applyOps(parseOps(responseText), {
    projectPath: input.projectPath,
    sessionID: input.sessionID
  });
  if (result.created > 0) {
    try {
      const dupes = await deduplicate(input.projectPath, { dryRun: false });
      if (dupes.totalRemoved > 0) {
        info(`post-curation dedup: merged ${dupes.totalRemoved} duplicate entries`);
        result.deleted += dupes.totalRemoved;
      }
    } catch (err) {
      warn("post-curation dedup failed (non-fatal):", err);
    }
  }
  lastCuratedAt.set(input.sessionID, Date.now());
  return result;
}
30468
31035
  function resetCurationTracker(sessionID) {
30469
31036
  if (sessionID) {
30470
31037
  lastCuratedAt.delete(sessionID);
@@ -30495,26 +31062,1431 @@ async function consolidate(input) {
30495
31062
  );
30496
31063
  if (!responseText) return { updated: 0, deleted: 0 };
30497
31064
  const ops = parseOps(responseText);
30498
- let updated = 0;
30499
- let deleted = 0;
30500
- for (const op of ops) {
30501
- if (op.op === "update") {
30502
- const entry = get(op.id);
30503
- if (entry) {
30504
- const content3 = op.content !== void 0 && op.content.length > MAX_ENTRY_CONTENT_LENGTH ? op.content.slice(0, MAX_ENTRY_CONTENT_LENGTH) + " [truncated \u2014 entry too long]" : op.content;
30505
- update(op.id, { content: content3, confidence: op.confidence });
30506
- updated++;
31065
+ const result = applyOps(ops, {
31066
+ projectPath: input.projectPath,
31067
+ sessionID: input.sessionID,
31068
+ skipCreate: true
31069
+ // Consolidation must not add entries.
31070
+ });
31071
+ return { updated: result.updated, deleted: result.deleted };
31072
+ }
31073
+
31074
+ // src/import/index.ts
31075
+ var import_exports = {};
31076
+ __export(import_exports, {
31077
+ clearProviders: () => clearProviders,
31078
+ computeHash: () => computeHash,
31079
+ detectAll: () => detectAll,
31080
+ extractKnowledge: () => extractKnowledge,
31081
+ getProvider: () => getProvider2,
31082
+ getProviders: () => getProviders,
31083
+ isImported: () => isImported,
31084
+ listImports: () => listImports,
31085
+ recordImport: () => recordImport,
31086
+ registerProvider: () => registerProvider
31087
+ });
31088
+
31089
+ // src/import/providers/index.ts
31090
+ var providers = [];
31091
+ function registerProvider(provider) {
31092
+ providers.push(provider);
31093
+ }
31094
+ function getProviders() {
31095
+ return providers;
31096
+ }
31097
+ function getProvider2(name) {
31098
+ return providers.find((p2) => p2.name === name);
31099
+ }
31100
+ function clearProviders() {
31101
+ providers.length = 0;
31102
+ }
31103
+
31104
+ // src/import/detect.ts
31105
+ function detectAll(projectPath) {
31106
+ const results = [];
31107
+ for (const provider of getProviders()) {
31108
+ try {
31109
+ const sessions = provider.detect(projectPath);
31110
+ if (sessions.length > 0) {
31111
+ results.push({
31112
+ agentName: provider.name,
31113
+ agentDisplayName: provider.displayName,
31114
+ sessions,
31115
+ totalTokens: sessions.reduce((s, sess) => s + sess.estimatedTokens, 0),
31116
+ totalMessages: sessions.reduce((s, sess) => s + sess.messageCount, 0)
31117
+ });
30507
31118
  }
30508
- } else if (op.op === "delete") {
30509
- const entry = get(op.id);
30510
- if (entry) {
30511
- remove(op.id);
30512
- deleted++;
31119
+ } catch (err) {
31120
+ }
31121
+ }
31122
+ return results.sort((a, b) => b.totalMessages - a.totalMessages);
31123
+ }
31124
+
31125
+ // src/import/extract.ts
31126
// Curator system prompt specialized for importing another agent's history.
var IMPORT_CURATOR_SYSTEM = `${CURATOR_SYSTEM}

ADDITIONAL CONTEXT: You are extracting knowledge from HISTORICAL conversations with a different AI coding agent. Focus on durable insights that are still relevant:
- Architecture decisions, design patterns, and project conventions
- Gotchas, non-obvious bugs, and their fixes
- Developer preferences and workflow patterns
- Key technical choices and their rationale

Ignore:
- References to the other agent's specific capabilities or limitations
- Task-specific state that is no longer current (e.g. "currently debugging X")
- Debugging steps for issues that were already resolved
- Transient conversation artifacts (greetings, acknowledgments, status updates)`;
/**
 * Feed imported conversation chunks (oldest first) through the curator LLM
 * and apply the resulting knowledge ops. Per-chunk failures are counted in
 * chunksFailed rather than aborting the whole import; onProgress (if given)
 * is invoked after every chunk.
 */
async function extractKnowledge(input) {
  const totals = {
    created: 0,
    updated: 0,
    deleted: 0,
    chunksProcessed: 0,
    chunksFailed: 0
  };
  const chunks = [...input.chunks].sort((a, b) => a.timestamp - b.timestamp);
  let index = 0;
  for (const chunk of chunks) {
    // Re-read existing entries each iteration: earlier chunks may have
    // created entries the curator should see in its prompt.
    const existingForPrompt = forProject(input.projectPath, false).map((e) => ({
      id: e.id,
      category: e.category,
      title: e.title,
      content: e.content
    }));
    const userContent = curatorUser({
      messages: chunk.text,
      existing: existingForPrompt
    });
    try {
      const response = await input.llm.prompt(
        IMPORT_CURATOR_SYSTEM,
        userContent,
        {
          model: input.model,
          workerID: "lore-import",
          thinking: false,
          maxTokens: 4096,
          sessionID: input.sessionID
        }
      );
      if (response) {
        const applied = applyOps(parseOps(response), {
          projectPath: input.projectPath,
          sessionID: input.sessionID
        });
        totals.created += applied.created;
        totals.updated += applied.updated;
        totals.deleted += applied.deleted;
      }
      totals.chunksProcessed++;
    } catch {
      totals.chunksFailed++;
    }
    index++;
    input.onProgress?.({
      current: index,
      total: chunks.length,
      created: totals.created,
      updated: totals.updated
    });
  }
  return totals;
}
31196
+
31197
// src/import/history.ts
/**
 * Return the prior import record for (project, agent, source) only when its
 * stored content hash still matches `sourceHash` — i.e. the source was
 * imported and has not changed since. Otherwise null.
 */
function isImported(projectPath, agentName, sourceId, sourceHash) {
  const projectId2 = ensureProject(projectPath);
  const record = db().query(
    `SELECT * FROM import_history
  WHERE project_id = ? AND agent_name = ? AND source_id = ?`
  ).get(projectId2, agentName, sourceId);
  if (!record) return null;
  return record.source_hash === sourceHash ? record : null;
}
31208
// Upsert the import record for a source so future runs can skip it while
// the content hash is unchanged.
function recordImport(projectPath, agentName, sourceId, sourceHash, stats) {
  const projectId2 = ensureProject(projectPath);
  const params = [
    crypto.randomUUID(),
    projectId2,
    agentName,
    sourceId,
    sourceHash,
    stats.created,
    stats.updated,
    Date.now()
  ];
  db().query(
    `INSERT OR REPLACE INTO import_history
  (id, project_id, agent_name, source_id, source_hash, entries_created, entries_updated, imported_at)
  VALUES (?, ?, ?, ?, ?, ?, ?, ?)`
  ).run(...params);
}
31225
// All real import records for a project, newest first. The sentinel
// source_id '__declined__' (user opted out of importing) is excluded.
function listImports(projectPath) {
  const projectId2 = ensureProject(projectPath);
  const sql = `SELECT * FROM import_history
  WHERE project_id = ? AND source_id != '__declined__'
  ORDER BY imported_at DESC`;
  return db().query(sql).all(projectId2);
}
31233
// Cheap change-detection fingerprint for an import source: size, message
// count, and last-activity timestamp joined as "size:count:ts"; missing
// parts default to 0.
function computeHash(parts) {
  const size = parts.size ?? 0;
  const count = parts.messageCount ?? 0;
  const last = parts.lastTimestamp ?? 0;
  return `${size}:${count}:${last}`;
}
31236
+
31237
+ // src/import/providers/claude-code.ts
31238
+ import { readdirSync as readdirSync2, readFileSync as readFileSync4, statSync as statSync5 } from "fs";
31239
+ import { join as join8 } from "path";
31240
+ import { homedir as homedir2 } from "os";
31241
// Root of Claude Code's per-project session storage.
var CLAUDE_DIR = join8(homedir2(), ".claude", "projects");
// Cap on how much of a tool input/result is kept in extracted text.
var MAX_TOOL_OUTPUT_CHARS = 500;
var DEFAULT_MAX_TOKENS = 12288;
// Claude Code derives the storage directory name from the project path by
// replacing every "/" with "-".
function manglePath(projectPath) {
  return projectPath.replaceAll("/", "-");
}
// Rough token estimate: ~3 characters per token, rounded up.
function estimateTokens4(text4) {
  return Math.ceil(text4.length / 3);
}
31250
// Clip text to `max` characters, appending "..." when anything was cut.
// Note: a clipped result exceeds `max` by the 3-char ellipsis.
function truncate(text4, max) {
  return text4.length <= max ? text4 : text4.slice(0, max) + "...";
}
31254
// Flatten one Claude message content block to a single text fragment.
// Returns null for blocks carrying no importable text ("thinking", empty
// tool results, unknown types).
function blockToText(block) {
  if (block.type === "text") return block.text;
  if (block.type === "tool_use") {
    const call = block;
    const inputSummary = truncate(JSON.stringify(call.input), MAX_TOOL_OUTPUT_CHARS);
    return `[tool: ${call.name}] ${inputSummary}`;
  }
  if (block.type === "tool_result") {
    const res = block;
    let body;
    if (typeof res.content === "string") {
      body = res.content;
    } else if (Array.isArray(res.content)) {
      const texts = [];
      for (const piece of res.content) {
        if (piece.type === "text" && piece.text) texts.push(piece.text);
      }
      body = texts.join("\n");
    } else {
      body = "";
    }
    return body ? `[tool_result] ${truncate(body, MAX_TOOL_OUTPUT_CHARS)}` : null;
  }
  // "thinking" and any unrecognized block types contribute nothing.
  return null;
}
31284
+ function lineToText(parsed) {
31285
+ if (parsed.type === "user") {
31286
+ const msg = parsed;
31287
+ const content3 = msg.message.content;
31288
+ if (typeof content3 === "string") {
31289
+ return `[user] ${content3}`;
31290
+ }
31291
+ const parts = content3.map(blockToText).filter(Boolean);
31292
+ return parts.length > 0 ? `[user] ${parts.join("\n")}` : null;
31293
+ }
31294
+ if (parsed.type === "assistant") {
31295
+ const msg = parsed;
31296
+ const blocks = msg.message.content;
31297
+ if (!Array.isArray(blocks)) return null;
31298
+ const parts = blocks.map(blockToText).filter(Boolean);
31299
+ return parts.length > 0 ? `[assistant] ${parts.join("\n")}` : null;
31300
+ }
31301
+ return null;
31302
+ }
31303
+ function parseJSONL(filePath) {
31304
+ const raw = readFileSync4(filePath, "utf-8");
31305
+ const lines = [];
31306
+ for (const line of raw.split("\n")) {
31307
+ if (!line.trim()) continue;
31308
+ try {
31309
+ lines.push(JSON.parse(line));
31310
+ } catch {
31311
+ }
31312
+ }
31313
+ return lines;
31314
+ }
31315
+ function getSessionMetadata(filePath) {
31316
+ let raw;
31317
+ try {
31318
+ raw = readFileSync4(filePath, "utf-8");
31319
+ } catch {
31320
+ return null;
31321
+ }
31322
+ const lines = raw.split("\n").filter((l) => l.trim());
31323
+ if (lines.length === 0) return null;
31324
+ let sessionId;
31325
+ let startedAt = Infinity;
31326
+ let lastActivityAt = 0;
31327
+ let messageCount = 0;
31328
+ for (const line of lines) {
31329
+ try {
31330
+ const parsed = JSON.parse(line);
31331
+ if (parsed.sessionId && !sessionId) sessionId = parsed.sessionId;
31332
+ if (parsed.timestamp) {
31333
+ const ts = new Date(parsed.timestamp).getTime();
31334
+ if (!Number.isNaN(ts)) {
31335
+ if (ts < startedAt) startedAt = ts;
31336
+ if (ts > lastActivityAt) lastActivityAt = ts;
31337
+ }
31338
+ }
31339
+ if (parsed.type === "user" || parsed.type === "assistant") {
31340
+ messageCount++;
31341
+ }
31342
+ } catch {
31343
+ }
31344
+ }
31345
+ if (!sessionId || messageCount === 0) return null;
31346
+ const fileSize = raw.length;
31347
+ const estimatedTokens = Math.ceil(fileSize / 5);
31348
+ return {
31349
+ sessionId,
31350
+ startedAt: startedAt === Infinity ? Date.now() : startedAt,
31351
+ lastActivityAt,
31352
+ messageCount,
31353
+ estimatedTokens
31354
+ };
31355
+ }
31356
+ var claudeCodeProvider = {
31357
+ name: "claude-code",
31358
+ displayName: "Claude Code",
31359
+ detect(projectPath) {
31360
+ const mangled = manglePath(projectPath);
31361
+ const dir = join8(CLAUDE_DIR, mangled);
31362
+ let entries;
31363
+ try {
31364
+ entries = readdirSync2(dir);
31365
+ } catch {
31366
+ return [];
31367
+ }
31368
+ const sessions = [];
31369
+ for (const entry of entries) {
31370
+ if (!entry.endsWith(".jsonl")) continue;
31371
+ const filePath = join8(dir, entry);
31372
+ try {
31373
+ const stat = statSync5(filePath);
31374
+ if (!stat.isFile()) continue;
31375
+ } catch {
31376
+ continue;
31377
+ }
31378
+ const meta3 = getSessionMetadata(filePath);
31379
+ if (!meta3) continue;
31380
+ if (meta3.messageCount < 3) continue;
31381
+ const dateStr = new Date(meta3.startedAt).toISOString().slice(0, 10);
31382
+ sessions.push({
31383
+ id: filePath,
31384
+ label: `${dateStr} (${meta3.messageCount} messages)`,
31385
+ startedAt: meta3.startedAt,
31386
+ lastActivityAt: meta3.lastActivityAt,
31387
+ estimatedTokens: meta3.estimatedTokens,
31388
+ messageCount: meta3.messageCount
31389
+ });
31390
+ }
31391
+ return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
31392
+ },
31393
+ readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS) {
31394
+ const chunks = [];
31395
+ for (const filePath of sessionIds) {
31396
+ const lines = parseJSONL(filePath);
31397
+ const messages = [];
31398
+ for (const line of lines) {
31399
+ const text4 = lineToText(line);
31400
+ if (!text4) continue;
31401
+ const ts = "timestamp" in line && line.timestamp ? new Date(line.timestamp).getTime() : Date.now();
31402
+ messages.push({ text: text4, timestamp: ts });
31403
+ }
31404
+ if (messages.length === 0) continue;
31405
+ let currentTexts = [];
31406
+ let currentTokens = 0;
31407
+ let chunkStart = messages[0].timestamp;
31408
+ let chunkIndex = 0;
31409
+ const flushChunk = () => {
31410
+ if (currentTexts.length === 0) return;
31411
+ chunkIndex++;
31412
+ const text4 = currentTexts.join("\n\n");
31413
+ chunks.push({
31414
+ label: `Claude Code ${new Date(chunkStart).toISOString().slice(0, 10)} (${chunkIndex})`,
31415
+ text: text4,
31416
+ estimatedTokens: estimateTokens4(text4),
31417
+ timestamp: chunkStart
31418
+ });
31419
+ currentTexts = [];
31420
+ currentTokens = 0;
31421
+ };
31422
+ for (const msg of messages) {
31423
+ const msgTokens = estimateTokens4(msg.text);
31424
+ if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
31425
+ flushChunk();
31426
+ chunkStart = msg.timestamp;
31427
+ }
31428
+ currentTexts.push(msg.text);
31429
+ currentTokens += msgTokens;
31430
+ }
31431
+ flushChunk();
31432
+ }
31433
+ return chunks;
31434
+ }
31435
+ };
31436
+ registerProvider(claudeCodeProvider);
31437
+
31438
+ // src/import/providers/codex.ts
31439
+ import { readdirSync as readdirSync3, readFileSync as readFileSync5, statSync as statSync6, existsSync as existsSync6 } from "fs";
31440
+ import { join as join9 } from "path";
31441
+ import { homedir as homedir3 } from "os";
31442
+ var CODEX_DIR = join9(homedir3(), ".codex");
31443
+ var SESSIONS_DIR = join9(CODEX_DIR, "sessions");
31444
+ var ARCHIVED_DIR = join9(CODEX_DIR, "archived_sessions");
31445
+ var MAX_TOOL_OUTPUT_CHARS2 = 500;
31446
+ var DEFAULT_MAX_TOKENS2 = 12288;
31447
+ function estimateTokens5(text4) {
31448
+ return Math.ceil(text4.length / 3);
31449
+ }
31450
+ function truncate2(text4, max) {
31451
+ if (text4.length <= max) return text4;
31452
+ return text4.slice(0, max) + "...";
31453
+ }
31454
+ function findJsonlFiles(dir) {
31455
+ const results = [];
31456
+ if (!existsSync6(dir)) return results;
31457
+ const walk = (d) => {
31458
+ let entries;
31459
+ try {
31460
+ entries = readdirSync3(d);
31461
+ } catch {
31462
+ return;
31463
+ }
31464
+ for (const entry of entries) {
31465
+ const full = join9(d, entry);
31466
+ try {
31467
+ const stat = statSync6(full);
31468
+ if (stat.isDirectory()) walk(full);
31469
+ else if (stat.isFile() && entry.endsWith(".jsonl")) results.push(full);
31470
+ } catch {
31471
+ }
31472
+ }
31473
+ };
31474
+ walk(dir);
31475
+ return results;
31476
+ }
31477
+ function responseItemToText(item) {
31478
+ if (!item) return null;
31479
+ if (item.type === "message" && item.role && item.content) {
31480
+ const text4 = extractContent(item.content);
31481
+ if (text4) return `[${item.role}] ${text4}`;
31482
+ }
31483
+ if (item.type === "function_call" && item.name) {
31484
+ const args = item.arguments ? truncate2(item.arguments, MAX_TOOL_OUTPUT_CHARS2) : "";
31485
+ return `[tool: ${item.name}] ${args}`;
31486
+ }
31487
+ if (item.type === "function_call_output" && item.output) {
31488
+ return `[tool_result] ${truncate2(item.output, MAX_TOOL_OUTPUT_CHARS2)}`;
31489
+ }
31490
+ return null;
31491
+ }
31492
+ function extractContent(content3) {
31493
+ if (typeof content3 === "string") return content3;
31494
+ if (!Array.isArray(content3)) return null;
31495
+ const parts = [];
31496
+ for (const part of content3) {
31497
+ if ("text" in part && typeof part.text === "string") {
31498
+ parts.push(part.text);
31499
+ }
31500
+ }
31501
+ return parts.length > 0 ? parts.join("\n") : null;
31502
+ }
31503
+ function parseJSONL2(filePath) {
31504
+ let raw;
31505
+ try {
31506
+ raw = readFileSync5(filePath, "utf-8");
31507
+ } catch {
31508
+ return [];
31509
+ }
31510
+ const lines = [];
31511
+ for (const line of raw.split("\n")) {
31512
+ if (!line.trim()) continue;
31513
+ try {
31514
+ lines.push(JSON.parse(line));
31515
+ } catch {
31516
+ }
31517
+ }
31518
+ return lines;
31519
+ }
31520
+ function getSessionMeta(filePath) {
31521
+ let raw;
31522
+ try {
31523
+ raw = readFileSync5(filePath, "utf-8");
31524
+ } catch {
31525
+ return null;
31526
+ }
31527
+ const lines = raw.split("\n").filter((l) => l.trim());
31528
+ if (lines.length === 0) return null;
31529
+ let meta3;
31530
+ try {
31531
+ meta3 = JSON.parse(lines[0]);
31532
+ } catch {
31533
+ return null;
31534
+ }
31535
+ if (meta3.type !== "session_meta") return null;
31536
+ const payload = meta3.payload;
31537
+ let messageCount = 0;
31538
+ for (const line of lines) {
31539
+ try {
31540
+ const parsed = JSON.parse(line);
31541
+ if (parsed.type === "response_item" || parsed.type === "event_msg") {
31542
+ messageCount++;
31543
+ }
31544
+ } catch {
31545
+ }
31546
+ }
31547
+ return {
31548
+ id: payload.meta.id,
31549
+ cwd: payload.meta.cwd,
31550
+ timestamp: payload.meta.timestamp,
31551
+ messageCount,
31552
+ fileSize: raw.length
31553
+ };
31554
+ }
31555
+ var codexProvider = {
31556
+ name: "codex",
31557
+ displayName: "Codex",
31558
+ detect(projectPath) {
31559
+ const sessions = [];
31560
+ const allFiles = [
31561
+ ...findJsonlFiles(SESSIONS_DIR),
31562
+ ...findJsonlFiles(ARCHIVED_DIR)
31563
+ ];
31564
+ for (const filePath of allFiles) {
31565
+ const meta3 = getSessionMeta(filePath);
31566
+ if (!meta3) continue;
31567
+ if (meta3.cwd !== projectPath) continue;
31568
+ if (meta3.messageCount < 3) continue;
31569
+ const ts = new Date(meta3.timestamp).getTime();
31570
+ const estimatedTokens = Math.ceil(meta3.fileSize / 5);
31571
+ const dateStr = new Date(ts).toISOString().slice(0, 10);
31572
+ sessions.push({
31573
+ id: filePath,
31574
+ label: `${dateStr} (${meta3.messageCount} messages)`,
31575
+ startedAt: ts,
31576
+ lastActivityAt: ts,
31577
+ // Best approximation without reading all lines
31578
+ estimatedTokens,
31579
+ messageCount: meta3.messageCount
31580
+ });
31581
+ }
31582
+ return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
31583
+ },
31584
+ readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS2) {
31585
+ const chunks = [];
31586
+ for (const filePath of sessionIds) {
31587
+ const lines = parseJSONL2(filePath);
31588
+ const messages = [];
31589
+ let sessionTimestamp = Date.now();
31590
+ const firstLine = lines[0];
31591
+ if (firstLine?.type === "session_meta") {
31592
+ const meta3 = firstLine;
31593
+ const ts = new Date(meta3.payload.meta.timestamp).getTime();
31594
+ if (!Number.isNaN(ts)) sessionTimestamp = ts;
31595
+ }
31596
+ for (const line of lines) {
31597
+ if (line.type === "response_item") {
31598
+ const ri = line;
31599
+ const text4 = responseItemToText(ri.payload);
31600
+ if (text4) {
31601
+ messages.push({ text: text4, timestamp: sessionTimestamp });
31602
+ }
31603
+ } else if (line.type === "event_msg") {
31604
+ const ev = line;
31605
+ if (ev.payload.output) {
31606
+ messages.push({
31607
+ text: `[exec] ${truncate2(ev.payload.output, MAX_TOOL_OUTPUT_CHARS2)}`,
31608
+ timestamp: sessionTimestamp
31609
+ });
31610
+ }
31611
+ } else if (line.type === "compacted") {
31612
+ const comp = line;
31613
+ if (comp.payload.replacement_history) {
31614
+ for (const item of comp.payload.replacement_history) {
31615
+ const text4 = responseItemToText(item);
31616
+ if (text4) {
31617
+ messages.push({ text: text4, timestamp: sessionTimestamp });
31618
+ }
31619
+ }
31620
+ }
31621
+ }
31622
+ }
31623
+ if (messages.length === 0) continue;
31624
+ let currentTexts = [];
31625
+ let currentTokens = 0;
31626
+ let chunkIndex = 0;
31627
+ const flushChunk = () => {
31628
+ if (currentTexts.length === 0) return;
31629
+ chunkIndex++;
31630
+ const text4 = currentTexts.join("\n\n");
31631
+ chunks.push({
31632
+ label: `Codex ${new Date(sessionTimestamp).toISOString().slice(0, 10)} (${chunkIndex})`,
31633
+ text: text4,
31634
+ estimatedTokens: estimateTokens5(text4),
31635
+ timestamp: sessionTimestamp
31636
+ });
31637
+ currentTexts = [];
31638
+ currentTokens = 0;
31639
+ };
31640
+ for (const msg of messages) {
31641
+ const msgTokens = estimateTokens5(msg.text);
31642
+ if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
31643
+ flushChunk();
31644
+ }
31645
+ currentTexts.push(msg.text);
31646
+ currentTokens += msgTokens;
31647
+ }
31648
+ flushChunk();
31649
+ }
31650
+ return chunks;
31651
+ }
31652
+ };
31653
+ registerProvider(codexProvider);
31654
+
31655
+ // src/import/providers/opencode.ts
31656
+ import { existsSync as existsSync7 } from "fs";
31657
+ import { join as join10 } from "path";
31658
+ import { homedir as homedir4 } from "os";
31659
+ var OPENCODE_DB_PATH = join10(
31660
+ process.env.XDG_DATA_HOME || join10(homedir4(), ".local", "share"),
31661
+ "opencode",
31662
+ "opencode.db"
31663
+ );
31664
+ var MAX_TOOL_OUTPUT_CHARS3 = 500;
31665
+ var DEFAULT_MAX_TOKENS3 = 12288;
31666
+ function estimateTokens6(text4) {
31667
+ return Math.ceil(text4.length / 3);
31668
+ }
31669
+ function truncate3(text4, max) {
31670
+ if (text4.length <= max) return text4;
31671
+ return text4.slice(0, max) + "...";
31672
+ }
31673
+ function openDB() {
31674
+ if (!existsSync7(OPENCODE_DB_PATH)) return null;
31675
+ try {
31676
+ return new Database(OPENCODE_DB_PATH, { readonly: true, readOnly: true });
31677
+ } catch {
31678
+ return null;
31679
+ }
31680
+ }
31681
+ function tableExists(database, table) {
31682
+ const row = database.query("SELECT name FROM sqlite_master WHERE type='table' AND name=?").get(table);
31683
+ return row != null;
31684
+ }
31685
+ function partsToConversationText(parts) {
31686
+ const segments = [];
31687
+ for (const part of parts) {
31688
+ if (part.type === "text" && part.text) {
31689
+ segments.push(part.text);
31690
+ } else if (part.type === "tool" && part.tool && part.state?.status === "completed" && part.state.output) {
31691
+ segments.push(`[tool: ${part.tool}] ${truncate3(part.state.output, MAX_TOOL_OUTPUT_CHARS3)}`);
31692
+ }
31693
+ }
31694
+ return segments.join("\n");
31695
+ }
31696
+ var opencodeProvider = {
31697
+ name: "opencode",
31698
+ displayName: "OpenCode",
31699
+ detect(projectPath) {
31700
+ const database = openDB();
31701
+ if (!database) return [];
31702
+ try {
31703
+ if (!tableExists(database, "project") || !tableExists(database, "session") || !tableExists(database, "message")) {
31704
+ return [];
31705
+ }
31706
+ const project = database.query("SELECT id FROM project WHERE worktree = ?").get(projectPath);
31707
+ if (!project) return [];
31708
+ const sessions = database.query(
31709
+ `SELECT s.id, s.title, s.time_created, s.time_updated,
31710
+ (SELECT COUNT(*) FROM message m WHERE m.session_id = s.id) as msg_count
31711
+ FROM session s
31712
+ WHERE s.project_id = ? AND s.parent_id IS NULL
31713
+ ORDER BY s.time_updated DESC`
31714
+ ).all(project.id);
31715
+ const results = [];
31716
+ for (const sess of sessions) {
31717
+ if (sess.msg_count < 3) continue;
31718
+ const estimatedTokens = sess.msg_count * 500;
31719
+ const dateStr = new Date(sess.time_created).toISOString().slice(0, 10);
31720
+ const label = sess.title ? `${dateStr} - ${sess.title} (${sess.msg_count} messages)` : `${dateStr} (${sess.msg_count} messages)`;
31721
+ results.push({
31722
+ id: sess.id,
31723
+ label,
31724
+ startedAt: sess.time_created,
31725
+ lastActivityAt: sess.time_updated,
31726
+ estimatedTokens,
31727
+ messageCount: sess.msg_count
31728
+ });
30513
31729
  }
31730
+ return results;
31731
+ } finally {
31732
+ database.close();
30514
31733
  }
31734
+ },
31735
+ readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS3) {
31736
+ const database = openDB();
31737
+ if (!database) return [];
31738
+ const chunks = [];
31739
+ try {
31740
+ const hasParts = tableExists(database, "part");
31741
+ for (const sessionId of sessionIds) {
31742
+ const messages = database.query(
31743
+ `SELECT id, data, time_created FROM message
31744
+ WHERE session_id = ?
31745
+ ORDER BY time_created ASC`
31746
+ ).all(sessionId);
31747
+ if (messages.length === 0) continue;
31748
+ const textMessages = [];
31749
+ for (const msg of messages) {
31750
+ let msgData;
31751
+ try {
31752
+ msgData = JSON.parse(msg.data);
31753
+ } catch {
31754
+ continue;
31755
+ }
31756
+ const role = msgData.role ?? "unknown";
31757
+ let contentText = "";
31758
+ if (hasParts) {
31759
+ const parts = database.query(
31760
+ `SELECT data FROM part
31761
+ WHERE message_id = ?
31762
+ ORDER BY time_created ASC`
31763
+ ).all(msg.id);
31764
+ const parsedParts = [];
31765
+ for (const p2 of parts) {
31766
+ try {
31767
+ parsedParts.push(JSON.parse(p2.data));
31768
+ } catch {
31769
+ }
31770
+ }
31771
+ contentText = partsToConversationText(parsedParts);
31772
+ }
31773
+ if (!contentText.trim()) continue;
31774
+ textMessages.push({
31775
+ text: `[${role}] ${contentText}`,
31776
+ timestamp: msg.time_created
31777
+ });
31778
+ }
31779
+ if (textMessages.length === 0) continue;
31780
+ let currentTexts = [];
31781
+ let currentTokens = 0;
31782
+ let chunkStart = textMessages[0].timestamp;
31783
+ let chunkIndex = 0;
31784
+ const flushChunk = () => {
31785
+ if (currentTexts.length === 0) return;
31786
+ chunkIndex++;
31787
+ const text4 = currentTexts.join("\n\n");
31788
+ chunks.push({
31789
+ label: `OpenCode ${new Date(chunkStart).toISOString().slice(0, 10)} (${chunkIndex})`,
31790
+ text: text4,
31791
+ estimatedTokens: estimateTokens6(text4),
31792
+ timestamp: chunkStart
31793
+ });
31794
+ currentTexts = [];
31795
+ currentTokens = 0;
31796
+ };
31797
+ for (const msg of textMessages) {
31798
+ const msgTokens = estimateTokens6(msg.text);
31799
+ if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
31800
+ flushChunk();
31801
+ chunkStart = msg.timestamp;
31802
+ }
31803
+ currentTexts.push(msg.text);
31804
+ currentTokens += msgTokens;
31805
+ }
31806
+ flushChunk();
31807
+ }
31808
+ } finally {
31809
+ database.close();
31810
+ }
31811
+ return chunks;
31812
+ }
31813
+ };
31814
+ registerProvider(opencodeProvider);
31815
+
31816
+ // src/import/providers/cline.ts
31817
+ import { readFileSync as readFileSync6, existsSync as existsSync8, statSync as statSync7 } from "fs";
31818
+ import { join as join11 } from "path";
31819
+ import { homedir as homedir5 } from "os";
31820
+ var MAX_TOOL_OUTPUT_CHARS4 = 500;
31821
+ var DEFAULT_MAX_TOKENS4 = 12288;
31822
+ var EXTENSION_IDS = [
31823
+ "saoudrizwan.claude-dev",
31824
+ "cline.cline"
31825
+ ];
31826
+ function estimateTokens7(text4) {
31827
+ return Math.ceil(text4.length / 3);
31828
+ }
31829
+ function truncate4(text4, max) {
31830
+ if (text4.length <= max) return text4;
31831
+ return text4.slice(0, max) + "...";
31832
+ }
31833
+ function findGlobalStorageDirs() {
31834
+ const home = homedir5();
31835
+ const dirs = [];
31836
+ const basePaths = [];
31837
+ const platform = process.platform;
31838
+ if (platform === "darwin") {
31839
+ basePaths.push(
31840
+ join11(home, "Library", "Application Support", "Code", "User", "globalStorage"),
31841
+ join11(home, "Library", "Application Support", "Code - Insiders", "User", "globalStorage"),
31842
+ join11(home, "Library", "Application Support", "VSCodium", "User", "globalStorage")
31843
+ );
31844
+ } else if (platform === "win32") {
31845
+ const appdata = process.env.APPDATA || join11(home, "AppData", "Roaming");
31846
+ basePaths.push(
31847
+ join11(appdata, "Code", "User", "globalStorage"),
31848
+ join11(appdata, "Code - Insiders", "User", "globalStorage"),
31849
+ join11(appdata, "VSCodium", "User", "globalStorage")
31850
+ );
31851
+ } else {
31852
+ const configHome = process.env.XDG_CONFIG_HOME || join11(home, ".config");
31853
+ basePaths.push(
31854
+ join11(configHome, "Code", "User", "globalStorage"),
31855
+ join11(configHome, "Code - Insiders", "User", "globalStorage"),
31856
+ join11(configHome, "VSCodium", "User", "globalStorage")
31857
+ );
31858
+ basePaths.push(
31859
+ join11(home, ".vscode", "data", "User", "globalStorage"),
31860
+ join11(home, ".vscode-insiders", "data", "User", "globalStorage")
31861
+ );
31862
+ }
31863
+ for (const base of basePaths) {
31864
+ for (const extId of EXTENSION_IDS) {
31865
+ const dir = join11(base, extId);
31866
+ if (existsSync8(dir)) dirs.push(dir);
31867
+ }
31868
+ }
31869
+ return dirs;
31870
+ }
31871
+ function loadTaskHistory(storageDir, projectPath) {
31872
+ const paths = [
31873
+ join11(storageDir, "state", "taskHistory.json"),
31874
+ join11(storageDir, "taskHistory.json")
31875
+ ];
31876
+ for (const historyPath of paths) {
31877
+ if (!existsSync8(historyPath)) continue;
31878
+ try {
31879
+ const raw = readFileSync6(historyPath, "utf-8");
31880
+ const items = JSON.parse(raw);
31881
+ if (!Array.isArray(items)) continue;
31882
+ return items.filter(
31883
+ (item) => item.cwdOnTaskInitialization === projectPath
31884
+ );
31885
+ } catch {
31886
+ continue;
31887
+ }
31888
+ }
31889
+ return [];
31890
+ }
31891
+ function readConversation(taskDir) {
31892
+ const filePath = join11(taskDir, "api_conversation_history.json");
31893
+ if (!existsSync8(filePath)) return [];
31894
+ try {
31895
+ const raw = readFileSync6(filePath, "utf-8");
31896
+ const messages = JSON.parse(raw);
31897
+ return Array.isArray(messages) ? messages : [];
31898
+ } catch {
31899
+ return [];
31900
+ }
31901
+ }
31902
+ function blockToText2(block) {
31903
+ switch (block.type) {
31904
+ case "text":
31905
+ return block.text;
31906
+ case "tool_use": {
31907
+ const tu = block;
31908
+ return `[tool: ${tu.name}] ${truncate4(JSON.stringify(tu.input), MAX_TOOL_OUTPUT_CHARS4)}`;
31909
+ }
31910
+ case "tool_result": {
31911
+ const tr = block;
31912
+ let content3;
31913
+ if (typeof tr.content === "string") {
31914
+ content3 = tr.content;
31915
+ } else if (Array.isArray(tr.content)) {
31916
+ content3 = tr.content.filter((b) => b.type === "text").map((b) => b.text).join("\n");
31917
+ } else {
31918
+ content3 = "";
31919
+ }
31920
+ return content3 ? `[tool_result] ${truncate4(content3, MAX_TOOL_OUTPUT_CHARS4)}` : null;
31921
+ }
31922
+ default:
31923
+ return null;
31924
+ }
31925
+ }
31926
+ function messageToText(msg) {
31927
+ if (typeof msg.content === "string") {
31928
+ return msg.content ? `[${msg.role}] ${msg.content}` : null;
31929
+ }
31930
+ const parts = msg.content.map(blockToText2).filter(Boolean);
31931
+ return parts.length > 0 ? `[${msg.role}] ${parts.join("\n")}` : null;
31932
+ }
31933
+ var clineProvider = {
31934
+ name: "cline",
31935
+ displayName: "Cline",
31936
+ detect(projectPath) {
31937
+ const sessions = [];
31938
+ const storageDirs = findGlobalStorageDirs();
31939
+ for (const storageDir of storageDirs) {
31940
+ const tasks = loadTaskHistory(storageDir, projectPath);
31941
+ for (const task of tasks) {
31942
+ const taskDir = join11(storageDir, "tasks", task.id);
31943
+ if (!existsSync8(taskDir)) continue;
31944
+ const messages = readConversation(taskDir);
31945
+ if (messages.length < 3) continue;
31946
+ const dateStr = new Date(task.ts).toISOString().slice(0, 10);
31947
+ const label = task.task ? `${dateStr} - ${truncate4(task.task, 60)} (${messages.length} messages)` : `${dateStr} (${messages.length} messages)`;
31948
+ const historyFile = join11(taskDir, "api_conversation_history.json");
31949
+ let estimatedTokens = messages.length * 500;
31950
+ try {
31951
+ const stat = statSync7(historyFile);
31952
+ estimatedTokens = Math.ceil(stat.size / 5);
31953
+ } catch {
31954
+ }
31955
+ sessions.push({
31956
+ id: taskDir,
31957
+ label,
31958
+ startedAt: task.ts,
31959
+ lastActivityAt: task.ts,
31960
+ estimatedTokens,
31961
+ messageCount: messages.length
31962
+ });
31963
+ }
31964
+ }
31965
+ return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
31966
+ },
31967
+ readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS4) {
31968
+ const chunks = [];
31969
+ for (const taskDir of sessionIds) {
31970
+ const messages = readConversation(taskDir);
31971
+ if (messages.length === 0) continue;
31972
+ let sessionTimestamp;
31973
+ try {
31974
+ sessionTimestamp = statSync7(taskDir).mtimeMs;
31975
+ } catch {
31976
+ sessionTimestamp = Date.now();
31977
+ }
31978
+ const textMessages = [];
31979
+ for (const msg of messages) {
31980
+ const text4 = messageToText(msg);
31981
+ if (text4) textMessages.push({ text: text4 });
31982
+ }
31983
+ if (textMessages.length === 0) continue;
31984
+ let currentTexts = [];
31985
+ let currentTokens = 0;
31986
+ let chunkIndex = 0;
31987
+ const flushChunk = () => {
31988
+ if (currentTexts.length === 0) return;
31989
+ chunkIndex++;
31990
+ const text4 = currentTexts.join("\n\n");
31991
+ chunks.push({
31992
+ label: `Cline ${new Date(sessionTimestamp).toISOString().slice(0, 10)} (${chunkIndex})`,
31993
+ text: text4,
31994
+ estimatedTokens: estimateTokens7(text4),
31995
+ timestamp: sessionTimestamp
31996
+ });
31997
+ currentTexts = [];
31998
+ currentTokens = 0;
31999
+ };
32000
+ for (const msg of textMessages) {
32001
+ const msgTokens = estimateTokens7(msg.text);
32002
+ if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
32003
+ flushChunk();
32004
+ }
32005
+ currentTexts.push(msg.text);
32006
+ currentTokens += msgTokens;
32007
+ }
32008
+ flushChunk();
32009
+ }
32010
+ return chunks;
30515
32011
  }
30516
- return { updated, deleted };
32012
+ };
32013
+ registerProvider(clineProvider);
32014
+
32015
+ // src/import/providers/continue.ts
32016
+ import { readdirSync as readdirSync5, readFileSync as readFileSync7, existsSync as existsSync9 } from "fs";
32017
+ import { join as join12 } from "path";
32018
+ import { homedir as homedir6 } from "os";
32019
+ var MAX_TOOL_OUTPUT_CHARS5 = 500;
32020
+ var DEFAULT_MAX_TOKENS5 = 12288;
32021
+ function estimateTokens8(text4) {
32022
+ return Math.ceil(text4.length / 3);
32023
+ }
32024
+ function truncate5(text4, max) {
32025
+ if (text4.length <= max) return text4;
32026
+ return text4.slice(0, max) + "...";
30517
32027
  }
32028
+ function continueDir() {
32029
+ return process.env.CONTINUE_GLOBAL_DIR || join12(homedir6(), ".continue");
32030
+ }
32031
+ function loadSessionIndex() {
32032
+ const indexPath = join12(continueDir(), "sessions", "sessions.json");
32033
+ if (!existsSync9(indexPath)) return [];
32034
+ try {
32035
+ const raw = readFileSync7(indexPath, "utf-8");
32036
+ const parsed = JSON.parse(raw);
32037
+ return Array.isArray(parsed) ? parsed : [];
32038
+ } catch {
32039
+ return [];
32040
+ }
32041
+ }
32042
+ function loadSession(sessionId) {
32043
+ const filePath = join12(continueDir(), "sessions", `${sessionId}.json`);
32044
+ if (!existsSync9(filePath)) return null;
32045
+ try {
32046
+ const raw = readFileSync7(filePath, "utf-8");
32047
+ return JSON.parse(raw);
32048
+ } catch {
32049
+ return null;
32050
+ }
32051
+ }
32052
+ function extractMessageContent(content3) {
32053
+ if (typeof content3 === "string") return content3;
32054
+ if (!Array.isArray(content3)) return "";
32055
+ return content3.filter(
32056
+ (part) => part.type === "text" && typeof part.text === "string"
32057
+ ).map((part) => part.text).join("\n");
32058
+ }
32059
+ function historyItemToText(item) {
32060
+ const msg = item.message;
32061
+ if (!msg) return null;
32062
+ if (msg.role === "system") return null;
32063
+ const parts = [];
32064
+ const content3 = extractMessageContent(msg.content);
32065
+ if (content3) parts.push(content3);
32066
+ if (msg.toolCalls) {
32067
+ for (const call of msg.toolCalls) {
32068
+ if (call.function) {
32069
+ const args = truncate5(call.function.arguments || "{}", MAX_TOOL_OUTPUT_CHARS5);
32070
+ parts.push(`[tool: ${call.function.name}] ${args}`);
32071
+ }
32072
+ }
32073
+ }
32074
+ if (item.toolCallStates) {
32075
+ for (const state of item.toolCallStates) {
32076
+ if (state.output && state.status === "done") {
32077
+ parts.push(`[tool_result] ${truncate5(state.output, MAX_TOOL_OUTPUT_CHARS5)}`);
32078
+ }
32079
+ }
32080
+ }
32081
+ if (parts.length === 0) return null;
32082
+ const role = msg.role === "tool" ? "tool_result" : msg.role;
32083
+ return `[${role}] ${parts.join("\n")}`;
32084
+ }
32085
+ var continueProvider = {
32086
+ name: "continue",
32087
+ displayName: "Continue",
32088
+ detect(projectPath) {
32089
+ const sessions = [];
32090
+ const index2 = loadSessionIndex();
32091
+ for (const meta3 of index2) {
32092
+ if (meta3.workspaceDirectory !== projectPath) continue;
32093
+ const session = loadSession(meta3.sessionId);
32094
+ if (!session || !session.history || session.history.length < 3) continue;
32095
+ const ts = new Date(meta3.dateCreated).getTime();
32096
+ const dateStr = new Date(ts).toISOString().slice(0, 10);
32097
+ const messageCount = session.history.length;
32098
+ const label = meta3.title ? `${dateStr} - ${truncate5(meta3.title, 60)} (${messageCount} messages)` : `${dateStr} (${messageCount} messages)`;
32099
+ const estimatedTokens = messageCount * 500;
32100
+ sessions.push({
32101
+ id: meta3.sessionId,
32102
+ label,
32103
+ startedAt: ts,
32104
+ lastActivityAt: ts,
32105
+ estimatedTokens,
32106
+ messageCount
32107
+ });
32108
+ }
32109
+ const sessionsDir = join12(continueDir(), "sessions");
32110
+ if (existsSync9(sessionsDir)) {
32111
+ const existingIds = new Set(sessions.map((s) => s.id));
32112
+ let entries;
32113
+ try {
32114
+ entries = readdirSync5(sessionsDir);
32115
+ } catch {
32116
+ entries = [];
32117
+ }
32118
+ for (const entry of entries) {
32119
+ if (!entry.endsWith(".json") || entry === "sessions.json") continue;
32120
+ const sessionId = entry.replace(".json", "");
32121
+ if (existingIds.has(sessionId)) continue;
32122
+ const session = loadSession(sessionId);
32123
+ if (!session) continue;
32124
+ if (session.workspaceDirectory !== projectPath) continue;
32125
+ if (!session.history || session.history.length < 3) continue;
32126
+ const dateStr = session.title ? truncate5(session.title, 60) : sessionId.slice(0, 8);
32127
+ sessions.push({
32128
+ id: sessionId,
32129
+ label: `${dateStr} (${session.history.length} messages)`,
32130
+ startedAt: Date.now(),
32131
+ lastActivityAt: Date.now(),
32132
+ estimatedTokens: session.history.length * 500,
32133
+ messageCount: session.history.length
32134
+ });
32135
+ }
32136
+ }
32137
+ return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
32138
+ },
32139
+ readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS5) {
32140
+ const chunks = [];
32141
+ for (const sessionId of sessionIds) {
32142
+ const session = loadSession(sessionId);
32143
+ if (!session || !session.history) continue;
32144
+ const textMessages = [];
32145
+ for (const item of session.history) {
32146
+ const text4 = historyItemToText(item);
32147
+ if (text4) textMessages.push({ text: text4 });
32148
+ }
32149
+ if (textMessages.length === 0) continue;
32150
+ const sessionTimestamp = Date.now();
32151
+ let currentTexts = [];
32152
+ let currentTokens = 0;
32153
+ let chunkIndex = 0;
32154
+ const flushChunk = () => {
32155
+ if (currentTexts.length === 0) return;
32156
+ chunkIndex++;
32157
+ const text4 = currentTexts.join("\n\n");
32158
+ chunks.push({
32159
+ label: `Continue ${session.title || sessionId.slice(0, 8)} (${chunkIndex})`,
32160
+ text: text4,
32161
+ estimatedTokens: estimateTokens8(text4),
32162
+ timestamp: sessionTimestamp
32163
+ });
32164
+ currentTexts = [];
32165
+ currentTokens = 0;
32166
+ };
32167
+ for (const msg of textMessages) {
32168
+ const msgTokens = estimateTokens8(msg.text);
32169
+ if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
32170
+ flushChunk();
32171
+ }
32172
+ currentTexts.push(msg.text);
32173
+ currentTokens += msgTokens;
32174
+ }
32175
+ flushChunk();
32176
+ }
32177
+ return chunks;
32178
+ }
32179
+ };
32180
+ registerProvider(continueProvider);
32181
+
32182
+ // src/import/providers/pi.ts
32183
+ import { readdirSync as readdirSync6, readFileSync as readFileSync8, statSync as statSync8 } from "fs";
32184
+ import { join as join13 } from "path";
32185
+ import { homedir as homedir7 } from "os";
32186
+ var PI_DIR = join13(homedir7(), ".pi", "agent", "sessions");
32187
+ var MAX_TOOL_OUTPUT_CHARS6 = 500;
32188
+ var DEFAULT_MAX_TOKENS6 = 12288;
32189
+ function estimateTokens9(text4) {
32190
+ return Math.ceil(text4.length / 3);
32191
+ }
32192
+ function truncate6(text4, max) {
32193
+ if (text4.length <= max) return text4;
32194
+ return text4.slice(0, max) + "...";
32195
+ }
32196
+ function encodeCwd(cwd) {
32197
+ const encoded = cwd.replace(/^\//, "").replace(/\//g, "-");
32198
+ return `--${encoded}--`;
32199
+ }
32200
+ function parseJSONL3(filePath) {
32201
+ let raw;
32202
+ try {
32203
+ raw = readFileSync8(filePath, "utf-8");
32204
+ } catch {
32205
+ return [];
32206
+ }
32207
+ const lines = [];
32208
+ for (const line of raw.split("\n")) {
32209
+ if (!line.trim()) continue;
32210
+ try {
32211
+ lines.push(JSON.parse(line));
32212
+ } catch {
32213
+ }
32214
+ }
32215
+ return lines;
32216
+ }
32217
+ function linearize(lines) {
32218
+ if (lines.length === 0) return [];
32219
+ const children = /* @__PURE__ */ new Map();
32220
+ const byId = /* @__PURE__ */ new Map();
32221
+ let rootLine = null;
32222
+ for (const line of lines) {
32223
+ if (line.type === "session") {
32224
+ rootLine = line;
32225
+ continue;
32226
+ }
32227
+ if (!line.id) continue;
32228
+ byId.set(line.id, line);
32229
+ const pid = line.parentId;
32230
+ if (pid) {
32231
+ const siblings = children.get(pid) ?? [];
32232
+ siblings.push(line);
32233
+ children.set(pid, siblings);
32234
+ }
32235
+ }
32236
+ if (!rootLine || !rootLine.id) return lines.filter((l) => l.type === "message");
32237
+ const result = [];
32238
+ let currentId = rootLine.id;
32239
+ while (currentId) {
32240
+ const kids = children.get(currentId);
32241
+ if (!kids || kids.length === 0) break;
32242
+ const next = kids[kids.length - 1];
32243
+ result.push(next);
32244
+ currentId = next.id;
32245
+ }
32246
+ return result;
32247
+ }
32248
+ function getSessionMeta2(filePath) {
32249
+ const lines = parseJSONL3(filePath);
32250
+ if (lines.length === 0) return null;
32251
+ const header = lines[0];
32252
+ if (header.type !== "session") return null;
32253
+ const session = header;
32254
+ const messageCount = lines.filter((l) => l.type === "message").length;
32255
+ let fileSize;
32256
+ try {
32257
+ fileSize = statSync8(filePath).size;
32258
+ } catch {
32259
+ fileSize = 0;
32260
+ }
32261
+ const ts = new Date(session.timestamp).getTime();
32262
+ return {
32263
+ id: session.id,
32264
+ cwd: session.cwd,
32265
+ timestamp: Number.isNaN(ts) ? Date.now() : ts,
32266
+ messageCount,
32267
+ fileSize
32268
+ };
32269
+ }
32270
+ var piProvider = {
32271
+ name: "pi",
32272
+ displayName: "Pi",
32273
+ detect(projectPath) {
32274
+ const encoded = encodeCwd(projectPath);
32275
+ const dir = join13(PI_DIR, encoded);
32276
+ let entries;
32277
+ try {
32278
+ entries = readdirSync6(dir);
32279
+ } catch {
32280
+ return [];
32281
+ }
32282
+ const sessions = [];
32283
+ for (const entry of entries) {
32284
+ if (!entry.endsWith(".jsonl")) continue;
32285
+ const filePath = join13(dir, entry);
32286
+ const meta3 = getSessionMeta2(filePath);
32287
+ if (!meta3) continue;
32288
+ if (meta3.messageCount < 3) continue;
32289
+ const dateStr = new Date(meta3.timestamp).toISOString().slice(0, 10);
32290
+ const estimatedTokens = Math.ceil(meta3.fileSize / 5);
32291
+ sessions.push({
32292
+ id: filePath,
32293
+ label: `${dateStr} (${meta3.messageCount} messages)`,
32294
+ startedAt: meta3.timestamp,
32295
+ lastActivityAt: meta3.timestamp,
32296
+ estimatedTokens,
32297
+ messageCount: meta3.messageCount
32298
+ });
32299
+ }
32300
+ return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
32301
+ },
32302
+ readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS6) {
32303
+ const chunks = [];
32304
+ for (const filePath of sessionIds) {
32305
+ const allLines = parseJSONL3(filePath);
32306
+ const linearLines = linearize(allLines);
32307
+ let sessionTimestamp = Date.now();
32308
+ const header = allLines.find((l) => l.type === "session");
32309
+ if (header?.type === "session") {
32310
+ const session = header;
32311
+ const ts = new Date(session.timestamp).getTime();
32312
+ if (!Number.isNaN(ts)) sessionTimestamp = ts;
32313
+ }
32314
+ const messages = [];
32315
+ for (const line of linearLines) {
32316
+ if (line.type === "message") {
32317
+ const msg = line;
32318
+ const content3 = msg.message.content;
32319
+ if (!content3) continue;
32320
+ const ts = new Date(msg.timestamp).getTime();
32321
+ messages.push({
32322
+ text: `[${msg.message.role}] ${content3}`,
32323
+ timestamp: Number.isNaN(ts) ? sessionTimestamp : ts
32324
+ });
32325
+ } else if (line.type === "compaction") {
32326
+ const comp = line;
32327
+ if (comp.summary) {
32328
+ messages.push({
32329
+ text: `[summary] ${truncate6(comp.summary, MAX_TOOL_OUTPUT_CHARS6 * 2)}`,
32330
+ timestamp: sessionTimestamp
32331
+ });
32332
+ }
32333
+ }
32334
+ }
32335
+ if (messages.length === 0) continue;
32336
+ let currentTexts = [];
32337
+ let currentTokens = 0;
32338
+ let chunkIndex = 0;
32339
+ const flushChunk = () => {
32340
+ if (currentTexts.length === 0) return;
32341
+ chunkIndex++;
32342
+ const text4 = currentTexts.join("\n\n");
32343
+ chunks.push({
32344
+ label: `Pi ${new Date(sessionTimestamp).toISOString().slice(0, 10)} (${chunkIndex})`,
32345
+ text: text4,
32346
+ estimatedTokens: estimateTokens9(text4),
32347
+ timestamp: sessionTimestamp
32348
+ });
32349
+ currentTexts = [];
32350
+ currentTokens = 0;
32351
+ };
32352
+ for (const msg of messages) {
32353
+ const msgTokens = estimateTokens9(msg.text);
32354
+ if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
32355
+ flushChunk();
32356
+ }
32357
+ currentTexts.push(msg.text);
32358
+ currentTokens += msgTokens;
32359
+ }
32360
+ flushChunk();
32361
+ }
32362
+ return chunks;
32363
+ }
32364
+ };
32365
+ registerProvider(piProvider);
32366
+
32367
+ // src/import/providers/aider.ts
32368
+ import { existsSync as existsSync11, readFileSync as readFileSync9, statSync as statSync9 } from "fs";
32369
+ import { join as join14 } from "path";
32370
+ var HISTORY_FILE = ".aider.chat.history.md";
32371
+ var DEFAULT_MAX_TOKENS7 = 12288;
32372
+ var ROLE_HEADER_RE = /^####\s+(user|assistant|system)\s*$/i;
32373
+ function estimateTokens10(text4) {
32374
+ return Math.ceil(text4.length / 3);
32375
+ }
32376
+ function parseAiderHistory(content3) {
32377
+ const lines = content3.split("\n");
32378
+ const messages = [];
32379
+ let currentRole = null;
32380
+ let currentLines = [];
32381
+ const flush = () => {
32382
+ if (currentRole && currentLines.length > 0) {
32383
+ const text4 = currentLines.join("\n").trim();
32384
+ if (text4) {
32385
+ messages.push({ role: currentRole, text: text4 });
32386
+ }
32387
+ }
32388
+ currentLines = [];
32389
+ };
32390
+ for (const line of lines) {
32391
+ const match = ROLE_HEADER_RE.exec(line);
32392
+ if (match) {
32393
+ flush();
32394
+ currentRole = match[1].toLowerCase();
32395
+ continue;
32396
+ }
32397
+ if (line.trim() === "---") {
32398
+ flush();
32399
+ currentRole = null;
32400
+ continue;
32401
+ }
32402
+ if (currentRole) {
32403
+ currentLines.push(line);
32404
+ }
32405
+ }
32406
+ flush();
32407
+ return messages;
32408
+ }
32409
+ var aiderProvider = {
32410
+ name: "aider",
32411
+ displayName: "Aider",
32412
+ detect(projectPath) {
32413
+ const filePath = join14(projectPath, HISTORY_FILE);
32414
+ if (!existsSync11(filePath)) return [];
32415
+ let stat;
32416
+ try {
32417
+ stat = statSync9(filePath);
32418
+ } catch {
32419
+ return [];
32420
+ }
32421
+ if (!stat.isFile() || stat.size === 0) return [];
32422
+ let content3;
32423
+ try {
32424
+ content3 = readFileSync9(filePath, "utf-8");
32425
+ } catch {
32426
+ return [];
32427
+ }
32428
+ const messages = parseAiderHistory(content3);
32429
+ if (messages.length < 3) return [];
32430
+ const estimatedTokens = estimateTokens10(content3);
32431
+ return [
32432
+ {
32433
+ id: filePath,
32434
+ label: `Chat history (${messages.length} messages, ${Math.round(stat.size / 1024)}KB)`,
32435
+ startedAt: stat.birthtimeMs || stat.ctimeMs,
32436
+ lastActivityAt: stat.mtimeMs,
32437
+ estimatedTokens,
32438
+ messageCount: messages.length
32439
+ }
32440
+ ];
32441
+ },
32442
+ readChunks(projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS7) {
32443
+ const chunks = [];
32444
+ for (const filePath of sessionIds) {
32445
+ let content3;
32446
+ try {
32447
+ content3 = readFileSync9(filePath, "utf-8");
32448
+ } catch {
32449
+ continue;
32450
+ }
32451
+ const messages = parseAiderHistory(content3);
32452
+ if (messages.length === 0) continue;
32453
+ let fileTimestamp;
32454
+ try {
32455
+ fileTimestamp = statSync9(filePath).mtimeMs;
32456
+ } catch {
32457
+ fileTimestamp = Date.now();
32458
+ }
32459
+ let currentTexts = [];
32460
+ let currentTokens = 0;
32461
+ let chunkIndex = 0;
32462
+ const flushChunk = () => {
32463
+ if (currentTexts.length === 0) return;
32464
+ chunkIndex++;
32465
+ const text4 = currentTexts.join("\n\n");
32466
+ chunks.push({
32467
+ label: `Aider history (${chunkIndex})`,
32468
+ text: text4,
32469
+ estimatedTokens: estimateTokens10(text4),
32470
+ timestamp: fileTimestamp
32471
+ });
32472
+ currentTexts = [];
32473
+ currentTokens = 0;
32474
+ };
32475
+ for (const msg of messages) {
32476
+ const formatted = `[${msg.role}] ${msg.text}`;
32477
+ const msgTokens = estimateTokens10(formatted);
32478
+ if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
32479
+ flushChunk();
32480
+ }
32481
+ currentTexts.push(formatted);
32482
+ currentTokens += msgTokens;
32483
+ }
32484
+ flushChunk();
32485
+ }
32486
+ return chunks;
32487
+ }
32488
+ };
32489
+ registerProvider(aiderProvider);
30518
32490
 
30519
32491
  // src/recall.ts
30520
32492
  function getTaggedText(tagged) {
@@ -30780,7 +32752,10 @@ async function searchRecall(input) {
30780
32752
  info("recall: query expansion failed, using original:", err);
30781
32753
  }
30782
32754
  }
32755
+ const queryTermCount = filterTerms(query).length;
32756
+ const vectorWeight = queryTermCount >= (searchConfig?.vectorBoostMinTerms ?? 3) ? searchConfig?.vectorBoostWeight ?? 1.5 : 1;
30783
32757
  const allRrfLists = [];
32758
+ let primaryListEnd = 0;
30784
32759
  for (const q of queries) {
30785
32760
  const knowledgeResults = [];
30786
32761
  if (knowledgeEnabled && scope !== "session") {
@@ -30857,7 +32832,11 @@ async function searchRecall(input) {
30857
32832
  key: (r) => `t:${r.item.id}`
30858
32833
  });
30859
32834
  }
32835
+ if (primaryListEnd === 0) {
32836
+ primaryListEnd = allRrfLists.length;
32837
+ }
30860
32838
  }
32839
+ const perQueryListEnd = allRrfLists.length;
30861
32840
  if (isAvailable() && scope !== "session") {
30862
32841
  try {
30863
32842
  const [queryVec] = await embed([query], "query");
@@ -30876,7 +32855,8 @@ async function searchRecall(input) {
30876
32855
  if (vectorTagged.length) {
30877
32856
  allRrfLists.push({
30878
32857
  items: vectorTagged,
30879
- key: (r) => `k:${r.item.id}`
32858
+ key: (r) => `k:${r.item.id}`,
32859
+ weight: vectorWeight
30880
32860
  });
30881
32861
  }
30882
32862
  }
@@ -30895,7 +32875,8 @@ async function searchRecall(input) {
30895
32875
  if (distVectorTagged.length) {
30896
32876
  allRrfLists.push({
30897
32877
  items: distVectorTagged,
30898
- key: (r) => `d:${r.item.id}`
32878
+ key: (r) => `d:${r.item.id}`,
32879
+ weight: vectorWeight
30899
32880
  });
30900
32881
  }
30901
32882
  }
@@ -30919,7 +32900,8 @@ async function searchRecall(input) {
30919
32900
  if (temporalVectorTagged.length) {
30920
32901
  allRrfLists.push({
30921
32902
  items: temporalVectorTagged,
30922
- key: (r) => `t:${r.item.id}`
32903
+ key: (r) => `t:${r.item.id}`,
32904
+ weight: vectorWeight
30923
32905
  });
30924
32906
  }
30925
32907
  }
@@ -31022,6 +33004,15 @@ async function searchRecall(input) {
31022
33004
  });
31023
33005
  }
31024
33006
  }
33007
+ const MAX_RRF_LISTS = 10;
33008
+ if (allRrfLists.length > MAX_RRF_LISTS) {
33009
+ const primary = allRrfLists.slice(0, primaryListEnd);
33010
+ const expanded = allRrfLists.slice(primaryListEnd, perQueryListEnd);
33011
+ const supplemental = allRrfLists.slice(perQueryListEnd);
33012
+ const budget = Math.max(0, MAX_RRF_LISTS - primary.length - supplemental.length);
33013
+ allRrfLists.length = 0;
33014
+ allRrfLists.push(...primary, ...expanded.slice(0, budget), ...supplemental);
33015
+ }
31025
33016
  const fused = reciprocalRankFusion(allRrfLists);
31026
33017
  const maxResults = limit * 3;
31027
33018
  return fused.slice(0, maxResults);
@@ -31091,9 +33082,6 @@ async function runRecall(input) {
31091
33082
  if (input.id) {
31092
33083
  return recallById(input.id);
31093
33084
  }
31094
- if (ftsQuery(input.query) === EMPTY_QUERY) {
31095
- return "Query too vague \u2014 try using specific keywords, file names, or technical terms.";
31096
- }
31097
33085
  const fused = await searchRecall(input);
31098
33086
  const recallCfg = input.searchConfig?.recall;
31099
33087
  return formatFusedResults(fused, {
@@ -31140,9 +33128,11 @@ export {
31140
33128
  config2 as config,
31141
33129
  consolidationUser,
31142
33130
  consumeCameOutOfIdle,
33131
+ import_exports as conversationImport,
31143
33132
  curator_exports as curator,
31144
33133
  curatorUser,
31145
33134
  data_exports as data,
33135
+ dataDir,
31146
33136
  db,
31147
33137
  dbPath,
31148
33138
  distillation_exports as distillation,
@@ -31162,6 +33152,7 @@ export {
31162
33152
  ftsQueryRelaxed,
31163
33153
  getGitRemote,
31164
33154
  getInstanceId,
33155
+ getLastImportAt,
31165
33156
  getLastTransformEstimate,
31166
33157
  getLastTransformedCount,
31167
33158
  getLastTurnAt,
@@ -31174,6 +33165,7 @@ export {
31174
33165
  importLoreFile,
31175
33166
  inline,
31176
33167
  inspectSessionState,
33168
+ instruction_detect_exports as instructionDetect,
31177
33169
  isFirstRun,
31178
33170
  isReasoningPart,
31179
33171
  isTextPart,
@@ -31210,6 +33202,7 @@ export {
31210
33202
  searchRecall,
31211
33203
  serialize,
31212
33204
  setForceMinLayer,
33205
+ setLastImportAt,
31213
33206
  setLastTurnAtForTest,
31214
33207
  setLtmTokens,
31215
33208
  setMaxContextTokens,