@loreai/core 0.17.1 → 0.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248) hide show
  1. package/dist/bun/agents-file.d.ts +4 -0
  2. package/dist/bun/agents-file.d.ts.map +1 -1
  3. package/dist/bun/config.d.ts +2 -0
  4. package/dist/bun/config.d.ts.map +1 -1
  5. package/dist/bun/curator.d.ts +45 -0
  6. package/dist/bun/curator.d.ts.map +1 -1
  7. package/dist/bun/data-dir.d.ts +18 -0
  8. package/dist/bun/data-dir.d.ts.map +1 -0
  9. package/dist/bun/db.d.ts +85 -0
  10. package/dist/bun/db.d.ts.map +1 -1
  11. package/dist/bun/distillation.d.ts +2 -13
  12. package/dist/bun/distillation.d.ts.map +1 -1
  13. package/dist/bun/embedding-vendor.d.ts +22 -38
  14. package/dist/bun/embedding-vendor.d.ts.map +1 -1
  15. package/dist/bun/embedding-worker-types.d.ts +17 -12
  16. package/dist/bun/embedding-worker-types.d.ts.map +1 -1
  17. package/dist/bun/embedding-worker.d.ts +9 -2
  18. package/dist/bun/embedding-worker.d.ts.map +1 -1
  19. package/dist/bun/embedding-worker.js +38864 -33
  20. package/dist/bun/embedding-worker.js.map +4 -4
  21. package/dist/bun/embedding.d.ts +35 -23
  22. package/dist/bun/embedding.d.ts.map +1 -1
  23. package/dist/bun/gradient.d.ts +17 -1
  24. package/dist/bun/gradient.d.ts.map +1 -1
  25. package/dist/bun/import/detect.d.ts +14 -0
  26. package/dist/bun/import/detect.d.ts.map +1 -0
  27. package/dist/bun/import/extract.d.ts +43 -0
  28. package/dist/bun/import/extract.d.ts.map +1 -0
  29. package/dist/bun/import/history.d.ts +40 -0
  30. package/dist/bun/import/history.d.ts.map +1 -0
  31. package/dist/bun/import/index.d.ts +17 -0
  32. package/dist/bun/import/index.d.ts.map +1 -0
  33. package/dist/bun/import/providers/aider.d.ts +2 -0
  34. package/dist/bun/import/providers/aider.d.ts.map +1 -0
  35. package/dist/bun/import/providers/claude-code.d.ts +2 -0
  36. package/dist/bun/import/providers/claude-code.d.ts.map +1 -0
  37. package/dist/bun/import/providers/cline.d.ts +2 -0
  38. package/dist/bun/import/providers/cline.d.ts.map +1 -0
  39. package/dist/bun/import/providers/codex.d.ts +2 -0
  40. package/dist/bun/import/providers/codex.d.ts.map +1 -0
  41. package/dist/bun/import/providers/continue.d.ts +2 -0
  42. package/dist/bun/import/providers/continue.d.ts.map +1 -0
  43. package/dist/bun/import/providers/index.d.ts +19 -0
  44. package/dist/bun/import/providers/index.d.ts.map +1 -0
  45. package/dist/bun/import/providers/opencode.d.ts +2 -0
  46. package/dist/bun/import/providers/opencode.d.ts.map +1 -0
  47. package/dist/bun/import/providers/pi.d.ts +2 -0
  48. package/dist/bun/import/providers/pi.d.ts.map +1 -0
  49. package/dist/bun/import/types.d.ts +82 -0
  50. package/dist/bun/import/types.d.ts.map +1 -0
  51. package/dist/bun/index.d.ts +5 -2
  52. package/dist/bun/index.d.ts.map +1 -1
  53. package/dist/bun/index.js +3150 -439
  54. package/dist/bun/index.js.map +4 -4
  55. package/dist/bun/instruction-detect.d.ts +66 -0
  56. package/dist/bun/instruction-detect.d.ts.map +1 -0
  57. package/dist/bun/log.d.ts +9 -0
  58. package/dist/bun/log.d.ts.map +1 -1
  59. package/dist/bun/ltm.d.ts +139 -5
  60. package/dist/bun/ltm.d.ts.map +1 -1
  61. package/dist/bun/pattern-extract.d.ts +7 -0
  62. package/dist/bun/pattern-extract.d.ts.map +1 -1
  63. package/dist/bun/prompt.d.ts +1 -1
  64. package/dist/bun/prompt.d.ts.map +1 -1
  65. package/dist/bun/recall.d.ts.map +1 -1
  66. package/dist/bun/search.d.ts +5 -3
  67. package/dist/bun/search.d.ts.map +1 -1
  68. package/dist/bun/session-limiter.d.ts +26 -0
  69. package/dist/bun/session-limiter.d.ts.map +1 -0
  70. package/dist/bun/temporal.d.ts +2 -0
  71. package/dist/bun/temporal.d.ts.map +1 -1
  72. package/dist/bun/types.d.ts +1 -1
  73. package/dist/node/agents-file.d.ts +4 -0
  74. package/dist/node/agents-file.d.ts.map +1 -1
  75. package/dist/node/config.d.ts +2 -0
  76. package/dist/node/config.d.ts.map +1 -1
  77. package/dist/node/curator.d.ts +45 -0
  78. package/dist/node/curator.d.ts.map +1 -1
  79. package/dist/node/data-dir.d.ts +18 -0
  80. package/dist/node/data-dir.d.ts.map +1 -0
  81. package/dist/node/db.d.ts +85 -0
  82. package/dist/node/db.d.ts.map +1 -1
  83. package/dist/node/distillation.d.ts +2 -13
  84. package/dist/node/distillation.d.ts.map +1 -1
  85. package/dist/node/embedding-vendor.d.ts +22 -38
  86. package/dist/node/embedding-vendor.d.ts.map +1 -1
  87. package/dist/node/embedding-worker-types.d.ts +17 -12
  88. package/dist/node/embedding-worker-types.d.ts.map +1 -1
  89. package/dist/node/embedding-worker.d.ts +9 -2
  90. package/dist/node/embedding-worker.d.ts.map +1 -1
  91. package/dist/node/embedding-worker.js +38864 -33
  92. package/dist/node/embedding-worker.js.map +4 -4
  93. package/dist/node/embedding.d.ts +35 -23
  94. package/dist/node/embedding.d.ts.map +1 -1
  95. package/dist/node/gradient.d.ts +17 -1
  96. package/dist/node/gradient.d.ts.map +1 -1
  97. package/dist/node/import/detect.d.ts +14 -0
  98. package/dist/node/import/detect.d.ts.map +1 -0
  99. package/dist/node/import/extract.d.ts +43 -0
  100. package/dist/node/import/extract.d.ts.map +1 -0
  101. package/dist/node/import/history.d.ts +40 -0
  102. package/dist/node/import/history.d.ts.map +1 -0
  103. package/dist/node/import/index.d.ts +17 -0
  104. package/dist/node/import/index.d.ts.map +1 -0
  105. package/dist/node/import/providers/aider.d.ts +2 -0
  106. package/dist/node/import/providers/aider.d.ts.map +1 -0
  107. package/dist/node/import/providers/claude-code.d.ts +2 -0
  108. package/dist/node/import/providers/claude-code.d.ts.map +1 -0
  109. package/dist/node/import/providers/cline.d.ts +2 -0
  110. package/dist/node/import/providers/cline.d.ts.map +1 -0
  111. package/dist/node/import/providers/codex.d.ts +2 -0
  112. package/dist/node/import/providers/codex.d.ts.map +1 -0
  113. package/dist/node/import/providers/continue.d.ts +2 -0
  114. package/dist/node/import/providers/continue.d.ts.map +1 -0
  115. package/dist/node/import/providers/index.d.ts +19 -0
  116. package/dist/node/import/providers/index.d.ts.map +1 -0
  117. package/dist/node/import/providers/opencode.d.ts +2 -0
  118. package/dist/node/import/providers/opencode.d.ts.map +1 -0
  119. package/dist/node/import/providers/pi.d.ts +2 -0
  120. package/dist/node/import/providers/pi.d.ts.map +1 -0
  121. package/dist/node/import/types.d.ts +82 -0
  122. package/dist/node/import/types.d.ts.map +1 -0
  123. package/dist/node/index.d.ts +5 -2
  124. package/dist/node/index.d.ts.map +1 -1
  125. package/dist/node/index.js +3150 -439
  126. package/dist/node/index.js.map +4 -4
  127. package/dist/node/instruction-detect.d.ts +66 -0
  128. package/dist/node/instruction-detect.d.ts.map +1 -0
  129. package/dist/node/log.d.ts +9 -0
  130. package/dist/node/log.d.ts.map +1 -1
  131. package/dist/node/ltm.d.ts +139 -5
  132. package/dist/node/ltm.d.ts.map +1 -1
  133. package/dist/node/pattern-extract.d.ts +7 -0
  134. package/dist/node/pattern-extract.d.ts.map +1 -1
  135. package/dist/node/prompt.d.ts +1 -1
  136. package/dist/node/prompt.d.ts.map +1 -1
  137. package/dist/node/recall.d.ts.map +1 -1
  138. package/dist/node/search.d.ts +5 -3
  139. package/dist/node/search.d.ts.map +1 -1
  140. package/dist/node/session-limiter.d.ts +26 -0
  141. package/dist/node/session-limiter.d.ts.map +1 -0
  142. package/dist/node/temporal.d.ts +2 -0
  143. package/dist/node/temporal.d.ts.map +1 -1
  144. package/dist/node/types.d.ts +1 -1
  145. package/dist/types/agents-file.d.ts +4 -0
  146. package/dist/types/agents-file.d.ts.map +1 -1
  147. package/dist/types/config.d.ts +2 -0
  148. package/dist/types/config.d.ts.map +1 -1
  149. package/dist/types/curator.d.ts +45 -0
  150. package/dist/types/curator.d.ts.map +1 -1
  151. package/dist/types/data-dir.d.ts +18 -0
  152. package/dist/types/data-dir.d.ts.map +1 -0
  153. package/dist/types/db.d.ts +85 -0
  154. package/dist/types/db.d.ts.map +1 -1
  155. package/dist/types/distillation.d.ts +2 -13
  156. package/dist/types/distillation.d.ts.map +1 -1
  157. package/dist/types/embedding-vendor.d.ts +22 -38
  158. package/dist/types/embedding-vendor.d.ts.map +1 -1
  159. package/dist/types/embedding-worker-types.d.ts +17 -12
  160. package/dist/types/embedding-worker-types.d.ts.map +1 -1
  161. package/dist/types/embedding-worker.d.ts +9 -2
  162. package/dist/types/embedding-worker.d.ts.map +1 -1
  163. package/dist/types/embedding.d.ts +35 -23
  164. package/dist/types/embedding.d.ts.map +1 -1
  165. package/dist/types/gradient.d.ts +17 -1
  166. package/dist/types/gradient.d.ts.map +1 -1
  167. package/dist/types/import/detect.d.ts +14 -0
  168. package/dist/types/import/detect.d.ts.map +1 -0
  169. package/dist/types/import/extract.d.ts +43 -0
  170. package/dist/types/import/extract.d.ts.map +1 -0
  171. package/dist/types/import/history.d.ts +40 -0
  172. package/dist/types/import/history.d.ts.map +1 -0
  173. package/dist/types/import/index.d.ts +17 -0
  174. package/dist/types/import/index.d.ts.map +1 -0
  175. package/dist/types/import/providers/aider.d.ts +2 -0
  176. package/dist/types/import/providers/aider.d.ts.map +1 -0
  177. package/dist/types/import/providers/claude-code.d.ts +2 -0
  178. package/dist/types/import/providers/claude-code.d.ts.map +1 -0
  179. package/dist/types/import/providers/cline.d.ts +2 -0
  180. package/dist/types/import/providers/cline.d.ts.map +1 -0
  181. package/dist/types/import/providers/codex.d.ts +2 -0
  182. package/dist/types/import/providers/codex.d.ts.map +1 -0
  183. package/dist/types/import/providers/continue.d.ts +2 -0
  184. package/dist/types/import/providers/continue.d.ts.map +1 -0
  185. package/dist/types/import/providers/index.d.ts +19 -0
  186. package/dist/types/import/providers/index.d.ts.map +1 -0
  187. package/dist/types/import/providers/opencode.d.ts +2 -0
  188. package/dist/types/import/providers/opencode.d.ts.map +1 -0
  189. package/dist/types/import/providers/pi.d.ts +2 -0
  190. package/dist/types/import/providers/pi.d.ts.map +1 -0
  191. package/dist/types/import/types.d.ts +82 -0
  192. package/dist/types/import/types.d.ts.map +1 -0
  193. package/dist/types/index.d.ts +5 -2
  194. package/dist/types/index.d.ts.map +1 -1
  195. package/dist/types/instruction-detect.d.ts +66 -0
  196. package/dist/types/instruction-detect.d.ts.map +1 -0
  197. package/dist/types/log.d.ts +9 -0
  198. package/dist/types/log.d.ts.map +1 -1
  199. package/dist/types/ltm.d.ts +139 -5
  200. package/dist/types/ltm.d.ts.map +1 -1
  201. package/dist/types/pattern-extract.d.ts +7 -0
  202. package/dist/types/pattern-extract.d.ts.map +1 -1
  203. package/dist/types/prompt.d.ts +1 -1
  204. package/dist/types/prompt.d.ts.map +1 -1
  205. package/dist/types/recall.d.ts.map +1 -1
  206. package/dist/types/search.d.ts +5 -3
  207. package/dist/types/search.d.ts.map +1 -1
  208. package/dist/types/session-limiter.d.ts +26 -0
  209. package/dist/types/session-limiter.d.ts.map +1 -0
  210. package/dist/types/temporal.d.ts +2 -0
  211. package/dist/types/temporal.d.ts.map +1 -1
  212. package/dist/types/types.d.ts +1 -1
  213. package/package.json +3 -4
  214. package/src/agents-file.ts +41 -13
  215. package/src/config.ts +31 -18
  216. package/src/curator.ts +163 -75
  217. package/src/data-dir.ts +76 -0
  218. package/src/db.ts +457 -11
  219. package/src/distillation.ts +65 -16
  220. package/src/embedding-vendor.ts +23 -40
  221. package/src/embedding-worker-types.ts +19 -11
  222. package/src/embedding-worker.ts +111 -47
  223. package/src/embedding.ts +224 -174
  224. package/src/gradient.ts +192 -75
  225. package/src/import/detect.ts +37 -0
  226. package/src/import/extract.ts +137 -0
  227. package/src/import/history.ts +99 -0
  228. package/src/import/index.ts +45 -0
  229. package/src/import/providers/aider.ts +207 -0
  230. package/src/import/providers/claude-code.ts +339 -0
  231. package/src/import/providers/cline.ts +324 -0
  232. package/src/import/providers/codex.ts +369 -0
  233. package/src/import/providers/continue.ts +304 -0
  234. package/src/import/providers/index.ts +32 -0
  235. package/src/import/providers/opencode.ts +272 -0
  236. package/src/import/providers/pi.ts +332 -0
  237. package/src/import/types.ts +91 -0
  238. package/src/index.ts +13 -0
  239. package/src/instruction-detect.ts +275 -0
  240. package/src/log.ts +91 -3
  241. package/src/ltm.ts +789 -41
  242. package/src/pattern-extract.ts +41 -0
  243. package/src/prompt.ts +7 -1
  244. package/src/recall.ts +43 -5
  245. package/src/search.ts +7 -5
  246. package/src/session-limiter.ts +47 -0
  247. package/src/temporal.ts +18 -6
  248. package/src/types.ts +1 -1
package/dist/bun/index.js CHANGED
@@ -125,6 +125,7 @@ __export(temporal_exports, {
125
125
  CHUNK_TERMINATOR: () => CHUNK_TERMINATOR,
126
126
  bySession: () => bySession,
127
127
  count: () => count,
128
+ hasMessages: () => hasMessages,
128
129
  markDistilled: () => markDistilled,
129
130
  partsToText: () => partsToText,
130
131
  prune: () => prune,
@@ -145,9 +146,8 @@ function sha256(input) {
145
146
  }
146
147
 
147
148
  // src/db.ts
148
- import { join, dirname } from "path";
149
+ import { join as join2, dirname } from "path";
149
150
  import { mkdirSync } from "fs";
150
- import { homedir } from "os";
151
151
 
152
152
  // src/git.ts
153
153
  import { execSync } from "child_process";
@@ -210,6 +210,36 @@ function getGitRemote(path) {
210
210
  }
211
211
  }
212
212
 
213
+ // src/data-dir.ts
214
+ import { existsSync, renameSync } from "node:fs";
215
+ import { join } from "node:path";
216
+ import { homedir } from "node:os";
217
+ var OLD_DIR_NAME = "opencode-lore";
218
+ var NEW_DIR_NAME = "lore";
219
+ var migrationAttempted = false;
220
+ function baseDir() {
221
+ return process.env.XDG_DATA_HOME || join(homedir(), ".local", "share");
222
+ }
223
+ function migrateDataDir() {
224
+ if (migrationAttempted) return;
225
+ migrationAttempted = true;
226
+ if (process.env.NODE_ENV === "test") return;
227
+ const base = baseDir();
228
+ const oldDir = join(base, OLD_DIR_NAME);
229
+ const newDir = join(base, NEW_DIR_NAME);
230
+ try {
231
+ if (existsSync(oldDir) && !existsSync(newDir)) {
232
+ renameSync(oldDir, newDir);
233
+ console.error(`[lore] migrated data directory: ${oldDir} \u2192 ${newDir}`);
234
+ }
235
+ } catch {
236
+ }
237
+ }
238
+ function dataDir() {
239
+ migrateDataDir();
240
+ return join(baseDir(), NEW_DIR_NAME);
241
+ }
242
+
213
243
  // src/db.ts
214
244
  function repoNameFromRemote(remote) {
215
245
  if (!remote) return null;
@@ -646,17 +676,123 @@ var MIGRATIONS = [
646
676
  ALTER TABLE session_state ADD COLUMN ttl_savings REAL NOT NULL DEFAULT 0;
647
677
  ALTER TABLE session_state ADD COLUMN ttl_hits INTEGER NOT NULL DEFAULT 0;
648
678
  ALTER TABLE session_state ADD COLUMN batch_savings REAL NOT NULL DEFAULT 0;
679
+ `,
680
+ `
681
+ -- Version 19: Import history for conversation import idempotency.
682
+ -- Tracks which external agent sessions have been imported to prevent
683
+ -- re-importing unchanged sources and to record user-declined imports.
684
+ CREATE TABLE IF NOT EXISTS import_history (
685
+ id TEXT PRIMARY KEY,
686
+ project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
687
+ agent_name TEXT NOT NULL,
688
+ source_id TEXT NOT NULL,
689
+ source_hash TEXT NOT NULL,
690
+ entries_created INTEGER NOT NULL DEFAULT 0,
691
+ entries_updated INTEGER NOT NULL DEFAULT 0,
692
+ imported_at INTEGER NOT NULL,
693
+ UNIQUE(project_id, agent_name, source_id)
694
+ );
695
+ CREATE INDEX IF NOT EXISTS idx_import_history_project ON import_history(project_id);
696
+ `,
697
+ `
698
+ -- Version 20: Purge worker boilerplate from temporal messages.
699
+ -- Legacy gateway/plugin worker calls (distillation observer, curator,
700
+ -- consolidation, reflector, eval) stored their full system prompts
701
+ -- (containing entire conversation transcripts, up to 1.6MB each) as
702
+ -- temporal messages. These pollute FTS search results by matching
703
+ -- virtually any domain keyword. Safe to delete: their actual output
704
+ -- (distillations, knowledge entries) is stored in dedicated tables.
705
+ DELETE FROM temporal_messages WHERE content LIKE '%You are a memory observer.%'
706
+ OR content LIKE '%You are a long-term memory curator.%'
707
+ OR content LIKE '%You are a long-term memory curator performing a consolidation pass.%'
708
+ OR content LIKE '%You are a memory reflector.%'
709
+ OR content LIKE '%You are evaluating distillation quality.%';
710
+ `,
711
+ `
712
+ -- Version 21: Persist avoided compaction data from live sessions.
713
+ -- Historical estimates previously re-simulated avoided compactions from
714
+ -- temporal message token estimates (chars/3), missing system prompt and
715
+ -- tool definition overhead. Persisting the live session's real shadow
716
+ -- context tracking (from actual API-reported total input tokens) gives
717
+ -- accurate post-restart historical estimates.
718
+ ALTER TABLE session_state ADD COLUMN avoided_compactions INTEGER NOT NULL DEFAULT 0;
719
+ ALTER TABLE session_state ADD COLUMN avoided_compaction_cost REAL NOT NULL DEFAULT 0;
720
+ `,
721
+ `
722
+ -- Version 22: Track when conversation import was last offered/run.
723
+ -- NULL means import has never been offered for this project.
724
+ -- Used by auto-import to avoid re-prompting, and by explicit
725
+ -- \`lore import\` for incremental imports (only newer conversations).
726
+ ALTER TABLE projects ADD COLUMN last_import_at INTEGER;
727
+
728
+ -- Backfill: migrate legacy __declined__ sentinel rows so existing
729
+ -- users who previously declined are not re-prompted after upgrading.
730
+ UPDATE projects SET last_import_at = (
731
+ SELECT ih.imported_at FROM import_history ih
732
+ WHERE ih.project_id = projects.id
733
+ AND ih.source_id = '__declined__'
734
+ LIMIT 1
735
+ )
736
+ WHERE EXISTS (
737
+ SELECT 1 FROM import_history ih
738
+ WHERE ih.project_id = projects.id
739
+ AND ih.source_id = '__declined__'
740
+ );
741
+ `,
742
+ `
743
+ -- Version 23: Persist volatile session tracking state across restarts.
744
+ -- Previously these were in-memory only, causing duplicate processing,
745
+ -- false compaction detection, and expensive prompt cache busts on restart.
746
+ ALTER TABLE session_state ADD COLUMN last_curated_at INTEGER NOT NULL DEFAULT 0;
747
+ ALTER TABLE session_state ADD COLUMN message_count INTEGER NOT NULL DEFAULT 0;
748
+ ALTER TABLE session_state ADD COLUMN turns_since_curation INTEGER NOT NULL DEFAULT 0;
749
+ ALTER TABLE session_state ADD COLUMN ltm_cache_text TEXT;
750
+ ALTER TABLE session_state ADD COLUMN ltm_cache_tokens INTEGER;
751
+ ALTER TABLE session_state ADD COLUMN ltm_pin_text TEXT;
752
+ ALTER TABLE session_state ADD COLUMN ltm_pin_tokens INTEGER;
753
+ ALTER TABLE session_state ADD COLUMN consecutive_text_only_turns INTEGER NOT NULL DEFAULT 0;
754
+ `,
755
+ `
756
+ -- Version 24: Persist remaining volatile session state across restarts.
757
+ -- Session identity (Tier 1/2/3 session correlation)
758
+ ALTER TABLE session_state ADD COLUMN fingerprint TEXT NOT NULL DEFAULT '';
759
+ ALTER TABLE session_state ADD COLUMN header_session_id TEXT;
760
+ ALTER TABLE session_state ADD COLUMN header_name TEXT;
761
+ -- Cache warming state
762
+ ALTER TABLE session_state ADD COLUMN resolved_conversation_ttl TEXT NOT NULL DEFAULT '5m';
763
+ ALTER TABLE session_state ADD COLUMN warmup_state TEXT;
764
+ -- Gradient calibration state (survives restarts to avoid uncalibrated busts)
765
+ ALTER TABLE session_state ADD COLUMN dynamic_context_cap REAL NOT NULL DEFAULT 0;
766
+ ALTER TABLE session_state ADD COLUMN bust_rate_ema REAL NOT NULL DEFAULT -1;
767
+ ALTER TABLE session_state ADD COLUMN inter_bust_interval_ema REAL NOT NULL DEFAULT -1;
768
+ ALTER TABLE session_state ADD COLUMN last_layer INTEGER NOT NULL DEFAULT 0;
769
+ ALTER TABLE session_state ADD COLUMN last_known_input INTEGER NOT NULL DEFAULT 0;
770
+ ALTER TABLE session_state ADD COLUMN last_turn_at INTEGER NOT NULL DEFAULT 0;
771
+ ALTER TABLE session_state ADD COLUMN last_bust_at INTEGER NOT NULL DEFAULT 0;
772
+ `,
773
+ `
774
+ -- Version 25: Adaptive dedup threshold \u2014 store accept/reject feedback
775
+ -- on embedding-based duplicate pairs for per-project threshold calibration.
776
+ -- Titles stored instead of FK IDs because entries are deleted during dedup;
777
+ -- the similarity float is the actual calibration input.
778
+ CREATE TABLE IF NOT EXISTS dedup_feedback (
779
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
780
+ project_id TEXT,
781
+ entry_a_title TEXT NOT NULL,
782
+ entry_b_title TEXT NOT NULL,
783
+ similarity REAL NOT NULL,
784
+ accepted INTEGER NOT NULL,
785
+ source TEXT NOT NULL DEFAULT 'manual',
786
+ created_at INTEGER NOT NULL
787
+ );
788
+ CREATE INDEX IF NOT EXISTS idx_dedup_feedback_project
789
+ ON dedup_feedback(project_id);
649
790
  `
650
791
  ];
651
- function dataDir() {
652
- const xdg = process.env.XDG_DATA_HOME;
653
- const base = xdg || join(homedir(), ".local", "share");
654
- return join(base, "opencode-lore");
655
- }
656
792
  function dbPath() {
657
793
  const envPath = process.env.LORE_DB_PATH;
658
794
  if (envPath) return envPath;
659
- return join(dataDir(), "lore.db");
795
+ return join2(dataDir(), "lore.db");
660
796
  }
661
797
  var instance;
662
798
  function db() {
@@ -674,7 +810,7 @@ function db() {
674
810
  }
675
811
  const dir = dataDir();
676
812
  mkdirSync(dir, { recursive: true });
677
- path = join(dir, "lore.db");
813
+ path = join2(dir, "lore.db");
678
814
  }
679
815
  const database = new Database(path);
680
816
  database.exec("PRAGMA journal_mode = WAL");
@@ -787,6 +923,11 @@ function close() {
787
923
  }
788
924
  }
789
925
  function ensureProject(path, name) {
926
+ if (!process.env.LORE_DB_PATH && /^\/test\//.test(path)) {
927
+ throw new Error(
928
+ `Refusing to create project with test path "${path}" in the production DB. Set LORE_DB_PATH to a temp path, or run tests via \`bun test\` from the repo root.`
929
+ );
930
+ }
790
931
  const existing = db().query("SELECT id, git_remote FROM projects WHERE path = ?").get(path);
791
932
  if (existing) {
792
933
  if (!existing.git_remote) {
@@ -841,6 +982,15 @@ function isFirstRun() {
841
982
  const row = db().query("SELECT COUNT(*) as count FROM projects").get();
842
983
  return row.count === 0;
843
984
  }
985
+ function getLastImportAt(projectPath) {
986
+ const id = ensureProject(projectPath);
987
+ const row = db().query("SELECT last_import_at FROM projects WHERE id = ?").get(id);
988
+ return row?.last_import_at ?? null;
989
+ }
990
+ function setLastImportAt(projectPath, timestamp) {
991
+ const id = ensureProject(projectPath);
992
+ db().query("UPDATE projects SET last_import_at = ? WHERE id = ?").run(timestamp, id);
993
+ }
844
994
  function loadForceMinLayer(sessionID) {
845
995
  const row = db().query("SELECT force_min_layer FROM session_state WHERE session_id = ?").get(sessionID);
846
996
  return row?.force_min_layer ?? 0;
@@ -859,8 +1009,9 @@ function saveSessionCosts(sessionID, costs) {
859
1009
  `INSERT INTO session_state (session_id, force_min_layer, updated_at,
860
1010
  conversation_cost, worker_cost, conversation_turns,
861
1011
  cache_read_tokens, cache_write_tokens,
862
- warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings)
863
- VALUES (?, COALESCE((SELECT force_min_layer FROM session_state WHERE session_id = ?), 0), ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
1012
+ warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings,
1013
+ avoided_compactions, avoided_compaction_cost)
1014
+ VALUES (?, COALESCE((SELECT force_min_layer FROM session_state WHERE session_id = ?), 0), ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
864
1015
  ON CONFLICT(session_id) DO UPDATE SET
865
1016
  conversation_cost = excluded.conversation_cost,
866
1017
  worker_cost = excluded.worker_cost,
@@ -872,6 +1023,8 @@ function saveSessionCosts(sessionID, costs) {
872
1023
  ttl_savings = excluded.ttl_savings,
873
1024
  ttl_hits = excluded.ttl_hits,
874
1025
  batch_savings = excluded.batch_savings,
1026
+ avoided_compactions = excluded.avoided_compactions,
1027
+ avoided_compaction_cost = excluded.avoided_compaction_cost,
875
1028
  updated_at = excluded.updated_at`
876
1029
  ).run(
877
1030
  sessionID,
@@ -886,14 +1039,17 @@ function saveSessionCosts(sessionID, costs) {
886
1039
  costs.warmupHits,
887
1040
  costs.ttlSavings,
888
1041
  costs.ttlHits,
889
- costs.batchSavings
1042
+ costs.batchSavings,
1043
+ costs.avoidedCompactions,
1044
+ costs.avoidedCompactionCost
890
1045
  );
891
1046
  }
892
1047
  function loadSessionCosts(sessionID) {
893
1048
  const row = db().query(
894
1049
  `SELECT conversation_cost, worker_cost, conversation_turns,
895
1050
  cache_read_tokens, cache_write_tokens,
896
- warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings
1051
+ warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings,
1052
+ avoided_compactions, avoided_compaction_cost
897
1053
  FROM session_state WHERE session_id = ?`
898
1054
  ).get(sessionID);
899
1055
  if (!row) return null;
@@ -907,14 +1063,17 @@ function loadSessionCosts(sessionID) {
907
1063
  warmupHits: row.warmup_hits,
908
1064
  ttlSavings: row.ttl_savings,
909
1065
  ttlHits: row.ttl_hits,
910
- batchSavings: row.batch_savings
1066
+ batchSavings: row.batch_savings,
1067
+ avoidedCompactions: row.avoided_compactions,
1068
+ avoidedCompactionCost: row.avoided_compaction_cost
911
1069
  };
912
1070
  }
913
1071
  function loadAllSessionCosts() {
914
1072
  const rows = db().query(
915
1073
  `SELECT session_id, conversation_cost, worker_cost, conversation_turns,
916
1074
  cache_read_tokens, cache_write_tokens,
917
- warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings
1075
+ warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings,
1076
+ avoided_compactions, avoided_compaction_cost
918
1077
  FROM session_state
919
1078
  WHERE conversation_turns > 0 OR warmup_savings > 0 OR ttl_savings > 0 OR batch_savings > 0`
920
1079
  ).all();
@@ -930,11 +1089,160 @@ function loadAllSessionCosts() {
930
1089
  warmupHits: row.warmup_hits,
931
1090
  ttlSavings: row.ttl_savings,
932
1091
  ttlHits: row.ttl_hits,
933
- batchSavings: row.batch_savings
1092
+ batchSavings: row.batch_savings,
1093
+ avoidedCompactions: row.avoided_compactions,
1094
+ avoidedCompactionCost: row.avoided_compaction_cost
934
1095
  });
935
1096
  }
936
1097
  return result;
937
1098
  }
1099
+ function saveSessionTracking(sessionID, state) {
1100
+ const now = Date.now();
1101
+ db().query(
1102
+ "INSERT OR IGNORE INTO session_state (session_id, force_min_layer, updated_at) VALUES (?, 0, ?)"
1103
+ ).run(sessionID, now);
1104
+ const sets = ["updated_at = ?"];
1105
+ const vals = [now];
1106
+ if (state.lastCuratedAt !== void 0) {
1107
+ sets.push("last_curated_at = ?");
1108
+ vals.push(state.lastCuratedAt);
1109
+ }
1110
+ if (state.messageCount !== void 0) {
1111
+ sets.push("message_count = ?");
1112
+ vals.push(state.messageCount);
1113
+ }
1114
+ if (state.turnsSinceCuration !== void 0) {
1115
+ sets.push("turns_since_curation = ?");
1116
+ vals.push(state.turnsSinceCuration);
1117
+ }
1118
+ if (state.consecutiveTextOnlyTurns !== void 0) {
1119
+ sets.push("consecutive_text_only_turns = ?");
1120
+ vals.push(state.consecutiveTextOnlyTurns);
1121
+ }
1122
+ if (state.ltmCacheText !== void 0) {
1123
+ sets.push("ltm_cache_text = ?");
1124
+ vals.push(state.ltmCacheText);
1125
+ }
1126
+ if (state.ltmCacheTokens !== void 0) {
1127
+ sets.push("ltm_cache_tokens = ?");
1128
+ vals.push(state.ltmCacheTokens);
1129
+ }
1130
+ if (state.ltmPinText !== void 0) {
1131
+ sets.push("ltm_pin_text = ?");
1132
+ vals.push(state.ltmPinText);
1133
+ }
1134
+ if (state.ltmPinTokens !== void 0) {
1135
+ sets.push("ltm_pin_tokens = ?");
1136
+ vals.push(state.ltmPinTokens);
1137
+ }
1138
+ if (state.fingerprint !== void 0) {
1139
+ sets.push("fingerprint = ?");
1140
+ vals.push(state.fingerprint);
1141
+ }
1142
+ if (state.headerSessionId !== void 0) {
1143
+ sets.push("header_session_id = ?");
1144
+ vals.push(state.headerSessionId);
1145
+ }
1146
+ if (state.headerName !== void 0) {
1147
+ sets.push("header_name = ?");
1148
+ vals.push(state.headerName);
1149
+ }
1150
+ if (state.resolvedConversationTTL !== void 0) {
1151
+ sets.push("resolved_conversation_ttl = ?");
1152
+ vals.push(state.resolvedConversationTTL);
1153
+ }
1154
+ if (state.warmupState !== void 0) {
1155
+ sets.push("warmup_state = ?");
1156
+ vals.push(state.warmupState);
1157
+ }
1158
+ if (state.dynamicContextCap !== void 0) {
1159
+ sets.push("dynamic_context_cap = ?");
1160
+ vals.push(state.dynamicContextCap);
1161
+ }
1162
+ if (state.bustRateEMA !== void 0) {
1163
+ sets.push("bust_rate_ema = ?");
1164
+ vals.push(state.bustRateEMA);
1165
+ }
1166
+ if (state.interBustIntervalEMA !== void 0) {
1167
+ sets.push("inter_bust_interval_ema = ?");
1168
+ vals.push(state.interBustIntervalEMA);
1169
+ }
1170
+ if (state.lastLayer !== void 0) {
1171
+ sets.push("last_layer = ?");
1172
+ vals.push(state.lastLayer);
1173
+ }
1174
+ if (state.lastKnownInput !== void 0) {
1175
+ sets.push("last_known_input = ?");
1176
+ vals.push(state.lastKnownInput);
1177
+ }
1178
+ if (state.lastTurnAt !== void 0) {
1179
+ sets.push("last_turn_at = ?");
1180
+ vals.push(state.lastTurnAt);
1181
+ }
1182
+ if (state.lastBustAt !== void 0) {
1183
+ sets.push("last_bust_at = ?");
1184
+ vals.push(state.lastBustAt);
1185
+ }
1186
+ db().query(
1187
+ "UPDATE session_state SET " + sets.join(", ") + " WHERE session_id = ?"
1188
+ ).run(...vals, sessionID);
1189
+ }
1190
+ function loadSessionTracking(sessionID) {
1191
+ const row = db().query(
1192
+ `SELECT last_curated_at, message_count, turns_since_curation,
1193
+ consecutive_text_only_turns,
1194
+ ltm_cache_text, ltm_cache_tokens, ltm_pin_text, ltm_pin_tokens,
1195
+ fingerprint, header_session_id, header_name,
1196
+ resolved_conversation_ttl, warmup_state,
1197
+ dynamic_context_cap, bust_rate_ema, inter_bust_interval_ema,
1198
+ last_layer, last_known_input, last_turn_at, last_bust_at
1199
+ FROM session_state WHERE session_id = ?`
1200
+ ).get(sessionID);
1201
+ if (!row) return null;
1202
+ return {
1203
+ lastCuratedAt: row.last_curated_at,
1204
+ messageCount: row.message_count,
1205
+ turnsSinceCuration: row.turns_since_curation,
1206
+ consecutiveTextOnlyTurns: row.consecutive_text_only_turns,
1207
+ ltmCacheText: row.ltm_cache_text,
1208
+ ltmCacheTokens: row.ltm_cache_tokens,
1209
+ ltmPinText: row.ltm_pin_text,
1210
+ ltmPinTokens: row.ltm_pin_tokens,
1211
+ fingerprint: row.fingerprint,
1212
+ headerSessionId: row.header_session_id,
1213
+ headerName: row.header_name,
1214
+ resolvedConversationTTL: row.resolved_conversation_ttl,
1215
+ warmupState: row.warmup_state,
1216
+ dynamicContextCap: row.dynamic_context_cap,
1217
+ bustRateEMA: row.bust_rate_ema,
1218
+ interBustIntervalEMA: row.inter_bust_interval_ema,
1219
+ lastLayer: row.last_layer,
1220
+ lastKnownInput: row.last_known_input,
1221
+ lastTurnAt: row.last_turn_at,
1222
+ lastBustAt: row.last_bust_at
1223
+ };
1224
+ }
1225
+ function loadHeaderSessionIndex() {
1226
+ const rows = db().query(
1227
+ `SELECT session_id, header_session_id, header_name
1228
+ FROM session_state
1229
+ WHERE header_session_id IS NOT NULL AND header_name IS NOT NULL`
1230
+ ).all();
1231
+ return rows.map((row) => ({
1232
+ sessionId: row.session_id,
1233
+ headerSessionId: row.header_session_id,
1234
+ headerName: row.header_name
1235
+ }));
1236
+ }
1237
+ function getKV(key) {
1238
+ const row = db().query("SELECT value FROM kv_meta WHERE key = ?").get(key);
1239
+ return row?.value ?? null;
1240
+ }
1241
+ function setKV(key, value) {
1242
+ db().query(
1243
+ "INSERT INTO kv_meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value = ?"
1244
+ ).run(key, value, value);
1245
+ }
938
1246
  function getMeta(key) {
939
1247
  const row = db().query("SELECT value FROM metadata WHERE key = ?").get(key);
940
1248
  return row?.value ?? null;
@@ -9753,7 +10061,7 @@ var handle = {
9753
10061
  };
9754
10062
 
9755
10063
  // ../../node_modules/.bun/mdast-util-to-markdown@2.1.2/node_modules/mdast-util-to-markdown/lib/join.js
9756
- var join2 = [joinDefaults];
10064
+ var join3 = [joinDefaults];
9757
10065
  function joinDefaults(left, right, parent, state) {
9758
10066
  if (right.type === "code" && formatCodeAsIndented(right, state) && (left.type === "list" || left.type === right.type && formatCodeAsIndented(left, state))) {
9759
10067
  return false;
@@ -10173,7 +10481,7 @@ function toMarkdown(tree, options) {
10173
10481
  handle: void 0,
10174
10482
  indentLines,
10175
10483
  indexStack: [],
10176
- join: [...join2],
10484
+ join: [...join3],
10177
10485
  options: {},
10178
10486
  safe: safeBound,
10179
10487
  stack: [],
@@ -11898,6 +12206,10 @@ Focus ONLY on knowledge that helps a coding agent work effectively on THIS codeb
11898
12206
  - Environment/tooling setup details that affect development
11899
12207
  - Important relationships between components that aren't obvious from reading the code
11900
12208
  - User preferences and working style specific to how they use this project
12209
+ - Repeated user instructions \u2014 when the user says things like "always", "never",
12210
+ "make sure to", "don't forget to", these are high-value preference candidates.
12211
+ If you see instruction-like language, prioritize extracting it as a "preference" entry.
12212
+ These instructions represent how the user wants to work and should persist across sessions.
11901
12213
 
11902
12214
  Do NOT extract:
11903
12215
  - Task-specific details (file currently being edited, current bug being fixed)
@@ -11982,7 +12294,9 @@ IMPORTANT:
11982
12294
  2. When updating, REPLACE the content with a complete rewrite \u2014 never append.
11983
12295
  3. If entries cover the same system from different angles, merge them: update one, delete the rest.
11984
12296
  4. Only create a new entry for genuinely distinct knowledge with no existing home.
11985
- 5. Keep all entries under 150 words. If an existing entry is too long, use an update op to trim it.`;
12297
+ 5. Keep all entries under 150 words. If an existing entry is too long, use an update op to trim it.
12298
+ 6. Pay special attention to user instructions ("always do X", "never do Y", "make sure to X").
12299
+ These are strong signals for "preference" entries with high confidence.`;
11986
12300
  }
11987
12301
  var CONSOLIDATION_SYSTEM = `You are a long-term memory curator performing a consolidation pass. The knowledge base has grown too large and needs to be trimmed.
11988
12302
 
@@ -12146,9 +12460,12 @@ var log_exports = {};
12146
12460
  __export(log_exports, {
12147
12461
  error: () => error,
12148
12462
  info: () => info,
12463
+ logFilePath: () => logFilePath,
12149
12464
  registerSink: () => registerSink,
12150
12465
  warn: () => warn
12151
12466
  });
12467
+ import { appendFileSync, renameSync as renameSync2, statSync, mkdirSync as mkdirSync2 } from "node:fs";
12468
+ import { join as join4 } from "node:path";
12152
12469
  var sink = null;
12153
12470
  function registerSink(s) {
12154
12471
  sink = s;
@@ -12163,17 +12480,71 @@ function findError(args) {
12163
12480
  }
12164
12481
  return void 0;
12165
12482
  }
12483
+ var LOG_MAX_BYTES = 5 * 1024 * 1024;
12484
+ var ROTATION_CHECK_INTERVAL = 1e3;
12485
+ var logPath;
12486
+ var logPathResolved = false;
12487
+ var writeCount = 0;
12488
+ function resolveLogPath() {
12489
+ if (process.env.NODE_ENV === "test") return void 0;
12490
+ try {
12491
+ const dir = dataDir();
12492
+ mkdirSync2(dir, { recursive: true });
12493
+ return join4(dir, "lore.log");
12494
+ } catch {
12495
+ return void 0;
12496
+ }
12497
+ }
12498
+ function logFilePath() {
12499
+ if (!logPathResolved) {
12500
+ logPath = resolveLogPath();
12501
+ logPathResolved = true;
12502
+ }
12503
+ return logPath;
12504
+ }
12505
+ function maybeRotate() {
12506
+ if (!logPath) return;
12507
+ try {
12508
+ const stat = statSync(logPath);
12509
+ if (stat.size > LOG_MAX_BYTES) {
12510
+ renameSync2(logPath, logPath + ".1");
12511
+ }
12512
+ } catch {
12513
+ }
12514
+ }
12515
+ function writeToFile(level, message) {
12516
+ const path = logFilePath();
12517
+ if (!path) return;
12518
+ if (++writeCount % ROTATION_CHECK_INTERVAL === 0) {
12519
+ maybeRotate();
12520
+ }
12521
+ const ts = (/* @__PURE__ */ new Date()).toISOString();
12522
+ const tag = level.toUpperCase().padEnd(5);
12523
+ const flat = message.replace(/\n/g, "\\n");
12524
+ const line = `${ts} [${tag}] ${flat}
12525
+ `;
12526
+ try {
12527
+ appendFileSync(path, line);
12528
+ } catch {
12529
+ }
12530
+ }
12166
12531
  function info(...args) {
12167
12532
  if (isDebug) console.error("[lore]", ...args);
12168
- sink?.info(formatArgs(args));
12533
+ const msg = formatArgs(args);
12534
+ sink?.info(msg);
12535
+ writeToFile("info", msg);
12169
12536
  }
12170
12537
  function warn(...args) {
12171
12538
  if (isDebug) console.error("[lore] WARN:", ...args);
12172
- sink?.warn(formatArgs(args));
12539
+ const msg = formatArgs(args);
12540
+ sink?.warn(msg);
12541
+ writeToFile("warn", msg);
12173
12542
  }
12174
12543
  function error(...args) {
12175
12544
  console.error("[lore]", ...args);
12176
- sink?.error(formatArgs(args));
12545
+ const msg = formatArgs(args);
12546
+ sink?.error(msg);
12547
+ writeToFile("error", msg);
12177
12548
  const err = findError(args);
12178
12549
  if (err) sink?.captureException(err);
12179
12550
  }
@@ -12333,10 +12704,11 @@ function extractTopTerms(text4, limit = 40) {
12333
12704
  function reciprocalRankFusion(lists, k = 60) {
12334
12705
  const scores = /* @__PURE__ */ new Map();
12335
12706
  for (const list4 of lists) {
12707
+ const w = list4.weight ?? 1;
12336
12708
  for (let rank = 0; rank < list4.items.length; rank++) {
12337
12709
  const item = list4.items[rank];
12338
12710
  const id = list4.key(item);
12339
- const rrfScore = 1 / (k + rank);
12711
+ const rrfScore = w / (k + rank);
12340
12712
  const existing = scores.get(id);
12341
12713
  if (existing) {
12342
12714
  existing.score += rrfScore;
@@ -12390,8 +12762,8 @@ async function expandQuery(llm, query, model, sessionID) {
12390
12762
  var embedding_exports = {};
12391
12763
  __export(embedding_exports, {
12392
12764
  LocalProviderUnavailableError: () => LocalProviderUnavailableError,
12393
- _markFastembedUnavailable: () => _markFastembedUnavailable,
12394
- _resetFastembedProbe: () => _resetFastembedProbe,
12765
+ _markLocalProviderUnavailable: () => _markLocalProviderUnavailable,
12766
+ _resetLocalProviderProbe: () => _resetLocalProviderProbe,
12395
12767
  _restoreProvider: () => _restoreProvider,
12396
12768
  _saveAndClearProvider: () => _saveAndClearProvider,
12397
12769
  _shutdownAndDisable: () => _shutdownAndDisable,
@@ -12410,6 +12782,7 @@ __export(embedding_exports, {
12410
12782
  runStartupBackfill: () => runStartupBackfill,
12411
12783
  toBlob: () => toBlob,
12412
12784
  vectorSearch: () => vectorSearch,
12785
+ vectorSearchAllDistillations: () => vectorSearchAllDistillations,
12413
12786
  vectorSearchDistillations: () => vectorSearchDistillations,
12414
12787
  vectorSearchTemporal: () => vectorSearchTemporal
12415
12788
  });
@@ -26183,8 +26556,8 @@ function date4(params) {
26183
26556
  config(en_default());
26184
26557
 
26185
26558
  // src/config.ts
26186
- import { existsSync, readFileSync } from "node:fs";
26187
- import { join as join3 } from "node:path";
26559
+ import { existsSync as existsSync2, readFileSync } from "node:fs";
26560
+ import { join as join5 } from "node:path";
26188
26561
  var LoreConfig = external_exports.object({
26189
26562
  model: external_exports.object({
26190
26563
  providerID: external_exports.string(),
@@ -26301,15 +26674,25 @@ var LoreConfig = external_exports.object({
26301
26674
  }).default({ title: 6, content: 2, category: 3 }),
26302
26675
  /** Max results per source in recall tool before fusion. Default: 10. */
26303
26676
  recallLimit: external_exports.number().min(1).max(50).default(10),
26304
- /** Enable LLM-based query expansion for the recall tool. Default: false.
26305
- * When enabled, the configured model generates 2–3 alternative query phrasings
26306
- * before search, improving recall for ambiguous queries. */
26307
- queryExpansion: external_exports.boolean().default(false),
26677
+ /** Enable LLM-based query expansion for the recall tool. Default: true.
26678
+ * The configured model generates 2–3 alternative query phrasings before
26679
+ * search, improving recall for ambiguous queries. Guarded by a 3-second
26680
+ * timeout — if expansion fails or times out, the original query is used. */
26681
+ queryExpansion: external_exports.boolean().default(true),
26682
+ /** RRF weight multiplier for vector search lists. Applied when the query
26683
+ * has >= `vectorBoostMinTerms` meaningful terms (after stopword removal).
26684
+ * Boosts semantic/vector results relative to keyword-based BM25 lists.
26685
+ * Default: 1.5. Set to 1.0 to disable. */
26686
+ vectorBoostWeight: external_exports.number().min(1).max(5).default(1.5),
26687
+ /** Minimum meaningful query terms (after stopword removal) to activate
26688
+ * vector boost. Short keyword queries (1-2 terms) are left unweighted
26689
+ * since BM25 excels there. Default: 3. */
26690
+ vectorBoostMinTerms: external_exports.number().min(1).max(10).default(3),
26308
26691
  /** Vector embedding search.
26309
26692
  * Supports multiple providers:
26310
- * - "local" (default): fastembed + ONNX Runtime, no API key needed.
26311
- * Uses bge-small-en-v1.5 (384 dims). Model downloaded on first use (~33MB),
26312
- * cached in ~/.cache/fastembed. ~150ms per query embed.
26693
+ * - "local" (default): @huggingface/transformers + nomic-embed-text-v1.5, no API key needed.
26694
+ * 768 dims (Matryoshka-capable: 64–768). Model downloaded on first use (~137MB INT8),
26695
+ * cached locally. Uses task instruction prefixes (search_document: / search_query:).
26313
26696
  * - "voyage": Voyage AI (VOYAGE_API_KEY, voyage-code-3, 1024 dims)
26314
26697
  * - "openai": OpenAI (OPENAI_API_KEY, text-embedding-3-small, 1536 dims)
26315
26698
  * Set enabled: false to explicitly disable even with a provider available. */
@@ -26318,19 +26701,20 @@ var LoreConfig = external_exports.object({
26318
26701
  * Set to false to explicitly disable. */
26319
26702
  enabled: external_exports.boolean().default(true),
26320
26703
  /** Embedding provider. Default: "local".
26321
- * - "local": fastembed + ONNX Runtime, no API key (default model: bge-small-en-v1.5, 384 dims)
26704
+ * - "local": @huggingface/transformers, no API key (default model: nomic-embed-text-v1.5, 768 dims)
26322
26705
  * - "voyage": VOYAGE_API_KEY (default model: voyage-code-3, 1024 dims)
26323
26706
  * - "openai": OPENAI_API_KEY (default model: text-embedding-3-small, 1536 dims) */
26324
26707
  provider: external_exports.enum(["local", "voyage", "openai"]).default("local"),
26325
26708
  /** Model ID for the embedding provider. Default depends on provider. */
26326
- model: external_exports.string().default("BGESmallENV15"),
26327
- /** Embedding dimensions. Default: 384 (local) / 1024 (voyage) / 1536 (openai). */
26328
- dimensions: external_exports.number().min(64).max(2048).default(384)
26709
+ model: external_exports.string().default("nomic-ai/nomic-embed-text-v1.5"),
26710
+ /** Embedding dimensions. Default: 768 (local) / 1024 (voyage) / 1536 (openai).
26711
+ * For the local Nomic v1.5 model, supports Matryoshka dimensions: 64, 128, 256, 512, 768. */
26712
+ dimensions: external_exports.number().min(64).max(2048).default(768)
26329
26713
  }).default({
26330
26714
  enabled: true,
26331
26715
  provider: "local",
26332
- model: "BGESmallENV15",
26333
- dimensions: 384
26716
+ model: "nomic-ai/nomic-embed-text-v1.5",
26717
+ dimensions: 768
26334
26718
  }),
26335
26719
  /** Recall output formatting — controls how search results are presented to the agent. */
26336
26720
  recall: external_exports.object({
@@ -26347,8 +26731,10 @@ var LoreConfig = external_exports.object({
26347
26731
  }).default({
26348
26732
  ftsWeights: { title: 6, content: 2, category: 3 },
26349
26733
  recallLimit: 10,
26350
- queryExpansion: false,
26351
- embeddings: { enabled: true, provider: "local", model: "BGESmallENV15", dimensions: 384 },
26734
+ queryExpansion: true,
26735
+ vectorBoostWeight: 1.5,
26736
+ vectorBoostMinTerms: 3,
26737
+ embeddings: { enabled: true, provider: "local", model: "nomic-ai/nomic-embed-text-v1.5", dimensions: 768 },
26352
26738
  recall: { charBudget: 8e3, relevanceFloor: 0.15, maxResults: 15 }
26353
26739
  }),
26354
26740
  cache: external_exports.object({
@@ -26366,9 +26752,9 @@ var LoreConfig = external_exports.object({
26366
26752
  warming: external_exports.object({
26367
26753
  /** Enable cache warming. Default: true. */
26368
26754
  enabled: external_exports.boolean().default(true),
26369
- /** Override the survival probability threshold below which warming is
26370
- * skipped. Default: auto-derived from cache read/write cost ratio
26371
- * (~0.08 for 5m TTL, ~0.05 for 1h TTL). */
26755
+ /** Override the return probability threshold below which warming is
26756
+ * skipped. Default: auto-derived from corrected cost ratio
26757
+ * read/(write-read) (~0.087 for 5m TTL, ~0.042 for 1h TTL). */
26372
26758
  minReturnProbability: external_exports.number().min(0).max(1).optional()
26373
26759
  }).default({ enabled: true })
26374
26760
  }).default({
@@ -26388,8 +26774,8 @@ function config2() {
26388
26774
  return current;
26389
26775
  }
26390
26776
  async function load(directory) {
26391
- const path = join3(directory, ".lore.json");
26392
- if (existsSync(path)) {
26777
+ const path = join5(directory, ".lore.json");
26778
+ if (existsSync2(path)) {
26393
26779
  const raw = JSON.parse(readFileSync(path, "utf8"));
26394
26780
  current = LoreConfig.parse(raw);
26395
26781
  return current;
@@ -26420,8 +26806,7 @@ function vendorModelInfo() {
26420
26806
  const reg = getRegistration();
26421
26807
  if (!reg) return null;
26422
26808
  return {
26423
- modelAbsoluteDirPath: reg.modelAbsoluteDirPath,
26424
- modelName: reg.modelName
26809
+ localModelPath: reg.localModelPath
26425
26810
  };
26426
26811
  }
26427
26812
  function isVendoredBinary() {
@@ -26508,62 +26893,31 @@ var OpenAIProvider = class {
26508
26893
  var LocalProviderUnavailableError = class extends Error {
26509
26894
  constructor(cause) {
26510
26895
  super(
26511
- "Local embedding provider unavailable: 'fastembed' is not installed. Configure search.embeddings.provider to 'voyage' or 'openai', or reinstall with ONNXRUNTIME_NODE_INSTALL_CUDA=skip to retry the optional fastembed install."
26896
+ "Local embedding provider unavailable: '@huggingface/transformers' failed to initialize. Configure search.embeddings.provider to 'voyage' or 'openai', or set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback."
26512
26897
  );
26513
26898
  this.name = "LocalProviderUnavailableError";
26514
26899
  if (cause !== void 0) this.cause = cause;
26515
26900
  }
26516
26901
  };
26517
- var fastembedModule = null;
26518
- var fastembedProbed = false;
26519
- var fastembedAvailable = false;
26520
- var fastembedLogged = false;
26521
- function _resetFastembedProbe() {
26522
- fastembedModule = null;
26523
- fastembedProbed = false;
26524
- fastembedAvailable = false;
26525
- fastembedLogged = false;
26526
- }
26527
- function _markFastembedUnavailable() {
26528
- fastembedModule = null;
26529
- fastembedProbed = true;
26530
- fastembedAvailable = false;
26531
- fastembedLogged = true;
26532
- }
26533
- async function tryLoadFastembed() {
26534
- if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
26535
- try {
26536
- const mod = await loadFastembedModule();
26537
- if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
26538
- fastembedModule = mod;
26539
- fastembedAvailable = true;
26540
- } catch (err) {
26541
- if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
26542
- fastembedAvailable = false;
26543
- if (!fastembedLogged) {
26544
- fastembedLogged = true;
26545
- const msg = err instanceof Error ? err.message : String(err);
26546
- const remediation = isVendoredBinary() ? "this is a bug in the lore binary; please file an issue. Set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback in the meantime" : "set search.embeddings.provider to 'voyage' or 'openai', set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback, or reinstall fastembed with ONNXRUNTIME_NODE_INSTALL_CUDA=skip";
26547
- info(
26548
- `local embedding provider unavailable (fastembed not installed: ${msg}) \u2014 ${remediation}`
26549
- );
26550
- }
26551
- } finally {
26552
- fastembedProbed = true;
26553
- }
26554
- return fastembedAvailable ? fastembedModule : null;
26902
+ var localProviderKnownBroken = false;
26903
+ var localProviderErrorLogged = false;
26904
+ function _resetLocalProviderProbe() {
26905
+ localProviderKnownBroken = false;
26906
+ localProviderErrorLogged = false;
26555
26907
  }
26556
- async function loadFastembedModule() {
26557
- return await import("fastembed");
26908
+ function _markLocalProviderUnavailable() {
26909
+ localProviderKnownBroken = true;
26910
+ localProviderErrorLogged = true;
26558
26911
  }
26559
- function fastembedKnownUnavailable() {
26560
- return fastembedProbed && !fastembedAvailable;
26912
+ function localProviderKnownUnavailable() {
26913
+ return localProviderKnownBroken;
26561
26914
  }
26562
26915
  var LocalProvider = class {
26563
26916
  // With inference off the main thread, large batches no longer block
26564
26917
  // the event loop. 256 maximises throughput per round-trip to the
26565
- // worker. Backfill callers use a smaller BACKFILL_CHUNK_SIZE to give
26566
- // the worker's priority queue breathing room for recall queries.
26918
+ // worker. Backfill callers use token-budget-based batching (see
26919
+ // nextBatch) to give the worker's priority queue breathing room
26920
+ // for recall queries and prevent OOM on long texts.
26567
26921
  maxBatchSize = 256;
26568
26922
  worker = null;
26569
26923
  workerReady = false;
@@ -26571,14 +26925,14 @@ var LocalProvider = class {
26571
26925
  pendingRequests = /* @__PURE__ */ new Map();
26572
26926
  nextRequestId = 0;
26573
26927
  initPromise = null;
26574
- modelName;
26575
- constructor(modelName) {
26576
- this.modelName = modelName;
26928
+ modelId;
26929
+ dimensions;
26930
+ constructor(modelId, dimensions) {
26931
+ this.modelId = modelId;
26932
+ this.dimensions = dimensions;
26577
26933
  }
26578
26934
  /**
26579
- * Ensure the worker thread is running. Probes fastembed on the main
26580
- * thread first (fast, cached) as a fast-fail gate — the worker is only
26581
- * spawned if the module is known-loadable. Worker startup failure is
26935
+ * Ensure the worker thread is running. Worker startup failure is
26582
26936
  * surfaced as `LocalProviderUnavailableError` to trigger the existing
26583
26937
  * auto-fallback to remote providers.
26584
26938
  */
@@ -26587,8 +26941,7 @@ var LocalProvider = class {
26587
26941
  if (this.workerInitError) throw new LocalProviderUnavailableError(this.workerInitError);
26588
26942
  if (this.initPromise) return this.initPromise;
26589
26943
  this.initPromise = (async () => {
26590
- const fastembed = await tryLoadFastembed();
26591
- if (!fastembed) throw new LocalProviderUnavailableError();
26944
+ if (localProviderKnownBroken) throw new LocalProviderUnavailableError();
26592
26945
  const { Worker } = await import("node:worker_threads");
26593
26946
  const vendorWorkerUrl = globalThis.__LORE_VENDOR_WORKER_URL__;
26594
26947
  let workerUrl;
@@ -26602,12 +26955,22 @@ var LocalProvider = class {
26602
26955
  workerUrl = vendorWorkerUrl;
26603
26956
  }
26604
26957
  } else {
26605
- workerUrl = new URL(`./embedding-worker${import.meta.url.endsWith(".ts") ? ".ts" : ".js"}`, import.meta.url);
26958
+ const selfUrl = typeof import.meta.url === "string" ? import.meta.url : void 0;
26959
+ if (selfUrl) {
26960
+ workerUrl = new URL(
26961
+ `./embedding-worker${selfUrl.endsWith(".ts") ? ".ts" : ".js"}`,
26962
+ selfUrl
26963
+ );
26964
+ } else {
26965
+ const { pathToFileURL } = await import("node:url");
26966
+ workerUrl = new URL("./embedding-worker.cjs", pathToFileURL(__filename));
26967
+ }
26606
26968
  }
26607
26969
  const vendor = vendorModelInfo();
26608
26970
  const workerInitData = {
26609
- modelName: this.modelName,
26610
- vendorModel: vendor ? { modelAbsoluteDirPath: vendor.modelAbsoluteDirPath, modelName: vendor.modelName } : null
26971
+ modelId: this.modelId,
26972
+ dimensions: this.dimensions,
26973
+ vendorModel: vendor ? { localModelPath: vendor.localModelPath } : null
26611
26974
  };
26612
26975
  this.worker = new Worker(workerUrl, { workerData: workerInitData });
26613
26976
  this.worker.unref();
@@ -26634,6 +26997,13 @@ var LocalProvider = class {
26634
26997
  case "init-error": {
26635
26998
  this.workerInitError = msg.error;
26636
26999
  this.workerReady = false;
27000
+ localProviderKnownBroken = true;
27001
+ if (!localProviderErrorLogged) {
27002
+ localProviderErrorLogged = true;
27003
+ info(
27004
+ `local embedding provider failed to init: ${msg.error}. Set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback.`
27005
+ );
27006
+ }
26637
27007
  for (const [, p2] of this.pendingRequests) {
26638
27008
  p2.reject(new LocalProviderUnavailableError(msg.error));
26639
27009
  }
@@ -26685,6 +27055,8 @@ var LocalProvider = class {
26685
27055
  }
26686
27056
  async embed(texts, inputType) {
26687
27057
  await this.ensureWorker();
27058
+ const prefix = inputType === "document" ? "search_document: " : "search_query: ";
27059
+ const prefixed = texts.map((t2) => prefix + t2);
26688
27060
  const id = this.nextRequestId++;
26689
27061
  const priority = inputType === "query" && texts.length === 1 ? "high" : "normal";
26690
27062
  return new Promise((resolve, reject) => {
@@ -26693,7 +27065,7 @@ var LocalProvider = class {
26693
27065
  this.worker.postMessage({
26694
27066
  type: "embed",
26695
27067
  id,
26696
- texts,
27068
+ texts: prefixed,
26697
27069
  inputType,
26698
27070
  priority
26699
27071
  });
@@ -26701,8 +27073,6 @@ var LocalProvider = class {
26701
27073
  }
26702
27074
  /** Shut down the worker thread. Called by `resetProvider()` on config change.
26703
27075
  * Sends a shutdown message so the worker calls `process.exit(0)` internally.
26704
- * We avoid `worker.terminate()` because Bun's forced termination triggers a
26705
- * NAPI fatal error when tearing down onnxruntime's native bindings.
26706
27076
  *
26707
27077
  * Returns a promise that resolves once the worker has fully exited. Callers
26708
27078
  * that need a clean teardown (tests, config change) should await the result.
@@ -26725,7 +27095,7 @@ var LocalProvider = class {
26725
27095
  }
26726
27096
  };
26727
27097
  var PROVIDER_DEFAULTS = {
26728
- local: { model: "BGESmallENV15", dimensions: 384 },
27098
+ local: { model: "nomic-ai/nomic-embed-text-v1.5", dimensions: 768 },
26729
27099
  voyage: { model: "voyage-code-3", dimensions: 1024 },
26730
27100
  openai: { model: "text-embedding-3-small", dimensions: 1536 }
26731
27101
  };
@@ -26749,7 +27119,7 @@ function getProvider() {
26749
27119
  const model = cfg.model;
26750
27120
  switch (providerName) {
26751
27121
  case "local": {
26752
- cachedProvider = new LocalProvider(model);
27122
+ cachedProvider = new LocalProvider(model, cfg.dimensions);
26753
27123
  break;
26754
27124
  }
26755
27125
  case "voyage": {
@@ -26826,7 +27196,7 @@ function pickRemoteFallback() {
26826
27196
  function isAvailable() {
26827
27197
  const provider = getProvider();
26828
27198
  if (!provider) return false;
26829
- if (provider instanceof LocalProvider && fastembedKnownUnavailable()) return false;
27199
+ if (provider instanceof LocalProvider && localProviderKnownUnavailable()) return false;
26830
27200
  return true;
26831
27201
  }
26832
27202
  async function embed(texts, inputType) {
@@ -26841,7 +27211,7 @@ async function embed(texts, inputType) {
26841
27211
  if (!remoteFallbackLogged) {
26842
27212
  remoteFallbackLogged = true;
26843
27213
  info(
26844
- `fastembed unavailable; auto-switching to ${fallback.name} (set search.embeddings.provider in .lore.json to silence this)`
27214
+ `local embedding provider unavailable; auto-switching to ${fallback.name} (set search.embeddings.provider in .lore.json to silence this)`
26845
27215
  );
26846
27216
  }
26847
27217
  cachedProvider = fallback.provider;
@@ -26869,8 +27239,14 @@ function fromBlob(blob) {
26869
27239
  const bytes = new Uint8Array(blob);
26870
27240
  return new Float32Array(bytes.buffer, bytes.byteOffset, bytes.byteLength / 4);
26871
27241
  }
26872
- function vectorSearch(queryEmbedding, limit = 10) {
26873
- const rows = db().query("SELECT id, embedding FROM knowledge WHERE embedding IS NOT NULL AND confidence > 0.2").all();
27242
+ function vectorSearch(queryEmbedding, limit = 10, excludeCategories) {
27243
+ let sql = "SELECT id, embedding FROM knowledge WHERE embedding IS NOT NULL AND confidence > 0.2";
27244
+ const params = [];
27245
+ if (excludeCategories?.length) {
27246
+ sql += ` AND category NOT IN (${excludeCategories.map(() => "?").join(",")})`;
27247
+ params.push(...excludeCategories);
27248
+ }
27249
+ const rows = db().query(sql).all(...params);
26874
27250
  const scored = [];
26875
27251
  for (const row of rows) {
26876
27252
  const vec = fromBlob(row.embedding);
@@ -26893,6 +27269,20 @@ function vectorSearchDistillations(queryEmbedding, limit = 10) {
26893
27269
  scored.sort((a, b) => b.similarity - a.similarity);
26894
27270
  return scored.slice(0, limit);
26895
27271
  }
27272
+ var MAX_DISTILLATION_VECTOR_ROWS = 500;
27273
+ function vectorSearchAllDistillations(queryEmbedding, projectId2, limit = 20) {
27274
+ const rows = db().query(
27275
+ "SELECT id, session_id, embedding FROM distillations WHERE embedding IS NOT NULL AND project_id = ? ORDER BY created_at DESC LIMIT ?"
27276
+ ).all(projectId2, MAX_DISTILLATION_VECTOR_ROWS);
27277
+ const scored = [];
27278
+ for (const row of rows) {
27279
+ const vec = fromBlob(row.embedding);
27280
+ const sim = cosineSimilarity(queryEmbedding, vec);
27281
+ scored.push({ id: row.id, session_id: row.session_id, similarity: sim });
27282
+ }
27283
+ scored.sort((a, b) => b.similarity - a.similarity);
27284
+ return scored.slice(0, limit);
27285
+ }
26896
27286
  function embedKnowledgeEntry(id, title, content3) {
26897
27287
  const text4 = `${title}
26898
27288
  ${content3}`;
@@ -26994,20 +27384,37 @@ async function runStartupBackfill() {
26994
27384
  );
26995
27385
  info(`embedding startup: ${parts.join("; ")}`);
26996
27386
  }
26997
- var BACKFILL_CHUNK_SIZE = 32;
27387
+ var MAX_BACKFILL_CHUNK = 8;
27388
+ var MAX_BATCH_TOKEN_AREA = 4096;
27389
+ var CHARS_PER_TOKEN = 4;
27390
+ function nextBatch(rows, start) {
27391
+ const batch = [];
27392
+ let maxTokens = 0;
27393
+ for (let i = start; i < rows.length && batch.length < MAX_BACKFILL_CHUNK; i++) {
27394
+ const estTokens = Math.ceil(rows[i].text.length / CHARS_PER_TOKEN);
27395
+ const newMax = Math.max(maxTokens, estTokens);
27396
+ const newArea = (batch.length + 1) * newMax;
27397
+ if (batch.length > 0 && newArea > MAX_BATCH_TOKEN_AREA) break;
27398
+ batch.push(rows[i]);
27399
+ maxTokens = newMax;
27400
+ }
27401
+ return batch;
27402
+ }
26998
27403
  async function backfillEmbeddings() {
26999
27404
  checkConfigChange();
27000
27405
  const provider = getProvider();
27001
27406
  if (!provider) return 0;
27002
27407
  const rows = db().query("SELECT id, title, content FROM knowledge WHERE embedding IS NULL AND confidence > 0.2").all();
27003
27408
  if (!rows.length) return 0;
27409
+ const items = rows.map((r) => ({ ...r, text: `${r.title}
27410
+ ${r.content}` }));
27004
27411
  let embedded = 0;
27005
- for (let i = 0; i < rows.length; i += BACKFILL_CHUNK_SIZE) {
27006
- const batch = rows.slice(i, i + BACKFILL_CHUNK_SIZE);
27007
- const texts = batch.map((r) => `${r.title}
27008
- ${r.content}`);
27412
+ let i = 0;
27413
+ while (i < items.length) {
27414
+ const batch = nextBatch(items, i);
27415
+ i += batch.length;
27009
27416
  try {
27010
- const vectors = await embed(texts, "document");
27417
+ const vectors = await embed(batch.map((b) => b.text), "document");
27011
27418
  const update2 = db().prepare(
27012
27419
  "UPDATE knowledge SET embedding = ? WHERE id = ?"
27013
27420
  );
@@ -27016,7 +27423,7 @@ ${r.content}`);
27016
27423
  embedded++;
27017
27424
  }
27018
27425
  } catch (err) {
27019
- info(`embedding backfill batch ${i}-${i + batch.length} failed:`, err);
27426
+ error(`embedding backfill batch failed (${batch.length} items):`, err);
27020
27427
  }
27021
27428
  }
27022
27429
  if (embedded > 0) {
@@ -27034,11 +27441,13 @@ async function backfillDistillationEmbeddings() {
27034
27441
  let embedded = 0;
27035
27442
  const PROGRESS_INTERVAL = 256;
27036
27443
  let nextProgressAt = PROGRESS_INTERVAL;
27037
- for (let i = 0; i < rows.length; i += BACKFILL_CHUNK_SIZE) {
27038
- const batch = rows.slice(i, i + BACKFILL_CHUNK_SIZE);
27039
- const texts = batch.map((r) => r.observations);
27444
+ const items = rows.map((r) => ({ ...r, text: r.observations }));
27445
+ let i = 0;
27446
+ while (i < items.length) {
27447
+ const batch = nextBatch(items, i);
27448
+ i += batch.length;
27040
27449
  try {
27041
- const vectors = await embed(texts, "document");
27450
+ const vectors = await embed(batch.map((b) => b.text), "document");
27042
27451
  const update2 = db().prepare(
27043
27452
  "UPDATE distillations SET embedding = ? WHERE id = ?"
27044
27453
  );
@@ -27047,7 +27456,7 @@ async function backfillDistillationEmbeddings() {
27047
27456
  embedded++;
27048
27457
  }
27049
27458
  } catch (err) {
27050
- info(`distillation embedding backfill batch ${i}-${i + batch.length} failed:`, err);
27459
+ error(`distillation embedding backfill batch failed (${batch.length} items):`, err);
27051
27460
  }
27052
27461
  if (embedded >= nextProgressAt) {
27053
27462
  info(`embedding distillations: ${embedded}/${rows.length}\u2026`);
@@ -27161,7 +27570,7 @@ function searchLike(input) {
27161
27570
  if (!terms.length) return [];
27162
27571
  const conditions = terms.map(() => "LOWER(content) LIKE ?").join(" AND ");
27163
27572
  const likeParams = terms.map((t2) => `%${t2}%`);
27164
- const query = input.sessionID ? `SELECT * FROM temporal_messages WHERE project_id = ? AND session_id = ? AND ${conditions} ORDER BY created_at DESC LIMIT ?` : `SELECT * FROM temporal_messages WHERE project_id = ? AND ${conditions} ORDER BY created_at DESC LIMIT ?`;
27573
+ const query = input.sessionID ? `SELECT * FROM temporal_messages WHERE project_id = ? AND session_id = ? AND distilled = 0 AND ${conditions} ORDER BY created_at DESC LIMIT ?` : `SELECT * FROM temporal_messages WHERE project_id = ? AND distilled = 0 AND ${conditions} ORDER BY created_at DESC LIMIT ?`;
27165
27574
  const params = input.sessionID ? [input.pid, input.sessionID, ...likeParams, input.limit] : [input.pid, ...likeParams, input.limit];
27166
27575
  return db().query(query).all(...params);
27167
27576
  }
@@ -27170,10 +27579,10 @@ function search2(input) {
27170
27579
  const limit = input.limit ?? 20;
27171
27580
  const ftsSQL = input.sessionID ? `SELECT m.* FROM temporal_fts f
27172
27581
  CROSS JOIN temporal_messages m ON m.rowid = f.rowid
27173
- WHERE f.content MATCH ? AND m.project_id = ? AND m.session_id = ?
27582
+ WHERE f.content MATCH ? AND m.project_id = ? AND m.session_id = ? AND m.distilled = 0
27174
27583
  ORDER BY rank LIMIT ?` : `SELECT m.* FROM temporal_fts f
27175
27584
  CROSS JOIN temporal_messages m ON m.rowid = f.rowid
27176
- WHERE f.content MATCH ? AND m.project_id = ?
27585
+ WHERE f.content MATCH ? AND m.project_id = ? AND m.distilled = 0
27177
27586
  ORDER BY rank LIMIT ?`;
27178
27587
  try {
27179
27588
  return runRelaxedSearch(input.query, (matchExpr) => {
@@ -27194,10 +27603,10 @@ function searchScored(input) {
27194
27603
  const limit = input.limit ?? 20;
27195
27604
  const ftsSQL = input.sessionID ? `SELECT m.*, rank FROM temporal_fts f
27196
27605
  CROSS JOIN temporal_messages m ON m.rowid = f.rowid
27197
- WHERE f.content MATCH ? AND m.project_id = ? AND m.session_id = ?
27606
+ WHERE f.content MATCH ? AND m.project_id = ? AND m.session_id = ? AND m.distilled = 0
27198
27607
  ORDER BY rank LIMIT ?` : `SELECT m.*, rank FROM temporal_fts f
27199
27608
  CROSS JOIN temporal_messages m ON m.rowid = f.rowid
27200
- WHERE f.content MATCH ? AND m.project_id = ?
27609
+ WHERE f.content MATCH ? AND m.project_id = ? AND m.distilled = 0
27201
27610
  ORDER BY rank LIMIT ?`;
27202
27611
  try {
27203
27612
  return runRelaxedSearch(input.query, (matchExpr) => {
@@ -27226,6 +27635,12 @@ function count(projectPath, sessionID) {
27226
27635
  const params = sessionID ? [pid, sessionID] : [pid];
27227
27636
  return db().query(query).get(...params).count;
27228
27637
  }
27638
+ function hasMessages(projectPath, sessionID) {
27639
+ const pid = ensureProject(projectPath);
27640
+ return !!db().query(
27641
+ "SELECT 1 FROM temporal_messages WHERE project_id = ? AND session_id = ? LIMIT 1"
27642
+ ).get(pid, sessionID);
27643
+ }
27229
27644
  function undistilledCount(projectPath, sessionID) {
27230
27645
  const pid = ensureProject(projectPath);
27231
27646
  const query = sessionID ? "SELECT COUNT(*) as count FROM temporal_messages WHERE project_id = ? AND session_id = ? AND distilled = 0" : "SELECT COUNT(*) as count FROM temporal_messages WHERE project_id = ? AND distilled = 0";
@@ -27284,17 +27699,31 @@ function prune(input) {
27284
27699
  var ltm_exports = {};
27285
27700
  __export(ltm_exports, {
27286
27701
  all: () => all2,
27702
+ calibrateDedupThreshold: () => calibrateDedupThreshold,
27287
27703
  cascadeRefReplace: () => cascadeRefReplace,
27288
27704
  check: () => check2,
27289
27705
  cleanDeadRefs: () => cleanDeadRefs,
27290
27706
  create: () => create,
27707
+ crossProject: () => crossProject,
27708
+ dedupPairKey: () => dedupPairKey,
27709
+ deduplicate: () => deduplicate,
27710
+ deduplicateGlobal: () => deduplicateGlobal,
27291
27711
  extractRefs: () => extractRefs,
27712
+ findFuzzyDuplicate: () => findFuzzyDuplicate,
27292
27713
  forProject: () => forProject,
27293
27714
  forSession: () => forSession,
27294
27715
  get: () => get,
27716
+ getDedupFeedback: () => getDedupFeedback,
27717
+ getDedupFeedbackCount: () => getDedupFeedbackCount,
27718
+ loadCalibratedThreshold: () => loadCalibratedThreshold,
27719
+ pruneDedupFeedback: () => pruneDedupFeedback,
27295
27720
  pruneOversized: () => pruneOversized,
27721
+ recordAutoSignals: () => recordAutoSignals,
27722
+ recordDedupFeedback: () => recordDedupFeedback,
27723
+ recordDedupResultFeedback: () => recordDedupResultFeedback,
27296
27724
  remove: () => remove,
27297
27725
  resolveRef: () => resolveRef2,
27726
+ saveCalibratedThreshold: () => saveCalibratedThreshold,
27298
27727
  search: () => search3,
27299
27728
  searchScored: () => searchScored3,
27300
27729
  searchScoredOtherProjects: () => searchScoredOtherProjects,
@@ -27647,8 +28076,8 @@ __export(lat_reader_exports, {
27647
28076
  scoreForSession: () => scoreForSession,
27648
28077
  searchScored: () => searchScored2
27649
28078
  });
27650
- import { readdirSync, readFileSync as readFileSync2, existsSync as existsSync2, statSync } from "fs";
27651
- import { join as join4, relative } from "path";
28079
+ import { readdirSync, readFileSync as readFileSync2, existsSync as existsSync3, statSync as statSync2 } from "fs";
28080
+ import { join as join6, relative } from "path";
27652
28081
  var processor2 = remark();
27653
28082
  function estimateTokens2(text4) {
27654
28083
  return Math.ceil(text4.length / 3);
@@ -27726,7 +28155,7 @@ function listMarkdownFiles(dir) {
27726
28155
  try {
27727
28156
  const entries = readdirSync(dir, { withFileTypes: true });
27728
28157
  for (const entry of entries) {
27729
- const fullPath = join4(dir, entry.name);
28158
+ const fullPath = join6(dir, entry.name);
27730
28159
  if (entry.isDirectory() && !entry.name.startsWith(".")) {
27731
28160
  results.push(...listMarkdownFiles(fullPath));
27732
28161
  } else if (entry.isFile() && entry.name.endsWith(".md")) {
@@ -27741,12 +28170,12 @@ function contentHash(content3) {
27741
28170
  return sha256(content3);
27742
28171
  }
27743
28172
  function hasLatDir(projectPath) {
27744
- const latDir = join4(projectPath, "lat.md");
27745
- return existsSync2(latDir) && statSync(latDir).isDirectory();
28173
+ const latDir = join6(projectPath, "lat.md");
28174
+ return existsSync3(latDir) && statSync2(latDir).isDirectory();
27746
28175
  }
27747
28176
  function refresh(projectPath) {
27748
- const latDir = join4(projectPath, "lat.md");
27749
- if (!existsSync2(latDir) || !statSync(latDir).isDirectory()) return 0;
28177
+ const latDir = join6(projectPath, "lat.md");
28178
+ if (!existsSync3(latDir) || !statSync2(latDir).isDirectory()) return 0;
27750
28179
  const pid = ensureProject(projectPath);
27751
28180
  const files = listMarkdownFiles(latDir);
27752
28181
  let upserted = 0;
@@ -27868,6 +28297,7 @@ var KNOWLEDGE_COLS = "id, project_id, category, title, content, source_session,
27868
28297
  var KNOWLEDGE_COLS_K = "k.id, k.project_id, k.category, k.title, k.content, k.source_session, k.cross_project, k.confidence, k.created_at, k.updated_at, k.metadata";
27869
28298
  function create(input) {
27870
28299
  const pid = input.scope === "project" && input.projectPath ? ensureProject(input.projectPath) : null;
28300
+ const crossProject2 = pid === null ? true : input.crossProject ?? false;
27871
28301
  if (!input.id) {
27872
28302
  const existing = pid !== null ? db().query(
27873
28303
  "SELECT id FROM knowledge WHERE project_id = ? AND LOWER(title) = LOWER(?) AND confidence > 0 LIMIT 1"
@@ -27885,6 +28315,11 @@ function create(input) {
27885
28315
  update(crossExisting.id, { content: input.content });
27886
28316
  return crossExisting.id;
27887
28317
  }
28318
+ const fuzzyMatch = findFuzzyDuplicate({ title: input.title, projectId: pid });
28319
+ if (fuzzyMatch) {
28320
+ update(fuzzyMatch.id, { content: input.content });
28321
+ return fuzzyMatch.id;
28322
+ }
27888
28323
  }
27889
28324
  const id = input.id ?? uuidv72();
27890
28325
  const now = Date.now();
@@ -27898,7 +28333,7 @@ function create(input) {
27898
28333
  input.title,
27899
28334
  input.content,
27900
28335
  input.session ?? null,
27901
- input.crossProject ?? false ? 1 : 0,
28336
+ crossProject2 ? 1 : 0,
27902
28337
  now,
27903
28338
  now
27904
28339
  );
@@ -27916,7 +28351,7 @@ function update(id, input) {
27916
28351
  }
27917
28352
  if (input.confidence !== void 0) {
27918
28353
  sets.push("confidence = ?");
27919
- params.push(input.confidence);
28354
+ params.push(Math.max(0, Math.min(1, input.confidence)));
27920
28355
  }
27921
28356
  sets.push("updated_at = ?");
27922
28357
  params.push(Date.now());
@@ -27932,6 +28367,50 @@ function update(id, input) {
27932
28367
  function remove(id) {
27933
28368
  db().query("DELETE FROM knowledge WHERE id = ?").run(id);
27934
28369
  }
28370
+ function titleOverlap(a, b) {
28371
+ const wordsA = new Set(filterTerms(a).map((w) => w.toLowerCase()));
28372
+ const wordsB = new Set(filterTerms(b).map((w) => w.toLowerCase()));
28373
+ if (wordsA.size === 0 || wordsB.size === 0) return { coefficient: 0, intersectionSize: 0 };
28374
+ const intersection2 = [...wordsA].filter((w) => wordsB.has(w));
28375
+ return {
28376
+ coefficient: intersection2.length / Math.min(wordsA.size, wordsB.size),
28377
+ intersectionSize: intersection2.length
28378
+ };
28379
+ }
28380
+ var FUZZY_DEDUP_THRESHOLD = 0.7;
28381
+ var FUZZY_DEDUP_MIN_OVERLAP = 4;
28382
+ var EMBEDDING_DEDUP_THRESHOLD = 0.935;
28383
+ function findFuzzyDuplicate(input) {
28384
+ const q = ftsQueryOr(input.title);
28385
+ if (q === EMPTY_QUERY) return null;
28386
+ const { title: tw, content: cw, category: catw } = config2().search.ftsWeights;
28387
+ try {
28388
+ const excludeClause = input.excludeId ? "AND k.id != ?" : "";
28389
+ const sql = input.projectId !== null ? `SELECT k.id, k.title FROM knowledge_fts f
28390
+ CROSS JOIN knowledge k ON k.rowid = f.rowid
28391
+ WHERE knowledge_fts MATCH ?
28392
+ AND (k.project_id = ? OR k.cross_project = 1)
28393
+ AND k.confidence > 0.2
28394
+ ${excludeClause}
28395
+ ORDER BY bm25(knowledge_fts, ?, ?, ?) LIMIT 5` : `SELECT k.id, k.title FROM knowledge_fts f
28396
+ CROSS JOIN knowledge k ON k.rowid = f.rowid
28397
+ WHERE knowledge_fts MATCH ?
28398
+ AND (k.project_id IS NULL OR k.cross_project = 1)
28399
+ AND k.confidence > 0.2
28400
+ ${excludeClause}
28401
+ ORDER BY bm25(knowledge_fts, ?, ?, ?) LIMIT 5`;
28402
+ const params = input.projectId !== null ? [q, input.projectId, ...input.excludeId ? [input.excludeId] : [], tw, cw, catw] : [q, ...input.excludeId ? [input.excludeId] : [], tw, cw, catw];
28403
+ const candidates = db().query(sql).all(...params);
28404
+ for (const candidate of candidates) {
28405
+ const { coefficient, intersectionSize } = titleOverlap(input.title, candidate.title);
28406
+ if (coefficient >= FUZZY_DEDUP_THRESHOLD && intersectionSize >= FUZZY_DEDUP_MIN_OVERLAP) {
28407
+ return candidate;
28408
+ }
28409
+ }
28410
+ } catch {
28411
+ }
28412
+ return null;
28413
+ }
27935
28414
  function forProject(projectPath, includeCross = true) {
27936
28415
  const pid = ensureProject(projectPath);
27937
28416
  if (includeCross) {
@@ -27981,18 +28460,29 @@ function scoreEntriesFTS(sessionContext) {
27981
28460
  return /* @__PURE__ */ new Map();
27982
28461
  }
27983
28462
  }
27984
- function forSession(projectPath, sessionID, maxTokens) {
28463
+ async function forSession(projectPath, sessionID, maxTokens, options) {
27985
28464
  const pid = ensureProject(projectPath);
28465
+ const categoryFilter = options?.categories;
28466
+ const excludeFilter = options?.excludeCategories;
28467
+ let categoryClause = "";
28468
+ let categoryParams = [];
28469
+ if (categoryFilter?.length) {
28470
+ categoryClause = ` AND category IN (${categoryFilter.map(() => "?").join(",")})`;
28471
+ categoryParams = categoryFilter;
28472
+ } else if (excludeFilter?.length) {
28473
+ categoryClause = ` AND category NOT IN (${excludeFilter.map(() => "?").join(",")})`;
28474
+ categoryParams = excludeFilter;
28475
+ }
27986
28476
  const projectEntries = db().query(
27987
28477
  `SELECT ${KNOWLEDGE_COLS} FROM knowledge
27988
- WHERE project_id = ? AND cross_project = 0 AND confidence > 0.2
28478
+ WHERE project_id = ? AND cross_project = 0 AND confidence > 0.2${categoryClause}
27989
28479
  ORDER BY confidence DESC, updated_at DESC`
27990
- ).all(pid);
28480
+ ).all(pid, ...categoryParams);
27991
28481
  const crossEntries = db().query(
27992
28482
  `SELECT ${KNOWLEDGE_COLS} FROM knowledge
27993
- WHERE (project_id IS NULL OR cross_project = 1) AND confidence > 0.2
28483
+ WHERE (project_id IS NULL OR cross_project = 1) AND confidence > 0.2${categoryClause}
27994
28484
  ORDER BY confidence DESC, updated_at DESC`
27995
- ).all();
28485
+ ).all(...categoryParams);
27996
28486
  if (!crossEntries.length && !projectEntries.length) return [];
27997
28487
  let sessionContext = "";
27998
28488
  if (sessionID) {
@@ -28013,22 +28503,52 @@ function forSession(projectPath, sessionID, maxTokens) {
28013
28503
  sessionContext += recentMsgs.map((m) => m.content).join("\n");
28014
28504
  }
28015
28505
  }
28506
+ if (!sessionContext.trim() && options?.contextHint) {
28507
+ sessionContext = options.contextHint;
28508
+ }
28016
28509
  let scoredProject;
28017
28510
  let scoredCross;
28018
- if (sessionContext.trim().length > 20) {
28511
+ if (sessionContext.trim().length > 20 && isAvailable()) {
28512
+ let vectorScores;
28513
+ try {
28514
+ const [contextVec] = await embed([sessionContext], "query");
28515
+ const hits = vectorSearch(contextVec, 50, excludeFilter);
28516
+ vectorScores = new Map(hits.map((h3) => [h3.id, h3.similarity]));
28517
+ } catch (err) {
28518
+ warn("Vector scoring failed, falling back to FTS5:", err);
28519
+ vectorScores = /* @__PURE__ */ new Map();
28520
+ }
28521
+ if (vectorScores.size > 0) {
28522
+ const ftsScores = scoreEntriesFTS(sessionContext);
28523
+ const rawScored = projectEntries.map((entry) => {
28524
+ const vecScore = vectorScores.get(entry.id);
28525
+ const score = vecScore != null ? vecScore * entry.confidence : (ftsScores.get(entry.id) ?? 0) * entry.confidence;
28526
+ return { entry, score };
28527
+ });
28528
+ const matched = rawScored.filter((s) => s.score > 0);
28529
+ const matchedIds = new Set(matched.map((s) => s.entry.id));
28530
+ const safetyNet = projectEntries.filter((e) => !matchedIds.has(e.id)).slice(0, PROJECT_SAFETY_NET).map((e) => ({ entry: e, score: 1e-3 * e.confidence }));
28531
+ scoredProject = [...matched, ...safetyNet];
28532
+ scoredCross = crossEntries.filter((e) => vectorScores.has(e.id) || ftsScores.has(e.id)).map((e) => {
28533
+ const vecScore = vectorScores.get(e.id);
28534
+ const score = vecScore != null ? vecScore * e.confidence : (ftsScores.get(e.id) ?? 0) * e.confidence;
28535
+ return { entry: e, score };
28536
+ });
28537
+ } else {
28538
+ const ftsScores = scoreEntriesFTS(sessionContext);
28539
+ ({ scoredProject, scoredCross } = scoreFTS(
28540
+ projectEntries,
28541
+ crossEntries,
28542
+ ftsScores
28543
+ ));
28544
+ }
28545
+ } else if (sessionContext.trim().length > 20) {
28019
28546
  const ftsScores = scoreEntriesFTS(sessionContext);
28020
- const rawScored = projectEntries.map((entry) => ({
28021
- entry,
28022
- score: (ftsScores.get(entry.id) ?? 0) * entry.confidence
28023
- }));
28024
- const matched = rawScored.filter((s) => s.score > 0);
28025
- const matchedIds = new Set(matched.map((s) => s.entry.id));
28026
- const safetyNet = projectEntries.filter((e) => !matchedIds.has(e.id)).slice(0, PROJECT_SAFETY_NET).map((e) => ({ entry: e, score: 1e-3 * e.confidence }));
28027
- scoredProject = [...matched, ...safetyNet];
28028
- scoredCross = crossEntries.filter((e) => ftsScores.has(e.id)).map((e) => ({
28029
- entry: e,
28030
- score: (ftsScores.get(e.id) ?? 0) * e.confidence
28031
- }));
28547
+ ({ scoredProject, scoredCross } = scoreFTS(
28548
+ projectEntries,
28549
+ crossEntries,
28550
+ ftsScores
28551
+ ));
28032
28552
  } else {
28033
28553
  scoredProject = projectEntries.slice(0, NO_CONTEXT_FALLBACK_CAP).map((entry) => ({ entry, score: entry.confidence }));
28034
28554
  scoredCross = crossEntries.slice(0, NO_CONTEXT_FALLBACK_CAP).map((entry) => ({ entry, score: entry.confidence }));
@@ -28074,11 +28594,33 @@ function forSession(projectPath, sessionID, maxTokens) {
28074
28594
  }
28075
28595
  return result;
28076
28596
  }
28597
+ function scoreFTS(projectEntries, crossEntries, ftsScores) {
28598
+ const rawScored = projectEntries.map((entry) => ({
28599
+ entry,
28600
+ score: (ftsScores.get(entry.id) ?? 0) * entry.confidence
28601
+ }));
28602
+ const matched = rawScored.filter((s) => s.score > 0);
28603
+ const matchedIds = new Set(matched.map((s) => s.entry.id));
28604
+ const safetyNet = projectEntries.filter((e) => !matchedIds.has(e.id)).slice(0, PROJECT_SAFETY_NET).map((e) => ({ entry: e, score: 1e-3 * e.confidence }));
28605
+ const scoredProject = [...matched, ...safetyNet];
28606
+ const scoredCross = crossEntries.filter((e) => ftsScores.has(e.id)).map((e) => ({
28607
+ entry: e,
28608
+ score: (ftsScores.get(e.id) ?? 0) * e.confidence
28609
+ }));
28610
+ return { scoredProject, scoredCross };
28611
+ }
28077
28612
  function all2() {
28078
28613
  return db().query(
28079
28614
  `SELECT ${KNOWLEDGE_COLS} FROM knowledge WHERE confidence > 0.2 ORDER BY confidence DESC, updated_at DESC`
28080
28615
  ).all();
28081
28616
  }
28617
+ function crossProject() {
28618
+ return db().query(
28619
+ `SELECT ${KNOWLEDGE_COLS} FROM knowledge
28620
+ WHERE (project_id IS NULL OR cross_project = 1) AND confidence > 0.2
28621
+ ORDER BY confidence DESC, updated_at DESC`
28622
+ ).all();
28623
+ }
28082
28624
  function searchLike2(input) {
28083
28625
  const terms = input.query.toLowerCase().split(/\s+/).filter((t2) => t2.length > 2);
28084
28626
  if (!terms.length) return [];
@@ -28310,6 +28852,270 @@ function check2(projectPath) {
28310
28852
  }
28311
28853
  return issues;
28312
28854
  }
28855
+ function dedupPairKey(idA, idB) {
28856
+ return idA < idB ? `${idA}:${idB}` : `${idB}:${idA}`;
28857
+ }
28858
+ function _dedup(entries, dryRun, embeddingThreshold = EMBEDDING_DEDUP_THRESHOLD) {
28859
+ if (entries.length < 2) return { clusters: [], totalRemoved: 0, pairSimilarities: /* @__PURE__ */ new Map(), entryTitles: /* @__PURE__ */ new Map() };
28860
+ const embeddingMap = /* @__PURE__ */ new Map();
28861
+ {
28862
+ const entryIds = entries.map((e) => e.id);
28863
+ const placeholders = entryIds.map(() => "?").join(",");
28864
+ const rows = db().query(`SELECT id, embedding FROM knowledge WHERE embedding IS NOT NULL AND id IN (${placeholders})`).all(...entryIds);
28865
+ for (const row of rows) {
28866
+ try {
28867
+ embeddingMap.set(row.id, fromBlob(row.embedding));
28868
+ } catch {
28869
+ info(`skipping corrupted embedding for entry ${row.id}`);
28870
+ }
28871
+ }
28872
+ }
28873
+ const neighborMap = /* @__PURE__ */ new Map();
28874
+ const pairSimilarities = /* @__PURE__ */ new Map();
28875
+ for (const entry of entries) {
28876
+ const neighbors = [];
28877
+ const entryVec = embeddingMap.get(entry.id);
28878
+ for (const other of entries) {
28879
+ if (other.id === entry.id) continue;
28880
+ const { coefficient, intersectionSize } = titleOverlap(entry.title, other.title);
28881
+ const titleMatch = coefficient >= FUZZY_DEDUP_THRESHOLD && intersectionSize >= FUZZY_DEDUP_MIN_OVERLAP;
28882
+ let embeddingMatch = false;
28883
+ let similarity = 0;
28884
+ if (entryVec) {
28885
+ const otherVec = embeddingMap.get(other.id);
28886
+ if (otherVec && entryVec.length === otherVec.length) {
28887
+ similarity = cosineSimilarity(entryVec, otherVec);
28888
+ embeddingMatch = similarity >= embeddingThreshold;
28889
+ }
28890
+ }
28891
+ if (similarity > 0) {
28892
+ const pk = dedupPairKey(entry.id, other.id);
28893
+ if (!pairSimilarities.has(pk)) {
28894
+ pairSimilarities.set(pk, similarity);
28895
+ }
28896
+ }
28897
+ if (titleMatch || embeddingMatch) {
28898
+ neighbors.push({ id: other.id, score: Math.max(coefficient, similarity) });
28899
+ }
28900
+ }
28901
+ neighbors.sort((a, b) => b.score - a.score);
28902
+ neighborMap.set(entry.id, neighbors);
28903
+ }
28904
+ const claimed = /* @__PURE__ */ new Set();
28905
+ const rawClusters = /* @__PURE__ */ new Map();
28906
+ const sortedIds = [...neighborMap.keys()].sort(
28907
+ (a, b) => neighborMap.get(b).length - neighborMap.get(a).length
28908
+ );
28909
+ for (const centerId of sortedIds) {
28910
+ if (claimed.has(centerId)) continue;
28911
+ claimed.add(centerId);
28912
+ const members = [centerId];
28913
+ for (const { id: neighborId } of neighborMap.get(centerId)) {
28914
+ if (claimed.has(neighborId)) continue;
28915
+ claimed.add(neighborId);
28916
+ members.push(neighborId);
28917
+ }
28918
+ if (members.length > 1) {
28919
+ rawClusters.set(centerId, members);
28920
+ }
28921
+ }
28922
+ const entryById = new Map(entries.map((e) => [e.id, e]));
28923
+ const result = [];
28924
+ let totalRemoved = 0;
28925
+ for (const members of rawClusters.values()) {
28926
+ if (members.length < 2) continue;
28927
+ const sorted = members.map((id) => entryById.get(id)).filter(Boolean).sort((a, b) => {
28928
+ if (b.confidence !== a.confidence) return b.confidence - a.confidence;
28929
+ if (b.updated_at !== a.updated_at) return b.updated_at - a.updated_at;
28930
+ return a.title.length - b.title.length;
28931
+ });
28932
+ const survivor = sorted[0];
28933
+ const merged = sorted.slice(1);
28934
+ result.push({
28935
+ surviving: { id: survivor.id, title: survivor.title },
28936
+ merged: merged.map((e) => ({ id: e.id, title: e.title }))
28937
+ });
28938
+ if (!dryRun) {
28939
+ for (const entry of merged) {
28940
+ remove(entry.id);
28941
+ }
28942
+ }
28943
+ totalRemoved += merged.length;
28944
+ }
28945
+ result.sort((a, b) => b.merged.length - a.merged.length);
28946
+ const entryTitles = new Map(entries.map((e) => [e.id, e.title]));
28947
+ return { clusters: result, totalRemoved, pairSimilarities, entryTitles };
28948
+ }
28949
+ async function deduplicate(projectPath, opts) {
28950
+ const pid = ensureProject(projectPath);
28951
+ const threshold = loadCalibratedThreshold(pid) ?? EMBEDDING_DEDUP_THRESHOLD;
28952
+ const entries = forProject(projectPath, false);
28953
+ return _dedup(entries, opts?.dryRun ?? true, threshold);
28954
+ }
28955
+ async function deduplicateGlobal(opts) {
28956
+ const threshold = loadCalibratedThreshold(null) ?? EMBEDDING_DEDUP_THRESHOLD;
28957
+ const entries = db().query(
28958
+ `SELECT ${KNOWLEDGE_COLS} FROM knowledge
28959
+ WHERE project_id IS NULL
28960
+ AND confidence > 0.2
28961
+ ORDER BY confidence DESC, updated_at DESC`
28962
+ ).all();
28963
+ return _dedup(entries, opts?.dryRun ?? true, threshold);
28964
+ }
28965
+ var MIN_CALIBRATION_SAMPLES = 20;
28966
+ var DEFAULT_EMBEDDING_DEDUP_THRESHOLD = EMBEDDING_DEDUP_THRESHOLD;
28967
+ var AUTO_SIGNAL_MIN_SIMILARITY = 0.8;
28968
+ var AUTO_SIGNAL_MAX_PAIRS = 50;
28969
+ function recordDedupFeedback(input) {
28970
+ db().query(
28971
+ `INSERT INTO dedup_feedback
28972
+ (project_id, entry_a_title, entry_b_title, similarity, accepted, source, created_at)
28973
+ VALUES (?, ?, ?, ?, ?, ?, ?)`
28974
+ ).run(
28975
+ input.projectId,
28976
+ input.entryATitle,
28977
+ input.entryBTitle,
28978
+ input.similarity,
28979
+ input.accepted ? 1 : 0,
28980
+ input.source,
28981
+ Date.now()
28982
+ );
28983
+ }
28984
+ function recordDedupResultFeedback(projectId2, result, accepted, source) {
28985
+ for (const cluster of result.clusters) {
28986
+ for (const merged of cluster.merged) {
28987
+ const pk = dedupPairKey(cluster.surviving.id, merged.id);
28988
+ const similarity = result.pairSimilarities.get(pk);
28989
+ if (similarity != null && similarity > 0) {
28990
+ recordDedupFeedback({
28991
+ projectId: projectId2,
28992
+ entryATitle: cluster.surviving.title,
28993
+ entryBTitle: merged.title,
28994
+ similarity,
28995
+ accepted,
28996
+ source
28997
+ });
28998
+ }
28999
+ }
29000
+ }
29001
+ }
29002
+ function recordAutoSignals(projectId2, result) {
29003
+ const mergedPairs = /* @__PURE__ */ new Set();
29004
+ for (const cluster of result.clusters) {
29005
+ for (const merged of cluster.merged) {
29006
+ mergedPairs.add(dedupPairKey(cluster.surviving.id, merged.id));
29007
+ }
29008
+ }
29009
+ const titleMap = new Map(result.entryTitles);
29010
+ for (const cluster of result.clusters) {
29011
+ if (!titleMap.has(cluster.surviving.id)) {
29012
+ titleMap.set(cluster.surviving.id, cluster.surviving.title);
29013
+ }
29014
+ for (const m of cluster.merged) {
29015
+ if (!titleMap.has(m.id)) titleMap.set(m.id, m.title);
29016
+ }
29017
+ }
29018
+ const signals = [];
29019
+ for (const [pk, sim] of result.pairSimilarities) {
29020
+ if (sim < AUTO_SIGNAL_MIN_SIMILARITY) continue;
29021
+ if (mergedPairs.has(pk)) continue;
29022
+ const [idA, idB] = pk.split(":");
29023
+ const titleA = titleMap.get(idA);
29024
+ const titleB = titleMap.get(idB);
29025
+ if (!titleA || !titleB) continue;
29026
+ signals.push({ entryATitle: titleA, entryBTitle: titleB, similarity: sim });
29027
+ }
29028
+ const currentThreshold = loadCalibratedThreshold(projectId2) ?? DEFAULT_EMBEDDING_DEDUP_THRESHOLD;
29029
+ signals.sort((a, b) => Math.abs(a.similarity - currentThreshold) - Math.abs(b.similarity - currentThreshold));
29030
+ const capped = signals.slice(0, AUTO_SIGNAL_MAX_PAIRS);
29031
+ pruneDedupFeedback(projectId2);
29032
+ for (const s of capped) {
29033
+ recordDedupFeedback({
29034
+ projectId: projectId2,
29035
+ entryATitle: s.entryATitle,
29036
+ entryBTitle: s.entryBTitle,
29037
+ similarity: s.similarity,
29038
+ accepted: false,
29039
+ source: "auto_dedup"
29040
+ });
29041
+ }
29042
+ }
29043
+ function getDedupFeedback(projectId2) {
29044
+ const rows = projectId2 !== null ? db().query(
29045
+ "SELECT similarity, accepted, source FROM dedup_feedback WHERE project_id = ? ORDER BY similarity"
29046
+ ).all(projectId2) : db().query(
29047
+ "SELECT similarity, accepted, source FROM dedup_feedback WHERE project_id IS NULL ORDER BY similarity"
29048
+ ).all();
29049
+ return rows.map((r) => ({ similarity: r.similarity, accepted: r.accepted === 1, source: r.source }));
29050
+ }
29051
+ function getDedupFeedbackCount(projectId2) {
29052
+ const row = projectId2 !== null ? db().query("SELECT COUNT(*) as cnt FROM dedup_feedback WHERE project_id = ?").get(projectId2) : db().query("SELECT COUNT(*) as cnt FROM dedup_feedback WHERE project_id IS NULL").get();
29053
+ return row?.cnt ?? 0;
29054
+ }
29055
+ var MAX_FEEDBACK_ROWS_PER_PROJECT = 500;
29056
+ function pruneDedupFeedback(projectId2) {
29057
+ const count3 = getDedupFeedbackCount(projectId2);
29058
+ if (count3 <= MAX_FEEDBACK_ROWS_PER_PROJECT) return;
29059
+ const excess = count3 - MAX_FEEDBACK_ROWS_PER_PROJECT;
29060
+ if (projectId2 !== null) {
29061
+ db().query(
29062
+ `DELETE FROM dedup_feedback WHERE id IN (
29063
+ SELECT id FROM dedup_feedback WHERE project_id = ?
29064
+ ORDER BY created_at ASC LIMIT ?
29065
+ )`
29066
+ ).run(projectId2, excess);
29067
+ } else {
29068
+ db().query(
29069
+ `DELETE FROM dedup_feedback WHERE id IN (
29070
+ SELECT id FROM dedup_feedback WHERE project_id IS NULL
29071
+ ORDER BY created_at ASC LIMIT ?
29072
+ )`
29073
+ ).run(excess);
29074
+ }
29075
+ }
29076
+ function calibrateDedupThreshold(projectId2) {
29077
+ const feedback = getDedupFeedback(projectId2);
29078
+ if (feedback.length < MIN_CALIBRATION_SAMPLES) return null;
29079
+ const accepted = feedback.filter((f) => f.accepted);
29080
+ const rejected = feedback.filter((f) => !f.accepted);
29081
+ if (rejected.length === 0) {
29082
+ const minAccepted = Math.min(...accepted.map((f) => f.similarity));
29083
+ return Math.max(0.85, minAccepted - 5e-3);
29084
+ }
29085
+ if (accepted.length === 0) {
29086
+ warn("dedup calibration: all feedback is reject \u2014 keeping default threshold");
29087
+ return null;
29088
+ }
29089
+ const allSims = [...new Set(feedback.map((f) => f.similarity))].sort((a, b) => a - b);
29090
+ let bestThreshold = DEFAULT_EMBEDDING_DEDUP_THRESHOLD;
29091
+ let bestAccuracy = -1;
29092
+ for (let i = 0; i < allSims.length - 1; i++) {
29093
+ const candidate = (allSims[i] + allSims[i + 1]) / 2;
29094
+ const correctAccepted = accepted.filter((f) => f.similarity >= candidate).length;
29095
+ const correctRejected = rejected.filter((f) => f.similarity < candidate).length;
29096
+ const accuracy = (correctAccepted + correctRejected) / feedback.length;
29097
+ if (accuracy > bestAccuracy || accuracy === bestAccuracy && candidate > bestThreshold) {
29098
+ bestAccuracy = accuracy;
29099
+ bestThreshold = candidate;
29100
+ }
29101
+ }
29102
+ return Math.max(0.85, Math.min(0.98, bestThreshold));
29103
+ }
29104
+ function saveCalibratedThreshold(projectId2, threshold, sampleSize) {
29105
+ const key = `dedup_threshold:${projectId2 ?? "global"}`;
29106
+ setKV(key, JSON.stringify({ threshold, sampleSize, calibratedAt: Date.now() }));
29107
+ }
29108
+ function loadCalibratedThreshold(projectId2) {
29109
+ const key = `dedup_threshold:${projectId2 ?? "global"}`;
29110
+ const raw = getKV(key);
29111
+ if (!raw) return null;
29112
+ try {
29113
+ const parsed = JSON.parse(raw);
29114
+ return typeof parsed.threshold === "number" ? parsed.threshold : null;
29115
+ } catch {
29116
+ return null;
29117
+ }
29118
+ }
28313
29119
 
28314
29120
  // src/data.ts
28315
29121
  var data_exports = {};
@@ -28334,11 +29140,11 @@ __export(data_exports, {
28334
29140
  resolveId: () => resolveId,
28335
29141
  wipeDatabase: () => wipeDatabase
28336
29142
  });
28337
- import { statSync as statSync3, unlinkSync, existsSync as existsSync4 } from "fs";
29143
+ import { statSync as statSync4, unlinkSync, existsSync as existsSync5 } from "fs";
28338
29144
 
28339
29145
  // src/agents-file.ts
28340
- import { existsSync as existsSync3, readFileSync as readFileSync3, writeFileSync, mkdirSync as mkdirSync2, statSync as statSync2 } from "fs";
28341
- import { dirname as dirname2, join as join5 } from "path";
29146
+ import { existsSync as existsSync4, readFileSync as readFileSync3, writeFileSync, mkdirSync as mkdirSync3, statSync as statSync3 } from "fs";
29147
+ import { dirname as dirname2, join as join7 } from "path";
28342
29148
  var LORE_SECTION_START = "<!-- This section is maintained by the coding agent via lore (https://github.com/BYK/loreai) -->";
28343
29149
  var LORE_SECTION_END = "<!-- End lore-managed section -->";
28344
29150
  var ALL_START_MARKERS = [
@@ -28369,7 +29175,7 @@ function setCache(fp, entry) {
28369
29175
  ).run(key, value, value);
28370
29176
  }
28371
29177
  function clearLoreFileCache(projectPath) {
28372
- db().query("DELETE FROM kv_meta WHERE key = ?").run(CACHE_PREFIX + join5(projectPath, LORE_FILE));
29178
+ db().query("DELETE FROM kv_meta WHERE key = ?").run(CACHE_PREFIX + join7(projectPath, LORE_FILE));
28373
29179
  }
28374
29180
  function splitFile(fileContent) {
28375
29181
  const spans = [];
@@ -28482,7 +29288,7 @@ function exportToFile(input) {
28482
29288
  const pointerBody = "\n## Long-term Knowledge\n\nFor long-term knowledge entries managed by [lore](https://github.com/BYK/loreai) (gotchas, patterns, decisions, architecture), see [`.lore.md`](.lore.md) in the project root.\n";
28483
29289
  const newSection = LORE_SECTION_START + pointerBody + LORE_SECTION_END + "\n";
28484
29290
  let fileContent = "";
28485
- if (existsSync3(input.filePath)) {
29291
+ if (existsSync4(input.filePath)) {
28486
29292
  fileContent = readFileSync3(input.filePath, "utf8");
28487
29293
  }
28488
29294
  const { before, after } = splitFile(fileContent);
@@ -28491,11 +29297,11 @@ function exportToFile(input) {
28491
29297
  const suffix = after.trimStart();
28492
29298
  const suffixWithSep = suffix.length > 0 ? "\n" + suffix : "";
28493
29299
  const result = prefixWithSep + newSection + suffixWithSep;
28494
- mkdirSync2(dirname2(input.filePath), { recursive: true });
29300
+ mkdirSync3(dirname2(input.filePath), { recursive: true });
28495
29301
  writeFileSync(input.filePath, result, "utf8");
28496
29302
  }
28497
29303
  function shouldImport(input) {
28498
- if (!existsSync3(input.filePath)) return false;
29304
+ if (!existsSync4(input.filePath)) return false;
28499
29305
  const fileContent = readFileSync3(input.filePath, "utf8");
28500
29306
  const { section } = splitFile(fileContent);
28501
29307
  if (section === null) {
@@ -28516,18 +29322,26 @@ function _importEntries(entries, projectPath) {
28516
29322
  update(entry.id, { content: entry.content });
28517
29323
  }
28518
29324
  } else {
28519
- create({
28520
- projectPath,
28521
- category: entry.category,
28522
- title: entry.title,
28523
- content: entry.content,
28524
- scope: "project",
28525
- crossProject: false,
28526
- id: entry.id
28527
- });
29325
+ const pid = ensureProject(projectPath);
29326
+ const fuzzyMatch = findFuzzyDuplicate({ title: entry.title, projectId: pid });
29327
+ if (fuzzyMatch) {
29328
+ if (fuzzyMatch.title !== entry.title || get(fuzzyMatch.id)?.content !== entry.content) {
29329
+ update(fuzzyMatch.id, { content: entry.content });
29330
+ }
29331
+ } else {
29332
+ create({
29333
+ projectPath,
29334
+ category: entry.category,
29335
+ title: entry.title,
29336
+ content: entry.content,
29337
+ scope: "project",
29338
+ crossProject: false,
29339
+ id: entry.id
29340
+ });
29341
+ }
28528
29342
  }
28529
29343
  } else {
28530
- const existing = forProject(projectPath, true);
29344
+ const existing = forProject(projectPath, false);
28531
29345
  const titleMatch = existing.find(
28532
29346
  (e) => e.title.toLowerCase() === entry.title.toLowerCase()
28533
29347
  );
@@ -28545,7 +29359,7 @@ function _importEntries(entries, projectPath) {
28545
29359
  }
28546
29360
  }
28547
29361
  function importFromFile(input) {
28548
- if (!existsSync3(input.filePath)) return;
29362
+ if (!existsSync4(input.filePath)) return;
28549
29363
  const fileContent = readFileSync3(input.filePath, "utf8");
28550
29364
  const { section } = splitFile(fileContent);
28551
29365
  const textToParse = section ?? fileContent;
@@ -28554,25 +29368,25 @@ function importFromFile(input) {
28554
29368
  _importEntries(fileEntries, input.projectPath);
28555
29369
  }
28556
29370
  function loreFileExists(projectPath) {
28557
- return existsSync3(join5(projectPath, LORE_FILE));
29371
+ return existsSync4(join7(projectPath, LORE_FILE));
28558
29372
  }
28559
29373
  function exportLoreFile(projectPath) {
28560
29374
  const sectionBody = buildSection(projectPath);
28561
29375
  const content3 = LORE_FILE_HEADER + "\n" + sectionBody;
28562
29376
  const contentHash2 = hashSection(content3);
28563
- const fp = join5(projectPath, LORE_FILE);
29377
+ const fp = join7(projectPath, LORE_FILE);
28564
29378
  const cached2 = getCache(fp);
28565
29379
  if (cached2 && cached2.hash === contentHash2) {
28566
29380
  return;
28567
29381
  }
28568
29382
  writeFileSync(fp, content3, "utf8");
28569
- const { mtimeMs } = statSync2(fp);
29383
+ const { mtimeMs } = statSync3(fp);
28570
29384
  setCache(fp, { mtimeMs, hash: contentHash2 });
28571
29385
  }
28572
29386
  function shouldImportLoreFile(projectPath) {
28573
- const fp = join5(projectPath, LORE_FILE);
28574
- if (!existsSync3(fp)) return false;
28575
- const { mtimeMs } = statSync2(fp);
29387
+ const fp = join7(projectPath, LORE_FILE);
29388
+ if (!existsSync4(fp)) return false;
29389
+ const { mtimeMs } = statSync3(fp);
28576
29390
  const cached2 = getCache(fp);
28577
29391
  if (cached2 && cached2.mtimeMs === mtimeMs) {
28578
29392
  return false;
@@ -28588,12 +29402,17 @@ function shouldImportLoreFile(projectPath) {
28588
29402
  return true;
28589
29403
  }
28590
29404
  function importLoreFile(projectPath) {
28591
- const fp = join5(projectPath, LORE_FILE);
28592
- if (!existsSync3(fp)) return;
29405
+ const fp = join7(projectPath, LORE_FILE);
29406
+ if (!existsSync4(fp)) return;
28593
29407
  const fileContent = readFileSync3(fp, "utf8");
28594
29408
  const fileEntries = parseEntriesFromSection(fileContent);
28595
29409
  if (!fileEntries.length) return;
28596
29410
  _importEntries(fileEntries, projectPath);
29411
+ try {
29412
+ const { mtimeMs } = statSync3(fp);
29413
+ setCache(fp, { mtimeMs, hash: hashSection(fileContent) });
29414
+ } catch {
29415
+ }
28597
29416
  }
28598
29417
 
28599
29418
  // src/data.ts
@@ -28668,10 +29487,10 @@ function globalStats() {
28668
29487
  let db_size_bytes = 0;
28669
29488
  try {
28670
29489
  const p2 = dbPath();
28671
- db_size_bytes = statSync3(p2).size;
29490
+ db_size_bytes = statSync4(p2).size;
28672
29491
  const walPath = p2 + "-wal";
28673
- if (existsSync4(walPath)) {
28674
- db_size_bytes += statSync3(walPath).size;
29492
+ if (existsSync5(walPath)) {
29493
+ db_size_bytes += statSync4(walPath).size;
28675
29494
  }
28676
29495
  } catch {
28677
29496
  }
@@ -28722,7 +29541,7 @@ function clearProject(projectPath) {
28722
29541
  database.exec("ROLLBACK");
28723
29542
  throw e;
28724
29543
  }
28725
- if (existsSync4(projectPath)) {
29544
+ if (existsSync5(projectPath)) {
28726
29545
  try {
28727
29546
  exportLoreFile(projectPath);
28728
29547
  } catch {
@@ -28793,7 +29612,7 @@ function clearKnowledge(projectPath) {
28793
29612
  "SELECT COUNT(*) as c FROM knowledge WHERE project_id = ?"
28794
29613
  ).get(pid).c;
28795
29614
  db().query("DELETE FROM knowledge WHERE project_id = ?").run(pid);
28796
- if (existsSync4(projectPath)) {
29615
+ if (existsSync5(projectPath)) {
28797
29616
  try {
28798
29617
  exportLoreFile(projectPath);
28799
29618
  } catch {
@@ -28852,7 +29671,7 @@ function wipeDatabase() {
28852
29671
  close();
28853
29672
  for (const suffix of ["", "-wal", "-shm"]) {
28854
29673
  const fp = p2 + suffix;
28855
- if (existsSync4(fp)) {
29674
+ if (existsSync5(fp)) {
28856
29675
  try {
28857
29676
  unlinkSync(fp);
28858
29677
  } catch {
@@ -28893,7 +29712,7 @@ function backfillGitRemotes() {
28893
29712
  for (const project of projects) {
28894
29713
  let gitRemote = project.git_remote;
28895
29714
  if (!gitRemote) {
28896
- if (!existsSync4(project.path)) continue;
29715
+ if (!existsSync5(project.path)) continue;
28897
29716
  gitRemote = getGitRemote(project.path);
28898
29717
  if (!gitRemote) continue;
28899
29718
  const existing = db().query(
@@ -28992,6 +29811,32 @@ var PATTERNS = [
28992
29811
  regex: /(?:user |team |we )(?:always |usually |typically )(?:use|prefer|go with) (.+?)(?:\.|,|$)/gi,
28993
29812
  category: "preference",
28994
29813
  titleFn: (m) => `Typically uses ${m[1].trim()}`
29814
+ },
29815
+ // Process instruction patterns — match distilled observations recording
29816
+ // user assertions about workflow/process rules. The distillation observer
29817
+ // normalizes user instructions into "User stated always X" phrasing.
29818
+ // These require "stated/asserted/said" to avoid overlapping with the
29819
+ // existing "typically uses" pattern above (which already handles
29820
+ // "user always use/prefer/go with X").
29821
+ {
29822
+ regex: /(?:user |team |we )(?:stated |asserted |said )(?:to )?always (.+?)(?:\.|,|$)/gi,
29823
+ category: "preference",
29824
+ titleFn: (m) => `Always ${m[1].trim()}`
29825
+ },
29826
+ {
29827
+ regex: /(?:user |team |we )(?:stated |asserted |said )(?:to )?never (.+?)(?:\.|,|$)/gi,
29828
+ category: "preference",
29829
+ titleFn: (m) => `Never ${m[1].trim()}`
29830
+ },
29831
+ {
29832
+ regex: /(?:user |team |we )(?:stated |asserted |said )(?:to )?make sure to (.+?)(?:\.|,|$)/gi,
29833
+ category: "preference",
29834
+ titleFn: (m) => `Make sure to ${m[1].trim()}`
29835
+ },
29836
+ {
29837
+ regex: /(?:user |team |we )(?:stated |asserted |said )(?:to )?(?:don't|do not) forget (?:to )?(.+?)(?:\.|,|$)/gi,
29838
+ category: "preference",
29839
+ titleFn: (m) => `Always ${m[1].trim()}`
28995
29840
  }
28996
29841
  ];
28997
29842
  function extractPatterns(observations) {
@@ -29001,6 +29846,8 @@ function extractPatterns(observations) {
29001
29846
  regex.lastIndex = 0;
29002
29847
  let match;
29003
29848
  while ((match = regex.exec(observations)) !== null) {
29849
+ const captures = match.slice(1);
29850
+ if (captures.some((c) => c && (c.trim().length <= 2 || /["\u201C\u201D`\u2018\u2019]/.test(c)))) continue;
29004
29851
  const title = titleFn(match);
29005
29852
  const key = title.toLowerCase();
29006
29853
  if (seen.has(key)) continue;
@@ -29116,11 +29963,21 @@ function getSessionState(sessionID) {
29116
29963
  if (!state) {
29117
29964
  state = makeSessionState();
29118
29965
  state.forceMinLayer = loadForceMinLayer(sessionID);
29966
+ const persisted = loadSessionTracking(sessionID);
29967
+ if (persisted && persisted.lastTurnAt > 0) {
29968
+ state.dynamicContextCap = persisted.dynamicContextCap;
29969
+ state.bustRateEMA = persisted.bustRateEMA;
29970
+ state.interBustIntervalEMA = persisted.interBustIntervalEMA;
29971
+ state.lastLayer = persisted.lastLayer;
29972
+ state.lastKnownInput = persisted.lastKnownInput;
29973
+ state.lastTurnAt = persisted.lastTurnAt;
29974
+ state.lastBustAt = persisted.lastBustAt;
29975
+ }
29119
29976
  sessionStates.set(sessionID, state);
29120
29977
  }
29121
29978
  return state;
29122
29979
  }
29123
- function onIdleResume(sessionID, thresholdMs, now = Date.now()) {
29980
+ function onIdleResume(sessionID, thresholdMs, now = Date.now(), skipCompact = false) {
29124
29981
  if (thresholdMs <= 0) return { triggered: false };
29125
29982
  const state = getSessionState(sessionID);
29126
29983
  if (state.lastTurnAt === 0) return { triggered: false };
@@ -29130,7 +29987,7 @@ function onIdleResume(sessionID, thresholdMs, now = Date.now()) {
29130
29987
  state.rawWindowCache = null;
29131
29988
  state.distillationSnapshot = null;
29132
29989
  state.cameOutOfIdle = true;
29133
- state.postIdleCompact = true;
29990
+ state.postIdleCompact = !skipCompact;
29134
29991
  return { triggered: true, idleMs };
29135
29992
  }
29136
29993
  function getLastTurnAt(sessionID) {
@@ -29221,6 +30078,19 @@ function inspectSessionState(sessionID) {
29221
30078
  function setLastTurnAtForTest(sessionID, ms) {
29222
30079
  getSessionState(sessionID).lastTurnAt = ms;
29223
30080
  }
30081
+ function saveGradientState(sessionID) {
30082
+ const state = sessionStates.get(sessionID);
30083
+ if (!state) return;
30084
+ saveSessionTracking(sessionID, {
30085
+ dynamicContextCap: state.dynamicContextCap,
30086
+ bustRateEMA: state.bustRateEMA,
30087
+ interBustIntervalEMA: state.interBustIntervalEMA,
30088
+ lastLayer: state.lastLayer,
30089
+ lastKnownInput: state.lastKnownInput,
30090
+ lastTurnAt: state.lastTurnAt,
30091
+ lastBustAt: state.lastBustAt
30092
+ });
30093
+ }
29224
30094
  function loadDistillations(projectPath, sessionID) {
29225
30095
  const pid = ensureProject(projectPath);
29226
30096
  const query = sessionID ? "SELECT id, observations, generation, token_count, created_at, session_id FROM distillations WHERE project_id = ? AND session_id = ? AND archived = 0 ORDER BY created_at ASC" : "SELECT id, observations, generation, token_count, created_at, session_id FROM distillations WHERE project_id = ? AND archived = 0 ORDER BY created_at ASC";
@@ -29505,6 +30375,26 @@ function buildPrefixMessages(formatted) {
29505
30375
  }
29506
30376
  ];
29507
30377
  }
30378
+ var DECISION_RE = /\b(?:decision|decided|chose|chosen|agreed)\b/i;
30379
+ var GOTCHA_RE = /\b(?:gotcha|(?:critical|known|subtle)\s+bug|broken|crash(?:ed|es)?|regression)\b/i;
30380
+ var ARCH_RE = /\b(?:architecture|design.(?:decision|pattern)|system.design)\b/i;
30381
+ function importanceBonus(d) {
30382
+ let bonus = 0;
30383
+ if (DECISION_RE.test(d.observations)) bonus += 0.3;
30384
+ if (GOTCHA_RE.test(d.observations)) bonus += 0.2;
30385
+ if (ARCH_RE.test(d.observations)) bonus += 0.1;
30386
+ if (d.generation >= 1) bonus += 0.2;
30387
+ return Math.min(bonus, 1);
30388
+ }
30389
+ function selectDistillations(all3, limit) {
30390
+ if (all3.length <= limit) return all3;
30391
+ const maxIdx = all3.length - 1;
30392
+ const scored = all3.map((d, i) => ({
30393
+ d,
30394
+ score: (maxIdx > 0 ? i / maxIdx : 1) * 0.7 + importanceBonus(d) * 0.3
30395
+ }));
30396
+ return scored.sort((a, b) => b.score - a.score).slice(0, limit).map((s) => s.d).sort((a, b) => a.created_at - b.created_at);
30397
+ }
29508
30398
  function distilledPrefix(distillations) {
29509
30399
  if (!distillations.length) return [];
29510
30400
  const formatted = formatDistillations(distillations);
@@ -29622,6 +30512,11 @@ function tryFitStable(input) {
29622
30512
  }
29623
30513
  return result;
29624
30514
  }
30515
+ var COMPRESSION_STAGES = [
30516
+ { strip: "none", rawFrac: null, distFrac: null, distLimit: Infinity, protectedTurns: 0, useStableWindow: true },
30517
+ { strip: "old-tools", rawFrac: 0.5, distFrac: null, distLimit: Infinity, protectedTurns: 2, useStableWindow: false },
30518
+ { strip: "all-tools", rawFrac: 0.55, distFrac: 0.15, distLimit: 5, protectedTurns: 0, useStableWindow: false }
30519
+ ];
29625
30520
  var urgentDistillationMap = /* @__PURE__ */ new Map();
29626
30521
  function needsUrgentDistillation(sessionID) {
29627
30522
  const v = urgentDistillationMap.get(sessionID) ?? false;
@@ -29653,7 +30548,7 @@ function transformInner(input) {
29653
30548
  if (calibrated) return true;
29654
30549
  return result.totalTokens * UNCALIBRATED_SAFETY <= maxInput;
29655
30550
  }
29656
- if (calibrated && sessState.lastLayer >= 1 && input.messages.length >= sessState.lastKnownMessageCount) {
30551
+ if (calibrated && sessState.lastLayer >= 1 && sessState.lastLayer <= 3 && input.messages.length >= sessState.lastKnownMessageCount) {
29657
30552
  effectiveMinLayer = Math.max(effectiveMinLayer, sessState.lastLayer);
29658
30553
  }
29659
30554
  const postIdleCompact = sessState.postIdleCompact;
@@ -29691,7 +30586,8 @@ function transformInner(input) {
29691
30586
  totalTokens: Math.max(0, messageTokens),
29692
30587
  usable,
29693
30588
  distilledBudget,
29694
- rawBudget
30589
+ rawBudget,
30590
+ refreshLtm: false
29695
30591
  };
29696
30592
  }
29697
30593
  const turnStart = currentTurnStart(input.messages);
@@ -29701,67 +30597,52 @@ function transformInner(input) {
29701
30597
  const msgs = distilledPrefix(distillations);
29702
30598
  return { messages: msgs, tokens: msgs.reduce((sum, m) => sum + estimateMessage(m), 0) };
29703
30599
  })();
29704
- if (effectiveMinLayer <= 1) {
29705
- const layer1 = sid ? tryFitStable({
29706
- messages: dedupMessages,
29707
- prefix: cached2.messages,
29708
- prefixTokens: cached2.tokens,
29709
- distilledBudget,
29710
- rawBudget,
29711
- sessionID: sid,
29712
- sessState
29713
- }) : tryFit({
29714
- messages: dedupMessages,
29715
- prefix: cached2.messages,
29716
- prefixTokens: cached2.tokens,
29717
- distilledBudget,
29718
- rawBudget,
29719
- strip: "none"
29720
- });
29721
- if (fitsWithSafetyMargin(layer1)) {
29722
- if (cached2.tokens === 0 && sid) {
30600
+ for (let s = 0; s < COMPRESSION_STAGES.length; s++) {
30601
+ const stageLayer = s + 1;
30602
+ if (effectiveMinLayer > stageLayer) continue;
30603
+ const stage = COMPRESSION_STAGES[s];
30604
+ const stageRawBudget = stage.rawFrac !== null ? Math.floor(usable * stage.rawFrac) : rawBudget;
30605
+ const stageDistBudget = stage.distFrac !== null ? Math.floor(usable * stage.distFrac) : distilledBudget;
30606
+ let stagePrefix = cached2.messages;
30607
+ let stagePrefixTokens = cached2.tokens;
30608
+ if (stage.distLimit !== Infinity && distillations.length > stage.distLimit) {
30609
+ const trimmed = selectDistillations(distillations, stage.distLimit);
30610
+ stagePrefix = distilledPrefix(trimmed);
30611
+ stagePrefixTokens = stagePrefix.reduce((sum, m) => sum + estimateMessage(m), 0);
30612
+ }
30613
+ let result;
30614
+ if (stage.useStableWindow && sid) {
30615
+ result = tryFitStable({
30616
+ messages: dedupMessages,
30617
+ prefix: stagePrefix,
30618
+ prefixTokens: stagePrefixTokens,
30619
+ distilledBudget: stageDistBudget,
30620
+ rawBudget: stageRawBudget,
30621
+ sessionID: sid,
30622
+ sessState
30623
+ });
30624
+ } else {
30625
+ sessState.rawWindowCache = null;
30626
+ result = tryFit({
30627
+ messages: dedupMessages,
30628
+ prefix: stagePrefix,
30629
+ prefixTokens: stagePrefixTokens,
30630
+ distilledBudget: stageDistBudget,
30631
+ rawBudget: stageRawBudget,
30632
+ strip: stage.strip,
30633
+ protectedTurns: stage.protectedTurns
30634
+ });
30635
+ }
30636
+ if (fitsWithSafetyMargin(result)) {
30637
+ if (sid && (s > 0 || cached2.tokens === 0)) {
29723
30638
  urgentDistillationMap.set(sid, true);
29724
30639
  }
29725
- return { ...layer1, layer: 1, usable, distilledBudget, rawBudget };
30640
+ return { ...result, layer: stageLayer, usable, distilledBudget, rawBudget, refreshLtm: false };
29726
30641
  }
29727
30642
  }
29728
30643
  sessState.rawWindowCache = null;
29729
- if (effectiveMinLayer <= 2) {
29730
- const layer2 = tryFit({
29731
- messages: dedupMessages,
29732
- prefix: cached2.messages,
29733
- prefixTokens: cached2.tokens,
29734
- distilledBudget,
29735
- rawBudget: Math.floor(usable * 0.5),
29736
- // give raw more room
29737
- strip: "old-tools",
29738
- protectedTurns: 2
29739
- });
29740
- if (fitsWithSafetyMargin(layer2)) {
29741
- if (sid) urgentDistillationMap.set(sid, true);
29742
- return { ...layer2, layer: 2, usable, distilledBudget, rawBudget };
29743
- }
29744
- }
29745
- const trimmedDistillations = distillations.slice(-5);
29746
- const trimmedPrefix = distilledPrefix(trimmedDistillations);
29747
- const trimmedPrefixTokens = trimmedPrefix.reduce(
29748
- (sum, m) => sum + estimateMessage(m),
29749
- 0
29750
- );
29751
- const layer3 = tryFit({
29752
- messages: dedupMessages,
29753
- prefix: trimmedPrefix,
29754
- prefixTokens: trimmedPrefixTokens,
29755
- distilledBudget: Math.floor(usable * 0.15),
29756
- rawBudget: Math.floor(usable * 0.55),
29757
- strip: "all-tools"
29758
- });
29759
- if (fitsWithSafetyMargin(layer3)) {
29760
- if (sid) urgentDistillationMap.set(sid, true);
29761
- return { ...layer3, layer: 3, usable, distilledBudget, rawBudget };
29762
- }
29763
30644
  if (sid) urgentDistillationMap.set(sid, true);
29764
- const nuclearDistillations = distillations.slice(-2);
30645
+ const nuclearDistillations = selectDistillations(distillations, 2);
29765
30646
  const nuclearPrefix = distilledPrefix(nuclearDistillations);
29766
30647
  const nuclearPrefixTokens = nuclearPrefix.reduce(
29767
30648
  (sum, m) => sum + estimateMessage(m),
@@ -29800,7 +30681,8 @@ function transformInner(input) {
29800
30681
  totalTokens: nuclearPrefixTokens + nuclearRawTokens,
29801
30682
  usable,
29802
30683
  distilledBudget,
29803
- rawBudget
30684
+ rawBudget,
30685
+ refreshLtm: true
29804
30686
  };
29805
30687
  }
29806
30688
  function transform2(input) {
@@ -29907,10 +30789,189 @@ function isWorkerSession(sessionID) {
29907
30789
  return workerSessionIDs.has(sessionID);
29908
30790
  }
29909
30791
 
29910
- // src/distillation.ts
29911
- function compressionRatio(distilledTokens, sourceTokens) {
29912
- if (sourceTokens <= 0) return 0;
29913
- return distilledTokens / Math.sqrt(sourceTokens);
30792
+ // ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
30793
+ var Node = class {
30794
+ value;
30795
+ next;
30796
+ constructor(value) {
30797
+ this.value = value;
30798
+ }
30799
+ };
30800
+ var Queue = class {
30801
+ #head;
30802
+ #tail;
30803
+ #size;
30804
+ constructor() {
30805
+ this.clear();
30806
+ }
30807
+ enqueue(value) {
30808
+ const node2 = new Node(value);
30809
+ if (this.#head) {
30810
+ this.#tail.next = node2;
30811
+ this.#tail = node2;
30812
+ } else {
30813
+ this.#head = node2;
30814
+ this.#tail = node2;
30815
+ }
30816
+ this.#size++;
30817
+ }
30818
+ dequeue() {
30819
+ const current2 = this.#head;
30820
+ if (!current2) {
30821
+ return;
30822
+ }
30823
+ this.#head = this.#head.next;
30824
+ this.#size--;
30825
+ if (!this.#head) {
30826
+ this.#tail = void 0;
30827
+ }
30828
+ return current2.value;
30829
+ }
30830
+ peek() {
30831
+ if (!this.#head) {
30832
+ return;
30833
+ }
30834
+ return this.#head.value;
30835
+ }
30836
+ clear() {
30837
+ this.#head = void 0;
30838
+ this.#tail = void 0;
30839
+ this.#size = 0;
30840
+ }
30841
+ get size() {
30842
+ return this.#size;
30843
+ }
30844
+ *[Symbol.iterator]() {
30845
+ let current2 = this.#head;
30846
+ while (current2) {
30847
+ yield current2.value;
30848
+ current2 = current2.next;
30849
+ }
30850
+ }
30851
+ *drain() {
30852
+ while (this.#head) {
30853
+ yield this.dequeue();
30854
+ }
30855
+ }
30856
+ };
30857
+
30858
+ // ../../node_modules/.bun/p-limit@7.3.0/node_modules/p-limit/index.js
30859
+ function pLimit(concurrency) {
30860
+ let rejectOnClear = false;
30861
+ if (typeof concurrency === "object") {
30862
+ ({ concurrency, rejectOnClear = false } = concurrency);
30863
+ }
30864
+ validateConcurrency(concurrency);
30865
+ if (typeof rejectOnClear !== "boolean") {
30866
+ throw new TypeError("Expected `rejectOnClear` to be a boolean");
30867
+ }
30868
+ const queue = new Queue();
30869
+ let activeCount = 0;
30870
+ const resumeNext = () => {
30871
+ if (activeCount < concurrency && queue.size > 0) {
30872
+ activeCount++;
30873
+ queue.dequeue().run();
30874
+ }
30875
+ };
30876
+ const next = () => {
30877
+ activeCount--;
30878
+ resumeNext();
30879
+ };
30880
+ const run3 = async (function_, resolve, arguments_) => {
30881
+ const result = (async () => function_(...arguments_))();
30882
+ resolve(result);
30883
+ try {
30884
+ await result;
30885
+ } catch {
30886
+ }
30887
+ next();
30888
+ };
30889
+ const enqueue = (function_, resolve, reject, arguments_) => {
30890
+ const queueItem = { reject };
30891
+ new Promise((internalResolve) => {
30892
+ queueItem.run = internalResolve;
30893
+ queue.enqueue(queueItem);
30894
+ }).then(run3.bind(void 0, function_, resolve, arguments_));
30895
+ if (activeCount < concurrency) {
30896
+ resumeNext();
30897
+ }
30898
+ };
30899
+ const generator = (function_, ...arguments_) => new Promise((resolve, reject) => {
30900
+ enqueue(function_, resolve, reject, arguments_);
30901
+ });
30902
+ Object.defineProperties(generator, {
30903
+ activeCount: {
30904
+ get: () => activeCount
30905
+ },
30906
+ pendingCount: {
30907
+ get: () => queue.size
30908
+ },
30909
+ clearQueue: {
30910
+ value() {
30911
+ if (!rejectOnClear) {
30912
+ queue.clear();
30913
+ return;
30914
+ }
30915
+ const abortError = AbortSignal.abort().reason;
30916
+ while (queue.size > 0) {
30917
+ queue.dequeue().reject(abortError);
30918
+ }
30919
+ }
30920
+ },
30921
+ concurrency: {
30922
+ get: () => concurrency,
30923
+ set(newConcurrency) {
30924
+ validateConcurrency(newConcurrency);
30925
+ concurrency = newConcurrency;
30926
+ queueMicrotask(() => {
30927
+ while (activeCount < concurrency && queue.size > 0) {
30928
+ resumeNext();
30929
+ }
30930
+ });
30931
+ }
30932
+ },
30933
+ map: {
30934
+ async value(iterable, function_) {
30935
+ const promises = Array.from(iterable, (value, index2) => this(function_, value, index2));
30936
+ return Promise.all(promises);
30937
+ }
30938
+ }
30939
+ });
30940
+ return generator;
30941
+ }
30942
+ function validateConcurrency(concurrency) {
30943
+ if (!((Number.isInteger(concurrency) || concurrency === Number.POSITIVE_INFINITY) && concurrency > 0)) {
30944
+ throw new TypeError("Expected `concurrency` to be a number from 1 and up");
30945
+ }
30946
+ }
30947
+
30948
+ // src/session-limiter.ts
30949
+ function createLimiterPool() {
30950
+ const limiters = /* @__PURE__ */ new Map();
30951
+ function get2(key) {
30952
+ let limiter = limiters.get(key);
30953
+ if (!limiter) {
30954
+ limiter = pLimit(1);
30955
+ limiters.set(key, limiter);
30956
+ }
30957
+ return limiter;
30958
+ }
30959
+ function isBusy(key) {
30960
+ const limiter = limiters.get(key);
30961
+ return limiter ? limiter.activeCount + limiter.pendingCount > 0 : false;
30962
+ }
30963
+ function clear() {
30964
+ limiters.clear();
30965
+ }
30966
+ return { get: get2, isBusy, clear };
30967
+ }
30968
+ var distillLimiter = createLimiterPool();
30969
+ var curatorLimiter = createLimiterPool();
30970
+
30971
+ // src/distillation.ts
30972
+ function compressionRatio(distilledTokens, sourceTokens) {
30973
+ if (sourceTokens <= 0) return 0;
30974
+ return distilledTokens / Math.sqrt(sourceTokens);
29914
30975
  }
29915
30976
  function maxAllowedExpansion(sourceTokens) {
29916
30977
  if (sourceTokens < 100) return sourceTokens * 5;
@@ -30151,6 +31212,9 @@ function resetOrphans(projectPath, sessionID) {
30151
31212
  return orphans.length;
30152
31213
  }
30153
31214
  async function run(input) {
31215
+ return distillLimiter.get(input.sessionID)(() => runInner(input));
31216
+ }
31217
+ async function runInner(input) {
30154
31218
  const orphans = resetOrphans(input.projectPath, input.sessionID);
30155
31219
  if (orphans > 0) {
30156
31220
  info(
@@ -30194,7 +31258,7 @@ async function run(input) {
30194
31258
  }
30195
31259
  }
30196
31260
  if (!input.skipMeta && gen0Count(input.projectPath, input.sessionID) >= cfg.distillation.metaThreshold) {
30197
- await metaDistill({
31261
+ await metaDistillInner({
30198
31262
  llm: input.llm,
30199
31263
  projectPath: input.projectPath,
30200
31264
  sessionID: input.sessionID,
@@ -30244,17 +31308,25 @@ async function distillSegment(input) {
30244
31308
  );
30245
31309
  return null;
30246
31310
  }
30247
- const distillId = storeDistillation({
30248
- projectPath: input.projectPath,
30249
- sessionID: input.sessionID,
30250
- observations: result.observations,
30251
- sourceIDs: input.messages.map((m) => m.id),
30252
- generation: 0,
30253
- rCompression: rComp,
30254
- cNorm,
30255
- callType: input.callType
30256
- });
30257
- markDistilled(input.messages.map((m) => m.id));
31311
+ let distillId;
31312
+ db().exec("BEGIN IMMEDIATE");
31313
+ try {
31314
+ distillId = storeDistillation({
31315
+ projectPath: input.projectPath,
31316
+ sessionID: input.sessionID,
31317
+ observations: result.observations,
31318
+ sourceIDs: input.messages.map((m) => m.id),
31319
+ generation: 0,
31320
+ rCompression: rComp,
31321
+ cNorm,
31322
+ callType: input.callType
31323
+ });
31324
+ markDistilled(input.messages.map((m) => m.id));
31325
+ db().exec("COMMIT");
31326
+ } catch (e) {
31327
+ db().exec("ROLLBACK");
31328
+ throw e;
31329
+ }
30258
31330
  info(
30259
31331
  `distill segment: ${input.messages.length} msgs, ${sourceTokens}\u2192${distilledTokens} tokens, R=${rComp.toFixed(2)}, C_norm=${cNorm.toFixed(3)}`
30260
31332
  );
@@ -30267,7 +31339,8 @@ async function distillSegment(input) {
30267
31339
  embedDistillation(distillId, result.observations);
30268
31340
  }
30269
31341
  if (config2().knowledge.enabled) {
30270
- for (const pat of extractPatterns(result.observations)) {
31342
+ const patterns = extractPatterns(result.observations);
31343
+ for (const pat of patterns) {
30271
31344
  try {
30272
31345
  create({
30273
31346
  projectPath: input.projectPath,
@@ -30280,10 +31353,16 @@ async function distillSegment(input) {
30280
31353
  } catch {
30281
31354
  }
30282
31355
  }
31356
+ if (patterns.length > 0) {
31357
+ info(`pattern extraction: ${patterns.length} entries from distillation`);
31358
+ }
30283
31359
  }
30284
31360
  return result;
30285
31361
  }
30286
31362
  async function metaDistill(input) {
31363
+ return distillLimiter.get(input.sessionID)(() => metaDistillInner(input));
31364
+ }
31365
+ async function metaDistillInner(input) {
30287
31366
  const existing = loadGen0(input.projectPath, input.sessionID);
30288
31367
  const priorMeta = latestMeta(input.projectPath, input.sessionID);
30289
31368
  if (priorMeta) {
@@ -30325,196 +31404,1801 @@ async function metaDistill(input) {
30325
31404
  db().exec("ROLLBACK");
30326
31405
  throw e;
30327
31406
  }
30328
- if (isAvailable()) {
30329
- embedDistillation(metaId, result.observations);
31407
+ if (isAvailable()) {
31408
+ embedDistillation(metaId, result.observations);
31409
+ }
31410
+ if (config2().knowledge.enabled) {
31411
+ const patterns = extractPatterns(result.observations);
31412
+ for (const pat of patterns) {
31413
+ try {
31414
+ create({
31415
+ projectPath: input.projectPath,
31416
+ category: pat.category,
31417
+ title: pat.title,
31418
+ content: pat.content,
31419
+ session: input.sessionID,
31420
+ scope: "project"
31421
+ });
31422
+ } catch {
31423
+ }
31424
+ }
31425
+ if (patterns.length > 0) {
31426
+ info(`pattern extraction: ${patterns.length} entries from meta-distillation`);
31427
+ }
31428
+ }
31429
+ return result;
31430
+ }
/**
 * Fills in `r_compression` / `c_norm` for distillation rows where
 * `r_compression` is still NULL.
 *
 * For every such row the source message ids are parsed, the matching
 * `temporal_messages` rows are fetched, and both metrics are recomputed from
 * their token counts and creation timestamps. Rows with no parsable source
 * ids, or whose source messages can no longer be found, are left untouched.
 *
 * @returns {number} Number of distillation rows that were updated.
 */
function backfillMetrics() {
  const pending = db().query(
    "SELECT id, source_ids, token_count FROM distillations WHERE r_compression IS NULL"
  ).all();
  if (!pending.length) return 0;

  const writeMetrics = db().prepare(
    "UPDATE distillations SET r_compression = ?, c_norm = ? WHERE id = ?"
  );

  let updated = 0;
  for (const row of pending) {
    const ids = parseSourceIds(row.source_ids);
    if (!ids.length) continue;

    // One "?" per id so the ids travel as bound parameters, never as SQL text.
    const marks = ids.map(() => "?").join(",");
    const sourceRows = db().query(
      `SELECT tokens, created_at FROM temporal_messages WHERE id IN (${marks})`
    ).all(...ids);
    if (!sourceRows.length) continue;

    let sourceTokens = 0;
    const timestamps = [];
    for (const src of sourceRows) {
      sourceTokens += src.tokens;
      timestamps.push(src.created_at);
    }

    const rComp = compressionRatio(row.token_count, sourceTokens);
    const cNorm = temporalCnorm(timestamps);
    writeMetrics.run(rComp, cNorm, row.id);
    updated += 1;
  }

  if (updated > 0) {
    info(
      `backfilled metrics for ${updated} distillations (${pending.length - updated} skipped \u2014 missing sources)`
    );
  }
  return updated;
}
31462
+
31463
+ // src/curator.ts
31464
+ var curator_exports = {};
31465
+ __export(curator_exports, {
31466
+ MAX_ENTRY_CONTENT_LENGTH: () => MAX_ENTRY_CONTENT_LENGTH,
31467
+ applyOps: () => applyOps,
31468
+ consolidate: () => consolidate,
31469
+ parseOps: () => parseOps,
31470
+ resetCurationTracker: () => resetCurationTracker,
31471
+ run: () => run2
31472
+ });
31473
+
31474
+ // src/instruction-detect.ts
31475
+ var instruction_detect_exports = {};
31476
+ __export(instruction_detect_exports, {
31477
+ detectAndFormat: () => detectAndFormat,
31478
+ extractInstructionCandidates: () => extractInstructionCandidates,
31479
+ findRepeatedInstructions: () => findRepeatedInstructions,
31480
+ formatForCurator: () => formatForCurator
31481
+ });
// Minimum number of distinct prior sessions for an instruction to count as repeated.
var DEFAULT_REPETITION_THRESHOLD = 2;
// Minimum similarity for a vector hit to count as a match.
var VECTOR_SIMILARITY_THRESHOLD = 0.5;
// Upper bound on candidates returned by a single extraction pass.
var MAX_CANDIDATES = 5;
// Phrasings treated as user instructions. Capture group 1 is the instruction
// text: 10-80 characters, ending at the first ".", ",", "!" or end of input.
var INSTRUCTION_PATTERNS = [
  /\balways\b (.{10,80}?)(?:\.|,|!|$)/gi,
  /\bnever\b (.{10,80}?)(?:\.|,|!|$)/gi,
  /\bmake sure to (.{10,80}?)(?:\.|,|!|$)/gi,
  /\bdon'?t forget (?:to )?(.{10,80}?)(?:\.|,|!|$)/gi,
  /\bplease (?:always |make sure (?:to )?)(.{10,80}?)(?:\.|,|!|$)/gi,
  /\bI (?:want|need|prefer|expect) (?:you to )?(.{10,80}?)(?:\.|,|!|$)/gi
];

/**
 * Scans user messages for instruction-like phrases.
 *
 * Only messages whose `role` is "user" are examined. Captured phrases are
 * trimmed, de-duplicated case-insensitively, and collected in encounter order
 * until `MAX_CANDIDATES` is reached.
 *
 * @param {Array<{role: string, content: string, session_id: *}>} messages
 * @returns {Array<{text: string, sessionID: *}>} Candidate instructions.
 */
function extractInstructionCandidates(messages) {
  const found = [];
  const seenKeys = new Set();

  for (const message of messages) {
    if (message.role !== "user") continue;

    for (const re of INSTRUCTION_PATTERNS) {
      // The patterns are shared /g regexes, so rewind before every scan.
      re.lastIndex = 0;
      for (let hit = re.exec(message.content); hit !== null; hit = re.exec(message.content)) {
        const phrase = hit[1]?.trim();
        if (!phrase || phrase.length < 10) continue;

        const key = phrase.toLowerCase();
        if (seenKeys.has(key)) continue;
        seenKeys.add(key);

        found.push({ text: phrase, sessionID: message.session_id });
        if (found.length >= MAX_CANDIDATES) return found;
      }
    }
  }
  return found;
}
/**
 * Determines which candidate instructions also show up in other sessions.
 *
 * For each candidate, distinct session ids are gathered from two sources:
 * vector hits at or above `VECTOR_SIMILARITY_THRESHOLD` (only when batch
 * embedding succeeded) and full-text hits on up to five filtered terms (only
 * when at least two terms remain). The current session is always excluded.
 *
 * @param {object} input
 * @param {string} input.projectPath
 * @param {*} input.currentSessionID - Session whose hits are ignored.
 * @param {Array<{text: string}>} input.candidates
 * @param {number} [input.threshold] - Defaults to `DEFAULT_REPETITION_THRESHOLD`.
 * @returns {Promise<Array<{instruction: string, priorSessionCount: number}>>}
 */
async function findRepeatedInstructions(input) {
  const threshold = input.threshold ?? DEFAULT_REPETITION_THRESHOLD;
  if (!input.candidates.length) return [];

  const pid = ensureProject(input.projectPath);

  // An embedding failure is non-fatal: the FTS lookup below still runs.
  let vectors = [];
  if (isAvailable()) {
    try {
      vectors = await embed(
        input.candidates.map((c) => c.text),
        "query"
      );
    } catch (err) {
      warn("instruction-detect: batch embedding failed:", err);
    }
  }

  const repeated = [];
  for (const [index, candidate] of input.candidates.entries()) {
    const priorSessions = new Set();

    if (vectors.length > index) {
      for (const hit of vectorSearchAllDistillations(vectors[index], pid, 20)) {
        const closeEnough = hit.similarity >= VECTOR_SIMILARITY_THRESHOLD;
        if (closeEnough && hit.session_id !== input.currentSessionID) {
          priorSessions.add(hit.session_id);
        }
      }
    }

    const terms = filterTerms(candidate.text);
    if (terms.length >= 2) {
      const searchText = terms.slice(0, 5).join(" ");
      for (const hit of searchDistillationsFTS(pid, searchText)) {
        if (hit.session_id !== input.currentSessionID) {
          priorSessions.add(hit.session_id);
        }
      }
    }

    if (priorSessions.size >= threshold) {
      repeated.push({
        instruction: candidate.text,
        priorSessionCount: priorSessions.size
      });
    }
  }
  return repeated;
}
/**
 * Full-text lookup of distillations for a project.
 *
 * @param {*} projectId2 - Value compared against `distillations.project_id`.
 * @param {string} rawQuery - Free text, converted by `ftsQueryOr` into a MATCH expression.
 * @returns {Array<{id: *, session_id: *}>} Up to 30 rows ordered by rank; an
 *   empty array when the query is empty or the search throws.
 */
function searchDistillationsFTS(projectId2, rawQuery) {
  const matchExpr = ftsQueryOr(rawQuery);
  if (matchExpr === EMPTY_QUERY) return [];

  const sql = [
    "SELECT d.id, d.session_id",
    "FROM distillation_fts f",
    "CROSS JOIN distillations d ON d.rowid = f.rowid",
    "WHERE distillation_fts MATCH ?",
    "AND d.project_id = ?",
    "ORDER BY rank LIMIT 30"
  ].join("\n");

  try {
    return db().query(sql).all(matchExpr, projectId2);
  } catch (err) {
    // A failed search is treated as "no hits" rather than an error.
    warn("instruction-detect: FTS search failed:", err);
    return [];
  }
}
/**
 * Renders repeated instructions as a text section to append to the curator
 * prompt.
 *
 * @param {Array<{instruction: string, priorSessionCount: number}>} instructions
 * @returns {string} Empty string for an empty list; otherwise two newlines, a
 *   "---" separator, a two-line header and one bullet per instruction.
 */
function formatForCurator(instructions) {
  if (!instructions.length) return "";

  const bullets = [];
  for (const { instruction, priorSessionCount } of instructions) {
    const plural = priorSessionCount !== 1 ? "s" : "";
    bullets.push(`- "${instruction}" (seen in ${priorSessionCount} prior session${plural})`);
  }

  // The two leading empty strings produce the blank lines before the separator.
  const header = [
    "",
    "",
    "---",
    "CROSS-SESSION REPEATED INSTRUCTIONS (high-confidence preference candidates):",
    'The following user instructions have appeared in multiple prior sessions. These are strong candidates for "preference" entries:'
  ];
  return [...header, ...bullets].join("\n");
}
/**
 * Extracts instruction candidates from a session, keeps those repeated in
 * other sessions, and formats them for the curator prompt.
 *
 * @param {object} input
 * @param {string} input.projectPath
 * @param {*} input.sessionID - Session to scan; excluded from the repetition count.
 * @param {number} [input.threshold] - Forwarded to `findRepeatedInstructions`.
 * @returns {Promise<string>} Formatted section, or "" when there is nothing to report.
 */
async function detectAndFormat(input) {
  const { projectPath, sessionID } = input;

  const candidates = extractInstructionCandidates(bySession(projectPath, sessionID));
  if (!candidates.length) return "";

  const repeated = await findRepeatedInstructions({
    projectPath,
    currentSessionID: sessionID,
    candidates,
    threshold: input.threshold
  });

  if (repeated.length) {
    info(
      `instruction-detect: ${repeated.length} repeated instruction(s) found across sessions`
    );
  }
  return formatForCurator(repeated);
}
31608
+
31609
+ // src/curator.ts
31610
+ var MAX_ENTRY_CONTENT_LENGTH = 1200;
/**
 * Parses the curator model's reply into a list of operations.
 *
 * The reply may be wrapped in a Markdown code fence, either bare (```) or
 * tagged (```json); the fence is removed before parsing. The previous pattern
 * `json?` only matched fences starting with "jso", so a bare fence was left in
 * place and an otherwise valid reply was discarded.
 *
 * @param {string} text4 - Raw model output.
 * @returns {Array<object>} Array elements that are non-null objects with a
 *   string `op` property. Invalid JSON or a non-array payload yields `[]`.
 */
function parseOps(text4) {
  const cleaned = text4
    .trim()
    .replace(/^```(?:json)?\s*/i, "")
    .replace(/\s*```$/i, "");
  try {
    const parsed = JSON.parse(cleaned);
    if (!Array.isArray(parsed)) return [];
    return parsed.filter(
      (op) => typeof op === "object" && op !== null && "op" in op && typeof op.op === "string"
    );
  } catch {
    // Unparsable model output is treated as "no operations".
    return [];
  }
}
/**
 * Applies curator operations (create / update / delete) to the knowledge store.
 *
 * - create: skipped when `input.skipCreate` is set. Content longer than
 *   `MAX_ENTRY_CONTENT_LENGTH` is cut and marked as truncated. A create op
 *   without string content is skipped with a warning instead of throwing,
 *   so one malformed model-produced op no longer aborts the batch before
 *   reference syncing.
 * - update / delete: only applied when `get(op.id)` finds the entry.
 *
 * After all ops have run, `syncRefs` is called for every created entry and
 * for every updated entry whose op carried content.
 *
 * @param {Array<object>} ops - Operations as returned by `parseOps`.
 * @param {{projectPath: string, sessionID: *, skipCreate?: boolean}} input
 * @returns {{created: number, updated: number, deleted: number}}
 */
function applyOps(ops, input) {
  // Shared by create and update so both enforce the same length cap.
  const clamp = (content) =>
    content.length > MAX_ENTRY_CONTENT_LENGTH
      ? content.slice(0, MAX_ENTRY_CONTENT_LENGTH) + " [truncated \u2014 entry too long]"
      : content;

  let created = 0;
  let updated = 0;
  let deleted = 0;
  const idsToSync = [];

  for (const op of ops) {
    if (op.op === "create") {
      if (input.skipCreate) continue;
      if (typeof op.content !== "string") {
        warn("curator: skipping create op without string content:", op.title);
        continue;
      }
      const id = create({
        projectPath: op.scope === "project" ? input.projectPath : void 0,
        category: op.category,
        title: op.title,
        content: clamp(op.content),
        session: input.sessionID,
        scope: op.scope,
        crossProject: op.crossProject ?? true
      });
      idsToSync.push(id);
      created++;
    } else if (op.op === "update") {
      if (!get(op.id)) continue;
      // Non-string content (including undefined) is passed through untouched.
      const content3 = typeof op.content === "string" ? clamp(op.content) : op.content;
      update(op.id, { content: content3, confidence: op.confidence });
      if (op.content !== void 0) idsToSync.push(op.id);
      updated++;
    } else if (op.op === "delete") {
      if (!get(op.id)) continue;
      remove(op.id);
      deleted++;
    }
  }

  for (const id of idsToSync) {
    syncRefs(id);
  }
  return { created, updated, deleted };
}
// In-memory cache of the last curation timestamp, keyed by session id.
var lastCuratedAt = new Map();

/**
 * Returns the timestamp of the last curation for a session.
 *
 * The in-memory cache is consulted first. On a miss the persisted session
 * tracking record is loaded, with 0 used when nothing is stored, and the
 * result is cached.
 *
 * @param {*} sessionID
 * @returns {number} Cached or persisted timestamp, or 0.
 */
function getLastCuratedAt(sessionID) {
  const known = lastCuratedAt.get(sessionID);
  if (known === void 0) {
    const stored = loadSessionTracking(sessionID);
    const ts = stored?.lastCuratedAt ?? 0;
    lastCuratedAt.set(sessionID, ts);
    return ts;
  }
  return known;
}
/**
 * Entry point for a curation pass over one session.
 *
 * Returns zero counts without doing any work when the curator is disabled in
 * config or when `curatorLimiter` reports the session as busy. Otherwise
 * `runInner2` is executed through the session's limiter.
 *
 * @param {object} input - Must carry `sessionID`; forwarded to `runInner2`.
 * @returns {Promise<{created: number, updated: number, deleted: number}>}
 */
async function run2(input) {
  const noChanges = () => ({ created: 0, updated: 0, deleted: 0 });

  const cfg = config2();
  if (!cfg.curator.enabled) return noChanges();

  if (curatorLimiter.isBusy(input.sessionID)) {
    info(`curation skipped: already running for session ${input.sessionID.slice(0, 16)}`);
    return noChanges();
  }

  const runLimited = curatorLimiter.get(input.sessionID);
  return runLimited(() => runInner2(input));
}
/**
 * Performs one curation pass for a session.
 *
 * Steps:
 *  1. Select session messages created after the last curation timestamp;
 *     fewer than three means there is nothing to do.
 *  2. Build the curator prompt from those messages, the project's existing
 *     entries, and (best effort) cross-session repeated instructions.
 *  3. Ask the model for operations and apply them.
 *  4. If entries were created, run deduplication and feed the resulting
 *     similarities into threshold calibration (best effort).
 *  5. Record the current time as the session's last-curated timestamp, in
 *     memory and in persisted session tracking.
 *
 * NOTE(review): the recorded timestamp is taken after the model call, so
 * messages created while the call was in flight appear to fall before it and
 * would not be picked up next pass — confirm whether that is intended.
 *
 * @param {object} input
 * @param {string} input.projectPath
 * @param {*} input.sessionID
 * @param {{prompt: Function}} input.llm
 * @param {*} [input.model] - Overrides the configured model.
 * @returns {Promise<{created: number, updated: number, deleted: number}>}
 */
async function runInner2(input) {
  const cfg = config2();
  const { projectPath, sessionID } = input;
  const noChanges = () => ({ created: 0, updated: 0, deleted: 0 });

  const history = bySession(projectPath, sessionID);
  const sessionCuratedAt = getLastCuratedAt(sessionID);
  const fresh = history.filter((m) => m.created_at > sessionCuratedAt);
  if (fresh.length < 3) return noChanges();

  const transcript = fresh.map((m) => `[${m.role}] ${m.content}`).join("\n\n");
  // Only these four fields of each existing entry are shown to the model.
  const existingForPrompt = forProject(projectPath, false).map(
    ({ id, category, title, content }) => ({ id, category, title, content })
  );
  const baseUserContent = curatorUser({
    messages: transcript,
    existing: existingForPrompt
  });

  let crossSessionContext = "";
  try {
    crossSessionContext = await detectAndFormat({ projectPath, sessionID });
  } catch (err) {
    warn("instruction-detect failed (non-fatal):", err);
  }

  const responseText = await input.llm.prompt(
    CURATOR_SYSTEM,
    baseUserContent + crossSessionContext,
    {
      model: input.model ?? cfg.model,
      workerID: "lore-curator",
      thinking: false,
      sessionID,
      maxTokens: 2048
    }
  );
  if (!responseText) return noChanges();

  const result = applyOps(parseOps(responseText), { projectPath, sessionID });

  if (result.created > 0) {
    try {
      const dupes = await deduplicate(projectPath, { dryRun: false });
      if (dupes.totalRemoved > 0) {
        info(`post-curation dedup: merged ${dupes.totalRemoved} duplicate entries`);
        result.deleted += dupes.totalRemoved;
      }
      if (dupes.pairSimilarities.size > 0) {
        const pid = ensureProject(projectPath);
        recordAutoSignals(pid, dupes);
        const newThreshold = calibrateDedupThreshold(pid);
        if (newThreshold !== null) {
          saveCalibratedThreshold(pid, newThreshold, getDedupFeedbackCount(pid));
        }
      }
    } catch (err) {
      warn("post-curation dedup failed (non-fatal):", err);
    }
  }

  const now = Date.now();
  lastCuratedAt.set(sessionID, now);
  saveSessionTracking(sessionID, { lastCuratedAt: now });
  return result;
}
/**
 * Drops cached last-curated timestamps.
 *
 * @param {*} [sessionID] - When truthy, only that session's cache entry is
 *   removed; otherwise the whole cache is cleared.
 */
function resetCurationTracker(sessionID) {
  if (!sessionID) {
    lastCuratedAt.clear();
    return;
  }
  lastCuratedAt.delete(sessionID);
}
/**
 * Asks the model to consolidate a project's entries once their number exceeds
 * the configured `curator.maxEntries`.
 *
 * Nothing happens when the curator is disabled, when the entry count is within
 * the limit, or when the model returns no text. Create operations in the
 * model's reply are ignored (`skipCreate`).
 *
 * @param {object} input
 * @param {string} input.projectPath
 * @param {*} input.sessionID
 * @param {{prompt: Function}} input.llm
 * @param {*} [input.model] - Overrides the configured model.
 * @returns {Promise<{updated: number, deleted: number}>}
 */
async function consolidate(input) {
  const cfg = config2();
  const untouched = () => ({ updated: 0, deleted: 0 });
  if (!cfg.curator.enabled) return untouched();

  const entries = forProject(input.projectPath, false);
  const { maxEntries } = cfg.curator;
  if (entries.length <= maxEntries) return untouched();

  const userContent = consolidationUser({
    entries: entries.map(({ id, category, title, content }) => ({ id, category, title, content })),
    targetMax: maxEntries
  });

  const responseText = await input.llm.prompt(
    CONSOLIDATION_SYSTEM,
    userContent,
    {
      model: input.model ?? cfg.model,
      workerID: "lore-curator",
      thinking: false,
      sessionID: input.sessionID,
      maxTokens: 4096
    }
  );
  if (!responseText) return untouched();

  // Consolidation must not add entries.
  const { updated, deleted } = applyOps(parseOps(responseText), {
    projectPath: input.projectPath,
    sessionID: input.sessionID,
    skipCreate: true
  });
  return { updated, deleted };
}
31785
+
31786
+ // src/import/index.ts
31787
+ var import_exports = {};
31788
+ __export(import_exports, {
31789
+ clearProviders: () => clearProviders,
31790
+ computeHash: () => computeHash,
31791
+ detectAll: () => detectAll,
31792
+ extractKnowledge: () => extractKnowledge,
31793
+ getProvider: () => getProvider2,
31794
+ getProviders: () => getProviders,
31795
+ isImported: () => isImported,
31796
+ listImports: () => listImports,
31797
+ recordImport: () => recordImport,
31798
+ registerProvider: () => registerProvider
31799
+ });
31800
+
31801
+ // src/import/providers/index.ts
// Registry of import providers, in registration order.
var providers = [];

/** Appends a provider to the registry. */
function registerProvider(provider) {
  providers.push(provider);
}

/** Returns the live registry array (not a copy). */
function getProviders() {
  return providers;
}

/**
 * Looks up a provider by its `name`.
 * @returns {object|undefined} First provider with a matching name, if any.
 */
function getProvider2(name) {
  for (const candidate of providers) {
    if (candidate.name === name) return candidate;
  }
  return void 0;
}

/** Empties the registry in place, so existing references see the change. */
function clearProviders() {
  providers.splice(0);
}
31815
+
31816
+ // src/import/detect.ts
/**
 * Runs every registered provider's `detect` for a project and summarizes the
 * sessions each one found.
 *
 * Providers that find nothing are omitted, and a provider whose `detect`
 * throws is skipped without affecting the others.
 *
 * @param {string} projectPath
 * @returns {Array<{agentName: string, agentDisplayName: string, sessions: Array,
 *   totalTokens: number, totalMessages: number}>} Sorted by `totalMessages`,
 *   largest first.
 */
function detectAll(projectPath) {
  const summaries = [];
  for (const provider of getProviders()) {
    let sessions;
    try {
      sessions = provider.detect(projectPath);
      if (sessions.length > 0) {
        let totalTokens = 0;
        let totalMessages = 0;
        for (const session of sessions) {
          totalTokens += session.estimatedTokens;
          totalMessages += session.messageCount;
        }
        summaries.push({
          agentName: provider.name,
          agentDisplayName: provider.displayName,
          sessions,
          totalTokens,
          totalMessages
        });
      }
    } catch (err) {
      // Detection failures are ignored; the provider contributes nothing.
    }
  }
  return summaries.sort((left, right) => right.totalMessages - left.totalMessages);
}
31836
+
31837
+ // src/import/extract.ts
31838
+ var IMPORT_CURATOR_SYSTEM = `${CURATOR_SYSTEM}
31839
+
31840
+ ADDITIONAL CONTEXT: You are extracting knowledge from HISTORICAL conversations with a different AI coding agent. Focus on durable insights that are still relevant:
31841
+ - Architecture decisions, design patterns, and project conventions
31842
+ - Gotchas, non-obvious bugs, and their fixes
31843
+ - Developer preferences and workflow patterns
31844
+ - Key technical choices and their rationale
31845
+
31846
+ Ignore:
31847
+ - References to the other agent's specific capabilities or limitations
31848
+ - Task-specific state that is no longer current (e.g. "currently debugging X")
31849
+ - Debugging steps for issues that were already resolved
31850
+ - Transient conversation artifacts (greetings, acknowledgments, status updates)`;
/**
 * Feeds imported conversation chunks to the curator model, oldest first, and
 * applies the operations it returns.
 *
 * The project's existing entries are re-read before every chunk so each prompt
 * reflects what earlier chunks changed. A chunk whose model call or op
 * application throws is counted in `chunksFailed` and processing continues.
 * `onProgress`, when supplied, is called after every chunk.
 *
 * @param {object} input
 * @param {string} input.projectPath
 * @param {*} input.sessionID
 * @param {*} input.model
 * @param {{prompt: Function}} input.llm
 * @param {Array<{text: string, timestamp: number}>} input.chunks - Not mutated.
 * @param {Function} [input.onProgress]
 * @returns {Promise<{created: number, updated: number, deleted: number,
 *   chunksProcessed: number, chunksFailed: number}>}
 */
async function extractKnowledge(input) {
  const totals = {
    created: 0,
    updated: 0,
    deleted: 0,
    chunksProcessed: 0,
    chunksFailed: 0
  };

  // Sort a copy so the caller's array keeps its order.
  const ordered = [...input.chunks].sort((a, b) => a.timestamp - b.timestamp);

  for (const [index, chunk] of ordered.entries()) {
    const existingForPrompt = forProject(input.projectPath, false).map(
      ({ id, category, title, content }) => ({ id, category, title, content })
    );
    const userContent = curatorUser({
      messages: chunk.text,
      existing: existingForPrompt
    });

    try {
      const response = await input.llm.prompt(
        IMPORT_CURATOR_SYSTEM,
        userContent,
        {
          model: input.model,
          workerID: "lore-import",
          thinking: false,
          maxTokens: 4096,
          sessionID: input.sessionID
        }
      );
      if (response) {
        const applied = applyOps(parseOps(response), {
          projectPath: input.projectPath,
          sessionID: input.sessionID
        });
        totals.created += applied.created;
        totals.updated += applied.updated;
        totals.deleted += applied.deleted;
      }
      totals.chunksProcessed += 1;
    } catch {
      totals.chunksFailed += 1;
    }

    input.onProgress?.({
      current: index + 1,
      total: ordered.length,
      created: totals.created,
      updated: totals.updated
    });
  }
  return totals;
}
31908
+
31909
+ // src/import/history.ts
/**
 * Checks whether a source was already imported in its current state.
 *
 * @param {string} projectPath
 * @param {string} agentName
 * @param {string} sourceId
 * @param {string} sourceHash - Hash of the source as it is now.
 * @returns {object|null} The `import_history` row when one exists for this
 *   project/agent/source and its stored hash equals `sourceHash`; otherwise null.
 */
function isImported(projectPath, agentName, sourceId, sourceHash) {
  const projectId2 = ensureProject(projectPath);
  const sql = [
    "SELECT * FROM import_history",
    "WHERE project_id = ? AND agent_name = ? AND source_id = ?"
  ].join("\n");
  const row = db().query(sql).get(projectId2, agentName, sourceId);
  // A differing hash means the source changed since it was imported.
  const unchanged = Boolean(row) && row.source_hash === sourceHash;
  return unchanged ? row : null;
}
/**
 * Writes an `import_history` row for a completed import, using
 * `INSERT OR REPLACE` with a freshly generated UUID as the row id.
 *
 * @param {string} projectPath
 * @param {string} agentName
 * @param {string} sourceId
 * @param {string} sourceHash
 * @param {{created: number, updated: number}} stats - Entry counts from the import.
 */
function recordImport(projectPath, agentName, sourceId, sourceHash, stats) {
  const projectId2 = ensureProject(projectPath);
  const sql = [
    "INSERT OR REPLACE INTO import_history",
    "(id, project_id, agent_name, source_id, source_hash, entries_created, entries_updated, imported_at)",
    "VALUES (?, ?, ?, ?, ?, ?, ?, ?)"
  ].join("\n");
  // Values are listed in the same order as the column list above.
  const values = [
    crypto.randomUUID(),
    projectId2,
    agentName,
    sourceId,
    sourceHash,
    stats.created,
    stats.updated,
    Date.now()
  ];
  db().query(sql).run(...values);
}
/**
 * Lists a project's import history, newest first.
 *
 * Rows whose `source_id` is the '__declined__' marker are excluded.
 *
 * @param {string} projectPath
 * @returns {Array<object>} `import_history` rows ordered by `imported_at` descending.
 */
function listImports(projectPath) {
  const projectId2 = ensureProject(projectPath);
  const sql = [
    "SELECT * FROM import_history",
    "WHERE project_id = ? AND source_id != '__declined__'",
    "ORDER BY imported_at DESC"
  ].join("\n");
  return db().query(sql).all(projectId2);
}
/**
 * Builds a cheap change-detection fingerprint "size:messageCount:lastTimestamp".
 * A missing (null or undefined) component is rendered as 0.
 *
 * @param {{size?: number, messageCount?: number, lastTimestamp?: number}} parts
 * @returns {string}
 */
function computeHash(parts) {
  const { size, messageCount, lastTimestamp } = parts;
  return [size, messageCount, lastTimestamp].map((value) => value ?? 0).join(":");
}
31948
+
31949
+ // src/import/providers/claude-code.ts
31950
+ import { readdirSync as readdirSync2, readFileSync as readFileSync4, statSync as statSync5 } from "fs";
31951
+ import { join as join8 } from "path";
31952
+ import { homedir as homedir2 } from "os";
31953
+ var CLAUDE_DIR = join8(homedir2(), ".claude", "projects");
31954
+ var MAX_TOOL_OUTPUT_CHARS = 500;
31955
+ var DEFAULT_MAX_TOKENS = 12288;
/**
 * Turns a project path into the directory name looked up under `CLAUDE_DIR`
 * by replacing every "/" with "-".
 *
 * @param {string} projectPath
 * @returns {string}
 */
function manglePath(projectPath) {
  return projectPath.split("/").join("-");
}
/**
 * Rough token estimate: one token per three characters, rounded up.
 *
 * @param {string} text4
 * @returns {number}
 */
function estimateTokens4(text4) {
  const { length } = text4;
  return Math.ceil(length / 3);
}
/**
 * Caps a string at `max` characters, appending "..." when anything was cut.
 *
 * @param {string} text4
 * @param {number} max
 * @returns {string}
 */
function truncate(text4, max) {
  return text4.length > max ? `${text4.slice(0, max)}...` : text4;
}
/**
 * Converts one message content block into a line of plain text.
 *
 * - "text": the block's text as-is.
 * - "tool_use": `[tool: <name>] <JSON of input>`, with the JSON capped at
 *   `MAX_TOOL_OUTPUT_CHARS`. A block without `input` yields an empty summary;
 *   previously `JSON.stringify(undefined)` returned `undefined` and the
 *   truncation step threw.
 * - "tool_result": `[tool_result] <text>`, with the text capped the same way.
 *   Array content is reduced to its text parts; null or non-text parts are
 *   ignored. Returns null when no text remains.
 * - anything else, including "thinking": null.
 *
 * @param {{type: string}} block
 * @returns {string|null}
 */
function blockToText(block) {
  switch (block.type) {
    case "text":
      return block.text;
    case "tool_use": {
      // JSON.stringify returns undefined for an undefined input.
      const serialized = JSON.stringify(block.input) ?? "";
      return `[tool: ${block.name}] ${truncate(serialized, MAX_TOOL_OUTPUT_CHARS)}`;
    }
    case "tool_result": {
      const payload = block.content;
      let text = "";
      if (typeof payload === "string") {
        text = payload;
      } else if (Array.isArray(payload)) {
        text = payload
          .map((part) => (part?.type === "text" ? part.text : ""))
          .filter(Boolean)
          .join("\n");
      }
      return text ? `[tool_result] ${truncate(text, MAX_TOOL_OUTPUT_CHARS)}` : null;
    }
    default:
      return null;
  }
}
/**
 * Converts one parsed transcript line into `[user] …` / `[assistant] …` text.
 *
 * User content may be a plain string or an array of blocks; assistant content
 * must be an array of blocks. Blocks are rendered with `blockToText` and
 * joined with newlines. Lines of any other type, lines without a `message`,
 * and lines whose content has an unexpected shape yield null. Previously the
 * user branch called `.map` on whatever it was given and threw.
 *
 * @param {{type?: string, message?: {content?: *}}} parsed
 * @returns {string|null} Rendered text, or null when there is nothing to render.
 */
function lineToText(parsed) {
  const role = parsed?.type;
  if (role !== "user" && role !== "assistant") return null;

  const content = parsed.message?.content;
  // Only user messages may carry their content as a bare string.
  if (role === "user" && typeof content === "string") {
    return `[user] ${content}`;
  }
  if (!Array.isArray(content)) return null;

  const parts = content
    .map((block) => (block ? blockToText(block) : null))
    .filter(Boolean);
  return parts.length > 0 ? `[${role}] ${parts.join("\n")}` : null;
}
/**
 * Reads a JSONL transcript and returns its parsed records.
 *
 * Blank lines and lines that are not valid JSON are skipped. Lines that parse
 * to something other than an object (for example `null` or a number) are also
 * skipped, because consumers read properties off each record. An unreadable
 * file yields an empty list, matching how the Codex reader in this file
 * handles the same situation; previously the read error propagated.
 *
 * @param {string} filePath
 * @returns {Array<object>} Parsed records in file order.
 */
function parseJSONL(filePath) {
  let raw;
  try {
    raw = readFileSync4(filePath, "utf-8");
  } catch {
    // The file may have been removed or become unreadable since detection.
    return [];
  }
  const records = [];
  for (const line of raw.split("\n")) {
    if (!line.trim()) continue;
    try {
      const value = JSON.parse(line);
      if (value !== null && typeof value === "object") records.push(value);
    } catch {
      // Malformed line: skip it and keep the rest of the transcript.
    }
  }
  return records;
}
/**
 * Summarizes a Claude Code JSONL session file without rendering its messages.
 *
 * Collected from the file's lines: the first truthy `sessionId`, the earliest
 * and latest parsable `timestamp`, and the number of "user" / "assistant"
 * lines. Lines that fail to parse are ignored.
 *
 * @param {string} filePath
 * @returns {{sessionId: *, startedAt: number, lastActivityAt: number,
 *   messageCount: number, estimatedTokens: number}|null} Null when the file is
 *   unreadable, empty, has no session id, or has no user/assistant lines.
 *   `startedAt` falls back to the current time when no timestamp was found;
 *   `estimatedTokens` is the file's character count divided by 5, rounded up.
 */
function getSessionMetadata(filePath) {
  let raw;
  try {
    raw = readFileSync4(filePath, "utf-8");
  } catch {
    return null;
  }

  const records = raw.split("\n").filter((l) => l.trim());
  if (records.length === 0) return null;

  let sessionId;
  let earliest = Infinity;
  let latest = 0;
  let messageCount = 0;

  for (const record of records) {
    try {
      const entry = JSON.parse(record);
      if (!sessionId && entry.sessionId) sessionId = entry.sessionId;
      if (entry.timestamp) {
        const ts = new Date(entry.timestamp).getTime();
        if (!Number.isNaN(ts)) {
          earliest = Math.min(earliest, ts);
          latest = Math.max(latest, ts);
        }
      }
      if (["user", "assistant"].includes(entry.type)) {
        messageCount += 1;
      }
    } catch {
    }
  }

  if (!sessionId || messageCount === 0) return null;

  return {
    sessionId,
    startedAt: earliest === Infinity ? Date.now() : earliest,
    lastActivityAt: latest,
    messageCount,
    estimatedTokens: Math.ceil(raw.length / 5)
  };
}
/**
 * Import provider for Claude Code transcripts stored as JSONL files under
 * `CLAUDE_DIR/<mangled project path>/`.
 */
var claudeCodeProvider = {
  name: "claude-code",
  displayName: "Claude Code",

  /**
   * Lists importable sessions for a project.
   *
   * Every regular `.jsonl` file in the project's directory is summarized;
   * files without usable metadata or with fewer than three messages are
   * skipped. The session `id` is the file path.
   *
   * @param {string} projectPath
   * @returns {Array<object>} Sessions, most recently active first; empty when
   *   the directory cannot be read.
   */
  detect(projectPath) {
    const dir = join8(CLAUDE_DIR, manglePath(projectPath));
    let entries;
    try {
      entries = readdirSync2(dir);
    } catch {
      return [];
    }

    const sessions = [];
    for (const entry of entries) {
      if (!entry.endsWith(".jsonl")) continue;
      const filePath = join8(dir, entry);
      try {
        if (!statSync5(filePath).isFile()) continue;
      } catch {
        continue;
      }

      const meta3 = getSessionMetadata(filePath);
      if (!meta3 || meta3.messageCount < 3) continue;

      const dateStr = new Date(meta3.startedAt).toISOString().slice(0, 10);
      sessions.push({
        id: filePath,
        label: `${dateStr} (${meta3.messageCount} messages)`,
        startedAt: meta3.startedAt,
        lastActivityAt: meta3.lastActivityAt,
        estimatedTokens: meta3.estimatedTokens,
        messageCount: meta3.messageCount
      });
    }
    return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
  },

  /**
   * Reads the given session files and groups their messages into chunks of at
   * most roughly `maxTokens` estimated tokens. A single message larger than
   * the limit still forms a chunk of its own.
   *
   * A message whose timestamp is missing or cannot be parsed is stamped with
   * the current time. Previously an unparsable timestamp became NaN and made
   * `toISOString()` throw while the chunk label was built.
   *
   * @param {string} _projectPath - Unused.
   * @param {Array<string>} sessionIds - Session file paths, as returned by `detect`.
   * @param {number} [maxTokens=DEFAULT_MAX_TOKENS]
   * @returns {Array<{label: string, text: string, estimatedTokens: number, timestamp: number}>}
   */
  readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS) {
    const chunks = [];
    for (const filePath of sessionIds) {
      const messages = [];
      for (const line of parseJSONL(filePath)) {
        const text4 = lineToText(line);
        if (!text4) continue;
        const parsedTs = line.timestamp ? new Date(line.timestamp).getTime() : NaN;
        messages.push({
          text: text4,
          timestamp: Number.isNaN(parsedTs) ? Date.now() : parsedTs
        });
      }
      if (messages.length === 0) continue;

      let currentTexts = [];
      let currentTokens = 0;
      let chunkStart = messages[0].timestamp;
      let chunkIndex = 0;

      // Emits the accumulated messages as one chunk and resets the buffer.
      const flushChunk = () => {
        if (currentTexts.length === 0) return;
        chunkIndex++;
        const text4 = currentTexts.join("\n\n");
        chunks.push({
          label: `Claude Code ${new Date(chunkStart).toISOString().slice(0, 10)} (${chunkIndex})`,
          text: text4,
          estimatedTokens: estimateTokens4(text4),
          timestamp: chunkStart
        });
        currentTexts = [];
        currentTokens = 0;
      };

      for (const msg of messages) {
        const msgTokens = estimateTokens4(msg.text);
        // Start a new chunk before this message would overflow the current one.
        if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
          flushChunk();
          chunkStart = msg.timestamp;
        }
        currentTexts.push(msg.text);
        currentTokens += msgTokens;
      }
      flushChunk();
    }
    return chunks;
  }
};
32148
+ registerProvider(claudeCodeProvider);
32149
+
32150
+ // src/import/providers/codex.ts
32151
+ import { readdirSync as readdirSync3, readFileSync as readFileSync5, statSync as statSync6, existsSync as existsSync6 } from "fs";
32152
+ import { join as join9 } from "path";
32153
+ import { homedir as homedir3 } from "os";
32154
+ var CODEX_DIR = join9(homedir3(), ".codex");
32155
+ var SESSIONS_DIR = join9(CODEX_DIR, "sessions");
32156
+ var ARCHIVED_DIR = join9(CODEX_DIR, "archived_sessions");
32157
+ var MAX_TOOL_OUTPUT_CHARS2 = 500;
32158
+ var DEFAULT_MAX_TOKENS2 = 12288;
/**
 * Rough token estimate: one token per three characters, rounded up.
 *
 * @param {string} text4
 * @returns {number}
 */
function estimateTokens5(text4) {
  const { length } = text4;
  return Math.ceil(length / 3);
}
/**
 * Caps a string at `max` characters, appending "..." when anything was cut.
 *
 * @param {string} text4
 * @param {number} max
 * @returns {string}
 */
function truncate2(text4, max) {
  return text4.length > max ? `${text4.slice(0, max)}...` : text4;
}
/**
 * Recursively collects the paths of all `.jsonl` files below a directory.
 *
 * Directories that cannot be listed and entries that cannot be stat'ed are
 * skipped. Results follow directory-listing order, descending into each
 * subdirectory as soon as it is encountered.
 *
 * @param {string} dir - Root directory; a non-existent root yields `[]`.
 * @returns {Array<string>} Full paths of the files found.
 */
function findJsonlFiles(dir) {
  const found = [];
  if (!existsSync6(dir)) return found;

  function visit(current) {
    let names;
    try {
      names = readdirSync3(current);
    } catch {
      return;
    }
    for (const name of names) {
      const fullPath = join9(current, name);
      try {
        const info2 = statSync6(fullPath);
        if (info2.isDirectory()) {
          visit(fullPath);
        } else if (info2.isFile() && name.endsWith(".jsonl")) {
          found.push(fullPath);
        }
      } catch {
      }
    }
  }

  visit(dir);
  return found;
}
/**
 * Renders one Codex response item as a line of text.
 *
 * - "message" with a role and content: `[<role>] <extracted text>`.
 * - "function_call" with a name: `[tool: <name>] <arguments>`, arguments capped
 *   at `MAX_TOOL_OUTPUT_CHARS2`.
 * - "function_call_output" with output: `[tool_result] <output>`, capped the same way.
 *
 * @param {object|null|undefined} item
 * @returns {string|null} Null for missing items, other item types, or items
 *   lacking the fields listed above.
 */
function responseItemToText(item) {
  if (!item) return null;

  switch (item.type) {
    case "message": {
      if (!item.role || !item.content) return null;
      const body = extractContent(item.content);
      return body ? `[${item.role}] ${body}` : null;
    }
    case "function_call": {
      if (!item.name) return null;
      const args = item.arguments ? truncate2(item.arguments, MAX_TOOL_OUTPUT_CHARS2) : "";
      return `[tool: ${item.name}] ${args}`;
    }
    case "function_call_output":
      return item.output
        ? `[tool_result] ${truncate2(item.output, MAX_TOOL_OUTPUT_CHARS2)}`
        : null;
    default:
      return null;
  }
}
/**
 * Extracts plain text from a message's content.
 *
 * A string is returned unchanged. For an array, the string `text` of every
 * object part is collected and joined with newlines. Parts that are not
 * objects (null, strings, numbers) are skipped; previously the `in` check
 * threw a TypeError on them.
 *
 * @param {string|Array<*>|*} content3
 * @returns {string|null} Null when the content is neither a string nor an
 *   array, or when no text parts were found.
 */
function extractContent(content3) {
  if (typeof content3 === "string") return content3;
  if (!Array.isArray(content3)) return null;

  const texts = [];
  for (const part of content3) {
    if (part !== null && typeof part === "object" && typeof part.text === "string") {
      texts.push(part.text);
    }
  }
  return texts.length > 0 ? texts.join("\n") : null;
}
/**
 * Reads a JSONL file and returns the value parsed from each line.
 *
 * Blank lines and lines that are not valid JSON are skipped.
 *
 * @param {string} filePath
 * @returns {Array<*>} Parsed values in file order; empty when the file cannot be read.
 */
function parseJSONL2(filePath) {
  let raw;
  try {
    raw = readFileSync5(filePath, "utf-8");
  } catch {
    return [];
  }
  return raw.split("\n").flatMap((line) => {
    if (!line.trim()) return [];
    try {
      // Wrapped so that a line holding a JSON array stays a single element.
      return [JSON.parse(line)];
    } catch {
      return [];
    }
  });
}
/**
 * Reads the header of a Codex session file and counts its messages.
 *
 * The first non-blank line must be a JSON object of type "session_meta" whose
 * `payload.meta` carries `id`, `cwd` and `timestamp`. A file whose header lacks
 * that object is rejected with null; previously the property access threw
 * instead. "response_item" and "event_msg" lines are counted as messages;
 * unparsable lines are ignored.
 *
 * NOTE(review): only the nested `payload.meta` layout is accepted here, as in
 * the original code — confirm it matches the files the Codex CLI writes.
 *
 * @param {string} filePath
 * @returns {{id: *, cwd: *, timestamp: *, messageCount: number, fileSize: number}|null}
 *   Null when the file is unreadable, empty, or has no valid header.
 *   `fileSize` is the file's length in characters.
 */
function getSessionMeta(filePath) {
  let raw;
  try {
    raw = readFileSync5(filePath, "utf-8");
  } catch {
    return null;
  }

  const lines = raw.split("\n").filter((l) => l.trim());
  if (lines.length === 0) return null;

  let header;
  try {
    header = JSON.parse(lines[0]);
  } catch {
    return null;
  }
  if (header?.type !== "session_meta") return null;

  const sessionInfo = header.payload?.meta;
  if (sessionInfo === null || typeof sessionInfo !== "object") return null;

  let messageCount = 0;
  for (const line of lines) {
    try {
      const { type } = JSON.parse(line);
      if (type === "response_item" || type === "event_msg") {
        messageCount++;
      }
    } catch {
      // Unparsable or non-object line: it does not count as a message.
    }
  }

  return {
    id: sessionInfo.id,
    cwd: sessionInfo.cwd,
    timestamp: sessionInfo.timestamp,
    messageCount,
    fileSize: raw.length
  };
}
/**
 * Import provider for Codex sessions stored as JSONL files.
 *
 * `detect` lists the sessions recorded for a project directory; `readChunks`
 * turns selected session files into token-bounded text chunks.
 */
var codexProvider = {
  name: "codex",
  displayName: "Codex",
  /**
   * Find Codex sessions whose recorded working directory equals `projectPath`.
   *
   * @param {string} projectPath - Project directory to match against the session `cwd`.
   * @returns {Array<object>} Session descriptors (id is the file path), newest first.
   */
  detect(projectPath) {
    const sessions = [];
    const allFiles = [
      ...findJsonlFiles(SESSIONS_DIR),
      ...findJsonlFiles(ARCHIVED_DIR)
    ];
    for (const filePath of allFiles) {
      const meta3 = getSessionMeta(filePath);
      if (!meta3) continue;
      if (meta3.cwd !== projectPath) continue;
      if (meta3.messageCount < 3) continue;
      // An unparseable timestamp must not abort detection: toISOString()
      // throws on an invalid date, so fall back to "now" as readChunks does.
      const parsedTs = new Date(meta3.timestamp).getTime();
      const ts = Number.isNaN(parsedTs) ? Date.now() : parsedTs;
      const estimatedTokens = Math.ceil(meta3.fileSize / 5);
      const dateStr = new Date(ts).toISOString().slice(0, 10);
      sessions.push({
        id: filePath,
        label: `${dateStr} (${meta3.messageCount} messages)`,
        startedAt: ts,
        // Only the session start time is known here, so it doubles as last activity.
        lastActivityAt: ts,
        estimatedTokens,
        messageCount: meta3.messageCount
      });
    }
    return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
  },
  /**
   * Convert session files into text chunks of at most roughly `maxTokens` tokens.
   * A single message larger than the limit still becomes its own chunk.
   *
   * @param {string} _projectPath - Unused; sessions are addressed by file path.
   * @param {string[]} sessionIds - Session file paths as returned by `detect`.
   * @param {number} [maxTokens] - Soft token limit per chunk.
   * @returns {Array<{label: string, text: string, estimatedTokens: number, timestamp: number}>}
   */
  readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS2) {
    const chunks = [];
    for (const filePath of sessionIds) {
      const lines = parseJSONL2(filePath);

      let sessionTimestamp = Date.now();
      const firstLine = lines[0];
      if (firstLine?.type === "session_meta") {
        const payload = firstLine.payload;
        // Accept the timestamp nested under `meta` or directly on the payload.
        const ts = new Date(payload?.meta?.timestamp ?? payload?.timestamp).getTime();
        if (!Number.isNaN(ts)) sessionTimestamp = ts;
      }

      const texts = [];
      for (const line of lines) {
        switch (line?.type) {
          case "response_item": {
            const text4 = responseItemToText(line.payload);
            if (text4) texts.push(text4);
            break;
          }
          case "event_msg": {
            const output = line.payload?.output;
            if (output) {
              texts.push(`[exec] ${truncate2(output, MAX_TOOL_OUTPUT_CHARS2)}`);
            }
            break;
          }
          case "compacted": {
            const history = line.payload?.replacement_history;
            if (Array.isArray(history)) {
              for (const item of history) {
                const text4 = responseItemToText(item);
                if (text4) texts.push(text4);
              }
            }
            break;
          }
          default:
            break;
        }
      }
      if (texts.length === 0) continue;

      let currentTexts = [];
      let currentTokens = 0;
      let chunkIndex = 0;
      const flushChunk = () => {
        if (currentTexts.length === 0) return;
        chunkIndex++;
        const text4 = currentTexts.join("\n\n");
        chunks.push({
          label: `Codex ${new Date(sessionTimestamp).toISOString().slice(0, 10)} (${chunkIndex})`,
          text: text4,
          estimatedTokens: estimateTokens5(text4),
          timestamp: sessionTimestamp
        });
        currentTexts = [];
        currentTokens = 0;
      };
      for (const text4 of texts) {
        const msgTokens = estimateTokens5(text4);
        // Close the current chunk before it would exceed the limit.
        if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
          flushChunk();
        }
        currentTexts.push(text4);
        currentTokens += msgTokens;
      }
      flushChunk();
    }
    return chunks;
  }
};
32365
+ registerProvider(codexProvider);
32366
+
32367
+ // src/import/providers/opencode.ts
32368
+ import { existsSync as existsSync7 } from "fs";
32369
+ import { join as join10 } from "path";
32370
+ import { homedir as homedir4 } from "os";
32371
+ var OPENCODE_DB_PATH = join10(
32372
+ process.env.XDG_DATA_HOME || join10(homedir4(), ".local", "share"),
32373
+ "opencode",
32374
+ "opencode.db"
32375
+ );
32376
+ var MAX_TOOL_OUTPUT_CHARS3 = 500;
32377
+ var DEFAULT_MAX_TOKENS3 = 12288;
/**
 * Rough token estimate for a piece of text: one token per three characters,
 * rounded up.
 *
 * @param {string} text4 - Text to measure.
 * @returns {number} Estimated token count.
 */
function estimateTokens6(text4) {
  const charsPerToken = 3;
  return Math.ceil(text4.length / charsPerToken);
}
/**
 * Shorten `text4` to at most `max` characters, appending "..." when anything
 * was cut off. Text that already fits is returned unchanged.
 *
 * @param {string} text4 - Text to shorten.
 * @param {number} max - Maximum number of characters kept from the original.
 * @returns {string}
 */
function truncate3(text4, max) {
  return text4.length > max ? `${text4.slice(0, max)}...` : text4;
}
/**
 * Open the OpenCode database at `OPENCODE_DB_PATH` for reading.
 *
 * @returns {object | null} A `Database` handle, or `null` when the file does
 *   not exist or cannot be opened.
 */
function openDB() {
  if (existsSync7(OPENCODE_DB_PATH)) {
    try {
      // Both spellings of the read-only flag are passed, as in the original.
      const options = { readonly: true, readOnly: true };
      return new Database(OPENCODE_DB_PATH, options);
    } catch {
      // Opening failed: treated the same as "no database".
    }
  }
  return null;
}
/**
 * Check whether a table with the given name is listed in `sqlite_master`.
 *
 * @param {object} database - Handle exposing `query(sql).get(param)`.
 * @param {string} table - Table name to look for.
 * @returns {boolean} `true` when the lookup returns a row.
 */
function tableExists(database, table) {
  const lookup = database.query("SELECT name FROM sqlite_master WHERE type='table' AND name=?");
  const match = lookup.get(table);
  return match !== null && match !== undefined;
}
/**
 * Render the parts of one message as plain text, one segment per line.
 *
 * Text parts contribute their text; tool parts contribute a
 * `[tool: name] output` line, but only when the tool state is "completed" and
 * has output (truncated to `MAX_TOOL_OUTPUT_CHARS3`). Other parts are ignored.
 *
 * @param {Array<object>} parts - Parsed part records of a message.
 * @returns {string} The segments joined with newlines ("" when there are none).
 */
function partsToConversationText(parts) {
  const toSegment = (part) => {
    if (part.type === "text") {
      return part.text ? part.text : null;
    }
    if (part.type !== "tool" || !part.tool) return null;
    if (part.state?.status !== "completed" || !part.state.output) return null;
    return `[tool: ${part.tool}] ${truncate3(part.state.output, MAX_TOOL_OUTPUT_CHARS3)}`;
  };
  return parts
    .map(toSegment)
    .filter((segment) => segment !== null)
    .join("\n");
}
/**
 * Import provider for OpenCode sessions stored in its SQLite database.
 *
 * `detect` lists top-level sessions of the project whose worktree equals the
 * given path; `readChunks` renders the messages of selected sessions into
 * token-bounded text chunks.
 */
var opencodeProvider = {
  name: "opencode",
  displayName: "OpenCode",
  /**
   * List importable sessions for a project.
   *
   * @param {string} projectPath - Must equal the `worktree` column of a project row.
   * @returns {Array<object>} Session descriptors ordered by last update, newest first;
   *   empty when the database, the required tables, or the project is missing.
   */
  detect(projectPath) {
    const database = openDB();
    if (!database) return [];
    try {
      const requiredTables = ["project", "session", "message"];
      if (!requiredTables.every((table) => tableExists(database, table))) {
        return [];
      }
      const project = database.query("SELECT id FROM project WHERE worktree = ?").get(projectPath);
      if (!project) return [];
      // Child sessions (non-null parent_id) are excluded by the query.
      const sessions = database.query(
        `SELECT s.id, s.title, s.time_created, s.time_updated,
                (SELECT COUNT(*) FROM message m WHERE m.session_id = s.id) as msg_count
         FROM session s
         WHERE s.project_id = ? AND s.parent_id IS NULL
         ORDER BY s.time_updated DESC`
      ).all(project.id);
      const results = [];
      for (const sess of sessions) {
        if (sess.msg_count < 3) continue;
        const dateStr = new Date(sess.time_created).toISOString().slice(0, 10);
        const label = sess.title
          ? `${dateStr} - ${sess.title} (${sess.msg_count} messages)`
          : `${dateStr} (${sess.msg_count} messages)`;
        results.push({
          id: sess.id,
          label,
          startedAt: sess.time_created,
          lastActivityAt: sess.time_updated,
          // Flat per-message guess; message bodies are not read during detection.
          estimatedTokens: sess.msg_count * 500,
          messageCount: sess.msg_count
        });
      }
      return results;
    } finally {
      database.close();
    }
  },
  /**
   * Render the given sessions as text chunks of at most roughly `maxTokens` tokens.
   *
   * Message text comes exclusively from the `part` table, so nothing is
   * produced when that table (or `message`) is absent.
   *
   * @param {string} _projectPath - Unused; sessions are addressed by id.
   * @param {string[]} sessionIds - Session ids as returned by `detect`.
   * @param {number} [maxTokens] - Soft token limit per chunk.
   * @returns {Array<{label: string, text: string, estimatedTokens: number, timestamp: number}>}
   */
  readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS3) {
    const database = openDB();
    if (!database) return [];
    const chunks = [];
    try {
      if (!tableExists(database, "message") || !tableExists(database, "part")) {
        return chunks;
      }
      // Prepare both statements once instead of once per session / message.
      const messageQuery = database.query(
        `SELECT id, data, time_created FROM message
         WHERE session_id = ?
         ORDER BY time_created ASC`
      );
      const partQuery = database.query(
        `SELECT data FROM part
         WHERE message_id = ?
         ORDER BY time_created ASC`
      );
      for (const sessionId of sessionIds) {
        const messages = messageQuery.all(sessionId);
        if (messages.length === 0) continue;

        const textMessages = [];
        for (const msg of messages) {
          let msgData;
          try {
            msgData = JSON.parse(msg.data);
          } catch {
            // A message with corrupt metadata is skipped entirely.
            continue;
          }
          // `?.` covers metadata that parses to null or a primitive.
          const role = msgData?.role ?? "unknown";
          const parsedParts = [];
          for (const row of partQuery.all(msg.id)) {
            try {
              parsedParts.push(JSON.parse(row.data));
            } catch {
              // Ignore individual corrupt parts; keep the rest of the message.
            }
          }
          const contentText = partsToConversationText(parsedParts);
          if (!contentText.trim()) continue;
          textMessages.push({
            text: `[${role}] ${contentText}`,
            timestamp: msg.time_created
          });
        }
        if (textMessages.length === 0) continue;

        let currentTexts = [];
        let currentTokens = 0;
        // Each chunk is stamped with the time of its first message.
        let chunkStart = textMessages[0].timestamp;
        let chunkIndex = 0;
        const flushChunk = () => {
          if (currentTexts.length === 0) return;
          chunkIndex++;
          const text4 = currentTexts.join("\n\n");
          chunks.push({
            label: `OpenCode ${new Date(chunkStart).toISOString().slice(0, 10)} (${chunkIndex})`,
            text: text4,
            estimatedTokens: estimateTokens6(text4),
            timestamp: chunkStart
          });
          currentTexts = [];
          currentTokens = 0;
        };
        for (const msg of textMessages) {
          const msgTokens = estimateTokens6(msg.text);
          if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
            flushChunk();
            chunkStart = msg.timestamp;
          }
          currentTexts.push(msg.text);
          currentTokens += msgTokens;
        }
        flushChunk();
      }
    } finally {
      database.close();
    }
    return chunks;
  }
};
32526
+ registerProvider(opencodeProvider);
32527
+
32528
+ // src/import/providers/cline.ts
32529
+ import { readFileSync as readFileSync6, existsSync as existsSync8, statSync as statSync7 } from "fs";
32530
+ import { join as join11 } from "path";
32531
+ import { homedir as homedir5 } from "os";
32532
+ var MAX_TOOL_OUTPUT_CHARS4 = 500;
32533
+ var DEFAULT_MAX_TOKENS4 = 12288;
32534
+ var EXTENSION_IDS = [
32535
+ "saoudrizwan.claude-dev",
32536
+ "cline.cline"
32537
+ ];
/**
 * Approximate the number of tokens in a string, assuming three characters
 * per token and rounding up.
 *
 * @param {string} text4 - Text to measure.
 * @returns {number} Estimated token count.
 */
function estimateTokens7(text4) {
  const { length } = text4;
  return Math.ceil(length / 3);
}
/**
 * Limit a string to `max` characters; when it is longer, the first `max`
 * characters are kept and "..." is appended.
 *
 * @param {string} text4 - Text to shorten.
 * @param {number} max - Number of characters to keep.
 * @returns {string}
 */
function truncate4(text4, max) {
  const fits = text4.length <= max;
  if (fits) {
    return text4;
  }
  const head = text4.slice(0, max);
  return `${head}...`;
}
/**
 * Locate the existing global-storage directories of the Cline extension.
 *
 * For each known editor flavour (Code, Code - Insiders, VSCodium) the
 * platform-specific `User/globalStorage` folder is combined with every id in
 * `EXTENSION_IDS`; on platforms other than macOS and Windows the
 * `~/.vscode/data` and `~/.vscode-insiders/data` locations are checked too.
 *
 * @returns {string[]} The candidate directories that exist on disk.
 */
function findGlobalStorageDirs() {
  const home = homedir5();
  const editors = ["Code", "Code - Insiders", "VSCodium"];
  const storageUnder = (root) =>
    editors.map((editor) => join11(root, editor, "User", "globalStorage"));

  let basePaths;
  switch (process.platform) {
    case "darwin":
      basePaths = storageUnder(join11(home, "Library", "Application Support"));
      break;
    case "win32":
      basePaths = storageUnder(process.env.APPDATA || join11(home, "AppData", "Roaming"));
      break;
    default:
      basePaths = [
        ...storageUnder(process.env.XDG_CONFIG_HOME || join11(home, ".config")),
        join11(home, ".vscode", "data", "User", "globalStorage"),
        join11(home, ".vscode-insiders", "data", "User", "globalStorage")
      ];
      break;
  }

  return basePaths
    .flatMap((base) => EXTENSION_IDS.map((extId) => join11(base, extId)))
    .filter((dir) => existsSync8(dir));
}
/**
 * Load the task-history entries recorded for a project.
 *
 * Two locations are tried in order: `state/taskHistory.json` and
 * `taskHistory.json` inside `storageDir`. The first file that exists and
 * parses to an array wins; its entries are filtered to those whose
 * `cwdOnTaskInitialization` equals `projectPath`.
 *
 * @param {string} storageDir - Extension global-storage directory.
 * @param {string} projectPath - Project directory to match.
 * @returns {Array<object>} Matching history entries, or `[]` when none can be read.
 */
function loadTaskHistory(storageDir, projectPath) {
  const candidates = [["state", "taskHistory.json"], ["taskHistory.json"]].map(
    (segments) => join11(storageDir, ...segments)
  );
  for (const candidate of candidates) {
    if (existsSync8(candidate)) {
      try {
        const items = JSON.parse(readFileSync6(candidate, "utf-8"));
        if (Array.isArray(items)) {
          return items.filter((item) => item.cwdOnTaskInitialization === projectPath);
        }
      } catch {
        // Unreadable or malformed file: fall through to the next candidate.
      }
    }
  }
  return [];
}
/**
 * Read the API conversation history stored in a task directory.
 *
 * @param {string} taskDir - Directory containing `api_conversation_history.json`.
 * @returns {Array<object>} The parsed message array, or `[]` when the file is
 *   missing, unreadable, malformed, or does not contain an array.
 */
function readConversation(taskDir) {
  const historyFile = join11(taskDir, "api_conversation_history.json");
  if (existsSync8(historyFile)) {
    try {
      const parsed = JSON.parse(readFileSync6(historyFile, "utf-8"));
      if (Array.isArray(parsed)) return parsed;
    } catch {
      // Read or parse failure is reported as an empty conversation.
    }
  }
  return [];
}
/**
 * Render one content block of a conversation message as text.
 *
 * - `text` blocks yield their text.
 * - `tool_use` blocks yield `[tool: name] <JSON input>`, with the input
 *   truncated to `MAX_TOOL_OUTPUT_CHARS4`.
 * - `tool_result` blocks yield `[tool_result] <content>`, where array content
 *   is reduced to its text items; empty content yields `null`.
 * - Anything else (including a missing block) yields `null`.
 *
 * @param {object} block - Content block with a `type` field.
 * @returns {string | null | undefined} Text for the block, or a falsy value
 *   when there is nothing to render.
 */
function blockToText2(block) {
  switch (block?.type) {
    case "text":
      return block.text;
    case "tool_use": {
      // JSON.stringify returns undefined for a missing input; fall back to ""
      // so truncation never receives a non-string.
      const serialized = JSON.stringify(block.input) ?? "";
      return `[tool: ${block.name}] ${truncate4(serialized, MAX_TOOL_OUTPUT_CHARS4)}`;
    }
    case "tool_result": {
      let content3 = "";
      if (typeof block.content === "string") {
        content3 = block.content;
      } else if (Array.isArray(block.content)) {
        // Non-text items (and null entries) carry no importable text.
        content3 = block.content
          .filter((b) => b?.type === "text")
          .map((b) => b.text)
          .join("\n");
      }
      return content3 ? `[tool_result] ${truncate4(content3, MAX_TOOL_OUTPUT_CHARS4)}` : null;
    }
    default:
      return null;
  }
}
/**
 * Render a conversation message as `[role] text`.
 *
 * String content is used as is; array content is rendered block by block via
 * `blockToText2` and joined with newlines.
 *
 * @param {{role: string, content: (string | Array<object>)}} msg - Message to render.
 * @returns {string | null} The rendered text, or `null` when the message has
 *   no renderable content (including content that is neither a string nor an array).
 */
function messageToText(msg) {
  const { role, content } = msg;
  if (typeof content === "string") {
    return content ? `[${role}] ${content}` : null;
  }
  // Missing or unexpected content shapes are skipped rather than crashing the import.
  if (!Array.isArray(content)) return null;
  const parts = content.map((block) => blockToText2(block)).filter(Boolean);
  return parts.length > 0 ? `[${role}] ${parts.join("\n")}` : null;
}
/**
 * Import provider for Cline tasks stored in the editor's global storage.
 *
 * `detect` lists tasks started in a project directory; `readChunks` renders
 * the API conversation of selected tasks into token-bounded text chunks.
 */
var clineProvider = {
  name: "cline",
  displayName: "Cline",
  /**
   * List Cline tasks whose initial working directory equals `projectPath`.
   *
   * @param {string} projectPath - Project directory to match.
   * @returns {Array<object>} Session descriptors (id is the task directory), newest first.
   */
  detect(projectPath) {
    const sessions = [];
    for (const storageDir of findGlobalStorageDirs()) {
      for (const task of loadTaskHistory(storageDir, projectPath)) {
        // Entries without an id cannot be mapped to a task directory.
        if (task?.id === null || task?.id === undefined) continue;
        const taskDir = join11(storageDir, "tasks", String(task.id));
        if (!existsSync8(taskDir)) continue;
        const messages = readConversation(taskDir);
        if (messages.length < 3) continue;

        let historyStat = null;
        try {
          historyStat = statSync7(join11(taskDir, "api_conversation_history.json"));
        } catch {
          // No stat available: the per-message estimate below is used instead.
        }
        const estimatedTokens = historyStat
          ? Math.ceil(historyStat.size / 5)
          : messages.length * 500;

        // toISOString() throws on an invalid date, so a missing or malformed
        // `ts` falls back to the history file's mtime (or the current time).
        const parsedTs = new Date(task.ts).getTime();
        const ts = Number.isNaN(parsedTs) ? historyStat?.mtimeMs ?? Date.now() : parsedTs;
        const dateStr = new Date(ts).toISOString().slice(0, 10);
        const title = typeof task.task === "string" ? task.task : "";
        const label = title
          ? `${dateStr} - ${truncate4(title, 60)} (${messages.length} messages)`
          : `${dateStr} (${messages.length} messages)`;

        sessions.push({
          id: taskDir,
          label,
          startedAt: ts,
          lastActivityAt: ts,
          estimatedTokens,
          messageCount: messages.length
        });
      }
    }
    return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
  },
  /**
   * Render the conversations of the given task directories as text chunks of
   * at most roughly `maxTokens` tokens.
   *
   * @param {string} _projectPath - Unused; tasks are addressed by directory.
   * @param {string[]} sessionIds - Task directories as returned by `detect`.
   * @param {number} [maxTokens] - Soft token limit per chunk.
   * @returns {Array<{label: string, text: string, estimatedTokens: number, timestamp: number}>}
   */
  readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS4) {
    const chunks = [];
    for (const taskDir of sessionIds) {
      const messages = readConversation(taskDir);
      if (messages.length === 0) continue;

      // The task directory's modification time stands in for the session time.
      let sessionTimestamp;
      try {
        sessionTimestamp = statSync7(taskDir).mtimeMs;
      } catch {
        sessionTimestamp = Date.now();
      }

      const texts = [];
      for (const msg of messages) {
        const text4 = messageToText(msg);
        if (text4) texts.push(text4);
      }
      if (texts.length === 0) continue;

      let currentTexts = [];
      let currentTokens = 0;
      let chunkIndex = 0;
      const flushChunk = () => {
        if (currentTexts.length === 0) return;
        chunkIndex++;
        const text4 = currentTexts.join("\n\n");
        chunks.push({
          label: `Cline ${new Date(sessionTimestamp).toISOString().slice(0, 10)} (${chunkIndex})`,
          text: text4,
          estimatedTokens: estimateTokens7(text4),
          timestamp: sessionTimestamp
        });
        currentTexts = [];
        currentTokens = 0;
      };
      for (const text4 of texts) {
        const msgTokens = estimateTokens7(text4);
        if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
          flushChunk();
        }
        currentTexts.push(text4);
        currentTokens += msgTokens;
      }
      flushChunk();
    }
    return chunks;
  }
};
32725
+ registerProvider(clineProvider);
32726
+
32727
+ // src/import/providers/continue.ts
32728
+ import { readdirSync as readdirSync5, readFileSync as readFileSync7, existsSync as existsSync9 } from "fs";
32729
+ import { join as join12 } from "path";
32730
+ import { homedir as homedir6 } from "os";
32731
+ var MAX_TOOL_OUTPUT_CHARS5 = 500;
32732
+ var DEFAULT_MAX_TOKENS5 = 12288;
/**
 * Estimate how many tokens a string occupies, using the heuristic of three
 * characters per token (rounded up).
 *
 * @param {string} text4 - Text to measure.
 * @returns {number} Estimated token count.
 */
function estimateTokens8(text4) {
  const estimate = text4.length / 3;
  return Math.ceil(estimate);
}
/**
 * Cut `text4` down to its first `max` characters followed by "..." when it is
 * longer than `max`; otherwise return it untouched.
 *
 * @param {string} text4 - Text to shorten.
 * @param {number} max - Number of characters to keep.
 * @returns {string}
 */
function truncate5(text4, max) {
  if (text4.length > max) {
    return `${text4.slice(0, max)}...`;
  }
  return text4;
}
/**
 * Resolve the Continue data directory: the `CONTINUE_GLOBAL_DIR` environment
 * variable when it is set to a non-empty value, otherwise `~/.continue`.
 *
 * @returns {string} Directory path.
 */
function continueDir() {
  const override = process.env.CONTINUE_GLOBAL_DIR;
  if (override) {
    return override;
  }
  return join12(homedir6(), ".continue");
}
/**
 * Load the session index (`sessions/sessions.json`) from the Continue data
 * directory.
 *
 * @returns {Array<object>} The index entries, or `[]` when the file is
 *   missing, unreadable, malformed, or does not contain an array.
 */
function loadSessionIndex() {
  const indexPath = join12(continueDir(), "sessions", "sessions.json");
  if (existsSync9(indexPath)) {
    try {
      const parsed = JSON.parse(readFileSync7(indexPath, "utf-8"));
      if (Array.isArray(parsed)) return parsed;
    } catch {
      // A broken index is treated as an empty one.
    }
  }
  return [];
}
/**
 * Load one Continue session file (`sessions/<sessionId>.json`).
 *
 * @param {string} sessionId - Session id; also the file's base name.
 * @returns {*} The parsed JSON value, or `null` when the file is missing,
 *   unreadable, or not valid JSON.
 */
function loadSession(sessionId) {
  const sessionFile = join12(continueDir(), "sessions", `${sessionId}.json`);
  if (existsSync9(sessionFile)) {
    try {
      return JSON.parse(readFileSync7(sessionFile, "utf-8"));
    } catch {
      // Fall through: an unreadable session is reported as missing.
    }
  }
  return null;
}
/**
 * Reduce message content to plain text.
 *
 * @param {*} content3 - A string, or an array of parts of which only
 *   `{ type: "text", text: string }` entries are used.
 * @returns {string} The string itself, the text parts joined with newlines,
 *   or "" for any other kind of value.
 */
function extractMessageContent(content3) {
  if (typeof content3 === "string") {
    return content3;
  }
  if (!Array.isArray(content3)) {
    return "";
  }
  const texts = [];
  for (const part of content3) {
    if (part.type === "text" && typeof part.text === "string") {
      texts.push(part.text);
    }
  }
  return texts.join("\n");
}
/**
 * Render one Continue history item as `[role] text`.
 *
 * The text combines the message content, one `[tool: name] args` line per
 * tool call, and one `[tool_result] output` line per tool-call state whose
 * status is "done". System messages and items without any text yield `null`.
 *
 * @param {object} item - History item with `message` and optional `toolCallStates`.
 * @returns {string | null}
 */
function historyItemToText(item) {
  const msg = item.message;
  if (!msg) return null;
  if (msg.role === "system") return null;

  // Tool output may be a plain string or a list of items carrying `content`
  // (NOTE(review): the list form is assumed from Continue's context-item
  // shape — confirm). Without this, a list is interpolated as "[object Object]".
  const outputToText = (output) => {
    if (typeof output === "string") return output;
    if (!Array.isArray(output)) return "";
    return output
      .map((entry) => {
        if (typeof entry === "string") return entry;
        return typeof entry?.content === "string" ? entry.content : "";
      })
      .filter(Boolean)
      .join("\n");
  };

  const parts = [];
  const content3 = extractMessageContent(msg.content);
  if (content3) parts.push(content3);

  if (Array.isArray(msg.toolCalls)) {
    for (const call of msg.toolCalls) {
      if (!call?.function) continue;
      const rawArgs = call.function.arguments;
      let argText = "{}";
      if (typeof rawArgs === "string" && rawArgs) {
        argText = rawArgs;
      } else if (rawArgs && typeof rawArgs === "object") {
        argText = JSON.stringify(rawArgs);
      }
      parts.push(`[tool: ${call.function.name}] ${truncate5(argText, MAX_TOOL_OUTPUT_CHARS5)}`);
    }
  }

  if (Array.isArray(item.toolCallStates)) {
    for (const state of item.toolCallStates) {
      if (state?.status !== "done") continue;
      const output = outputToText(state.output);
      if (output) {
        parts.push(`[tool_result] ${truncate5(output, MAX_TOOL_OUTPUT_CHARS5)}`);
      }
    }
  }

  if (parts.length === 0) return null;
  const role = msg.role === "tool" ? "tool_result" : msg.role;
  return `[${role}] ${parts.join("\n")}`;
}
/**
 * Import provider for Continue sessions stored as JSON files.
 *
 * `detect` lists sessions recorded for a workspace directory, using the
 * session index first and a directory scan for sessions missing from it;
 * `readChunks` renders selected sessions into token-bounded text chunks.
 */
var continueProvider = {
  name: "continue",
  displayName: "Continue",
  /**
   * List Continue sessions whose workspace directory equals `projectPath`.
   *
   * @param {string} projectPath - Workspace directory to match.
   * @returns {Array<object>} Session descriptors, newest first.
   */
  detect(projectPath) {
    // Creation dates are accepted as ISO strings, epoch numbers, or epoch
    // milliseconds serialized as a digit string (NOTE(review): the digit-string
    // form is assumed from Continue's index files — confirm). `new Date()`
    // cannot parse a bare digit string, and toISOString() throws on the
    // resulting invalid date, so such values are converted to numbers first.
    const toEpochMs = (value) => {
      const digitsOnly = typeof value === "string" && /^\d+$/.test(value.trim());
      const parsed = new Date(digitsOnly ? Number(value) : value).getTime();
      return Number.isNaN(parsed) ? Date.now() : parsed;
    };

    const sessions = [];
    for (const meta3 of loadSessionIndex()) {
      if (meta3.workspaceDirectory !== projectPath) continue;
      const session = loadSession(meta3.sessionId);
      if (!session || !session.history || session.history.length < 3) continue;
      const ts = toEpochMs(meta3.dateCreated);
      const dateStr = new Date(ts).toISOString().slice(0, 10);
      const messageCount = session.history.length;
      const label = meta3.title
        ? `${dateStr} - ${truncate5(meta3.title, 60)} (${messageCount} messages)`
        : `${dateStr} (${messageCount} messages)`;
      sessions.push({
        id: meta3.sessionId,
        label,
        startedAt: ts,
        lastActivityAt: ts,
        estimatedTokens: messageCount * 500,
        messageCount
      });
    }

    // Pick up session files that exist on disk but are not in the index.
    const sessionsDir = join12(continueDir(), "sessions");
    if (existsSync9(sessionsDir)) {
      const existingIds = new Set(sessions.map((s) => s.id));
      let entries;
      try {
        entries = readdirSync5(sessionsDir);
      } catch {
        entries = [];
      }
      const suffix = ".json";
      for (const entry of entries) {
        if (!entry.endsWith(suffix) || entry === "sessions.json") continue;
        // Strip only the trailing extension; replace() would remove the first
        // ".json" found anywhere in the name.
        const sessionId = entry.slice(0, -suffix.length);
        if (existingIds.has(sessionId)) continue;
        const session = loadSession(sessionId);
        if (!session) continue;
        if (session.workspaceDirectory !== projectPath) continue;
        if (!session.history || session.history.length < 3) continue;
        const name = session.title ? truncate5(session.title, 60) : sessionId.slice(0, 8);
        // No creation date is available for these sessions; the scan time is used.
        const now = Date.now();
        sessions.push({
          id: sessionId,
          label: `${name} (${session.history.length} messages)`,
          startedAt: now,
          lastActivityAt: now,
          estimatedTokens: session.history.length * 500,
          messageCount: session.history.length
        });
      }
    }
    return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
  },
  /**
   * Render the given sessions as text chunks of at most roughly `maxTokens` tokens.
   *
   * @param {string} _projectPath - Unused; sessions are addressed by id.
   * @param {string[]} sessionIds - Session ids as returned by `detect`.
   * @param {number} [maxTokens] - Soft token limit per chunk.
   * @returns {Array<{label: string, text: string, estimatedTokens: number, timestamp: number}>}
   */
  readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS5) {
    const chunks = [];
    for (const sessionId of sessionIds) {
      const session = loadSession(sessionId);
      if (!session || !session.history) continue;

      const texts = [];
      for (const item of session.history) {
        const text4 = historyItemToText(item);
        if (text4) texts.push(text4);
      }
      if (texts.length === 0) continue;

      // The import time is used as the timestamp of every chunk.
      const sessionTimestamp = Date.now();
      let currentTexts = [];
      let currentTokens = 0;
      let chunkIndex = 0;
      const flushChunk = () => {
        if (currentTexts.length === 0) return;
        chunkIndex++;
        const text4 = currentTexts.join("\n\n");
        chunks.push({
          label: `Continue ${session.title || sessionId.slice(0, 8)} (${chunkIndex})`,
          text: text4,
          estimatedTokens: estimateTokens8(text4),
          timestamp: sessionTimestamp
        });
        currentTexts = [];
        currentTokens = 0;
      };
      for (const text4 of texts) {
        const msgTokens = estimateTokens8(text4);
        if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
          flushChunk();
        }
        currentTexts.push(text4);
        currentTokens += msgTokens;
      }
      flushChunk();
    }
    return chunks;
  }
};
32892
+ registerProvider(continueProvider);
32893
+
32894
+ // src/import/providers/pi.ts
32895
+ import { readdirSync as readdirSync6, readFileSync as readFileSync8, statSync as statSync8 } from "fs";
32896
+ import { join as join13 } from "path";
32897
+ import { homedir as homedir7 } from "os";
32898
+ var PI_DIR = join13(homedir7(), ".pi", "agent", "sessions");
32899
+ var MAX_TOOL_OUTPUT_CHARS6 = 500;
32900
+ var DEFAULT_MAX_TOKENS6 = 12288;
/**
 * Estimate the token count of a string at three characters per token,
 * rounding any remainder up.
 *
 * @param {string} text4 - Text to measure.
 * @returns {number} Estimated token count.
 */
function estimateTokens9(text4) {
  const CHARS_PER_TOKEN = 3;
  const characters = text4.length;
  return Math.ceil(characters / CHARS_PER_TOKEN);
}
30348
- function backfillMetrics() {
30349
- const rows = db().query(
30350
- "SELECT id, source_ids, token_count FROM distillations WHERE r_compression IS NULL"
30351
- ).all();
30352
- if (!rows.length) return 0;
30353
- const update2 = db().prepare(
30354
- "UPDATE distillations SET r_compression = ?, c_norm = ? WHERE id = ?"
30355
- );
30356
- let updated = 0;
30357
- for (const row of rows) {
30358
- const sourceIds = parseSourceIds(row.source_ids);
30359
- if (!sourceIds.length) continue;
30360
- const placeholders = sourceIds.map(() => "?").join(",");
30361
- const sources = db().query(
30362
- `SELECT tokens, created_at FROM temporal_messages WHERE id IN (${placeholders})`
30363
- ).all(...sourceIds);
30364
- if (!sources.length) continue;
30365
- const sourceTokens = sources.reduce((sum, s) => sum + s.tokens, 0);
30366
- const timestamps = sources.map((s) => s.created_at);
30367
- const rComp = compressionRatio(row.token_count, sourceTokens);
30368
- const cNorm = temporalCnorm(timestamps);
30369
- update2.run(rComp, cNorm, row.id);
30370
- updated++;
30371
- }
30372
- if (updated > 0) {
30373
- info(
30374
- `backfilled metrics for ${updated} distillations (${rows.length - updated} skipped \u2014 missing sources)`
30375
- );
30376
- }
30377
- return updated;
/**
 * Return `text4` unchanged when it has at most `max` characters; otherwise
 * return its first `max` characters followed by "...".
 *
 * @param {string} text4 - Text to shorten.
 * @param {number} max - Number of characters to keep.
 * @returns {string}
 */
function truncate6(text4, max) {
  const overflow = text4.length - max;
  return overflow > 0 ? `${text4.slice(0, max)}...` : text4;
}
30379
-
30380
- // src/curator.ts
30381
- var curator_exports = {};
30382
- __export(curator_exports, {
30383
- consolidate: () => consolidate,
30384
- resetCurationTracker: () => resetCurationTracker,
30385
- run: () => run2
30386
- });
30387
- var MAX_ENTRY_CONTENT_LENGTH = 1200;
30388
- function parseOps(text4) {
30389
- const cleaned = text4.trim().replace(/^```json?\s*/i, "").replace(/\s*```$/i, "");
/**
 * Encode a working directory as the name of its Pi session folder: one
 * leading path separator is dropped, every remaining separator (and drive
 * colon) becomes "-", and the result is wrapped in "--".
 *
 * Backslashes and colons are handled so Windows paths such as `C:\Users\me`
 * produce a valid folder name (NOTE(review): treating `\` and `:` like `/`
 * is assumed to match Pi's own folder naming — confirm). POSIX paths without
 * colons encode exactly as before.
 *
 * @param {string} cwd - Absolute working directory.
 * @returns {string} Folder name, e.g. `--home-user-project--`.
 */
function encodeCwd(cwd) {
  const encoded = cwd.replace(/^[/\\]/, "").replace(/[/\\:]/g, "-");
  return `--${encoded}--`;
}
/**
 * Parse a JSONL file into an array of values, one per non-blank line.
 * Lines that are not valid JSON are skipped.
 *
 * @param {string} filePath - File to read.
 * @returns {Array<*>} Parsed lines in file order; `[]` when the file cannot be read.
 */
function parseJSONL3(filePath) {
  let raw;
  try {
    raw = readFileSync8(filePath, "utf-8");
  } catch {
    return [];
  }
  return raw.split("\n").reduce((parsed, line) => {
    if (line.trim()) {
      try {
        parsed.push(JSON.parse(line));
      } catch {
        // Ignore the malformed line and keep going.
      }
    }
    return parsed;
  }, []);
}
30400
- var lastCuratedAt = /* @__PURE__ */ new Map();
30401
- async function run2(input) {
30402
- const cfg = config2();
30403
- if (!cfg.curator.enabled) return { created: 0, updated: 0, deleted: 0 };
30404
- const all3 = bySession(input.projectPath, input.sessionID);
30405
- const sessionCuratedAt = lastCuratedAt.get(input.sessionID) ?? 0;
30406
- const recent = all3.filter((m) => m.created_at > sessionCuratedAt);
30407
- if (recent.length < 3) return { created: 0, updated: 0, deleted: 0 };
30408
- const text4 = recent.map((m) => `[${m.role}] ${m.content}`).join("\n\n");
30409
- const existing = forProject(input.projectPath, false);
30410
- const existingForPrompt = existing.map((e) => ({
30411
- id: e.id,
30412
- category: e.category,
30413
- title: e.title,
30414
- content: e.content
30415
- }));
30416
- const userContent = curatorUser({
30417
- messages: text4,
30418
- existing: existingForPrompt
30419
- });
30420
- const model = input.model ?? cfg.model;
30421
- const responseText = await input.llm.prompt(
30422
- CURATOR_SYSTEM,
30423
- userContent,
30424
- { model, workerID: "lore-curator", thinking: false, sessionID: input.sessionID, maxTokens: 2048 }
30425
- );
30426
- if (!responseText) return { created: 0, updated: 0, deleted: 0 };
30427
- const ops = parseOps(responseText);
30428
- let created = 0;
30429
- let updated = 0;
30430
- let deleted = 0;
30431
- const idsToSync = [];
30432
- for (const op of ops) {
30433
- if (op.op === "create") {
30434
- const content3 = op.content.length > MAX_ENTRY_CONTENT_LENGTH ? op.content.slice(0, MAX_ENTRY_CONTENT_LENGTH) + " [truncated \u2014 entry too long]" : op.content;
30435
- const id = create({
30436
- projectPath: op.scope === "project" ? input.projectPath : void 0,
30437
- category: op.category,
30438
- title: op.title,
30439
- content: content3,
30440
- session: input.sessionID,
30441
- scope: op.scope,
30442
- crossProject: op.crossProject ?? true
/**
 * Flatten a session's entry tree into the single conversation path to import.
 *
 * Entries reference their predecessor through `parentId`. Starting at the
 * `session` header, the walk follows the LAST child recorded for each node, so
 * when a conversation was branched the most recently written branch wins.
 *
 * Fallbacks:
 * - no header, or a header without an id: all `message` lines in file order;
 * - no entry carries an id at all: all `message` lines in file order;
 * - nothing is linked to the header id: the walk starts at the last entry that
 *   has no parent (NOTE(review): presumably how Pi writes the first entry of a
 *   session — confirm against the session format).
 *
 * @param {Array<object>} lines - Parsed JSONL lines of one session file.
 * @returns {Array<object>} Entries of the selected path, oldest first (header excluded).
 */
function linearize(lines) {
  if (lines.length === 0) return [];

  const children = new Map();
  const parentless = [];
  let rootLine = null;
  let sawEntryId = false;
  for (const line of lines) {
    if (line.type === "session") {
      rootLine = line;
      continue;
    }
    if (!line.id) continue;
    sawEntryId = true;
    const pid = line.parentId;
    if (pid) {
      const siblings = children.get(pid);
      if (siblings) {
        siblings.push(line);
      } else {
        children.set(pid, [line]);
      }
    } else {
      parentless.push(line);
    }
  }

  if (!rootLine || !rootLine.id || !sawEntryId) {
    return lines.filter((l) => l.type === "message");
  }

  let kids = children.get(rootLine.id);
  if (!kids || kids.length === 0) kids = parentless;

  const result = [];
  // Guards against duplicate or self-referential ids, which would otherwise
  // make the walk loop forever.
  const visited = new Set();
  while (kids && kids.length > 0) {
    const next = kids[kids.length - 1];
    if (visited.has(next.id)) break;
    visited.add(next.id);
    result.push(next);
    kids = children.get(next.id);
  }
  return result;
}
/**
 * Read the header and message count of a Pi session file.
 *
 * @param {string} filePath - Path of the `.jsonl` session file.
 * @returns {{id: *, cwd: *, timestamp: number, messageCount: number, fileSize: number} | null}
 *   `null` when the file has no parsable lines or its first line is not a
 *   `session` header. `timestamp` is epoch milliseconds (the current time when
 *   the header's timestamp cannot be parsed); `fileSize` is 0 when the file
 *   cannot be stat'ed.
 */
function getSessionMeta2(filePath) {
  const lines = parseJSONL3(filePath);
  if (lines.length === 0) return null;
  const [header] = lines;
  if (header.type !== "session") return null;

  let messageCount = 0;
  for (const entry of lines) {
    if (entry.type === "message") messageCount += 1;
  }

  let fileSize = 0;
  try {
    fileSize = statSync8(filePath).size;
  } catch {
    fileSize = 0;
  }

  const parsed = new Date(header.timestamp).getTime();
  const timestamp = Number.isNaN(parsed) ? Date.now() : parsed;
  return { id: header.id, cwd: header.cwd, timestamp, messageCount, fileSize };
}
/**
 * Import provider for Pi sessions stored as JSONL files, one folder per
 * working directory.
 *
 * `detect` lists the sessions in the folder derived from the project path;
 * `readChunks` renders selected session files into token-bounded text chunks.
 */
var piProvider = {
  name: "pi",
  displayName: "Pi",
  /**
   * List Pi sessions recorded for `projectPath`.
   *
   * @param {string} projectPath - Project directory; encoded into the folder name.
   * @returns {Array<object>} Session descriptors (id is the file path), newest first.
   */
  detect(projectPath) {
    const dir = join13(PI_DIR, encodeCwd(projectPath));
    let entries;
    try {
      entries = readdirSync6(dir);
    } catch {
      // No folder for this project means no sessions.
      return [];
    }
    const sessions = [];
    for (const entry of entries) {
      if (!entry.endsWith(".jsonl")) continue;
      const filePath = join13(dir, entry);
      const meta3 = getSessionMeta2(filePath);
      if (!meta3) continue;
      // The folder name is a lossy encoding ("/a/b" and "/a-b" collide), so a
      // header that names a different directory belongs to another project.
      if (typeof meta3.cwd === "string" && meta3.cwd !== projectPath) continue;
      if (meta3.messageCount < 3) continue;
      const dateStr = new Date(meta3.timestamp).toISOString().slice(0, 10);
      sessions.push({
        id: filePath,
        label: `${dateStr} (${meta3.messageCount} messages)`,
        startedAt: meta3.timestamp,
        lastActivityAt: meta3.timestamp,
        estimatedTokens: Math.ceil(meta3.fileSize / 5),
        messageCount: meta3.messageCount
      });
    }
    return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
  },
  /**
   * Render the given session files as text chunks of at most roughly
   * `maxTokens` tokens.
   *
   * @param {string} _projectPath - Unused; sessions are addressed by file path.
   * @param {string[]} sessionIds - Session file paths as returned by `detect`.
   * @param {number} [maxTokens] - Soft token limit per chunk.
   * @returns {Array<{label: string, text: string, estimatedTokens: number, timestamp: number}>}
   */
  readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS6) {
    // Message content may be a string or a list of content blocks; only text
    // blocks are imported. Interpolating a block list directly would produce
    // "[object Object]" instead of the conversation text.
    const contentToText = (content) => {
      if (typeof content === "string") return content;
      if (!Array.isArray(content)) return "";
      return content
        .filter((block) => block?.type === "text" && typeof block.text === "string")
        .map((block) => block.text)
        .join("\n");
    };

    const chunks = [];
    for (const filePath of sessionIds) {
      const allLines = parseJSONL3(filePath);
      const linearLines = linearize(allLines);

      let sessionTimestamp = Date.now();
      const header = allLines.find((l) => l.type === "session");
      if (header) {
        const ts = new Date(header.timestamp).getTime();
        if (!Number.isNaN(ts)) sessionTimestamp = ts;
      }

      const texts = [];
      for (const line of linearLines) {
        if (line.type === "message") {
          const text4 = contentToText(line.message?.content);
          if (!text4) continue;
          texts.push(`[${line.message.role}] ${text4}`);
        } else if (line.type === "compaction" && line.summary) {
          // Summaries get twice the tool-output budget before being cut.
          texts.push(`[summary] ${truncate6(line.summary, MAX_TOOL_OUTPUT_CHARS6 * 2)}`);
        }
      }
      if (texts.length === 0) continue;

      let currentTexts = [];
      let currentTokens = 0;
      let chunkIndex = 0;
      const flushChunk = () => {
        if (currentTexts.length === 0) return;
        chunkIndex++;
        const text4 = currentTexts.join("\n\n");
        chunks.push({
          label: `Pi ${new Date(sessionTimestamp).toISOString().slice(0, 10)} (${chunkIndex})`,
          text: text4,
          estimatedTokens: estimateTokens9(text4),
          timestamp: sessionTimestamp
        });
        currentTexts = [];
        currentTokens = 0;
      };
      for (const text4 of texts) {
        const msgTokens = estimateTokens9(text4);
        if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
          flushChunk();
        }
        currentTexts.push(text4);
        currentTokens += msgTokens;
      }
      flushChunk();
    }
    return chunks;
  }
};
33077
+ registerProvider(piProvider);
33078
+
33079
+ // src/import/providers/aider.ts
33080
+ import { existsSync as existsSync11, readFileSync as readFileSync9, statSync as statSync9 } from "fs";
33081
+ import { join as join14 } from "path";
33082
// File name looked up inside the project directory by the Aider provider's detect().
+ var HISTORY_FILE = ".aider.chat.history.md";
33083
// Default value of the maxTokens parameter of the Aider provider's readChunks().
+ var DEFAULT_MAX_TOKENS7 = 12288;
33084
// Matches a whole line of the form "#### user", "#### assistant" or "#### system"
// (case-insensitive, trailing whitespace allowed); group 1 captures the role word.
// NOTE(review): confirm this header shape against what Aider actually writes to its history file.
+ var ROLE_HEADER_RE = /^####\s+(user|assistant|system)\s*$/i;
33085
// Rough token estimate for a string: its length (UTF-16 code units) divided by 3, rounded up.
+ function estimateTokens10(text4) {
33086
+ return Math.ceil(text4.length / 3);
30467
33087
  }
30468
- function resetCurationTracker(sessionID) {
30469
- if (sessionID) {
30470
- lastCuratedAt.delete(sessionID);
30471
- } else {
30472
- lastCuratedAt.clear();
33088
// Parses the text of an Aider chat history file into a list of messages.
// @param content3  full file contents as a string
// @returns array of { role, text }: role is the lower-cased word captured by ROLE_HEADER_RE,
//          text is the trimmed body collected after that header
// Lines that appear while no role is active (before the first header or after a "---" line)
// are discarded.
+ function parseAiderHistory(content3) {
33089
+ const lines = content3.split("\n");
33090
+ const messages = [];
33091
+ let currentRole = null;
33092
+ let currentLines = [];
// Pushes the pending message, if it has a role and a non-empty trimmed body, then clears the
// line buffer. The role itself is left untouched; callers reset it explicitly.
33093
+ const flush = () => {
33094
+ if (currentRole && currentLines.length > 0) {
33095
+ const text4 = currentLines.join("\n").trim();
33096
+ if (text4) {
33097
+ messages.push({ role: currentRole, text: text4 });
33098
+ }
33099
+ }
33100
+ currentLines = [];
33101
+ };
33102
+ for (const line of lines) {
33103
+ const match = ROLE_HEADER_RE.exec(line);
// A role header closes the previous message and starts a new one.
33104
+ if (match) {
33105
+ flush();
33106
+ currentRole = match[1].toLowerCase();
33107
+ continue;
33108
+ }
// A line that trims to "---" closes the current message and leaves no role active.
33109
+ if (line.trim() === "---") {
33110
+ flush();
33111
+ currentRole = null;
33112
+ continue;
33113
+ }
33114
+ if (currentRole) {
33115
+ currentLines.push(line);
33116
+ }
30473
33117
  }
// Flush the trailing message, which has no header or separator after it.
33118
+ flush();
33119
+ return messages;
30474
33120
  }
30475
- async function consolidate(input) {
30476
- const cfg = config2();
30477
- if (!cfg.curator.enabled) return { updated: 0, deleted: 0 };
30478
- const entries = forProject(input.projectPath, false);
30479
- if (entries.length <= cfg.curator.maxEntries) return { updated: 0, deleted: 0 };
30480
- const entriesForPrompt = entries.map((e) => ({
30481
- id: e.id,
30482
- category: e.category,
30483
- title: e.title,
30484
- content: e.content
30485
- }));
30486
- const userContent = consolidationUser({
30487
- entries: entriesForPrompt,
30488
- targetMax: cfg.curator.maxEntries
30489
- });
30490
- const model = input.model ?? cfg.model;
30491
- const responseText = await input.llm.prompt(
30492
- CONSOLIDATION_SYSTEM,
30493
- userContent,
30494
- { model, workerID: "lore-curator", thinking: false, sessionID: input.sessionID, maxTokens: 4096 }
30495
- );
30496
- if (!responseText) return { updated: 0, deleted: 0 };
30497
- const ops = parseOps(responseText);
30498
- let updated = 0;
30499
- let deleted = 0;
30500
- for (const op of ops) {
30501
- if (op.op === "update") {
30502
- const entry = get(op.id);
30503
- if (entry) {
30504
- const content3 = op.content !== void 0 && op.content.length > MAX_ENTRY_CONTENT_LENGTH ? op.content.slice(0, MAX_ENTRY_CONTENT_LENGTH) + " [truncated \u2014 entry too long]" : op.content;
30505
- update(op.id, { content: content3, confidence: op.confidence });
30506
- updated++;
33121
// Import provider for Aider chat history. Exposes name/displayName plus detect() and readChunks().
+ var aiderProvider = {
33122
+ name: "aider",
33123
+ displayName: "Aider",
// Looks for HISTORY_FILE directly inside projectPath.
// @returns [] when the file is missing, cannot be stat'ed or read, is not a regular file, is empty,
//          or parses to fewer than 3 messages; otherwise a one-element array describing the file.
33124
+ detect(projectPath) {
33125
+ const filePath = join14(projectPath, HISTORY_FILE);
33126
+ if (!existsSync11(filePath)) return [];
33127
+ let stat;
// Best effort: a stat failure is treated the same as "nothing to import".
33128
+ try {
33129
+ stat = statSync9(filePath);
33130
+ } catch {
33131
+ return [];
33132
+ }
33133
+ if (!stat.isFile() || stat.size === 0) return [];
33134
+ let content3;
// Best effort: a read failure is treated the same as "nothing to import".
33135
+ try {
33136
+ content3 = readFileSync9(filePath, "utf-8");
33137
+ } catch {
33138
+ return [];
33139
+ }
33140
+ const messages = parseAiderHistory(content3);
33141
+ if (messages.length < 3) return [];
// The estimate is taken over the raw file contents, not over the parsed messages.
33142
+ const estimatedTokens = estimateTokens10(content3);
33143
+ return [
33144
+ {
// The file path doubles as the session id; readChunks() reads its sessionIds as file paths.
33145
+ id: filePath,
33146
+ label: `Chat history (${messages.length} messages, ${Math.round(stat.size / 1024)}KB)`,
// Falls back to ctimeMs when birthtimeMs is falsy (e.g. 0).
33147
+ startedAt: stat.birthtimeMs || stat.ctimeMs,
33148
+ lastActivityAt: stat.mtimeMs,
33149
+ estimatedTokens,
33150
+ messageCount: messages.length
33151
+ }
33152
+ ];
33153
+ },
// Reads each history file and groups its messages into chunks.
// @param projectPath  not used in this method
// @param sessionIds   iterable of file paths (the ids produced by detect())
// @param maxTokens    soft per-chunk token budget (defaults to DEFAULT_MAX_TOKENS7)
// @returns array of { label, text, estimatedTokens, timestamp }
33154
+ readChunks(projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS7) {
33155
+ const chunks = [];
33156
+ for (const filePath of sessionIds) {
33157
+ let content3;
// Unreadable files are skipped silently.
33158
+ try {
33159
+ content3 = readFileSync9(filePath, "utf-8");
33160
+ } catch {
33161
+ continue;
30507
33162
  }
// The "-" rows below are removed lines of the previous file version interleaved by the diff
// rendering; they are not part of aiderProvider.
30508
- } else if (op.op === "delete") {
30509
- const entry = get(op.id);
30510
- if (entry) {
30511
- remove(op.id);
30512
- deleted++;
33163
+ const messages = parseAiderHistory(content3);
33164
+ if (messages.length === 0) continue;
33165
+ let fileTimestamp;
// Every chunk from this file is stamped with the file's mtime, or with "now" if stat fails.
33166
+ try {
33167
+ fileTimestamp = statSync9(filePath).mtimeMs;
33168
+ } catch {
33169
+ fileTimestamp = Date.now();
33170
+ }
33171
+ let currentTexts = [];
33172
+ let currentTokens = 0;
// chunkIndex is declared per file, so the "(n)" suffix in labels restarts for each file.
33173
+ let chunkIndex = 0;
// Emits the accumulated texts as one chunk (joined with blank lines) and resets the accumulator.
33174
+ const flushChunk = () => {
33175
+ if (currentTexts.length === 0) return;
33176
+ chunkIndex++;
33177
+ const text4 = currentTexts.join("\n\n");
33178
+ chunks.push({
33179
+ label: `Aider history (${chunkIndex})`,
33180
+ text: text4,
33181
+ estimatedTokens: estimateTokens10(text4),
33182
+ timestamp: fileTimestamp
33183
+ });
33184
+ currentTexts = [];
33185
+ currentTokens = 0;
33186
+ };
33187
+ for (const msg of messages) {
33188
+ const formatted = `[${msg.role}] ${msg.text}`;
33189
+ const msgTokens = estimateTokens10(formatted);
// Flush only when the chunk already holds something, so a single message larger than maxTokens
// still becomes its own (oversized) chunk instead of being dropped.
33190
+ if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
33191
+ flushChunk();
33192
+ }
33193
+ currentTexts.push(formatted);
33194
+ currentTokens += msgTokens;
30513
33195
  }
// Emit whatever is left for this file.
33196
+ flushChunk();
30514
33197
  }
33198
+ return chunks;
30515
33199
  }
30516
- return { updated, deleted };
30517
- }
33200
+ };
33201
+ registerProvider(aiderProvider);
30518
33202
 
30519
33203
  // src/recall.ts
30520
33204
  function getTaggedText(tagged) {
@@ -30780,7 +33464,10 @@ async function searchRecall(input) {
30780
33464
  info("recall: query expansion failed, using original:", err);
30781
33465
  }
30782
33466
  }
33467
+ const queryTermCount = filterTerms(query).length;
33468
+ const vectorWeight = queryTermCount >= (searchConfig?.vectorBoostMinTerms ?? 3) ? searchConfig?.vectorBoostWeight ?? 1.5 : 1;
30783
33469
  const allRrfLists = [];
33470
+ let primaryListEnd = 0;
30784
33471
  for (const q of queries) {
30785
33472
  const knowledgeResults = [];
30786
33473
  if (knowledgeEnabled && scope !== "session") {
@@ -30857,7 +33544,11 @@ async function searchRecall(input) {
30857
33544
  key: (r) => `t:${r.item.id}`
30858
33545
  });
30859
33546
  }
33547
+ if (primaryListEnd === 0) {
33548
+ primaryListEnd = allRrfLists.length;
33549
+ }
30860
33550
  }
33551
+ const perQueryListEnd = allRrfLists.length;
30861
33552
  if (isAvailable() && scope !== "session") {
30862
33553
  try {
30863
33554
  const [queryVec] = await embed([query], "query");
@@ -30876,7 +33567,8 @@ async function searchRecall(input) {
30876
33567
  if (vectorTagged.length) {
30877
33568
  allRrfLists.push({
30878
33569
  items: vectorTagged,
30879
- key: (r) => `k:${r.item.id}`
33570
+ key: (r) => `k:${r.item.id}`,
33571
+ weight: vectorWeight
30880
33572
  });
30881
33573
  }
30882
33574
  }
@@ -30895,7 +33587,8 @@ async function searchRecall(input) {
30895
33587
  if (distVectorTagged.length) {
30896
33588
  allRrfLists.push({
30897
33589
  items: distVectorTagged,
30898
- key: (r) => `d:${r.item.id}`
33590
+ key: (r) => `d:${r.item.id}`,
33591
+ weight: vectorWeight
30899
33592
  });
30900
33593
  }
30901
33594
  }
@@ -30919,7 +33612,8 @@ async function searchRecall(input) {
30919
33612
  if (temporalVectorTagged.length) {
30920
33613
  allRrfLists.push({
30921
33614
  items: temporalVectorTagged,
30922
- key: (r) => `t:${r.item.id}`
33615
+ key: (r) => `t:${r.item.id}`,
33616
+ weight: vectorWeight
30923
33617
  });
30924
33618
  }
30925
33619
  }
@@ -31022,6 +33716,15 @@ async function searchRecall(input) {
31022
33716
  });
31023
33717
  }
31024
33718
  }
33719
+ const MAX_RRF_LISTS = 10;
33720
+ if (allRrfLists.length > MAX_RRF_LISTS) {
33721
+ const primary = allRrfLists.slice(0, primaryListEnd);
33722
+ const expanded = allRrfLists.slice(primaryListEnd, perQueryListEnd);
33723
+ const supplemental = allRrfLists.slice(perQueryListEnd);
33724
+ const budget = Math.max(0, MAX_RRF_LISTS - primary.length - supplemental.length);
33725
+ allRrfLists.length = 0;
33726
+ allRrfLists.push(...primary, ...expanded.slice(0, budget), ...supplemental);
33727
+ }
31025
33728
  const fused = reciprocalRankFusion(allRrfLists);
31026
33729
  const maxResults = limit * 3;
31027
33730
  return fused.slice(0, maxResults);
@@ -31091,9 +33794,6 @@ async function runRecall(input) {
31091
33794
  if (input.id) {
31092
33795
  return recallById(input.id);
31093
33796
  }
31094
- if (ftsQuery(input.query) === EMPTY_QUERY) {
31095
- return "Query too vague \u2014 try using specific keywords, file names, or technical terms.";
31096
- }
31097
33797
  const fused = await searchRecall(input);
31098
33798
  const recallCfg = input.searchConfig?.recall;
31099
33799
  return formatFusedResults(fused, {
@@ -31140,9 +33840,11 @@ export {
31140
33840
  config2 as config,
31141
33841
  consolidationUser,
31142
33842
  consumeCameOutOfIdle,
33843
+ import_exports as conversationImport,
31143
33844
  curator_exports as curator,
31144
33845
  curatorUser,
31145
33846
  data_exports as data,
33847
+ dataDir,
31146
33848
  db,
31147
33849
  dbPath,
31148
33850
  distillation_exports as distillation,
@@ -31162,6 +33864,8 @@ export {
31162
33864
  ftsQueryRelaxed,
31163
33865
  getGitRemote,
31164
33866
  getInstanceId,
33867
+ getKV,
33868
+ getLastImportAt,
31165
33869
  getLastTransformEstimate,
31166
33870
  getLastTransformedCount,
31167
33871
  getLastTurnAt,
@@ -31174,6 +33878,7 @@ export {
31174
33878
  importLoreFile,
31175
33879
  inline,
31176
33880
  inspectSessionState,
33881
+ instruction_detect_exports as instructionDetect,
31177
33882
  isFirstRun,
31178
33883
  isReasoningPart,
31179
33884
  isTextPart,
@@ -31185,7 +33890,9 @@ export {
31185
33890
  load,
31186
33891
  loadAllSessionCosts,
31187
33892
  loadForceMinLayer,
33893
+ loadHeaderSessionIndex,
31188
33894
  loadSessionCosts,
33895
+ loadSessionTracking,
31189
33896
  log_exports as log,
31190
33897
  loreFileExists,
31191
33898
  ltm_exports as ltm,
@@ -31206,10 +33913,14 @@ export {
31206
33913
  runRecall,
31207
33914
  sanitizeSurrogates,
31208
33915
  saveForceMinLayer,
33916
+ saveGradientState,
31209
33917
  saveSessionCosts,
33918
+ saveSessionTracking,
31210
33919
  searchRecall,
31211
33920
  serialize,
31212
33921
  setForceMinLayer,
33922
+ setKV,
33923
+ setLastImportAt,
31213
33924
  setLastTurnAtForTest,
31214
33925
  setLtmTokens,
31215
33926
  setMaxContextTokens,