@loreai/core 0.17.1 → 0.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248)
  1. package/dist/bun/agents-file.d.ts +4 -0
  2. package/dist/bun/agents-file.d.ts.map +1 -1
  3. package/dist/bun/config.d.ts +2 -0
  4. package/dist/bun/config.d.ts.map +1 -1
  5. package/dist/bun/curator.d.ts +45 -0
  6. package/dist/bun/curator.d.ts.map +1 -1
  7. package/dist/bun/data-dir.d.ts +18 -0
  8. package/dist/bun/data-dir.d.ts.map +1 -0
  9. package/dist/bun/db.d.ts +85 -0
  10. package/dist/bun/db.d.ts.map +1 -1
  11. package/dist/bun/distillation.d.ts +2 -13
  12. package/dist/bun/distillation.d.ts.map +1 -1
  13. package/dist/bun/embedding-vendor.d.ts +22 -38
  14. package/dist/bun/embedding-vendor.d.ts.map +1 -1
  15. package/dist/bun/embedding-worker-types.d.ts +17 -12
  16. package/dist/bun/embedding-worker-types.d.ts.map +1 -1
  17. package/dist/bun/embedding-worker.d.ts +9 -2
  18. package/dist/bun/embedding-worker.d.ts.map +1 -1
  19. package/dist/bun/embedding-worker.js +38864 -33
  20. package/dist/bun/embedding-worker.js.map +4 -4
  21. package/dist/bun/embedding.d.ts +35 -23
  22. package/dist/bun/embedding.d.ts.map +1 -1
  23. package/dist/bun/gradient.d.ts +17 -1
  24. package/dist/bun/gradient.d.ts.map +1 -1
  25. package/dist/bun/import/detect.d.ts +14 -0
  26. package/dist/bun/import/detect.d.ts.map +1 -0
  27. package/dist/bun/import/extract.d.ts +43 -0
  28. package/dist/bun/import/extract.d.ts.map +1 -0
  29. package/dist/bun/import/history.d.ts +40 -0
  30. package/dist/bun/import/history.d.ts.map +1 -0
  31. package/dist/bun/import/index.d.ts +17 -0
  32. package/dist/bun/import/index.d.ts.map +1 -0
  33. package/dist/bun/import/providers/aider.d.ts +2 -0
  34. package/dist/bun/import/providers/aider.d.ts.map +1 -0
  35. package/dist/bun/import/providers/claude-code.d.ts +2 -0
  36. package/dist/bun/import/providers/claude-code.d.ts.map +1 -0
  37. package/dist/bun/import/providers/cline.d.ts +2 -0
  38. package/dist/bun/import/providers/cline.d.ts.map +1 -0
  39. package/dist/bun/import/providers/codex.d.ts +2 -0
  40. package/dist/bun/import/providers/codex.d.ts.map +1 -0
  41. package/dist/bun/import/providers/continue.d.ts +2 -0
  42. package/dist/bun/import/providers/continue.d.ts.map +1 -0
  43. package/dist/bun/import/providers/index.d.ts +19 -0
  44. package/dist/bun/import/providers/index.d.ts.map +1 -0
  45. package/dist/bun/import/providers/opencode.d.ts +2 -0
  46. package/dist/bun/import/providers/opencode.d.ts.map +1 -0
  47. package/dist/bun/import/providers/pi.d.ts +2 -0
  48. package/dist/bun/import/providers/pi.d.ts.map +1 -0
  49. package/dist/bun/import/types.d.ts +82 -0
  50. package/dist/bun/import/types.d.ts.map +1 -0
  51. package/dist/bun/index.d.ts +5 -2
  52. package/dist/bun/index.d.ts.map +1 -1
  53. package/dist/bun/index.js +3150 -439
  54. package/dist/bun/index.js.map +4 -4
  55. package/dist/bun/instruction-detect.d.ts +66 -0
  56. package/dist/bun/instruction-detect.d.ts.map +1 -0
  57. package/dist/bun/log.d.ts +9 -0
  58. package/dist/bun/log.d.ts.map +1 -1
  59. package/dist/bun/ltm.d.ts +139 -5
  60. package/dist/bun/ltm.d.ts.map +1 -1
  61. package/dist/bun/pattern-extract.d.ts +7 -0
  62. package/dist/bun/pattern-extract.d.ts.map +1 -1
  63. package/dist/bun/prompt.d.ts +1 -1
  64. package/dist/bun/prompt.d.ts.map +1 -1
  65. package/dist/bun/recall.d.ts.map +1 -1
  66. package/dist/bun/search.d.ts +5 -3
  67. package/dist/bun/search.d.ts.map +1 -1
  68. package/dist/bun/session-limiter.d.ts +26 -0
  69. package/dist/bun/session-limiter.d.ts.map +1 -0
  70. package/dist/bun/temporal.d.ts +2 -0
  71. package/dist/bun/temporal.d.ts.map +1 -1
  72. package/dist/bun/types.d.ts +1 -1
  73. package/dist/node/agents-file.d.ts +4 -0
  74. package/dist/node/agents-file.d.ts.map +1 -1
  75. package/dist/node/config.d.ts +2 -0
  76. package/dist/node/config.d.ts.map +1 -1
  77. package/dist/node/curator.d.ts +45 -0
  78. package/dist/node/curator.d.ts.map +1 -1
  79. package/dist/node/data-dir.d.ts +18 -0
  80. package/dist/node/data-dir.d.ts.map +1 -0
  81. package/dist/node/db.d.ts +85 -0
  82. package/dist/node/db.d.ts.map +1 -1
  83. package/dist/node/distillation.d.ts +2 -13
  84. package/dist/node/distillation.d.ts.map +1 -1
  85. package/dist/node/embedding-vendor.d.ts +22 -38
  86. package/dist/node/embedding-vendor.d.ts.map +1 -1
  87. package/dist/node/embedding-worker-types.d.ts +17 -12
  88. package/dist/node/embedding-worker-types.d.ts.map +1 -1
  89. package/dist/node/embedding-worker.d.ts +9 -2
  90. package/dist/node/embedding-worker.d.ts.map +1 -1
  91. package/dist/node/embedding-worker.js +38864 -33
  92. package/dist/node/embedding-worker.js.map +4 -4
  93. package/dist/node/embedding.d.ts +35 -23
  94. package/dist/node/embedding.d.ts.map +1 -1
  95. package/dist/node/gradient.d.ts +17 -1
  96. package/dist/node/gradient.d.ts.map +1 -1
  97. package/dist/node/import/detect.d.ts +14 -0
  98. package/dist/node/import/detect.d.ts.map +1 -0
  99. package/dist/node/import/extract.d.ts +43 -0
  100. package/dist/node/import/extract.d.ts.map +1 -0
  101. package/dist/node/import/history.d.ts +40 -0
  102. package/dist/node/import/history.d.ts.map +1 -0
  103. package/dist/node/import/index.d.ts +17 -0
  104. package/dist/node/import/index.d.ts.map +1 -0
  105. package/dist/node/import/providers/aider.d.ts +2 -0
  106. package/dist/node/import/providers/aider.d.ts.map +1 -0
  107. package/dist/node/import/providers/claude-code.d.ts +2 -0
  108. package/dist/node/import/providers/claude-code.d.ts.map +1 -0
  109. package/dist/node/import/providers/cline.d.ts +2 -0
  110. package/dist/node/import/providers/cline.d.ts.map +1 -0
  111. package/dist/node/import/providers/codex.d.ts +2 -0
  112. package/dist/node/import/providers/codex.d.ts.map +1 -0
  113. package/dist/node/import/providers/continue.d.ts +2 -0
  114. package/dist/node/import/providers/continue.d.ts.map +1 -0
  115. package/dist/node/import/providers/index.d.ts +19 -0
  116. package/dist/node/import/providers/index.d.ts.map +1 -0
  117. package/dist/node/import/providers/opencode.d.ts +2 -0
  118. package/dist/node/import/providers/opencode.d.ts.map +1 -0
  119. package/dist/node/import/providers/pi.d.ts +2 -0
  120. package/dist/node/import/providers/pi.d.ts.map +1 -0
  121. package/dist/node/import/types.d.ts +82 -0
  122. package/dist/node/import/types.d.ts.map +1 -0
  123. package/dist/node/index.d.ts +5 -2
  124. package/dist/node/index.d.ts.map +1 -1
  125. package/dist/node/index.js +3150 -439
  126. package/dist/node/index.js.map +4 -4
  127. package/dist/node/instruction-detect.d.ts +66 -0
  128. package/dist/node/instruction-detect.d.ts.map +1 -0
  129. package/dist/node/log.d.ts +9 -0
  130. package/dist/node/log.d.ts.map +1 -1
  131. package/dist/node/ltm.d.ts +139 -5
  132. package/dist/node/ltm.d.ts.map +1 -1
  133. package/dist/node/pattern-extract.d.ts +7 -0
  134. package/dist/node/pattern-extract.d.ts.map +1 -1
  135. package/dist/node/prompt.d.ts +1 -1
  136. package/dist/node/prompt.d.ts.map +1 -1
  137. package/dist/node/recall.d.ts.map +1 -1
  138. package/dist/node/search.d.ts +5 -3
  139. package/dist/node/search.d.ts.map +1 -1
  140. package/dist/node/session-limiter.d.ts +26 -0
  141. package/dist/node/session-limiter.d.ts.map +1 -0
  142. package/dist/node/temporal.d.ts +2 -0
  143. package/dist/node/temporal.d.ts.map +1 -1
  144. package/dist/node/types.d.ts +1 -1
  145. package/dist/types/agents-file.d.ts +4 -0
  146. package/dist/types/agents-file.d.ts.map +1 -1
  147. package/dist/types/config.d.ts +2 -0
  148. package/dist/types/config.d.ts.map +1 -1
  149. package/dist/types/curator.d.ts +45 -0
  150. package/dist/types/curator.d.ts.map +1 -1
  151. package/dist/types/data-dir.d.ts +18 -0
  152. package/dist/types/data-dir.d.ts.map +1 -0
  153. package/dist/types/db.d.ts +85 -0
  154. package/dist/types/db.d.ts.map +1 -1
  155. package/dist/types/distillation.d.ts +2 -13
  156. package/dist/types/distillation.d.ts.map +1 -1
  157. package/dist/types/embedding-vendor.d.ts +22 -38
  158. package/dist/types/embedding-vendor.d.ts.map +1 -1
  159. package/dist/types/embedding-worker-types.d.ts +17 -12
  160. package/dist/types/embedding-worker-types.d.ts.map +1 -1
  161. package/dist/types/embedding-worker.d.ts +9 -2
  162. package/dist/types/embedding-worker.d.ts.map +1 -1
  163. package/dist/types/embedding.d.ts +35 -23
  164. package/dist/types/embedding.d.ts.map +1 -1
  165. package/dist/types/gradient.d.ts +17 -1
  166. package/dist/types/gradient.d.ts.map +1 -1
  167. package/dist/types/import/detect.d.ts +14 -0
  168. package/dist/types/import/detect.d.ts.map +1 -0
  169. package/dist/types/import/extract.d.ts +43 -0
  170. package/dist/types/import/extract.d.ts.map +1 -0
  171. package/dist/types/import/history.d.ts +40 -0
  172. package/dist/types/import/history.d.ts.map +1 -0
  173. package/dist/types/import/index.d.ts +17 -0
  174. package/dist/types/import/index.d.ts.map +1 -0
  175. package/dist/types/import/providers/aider.d.ts +2 -0
  176. package/dist/types/import/providers/aider.d.ts.map +1 -0
  177. package/dist/types/import/providers/claude-code.d.ts +2 -0
  178. package/dist/types/import/providers/claude-code.d.ts.map +1 -0
  179. package/dist/types/import/providers/cline.d.ts +2 -0
  180. package/dist/types/import/providers/cline.d.ts.map +1 -0
  181. package/dist/types/import/providers/codex.d.ts +2 -0
  182. package/dist/types/import/providers/codex.d.ts.map +1 -0
  183. package/dist/types/import/providers/continue.d.ts +2 -0
  184. package/dist/types/import/providers/continue.d.ts.map +1 -0
  185. package/dist/types/import/providers/index.d.ts +19 -0
  186. package/dist/types/import/providers/index.d.ts.map +1 -0
  187. package/dist/types/import/providers/opencode.d.ts +2 -0
  188. package/dist/types/import/providers/opencode.d.ts.map +1 -0
  189. package/dist/types/import/providers/pi.d.ts +2 -0
  190. package/dist/types/import/providers/pi.d.ts.map +1 -0
  191. package/dist/types/import/types.d.ts +82 -0
  192. package/dist/types/import/types.d.ts.map +1 -0
  193. package/dist/types/index.d.ts +5 -2
  194. package/dist/types/index.d.ts.map +1 -1
  195. package/dist/types/instruction-detect.d.ts +66 -0
  196. package/dist/types/instruction-detect.d.ts.map +1 -0
  197. package/dist/types/log.d.ts +9 -0
  198. package/dist/types/log.d.ts.map +1 -1
  199. package/dist/types/ltm.d.ts +139 -5
  200. package/dist/types/ltm.d.ts.map +1 -1
  201. package/dist/types/pattern-extract.d.ts +7 -0
  202. package/dist/types/pattern-extract.d.ts.map +1 -1
  203. package/dist/types/prompt.d.ts +1 -1
  204. package/dist/types/prompt.d.ts.map +1 -1
  205. package/dist/types/recall.d.ts.map +1 -1
  206. package/dist/types/search.d.ts +5 -3
  207. package/dist/types/search.d.ts.map +1 -1
  208. package/dist/types/session-limiter.d.ts +26 -0
  209. package/dist/types/session-limiter.d.ts.map +1 -0
  210. package/dist/types/temporal.d.ts +2 -0
  211. package/dist/types/temporal.d.ts.map +1 -1
  212. package/dist/types/types.d.ts +1 -1
  213. package/package.json +3 -4
  214. package/src/agents-file.ts +41 -13
  215. package/src/config.ts +31 -18
  216. package/src/curator.ts +163 -75
  217. package/src/data-dir.ts +76 -0
  218. package/src/db.ts +457 -11
  219. package/src/distillation.ts +65 -16
  220. package/src/embedding-vendor.ts +23 -40
  221. package/src/embedding-worker-types.ts +19 -11
  222. package/src/embedding-worker.ts +111 -47
  223. package/src/embedding.ts +224 -174
  224. package/src/gradient.ts +192 -75
  225. package/src/import/detect.ts +37 -0
  226. package/src/import/extract.ts +137 -0
  227. package/src/import/history.ts +99 -0
  228. package/src/import/index.ts +45 -0
  229. package/src/import/providers/aider.ts +207 -0
  230. package/src/import/providers/claude-code.ts +339 -0
  231. package/src/import/providers/cline.ts +324 -0
  232. package/src/import/providers/codex.ts +369 -0
  233. package/src/import/providers/continue.ts +304 -0
  234. package/src/import/providers/index.ts +32 -0
  235. package/src/import/providers/opencode.ts +272 -0
  236. package/src/import/providers/pi.ts +332 -0
  237. package/src/import/types.ts +91 -0
  238. package/src/index.ts +13 -0
  239. package/src/instruction-detect.ts +275 -0
  240. package/src/log.ts +91 -3
  241. package/src/ltm.ts +789 -41
  242. package/src/pattern-extract.ts +41 -0
  243. package/src/prompt.ts +7 -1
  244. package/src/recall.ts +43 -5
  245. package/src/search.ts +7 -5
  246. package/src/session-limiter.ts +47 -0
  247. package/src/temporal.ts +18 -6
  248. package/src/types.ts +1 -1
@@ -125,6 +125,7 @@ __export(temporal_exports, {
125
125
  CHUNK_TERMINATOR: () => CHUNK_TERMINATOR,
126
126
  bySession: () => bySession,
127
127
  count: () => count,
128
+ hasMessages: () => hasMessages,
128
129
  markDistilled: () => markDistilled,
129
130
  partsToText: () => partsToText,
130
131
  prune: () => prune,
@@ -162,9 +163,8 @@ function sha256(input) {
162
163
  }
163
164
 
164
165
  // src/db.ts
165
- import { join, dirname } from "path";
166
+ import { join as join2, dirname } from "path";
166
167
  import { mkdirSync } from "fs";
167
- import { homedir } from "os";
168
168
 
169
169
  // src/git.ts
170
170
  import { execSync } from "child_process";
@@ -227,6 +227,36 @@ function getGitRemote(path) {
227
227
  }
228
228
  }
229
229
 
230
+ // src/data-dir.ts
231
+ import { existsSync, renameSync } from "node:fs";
232
+ import { join } from "node:path";
233
+ import { homedir } from "node:os";
234
+ var OLD_DIR_NAME = "opencode-lore";
235
+ var NEW_DIR_NAME = "lore";
236
+ var migrationAttempted = false;
237
+ function baseDir() {
238
+ return process.env.XDG_DATA_HOME || join(homedir(), ".local", "share");
239
+ }
240
+ function migrateDataDir() {
241
+ if (migrationAttempted) return;
242
+ migrationAttempted = true;
243
+ if (process.env.NODE_ENV === "test") return;
244
+ const base = baseDir();
245
+ const oldDir = join(base, OLD_DIR_NAME);
246
+ const newDir = join(base, NEW_DIR_NAME);
247
+ try {
248
+ if (existsSync(oldDir) && !existsSync(newDir)) {
249
+ renameSync(oldDir, newDir);
250
+ console.error(`[lore] migrated data directory: ${oldDir} \u2192 ${newDir}`);
251
+ }
252
+ } catch {
253
+ }
254
+ }
255
+ function dataDir() {
256
+ migrateDataDir();
257
+ return join(baseDir(), NEW_DIR_NAME);
258
+ }
259
+
230
260
  // src/db.ts
231
261
  function repoNameFromRemote(remote) {
232
262
  if (!remote) return null;
@@ -663,17 +693,123 @@ var MIGRATIONS = [
663
693
  ALTER TABLE session_state ADD COLUMN ttl_savings REAL NOT NULL DEFAULT 0;
664
694
  ALTER TABLE session_state ADD COLUMN ttl_hits INTEGER NOT NULL DEFAULT 0;
665
695
  ALTER TABLE session_state ADD COLUMN batch_savings REAL NOT NULL DEFAULT 0;
696
+ `,
697
+ `
698
+ -- Version 19: Import history for conversation import idempotency.
699
+ -- Tracks which external agent sessions have been imported to prevent
700
+ -- re-importing unchanged sources and to record user-declined imports.
701
+ CREATE TABLE IF NOT EXISTS import_history (
702
+ id TEXT PRIMARY KEY,
703
+ project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
704
+ agent_name TEXT NOT NULL,
705
+ source_id TEXT NOT NULL,
706
+ source_hash TEXT NOT NULL,
707
+ entries_created INTEGER NOT NULL DEFAULT 0,
708
+ entries_updated INTEGER NOT NULL DEFAULT 0,
709
+ imported_at INTEGER NOT NULL,
710
+ UNIQUE(project_id, agent_name, source_id)
711
+ );
712
+ CREATE INDEX IF NOT EXISTS idx_import_history_project ON import_history(project_id);
713
+ `,
714
+ `
715
+ -- Version 20: Purge worker boilerplate from temporal messages.
716
+ -- Legacy gateway/plugin worker calls (distillation observer, curator,
717
+ -- consolidation, reflector, eval) stored their full system prompts
718
+ -- (containing entire conversation transcripts, up to 1.6MB each) as
719
+ -- temporal messages. These pollute FTS search results by matching
720
+ -- virtually any domain keyword. Safe to delete: their actual output
721
+ -- (distillations, knowledge entries) is stored in dedicated tables.
722
+ DELETE FROM temporal_messages WHERE content LIKE '%You are a memory observer.%'
723
+ OR content LIKE '%You are a long-term memory curator.%'
724
+ OR content LIKE '%You are a long-term memory curator performing a consolidation pass.%'
725
+ OR content LIKE '%You are a memory reflector.%'
726
+ OR content LIKE '%You are evaluating distillation quality.%';
727
+ `,
728
+ `
729
+ -- Version 21: Persist avoided compaction data from live sessions.
730
+ -- Historical estimates previously re-simulated avoided compactions from
731
+ -- temporal message token estimates (chars/3), missing system prompt and
732
+ -- tool definition overhead. Persisting the live session's real shadow
733
+ -- context tracking (from actual API-reported total input tokens) gives
734
+ -- accurate post-restart historical estimates.
735
+ ALTER TABLE session_state ADD COLUMN avoided_compactions INTEGER NOT NULL DEFAULT 0;
736
+ ALTER TABLE session_state ADD COLUMN avoided_compaction_cost REAL NOT NULL DEFAULT 0;
737
+ `,
738
+ `
739
+ -- Version 22: Track when conversation import was last offered/run.
740
+ -- NULL means import has never been offered for this project.
741
+ -- Used by auto-import to avoid re-prompting, and by explicit
742
+ -- \`lore import\` for incremental imports (only newer conversations).
743
+ ALTER TABLE projects ADD COLUMN last_import_at INTEGER;
744
+
745
+ -- Backfill: migrate legacy __declined__ sentinel rows so existing
746
+ -- users who previously declined are not re-prompted after upgrading.
747
+ UPDATE projects SET last_import_at = (
748
+ SELECT ih.imported_at FROM import_history ih
749
+ WHERE ih.project_id = projects.id
750
+ AND ih.source_id = '__declined__'
751
+ LIMIT 1
752
+ )
753
+ WHERE EXISTS (
754
+ SELECT 1 FROM import_history ih
755
+ WHERE ih.project_id = projects.id
756
+ AND ih.source_id = '__declined__'
757
+ );
758
+ `,
759
+ `
760
+ -- Version 23: Persist volatile session tracking state across restarts.
761
+ -- Previously these were in-memory only, causing duplicate processing,
762
+ -- false compaction detection, and expensive prompt cache busts on restart.
763
+ ALTER TABLE session_state ADD COLUMN last_curated_at INTEGER NOT NULL DEFAULT 0;
764
+ ALTER TABLE session_state ADD COLUMN message_count INTEGER NOT NULL DEFAULT 0;
765
+ ALTER TABLE session_state ADD COLUMN turns_since_curation INTEGER NOT NULL DEFAULT 0;
766
+ ALTER TABLE session_state ADD COLUMN ltm_cache_text TEXT;
767
+ ALTER TABLE session_state ADD COLUMN ltm_cache_tokens INTEGER;
768
+ ALTER TABLE session_state ADD COLUMN ltm_pin_text TEXT;
769
+ ALTER TABLE session_state ADD COLUMN ltm_pin_tokens INTEGER;
770
+ ALTER TABLE session_state ADD COLUMN consecutive_text_only_turns INTEGER NOT NULL DEFAULT 0;
771
+ `,
772
+ `
773
+ -- Version 24: Persist remaining volatile session state across restarts.
774
+ -- Session identity (Tier 1/2/3 session correlation)
775
+ ALTER TABLE session_state ADD COLUMN fingerprint TEXT NOT NULL DEFAULT '';
776
+ ALTER TABLE session_state ADD COLUMN header_session_id TEXT;
777
+ ALTER TABLE session_state ADD COLUMN header_name TEXT;
778
+ -- Cache warming state
779
+ ALTER TABLE session_state ADD COLUMN resolved_conversation_ttl TEXT NOT NULL DEFAULT '5m';
780
+ ALTER TABLE session_state ADD COLUMN warmup_state TEXT;
781
+ -- Gradient calibration state (survives restarts to avoid uncalibrated busts)
782
+ ALTER TABLE session_state ADD COLUMN dynamic_context_cap REAL NOT NULL DEFAULT 0;
783
+ ALTER TABLE session_state ADD COLUMN bust_rate_ema REAL NOT NULL DEFAULT -1;
784
+ ALTER TABLE session_state ADD COLUMN inter_bust_interval_ema REAL NOT NULL DEFAULT -1;
785
+ ALTER TABLE session_state ADD COLUMN last_layer INTEGER NOT NULL DEFAULT 0;
786
+ ALTER TABLE session_state ADD COLUMN last_known_input INTEGER NOT NULL DEFAULT 0;
787
+ ALTER TABLE session_state ADD COLUMN last_turn_at INTEGER NOT NULL DEFAULT 0;
788
+ ALTER TABLE session_state ADD COLUMN last_bust_at INTEGER NOT NULL DEFAULT 0;
789
+ `,
790
+ `
791
+ -- Version 25: Adaptive dedup threshold \u2014 store accept/reject feedback
792
+ -- on embedding-based duplicate pairs for per-project threshold calibration.
793
+ -- Titles stored instead of FK IDs because entries are deleted during dedup;
794
+ -- the similarity float is the actual calibration input.
795
+ CREATE TABLE IF NOT EXISTS dedup_feedback (
796
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
797
+ project_id TEXT,
798
+ entry_a_title TEXT NOT NULL,
799
+ entry_b_title TEXT NOT NULL,
800
+ similarity REAL NOT NULL,
801
+ accepted INTEGER NOT NULL,
802
+ source TEXT NOT NULL DEFAULT 'manual',
803
+ created_at INTEGER NOT NULL
804
+ );
805
+ CREATE INDEX IF NOT EXISTS idx_dedup_feedback_project
806
+ ON dedup_feedback(project_id);
666
807
  `
667
808
  ];
668
- function dataDir() {
669
- const xdg = process.env.XDG_DATA_HOME;
670
- const base = xdg || join(homedir(), ".local", "share");
671
- return join(base, "opencode-lore");
672
- }
673
809
  function dbPath() {
674
810
  const envPath = process.env.LORE_DB_PATH;
675
811
  if (envPath) return envPath;
676
- return join(dataDir(), "lore.db");
812
+ return join2(dataDir(), "lore.db");
677
813
  }
678
814
  var instance;
679
815
  function db() {
@@ -691,7 +827,7 @@ function db() {
691
827
  }
692
828
  const dir = dataDir();
693
829
  mkdirSync(dir, { recursive: true });
694
- path = join(dir, "lore.db");
830
+ path = join2(dir, "lore.db");
695
831
  }
696
832
  const database = new Database(path);
697
833
  database.exec("PRAGMA journal_mode = WAL");
@@ -804,6 +940,11 @@ function close() {
804
940
  }
805
941
  }
806
942
  function ensureProject(path, name) {
943
+ if (!process.env.LORE_DB_PATH && /^\/test\//.test(path)) {
944
+ throw new Error(
945
+ `Refusing to create project with test path "${path}" in the production DB. Set LORE_DB_PATH to a temp path, or run tests via \`bun test\` from the repo root.`
946
+ );
947
+ }
807
948
  const existing = db().query("SELECT id, git_remote FROM projects WHERE path = ?").get(path);
808
949
  if (existing) {
809
950
  if (!existing.git_remote) {
@@ -858,6 +999,15 @@ function isFirstRun() {
858
999
  const row = db().query("SELECT COUNT(*) as count FROM projects").get();
859
1000
  return row.count === 0;
860
1001
  }
1002
+ function getLastImportAt(projectPath) {
1003
+ const id = ensureProject(projectPath);
1004
+ const row = db().query("SELECT last_import_at FROM projects WHERE id = ?").get(id);
1005
+ return row?.last_import_at ?? null;
1006
+ }
1007
+ function setLastImportAt(projectPath, timestamp) {
1008
+ const id = ensureProject(projectPath);
1009
+ db().query("UPDATE projects SET last_import_at = ? WHERE id = ?").run(timestamp, id);
1010
+ }
861
1011
  function loadForceMinLayer(sessionID) {
862
1012
  const row = db().query("SELECT force_min_layer FROM session_state WHERE session_id = ?").get(sessionID);
863
1013
  return row?.force_min_layer ?? 0;
@@ -876,8 +1026,9 @@ function saveSessionCosts(sessionID, costs) {
876
1026
  `INSERT INTO session_state (session_id, force_min_layer, updated_at,
877
1027
  conversation_cost, worker_cost, conversation_turns,
878
1028
  cache_read_tokens, cache_write_tokens,
879
- warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings)
880
- VALUES (?, COALESCE((SELECT force_min_layer FROM session_state WHERE session_id = ?), 0), ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
1029
+ warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings,
1030
+ avoided_compactions, avoided_compaction_cost)
1031
+ VALUES (?, COALESCE((SELECT force_min_layer FROM session_state WHERE session_id = ?), 0), ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
881
1032
  ON CONFLICT(session_id) DO UPDATE SET
882
1033
  conversation_cost = excluded.conversation_cost,
883
1034
  worker_cost = excluded.worker_cost,
@@ -889,6 +1040,8 @@ function saveSessionCosts(sessionID, costs) {
889
1040
  ttl_savings = excluded.ttl_savings,
890
1041
  ttl_hits = excluded.ttl_hits,
891
1042
  batch_savings = excluded.batch_savings,
1043
+ avoided_compactions = excluded.avoided_compactions,
1044
+ avoided_compaction_cost = excluded.avoided_compaction_cost,
892
1045
  updated_at = excluded.updated_at`
893
1046
  ).run(
894
1047
  sessionID,
@@ -903,14 +1056,17 @@ function saveSessionCosts(sessionID, costs) {
903
1056
  costs.warmupHits,
904
1057
  costs.ttlSavings,
905
1058
  costs.ttlHits,
906
- costs.batchSavings
1059
+ costs.batchSavings,
1060
+ costs.avoidedCompactions,
1061
+ costs.avoidedCompactionCost
907
1062
  );
908
1063
  }
909
1064
  function loadSessionCosts(sessionID) {
910
1065
  const row = db().query(
911
1066
  `SELECT conversation_cost, worker_cost, conversation_turns,
912
1067
  cache_read_tokens, cache_write_tokens,
913
- warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings
1068
+ warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings,
1069
+ avoided_compactions, avoided_compaction_cost
914
1070
  FROM session_state WHERE session_id = ?`
915
1071
  ).get(sessionID);
916
1072
  if (!row) return null;
@@ -924,14 +1080,17 @@ function loadSessionCosts(sessionID) {
924
1080
  warmupHits: row.warmup_hits,
925
1081
  ttlSavings: row.ttl_savings,
926
1082
  ttlHits: row.ttl_hits,
927
- batchSavings: row.batch_savings
1083
+ batchSavings: row.batch_savings,
1084
+ avoidedCompactions: row.avoided_compactions,
1085
+ avoidedCompactionCost: row.avoided_compaction_cost
928
1086
  };
929
1087
  }
930
1088
  function loadAllSessionCosts() {
931
1089
  const rows = db().query(
932
1090
  `SELECT session_id, conversation_cost, worker_cost, conversation_turns,
933
1091
  cache_read_tokens, cache_write_tokens,
934
- warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings
1092
+ warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings,
1093
+ avoided_compactions, avoided_compaction_cost
935
1094
  FROM session_state
936
1095
  WHERE conversation_turns > 0 OR warmup_savings > 0 OR ttl_savings > 0 OR batch_savings > 0`
937
1096
  ).all();
@@ -947,11 +1106,160 @@ function loadAllSessionCosts() {
947
1106
  warmupHits: row.warmup_hits,
948
1107
  ttlSavings: row.ttl_savings,
949
1108
  ttlHits: row.ttl_hits,
950
- batchSavings: row.batch_savings
1109
+ batchSavings: row.batch_savings,
1110
+ avoidedCompactions: row.avoided_compactions,
1111
+ avoidedCompactionCost: row.avoided_compaction_cost
951
1112
  });
952
1113
  }
953
1114
  return result;
954
1115
  }
1116
+ function saveSessionTracking(sessionID, state) {
1117
+ const now = Date.now();
1118
+ db().query(
1119
+ "INSERT OR IGNORE INTO session_state (session_id, force_min_layer, updated_at) VALUES (?, 0, ?)"
1120
+ ).run(sessionID, now);
1121
+ const sets = ["updated_at = ?"];
1122
+ const vals = [now];
1123
+ if (state.lastCuratedAt !== void 0) {
1124
+ sets.push("last_curated_at = ?");
1125
+ vals.push(state.lastCuratedAt);
1126
+ }
1127
+ if (state.messageCount !== void 0) {
1128
+ sets.push("message_count = ?");
1129
+ vals.push(state.messageCount);
1130
+ }
1131
+ if (state.turnsSinceCuration !== void 0) {
1132
+ sets.push("turns_since_curation = ?");
1133
+ vals.push(state.turnsSinceCuration);
1134
+ }
1135
+ if (state.consecutiveTextOnlyTurns !== void 0) {
1136
+ sets.push("consecutive_text_only_turns = ?");
1137
+ vals.push(state.consecutiveTextOnlyTurns);
1138
+ }
1139
+ if (state.ltmCacheText !== void 0) {
1140
+ sets.push("ltm_cache_text = ?");
1141
+ vals.push(state.ltmCacheText);
1142
+ }
1143
+ if (state.ltmCacheTokens !== void 0) {
1144
+ sets.push("ltm_cache_tokens = ?");
1145
+ vals.push(state.ltmCacheTokens);
1146
+ }
1147
+ if (state.ltmPinText !== void 0) {
1148
+ sets.push("ltm_pin_text = ?");
1149
+ vals.push(state.ltmPinText);
1150
+ }
1151
+ if (state.ltmPinTokens !== void 0) {
1152
+ sets.push("ltm_pin_tokens = ?");
1153
+ vals.push(state.ltmPinTokens);
1154
+ }
1155
+ if (state.fingerprint !== void 0) {
1156
+ sets.push("fingerprint = ?");
1157
+ vals.push(state.fingerprint);
1158
+ }
1159
+ if (state.headerSessionId !== void 0) {
1160
+ sets.push("header_session_id = ?");
1161
+ vals.push(state.headerSessionId);
1162
+ }
1163
+ if (state.headerName !== void 0) {
1164
+ sets.push("header_name = ?");
1165
+ vals.push(state.headerName);
1166
+ }
1167
+ if (state.resolvedConversationTTL !== void 0) {
1168
+ sets.push("resolved_conversation_ttl = ?");
1169
+ vals.push(state.resolvedConversationTTL);
1170
+ }
1171
+ if (state.warmupState !== void 0) {
1172
+ sets.push("warmup_state = ?");
1173
+ vals.push(state.warmupState);
1174
+ }
1175
+ if (state.dynamicContextCap !== void 0) {
1176
+ sets.push("dynamic_context_cap = ?");
1177
+ vals.push(state.dynamicContextCap);
1178
+ }
1179
+ if (state.bustRateEMA !== void 0) {
1180
+ sets.push("bust_rate_ema = ?");
1181
+ vals.push(state.bustRateEMA);
1182
+ }
1183
+ if (state.interBustIntervalEMA !== void 0) {
1184
+ sets.push("inter_bust_interval_ema = ?");
1185
+ vals.push(state.interBustIntervalEMA);
1186
+ }
1187
+ if (state.lastLayer !== void 0) {
1188
+ sets.push("last_layer = ?");
1189
+ vals.push(state.lastLayer);
1190
+ }
1191
+ if (state.lastKnownInput !== void 0) {
1192
+ sets.push("last_known_input = ?");
1193
+ vals.push(state.lastKnownInput);
1194
+ }
1195
+ if (state.lastTurnAt !== void 0) {
1196
+ sets.push("last_turn_at = ?");
1197
+ vals.push(state.lastTurnAt);
1198
+ }
1199
+ if (state.lastBustAt !== void 0) {
1200
+ sets.push("last_bust_at = ?");
1201
+ vals.push(state.lastBustAt);
1202
+ }
1203
+ db().query(
1204
+ "UPDATE session_state SET " + sets.join(", ") + " WHERE session_id = ?"
1205
+ ).run(...vals, sessionID);
1206
+ }
1207
+ function loadSessionTracking(sessionID) {
1208
+ const row = db().query(
1209
+ `SELECT last_curated_at, message_count, turns_since_curation,
1210
+ consecutive_text_only_turns,
1211
+ ltm_cache_text, ltm_cache_tokens, ltm_pin_text, ltm_pin_tokens,
1212
+ fingerprint, header_session_id, header_name,
1213
+ resolved_conversation_ttl, warmup_state,
1214
+ dynamic_context_cap, bust_rate_ema, inter_bust_interval_ema,
1215
+ last_layer, last_known_input, last_turn_at, last_bust_at
1216
+ FROM session_state WHERE session_id = ?`
1217
+ ).get(sessionID);
1218
+ if (!row) return null;
1219
+ return {
1220
+ lastCuratedAt: row.last_curated_at,
1221
+ messageCount: row.message_count,
1222
+ turnsSinceCuration: row.turns_since_curation,
1223
+ consecutiveTextOnlyTurns: row.consecutive_text_only_turns,
1224
+ ltmCacheText: row.ltm_cache_text,
1225
+ ltmCacheTokens: row.ltm_cache_tokens,
1226
+ ltmPinText: row.ltm_pin_text,
1227
+ ltmPinTokens: row.ltm_pin_tokens,
1228
+ fingerprint: row.fingerprint,
1229
+ headerSessionId: row.header_session_id,
1230
+ headerName: row.header_name,
1231
+ resolvedConversationTTL: row.resolved_conversation_ttl,
1232
+ warmupState: row.warmup_state,
1233
+ dynamicContextCap: row.dynamic_context_cap,
1234
+ bustRateEMA: row.bust_rate_ema,
1235
+ interBustIntervalEMA: row.inter_bust_interval_ema,
1236
+ lastLayer: row.last_layer,
1237
+ lastKnownInput: row.last_known_input,
1238
+ lastTurnAt: row.last_turn_at,
1239
+ lastBustAt: row.last_bust_at
1240
+ };
1241
+ }
1242
+ function loadHeaderSessionIndex() {
1243
+ const rows = db().query(
1244
+ `SELECT session_id, header_session_id, header_name
1245
+ FROM session_state
1246
+ WHERE header_session_id IS NOT NULL AND header_name IS NOT NULL`
1247
+ ).all();
1248
+ return rows.map((row) => ({
1249
+ sessionId: row.session_id,
1250
+ headerSessionId: row.header_session_id,
1251
+ headerName: row.header_name
1252
+ }));
1253
+ }
1254
+ function getKV(key) {
1255
+ const row = db().query("SELECT value FROM kv_meta WHERE key = ?").get(key);
1256
+ return row?.value ?? null;
1257
+ }
1258
+ function setKV(key, value) {
1259
+ db().query(
1260
+ "INSERT INTO kv_meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value = ?"
1261
+ ).run(key, value, value);
1262
+ }
955
1263
  function getMeta(key) {
956
1264
  const row = db().query("SELECT value FROM metadata WHERE key = ?").get(key);
957
1265
  return row?.value ?? null;
@@ -9770,7 +10078,7 @@ var handle = {
9770
10078
  };
9771
10079
 
9772
10080
  // ../../node_modules/.bun/mdast-util-to-markdown@2.1.2/node_modules/mdast-util-to-markdown/lib/join.js
9773
- var join2 = [joinDefaults];
10081
+ var join3 = [joinDefaults];
9774
10082
  function joinDefaults(left, right, parent, state) {
9775
10083
  if (right.type === "code" && formatCodeAsIndented(right, state) && (left.type === "list" || left.type === right.type && formatCodeAsIndented(left, state))) {
9776
10084
  return false;
@@ -10190,7 +10498,7 @@ function toMarkdown(tree, options) {
10190
10498
  handle: void 0,
10191
10499
  indentLines,
10192
10500
  indexStack: [],
10193
- join: [...join2],
10501
+ join: [...join3],
10194
10502
  options: {},
10195
10503
  safe: safeBound,
10196
10504
  stack: [],
@@ -11915,6 +12223,10 @@ Focus ONLY on knowledge that helps a coding agent work effectively on THIS codeb
11915
12223
  - Environment/tooling setup details that affect development
11916
12224
  - Important relationships between components that aren't obvious from reading the code
11917
12225
  - User preferences and working style specific to how they use this project
12226
+ - Repeated user instructions \u2014 when the user says things like "always", "never",
12227
+ "make sure to", "don't forget to", these are high-value preference candidates.
12228
+ If you see instruction-like language, prioritize extracting it as a "preference" entry.
12229
+ These instructions represent how the user wants to work and should persist across sessions.
11918
12230
 
11919
12231
  Do NOT extract:
11920
12232
  - Task-specific details (file currently being edited, current bug being fixed)
@@ -11999,7 +12311,9 @@ IMPORTANT:
11999
12311
  2. When updating, REPLACE the content with a complete rewrite \u2014 never append.
12000
12312
  3. If entries cover the same system from different angles, merge them: update one, delete the rest.
12001
12313
  4. Only create a new entry for genuinely distinct knowledge with no existing home.
12002
- 5. Keep all entries under 150 words. If an existing entry is too long, use an update op to trim it.`;
12314
+ 5. Keep all entries under 150 words. If an existing entry is too long, use an update op to trim it.
12315
+ 6. Pay special attention to user instructions ("always do X", "never do Y", "make sure to X").
12316
+ These are strong signals for "preference" entries with high confidence.`;
12003
12317
  }
12004
12318
  var CONSOLIDATION_SYSTEM = `You are a long-term memory curator performing a consolidation pass. The knowledge base has grown too large and needs to be trimmed.
12005
12319
 
@@ -12163,9 +12477,12 @@ var log_exports = {};
12163
12477
  __export(log_exports, {
12164
12478
  error: () => error,
12165
12479
  info: () => info,
12480
+ logFilePath: () => logFilePath,
12166
12481
  registerSink: () => registerSink,
12167
12482
  warn: () => warn
12168
12483
  });
12484
+ import { appendFileSync, renameSync as renameSync2, statSync, mkdirSync as mkdirSync2 } from "node:fs";
12485
+ import { join as join4 } from "node:path";
12169
12486
  var sink = null;
12170
12487
  function registerSink(s) {
12171
12488
  sink = s;
@@ -12180,17 +12497,71 @@ function findError(args) {
12180
12497
  }
12181
12498
  return void 0;
12182
12499
  }
12500
// Size (in bytes) above which the log file is renamed to `<path>.1`.
var LOG_MAX_BYTES = 5 * 1024 * 1024;
// Number of writes between two size checks; keeps statSync off the hot path.
var ROTATION_CHECK_INTERVAL = 1e3;
// Lazily resolved log file path; `undefined` means file logging is disabled.
var logPath;
var logPathResolved = false;
// Count of writeToFile calls that reached the rotation check in this process.
var writeCount = 0;

/**
 * Work out where the log file lives.
 *
 * @returns {string|undefined} `<dataDir()>/lore.log`, or `undefined` when
 *   NODE_ENV is "test" or when resolving/creating the directory throws.
 */
function resolveLogPath() {
  if (process.env.NODE_ENV === "test") return void 0;
  try {
    const dir = dataDir();
    mkdirSync2(dir, { recursive: true });
    return join4(dir, "lore.log");
  } catch {
    // Best effort: an unusable directory just disables file logging.
    return void 0;
  }
}

/**
 * Return the log file path, resolving it on first use and caching the result
 * (including a cached `undefined`).
 *
 * @returns {string|undefined}
 */
function logFilePath() {
  if (!logPathResolved) {
    logPath = resolveLogPath();
    logPathResolved = true;
  }
  return logPath;
}

/**
 * Rename the log file to `<path>.1` once it is larger than LOG_MAX_BYTES.
 * Any filesystem error (e.g. the file does not exist yet) is ignored.
 */
function maybeRotate() {
  if (!logPath) return;
  try {
    const stat = statSync(logPath);
    if (stat.size > LOG_MAX_BYTES) {
      renameSync2(logPath, logPath + ".1");
    }
  } catch {
    // Best effort: rotation must never break logging.
  }
}

/**
 * Append one line to the log file.
 *
 * @param {string} level - Severity label; upper-cased and padded to 5 chars.
 * @param {string} message - Message text; newlines are escaped as `\n` so
 *   each record stays on a single line.
 */
function writeToFile(level, message) {
  const path = logFilePath();
  if (!path) return;
  // Post-increment so the size check runs on the FIRST write of the process
  // and then every ROTATION_CHECK_INTERVAL writes. With a pre-increment test
  // a process that logs fewer than ROTATION_CHECK_INTERVAL lines never
  // checks, so repeated short-lived runs let the file grow without bound.
  if (writeCount++ % ROTATION_CHECK_INTERVAL === 0) {
    maybeRotate();
  }
  const ts = (/* @__PURE__ */ new Date()).toISOString();
  const tag = level.toUpperCase().padEnd(5);
  const flat = message.replace(/\n/g, "\\n");
  const line = `${ts} [${tag}] ${flat}
`;
  try {
    appendFileSync(path, line);
  } catch {
    // Best effort: a failed append must not surface to the caller.
  }
}
12183
12548
  function info(...args) {
12184
12549
  if (isDebug) console.error("[lore]", ...args);
12185
- sink?.info(formatArgs(args));
12550
+ const msg = formatArgs(args);
12551
+ sink?.info(msg);
12552
+ writeToFile("info", msg);
12186
12553
  }
12187
12554
  function warn(...args) {
12188
12555
  if (isDebug) console.error("[lore] WARN:", ...args);
12189
- sink?.warn(formatArgs(args));
12556
+ const msg = formatArgs(args);
12557
+ sink?.warn(msg);
12558
+ writeToFile("warn", msg);
12190
12559
  }
12191
12560
  function error(...args) {
12192
12561
  console.error("[lore]", ...args);
12193
- sink?.error(formatArgs(args));
12562
+ const msg = formatArgs(args);
12563
+ sink?.error(msg);
12564
+ writeToFile("error", msg);
12194
12565
  const err = findError(args);
12195
12566
  if (err) sink?.captureException(err);
12196
12567
  }
@@ -12350,10 +12721,11 @@ function extractTopTerms(text4, limit = 40) {
12350
12721
  function reciprocalRankFusion(lists, k = 60) {
12351
12722
  const scores = /* @__PURE__ */ new Map();
12352
12723
  for (const list4 of lists) {
12724
+ const w = list4.weight ?? 1;
12353
12725
  for (let rank = 0; rank < list4.items.length; rank++) {
12354
12726
  const item = list4.items[rank];
12355
12727
  const id = list4.key(item);
12356
- const rrfScore = 1 / (k + rank);
12728
+ const rrfScore = w / (k + rank);
12357
12729
  const existing = scores.get(id);
12358
12730
  if (existing) {
12359
12731
  existing.score += rrfScore;
@@ -12407,8 +12779,8 @@ async function expandQuery(llm, query, model, sessionID) {
12407
12779
  var embedding_exports = {};
12408
12780
  __export(embedding_exports, {
12409
12781
  LocalProviderUnavailableError: () => LocalProviderUnavailableError,
12410
- _markFastembedUnavailable: () => _markFastembedUnavailable,
12411
- _resetFastembedProbe: () => _resetFastembedProbe,
12782
+ _markLocalProviderUnavailable: () => _markLocalProviderUnavailable,
12783
+ _resetLocalProviderProbe: () => _resetLocalProviderProbe,
12412
12784
  _restoreProvider: () => _restoreProvider,
12413
12785
  _saveAndClearProvider: () => _saveAndClearProvider,
12414
12786
  _shutdownAndDisable: () => _shutdownAndDisable,
@@ -12427,6 +12799,7 @@ __export(embedding_exports, {
12427
12799
  runStartupBackfill: () => runStartupBackfill,
12428
12800
  toBlob: () => toBlob,
12429
12801
  vectorSearch: () => vectorSearch,
12802
+ vectorSearchAllDistillations: () => vectorSearchAllDistillations,
12430
12803
  vectorSearchDistillations: () => vectorSearchDistillations,
12431
12804
  vectorSearchTemporal: () => vectorSearchTemporal
12432
12805
  });
@@ -26200,8 +26573,8 @@ function date4(params) {
26200
26573
  config(en_default());
26201
26574
 
26202
26575
  // src/config.ts
26203
- import { existsSync, readFileSync } from "node:fs";
26204
- import { join as join3 } from "node:path";
26576
+ import { existsSync as existsSync2, readFileSync } from "node:fs";
26577
+ import { join as join5 } from "node:path";
26205
26578
  var LoreConfig = external_exports.object({
26206
26579
  model: external_exports.object({
26207
26580
  providerID: external_exports.string(),
@@ -26318,15 +26691,25 @@ var LoreConfig = external_exports.object({
26318
26691
  }).default({ title: 6, content: 2, category: 3 }),
26319
26692
  /** Max results per source in recall tool before fusion. Default: 10. */
26320
26693
  recallLimit: external_exports.number().min(1).max(50).default(10),
26321
- /** Enable LLM-based query expansion for the recall tool. Default: false.
26322
- * When enabled, the configured model generates 2–3 alternative query phrasings
26323
- * before search, improving recall for ambiguous queries. */
26324
- queryExpansion: external_exports.boolean().default(false),
26694
+ /** Enable LLM-based query expansion for the recall tool. Default: true.
26695
+ * The configured model generates 2–3 alternative query phrasings before
26696
+ * search, improving recall for ambiguous queries. Guarded by a 3-second
26697
+ * timeout — if expansion fails or times out, the original query is used. */
26698
+ queryExpansion: external_exports.boolean().default(true),
26699
+ /** RRF weight multiplier for vector search lists. Applied when the query
26700
+ * has >= `vectorBoostMinTerms` meaningful terms (after stopword removal).
26701
+ * Boosts semantic/vector results relative to keyword-based BM25 lists.
26702
+ * Default: 1.5. Set to 1.0 to disable. */
26703
+ vectorBoostWeight: external_exports.number().min(1).max(5).default(1.5),
26704
+ /** Minimum meaningful query terms (after stopword removal) to activate
26705
+ * vector boost. Short keyword queries (1-2 terms) are left unweighted
26706
+ * since BM25 excels there. Default: 3. */
26707
+ vectorBoostMinTerms: external_exports.number().min(1).max(10).default(3),
26325
26708
  /** Vector embedding search.
26326
26709
  * Supports multiple providers:
26327
- * - "local" (default): fastembed + ONNX Runtime, no API key needed.
26328
- * Uses bge-small-en-v1.5 (384 dims). Model downloaded on first use (~33MB),
26329
- * cached in ~/.cache/fastembed. ~150ms per query embed.
26710
+ * - "local" (default): @huggingface/transformers + nomic-embed-text-v1.5, no API key needed.
26711
+ * 768 dims (Matryoshka-capable: 64–768). Model downloaded on first use (~137MB INT8),
26712
+ * cached locally. Uses task instruction prefixes (search_document: / search_query:).
26330
26713
  * - "voyage": Voyage AI (VOYAGE_API_KEY, voyage-code-3, 1024 dims)
26331
26714
  * - "openai": OpenAI (OPENAI_API_KEY, text-embedding-3-small, 1536 dims)
26332
26715
  * Set enabled: false to explicitly disable even with a provider available. */
@@ -26335,19 +26718,20 @@ var LoreConfig = external_exports.object({
26335
26718
  * Set to false to explicitly disable. */
26336
26719
  enabled: external_exports.boolean().default(true),
26337
26720
  /** Embedding provider. Default: "local".
26338
- * - "local": fastembed + ONNX Runtime, no API key (default model: bge-small-en-v1.5, 384 dims)
26721
+ * - "local": @huggingface/transformers, no API key (default model: nomic-embed-text-v1.5, 768 dims)
26339
26722
  * - "voyage": VOYAGE_API_KEY (default model: voyage-code-3, 1024 dims)
26340
26723
  * - "openai": OPENAI_API_KEY (default model: text-embedding-3-small, 1536 dims) */
26341
26724
  provider: external_exports.enum(["local", "voyage", "openai"]).default("local"),
26342
26725
  /** Model ID for the embedding provider. Default depends on provider. */
26343
- model: external_exports.string().default("BGESmallENV15"),
26344
- /** Embedding dimensions. Default: 384 (local) / 1024 (voyage) / 1536 (openai). */
26345
- dimensions: external_exports.number().min(64).max(2048).default(384)
26726
+ model: external_exports.string().default("nomic-ai/nomic-embed-text-v1.5"),
26727
+ /** Embedding dimensions. Default: 768 (local) / 1024 (voyage) / 1536 (openai).
26728
+ * For the local Nomic v1.5 model, supports Matryoshka dimensions: 64, 128, 256, 512, 768. */
26729
+ dimensions: external_exports.number().min(64).max(2048).default(768)
26346
26730
  }).default({
26347
26731
  enabled: true,
26348
26732
  provider: "local",
26349
- model: "BGESmallENV15",
26350
- dimensions: 384
26733
+ model: "nomic-ai/nomic-embed-text-v1.5",
26734
+ dimensions: 768
26351
26735
  }),
26352
26736
  /** Recall output formatting — controls how search results are presented to the agent. */
26353
26737
  recall: external_exports.object({
@@ -26364,8 +26748,10 @@ var LoreConfig = external_exports.object({
26364
26748
  }).default({
26365
26749
  ftsWeights: { title: 6, content: 2, category: 3 },
26366
26750
  recallLimit: 10,
26367
- queryExpansion: false,
26368
- embeddings: { enabled: true, provider: "local", model: "BGESmallENV15", dimensions: 384 },
26751
+ queryExpansion: true,
26752
+ vectorBoostWeight: 1.5,
26753
+ vectorBoostMinTerms: 3,
26754
+ embeddings: { enabled: true, provider: "local", model: "nomic-ai/nomic-embed-text-v1.5", dimensions: 768 },
26369
26755
  recall: { charBudget: 8e3, relevanceFloor: 0.15, maxResults: 15 }
26370
26756
  }),
26371
26757
  cache: external_exports.object({
@@ -26383,9 +26769,9 @@ var LoreConfig = external_exports.object({
26383
26769
  warming: external_exports.object({
26384
26770
  /** Enable cache warming. Default: true. */
26385
26771
  enabled: external_exports.boolean().default(true),
26386
- /** Override the survival probability threshold below which warming is
26387
- * skipped. Default: auto-derived from cache read/write cost ratio
26388
- * (~0.08 for 5m TTL, ~0.05 for 1h TTL). */
26772
+ /** Override the return probability threshold below which warming is
26773
+ * skipped. Default: auto-derived from corrected cost ratio
26774
+ * read/(write-read) (~0.087 for 5m TTL, ~0.042 for 1h TTL). */
26389
26775
  minReturnProbability: external_exports.number().min(0).max(1).optional()
26390
26776
  }).default({ enabled: true })
26391
26777
  }).default({
@@ -26405,8 +26791,8 @@ function config2() {
26405
26791
  return current;
26406
26792
  }
26407
26793
  async function load(directory) {
26408
- const path = join3(directory, ".lore.json");
26409
- if (existsSync(path)) {
26794
+ const path = join5(directory, ".lore.json");
26795
+ if (existsSync2(path)) {
26410
26796
  const raw = JSON.parse(readFileSync(path, "utf8"));
26411
26797
  current = LoreConfig.parse(raw);
26412
26798
  return current;
@@ -26437,8 +26823,7 @@ function vendorModelInfo() {
26437
26823
  const reg = getRegistration();
26438
26824
  if (!reg) return null;
26439
26825
  return {
26440
- modelAbsoluteDirPath: reg.modelAbsoluteDirPath,
26441
- modelName: reg.modelName
26826
+ localModelPath: reg.localModelPath
26442
26827
  };
26443
26828
  }
26444
26829
  function isVendoredBinary() {
@@ -26525,62 +26910,31 @@ var OpenAIProvider = class {
26525
26910
  var LocalProviderUnavailableError = class extends Error {
26526
26911
  constructor(cause) {
26527
26912
  super(
26528
- "Local embedding provider unavailable: 'fastembed' is not installed. Configure search.embeddings.provider to 'voyage' or 'openai', or reinstall with ONNXRUNTIME_NODE_INSTALL_CUDA=skip to retry the optional fastembed install."
26913
+ "Local embedding provider unavailable: '@huggingface/transformers' failed to initialize. Configure search.embeddings.provider to 'voyage' or 'openai', or set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback."
26529
26914
  );
26530
26915
  this.name = "LocalProviderUnavailableError";
26531
26916
  if (cause !== void 0) this.cause = cause;
26532
26917
  }
26533
26918
  };
26534
- var fastembedModule = null;
26535
- var fastembedProbed = false;
26536
- var fastembedAvailable = false;
26537
- var fastembedLogged = false;
26538
- function _resetFastembedProbe() {
26539
- fastembedModule = null;
26540
- fastembedProbed = false;
26541
- fastembedAvailable = false;
26542
- fastembedLogged = false;
26543
- }
26544
- function _markFastembedUnavailable() {
26545
- fastembedModule = null;
26546
- fastembedProbed = true;
26547
- fastembedAvailable = false;
26548
- fastembedLogged = true;
26549
- }
26550
- async function tryLoadFastembed() {
26551
- if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
26552
- try {
26553
- const mod = await loadFastembedModule();
26554
- if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
26555
- fastembedModule = mod;
26556
- fastembedAvailable = true;
26557
- } catch (err) {
26558
- if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
26559
- fastembedAvailable = false;
26560
- if (!fastembedLogged) {
26561
- fastembedLogged = true;
26562
- const msg = err instanceof Error ? err.message : String(err);
26563
- const remediation = isVendoredBinary() ? "this is a bug in the lore binary; please file an issue. Set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback in the meantime" : "set search.embeddings.provider to 'voyage' or 'openai', set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback, or reinstall fastembed with ONNXRUNTIME_NODE_INSTALL_CUDA=skip";
26564
- info(
26565
- `local embedding provider unavailable (fastembed not installed: ${msg}) \u2014 ${remediation}`
26566
- );
26567
- }
26568
- } finally {
26569
- fastembedProbed = true;
26570
- }
26571
- return fastembedAvailable ? fastembedModule : null;
26919
// Module-wide flags describing the local embedding provider:
// - localProviderKnownBroken: set once the provider is known to be unusable.
// - localProviderErrorLogged: set once the failure has been reported, so it
//   is logged at most once.
var localProviderKnownBroken = false;
var localProviderErrorLogged = false;

/** Clear both flags (leading underscore: internal/test hook). */
function _resetLocalProviderProbe() {
  localProviderKnownBroken = localProviderErrorLogged = false;
}

/**
 * Force the provider into the "known broken" state and mark the failure as
 * already logged, so no log line is produced for it.
 */
function _markLocalProviderUnavailable() {
  localProviderKnownBroken = localProviderErrorLogged = true;
}

/**
 * @returns {boolean} `true` once the local provider has been flagged broken.
 */
function localProviderKnownUnavailable() {
  return localProviderKnownBroken;
}
26579
26932
  var LocalProvider = class {
26580
26933
  // With inference off the main thread, large batches no longer block
26581
26934
  // the event loop. 256 maximises throughput per round-trip to the
26582
- // worker. Backfill callers use a smaller BACKFILL_CHUNK_SIZE to give
26583
- // the worker's priority queue breathing room for recall queries.
26935
+ // worker. Backfill callers use token-budget-based batching (see
26936
+ // nextBatch) to give the worker's priority queue breathing room
26937
+ // for recall queries and prevent OOM on long texts.
26584
26938
  maxBatchSize = 256;
26585
26939
  worker = null;
26586
26940
  workerReady = false;
@@ -26588,14 +26942,14 @@ var LocalProvider = class {
26588
26942
  pendingRequests = /* @__PURE__ */ new Map();
26589
26943
  nextRequestId = 0;
26590
26944
  initPromise = null;
26591
- modelName;
26592
- constructor(modelName) {
26593
- this.modelName = modelName;
26945
+ modelId;
26946
+ dimensions;
26947
+ constructor(modelId, dimensions) {
26948
+ this.modelId = modelId;
26949
+ this.dimensions = dimensions;
26594
26950
  }
26595
26951
  /**
26596
- * Ensure the worker thread is running. Probes fastembed on the main
26597
- * thread first (fast, cached) as a fast-fail gate — the worker is only
26598
- * spawned if the module is known-loadable. Worker startup failure is
26952
+ * Ensure the worker thread is running. Worker startup failure is
26599
26953
  * surfaced as `LocalProviderUnavailableError` to trigger the existing
26600
26954
  * auto-fallback to remote providers.
26601
26955
  */
@@ -26604,8 +26958,7 @@ var LocalProvider = class {
26604
26958
  if (this.workerInitError) throw new LocalProviderUnavailableError(this.workerInitError);
26605
26959
  if (this.initPromise) return this.initPromise;
26606
26960
  this.initPromise = (async () => {
26607
- const fastembed = await tryLoadFastembed();
26608
- if (!fastembed) throw new LocalProviderUnavailableError();
26961
+ if (localProviderKnownBroken) throw new LocalProviderUnavailableError();
26609
26962
  const { Worker } = await import("node:worker_threads");
26610
26963
  const vendorWorkerUrl = globalThis.__LORE_VENDOR_WORKER_URL__;
26611
26964
  let workerUrl;
@@ -26619,12 +26972,22 @@ var LocalProvider = class {
26619
26972
  workerUrl = vendorWorkerUrl;
26620
26973
  }
26621
26974
  } else {
26622
- workerUrl = new URL(`./embedding-worker${import.meta.url.endsWith(".ts") ? ".ts" : ".js"}`, import.meta.url);
26975
+ const selfUrl = typeof import.meta.url === "string" ? import.meta.url : void 0;
26976
+ if (selfUrl) {
26977
+ workerUrl = new URL(
26978
+ `./embedding-worker${selfUrl.endsWith(".ts") ? ".ts" : ".js"}`,
26979
+ selfUrl
26980
+ );
26981
+ } else {
26982
+ const { pathToFileURL } = await import("node:url");
26983
+ workerUrl = new URL("./embedding-worker.cjs", pathToFileURL(__filename));
26984
+ }
26623
26985
  }
26624
26986
  const vendor = vendorModelInfo();
26625
26987
  const workerInitData = {
26626
- modelName: this.modelName,
26627
- vendorModel: vendor ? { modelAbsoluteDirPath: vendor.modelAbsoluteDirPath, modelName: vendor.modelName } : null
26988
+ modelId: this.modelId,
26989
+ dimensions: this.dimensions,
26990
+ vendorModel: vendor ? { localModelPath: vendor.localModelPath } : null
26628
26991
  };
26629
26992
  this.worker = new Worker(workerUrl, { workerData: workerInitData });
26630
26993
  this.worker.unref();
@@ -26651,6 +27014,13 @@ var LocalProvider = class {
26651
27014
  case "init-error": {
26652
27015
  this.workerInitError = msg.error;
26653
27016
  this.workerReady = false;
27017
+ localProviderKnownBroken = true;
27018
+ if (!localProviderErrorLogged) {
27019
+ localProviderErrorLogged = true;
27020
+ info(
27021
+ `local embedding provider failed to init: ${msg.error}. Set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback.`
27022
+ );
27023
+ }
26654
27024
  for (const [, p2] of this.pendingRequests) {
26655
27025
  p2.reject(new LocalProviderUnavailableError(msg.error));
26656
27026
  }
@@ -26702,6 +27072,8 @@ var LocalProvider = class {
26702
27072
  }
26703
27073
  async embed(texts, inputType) {
26704
27074
  await this.ensureWorker();
27075
+ const prefix = inputType === "document" ? "search_document: " : "search_query: ";
27076
+ const prefixed = texts.map((t2) => prefix + t2);
26705
27077
  const id = this.nextRequestId++;
26706
27078
  const priority = inputType === "query" && texts.length === 1 ? "high" : "normal";
26707
27079
  return new Promise((resolve, reject) => {
@@ -26710,7 +27082,7 @@ var LocalProvider = class {
26710
27082
  this.worker.postMessage({
26711
27083
  type: "embed",
26712
27084
  id,
26713
- texts,
27085
+ texts: prefixed,
26714
27086
  inputType,
26715
27087
  priority
26716
27088
  });
@@ -26718,8 +27090,6 @@ var LocalProvider = class {
26718
27090
  }
26719
27091
  /** Shut down the worker thread. Called by `resetProvider()` on config change.
26720
27092
  * Sends a shutdown message so the worker calls `process.exit(0)` internally.
26721
- * We avoid `worker.terminate()` because Bun's forced termination triggers a
26722
- * NAPI fatal error when tearing down onnxruntime's native bindings.
26723
27093
  *
26724
27094
  * Returns a promise that resolves once the worker has fully exited. Callers
26725
27095
  * that need a clean teardown (tests, config change) should await the result.
@@ -26742,7 +27112,7 @@ var LocalProvider = class {
26742
27112
  }
26743
27113
  };
26744
27114
  var PROVIDER_DEFAULTS = {
26745
- local: { model: "BGESmallENV15", dimensions: 384 },
27115
+ local: { model: "nomic-ai/nomic-embed-text-v1.5", dimensions: 768 },
26746
27116
  voyage: { model: "voyage-code-3", dimensions: 1024 },
26747
27117
  openai: { model: "text-embedding-3-small", dimensions: 1536 }
26748
27118
  };
@@ -26766,7 +27136,7 @@ function getProvider() {
26766
27136
  const model = cfg.model;
26767
27137
  switch (providerName) {
26768
27138
  case "local": {
26769
- cachedProvider = new LocalProvider(model);
27139
+ cachedProvider = new LocalProvider(model, cfg.dimensions);
26770
27140
  break;
26771
27141
  }
26772
27142
  case "voyage": {
@@ -26843,7 +27213,7 @@ function pickRemoteFallback() {
26843
27213
  function isAvailable() {
26844
27214
  const provider = getProvider();
26845
27215
  if (!provider) return false;
26846
- if (provider instanceof LocalProvider && fastembedKnownUnavailable()) return false;
27216
+ if (provider instanceof LocalProvider && localProviderKnownUnavailable()) return false;
26847
27217
  return true;
26848
27218
  }
26849
27219
  async function embed(texts, inputType) {
@@ -26858,7 +27228,7 @@ async function embed(texts, inputType) {
26858
27228
  if (!remoteFallbackLogged) {
26859
27229
  remoteFallbackLogged = true;
26860
27230
  info(
26861
- `fastembed unavailable; auto-switching to ${fallback.name} (set search.embeddings.provider in .lore.json to silence this)`
27231
+ `local embedding provider unavailable; auto-switching to ${fallback.name} (set search.embeddings.provider in .lore.json to silence this)`
26862
27232
  );
26863
27233
  }
26864
27234
  cachedProvider = fallback.provider;
@@ -26886,8 +27256,14 @@ function fromBlob(blob) {
26886
27256
  const bytes = new Uint8Array(blob);
26887
27257
  return new Float32Array(bytes.buffer, bytes.byteOffset, bytes.byteLength / 4);
26888
27258
  }
26889
- function vectorSearch(queryEmbedding, limit = 10) {
26890
- const rows = db().query("SELECT id, embedding FROM knowledge WHERE embedding IS NOT NULL AND confidence > 0.2").all();
27259
+ function vectorSearch(queryEmbedding, limit = 10, excludeCategories) {
27260
+ let sql = "SELECT id, embedding FROM knowledge WHERE embedding IS NOT NULL AND confidence > 0.2";
27261
+ const params = [];
27262
+ if (excludeCategories?.length) {
27263
+ sql += ` AND category NOT IN (${excludeCategories.map(() => "?").join(",")})`;
27264
+ params.push(...excludeCategories);
27265
+ }
27266
+ const rows = db().query(sql).all(...params);
26891
27267
  const scored = [];
26892
27268
  for (const row of rows) {
26893
27269
  const vec = fromBlob(row.embedding);
@@ -26910,6 +27286,20 @@ function vectorSearchDistillations(queryEmbedding, limit = 10) {
26910
27286
  scored.sort((a, b) => b.similarity - a.similarity);
26911
27287
  return scored.slice(0, limit);
26912
27288
  }
27289
// Upper bound on how many of the most recently created distillation rows are
// loaded and scored per search.
var MAX_DISTILLATION_VECTOR_ROWS = 500;

/**
 * Rank a project's embedded distillations by cosine similarity to a query
 * embedding.
 *
 * @param {*} queryEmbedding - Query vector passed to `cosineSimilarity`.
 * @param {*} projectId2 - Project whose distillations are searched.
 * @param {number} [limit=20] - Maximum number of results returned.
 * @returns {Array<{id: *, session_id: *, similarity: number}>} Best matches
 *   first, at most `limit` entries.
 */
function vectorSearchAllDistillations(queryEmbedding, projectId2, limit = 20) {
  const candidates = db().query(
    "SELECT id, session_id, embedding FROM distillations WHERE embedding IS NOT NULL AND project_id = ? ORDER BY created_at DESC LIMIT ?"
  ).all(projectId2, MAX_DISTILLATION_VECTOR_ROWS);
  const ranked = candidates.map(({ id, session_id, embedding }) => {
    const vector = fromBlob(embedding);
    return { id, session_id, similarity: cosineSimilarity(queryEmbedding, vector) };
  });
  // Highest similarity first.
  ranked.sort((left, right) => right.similarity - left.similarity);
  return ranked.slice(0, limit);
}
26913
27303
  function embedKnowledgeEntry(id, title, content3) {
26914
27304
  const text4 = `${title}
26915
27305
  ${content3}`;
@@ -27011,20 +27401,37 @@ async function runStartupBackfill() {
27011
27401
  );
27012
27402
  info(`embedding startup: ${parts.join("; ")}`);
27013
27403
  }
27014
- var BACKFILL_CHUNK_SIZE = 32;
27404
// Hard cap on the number of items in one backfill batch.
var MAX_BACKFILL_CHUNK = 8;
// Cap on (items in batch) x (longest item's estimated tokens).
var MAX_BATCH_TOKEN_AREA = 4096;
// Rough characters-per-token ratio used for the length estimate.
var CHARS_PER_TOKEN = 4;

/**
 * Take the next batch of rows starting at `start`.
 *
 * Rows are added in order until either MAX_BACKFILL_CHUNK rows are taken or
 * adding the next row would push `count * longestEstimatedTokens` above
 * MAX_BATCH_TOKEN_AREA. The first row is always accepted, so the batch is
 * non-empty whenever `start` is a valid index.
 *
 * @param {Array<{text: string}>} rows - All pending rows.
 * @param {number} start - Index of the first row to consider.
 * @returns {Array<{text: string}>} Consecutive rows beginning at `start`.
 */
function nextBatch(rows, start) {
  const picked = [];
  let widest = 0;
  let cursor = start;
  while (cursor < rows.length && picked.length < MAX_BACKFILL_CHUNK) {
    const candidate = rows[cursor];
    const estimated = Math.ceil(candidate.text.length / CHARS_PER_TOKEN);
    const widestIfAdded = Math.max(widest, estimated);
    const areaIfAdded = (picked.length + 1) * widestIfAdded;
    const overBudget = areaIfAdded > MAX_BATCH_TOKEN_AREA;
    if (overBudget && picked.length > 0) {
      break;
    }
    picked.push(candidate);
    widest = widestIfAdded;
    cursor += 1;
  }
  return picked;
}
27015
27420
  async function backfillEmbeddings() {
27016
27421
  checkConfigChange();
27017
27422
  const provider = getProvider();
27018
27423
  if (!provider) return 0;
27019
27424
  const rows = db().query("SELECT id, title, content FROM knowledge WHERE embedding IS NULL AND confidence > 0.2").all();
27020
27425
  if (!rows.length) return 0;
27426
+ const items = rows.map((r) => ({ ...r, text: `${r.title}
27427
+ ${r.content}` }));
27021
27428
  let embedded = 0;
27022
- for (let i = 0; i < rows.length; i += BACKFILL_CHUNK_SIZE) {
27023
- const batch = rows.slice(i, i + BACKFILL_CHUNK_SIZE);
27024
- const texts = batch.map((r) => `${r.title}
27025
- ${r.content}`);
27429
+ let i = 0;
27430
+ while (i < items.length) {
27431
+ const batch = nextBatch(items, i);
27432
+ i += batch.length;
27026
27433
  try {
27027
- const vectors = await embed(texts, "document");
27434
+ const vectors = await embed(batch.map((b) => b.text), "document");
27028
27435
  const update2 = db().prepare(
27029
27436
  "UPDATE knowledge SET embedding = ? WHERE id = ?"
27030
27437
  );
@@ -27033,7 +27440,7 @@ ${r.content}`);
27033
27440
  embedded++;
27034
27441
  }
27035
27442
  } catch (err) {
27036
- info(`embedding backfill batch ${i}-${i + batch.length} failed:`, err);
27443
+ error(`embedding backfill batch failed (${batch.length} items):`, err);
27037
27444
  }
27038
27445
  }
27039
27446
  if (embedded > 0) {
@@ -27051,11 +27458,13 @@ async function backfillDistillationEmbeddings() {
27051
27458
  let embedded = 0;
27052
27459
  const PROGRESS_INTERVAL = 256;
27053
27460
  let nextProgressAt = PROGRESS_INTERVAL;
27054
- for (let i = 0; i < rows.length; i += BACKFILL_CHUNK_SIZE) {
27055
- const batch = rows.slice(i, i + BACKFILL_CHUNK_SIZE);
27056
- const texts = batch.map((r) => r.observations);
27461
+ const items = rows.map((r) => ({ ...r, text: r.observations }));
27462
+ let i = 0;
27463
+ while (i < items.length) {
27464
+ const batch = nextBatch(items, i);
27465
+ i += batch.length;
27057
27466
  try {
27058
- const vectors = await embed(texts, "document");
27467
+ const vectors = await embed(batch.map((b) => b.text), "document");
27059
27468
  const update2 = db().prepare(
27060
27469
  "UPDATE distillations SET embedding = ? WHERE id = ?"
27061
27470
  );
@@ -27064,7 +27473,7 @@ async function backfillDistillationEmbeddings() {
27064
27473
  embedded++;
27065
27474
  }
27066
27475
  } catch (err) {
27067
- info(`distillation embedding backfill batch ${i}-${i + batch.length} failed:`, err);
27476
+ error(`distillation embedding backfill batch failed (${batch.length} items):`, err);
27068
27477
  }
27069
27478
  if (embedded >= nextProgressAt) {
27070
27479
  info(`embedding distillations: ${embedded}/${rows.length}\u2026`);
@@ -27178,7 +27587,7 @@ function searchLike(input) {
27178
27587
  if (!terms.length) return [];
27179
27588
  const conditions = terms.map(() => "LOWER(content) LIKE ?").join(" AND ");
27180
27589
  const likeParams = terms.map((t2) => `%${t2}%`);
27181
- const query = input.sessionID ? `SELECT * FROM temporal_messages WHERE project_id = ? AND session_id = ? AND ${conditions} ORDER BY created_at DESC LIMIT ?` : `SELECT * FROM temporal_messages WHERE project_id = ? AND ${conditions} ORDER BY created_at DESC LIMIT ?`;
27590
+ const query = input.sessionID ? `SELECT * FROM temporal_messages WHERE project_id = ? AND session_id = ? AND distilled = 0 AND ${conditions} ORDER BY created_at DESC LIMIT ?` : `SELECT * FROM temporal_messages WHERE project_id = ? AND distilled = 0 AND ${conditions} ORDER BY created_at DESC LIMIT ?`;
27182
27591
  const params = input.sessionID ? [input.pid, input.sessionID, ...likeParams, input.limit] : [input.pid, ...likeParams, input.limit];
27183
27592
  return db().query(query).all(...params);
27184
27593
  }
@@ -27187,10 +27596,10 @@ function search2(input) {
27187
27596
  const limit = input.limit ?? 20;
27188
27597
  const ftsSQL = input.sessionID ? `SELECT m.* FROM temporal_fts f
27189
27598
  CROSS JOIN temporal_messages m ON m.rowid = f.rowid
27190
- WHERE f.content MATCH ? AND m.project_id = ? AND m.session_id = ?
27599
+ WHERE f.content MATCH ? AND m.project_id = ? AND m.session_id = ? AND m.distilled = 0
27191
27600
  ORDER BY rank LIMIT ?` : `SELECT m.* FROM temporal_fts f
27192
27601
  CROSS JOIN temporal_messages m ON m.rowid = f.rowid
27193
- WHERE f.content MATCH ? AND m.project_id = ?
27602
+ WHERE f.content MATCH ? AND m.project_id = ? AND m.distilled = 0
27194
27603
  ORDER BY rank LIMIT ?`;
27195
27604
  try {
27196
27605
  return runRelaxedSearch(input.query, (matchExpr) => {
@@ -27211,10 +27620,10 @@ function searchScored(input) {
27211
27620
  const limit = input.limit ?? 20;
27212
27621
  const ftsSQL = input.sessionID ? `SELECT m.*, rank FROM temporal_fts f
27213
27622
  CROSS JOIN temporal_messages m ON m.rowid = f.rowid
27214
- WHERE f.content MATCH ? AND m.project_id = ? AND m.session_id = ?
27623
+ WHERE f.content MATCH ? AND m.project_id = ? AND m.session_id = ? AND m.distilled = 0
27215
27624
  ORDER BY rank LIMIT ?` : `SELECT m.*, rank FROM temporal_fts f
27216
27625
  CROSS JOIN temporal_messages m ON m.rowid = f.rowid
27217
- WHERE f.content MATCH ? AND m.project_id = ?
27626
+ WHERE f.content MATCH ? AND m.project_id = ? AND m.distilled = 0
27218
27627
  ORDER BY rank LIMIT ?`;
27219
27628
  try {
27220
27629
  return runRelaxedSearch(input.query, (matchExpr) => {
@@ -27243,6 +27652,12 @@ function count(projectPath, sessionID) {
27243
27652
  const params = sessionID ? [pid, sessionID] : [pid];
27244
27653
  return db().query(query).get(...params).count;
27245
27654
  }
27655
+ function hasMessages(projectPath, sessionID) {
27656
+ const pid = ensureProject(projectPath);
27657
+ return !!db().query(
27658
+ "SELECT 1 FROM temporal_messages WHERE project_id = ? AND session_id = ? LIMIT 1"
27659
+ ).get(pid, sessionID);
27660
+ }
27246
27661
  function undistilledCount(projectPath, sessionID) {
27247
27662
  const pid = ensureProject(projectPath);
27248
27663
  const query = sessionID ? "SELECT COUNT(*) as count FROM temporal_messages WHERE project_id = ? AND session_id = ? AND distilled = 0" : "SELECT COUNT(*) as count FROM temporal_messages WHERE project_id = ? AND distilled = 0";
@@ -27301,17 +27716,31 @@ function prune(input) {
27301
27716
  var ltm_exports = {};
27302
27717
  __export(ltm_exports, {
27303
27718
  all: () => all2,
27719
+ calibrateDedupThreshold: () => calibrateDedupThreshold,
27304
27720
  cascadeRefReplace: () => cascadeRefReplace,
27305
27721
  check: () => check2,
27306
27722
  cleanDeadRefs: () => cleanDeadRefs,
27307
27723
  create: () => create,
27724
+ crossProject: () => crossProject,
27725
+ dedupPairKey: () => dedupPairKey,
27726
+ deduplicate: () => deduplicate,
27727
+ deduplicateGlobal: () => deduplicateGlobal,
27308
27728
  extractRefs: () => extractRefs,
27729
+ findFuzzyDuplicate: () => findFuzzyDuplicate,
27309
27730
  forProject: () => forProject,
27310
27731
  forSession: () => forSession,
27311
27732
  get: () => get,
27733
+ getDedupFeedback: () => getDedupFeedback,
27734
+ getDedupFeedbackCount: () => getDedupFeedbackCount,
27735
+ loadCalibratedThreshold: () => loadCalibratedThreshold,
27736
+ pruneDedupFeedback: () => pruneDedupFeedback,
27312
27737
  pruneOversized: () => pruneOversized,
27738
+ recordAutoSignals: () => recordAutoSignals,
27739
+ recordDedupFeedback: () => recordDedupFeedback,
27740
+ recordDedupResultFeedback: () => recordDedupResultFeedback,
27313
27741
  remove: () => remove,
27314
27742
  resolveRef: () => resolveRef2,
27743
+ saveCalibratedThreshold: () => saveCalibratedThreshold,
27315
27744
  search: () => search3,
27316
27745
  searchScored: () => searchScored3,
27317
27746
  searchScoredOtherProjects: () => searchScoredOtherProjects,
@@ -27664,8 +28093,8 @@ __export(lat_reader_exports, {
27664
28093
  scoreForSession: () => scoreForSession,
27665
28094
  searchScored: () => searchScored2
27666
28095
  });
27667
- import { readdirSync, readFileSync as readFileSync2, existsSync as existsSync2, statSync } from "fs";
27668
- import { join as join4, relative } from "path";
28096
+ import { readdirSync, readFileSync as readFileSync2, existsSync as existsSync3, statSync as statSync2 } from "fs";
28097
+ import { join as join6, relative } from "path";
27669
28098
  var processor2 = remark();
27670
28099
  function estimateTokens2(text4) {
27671
28100
  return Math.ceil(text4.length / 3);
@@ -27743,7 +28172,7 @@ function listMarkdownFiles(dir) {
27743
28172
  try {
27744
28173
  const entries = readdirSync(dir, { withFileTypes: true });
27745
28174
  for (const entry of entries) {
27746
- const fullPath = join4(dir, entry.name);
28175
+ const fullPath = join6(dir, entry.name);
27747
28176
  if (entry.isDirectory() && !entry.name.startsWith(".")) {
27748
28177
  results.push(...listMarkdownFiles(fullPath));
27749
28178
  } else if (entry.isFile() && entry.name.endsWith(".md")) {
@@ -27758,12 +28187,12 @@ function contentHash(content3) {
27758
28187
  return sha256(content3);
27759
28188
  }
27760
28189
  function hasLatDir(projectPath) {
27761
- const latDir = join4(projectPath, "lat.md");
27762
- return existsSync2(latDir) && statSync(latDir).isDirectory();
28190
+ const latDir = join6(projectPath, "lat.md");
28191
+ return existsSync3(latDir) && statSync2(latDir).isDirectory();
27763
28192
  }
27764
28193
  function refresh(projectPath) {
27765
- const latDir = join4(projectPath, "lat.md");
27766
- if (!existsSync2(latDir) || !statSync(latDir).isDirectory()) return 0;
28194
+ const latDir = join6(projectPath, "lat.md");
28195
+ if (!existsSync3(latDir) || !statSync2(latDir).isDirectory()) return 0;
27767
28196
  const pid = ensureProject(projectPath);
27768
28197
  const files = listMarkdownFiles(latDir);
27769
28198
  let upserted = 0;
@@ -27885,6 +28314,7 @@ var KNOWLEDGE_COLS = "id, project_id, category, title, content, source_session,
27885
28314
  var KNOWLEDGE_COLS_K = "k.id, k.project_id, k.category, k.title, k.content, k.source_session, k.cross_project, k.confidence, k.created_at, k.updated_at, k.metadata";
27886
28315
  function create(input) {
27887
28316
  const pid = input.scope === "project" && input.projectPath ? ensureProject(input.projectPath) : null;
28317
+ const crossProject2 = pid === null ? true : input.crossProject ?? false;
27888
28318
  if (!input.id) {
27889
28319
  const existing = pid !== null ? db().query(
27890
28320
  "SELECT id FROM knowledge WHERE project_id = ? AND LOWER(title) = LOWER(?) AND confidence > 0 LIMIT 1"
@@ -27902,6 +28332,11 @@ function create(input) {
27902
28332
  update(crossExisting.id, { content: input.content });
27903
28333
  return crossExisting.id;
27904
28334
  }
28335
+ const fuzzyMatch = findFuzzyDuplicate({ title: input.title, projectId: pid });
28336
+ if (fuzzyMatch) {
28337
+ update(fuzzyMatch.id, { content: input.content });
28338
+ return fuzzyMatch.id;
28339
+ }
27905
28340
  }
27906
28341
  const id = input.id ?? uuidv72();
27907
28342
  const now = Date.now();
@@ -27915,7 +28350,7 @@ function create(input) {
27915
28350
  input.title,
27916
28351
  input.content,
27917
28352
  input.session ?? null,
27918
- input.crossProject ?? false ? 1 : 0,
28353
+ crossProject2 ? 1 : 0,
27919
28354
  now,
27920
28355
  now
27921
28356
  );
@@ -27933,7 +28368,7 @@ function update(id, input) {
27933
28368
  }
27934
28369
  if (input.confidence !== void 0) {
27935
28370
  sets.push("confidence = ?");
27936
- params.push(input.confidence);
28371
+ params.push(Math.max(0, Math.min(1, input.confidence)));
27937
28372
  }
27938
28373
  sets.push("updated_at = ?");
27939
28374
  params.push(Date.now());
@@ -27949,6 +28384,50 @@ function update(id, input) {
27949
28384
  function remove(id) {
27950
28385
  db().query("DELETE FROM knowledge WHERE id = ?").run(id);
27951
28386
  }
28387
+ function titleOverlap(a, b) {
28388
+ const wordsA = new Set(filterTerms(a).map((w) => w.toLowerCase()));
28389
+ const wordsB = new Set(filterTerms(b).map((w) => w.toLowerCase()));
28390
+ if (wordsA.size === 0 || wordsB.size === 0) return { coefficient: 0, intersectionSize: 0 };
28391
+ const intersection2 = [...wordsA].filter((w) => wordsB.has(w));
28392
+ return {
28393
+ coefficient: intersection2.length / Math.min(wordsA.size, wordsB.size),
28394
+ intersectionSize: intersection2.length
28395
+ };
28396
+ }
28397
+ var FUZZY_DEDUP_THRESHOLD = 0.7;
28398
+ var FUZZY_DEDUP_MIN_OVERLAP = 4;
28399
+ var EMBEDDING_DEDUP_THRESHOLD = 0.935;
28400
+ function findFuzzyDuplicate(input) {
28401
+ const q = ftsQueryOr(input.title);
28402
+ if (q === EMPTY_QUERY) return null;
28403
+ const { title: tw, content: cw, category: catw } = config2().search.ftsWeights;
28404
+ try {
28405
+ const excludeClause = input.excludeId ? "AND k.id != ?" : "";
28406
+ const sql = input.projectId !== null ? `SELECT k.id, k.title FROM knowledge_fts f
28407
+ CROSS JOIN knowledge k ON k.rowid = f.rowid
28408
+ WHERE knowledge_fts MATCH ?
28409
+ AND (k.project_id = ? OR k.cross_project = 1)
28410
+ AND k.confidence > 0.2
28411
+ ${excludeClause}
28412
+ ORDER BY bm25(knowledge_fts, ?, ?, ?) LIMIT 5` : `SELECT k.id, k.title FROM knowledge_fts f
28413
+ CROSS JOIN knowledge k ON k.rowid = f.rowid
28414
+ WHERE knowledge_fts MATCH ?
28415
+ AND (k.project_id IS NULL OR k.cross_project = 1)
28416
+ AND k.confidence > 0.2
28417
+ ${excludeClause}
28418
+ ORDER BY bm25(knowledge_fts, ?, ?, ?) LIMIT 5`;
28419
+ const params = input.projectId !== null ? [q, input.projectId, ...input.excludeId ? [input.excludeId] : [], tw, cw, catw] : [q, ...input.excludeId ? [input.excludeId] : [], tw, cw, catw];
28420
+ const candidates = db().query(sql).all(...params);
28421
+ for (const candidate of candidates) {
28422
+ const { coefficient, intersectionSize } = titleOverlap(input.title, candidate.title);
28423
+ if (coefficient >= FUZZY_DEDUP_THRESHOLD && intersectionSize >= FUZZY_DEDUP_MIN_OVERLAP) {
28424
+ return candidate;
28425
+ }
28426
+ }
28427
+ } catch {
28428
+ }
28429
+ return null;
28430
+ }
27952
28431
  function forProject(projectPath, includeCross = true) {
27953
28432
  const pid = ensureProject(projectPath);
27954
28433
  if (includeCross) {
@@ -27998,18 +28477,29 @@ function scoreEntriesFTS(sessionContext) {
27998
28477
  return /* @__PURE__ */ new Map();
27999
28478
  }
28000
28479
  }
28001
- function forSession(projectPath, sessionID, maxTokens) {
28480
+ async function forSession(projectPath, sessionID, maxTokens, options) {
28002
28481
  const pid = ensureProject(projectPath);
28482
+ const categoryFilter = options?.categories;
28483
+ const excludeFilter = options?.excludeCategories;
28484
+ let categoryClause = "";
28485
+ let categoryParams = [];
28486
+ if (categoryFilter?.length) {
28487
+ categoryClause = ` AND category IN (${categoryFilter.map(() => "?").join(",")})`;
28488
+ categoryParams = categoryFilter;
28489
+ } else if (excludeFilter?.length) {
28490
+ categoryClause = ` AND category NOT IN (${excludeFilter.map(() => "?").join(",")})`;
28491
+ categoryParams = excludeFilter;
28492
+ }
28003
28493
  const projectEntries = db().query(
28004
28494
  `SELECT ${KNOWLEDGE_COLS} FROM knowledge
28005
- WHERE project_id = ? AND cross_project = 0 AND confidence > 0.2
28495
+ WHERE project_id = ? AND cross_project = 0 AND confidence > 0.2${categoryClause}
28006
28496
  ORDER BY confidence DESC, updated_at DESC`
28007
- ).all(pid);
28497
+ ).all(pid, ...categoryParams);
28008
28498
  const crossEntries = db().query(
28009
28499
  `SELECT ${KNOWLEDGE_COLS} FROM knowledge
28010
- WHERE (project_id IS NULL OR cross_project = 1) AND confidence > 0.2
28500
+ WHERE (project_id IS NULL OR cross_project = 1) AND confidence > 0.2${categoryClause}
28011
28501
  ORDER BY confidence DESC, updated_at DESC`
28012
- ).all();
28502
+ ).all(...categoryParams);
28013
28503
  if (!crossEntries.length && !projectEntries.length) return [];
28014
28504
  let sessionContext = "";
28015
28505
  if (sessionID) {
@@ -28030,22 +28520,52 @@ function forSession(projectPath, sessionID, maxTokens) {
28030
28520
  sessionContext += recentMsgs.map((m) => m.content).join("\n");
28031
28521
  }
28032
28522
  }
28523
+ if (!sessionContext.trim() && options?.contextHint) {
28524
+ sessionContext = options.contextHint;
28525
+ }
28033
28526
  let scoredProject;
28034
28527
  let scoredCross;
28035
- if (sessionContext.trim().length > 20) {
28528
+ if (sessionContext.trim().length > 20 && isAvailable()) {
28529
+ let vectorScores;
28530
+ try {
28531
+ const [contextVec] = await embed([sessionContext], "query");
28532
+ const hits = vectorSearch(contextVec, 50, excludeFilter);
28533
+ vectorScores = new Map(hits.map((h3) => [h3.id, h3.similarity]));
28534
+ } catch (err) {
28535
+ warn("Vector scoring failed, falling back to FTS5:", err);
28536
+ vectorScores = /* @__PURE__ */ new Map();
28537
+ }
28538
+ if (vectorScores.size > 0) {
28539
+ const ftsScores = scoreEntriesFTS(sessionContext);
28540
+ const rawScored = projectEntries.map((entry) => {
28541
+ const vecScore = vectorScores.get(entry.id);
28542
+ const score = vecScore != null ? vecScore * entry.confidence : (ftsScores.get(entry.id) ?? 0) * entry.confidence;
28543
+ return { entry, score };
28544
+ });
28545
+ const matched = rawScored.filter((s) => s.score > 0);
28546
+ const matchedIds = new Set(matched.map((s) => s.entry.id));
28547
+ const safetyNet = projectEntries.filter((e) => !matchedIds.has(e.id)).slice(0, PROJECT_SAFETY_NET).map((e) => ({ entry: e, score: 1e-3 * e.confidence }));
28548
+ scoredProject = [...matched, ...safetyNet];
28549
+ scoredCross = crossEntries.filter((e) => vectorScores.has(e.id) || ftsScores.has(e.id)).map((e) => {
28550
+ const vecScore = vectorScores.get(e.id);
28551
+ const score = vecScore != null ? vecScore * e.confidence : (ftsScores.get(e.id) ?? 0) * e.confidence;
28552
+ return { entry: e, score };
28553
+ });
28554
+ } else {
28555
+ const ftsScores = scoreEntriesFTS(sessionContext);
28556
+ ({ scoredProject, scoredCross } = scoreFTS(
28557
+ projectEntries,
28558
+ crossEntries,
28559
+ ftsScores
28560
+ ));
28561
+ }
28562
+ } else if (sessionContext.trim().length > 20) {
28036
28563
  const ftsScores = scoreEntriesFTS(sessionContext);
28037
- const rawScored = projectEntries.map((entry) => ({
28038
- entry,
28039
- score: (ftsScores.get(entry.id) ?? 0) * entry.confidence
28040
- }));
28041
- const matched = rawScored.filter((s) => s.score > 0);
28042
- const matchedIds = new Set(matched.map((s) => s.entry.id));
28043
- const safetyNet = projectEntries.filter((e) => !matchedIds.has(e.id)).slice(0, PROJECT_SAFETY_NET).map((e) => ({ entry: e, score: 1e-3 * e.confidence }));
28044
- scoredProject = [...matched, ...safetyNet];
28045
- scoredCross = crossEntries.filter((e) => ftsScores.has(e.id)).map((e) => ({
28046
- entry: e,
28047
- score: (ftsScores.get(e.id) ?? 0) * e.confidence
28048
- }));
28564
+ ({ scoredProject, scoredCross } = scoreFTS(
28565
+ projectEntries,
28566
+ crossEntries,
28567
+ ftsScores
28568
+ ));
28049
28569
  } else {
28050
28570
  scoredProject = projectEntries.slice(0, NO_CONTEXT_FALLBACK_CAP).map((entry) => ({ entry, score: entry.confidence }));
28051
28571
  scoredCross = crossEntries.slice(0, NO_CONTEXT_FALLBACK_CAP).map((entry) => ({ entry, score: entry.confidence }));
@@ -28091,11 +28611,33 @@ function forSession(projectPath, sessionID, maxTokens) {
28091
28611
  }
28092
28612
  return result;
28093
28613
  }
28614
+ function scoreFTS(projectEntries, crossEntries, ftsScores) {
28615
+ const rawScored = projectEntries.map((entry) => ({
28616
+ entry,
28617
+ score: (ftsScores.get(entry.id) ?? 0) * entry.confidence
28618
+ }));
28619
+ const matched = rawScored.filter((s) => s.score > 0);
28620
+ const matchedIds = new Set(matched.map((s) => s.entry.id));
28621
+ const safetyNet = projectEntries.filter((e) => !matchedIds.has(e.id)).slice(0, PROJECT_SAFETY_NET).map((e) => ({ entry: e, score: 1e-3 * e.confidence }));
28622
+ const scoredProject = [...matched, ...safetyNet];
28623
+ const scoredCross = crossEntries.filter((e) => ftsScores.has(e.id)).map((e) => ({
28624
+ entry: e,
28625
+ score: (ftsScores.get(e.id) ?? 0) * e.confidence
28626
+ }));
28627
+ return { scoredProject, scoredCross };
28628
+ }
28094
28629
  function all2() {
28095
28630
  return db().query(
28096
28631
  `SELECT ${KNOWLEDGE_COLS} FROM knowledge WHERE confidence > 0.2 ORDER BY confidence DESC, updated_at DESC`
28097
28632
  ).all();
28098
28633
  }
28634
+ function crossProject() {
28635
+ return db().query(
28636
+ `SELECT ${KNOWLEDGE_COLS} FROM knowledge
28637
+ WHERE (project_id IS NULL OR cross_project = 1) AND confidence > 0.2
28638
+ ORDER BY confidence DESC, updated_at DESC`
28639
+ ).all();
28640
+ }
28099
28641
  function searchLike2(input) {
28100
28642
  const terms = input.query.toLowerCase().split(/\s+/).filter((t2) => t2.length > 2);
28101
28643
  if (!terms.length) return [];
@@ -28327,6 +28869,270 @@ function check2(projectPath) {
28327
28869
  }
28328
28870
  return issues;
28329
28871
  }
28872
+ function dedupPairKey(idA, idB) {
28873
+ return idA < idB ? `${idA}:${idB}` : `${idB}:${idA}`;
28874
+ }
28875
+ function _dedup(entries, dryRun, embeddingThreshold = EMBEDDING_DEDUP_THRESHOLD) {
28876
+ if (entries.length < 2) return { clusters: [], totalRemoved: 0, pairSimilarities: /* @__PURE__ */ new Map(), entryTitles: /* @__PURE__ */ new Map() };
28877
+ const embeddingMap = /* @__PURE__ */ new Map();
28878
+ {
28879
+ const entryIds = entries.map((e) => e.id);
28880
+ const placeholders = entryIds.map(() => "?").join(",");
28881
+ const rows = db().query(`SELECT id, embedding FROM knowledge WHERE embedding IS NOT NULL AND id IN (${placeholders})`).all(...entryIds);
28882
+ for (const row of rows) {
28883
+ try {
28884
+ embeddingMap.set(row.id, fromBlob(row.embedding));
28885
+ } catch {
28886
+ info(`skipping corrupted embedding for entry ${row.id}`);
28887
+ }
28888
+ }
28889
+ }
28890
+ const neighborMap = /* @__PURE__ */ new Map();
28891
+ const pairSimilarities = /* @__PURE__ */ new Map();
28892
+ for (const entry of entries) {
28893
+ const neighbors = [];
28894
+ const entryVec = embeddingMap.get(entry.id);
28895
+ for (const other of entries) {
28896
+ if (other.id === entry.id) continue;
28897
+ const { coefficient, intersectionSize } = titleOverlap(entry.title, other.title);
28898
+ const titleMatch = coefficient >= FUZZY_DEDUP_THRESHOLD && intersectionSize >= FUZZY_DEDUP_MIN_OVERLAP;
28899
+ let embeddingMatch = false;
28900
+ let similarity = 0;
28901
+ if (entryVec) {
28902
+ const otherVec = embeddingMap.get(other.id);
28903
+ if (otherVec && entryVec.length === otherVec.length) {
28904
+ similarity = cosineSimilarity(entryVec, otherVec);
28905
+ embeddingMatch = similarity >= embeddingThreshold;
28906
+ }
28907
+ }
28908
+ if (similarity > 0) {
28909
+ const pk = dedupPairKey(entry.id, other.id);
28910
+ if (!pairSimilarities.has(pk)) {
28911
+ pairSimilarities.set(pk, similarity);
28912
+ }
28913
+ }
28914
+ if (titleMatch || embeddingMatch) {
28915
+ neighbors.push({ id: other.id, score: Math.max(coefficient, similarity) });
28916
+ }
28917
+ }
28918
+ neighbors.sort((a, b) => b.score - a.score);
28919
+ neighborMap.set(entry.id, neighbors);
28920
+ }
28921
+ const claimed = /* @__PURE__ */ new Set();
28922
+ const rawClusters = /* @__PURE__ */ new Map();
28923
+ const sortedIds = [...neighborMap.keys()].sort(
28924
+ (a, b) => neighborMap.get(b).length - neighborMap.get(a).length
28925
+ );
28926
+ for (const centerId of sortedIds) {
28927
+ if (claimed.has(centerId)) continue;
28928
+ claimed.add(centerId);
28929
+ const members = [centerId];
28930
+ for (const { id: neighborId } of neighborMap.get(centerId)) {
28931
+ if (claimed.has(neighborId)) continue;
28932
+ claimed.add(neighborId);
28933
+ members.push(neighborId);
28934
+ }
28935
+ if (members.length > 1) {
28936
+ rawClusters.set(centerId, members);
28937
+ }
28938
+ }
28939
+ const entryById = new Map(entries.map((e) => [e.id, e]));
28940
+ const result = [];
28941
+ let totalRemoved = 0;
28942
+ for (const members of rawClusters.values()) {
28943
+ if (members.length < 2) continue;
28944
+ const sorted = members.map((id) => entryById.get(id)).filter(Boolean).sort((a, b) => {
28945
+ if (b.confidence !== a.confidence) return b.confidence - a.confidence;
28946
+ if (b.updated_at !== a.updated_at) return b.updated_at - a.updated_at;
28947
+ return a.title.length - b.title.length;
28948
+ });
28949
+ const survivor = sorted[0];
28950
+ const merged = sorted.slice(1);
28951
+ result.push({
28952
+ surviving: { id: survivor.id, title: survivor.title },
28953
+ merged: merged.map((e) => ({ id: e.id, title: e.title }))
28954
+ });
28955
+ if (!dryRun) {
28956
+ for (const entry of merged) {
28957
+ remove(entry.id);
28958
+ }
28959
+ }
28960
+ totalRemoved += merged.length;
28961
+ }
28962
+ result.sort((a, b) => b.merged.length - a.merged.length);
28963
+ const entryTitles = new Map(entries.map((e) => [e.id, e.title]));
28964
+ return { clusters: result, totalRemoved, pairSimilarities, entryTitles };
28965
+ }
28966
+ async function deduplicate(projectPath, opts) {
28967
+ const pid = ensureProject(projectPath);
28968
+ const threshold = loadCalibratedThreshold(pid) ?? EMBEDDING_DEDUP_THRESHOLD;
28969
+ const entries = forProject(projectPath, false);
28970
+ return _dedup(entries, opts?.dryRun ?? true, threshold);
28971
+ }
28972
+ async function deduplicateGlobal(opts) {
28973
+ const threshold = loadCalibratedThreshold(null) ?? EMBEDDING_DEDUP_THRESHOLD;
28974
+ const entries = db().query(
28975
+ `SELECT ${KNOWLEDGE_COLS} FROM knowledge
28976
+ WHERE project_id IS NULL
28977
+ AND confidence > 0.2
28978
+ ORDER BY confidence DESC, updated_at DESC`
28979
+ ).all();
28980
+ return _dedup(entries, opts?.dryRun ?? true, threshold);
28981
+ }
28982
+ var MIN_CALIBRATION_SAMPLES = 20;
28983
+ var DEFAULT_EMBEDDING_DEDUP_THRESHOLD = EMBEDDING_DEDUP_THRESHOLD;
28984
+ var AUTO_SIGNAL_MIN_SIMILARITY = 0.8;
28985
+ var AUTO_SIGNAL_MAX_PAIRS = 50;
28986
+ function recordDedupFeedback(input) {
28987
+ db().query(
28988
+ `INSERT INTO dedup_feedback
28989
+ (project_id, entry_a_title, entry_b_title, similarity, accepted, source, created_at)
28990
+ VALUES (?, ?, ?, ?, ?, ?, ?)`
28991
+ ).run(
28992
+ input.projectId,
28993
+ input.entryATitle,
28994
+ input.entryBTitle,
28995
+ input.similarity,
28996
+ input.accepted ? 1 : 0,
28997
+ input.source,
28998
+ Date.now()
28999
+ );
29000
+ }
29001
+ function recordDedupResultFeedback(projectId2, result, accepted, source) {
29002
+ for (const cluster of result.clusters) {
29003
+ for (const merged of cluster.merged) {
29004
+ const pk = dedupPairKey(cluster.surviving.id, merged.id);
29005
+ const similarity = result.pairSimilarities.get(pk);
29006
+ if (similarity != null && similarity > 0) {
29007
+ recordDedupFeedback({
29008
+ projectId: projectId2,
29009
+ entryATitle: cluster.surviving.title,
29010
+ entryBTitle: merged.title,
29011
+ similarity,
29012
+ accepted,
29013
+ source
29014
+ });
29015
+ }
29016
+ }
29017
+ }
29018
+ }
29019
+ function recordAutoSignals(projectId2, result) {
29020
+ const mergedPairs = /* @__PURE__ */ new Set();
29021
+ for (const cluster of result.clusters) {
29022
+ for (const merged of cluster.merged) {
29023
+ mergedPairs.add(dedupPairKey(cluster.surviving.id, merged.id));
29024
+ }
29025
+ }
29026
+ const titleMap = new Map(result.entryTitles);
29027
+ for (const cluster of result.clusters) {
29028
+ if (!titleMap.has(cluster.surviving.id)) {
29029
+ titleMap.set(cluster.surviving.id, cluster.surviving.title);
29030
+ }
29031
+ for (const m of cluster.merged) {
29032
+ if (!titleMap.has(m.id)) titleMap.set(m.id, m.title);
29033
+ }
29034
+ }
29035
+ const signals = [];
29036
+ for (const [pk, sim] of result.pairSimilarities) {
29037
+ if (sim < AUTO_SIGNAL_MIN_SIMILARITY) continue;
29038
+ if (mergedPairs.has(pk)) continue;
29039
+ const [idA, idB] = pk.split(":");
29040
+ const titleA = titleMap.get(idA);
29041
+ const titleB = titleMap.get(idB);
29042
+ if (!titleA || !titleB) continue;
29043
+ signals.push({ entryATitle: titleA, entryBTitle: titleB, similarity: sim });
29044
+ }
29045
+ const currentThreshold = loadCalibratedThreshold(projectId2) ?? DEFAULT_EMBEDDING_DEDUP_THRESHOLD;
29046
+ signals.sort((a, b) => Math.abs(a.similarity - currentThreshold) - Math.abs(b.similarity - currentThreshold));
29047
+ const capped = signals.slice(0, AUTO_SIGNAL_MAX_PAIRS);
29048
+ pruneDedupFeedback(projectId2);
29049
+ for (const s of capped) {
29050
+ recordDedupFeedback({
29051
+ projectId: projectId2,
29052
+ entryATitle: s.entryATitle,
29053
+ entryBTitle: s.entryBTitle,
29054
+ similarity: s.similarity,
29055
+ accepted: false,
29056
+ source: "auto_dedup"
29057
+ });
29058
+ }
29059
+ }
29060
+ function getDedupFeedback(projectId2) {
29061
+ const rows = projectId2 !== null ? db().query(
29062
+ "SELECT similarity, accepted, source FROM dedup_feedback WHERE project_id = ? ORDER BY similarity"
29063
+ ).all(projectId2) : db().query(
29064
+ "SELECT similarity, accepted, source FROM dedup_feedback WHERE project_id IS NULL ORDER BY similarity"
29065
+ ).all();
29066
+ return rows.map((r) => ({ similarity: r.similarity, accepted: r.accepted === 1, source: r.source }));
29067
+ }
29068
+ function getDedupFeedbackCount(projectId2) {
29069
+ const row = projectId2 !== null ? db().query("SELECT COUNT(*) as cnt FROM dedup_feedback WHERE project_id = ?").get(projectId2) : db().query("SELECT COUNT(*) as cnt FROM dedup_feedback WHERE project_id IS NULL").get();
29070
+ return row?.cnt ?? 0;
29071
+ }
29072
+ var MAX_FEEDBACK_ROWS_PER_PROJECT = 500;
29073
+ function pruneDedupFeedback(projectId2) {
29074
+ const count3 = getDedupFeedbackCount(projectId2);
29075
+ if (count3 <= MAX_FEEDBACK_ROWS_PER_PROJECT) return;
29076
+ const excess = count3 - MAX_FEEDBACK_ROWS_PER_PROJECT;
29077
+ if (projectId2 !== null) {
29078
+ db().query(
29079
+ `DELETE FROM dedup_feedback WHERE id IN (
29080
+ SELECT id FROM dedup_feedback WHERE project_id = ?
29081
+ ORDER BY created_at ASC LIMIT ?
29082
+ )`
29083
+ ).run(projectId2, excess);
29084
+ } else {
29085
+ db().query(
29086
+ `DELETE FROM dedup_feedback WHERE id IN (
29087
+ SELECT id FROM dedup_feedback WHERE project_id IS NULL
29088
+ ORDER BY created_at ASC LIMIT ?
29089
+ )`
29090
+ ).run(excess);
29091
+ }
29092
+ }
29093
+ function calibrateDedupThreshold(projectId2) {
29094
+ const feedback = getDedupFeedback(projectId2);
29095
+ if (feedback.length < MIN_CALIBRATION_SAMPLES) return null;
29096
+ const accepted = feedback.filter((f) => f.accepted);
29097
+ const rejected = feedback.filter((f) => !f.accepted);
29098
+ if (rejected.length === 0) {
29099
+ const minAccepted = Math.min(...accepted.map((f) => f.similarity));
29100
+ return Math.max(0.85, minAccepted - 5e-3);
29101
+ }
29102
+ if (accepted.length === 0) {
29103
+ warn("dedup calibration: all feedback is reject \u2014 keeping default threshold");
29104
+ return null;
29105
+ }
29106
+ const allSims = [...new Set(feedback.map((f) => f.similarity))].sort((a, b) => a - b);
29107
+ let bestThreshold = DEFAULT_EMBEDDING_DEDUP_THRESHOLD;
29108
+ let bestAccuracy = -1;
29109
+ for (let i = 0; i < allSims.length - 1; i++) {
29110
+ const candidate = (allSims[i] + allSims[i + 1]) / 2;
29111
+ const correctAccepted = accepted.filter((f) => f.similarity >= candidate).length;
29112
+ const correctRejected = rejected.filter((f) => f.similarity < candidate).length;
29113
+ const accuracy = (correctAccepted + correctRejected) / feedback.length;
29114
+ if (accuracy > bestAccuracy || accuracy === bestAccuracy && candidate > bestThreshold) {
29115
+ bestAccuracy = accuracy;
29116
+ bestThreshold = candidate;
29117
+ }
29118
+ }
29119
+ return Math.max(0.85, Math.min(0.98, bestThreshold));
29120
+ }
29121
+ function saveCalibratedThreshold(projectId2, threshold, sampleSize) {
29122
+ const key = `dedup_threshold:${projectId2 ?? "global"}`;
29123
+ setKV(key, JSON.stringify({ threshold, sampleSize, calibratedAt: Date.now() }));
29124
+ }
29125
+ function loadCalibratedThreshold(projectId2) {
29126
+ const key = `dedup_threshold:${projectId2 ?? "global"}`;
29127
+ const raw = getKV(key);
29128
+ if (!raw) return null;
29129
+ try {
29130
+ const parsed = JSON.parse(raw);
29131
+ return typeof parsed.threshold === "number" ? parsed.threshold : null;
29132
+ } catch {
29133
+ return null;
29134
+ }
29135
+ }
28330
29136
 
28331
29137
  // src/data.ts
28332
29138
  var data_exports = {};
@@ -28351,11 +29157,11 @@ __export(data_exports, {
28351
29157
  resolveId: () => resolveId,
28352
29158
  wipeDatabase: () => wipeDatabase
28353
29159
  });
28354
- import { statSync as statSync3, unlinkSync, existsSync as existsSync4 } from "fs";
29160
+ import { statSync as statSync4, unlinkSync, existsSync as existsSync5 } from "fs";
28355
29161
 
28356
29162
  // src/agents-file.ts
28357
- import { existsSync as existsSync3, readFileSync as readFileSync3, writeFileSync, mkdirSync as mkdirSync2, statSync as statSync2 } from "fs";
28358
- import { dirname as dirname2, join as join5 } from "path";
29163
+ import { existsSync as existsSync4, readFileSync as readFileSync3, writeFileSync, mkdirSync as mkdirSync3, statSync as statSync3 } from "fs";
29164
+ import { dirname as dirname2, join as join7 } from "path";
28359
29165
  var LORE_SECTION_START = "<!-- This section is maintained by the coding agent via lore (https://github.com/BYK/loreai) -->";
28360
29166
  var LORE_SECTION_END = "<!-- End lore-managed section -->";
28361
29167
  var ALL_START_MARKERS = [
@@ -28386,7 +29192,7 @@ function setCache(fp, entry) {
28386
29192
  ).run(key, value, value);
28387
29193
  }
28388
29194
  function clearLoreFileCache(projectPath) {
28389
- db().query("DELETE FROM kv_meta WHERE key = ?").run(CACHE_PREFIX + join5(projectPath, LORE_FILE));
29195
+ db().query("DELETE FROM kv_meta WHERE key = ?").run(CACHE_PREFIX + join7(projectPath, LORE_FILE));
28390
29196
  }
28391
29197
  function splitFile(fileContent) {
28392
29198
  const spans = [];
@@ -28499,7 +29305,7 @@ function exportToFile(input) {
28499
29305
  const pointerBody = "\n## Long-term Knowledge\n\nFor long-term knowledge entries managed by [lore](https://github.com/BYK/loreai) (gotchas, patterns, decisions, architecture), see [`.lore.md`](.lore.md) in the project root.\n";
28500
29306
  const newSection = LORE_SECTION_START + pointerBody + LORE_SECTION_END + "\n";
28501
29307
  let fileContent = "";
28502
- if (existsSync3(input.filePath)) {
29308
+ if (existsSync4(input.filePath)) {
28503
29309
  fileContent = readFileSync3(input.filePath, "utf8");
28504
29310
  }
28505
29311
  const { before, after } = splitFile(fileContent);
@@ -28508,11 +29314,11 @@ function exportToFile(input) {
28508
29314
  const suffix = after.trimStart();
28509
29315
  const suffixWithSep = suffix.length > 0 ? "\n" + suffix : "";
28510
29316
  const result = prefixWithSep + newSection + suffixWithSep;
28511
- mkdirSync2(dirname2(input.filePath), { recursive: true });
29317
+ mkdirSync3(dirname2(input.filePath), { recursive: true });
28512
29318
  writeFileSync(input.filePath, result, "utf8");
28513
29319
  }
28514
29320
  function shouldImport(input) {
28515
- if (!existsSync3(input.filePath)) return false;
29321
+ if (!existsSync4(input.filePath)) return false;
28516
29322
  const fileContent = readFileSync3(input.filePath, "utf8");
28517
29323
  const { section } = splitFile(fileContent);
28518
29324
  if (section === null) {
@@ -28533,18 +29339,26 @@ function _importEntries(entries, projectPath) {
28533
29339
  update(entry.id, { content: entry.content });
28534
29340
  }
28535
29341
  } else {
28536
- create({
28537
- projectPath,
28538
- category: entry.category,
28539
- title: entry.title,
28540
- content: entry.content,
28541
- scope: "project",
28542
- crossProject: false,
28543
- id: entry.id
28544
- });
29342
+ const pid = ensureProject(projectPath);
29343
+ const fuzzyMatch = findFuzzyDuplicate({ title: entry.title, projectId: pid });
29344
+ if (fuzzyMatch) {
29345
+ if (fuzzyMatch.title !== entry.title || get(fuzzyMatch.id)?.content !== entry.content) {
29346
+ update(fuzzyMatch.id, { content: entry.content });
29347
+ }
29348
+ } else {
29349
+ create({
29350
+ projectPath,
29351
+ category: entry.category,
29352
+ title: entry.title,
29353
+ content: entry.content,
29354
+ scope: "project",
29355
+ crossProject: false,
29356
+ id: entry.id
29357
+ });
29358
+ }
28545
29359
  }
28546
29360
  } else {
28547
- const existing = forProject(projectPath, true);
29361
+ const existing = forProject(projectPath, false);
28548
29362
  const titleMatch = existing.find(
28549
29363
  (e) => e.title.toLowerCase() === entry.title.toLowerCase()
28550
29364
  );
@@ -28562,7 +29376,7 @@ function _importEntries(entries, projectPath) {
28562
29376
  }
28563
29377
  }
28564
29378
  function importFromFile(input) {
28565
- if (!existsSync3(input.filePath)) return;
29379
+ if (!existsSync4(input.filePath)) return;
28566
29380
  const fileContent = readFileSync3(input.filePath, "utf8");
28567
29381
  const { section } = splitFile(fileContent);
28568
29382
  const textToParse = section ?? fileContent;
@@ -28571,25 +29385,25 @@ function importFromFile(input) {
28571
29385
  _importEntries(fileEntries, input.projectPath);
28572
29386
  }
28573
29387
  function loreFileExists(projectPath) {
28574
- return existsSync3(join5(projectPath, LORE_FILE));
29388
+ return existsSync4(join7(projectPath, LORE_FILE));
28575
29389
  }
28576
29390
  function exportLoreFile(projectPath) {
28577
29391
  const sectionBody = buildSection(projectPath);
28578
29392
  const content3 = LORE_FILE_HEADER + "\n" + sectionBody;
28579
29393
  const contentHash2 = hashSection(content3);
28580
- const fp = join5(projectPath, LORE_FILE);
29394
+ const fp = join7(projectPath, LORE_FILE);
28581
29395
  const cached2 = getCache(fp);
28582
29396
  if (cached2 && cached2.hash === contentHash2) {
28583
29397
  return;
28584
29398
  }
28585
29399
  writeFileSync(fp, content3, "utf8");
28586
- const { mtimeMs } = statSync2(fp);
29400
+ const { mtimeMs } = statSync3(fp);
28587
29401
  setCache(fp, { mtimeMs, hash: contentHash2 });
28588
29402
  }
28589
29403
  function shouldImportLoreFile(projectPath) {
28590
- const fp = join5(projectPath, LORE_FILE);
28591
- if (!existsSync3(fp)) return false;
28592
- const { mtimeMs } = statSync2(fp);
29404
+ const fp = join7(projectPath, LORE_FILE);
29405
+ if (!existsSync4(fp)) return false;
29406
+ const { mtimeMs } = statSync3(fp);
28593
29407
  const cached2 = getCache(fp);
28594
29408
  if (cached2 && cached2.mtimeMs === mtimeMs) {
28595
29409
  return false;
@@ -28605,12 +29419,17 @@ function shouldImportLoreFile(projectPath) {
28605
29419
  return true;
28606
29420
  }
28607
29421
  function importLoreFile(projectPath) {
28608
- const fp = join5(projectPath, LORE_FILE);
28609
- if (!existsSync3(fp)) return;
29422
+ const fp = join7(projectPath, LORE_FILE);
29423
+ if (!existsSync4(fp)) return;
28610
29424
  const fileContent = readFileSync3(fp, "utf8");
28611
29425
  const fileEntries = parseEntriesFromSection(fileContent);
28612
29426
  if (!fileEntries.length) return;
28613
29427
  _importEntries(fileEntries, projectPath);
29428
+ try {
29429
+ const { mtimeMs } = statSync3(fp);
29430
+ setCache(fp, { mtimeMs, hash: hashSection(fileContent) });
29431
+ } catch {
29432
+ }
28614
29433
  }
28615
29434
 
28616
29435
  // src/data.ts
@@ -28685,10 +29504,10 @@ function globalStats() {
28685
29504
  let db_size_bytes = 0;
28686
29505
  try {
28687
29506
  const p2 = dbPath();
28688
- db_size_bytes = statSync3(p2).size;
29507
+ db_size_bytes = statSync4(p2).size;
28689
29508
  const walPath = p2 + "-wal";
28690
- if (existsSync4(walPath)) {
28691
- db_size_bytes += statSync3(walPath).size;
29509
+ if (existsSync5(walPath)) {
29510
+ db_size_bytes += statSync4(walPath).size;
28692
29511
  }
28693
29512
  } catch {
28694
29513
  }
@@ -28739,7 +29558,7 @@ function clearProject(projectPath) {
28739
29558
  database.exec("ROLLBACK");
28740
29559
  throw e;
28741
29560
  }
28742
- if (existsSync4(projectPath)) {
29561
+ if (existsSync5(projectPath)) {
28743
29562
  try {
28744
29563
  exportLoreFile(projectPath);
28745
29564
  } catch {
@@ -28810,7 +29629,7 @@ function clearKnowledge(projectPath) {
28810
29629
  "SELECT COUNT(*) as c FROM knowledge WHERE project_id = ?"
28811
29630
  ).get(pid).c;
28812
29631
  db().query("DELETE FROM knowledge WHERE project_id = ?").run(pid);
28813
- if (existsSync4(projectPath)) {
29632
+ if (existsSync5(projectPath)) {
28814
29633
  try {
28815
29634
  exportLoreFile(projectPath);
28816
29635
  } catch {
@@ -28869,7 +29688,7 @@ function wipeDatabase() {
28869
29688
  close();
28870
29689
  for (const suffix of ["", "-wal", "-shm"]) {
28871
29690
  const fp = p2 + suffix;
28872
- if (existsSync4(fp)) {
29691
+ if (existsSync5(fp)) {
28873
29692
  try {
28874
29693
  unlinkSync(fp);
28875
29694
  } catch {
@@ -28910,7 +29729,7 @@ function backfillGitRemotes() {
28910
29729
  for (const project of projects) {
28911
29730
  let gitRemote = project.git_remote;
28912
29731
  if (!gitRemote) {
28913
- if (!existsSync4(project.path)) continue;
29732
+ if (!existsSync5(project.path)) continue;
28914
29733
  gitRemote = getGitRemote(project.path);
28915
29734
  if (!gitRemote) continue;
28916
29735
  const existing = db().query(
@@ -29009,6 +29828,32 @@ var PATTERNS = [
29009
29828
  regex: /(?:user |team |we )(?:always |usually |typically )(?:use|prefer|go with) (.+?)(?:\.|,|$)/gi,
29010
29829
  category: "preference",
29011
29830
  titleFn: (m) => `Typically uses ${m[1].trim()}`
29831
+ },
29832
+ // Process instruction patterns — match distilled observations recording
29833
+ // user assertions about workflow/process rules. The distillation observer
29834
+ // normalizes user instructions into "User stated always X" phrasing.
29835
+ // These require "stated/asserted/said" to avoid overlapping with the
29836
+ // existing "typically uses" pattern above (which already handles
29837
+ // "user always use/prefer/go with X").
29838
+ {
29839
+ regex: /(?:user |team |we )(?:stated |asserted |said )(?:to )?always (.+?)(?:\.|,|$)/gi,
29840
+ category: "preference",
29841
+ titleFn: (m) => `Always ${m[1].trim()}`
29842
+ },
29843
+ {
29844
+ regex: /(?:user |team |we )(?:stated |asserted |said )(?:to )?never (.+?)(?:\.|,|$)/gi,
29845
+ category: "preference",
29846
+ titleFn: (m) => `Never ${m[1].trim()}`
29847
+ },
29848
+ {
29849
+ regex: /(?:user |team |we )(?:stated |asserted |said )(?:to )?make sure to (.+?)(?:\.|,|$)/gi,
29850
+ category: "preference",
29851
+ titleFn: (m) => `Make sure to ${m[1].trim()}`
29852
+ },
29853
+ {
29854
+ regex: /(?:user |team |we )(?:stated |asserted |said )(?:to )?(?:don't|do not) forget (?:to )?(.+?)(?:\.|,|$)/gi,
29855
+ category: "preference",
29856
+ titleFn: (m) => `Always ${m[1].trim()}`
29012
29857
  }
29013
29858
  ];
29014
29859
  function extractPatterns(observations) {
@@ -29018,6 +29863,8 @@ function extractPatterns(observations) {
29018
29863
  regex.lastIndex = 0;
29019
29864
  let match;
29020
29865
  while ((match = regex.exec(observations)) !== null) {
29866
+ const captures = match.slice(1);
29867
+ if (captures.some((c) => c && (c.trim().length <= 2 || /["\u201C\u201D`\u2018\u2019]/.test(c)))) continue;
29021
29868
  const title = titleFn(match);
29022
29869
  const key = title.toLowerCase();
29023
29870
  if (seen.has(key)) continue;
@@ -29133,11 +29980,21 @@ function getSessionState(sessionID) {
29133
29980
  if (!state) {
29134
29981
  state = makeSessionState();
29135
29982
  state.forceMinLayer = loadForceMinLayer(sessionID);
29983
+ const persisted = loadSessionTracking(sessionID);
29984
+ if (persisted && persisted.lastTurnAt > 0) {
29985
+ state.dynamicContextCap = persisted.dynamicContextCap;
29986
+ state.bustRateEMA = persisted.bustRateEMA;
29987
+ state.interBustIntervalEMA = persisted.interBustIntervalEMA;
29988
+ state.lastLayer = persisted.lastLayer;
29989
+ state.lastKnownInput = persisted.lastKnownInput;
29990
+ state.lastTurnAt = persisted.lastTurnAt;
29991
+ state.lastBustAt = persisted.lastBustAt;
29992
+ }
29136
29993
  sessionStates.set(sessionID, state);
29137
29994
  }
29138
29995
  return state;
29139
29996
  }
29140
- function onIdleResume(sessionID, thresholdMs, now = Date.now()) {
29997
+ function onIdleResume(sessionID, thresholdMs, now = Date.now(), skipCompact = false) {
29141
29998
  if (thresholdMs <= 0) return { triggered: false };
29142
29999
  const state = getSessionState(sessionID);
29143
30000
  if (state.lastTurnAt === 0) return { triggered: false };
@@ -29147,7 +30004,7 @@ function onIdleResume(sessionID, thresholdMs, now = Date.now()) {
29147
30004
  state.rawWindowCache = null;
29148
30005
  state.distillationSnapshot = null;
29149
30006
  state.cameOutOfIdle = true;
29150
- state.postIdleCompact = true;
30007
+ state.postIdleCompact = !skipCompact;
29151
30008
  return { triggered: true, idleMs };
29152
30009
  }
29153
30010
  function getLastTurnAt(sessionID) {
@@ -29238,6 +30095,19 @@ function inspectSessionState(sessionID) {
29238
30095
  function setLastTurnAtForTest(sessionID, ms) {
29239
30096
  getSessionState(sessionID).lastTurnAt = ms;
29240
30097
  }
30098
+ function saveGradientState(sessionID) {
30099
+ const state = sessionStates.get(sessionID);
30100
+ if (!state) return;
30101
+ saveSessionTracking(sessionID, {
30102
+ dynamicContextCap: state.dynamicContextCap,
30103
+ bustRateEMA: state.bustRateEMA,
30104
+ interBustIntervalEMA: state.interBustIntervalEMA,
30105
+ lastLayer: state.lastLayer,
30106
+ lastKnownInput: state.lastKnownInput,
30107
+ lastTurnAt: state.lastTurnAt,
30108
+ lastBustAt: state.lastBustAt
30109
+ });
30110
+ }
29241
30111
  function loadDistillations(projectPath, sessionID) {
29242
30112
  const pid = ensureProject(projectPath);
29243
30113
  const query = sessionID ? "SELECT id, observations, generation, token_count, created_at, session_id FROM distillations WHERE project_id = ? AND session_id = ? AND archived = 0 ORDER BY created_at ASC" : "SELECT id, observations, generation, token_count, created_at, session_id FROM distillations WHERE project_id = ? AND archived = 0 ORDER BY created_at ASC";
@@ -29522,6 +30392,26 @@ function buildPrefixMessages(formatted) {
29522
30392
  }
29523
30393
  ];
29524
30394
  }
30395
+ var DECISION_RE = /\b(?:decision|decided|chose|chosen|agreed)\b/i;
30396
+ var GOTCHA_RE = /\b(?:gotcha|(?:critical|known|subtle)\s+bug|broken|crash(?:ed|es)?|regression)\b/i;
30397
+ var ARCH_RE = /\b(?:architecture|design.(?:decision|pattern)|system.design)\b/i;
30398
+ function importanceBonus(d) {
30399
+ let bonus = 0;
30400
+ if (DECISION_RE.test(d.observations)) bonus += 0.3;
30401
+ if (GOTCHA_RE.test(d.observations)) bonus += 0.2;
30402
+ if (ARCH_RE.test(d.observations)) bonus += 0.1;
30403
+ if (d.generation >= 1) bonus += 0.2;
30404
+ return Math.min(bonus, 1);
30405
+ }
30406
+ function selectDistillations(all3, limit) {
30407
+ if (all3.length <= limit) return all3;
30408
+ const maxIdx = all3.length - 1;
30409
+ const scored = all3.map((d, i) => ({
30410
+ d,
30411
+ score: (maxIdx > 0 ? i / maxIdx : 1) * 0.7 + importanceBonus(d) * 0.3
30412
+ }));
30413
+ return scored.sort((a, b) => b.score - a.score).slice(0, limit).map((s) => s.d).sort((a, b) => a.created_at - b.created_at);
30414
+ }
29525
30415
  function distilledPrefix(distillations) {
29526
30416
  if (!distillations.length) return [];
29527
30417
  const formatted = formatDistillations(distillations);
@@ -29639,6 +30529,11 @@ function tryFitStable(input) {
29639
30529
  }
29640
30530
  return result;
29641
30531
  }
30532
+ var COMPRESSION_STAGES = [
30533
+ { strip: "none", rawFrac: null, distFrac: null, distLimit: Infinity, protectedTurns: 0, useStableWindow: true },
30534
+ { strip: "old-tools", rawFrac: 0.5, distFrac: null, distLimit: Infinity, protectedTurns: 2, useStableWindow: false },
30535
+ { strip: "all-tools", rawFrac: 0.55, distFrac: 0.15, distLimit: 5, protectedTurns: 0, useStableWindow: false }
30536
+ ];
29642
30537
  var urgentDistillationMap = /* @__PURE__ */ new Map();
29643
30538
  function needsUrgentDistillation(sessionID) {
29644
30539
  const v = urgentDistillationMap.get(sessionID) ?? false;
@@ -29670,7 +30565,7 @@ function transformInner(input) {
29670
30565
  if (calibrated) return true;
29671
30566
  return result.totalTokens * UNCALIBRATED_SAFETY <= maxInput;
29672
30567
  }
29673
- if (calibrated && sessState.lastLayer >= 1 && input.messages.length >= sessState.lastKnownMessageCount) {
30568
+ if (calibrated && sessState.lastLayer >= 1 && sessState.lastLayer <= 3 && input.messages.length >= sessState.lastKnownMessageCount) {
29674
30569
  effectiveMinLayer = Math.max(effectiveMinLayer, sessState.lastLayer);
29675
30570
  }
29676
30571
  const postIdleCompact = sessState.postIdleCompact;
@@ -29708,7 +30603,8 @@ function transformInner(input) {
29708
30603
  totalTokens: Math.max(0, messageTokens),
29709
30604
  usable,
29710
30605
  distilledBudget,
29711
- rawBudget
30606
+ rawBudget,
30607
+ refreshLtm: false
29712
30608
  };
29713
30609
  }
29714
30610
  const turnStart = currentTurnStart(input.messages);
@@ -29718,67 +30614,52 @@ function transformInner(input) {
29718
30614
  const msgs = distilledPrefix(distillations);
29719
30615
  return { messages: msgs, tokens: msgs.reduce((sum, m) => sum + estimateMessage(m), 0) };
29720
30616
  })();
29721
- if (effectiveMinLayer <= 1) {
29722
- const layer1 = sid ? tryFitStable({
29723
- messages: dedupMessages,
29724
- prefix: cached2.messages,
29725
- prefixTokens: cached2.tokens,
29726
- distilledBudget,
29727
- rawBudget,
29728
- sessionID: sid,
29729
- sessState
29730
- }) : tryFit({
29731
- messages: dedupMessages,
29732
- prefix: cached2.messages,
29733
- prefixTokens: cached2.tokens,
29734
- distilledBudget,
29735
- rawBudget,
29736
- strip: "none"
29737
- });
29738
- if (fitsWithSafetyMargin(layer1)) {
29739
- if (cached2.tokens === 0 && sid) {
30617
+ for (let s = 0; s < COMPRESSION_STAGES.length; s++) {
30618
+ const stageLayer = s + 1;
30619
+ if (effectiveMinLayer > stageLayer) continue;
30620
+ const stage = COMPRESSION_STAGES[s];
30621
+ const stageRawBudget = stage.rawFrac !== null ? Math.floor(usable * stage.rawFrac) : rawBudget;
30622
+ const stageDistBudget = stage.distFrac !== null ? Math.floor(usable * stage.distFrac) : distilledBudget;
30623
+ let stagePrefix = cached2.messages;
30624
+ let stagePrefixTokens = cached2.tokens;
30625
+ if (stage.distLimit !== Infinity && distillations.length > stage.distLimit) {
30626
+ const trimmed = selectDistillations(distillations, stage.distLimit);
30627
+ stagePrefix = distilledPrefix(trimmed);
30628
+ stagePrefixTokens = stagePrefix.reduce((sum, m) => sum + estimateMessage(m), 0);
30629
+ }
30630
+ let result;
30631
+ if (stage.useStableWindow && sid) {
30632
+ result = tryFitStable({
30633
+ messages: dedupMessages,
30634
+ prefix: stagePrefix,
30635
+ prefixTokens: stagePrefixTokens,
30636
+ distilledBudget: stageDistBudget,
30637
+ rawBudget: stageRawBudget,
30638
+ sessionID: sid,
30639
+ sessState
30640
+ });
30641
+ } else {
30642
+ sessState.rawWindowCache = null;
30643
+ result = tryFit({
30644
+ messages: dedupMessages,
30645
+ prefix: stagePrefix,
30646
+ prefixTokens: stagePrefixTokens,
30647
+ distilledBudget: stageDistBudget,
30648
+ rawBudget: stageRawBudget,
30649
+ strip: stage.strip,
30650
+ protectedTurns: stage.protectedTurns
30651
+ });
30652
+ }
30653
+ if (fitsWithSafetyMargin(result)) {
30654
+ if (sid && (s > 0 || cached2.tokens === 0)) {
29740
30655
  urgentDistillationMap.set(sid, true);
29741
30656
  }
29742
- return { ...layer1, layer: 1, usable, distilledBudget, rawBudget };
30657
+ return { ...result, layer: stageLayer, usable, distilledBudget, rawBudget, refreshLtm: false };
29743
30658
  }
29744
30659
  }
29745
30660
  sessState.rawWindowCache = null;
29746
- if (effectiveMinLayer <= 2) {
29747
- const layer2 = tryFit({
29748
- messages: dedupMessages,
29749
- prefix: cached2.messages,
29750
- prefixTokens: cached2.tokens,
29751
- distilledBudget,
29752
- rawBudget: Math.floor(usable * 0.5),
29753
- // give raw more room
29754
- strip: "old-tools",
29755
- protectedTurns: 2
29756
- });
29757
- if (fitsWithSafetyMargin(layer2)) {
29758
- if (sid) urgentDistillationMap.set(sid, true);
29759
- return { ...layer2, layer: 2, usable, distilledBudget, rawBudget };
29760
- }
29761
- }
29762
- const trimmedDistillations = distillations.slice(-5);
29763
- const trimmedPrefix = distilledPrefix(trimmedDistillations);
29764
- const trimmedPrefixTokens = trimmedPrefix.reduce(
29765
- (sum, m) => sum + estimateMessage(m),
29766
- 0
29767
- );
29768
- const layer3 = tryFit({
29769
- messages: dedupMessages,
29770
- prefix: trimmedPrefix,
29771
- prefixTokens: trimmedPrefixTokens,
29772
- distilledBudget: Math.floor(usable * 0.15),
29773
- rawBudget: Math.floor(usable * 0.55),
29774
- strip: "all-tools"
29775
- });
29776
- if (fitsWithSafetyMargin(layer3)) {
29777
- if (sid) urgentDistillationMap.set(sid, true);
29778
- return { ...layer3, layer: 3, usable, distilledBudget, rawBudget };
29779
- }
29780
30661
  if (sid) urgentDistillationMap.set(sid, true);
29781
- const nuclearDistillations = distillations.slice(-2);
30662
+ const nuclearDistillations = selectDistillations(distillations, 2);
29782
30663
  const nuclearPrefix = distilledPrefix(nuclearDistillations);
29783
30664
  const nuclearPrefixTokens = nuclearPrefix.reduce(
29784
30665
  (sum, m) => sum + estimateMessage(m),
@@ -29817,7 +30698,8 @@ function transformInner(input) {
29817
30698
  totalTokens: nuclearPrefixTokens + nuclearRawTokens,
29818
30699
  usable,
29819
30700
  distilledBudget,
29820
- rawBudget
30701
+ rawBudget,
30702
+ refreshLtm: true
29821
30703
  };
29822
30704
  }
29823
30705
  function transform2(input) {
@@ -29924,10 +30806,189 @@ function isWorkerSession(sessionID) {
29924
30806
  return workerSessionIDs.has(sessionID);
29925
30807
  }
29926
30808
 
29927
- // src/distillation.ts
29928
- function compressionRatio(distilledTokens, sourceTokens) {
29929
- if (sourceTokens <= 0) return 0;
29930
- return distilledTokens / Math.sqrt(sourceTokens);
30809
+ // ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
30810
+ var Node = class {
30811
+ value;
30812
+ next;
30813
+ constructor(value) {
30814
+ this.value = value;
30815
+ }
30816
+ };
30817
+ var Queue = class {
30818
+ #head;
30819
+ #tail;
30820
+ #size;
30821
+ constructor() {
30822
+ this.clear();
30823
+ }
30824
+ enqueue(value) {
30825
+ const node2 = new Node(value);
30826
+ if (this.#head) {
30827
+ this.#tail.next = node2;
30828
+ this.#tail = node2;
30829
+ } else {
30830
+ this.#head = node2;
30831
+ this.#tail = node2;
30832
+ }
30833
+ this.#size++;
30834
+ }
30835
+ dequeue() {
30836
+ const current2 = this.#head;
30837
+ if (!current2) {
30838
+ return;
30839
+ }
30840
+ this.#head = this.#head.next;
30841
+ this.#size--;
30842
+ if (!this.#head) {
30843
+ this.#tail = void 0;
30844
+ }
30845
+ return current2.value;
30846
+ }
30847
+ peek() {
30848
+ if (!this.#head) {
30849
+ return;
30850
+ }
30851
+ return this.#head.value;
30852
+ }
30853
+ clear() {
30854
+ this.#head = void 0;
30855
+ this.#tail = void 0;
30856
+ this.#size = 0;
30857
+ }
30858
+ get size() {
30859
+ return this.#size;
30860
+ }
30861
+ *[Symbol.iterator]() {
30862
+ let current2 = this.#head;
30863
+ while (current2) {
30864
+ yield current2.value;
30865
+ current2 = current2.next;
30866
+ }
30867
+ }
30868
+ *drain() {
30869
+ while (this.#head) {
30870
+ yield this.dequeue();
30871
+ }
30872
+ }
30873
+ };
30874
+
30875
+ // ../../node_modules/.bun/p-limit@7.3.0/node_modules/p-limit/index.js
30876
+ function pLimit(concurrency) {
30877
+ let rejectOnClear = false;
30878
+ if (typeof concurrency === "object") {
30879
+ ({ concurrency, rejectOnClear = false } = concurrency);
30880
+ }
30881
+ validateConcurrency(concurrency);
30882
+ if (typeof rejectOnClear !== "boolean") {
30883
+ throw new TypeError("Expected `rejectOnClear` to be a boolean");
30884
+ }
30885
+ const queue = new Queue();
30886
+ let activeCount = 0;
30887
+ const resumeNext = () => {
30888
+ if (activeCount < concurrency && queue.size > 0) {
30889
+ activeCount++;
30890
+ queue.dequeue().run();
30891
+ }
30892
+ };
30893
+ const next = () => {
30894
+ activeCount--;
30895
+ resumeNext();
30896
+ };
30897
+ const run3 = async (function_, resolve, arguments_) => {
30898
+ const result = (async () => function_(...arguments_))();
30899
+ resolve(result);
30900
+ try {
30901
+ await result;
30902
+ } catch {
30903
+ }
30904
+ next();
30905
+ };
30906
+ const enqueue = (function_, resolve, reject, arguments_) => {
30907
+ const queueItem = { reject };
30908
+ new Promise((internalResolve) => {
30909
+ queueItem.run = internalResolve;
30910
+ queue.enqueue(queueItem);
30911
+ }).then(run3.bind(void 0, function_, resolve, arguments_));
30912
+ if (activeCount < concurrency) {
30913
+ resumeNext();
30914
+ }
30915
+ };
30916
+ const generator = (function_, ...arguments_) => new Promise((resolve, reject) => {
30917
+ enqueue(function_, resolve, reject, arguments_);
30918
+ });
30919
+ Object.defineProperties(generator, {
30920
+ activeCount: {
30921
+ get: () => activeCount
30922
+ },
30923
+ pendingCount: {
30924
+ get: () => queue.size
30925
+ },
30926
+ clearQueue: {
30927
+ value() {
30928
+ if (!rejectOnClear) {
30929
+ queue.clear();
30930
+ return;
30931
+ }
30932
+ const abortError = AbortSignal.abort().reason;
30933
+ while (queue.size > 0) {
30934
+ queue.dequeue().reject(abortError);
30935
+ }
30936
+ }
30937
+ },
30938
+ concurrency: {
30939
+ get: () => concurrency,
30940
+ set(newConcurrency) {
30941
+ validateConcurrency(newConcurrency);
30942
+ concurrency = newConcurrency;
30943
+ queueMicrotask(() => {
30944
+ while (activeCount < concurrency && queue.size > 0) {
30945
+ resumeNext();
30946
+ }
30947
+ });
30948
+ }
30949
+ },
30950
+ map: {
30951
+ async value(iterable, function_) {
30952
+ const promises = Array.from(iterable, (value, index2) => this(function_, value, index2));
30953
+ return Promise.all(promises);
30954
+ }
30955
+ }
30956
+ });
30957
+ return generator;
30958
+ }
30959
+ function validateConcurrency(concurrency) {
30960
+ if (!((Number.isInteger(concurrency) || concurrency === Number.POSITIVE_INFINITY) && concurrency > 0)) {
30961
+ throw new TypeError("Expected `concurrency` to be a number from 1 and up");
30962
+ }
30963
+ }
30964
+
30965
+ // src/session-limiter.ts
30966
+ function createLimiterPool() {
30967
+ const limiters = /* @__PURE__ */ new Map();
30968
+ function get2(key) {
30969
+ let limiter = limiters.get(key);
30970
+ if (!limiter) {
30971
+ limiter = pLimit(1);
30972
+ limiters.set(key, limiter);
30973
+ }
30974
+ return limiter;
30975
+ }
30976
+ function isBusy(key) {
30977
+ const limiter = limiters.get(key);
30978
+ return limiter ? limiter.activeCount + limiter.pendingCount > 0 : false;
30979
+ }
30980
+ function clear() {
30981
+ limiters.clear();
30982
+ }
30983
+ return { get: get2, isBusy, clear };
30984
+ }
30985
+ var distillLimiter = createLimiterPool();
30986
+ var curatorLimiter = createLimiterPool();
30987
+
30988
+ // src/distillation.ts
30989
+ function compressionRatio(distilledTokens, sourceTokens) {
30990
+ if (sourceTokens <= 0) return 0;
30991
+ return distilledTokens / Math.sqrt(sourceTokens);
29931
30992
  }
29932
30993
  function maxAllowedExpansion(sourceTokens) {
29933
30994
  if (sourceTokens < 100) return sourceTokens * 5;
@@ -30168,6 +31229,9 @@ function resetOrphans(projectPath, sessionID) {
30168
31229
  return orphans.length;
30169
31230
  }
30170
31231
  async function run(input) {
31232
+ return distillLimiter.get(input.sessionID)(() => runInner(input));
31233
+ }
31234
+ async function runInner(input) {
30171
31235
  const orphans = resetOrphans(input.projectPath, input.sessionID);
30172
31236
  if (orphans > 0) {
30173
31237
  info(
@@ -30211,7 +31275,7 @@ async function run(input) {
30211
31275
  }
30212
31276
  }
30213
31277
  if (!input.skipMeta && gen0Count(input.projectPath, input.sessionID) >= cfg.distillation.metaThreshold) {
30214
- await metaDistill({
31278
+ await metaDistillInner({
30215
31279
  llm: input.llm,
30216
31280
  projectPath: input.projectPath,
30217
31281
  sessionID: input.sessionID,
@@ -30261,17 +31325,25 @@ async function distillSegment(input) {
30261
31325
  );
30262
31326
  return null;
30263
31327
  }
30264
- const distillId = storeDistillation({
30265
- projectPath: input.projectPath,
30266
- sessionID: input.sessionID,
30267
- observations: result.observations,
30268
- sourceIDs: input.messages.map((m) => m.id),
30269
- generation: 0,
30270
- rCompression: rComp,
30271
- cNorm,
30272
- callType: input.callType
30273
- });
30274
- markDistilled(input.messages.map((m) => m.id));
31328
+ let distillId;
31329
+ db().exec("BEGIN IMMEDIATE");
31330
+ try {
31331
+ distillId = storeDistillation({
31332
+ projectPath: input.projectPath,
31333
+ sessionID: input.sessionID,
31334
+ observations: result.observations,
31335
+ sourceIDs: input.messages.map((m) => m.id),
31336
+ generation: 0,
31337
+ rCompression: rComp,
31338
+ cNorm,
31339
+ callType: input.callType
31340
+ });
31341
+ markDistilled(input.messages.map((m) => m.id));
31342
+ db().exec("COMMIT");
31343
+ } catch (e) {
31344
+ db().exec("ROLLBACK");
31345
+ throw e;
31346
+ }
30275
31347
  info(
30276
31348
  `distill segment: ${input.messages.length} msgs, ${sourceTokens}\u2192${distilledTokens} tokens, R=${rComp.toFixed(2)}, C_norm=${cNorm.toFixed(3)}`
30277
31349
  );
@@ -30284,7 +31356,8 @@ async function distillSegment(input) {
30284
31356
  embedDistillation(distillId, result.observations);
30285
31357
  }
30286
31358
  if (config2().knowledge.enabled) {
30287
- for (const pat of extractPatterns(result.observations)) {
31359
+ const patterns = extractPatterns(result.observations);
31360
+ for (const pat of patterns) {
30288
31361
  try {
30289
31362
  create({
30290
31363
  projectPath: input.projectPath,
@@ -30297,10 +31370,16 @@ async function distillSegment(input) {
30297
31370
  } catch {
30298
31371
  }
30299
31372
  }
31373
+ if (patterns.length > 0) {
31374
+ info(`pattern extraction: ${patterns.length} entries from distillation`);
31375
+ }
30300
31376
  }
30301
31377
  return result;
30302
31378
  }
30303
31379
  async function metaDistill(input) {
31380
+ return distillLimiter.get(input.sessionID)(() => metaDistillInner(input));
31381
+ }
31382
+ async function metaDistillInner(input) {
30304
31383
  const existing = loadGen0(input.projectPath, input.sessionID);
30305
31384
  const priorMeta = latestMeta(input.projectPath, input.sessionID);
30306
31385
  if (priorMeta) {
@@ -30342,196 +31421,1801 @@ async function metaDistill(input) {
30342
31421
  db().exec("ROLLBACK");
30343
31422
  throw e;
30344
31423
  }
30345
- if (isAvailable()) {
30346
- embedDistillation(metaId, result.observations);
31424
+ if (isAvailable()) {
31425
+ embedDistillation(metaId, result.observations);
31426
+ }
31427
+ if (config2().knowledge.enabled) {
31428
+ const patterns = extractPatterns(result.observations);
31429
+ for (const pat of patterns) {
31430
+ try {
31431
+ create({
31432
+ projectPath: input.projectPath,
31433
+ category: pat.category,
31434
+ title: pat.title,
31435
+ content: pat.content,
31436
+ session: input.sessionID,
31437
+ scope: "project"
31438
+ });
31439
+ } catch {
31440
+ }
31441
+ }
31442
+ if (patterns.length > 0) {
31443
+ info(`pattern extraction: ${patterns.length} entries from meta-distillation`);
31444
+ }
31445
+ }
31446
+ return result;
31447
+ }
31448
+ function backfillMetrics() {
31449
+ const rows = db().query(
31450
+ "SELECT id, source_ids, token_count FROM distillations WHERE r_compression IS NULL"
31451
+ ).all();
31452
+ if (!rows.length) return 0;
31453
+ const update2 = db().prepare(
31454
+ "UPDATE distillations SET r_compression = ?, c_norm = ? WHERE id = ?"
31455
+ );
31456
+ let updated = 0;
31457
+ for (const row of rows) {
31458
+ const sourceIds = parseSourceIds(row.source_ids);
31459
+ if (!sourceIds.length) continue;
31460
+ const placeholders = sourceIds.map(() => "?").join(",");
31461
+ const sources = db().query(
31462
+ `SELECT tokens, created_at FROM temporal_messages WHERE id IN (${placeholders})`
31463
+ ).all(...sourceIds);
31464
+ if (!sources.length) continue;
31465
+ const sourceTokens = sources.reduce((sum, s) => sum + s.tokens, 0);
31466
+ const timestamps = sources.map((s) => s.created_at);
31467
+ const rComp = compressionRatio(row.token_count, sourceTokens);
31468
+ const cNorm = temporalCnorm(timestamps);
31469
+ update2.run(rComp, cNorm, row.id);
31470
+ updated++;
31471
+ }
31472
+ if (updated > 0) {
31473
+ info(
31474
+ `backfilled metrics for ${updated} distillations (${rows.length - updated} skipped \u2014 missing sources)`
31475
+ );
31476
+ }
31477
+ return updated;
31478
+ }
31479
+
31480
+ // src/curator.ts
31481
+ var curator_exports = {};
31482
+ __export(curator_exports, {
31483
+ MAX_ENTRY_CONTENT_LENGTH: () => MAX_ENTRY_CONTENT_LENGTH,
31484
+ applyOps: () => applyOps,
31485
+ consolidate: () => consolidate,
31486
+ parseOps: () => parseOps,
31487
+ resetCurationTracker: () => resetCurationTracker,
31488
+ run: () => run2
31489
+ });
31490
+
31491
+ // src/instruction-detect.ts
31492
+ var instruction_detect_exports = {};
31493
+ __export(instruction_detect_exports, {
31494
+ detectAndFormat: () => detectAndFormat,
31495
+ extractInstructionCandidates: () => extractInstructionCandidates,
31496
+ findRepeatedInstructions: () => findRepeatedInstructions,
31497
+ formatForCurator: () => formatForCurator
31498
+ });
31499
+ var DEFAULT_REPETITION_THRESHOLD = 2;
31500
+ var VECTOR_SIMILARITY_THRESHOLD = 0.5;
31501
+ var MAX_CANDIDATES = 5;
31502
+ var INSTRUCTION_PATTERNS = [
31503
+ /\balways\b (.{10,80}?)(?:\.|,|!|$)/gi,
31504
+ /\bnever\b (.{10,80}?)(?:\.|,|!|$)/gi,
31505
+ /\bmake sure to (.{10,80}?)(?:\.|,|!|$)/gi,
31506
+ /\bdon'?t forget (?:to )?(.{10,80}?)(?:\.|,|!|$)/gi,
31507
+ /\bplease (?:always |make sure (?:to )?)(.{10,80}?)(?:\.|,|!|$)/gi,
31508
+ /\bI (?:want|need|prefer|expect) (?:you to )?(.{10,80}?)(?:\.|,|!|$)/gi
31509
+ ];
31510
+ function extractInstructionCandidates(messages) {
31511
+ const candidates = [];
31512
+ const seen = /* @__PURE__ */ new Set();
31513
+ for (const msg of messages) {
31514
+ if (msg.role !== "user") continue;
31515
+ for (const pattern of INSTRUCTION_PATTERNS) {
31516
+ pattern.lastIndex = 0;
31517
+ let match;
31518
+ while ((match = pattern.exec(msg.content)) !== null) {
31519
+ const text4 = match[1]?.trim();
31520
+ if (!text4 || text4.length < 10) continue;
31521
+ const key = text4.toLowerCase();
31522
+ if (seen.has(key)) continue;
31523
+ seen.add(key);
31524
+ candidates.push({
31525
+ text: text4,
31526
+ sessionID: msg.session_id
31527
+ });
31528
+ if (candidates.length >= MAX_CANDIDATES) return candidates;
31529
+ }
31530
+ }
31531
+ }
31532
+ return candidates;
31533
+ }
31534
+ async function findRepeatedInstructions(input) {
31535
+ const threshold = input.threshold ?? DEFAULT_REPETITION_THRESHOLD;
31536
+ if (!input.candidates.length) return [];
31537
+ const pid = ensureProject(input.projectPath);
31538
+ let candidateEmbeddings = [];
31539
+ if (isAvailable()) {
31540
+ try {
31541
+ candidateEmbeddings = await embed(
31542
+ input.candidates.map((c) => c.text),
31543
+ "query"
31544
+ );
31545
+ } catch (err) {
31546
+ warn("instruction-detect: batch embedding failed:", err);
31547
+ }
31548
+ }
31549
+ const results = [];
31550
+ for (let i = 0; i < input.candidates.length; i++) {
31551
+ const candidate = input.candidates[i];
31552
+ const sessionIDs = /* @__PURE__ */ new Set();
31553
+ if (candidateEmbeddings.length > i) {
31554
+ const hits = vectorSearchAllDistillations(candidateEmbeddings[i], pid, 20);
31555
+ for (const hit of hits) {
31556
+ if (hit.similarity >= VECTOR_SIMILARITY_THRESHOLD && hit.session_id !== input.currentSessionID) {
31557
+ sessionIDs.add(hit.session_id);
31558
+ }
31559
+ }
31560
+ }
31561
+ const terms = filterTerms(candidate.text);
31562
+ if (terms.length >= 2) {
31563
+ const searchText = terms.slice(0, 5).join(" ");
31564
+ const ftsHits = searchDistillationsFTS(pid, searchText);
31565
+ for (const hit of ftsHits) {
31566
+ if (hit.session_id !== input.currentSessionID) {
31567
+ sessionIDs.add(hit.session_id);
31568
+ }
31569
+ }
31570
+ }
31571
+ if (sessionIDs.size >= threshold) {
31572
+ results.push({
31573
+ instruction: candidate.text,
31574
+ priorSessionCount: sessionIDs.size
31575
+ });
31576
+ }
31577
+ }
31578
+ return results;
31579
+ }
31580
+ function searchDistillationsFTS(projectId2, rawQuery) {
31581
+ const matchExpr = ftsQueryOr(rawQuery);
31582
+ if (matchExpr === EMPTY_QUERY) return [];
31583
+ const sql = `SELECT d.id, d.session_id
31584
+ FROM distillation_fts f
31585
+ CROSS JOIN distillations d ON d.rowid = f.rowid
31586
+ WHERE distillation_fts MATCH ?
31587
+ AND d.project_id = ?
31588
+ ORDER BY rank LIMIT 30`;
31589
+ try {
31590
+ return db().query(sql).all(matchExpr, projectId2);
31591
+ } catch (err) {
31592
+ warn("instruction-detect: FTS search failed:", err);
31593
+ return [];
31594
+ }
31595
+ }
31596
+ function formatForCurator(instructions) {
31597
+ if (!instructions.length) return "";
31598
+ const lines = instructions.map(
31599
+ (i) => `- "${i.instruction}" (seen in ${i.priorSessionCount} prior session${i.priorSessionCount !== 1 ? "s" : ""})`
31600
+ );
31601
+ return `
31602
+
31603
+ ---
31604
+ CROSS-SESSION REPEATED INSTRUCTIONS (high-confidence preference candidates):
31605
+ The following user instructions have appeared in multiple prior sessions. These are strong candidates for "preference" entries:
31606
+ ${lines.join("\n")}`;
31607
+ }
31608
+ async function detectAndFormat(input) {
31609
+ const messages = bySession(input.projectPath, input.sessionID);
31610
+ const candidates = extractInstructionCandidates(messages);
31611
+ if (!candidates.length) return "";
31612
+ const repeated = await findRepeatedInstructions({
31613
+ projectPath: input.projectPath,
31614
+ currentSessionID: input.sessionID,
31615
+ candidates,
31616
+ threshold: input.threshold
31617
+ });
31618
+ if (repeated.length) {
31619
+ info(
31620
+ `instruction-detect: ${repeated.length} repeated instruction(s) found across sessions`
31621
+ );
31622
+ }
31623
+ return formatForCurator(repeated);
31624
+ }
31625
+
31626
+ // src/curator.ts
31627
+ var MAX_ENTRY_CONTENT_LENGTH = 1200;
31628
+ function parseOps(text4) {
31629
+ const cleaned = text4.trim().replace(/^```json?\s*/i, "").replace(/\s*```$/i, "");
31630
+ try {
31631
+ const parsed = JSON.parse(cleaned);
31632
+ if (!Array.isArray(parsed)) return [];
31633
+ return parsed.filter(
31634
+ (op) => typeof op === "object" && op !== null && "op" in op && typeof op.op === "string"
31635
+ );
31636
+ } catch {
31637
+ return [];
31638
+ }
31639
+ }
31640
+ function applyOps(ops, input) {
31641
+ let created = 0;
31642
+ let updated = 0;
31643
+ let deleted = 0;
31644
+ const idsToSync = [];
31645
+ for (const op of ops) {
31646
+ if (op.op === "create") {
31647
+ if (input.skipCreate) continue;
31648
+ const content3 = op.content.length > MAX_ENTRY_CONTENT_LENGTH ? op.content.slice(0, MAX_ENTRY_CONTENT_LENGTH) + " [truncated \u2014 entry too long]" : op.content;
31649
+ const id = create({
31650
+ projectPath: op.scope === "project" ? input.projectPath : void 0,
31651
+ category: op.category,
31652
+ title: op.title,
31653
+ content: content3,
31654
+ session: input.sessionID,
31655
+ scope: op.scope,
31656
+ crossProject: op.crossProject ?? true
31657
+ });
31658
+ idsToSync.push(id);
31659
+ created++;
31660
+ } else if (op.op === "update") {
31661
+ const entry = get(op.id);
31662
+ if (entry) {
31663
+ const content3 = op.content !== void 0 && op.content.length > MAX_ENTRY_CONTENT_LENGTH ? op.content.slice(0, MAX_ENTRY_CONTENT_LENGTH) + " [truncated \u2014 entry too long]" : op.content;
31664
+ update(op.id, { content: content3, confidence: op.confidence });
31665
+ if (op.content !== void 0) idsToSync.push(op.id);
31666
+ updated++;
31667
+ }
31668
+ } else if (op.op === "delete") {
31669
+ const entry = get(op.id);
31670
+ if (entry) {
31671
+ remove(op.id);
31672
+ deleted++;
31673
+ }
31674
+ }
31675
+ }
31676
+ for (const id of idsToSync) {
31677
+ syncRefs(id);
31678
+ }
31679
+ return { created, updated, deleted };
31680
+ }
31681
+ var lastCuratedAt = /* @__PURE__ */ new Map();
31682
+ function getLastCuratedAt(sessionID) {
31683
+ const cached2 = lastCuratedAt.get(sessionID);
31684
+ if (cached2 !== void 0) return cached2;
31685
+ const persisted = loadSessionTracking(sessionID);
31686
+ const ts = persisted?.lastCuratedAt ?? 0;
31687
+ lastCuratedAt.set(sessionID, ts);
31688
+ return ts;
31689
+ }
31690
+ async function run2(input) {
31691
+ const cfg = config2();
31692
+ if (!cfg.curator.enabled) return { created: 0, updated: 0, deleted: 0 };
31693
+ if (curatorLimiter.isBusy(input.sessionID)) {
31694
+ info(`curation skipped: already running for session ${input.sessionID.slice(0, 16)}`);
31695
+ return { created: 0, updated: 0, deleted: 0 };
31696
+ }
31697
+ return curatorLimiter.get(input.sessionID)(() => runInner2(input));
31698
+ }
31699
+ async function runInner2(input) {
31700
+ const cfg = config2();
31701
+ const all3 = bySession(input.projectPath, input.sessionID);
31702
+ const sessionCuratedAt = getLastCuratedAt(input.sessionID);
31703
+ const recent = all3.filter((m) => m.created_at > sessionCuratedAt);
31704
+ if (recent.length < 3) return { created: 0, updated: 0, deleted: 0 };
31705
+ const text4 = recent.map((m) => `[${m.role}] ${m.content}`).join("\n\n");
31706
+ const existing = forProject(input.projectPath, false);
31707
+ const existingForPrompt = existing.map((e) => ({
31708
+ id: e.id,
31709
+ category: e.category,
31710
+ title: e.title,
31711
+ content: e.content
31712
+ }));
31713
+ const baseUserContent = curatorUser({
31714
+ messages: text4,
31715
+ existing: existingForPrompt
31716
+ });
31717
+ let crossSessionContext = "";
31718
+ try {
31719
+ crossSessionContext = await detectAndFormat({
31720
+ projectPath: input.projectPath,
31721
+ sessionID: input.sessionID
31722
+ });
31723
+ } catch (err) {
31724
+ warn("instruction-detect failed (non-fatal):", err);
31725
+ }
31726
+ const userContent = baseUserContent + crossSessionContext;
31727
+ const model = input.model ?? cfg.model;
31728
+ const responseText = await input.llm.prompt(
31729
+ CURATOR_SYSTEM,
31730
+ userContent,
31731
+ { model, workerID: "lore-curator", thinking: false, sessionID: input.sessionID, maxTokens: 2048 }
31732
+ );
31733
+ if (!responseText) return { created: 0, updated: 0, deleted: 0 };
31734
+ const ops = parseOps(responseText);
31735
+ const result = applyOps(ops, {
31736
+ projectPath: input.projectPath,
31737
+ sessionID: input.sessionID
31738
+ });
31739
+ if (result.created > 0) {
31740
+ try {
31741
+ const dupes = await deduplicate(input.projectPath, { dryRun: false });
31742
+ if (dupes.totalRemoved > 0) {
31743
+ info(`post-curation dedup: merged ${dupes.totalRemoved} duplicate entries`);
31744
+ result.deleted += dupes.totalRemoved;
31745
+ }
31746
+ if (dupes.pairSimilarities.size > 0) {
31747
+ const pid = ensureProject(input.projectPath);
31748
+ recordAutoSignals(pid, dupes);
31749
+ const newThreshold = calibrateDedupThreshold(pid);
31750
+ if (newThreshold !== null) {
31751
+ const count3 = getDedupFeedbackCount(pid);
31752
+ saveCalibratedThreshold(pid, newThreshold, count3);
31753
+ }
31754
+ }
31755
+ } catch (err) {
31756
+ warn("post-curation dedup failed (non-fatal):", err);
31757
+ }
31758
+ }
31759
+ const now = Date.now();
31760
+ lastCuratedAt.set(input.sessionID, now);
31761
+ saveSessionTracking(input.sessionID, { lastCuratedAt: now });
31762
+ return result;
31763
+ }
31764
+ function resetCurationTracker(sessionID) {
31765
+ if (sessionID) {
31766
+ lastCuratedAt.delete(sessionID);
31767
+ } else {
31768
+ lastCuratedAt.clear();
31769
+ }
31770
+ }
31771
+ async function consolidate(input) {
31772
+ const cfg = config2();
31773
+ if (!cfg.curator.enabled) return { updated: 0, deleted: 0 };
31774
+ const entries = forProject(input.projectPath, false);
31775
+ if (entries.length <= cfg.curator.maxEntries) return { updated: 0, deleted: 0 };
31776
+ const entriesForPrompt = entries.map((e) => ({
31777
+ id: e.id,
31778
+ category: e.category,
31779
+ title: e.title,
31780
+ content: e.content
31781
+ }));
31782
+ const userContent = consolidationUser({
31783
+ entries: entriesForPrompt,
31784
+ targetMax: cfg.curator.maxEntries
31785
+ });
31786
+ const model = input.model ?? cfg.model;
31787
+ const responseText = await input.llm.prompt(
31788
+ CONSOLIDATION_SYSTEM,
31789
+ userContent,
31790
+ { model, workerID: "lore-curator", thinking: false, sessionID: input.sessionID, maxTokens: 4096 }
31791
+ );
31792
+ if (!responseText) return { updated: 0, deleted: 0 };
31793
+ const ops = parseOps(responseText);
31794
+ const result = applyOps(ops, {
31795
+ projectPath: input.projectPath,
31796
+ sessionID: input.sessionID,
31797
+ skipCreate: true
31798
+ // Consolidation must not add entries.
31799
+ });
31800
+ return { updated: result.updated, deleted: result.deleted };
31801
+ }
31802
+
31803
+ // src/import/index.ts
31804
+ var import_exports = {};
31805
+ __export(import_exports, {
31806
+ clearProviders: () => clearProviders,
31807
+ computeHash: () => computeHash,
31808
+ detectAll: () => detectAll,
31809
+ extractKnowledge: () => extractKnowledge,
31810
+ getProvider: () => getProvider2,
31811
+ getProviders: () => getProviders,
31812
+ isImported: () => isImported,
31813
+ listImports: () => listImports,
31814
+ recordImport: () => recordImport,
31815
+ registerProvider: () => registerProvider
31816
+ });
31817
+
31818
+ // src/import/providers/index.ts
31819
+ var providers = [];
31820
+ function registerProvider(provider) {
31821
+ providers.push(provider);
31822
+ }
31823
+ function getProviders() {
31824
+ return providers;
31825
+ }
31826
+ function getProvider2(name) {
31827
+ return providers.find((p2) => p2.name === name);
31828
+ }
31829
+ function clearProviders() {
31830
+ providers.length = 0;
31831
+ }
31832
+
31833
+ // src/import/detect.ts
31834
+ function detectAll(projectPath) {
31835
+ const results = [];
31836
+ for (const provider of getProviders()) {
31837
+ try {
31838
+ const sessions = provider.detect(projectPath);
31839
+ if (sessions.length > 0) {
31840
+ results.push({
31841
+ agentName: provider.name,
31842
+ agentDisplayName: provider.displayName,
31843
+ sessions,
31844
+ totalTokens: sessions.reduce((s, sess) => s + sess.estimatedTokens, 0),
31845
+ totalMessages: sessions.reduce((s, sess) => s + sess.messageCount, 0)
31846
+ });
31847
+ }
31848
+ } catch (err) {
31849
+ }
31850
+ }
31851
+ return results.sort((a, b) => b.totalMessages - a.totalMessages);
31852
+ }
31853
+
31854
+ // src/import/extract.ts
31855
+ var IMPORT_CURATOR_SYSTEM = `${CURATOR_SYSTEM}
31856
+
31857
+ ADDITIONAL CONTEXT: You are extracting knowledge from HISTORICAL conversations with a different AI coding agent. Focus on durable insights that are still relevant:
31858
+ - Architecture decisions, design patterns, and project conventions
31859
+ - Gotchas, non-obvious bugs, and their fixes
31860
+ - Developer preferences and workflow patterns
31861
+ - Key technical choices and their rationale
31862
+
31863
+ Ignore:
31864
+ - References to the other agent's specific capabilities or limitations
31865
+ - Task-specific state that is no longer current (e.g. "currently debugging X")
31866
+ - Debugging steps for issues that were already resolved
31867
+ - Transient conversation artifacts (greetings, acknowledgments, status updates)`;
31868
+ async function extractKnowledge(input) {
31869
+ const result = {
31870
+ created: 0,
31871
+ updated: 0,
31872
+ deleted: 0,
31873
+ chunksProcessed: 0,
31874
+ chunksFailed: 0
31875
+ };
31876
+ const sorted = [...input.chunks].sort((a, b) => a.timestamp - b.timestamp);
31877
+ for (let i = 0; i < sorted.length; i++) {
31878
+ const chunk = sorted[i];
31879
+ const existing = forProject(input.projectPath, false);
31880
+ const existingForPrompt = existing.map((e) => ({
31881
+ id: e.id,
31882
+ category: e.category,
31883
+ title: e.title,
31884
+ content: e.content
31885
+ }));
31886
+ const userContent = curatorUser({
31887
+ messages: chunk.text,
31888
+ existing: existingForPrompt
31889
+ });
31890
+ try {
31891
+ const response = await input.llm.prompt(
31892
+ IMPORT_CURATOR_SYSTEM,
31893
+ userContent,
31894
+ {
31895
+ model: input.model,
31896
+ workerID: "lore-import",
31897
+ thinking: false,
31898
+ maxTokens: 4096,
31899
+ sessionID: input.sessionID
31900
+ }
31901
+ );
31902
+ if (response) {
31903
+ const ops = parseOps(response);
31904
+ const applied = applyOps(ops, {
31905
+ projectPath: input.projectPath,
31906
+ sessionID: input.sessionID
31907
+ });
31908
+ result.created += applied.created;
31909
+ result.updated += applied.updated;
31910
+ result.deleted += applied.deleted;
31911
+ }
31912
+ result.chunksProcessed++;
31913
+ } catch {
31914
+ result.chunksFailed++;
31915
+ }
31916
+ input.onProgress?.({
31917
+ current: i + 1,
31918
+ total: sorted.length,
31919
+ created: result.created,
31920
+ updated: result.updated
31921
+ });
31922
+ }
31923
+ return result;
31924
+ }
31925
+
31926
+ // src/import/history.ts
31927
+ function isImported(projectPath, agentName, sourceId, sourceHash) {
31928
+ const projectId2 = ensureProject(projectPath);
31929
+ const row = db().query(
31930
+ `SELECT * FROM import_history
31931
+ WHERE project_id = ? AND agent_name = ? AND source_id = ?`
31932
+ ).get(projectId2, agentName, sourceId);
31933
+ if (!row) return null;
31934
+ if (row.source_hash !== sourceHash) return null;
31935
+ return row;
31936
+ }
31937
+ function recordImport(projectPath, agentName, sourceId, sourceHash, stats) {
31938
+ const projectId2 = ensureProject(projectPath);
31939
+ db().query(
31940
+ `INSERT OR REPLACE INTO import_history
31941
+ (id, project_id, agent_name, source_id, source_hash, entries_created, entries_updated, imported_at)
31942
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)`
31943
+ ).run(
31944
+ crypto.randomUUID(),
31945
+ projectId2,
31946
+ agentName,
31947
+ sourceId,
31948
+ sourceHash,
31949
+ stats.created,
31950
+ stats.updated,
31951
+ Date.now()
31952
+ );
31953
+ }
31954
+ function listImports(projectPath) {
31955
+ const projectId2 = ensureProject(projectPath);
31956
+ return db().query(
31957
+ `SELECT * FROM import_history
31958
+ WHERE project_id = ? AND source_id != '__declined__'
31959
+ ORDER BY imported_at DESC`
31960
+ ).all(projectId2);
31961
+ }
31962
+ function computeHash(parts) {
31963
+ return `${parts.size ?? 0}:${parts.messageCount ?? 0}:${parts.lastTimestamp ?? 0}`;
31964
+ }
31965
+
31966
+ // src/import/providers/claude-code.ts
31967
+ import { readdirSync as readdirSync2, readFileSync as readFileSync4, statSync as statSync5 } from "fs";
31968
+ import { join as join8 } from "path";
31969
+ import { homedir as homedir2 } from "os";
31970
+ var CLAUDE_DIR = join8(homedir2(), ".claude", "projects");
31971
+ var MAX_TOOL_OUTPUT_CHARS = 500;
31972
+ var DEFAULT_MAX_TOKENS = 12288;
31973
+ function manglePath(projectPath) {
31974
+ return projectPath.replace(/\//g, "-");
31975
+ }
31976
+ function estimateTokens4(text4) {
31977
+ return Math.ceil(text4.length / 3);
31978
+ }
31979
+ function truncate(text4, max) {
31980
+ if (text4.length <= max) return text4;
31981
+ return text4.slice(0, max) + "...";
31982
+ }
31983
+ function blockToText(block) {
31984
+ switch (block.type) {
31985
+ case "text":
31986
+ return block.text;
31987
+ case "tool_use": {
31988
+ const tu = block;
31989
+ const inputSummary = truncate(JSON.stringify(tu.input), MAX_TOOL_OUTPUT_CHARS);
31990
+ return `[tool: ${tu.name}] ${inputSummary}`;
31991
+ }
31992
+ case "tool_result": {
31993
+ const tr = block;
31994
+ let content3;
31995
+ if (typeof tr.content === "string") {
31996
+ content3 = tr.content;
31997
+ } else if (Array.isArray(tr.content)) {
31998
+ content3 = tr.content.map((b) => {
31999
+ if (b.type === "text") return b.text;
32000
+ return "";
32001
+ }).filter(Boolean).join("\n");
32002
+ } else {
32003
+ content3 = "";
32004
+ }
32005
+ return content3 ? `[tool_result] ${truncate(content3, MAX_TOOL_OUTPUT_CHARS)}` : null;
32006
+ }
32007
+ case "thinking":
32008
+ return null;
32009
+ default:
32010
+ return null;
32011
+ }
32012
+ }
32013
+ function lineToText(parsed) {
32014
+ if (parsed.type === "user") {
32015
+ const msg = parsed;
32016
+ const content3 = msg.message.content;
32017
+ if (typeof content3 === "string") {
32018
+ return `[user] ${content3}`;
32019
+ }
32020
+ const parts = content3.map(blockToText).filter(Boolean);
32021
+ return parts.length > 0 ? `[user] ${parts.join("\n")}` : null;
32022
+ }
32023
+ if (parsed.type === "assistant") {
32024
+ const msg = parsed;
32025
+ const blocks = msg.message.content;
32026
+ if (!Array.isArray(blocks)) return null;
32027
+ const parts = blocks.map(blockToText).filter(Boolean);
32028
+ return parts.length > 0 ? `[assistant] ${parts.join("\n")}` : null;
32029
+ }
32030
+ return null;
32031
+ }
32032
+ function parseJSONL(filePath) {
32033
+ const raw = readFileSync4(filePath, "utf-8");
32034
+ const lines = [];
32035
+ for (const line of raw.split("\n")) {
32036
+ if (!line.trim()) continue;
32037
+ try {
32038
+ lines.push(JSON.parse(line));
32039
+ } catch {
32040
+ }
32041
+ }
32042
+ return lines;
32043
+ }
32044
+ function getSessionMetadata(filePath) {
32045
+ let raw;
32046
+ try {
32047
+ raw = readFileSync4(filePath, "utf-8");
32048
+ } catch {
32049
+ return null;
32050
+ }
32051
+ const lines = raw.split("\n").filter((l) => l.trim());
32052
+ if (lines.length === 0) return null;
32053
+ let sessionId;
32054
+ let startedAt = Infinity;
32055
+ let lastActivityAt = 0;
32056
+ let messageCount = 0;
32057
+ for (const line of lines) {
32058
+ try {
32059
+ const parsed = JSON.parse(line);
32060
+ if (parsed.sessionId && !sessionId) sessionId = parsed.sessionId;
32061
+ if (parsed.timestamp) {
32062
+ const ts = new Date(parsed.timestamp).getTime();
32063
+ if (!Number.isNaN(ts)) {
32064
+ if (ts < startedAt) startedAt = ts;
32065
+ if (ts > lastActivityAt) lastActivityAt = ts;
32066
+ }
32067
+ }
32068
+ if (parsed.type === "user" || parsed.type === "assistant") {
32069
+ messageCount++;
32070
+ }
32071
+ } catch {
32072
+ }
32073
+ }
32074
+ if (!sessionId || messageCount === 0) return null;
32075
+ const fileSize = raw.length;
32076
+ const estimatedTokens = Math.ceil(fileSize / 5);
32077
+ return {
32078
+ sessionId,
32079
+ startedAt: startedAt === Infinity ? Date.now() : startedAt,
32080
+ lastActivityAt,
32081
+ messageCount,
32082
+ estimatedTokens
32083
+ };
32084
+ }
32085
+ var claudeCodeProvider = {
32086
+ name: "claude-code",
32087
+ displayName: "Claude Code",
32088
+ detect(projectPath) {
32089
+ const mangled = manglePath(projectPath);
32090
+ const dir = join8(CLAUDE_DIR, mangled);
32091
+ let entries;
32092
+ try {
32093
+ entries = readdirSync2(dir);
32094
+ } catch {
32095
+ return [];
32096
+ }
32097
+ const sessions = [];
32098
+ for (const entry of entries) {
32099
+ if (!entry.endsWith(".jsonl")) continue;
32100
+ const filePath = join8(dir, entry);
32101
+ try {
32102
+ const stat = statSync5(filePath);
32103
+ if (!stat.isFile()) continue;
32104
+ } catch {
32105
+ continue;
32106
+ }
32107
+ const meta3 = getSessionMetadata(filePath);
32108
+ if (!meta3) continue;
32109
+ if (meta3.messageCount < 3) continue;
32110
+ const dateStr = new Date(meta3.startedAt).toISOString().slice(0, 10);
32111
+ sessions.push({
32112
+ id: filePath,
32113
+ label: `${dateStr} (${meta3.messageCount} messages)`,
32114
+ startedAt: meta3.startedAt,
32115
+ lastActivityAt: meta3.lastActivityAt,
32116
+ estimatedTokens: meta3.estimatedTokens,
32117
+ messageCount: meta3.messageCount
32118
+ });
32119
+ }
32120
+ return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
32121
+ },
32122
+ readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS) {
32123
+ const chunks = [];
32124
+ for (const filePath of sessionIds) {
32125
+ const lines = parseJSONL(filePath);
32126
+ const messages = [];
32127
+ for (const line of lines) {
32128
+ const text4 = lineToText(line);
32129
+ if (!text4) continue;
32130
+ const ts = "timestamp" in line && line.timestamp ? new Date(line.timestamp).getTime() : Date.now();
32131
+ messages.push({ text: text4, timestamp: ts });
32132
+ }
32133
+ if (messages.length === 0) continue;
32134
+ let currentTexts = [];
32135
+ let currentTokens = 0;
32136
+ let chunkStart = messages[0].timestamp;
32137
+ let chunkIndex = 0;
32138
+ const flushChunk = () => {
32139
+ if (currentTexts.length === 0) return;
32140
+ chunkIndex++;
32141
+ const text4 = currentTexts.join("\n\n");
32142
+ chunks.push({
32143
+ label: `Claude Code ${new Date(chunkStart).toISOString().slice(0, 10)} (${chunkIndex})`,
32144
+ text: text4,
32145
+ estimatedTokens: estimateTokens4(text4),
32146
+ timestamp: chunkStart
32147
+ });
32148
+ currentTexts = [];
32149
+ currentTokens = 0;
32150
+ };
32151
+ for (const msg of messages) {
32152
+ const msgTokens = estimateTokens4(msg.text);
32153
+ if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
32154
+ flushChunk();
32155
+ chunkStart = msg.timestamp;
32156
+ }
32157
+ currentTexts.push(msg.text);
32158
+ currentTokens += msgTokens;
32159
+ }
32160
+ flushChunk();
32161
+ }
32162
+ return chunks;
32163
+ }
32164
+ };
32165
+ registerProvider(claudeCodeProvider);
32166
+
32167
+ // src/import/providers/codex.ts
32168
+ import { readdirSync as readdirSync3, readFileSync as readFileSync5, statSync as statSync6, existsSync as existsSync6 } from "fs";
32169
+ import { join as join9 } from "path";
32170
+ import { homedir as homedir3 } from "os";
32171
+ var CODEX_DIR = join9(homedir3(), ".codex");
32172
+ var SESSIONS_DIR = join9(CODEX_DIR, "sessions");
32173
+ var ARCHIVED_DIR = join9(CODEX_DIR, "archived_sessions");
32174
+ var MAX_TOOL_OUTPUT_CHARS2 = 500;
32175
+ var DEFAULT_MAX_TOKENS2 = 12288;
32176
+ function estimateTokens5(text4) {
32177
+ return Math.ceil(text4.length / 3);
32178
+ }
32179
+ function truncate2(text4, max) {
32180
+ if (text4.length <= max) return text4;
32181
+ return text4.slice(0, max) + "...";
32182
+ }
32183
+ function findJsonlFiles(dir) {
32184
+ const results = [];
32185
+ if (!existsSync6(dir)) return results;
32186
+ const walk = (d) => {
32187
+ let entries;
32188
+ try {
32189
+ entries = readdirSync3(d);
32190
+ } catch {
32191
+ return;
32192
+ }
32193
+ for (const entry of entries) {
32194
+ const full = join9(d, entry);
32195
+ try {
32196
+ const stat = statSync6(full);
32197
+ if (stat.isDirectory()) walk(full);
32198
+ else if (stat.isFile() && entry.endsWith(".jsonl")) results.push(full);
32199
+ } catch {
32200
+ }
32201
+ }
32202
+ };
32203
+ walk(dir);
32204
+ return results;
32205
+ }
32206
+ function responseItemToText(item) {
32207
+ if (!item) return null;
32208
+ if (item.type === "message" && item.role && item.content) {
32209
+ const text4 = extractContent(item.content);
32210
+ if (text4) return `[${item.role}] ${text4}`;
32211
+ }
32212
+ if (item.type === "function_call" && item.name) {
32213
+ const args = item.arguments ? truncate2(item.arguments, MAX_TOOL_OUTPUT_CHARS2) : "";
32214
+ return `[tool: ${item.name}] ${args}`;
32215
+ }
32216
+ if (item.type === "function_call_output" && item.output) {
32217
+ return `[tool_result] ${truncate2(item.output, MAX_TOOL_OUTPUT_CHARS2)}`;
32218
+ }
32219
+ return null;
32220
+ }
32221
+ function extractContent(content3) {
32222
+ if (typeof content3 === "string") return content3;
32223
+ if (!Array.isArray(content3)) return null;
32224
+ const parts = [];
32225
+ for (const part of content3) {
32226
+ if ("text" in part && typeof part.text === "string") {
32227
+ parts.push(part.text);
32228
+ }
32229
+ }
32230
+ return parts.length > 0 ? parts.join("\n") : null;
32231
+ }
32232
+ function parseJSONL2(filePath) {
32233
+ let raw;
32234
+ try {
32235
+ raw = readFileSync5(filePath, "utf-8");
32236
+ } catch {
32237
+ return [];
32238
+ }
32239
+ const lines = [];
32240
+ for (const line of raw.split("\n")) {
32241
+ if (!line.trim()) continue;
32242
+ try {
32243
+ lines.push(JSON.parse(line));
32244
+ } catch {
32245
+ }
32246
+ }
32247
+ return lines;
32248
+ }
32249
+ function getSessionMeta(filePath) {
32250
+ let raw;
32251
+ try {
32252
+ raw = readFileSync5(filePath, "utf-8");
32253
+ } catch {
32254
+ return null;
32255
+ }
32256
+ const lines = raw.split("\n").filter((l) => l.trim());
32257
+ if (lines.length === 0) return null;
32258
+ let meta3;
32259
+ try {
32260
+ meta3 = JSON.parse(lines[0]);
32261
+ } catch {
32262
+ return null;
32263
+ }
32264
+ if (meta3.type !== "session_meta") return null;
32265
+ const payload = meta3.payload;
32266
+ let messageCount = 0;
32267
+ for (const line of lines) {
32268
+ try {
32269
+ const parsed = JSON.parse(line);
32270
+ if (parsed.type === "response_item" || parsed.type === "event_msg") {
32271
+ messageCount++;
32272
+ }
32273
+ } catch {
32274
+ }
32275
+ }
32276
+ return {
32277
+ id: payload.meta.id,
32278
+ cwd: payload.meta.cwd,
32279
+ timestamp: payload.meta.timestamp,
32280
+ messageCount,
32281
+ fileSize: raw.length
32282
+ };
32283
+ }
32284
/**
 * Import provider for Codex session logs.
 *
 * `detect` lists sessions recorded for a project directory; `readChunks`
 * turns selected session files into text chunks bounded by a token budget.
 */
var codexProvider = {
  name: "codex",
  displayName: "Codex",
  /**
   * List importable sessions whose recorded cwd equals `projectPath`.
   *
   * @param {string} projectPath - Project directory to match against each session's cwd.
   * @returns {Array<object>} Session descriptors, most recent first. Sessions
   *   with fewer than 3 counted messages are skipped.
   */
  detect(projectPath) {
    const sessions = [];
    const allFiles = [
      ...findJsonlFiles(SESSIONS_DIR),
      ...findJsonlFiles(ARCHIVED_DIR)
    ];
    for (const filePath of allFiles) {
      const meta3 = getSessionMeta(filePath);
      if (!meta3) continue;
      if (meta3.cwd !== projectPath) continue;
      if (meta3.messageCount < 3) continue;
      // An unparseable timestamp yields NaN, and toISOString() on an invalid
      // Date throws RangeError. Fall back to "now", as readChunks does.
      const parsedTs = new Date(meta3.timestamp).getTime();
      const ts = Number.isNaN(parsedTs) ? Date.now() : parsedTs;
      const estimatedTokens = Math.ceil(meta3.fileSize / 5);
      const dateStr = new Date(ts).toISOString().slice(0, 10);
      sessions.push({
        id: filePath,
        label: `${dateStr} (${meta3.messageCount} messages)`,
        startedAt: ts,
        lastActivityAt: ts,
        // Best approximation without reading all lines
        estimatedTokens,
        messageCount: meta3.messageCount
      });
    }
    return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
  },
  /**
   * Convert session files into text chunks of at most roughly `maxTokens` tokens.
   *
   * @param {string} _projectPath - Unused.
   * @param {Iterable<string>} sessionIds - Session file paths (the `id` values from `detect`).
   * @param {number} [maxTokens] - Soft per-chunk budget; a single oversized
   *   message still becomes its own chunk.
   * @returns {Array<{label: string, text: string, estimatedTokens: number, timestamp: number}>}
   */
  readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS2) {
    const chunks = [];
    for (const filePath of sessionIds) {
      const lines = parseJSONL2(filePath);
      const messages = [];
      let sessionTimestamp = Date.now();
      const firstLine = lines[0];
      if (firstLine?.type === "session_meta") {
        // Optional chaining: a header without payload.meta keeps the fallback.
        const ts = new Date(firstLine.payload?.meta?.timestamp).getTime();
        if (!Number.isNaN(ts)) sessionTimestamp = ts;
      }
      for (const line of lines) {
        // A JSONL line may legally parse to null.
        if (line === null || line === undefined) continue;
        if (line.type === "response_item") {
          const text4 = responseItemToText(line.payload);
          if (text4) {
            messages.push({ text: text4, timestamp: sessionTimestamp });
          }
        } else if (line.type === "event_msg") {
          const output = line.payload?.output;
          if (output) {
            messages.push({
              text: `[exec] ${truncate2(output, MAX_TOOL_OUTPUT_CHARS2)}`,
              timestamp: sessionTimestamp
            });
          }
        } else if (line.type === "compacted") {
          const history = line.payload?.replacement_history;
          // Only iterate a real array; a string would be walked char by char.
          if (Array.isArray(history)) {
            for (const item of history) {
              const text4 = responseItemToText(item);
              if (text4) {
                messages.push({ text: text4, timestamp: sessionTimestamp });
              }
            }
          }
        }
      }
      if (messages.length === 0) continue;
      let currentTexts = [];
      let currentTokens = 0;
      let chunkIndex = 0;
      const flushChunk = () => {
        if (currentTexts.length === 0) return;
        chunkIndex++;
        const text4 = currentTexts.join("\n\n");
        chunks.push({
          label: `Codex ${new Date(sessionTimestamp).toISOString().slice(0, 10)} (${chunkIndex})`,
          text: text4,
          estimatedTokens: estimateTokens5(text4),
          timestamp: sessionTimestamp
        });
        currentTexts = [];
        currentTokens = 0;
      };
      for (const msg of messages) {
        const msgTokens = estimateTokens5(msg.text);
        // Flush before the message that would overflow the budget.
        if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
          flushChunk();
        }
        currentTexts.push(msg.text);
        currentTokens += msgTokens;
      }
      flushChunk();
    }
    return chunks;
  }
};
32382
+ registerProvider(codexProvider);
32383
+
32384
+ // src/import/providers/opencode.ts
32385
+ import { existsSync as existsSync7 } from "fs";
32386
+ import { join as join10 } from "path";
32387
+ import { homedir as homedir4 } from "os";
32388
+ var OPENCODE_DB_PATH = join10(
32389
+ process.env.XDG_DATA_HOME || join10(homedir4(), ".local", "share"),
32390
+ "opencode",
32391
+ "opencode.db"
32392
+ );
32393
+ var MAX_TOOL_OUTPUT_CHARS3 = 500;
32394
+ var DEFAULT_MAX_TOKENS3 = 12288;
32395
/**
 * Rough token estimate: one token per three characters, rounded up.
 *
 * @param {string} text4 - Text to measure.
 * @returns {number} Estimated token count.
 */
function estimateTokens6(text4) {
  const charsPerToken = 3;
  const charCount = text4.length;
  return Math.ceil(charCount / charsPerToken);
}
32398
/**
 * Cap `text4` at `max` characters, appending "..." when anything was cut.
 *
 * @param {string} text4 - Text to shorten.
 * @param {number} max - Maximum number of characters kept before the ellipsis.
 * @returns {string} The original text, or its first `max` characters plus "...".
 */
function truncate3(text4, max) {
  const fits = text4.length <= max;
  return fits ? text4 : `${text4.slice(0, max)}...`;
}
32402
/**
 * Open the OpenCode database at OPENCODE_DB_PATH for reading.
 *
 * @returns {object | null} A database handle, or null when the file does not
 *   exist or the constructor throws.
 */
function openDB() {
  if (existsSync7(OPENCODE_DB_PATH)) {
    try {
      // Both option spellings are passed — presumably so that whichever
      // SQLite binding backs `Database` honors read-only mode (TODO confirm).
      const options = { readonly: true, readOnly: true };
      return new Database(OPENCODE_DB_PATH, options);
    } catch {
      // Treat an unopenable database the same as a missing one.
    }
  }
  return null;
}
32410
/**
 * Check whether a table with the given name is listed in `sqlite_master`.
 *
 * @param {object} database - Handle exposing `query(sql).get(...params)`.
 * @param {string} table - Table name to look for.
 * @returns {boolean} True when the lookup returns a row.
 */
function tableExists(database, table) {
  const lookup = database.query("SELECT name FROM sqlite_master WHERE type='table' AND name=?");
  const match = lookup.get(table);
  return !(match === null || match === undefined);
}
32414
/**
 * Flatten message parts into newline-separated conversation text.
 *
 * Non-empty "text" parts contribute their text. "tool" parts contribute a
 * truncated `[tool: name] output` line, but only when they name a tool, are
 * in the "completed" state and have output. All other parts are ignored.
 *
 * @param {Iterable<object>} parts - Parsed part objects.
 * @returns {string} The joined text ("" when nothing qualified).
 */
function partsToConversationText(parts) {
  const segments = [];
  for (const part of parts) {
    switch (part.type) {
      case "text":
        if (part.text) segments.push(part.text);
        break;
      case "tool": {
        const finished = part.state?.status === "completed";
        if (part.tool && finished && part.state.output) {
          const shortened = truncate3(part.state.output, MAX_TOOL_OUTPUT_CHARS3);
          segments.push(`[tool: ${part.tool}] ${shortened}`);
        }
        break;
      }
      default:
        break;
    }
  }
  return segments.join("\n");
}
32425
/**
 * Import provider for OpenCode sessions stored in its SQLite database.
 *
 * `detect` lists top-level sessions of the project whose worktree equals the
 * given path; `readChunks` renders selected sessions as text chunks bounded
 * by a token budget.
 */
var opencodeProvider = {
  name: "opencode",
  displayName: "OpenCode",
  /**
   * List importable sessions for `projectPath`.
   *
   * @param {string} projectPath - Worktree path to look up in the `project` table.
   * @returns {Array<object>} Session descriptors ordered by `time_updated`
   *   descending; empty when the database, required tables or project are missing.
   */
  detect(projectPath) {
    const database = openDB();
    if (!database) return [];
    try {
      const requiredTables = ["project", "session", "message"];
      if (!requiredTables.every((table) => tableExists(database, table))) {
        return [];
      }
      const project = database.query("SELECT id FROM project WHERE worktree = ?").get(projectPath);
      if (!project) return [];
      const sessions = database.query(
        `SELECT s.id, s.title, s.time_created, s.time_updated,
                (SELECT COUNT(*) FROM message m WHERE m.session_id = s.id) as msg_count
         FROM session s
         WHERE s.project_id = ? AND s.parent_id IS NULL
         ORDER BY s.time_updated DESC`
      ).all(project.id);
      const results = [];
      for (const sess of sessions) {
        if (sess.msg_count < 3) continue;
        // Flat per-message guess; message bodies are not read here.
        const estimatedTokens = sess.msg_count * 500;
        // toISOString() throws RangeError on an invalid Date, which would
        // abort the whole listing; label such rows with today's date instead.
        const createdMs = new Date(sess.time_created).getTime();
        const dateStr = new Date(Number.isNaN(createdMs) ? Date.now() : createdMs).toISOString().slice(0, 10);
        const label = sess.title ? `${dateStr} - ${sess.title} (${sess.msg_count} messages)` : `${dateStr} (${sess.msg_count} messages)`;
        results.push({
          id: sess.id,
          label,
          startedAt: sess.time_created,
          lastActivityAt: sess.time_updated,
          estimatedTokens,
          messageCount: sess.msg_count
        });
      }
      return results;
    } finally {
      database.close();
    }
  },
  /**
   * Render the given sessions as text chunks of at most roughly `maxTokens` tokens.
   *
   * @param {string} _projectPath - Unused.
   * @param {Iterable<*>} sessionIds - Session ids (the `id` values from `detect`).
   * @param {number} [maxTokens] - Soft per-chunk budget; a single oversized
   *   message still becomes its own chunk.
   * @returns {Array<{label: string, text: string, estimatedTokens: number, timestamp: *}>}
   */
  readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS3) {
    const database = openDB();
    if (!database) return [];
    const chunks = [];
    // Date part of a timestamp for labels; falls back to today when the
    // value cannot be parsed (toISOString() would throw otherwise).
    const dayOf = (value) => {
      const ms = new Date(value).getTime();
      return new Date(Number.isNaN(ms) ? Date.now() : ms).toISOString().slice(0, 10);
    };
    try {
      // Without a `part` table no message has content, so nothing is emitted.
      const hasParts = tableExists(database, "part");
      for (const sessionId of sessionIds) {
        const messages = database.query(
          `SELECT id, data, time_created FROM message
           WHERE session_id = ?
           ORDER BY time_created ASC`
        ).all(sessionId);
        if (messages.length === 0) continue;
        const textMessages = [];
        for (const msg of messages) {
          let msgData;
          try {
            msgData = JSON.parse(msg.data);
          } catch {
            continue;
          }
          // `data` may decode to null; optional chaining avoids a TypeError.
          const role = msgData?.role ?? "unknown";
          let contentText = "";
          if (hasParts) {
            const parts = database.query(
              `SELECT data FROM part
               WHERE message_id = ?
               ORDER BY time_created ASC`
            ).all(msg.id);
            const parsedParts = [];
            for (const p2 of parts) {
              let decoded;
              try {
                decoded = JSON.parse(p2.data);
              } catch {
                // Skip parts whose JSON cannot be decoded.
                continue;
              }
              // Keep only objects; null or primitive rows carry no part fields.
              if (decoded !== null && typeof decoded === "object") {
                parsedParts.push(decoded);
              }
            }
            contentText = partsToConversationText(parsedParts);
          }
          if (!contentText.trim()) continue;
          textMessages.push({
            text: `[${role}] ${contentText}`,
            timestamp: msg.time_created
          });
        }
        if (textMessages.length === 0) continue;
        let currentTexts = [];
        let currentTokens = 0;
        let chunkStart = textMessages[0].timestamp;
        let chunkIndex = 0;
        const flushChunk = () => {
          if (currentTexts.length === 0) return;
          chunkIndex++;
          const text4 = currentTexts.join("\n\n");
          chunks.push({
            label: `OpenCode ${dayOf(chunkStart)} (${chunkIndex})`,
            text: text4,
            estimatedTokens: estimateTokens6(text4),
            timestamp: chunkStart
          });
          currentTexts = [];
          currentTokens = 0;
        };
        for (const msg of textMessages) {
          const msgTokens = estimateTokens6(msg.text);
          if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
            flushChunk();
            // The next chunk is dated by its first message.
            chunkStart = msg.timestamp;
          }
          currentTexts.push(msg.text);
          currentTokens += msgTokens;
        }
        flushChunk();
      }
    } finally {
      database.close();
    }
    return chunks;
  }
};
32543
+ registerProvider(opencodeProvider);
32544
+
32545
+ // src/import/providers/cline.ts
32546
+ import { readFileSync as readFileSync6, existsSync as existsSync8, statSync as statSync7 } from "fs";
32547
+ import { join as join11 } from "path";
32548
+ import { homedir as homedir5 } from "os";
32549
+ var MAX_TOOL_OUTPUT_CHARS4 = 500;
32550
+ var DEFAULT_MAX_TOKENS4 = 12288;
32551
+ var EXTENSION_IDS = [
32552
+ "saoudrizwan.claude-dev",
32553
+ "cline.cline"
32554
+ ];
32555
/**
 * Approximate the token count of a string as ceil(length / 3).
 *
 * @param {string} text4 - Text to measure.
 * @returns {number} Estimated token count.
 */
function estimateTokens7(text4) {
  const { length } = text4;
  return Math.ceil(length / 3);
}
32558
/**
 * Shorten `text4` to at most `max` characters, marking a cut with "...".
 *
 * @param {string} text4 - Text to shorten.
 * @param {number} max - Number of leading characters to keep.
 * @returns {string} `text4` unchanged when it fits, else the prefix plus "...".
 */
function truncate4(text4, max) {
  if (text4.length > max) {
    const head = text4.slice(0, max);
    return `${head}...`;
  }
  return text4;
}
32562
/**
 * Locate existing extension storage directories under the per-platform
 * `User/globalStorage` folders of Code, Code - Insiders and VSCodium (plus
 * `~/.vscode` and `~/.vscode-insiders` data folders on other platforms).
 *
 * @returns {string[]} Every `<globalStorage>/<extension id>` directory that
 *   exists, for each id in EXTENSION_IDS.
 */
function findGlobalStorageDirs() {
  const home = homedir5();
  const editorFolders = ["Code", "Code - Insiders", "VSCodium"];
  const storageUnder = (root) => editorFolders.map((editor) => join11(root, editor, "User", "globalStorage"));
  let basePaths;
  switch (process.platform) {
    case "darwin":
      basePaths = storageUnder(join11(home, "Library", "Application Support"));
      break;
    case "win32":
      basePaths = storageUnder(process.env.APPDATA || join11(home, "AppData", "Roaming"));
      break;
    default:
      basePaths = [
        ...storageUnder(process.env.XDG_CONFIG_HOME || join11(home, ".config")),
        join11(home, ".vscode", "data", "User", "globalStorage"),
        join11(home, ".vscode-insiders", "data", "User", "globalStorage")
      ];
      break;
  }
  // Probe every base/extension pair, keeping base order first, then id order.
  return basePaths
    .flatMap((base) => EXTENSION_IDS.map((extId) => join11(base, extId)))
    .filter((dir) => existsSync8(dir));
}
32600
/**
 * Load the task history entries recorded for `projectPath`.
 *
 * Tries `<storageDir>/state/taskHistory.json` first, then
 * `<storageDir>/taskHistory.json`. The first file that exists, parses, and
 * holds an array decides the result; unreadable or non-array files are skipped.
 *
 * @param {string} storageDir - Extension storage directory.
 * @param {string} projectPath - Value to match against each entry's `cwdOnTaskInitialization`.
 * @returns {Array<object>} Matching entries, or [] when no usable file is found.
 */
function loadTaskHistory(storageDir, projectPath) {
  const candidates = [
    join11(storageDir, "state", "taskHistory.json"),
    join11(storageDir, "taskHistory.json")
  ];
  for (const historyPath of candidates) {
    if (existsSync8(historyPath)) {
      try {
        const items = JSON.parse(readFileSync6(historyPath, "utf-8"));
        if (Array.isArray(items)) {
          const matching = [];
          for (const item of items) {
            if (item.cwdOnTaskInitialization === projectPath) matching.push(item);
          }
          return matching;
        }
      } catch {
        // Unreadable or malformed file: fall through to the next candidate.
      }
    }
  }
  return [];
}
32620
/**
 * Read `api_conversation_history.json` from a task directory.
 *
 * @param {string} taskDir - Directory of a single task.
 * @returns {Array<object>} The parsed message array, or [] when the file is
 *   missing, unreadable, malformed, or does not contain an array.
 */
function readConversation(taskDir) {
  const filePath = join11(taskDir, "api_conversation_history.json");
  if (existsSync8(filePath)) {
    try {
      const parsed = JSON.parse(readFileSync6(filePath, "utf-8"));
      if (Array.isArray(parsed)) return parsed;
    } catch {
      // Unreadable or malformed history counts as an empty conversation.
    }
  }
  return [];
}
32631
/**
 * Render one content block of a conversation message as text.
 *
 * - "text" blocks yield their `text` as-is.
 * - "tool_use" blocks yield `[tool: name]` followed by the truncated JSON of `input`.
 * - "tool_result" blocks yield `[tool_result]` plus truncated content; the
 *   content is a string or the joined "text" entries of an array.
 *
 * @param {object} block - Content block with a `type` field.
 * @returns {string | null | undefined} The rendered text; null for unknown
 *   block types and for tool results without text content.
 */
function blockToText2(block) {
  const kind = block.type;
  if (kind === "text") {
    return block.text;
  }
  if (kind === "tool_use") {
    return `[tool: ${block.name}] ${truncate4(JSON.stringify(block.input), MAX_TOOL_OUTPUT_CHARS4)}`;
  }
  if (kind !== "tool_result") {
    return null;
  }
  const payload = block.content;
  let content3 = "";
  if (typeof payload === "string") {
    content3 = payload;
  } else if (Array.isArray(payload)) {
    const texts = [];
    for (const b of payload) {
      if (b.type === "text") texts.push(b.text);
    }
    content3 = texts.join("\n");
  }
  if (!content3) return null;
  return `[tool_result] ${truncate4(content3, MAX_TOOL_OUTPUT_CHARS4)}`;
}
32655
/**
 * Render a conversation message as `[role] text`.
 *
 * String content is used directly. Array content is rendered block by block
 * with `blockToText2` and joined with newlines.
 *
 * @param {{role: string, content: *}} msg - Message to render.
 * @returns {string | null} The rendered line, or null when the message has no
 *   text (empty string, no renderable blocks, or content that is neither a
 *   string nor an array).
 */
function messageToText(msg) {
  const { role, content } = msg;
  if (typeof content === "string") {
    return content ? `[${role}] ${content}` : null;
  }
  // Missing or otherwise malformed content used to throw on `.map`; treat it
  // as a message without text instead.
  if (!Array.isArray(content)) return null;
  const parts = content
    // Skip null or primitive entries, which carry no block `type`.
    .filter((block) => block !== null && typeof block === "object")
    .map((block) => blockToText2(block))
    .filter(Boolean);
  return parts.length > 0 ? `[${role}] ${parts.join("\n")}` : null;
}
32662
/**
 * Import provider for Cline task conversations stored in the editor's
 * extension storage directories.
 *
 * `detect` lists tasks started in a project directory; `readChunks` renders
 * selected task directories as text chunks bounded by a token budget.
 */
var clineProvider = {
  name: "cline",
  displayName: "Cline",
  /**
   * List importable tasks whose initial cwd equals `projectPath`.
   *
   * @param {string} projectPath - Project directory to match.
   * @returns {Array<object>} Session descriptors, most recent first. Tasks
   *   without a directory or with fewer than 3 messages are skipped.
   */
  detect(projectPath) {
    const sessions = [];
    const storageDirs = findGlobalStorageDirs();
    for (const storageDir of storageDirs) {
      const tasks = loadTaskHistory(storageDir, projectPath);
      for (const task of tasks) {
        // An entry without an id cannot be mapped to a directory; joining
        // `undefined` into a path would throw.
        if (task.id === null || task.id === undefined) continue;
        const taskDir = join11(storageDir, "tasks", String(task.id));
        if (!existsSync8(taskDir)) continue;
        const messages = readConversation(taskDir);
        if (messages.length < 3) continue;
        // toISOString() throws RangeError on an invalid Date, which would
        // abort detection for every task; fall back to "now" instead.
        const parsedTs = new Date(task.ts).getTime();
        const ts = Number.isNaN(parsedTs) ? Date.now() : parsedTs;
        const dateStr = new Date(ts).toISOString().slice(0, 10);
        const label = task.task ? `${dateStr} - ${truncate4(task.task, 60)} (${messages.length} messages)` : `${dateStr} (${messages.length} messages)`;
        const historyFile = join11(taskDir, "api_conversation_history.json");
        // Flat per-message guess, refined from the file size when stat works.
        let estimatedTokens = messages.length * 500;
        try {
          const stat = statSync7(historyFile);
          estimatedTokens = Math.ceil(stat.size / 5);
        } catch {
          // Keep the per-message estimate when the file cannot be stat'ed.
        }
        sessions.push({
          id: taskDir,
          label,
          startedAt: ts,
          lastActivityAt: ts,
          estimatedTokens,
          messageCount: messages.length
        });
      }
    }
    return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
  },
  /**
   * Render the given task directories as text chunks of at most roughly
   * `maxTokens` tokens.
   *
   * @param {string} _projectPath - Unused.
   * @param {Iterable<string>} sessionIds - Task directories (the `id` values from `detect`).
   * @param {number} [maxTokens] - Soft per-chunk budget; a single oversized
   *   message still becomes its own chunk.
   * @returns {Array<{label: string, text: string, estimatedTokens: number, timestamp: number}>}
   */
  readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS4) {
    const chunks = [];
    for (const taskDir of sessionIds) {
      const messages = readConversation(taskDir);
      if (messages.length === 0) continue;
      // Chunks are dated by the task directory's mtime, or "now" if stat fails.
      let sessionTimestamp;
      try {
        sessionTimestamp = statSync7(taskDir).mtimeMs;
      } catch {
        sessionTimestamp = Date.now();
      }
      const textMessages = [];
      for (const msg of messages) {
        // Ignore null or primitive entries in a damaged history file.
        if (msg === null || typeof msg !== "object") continue;
        const text4 = messageToText(msg);
        if (text4) textMessages.push({ text: text4 });
      }
      if (textMessages.length === 0) continue;
      let currentTexts = [];
      let currentTokens = 0;
      let chunkIndex = 0;
      const flushChunk = () => {
        if (currentTexts.length === 0) return;
        chunkIndex++;
        const text4 = currentTexts.join("\n\n");
        chunks.push({
          label: `Cline ${new Date(sessionTimestamp).toISOString().slice(0, 10)} (${chunkIndex})`,
          text: text4,
          estimatedTokens: estimateTokens7(text4),
          timestamp: sessionTimestamp
        });
        currentTexts = [];
        currentTokens = 0;
      };
      for (const msg of textMessages) {
        const msgTokens = estimateTokens7(msg.text);
        // Flush before the message that would overflow the budget.
        if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
          flushChunk();
        }
        currentTexts.push(msg.text);
        currentTokens += msgTokens;
      }
      flushChunk();
    }
    return chunks;
  }
};
32742
+ registerProvider(clineProvider);
32743
+
32744
+ // src/import/providers/continue.ts
32745
+ import { readdirSync as readdirSync5, readFileSync as readFileSync7, existsSync as existsSync9 } from "fs";
32746
+ import { join as join12 } from "path";
32747
+ import { homedir as homedir6 } from "os";
32748
+ var MAX_TOOL_OUTPUT_CHARS5 = 500;
32749
+ var DEFAULT_MAX_TOKENS5 = 12288;
32750
/**
 * Estimate tokens as the character count divided by three, rounded up.
 *
 * @param {string} text4 - Text to measure.
 * @returns {number} Estimated token count.
 */
function estimateTokens8(text4) {
  const thirds = text4.length / 3;
  return Math.ceil(thirds);
}
32753
/**
 * Return `text4` limited to `max` characters; cut text ends in "...".
 *
 * @param {string} text4 - Text to shorten.
 * @param {number} max - Number of leading characters to keep.
 * @returns {string} The original text when it fits, else the prefix plus "...".
 */
function truncate5(text4, max) {
  const overflows = text4.length > max;
  if (!overflows) return text4;
  return `${text4.slice(0, max)}...`;
}
32757
/**
 * Resolve the Continue data directory.
 *
 * @returns {string} `CONTINUE_GLOBAL_DIR` from the environment when set and
 *   non-empty, otherwise `~/.continue`.
 */
function continueDir() {
  const override = process.env.CONTINUE_GLOBAL_DIR;
  if (override) return override;
  return join12(homedir6(), ".continue");
}
32760
/**
 * Load the session index at `<continueDir>/sessions/sessions.json`.
 *
 * @returns {Array<object>} The parsed index, or [] when the file is missing,
 *   unreadable, malformed, or not an array.
 */
function loadSessionIndex() {
  const indexPath = join12(continueDir(), "sessions", "sessions.json");
  if (existsSync9(indexPath)) {
    try {
      const parsed = JSON.parse(readFileSync7(indexPath, "utf-8"));
      if (Array.isArray(parsed)) return parsed;
    } catch {
      // An unreadable index is treated as an empty one.
    }
  }
  return [];
}
32771
/**
 * Load one session file, `<continueDir>/sessions/<sessionId>.json`.
 *
 * @param {string} sessionId - Session identifier used as the file stem.
 * @returns {* | null} The parsed JSON value, or null when the file is
 *   missing, unreadable, or malformed.
 */
function loadSession(sessionId) {
  const filePath = join12(continueDir(), "sessions", `${sessionId}.json`);
  let session = null;
  if (existsSync9(filePath)) {
    try {
      session = JSON.parse(readFileSync7(filePath, "utf-8"));
    } catch {
      session = null;
    }
  }
  return session;
}
32781
/**
 * Reduce message content to plain text.
 *
 * @param {*} content3 - A string, or an array of parts.
 * @returns {string} The string itself; for arrays, the `text` of every part
 *   with `type === "text"` and a string `text`, joined by newlines; ""
 *   for anything else.
 */
function extractMessageContent(content3) {
  if (typeof content3 === "string") return content3;
  if (!Array.isArray(content3)) return "";
  const texts = [];
  for (const part of content3) {
    if (part.type === "text" && typeof part.text === "string") {
      texts.push(part.text);
    }
  }
  return texts.join("\n");
}
32788
/**
 * Render one session history item as `[role] text`.
 *
 * The text combines the message content, one `[tool: name] args` line per
 * tool call that has a `function`, and one `[tool_result] output` line per
 * tool call state that is "done" and has output. The role "tool" is shown
 * as "tool_result".
 *
 * @param {object} item - History item with `message` and optional `toolCallStates`.
 * @returns {string | null} The rendered text, or null for items without a
 *   message, for system messages, and for items that produce no text.
 */
function historyItemToText(item) {
  const msg = item.message;
  if (!msg || msg.role === "system") return null;
  const body = extractMessageContent(msg.content);
  const parts = body ? [body] : [];
  for (const call of msg.toolCalls || []) {
    const fn = call.function;
    if (!fn) continue;
    // Missing arguments are shown as an empty JSON object.
    const args = truncate5(fn.arguments || "{}", MAX_TOOL_OUTPUT_CHARS5);
    parts.push(`[tool: ${fn.name}] ${args}`);
  }
  for (const state of item.toolCallStates || []) {
    if (state.output && state.status === "done") {
      parts.push(`[tool_result] ${truncate5(state.output, MAX_TOOL_OUTPUT_CHARS5)}`);
    }
  }
  if (parts.length === 0) return null;
  const role = msg.role === "tool" ? "tool_result" : msg.role;
  return `[${role}] ${parts.join("\n")}`;
}
32814
/**
 * Parse a session creation time into epoch milliseconds.
 *
 * Accepts anything `new Date(value)` understands, and additionally
 * digit-only strings, which are read as epoch milliseconds (such strings
 * are an Invalid Date when passed to `new Date` directly).
 *
 * @param {*} value - Raw `dateCreated` value.
 * @returns {number | null} Epoch milliseconds, or null when unparseable.
 */
function parseContinueTimestamp(value) {
  const direct = new Date(value).getTime();
  if (!Number.isNaN(direct)) return direct;
  if (typeof value === "string" && /^\d+$/.test(value.trim())) {
    const fromEpoch = new Date(Number(value.trim())).getTime();
    if (!Number.isNaN(fromEpoch)) return fromEpoch;
  }
  return null;
}

/**
 * Import provider for Continue sessions stored as JSON files.
 *
 * `detect` lists sessions recorded for a workspace directory; `readChunks`
 * renders selected sessions as text chunks bounded by a token budget.
 */
var continueProvider = {
  name: "continue",
  displayName: "Continue",
  /**
   * List importable sessions whose workspace directory equals `projectPath`.
   *
   * Sessions come from the index file first; session files that are not in
   * that list are then picked up by scanning the sessions directory.
   *
   * @param {string} projectPath - Workspace directory to match.
   * @returns {Array<object>} Session descriptors, most recent first. Sessions
   *   with fewer than 3 history items are skipped.
   */
  detect(projectPath) {
    const sessions = [];
    const index2 = loadSessionIndex();
    for (const meta3 of index2) {
      if (meta3?.workspaceDirectory !== projectPath) continue;
      const session = loadSession(meta3.sessionId);
      if (!session || !session.history || session.history.length < 3) continue;
      // An unparseable date used to reach toISOString() as an invalid Date
      // and throw RangeError, aborting detection; fall back to "now".
      const ts = parseContinueTimestamp(meta3.dateCreated) ?? Date.now();
      const dateStr = new Date(ts).toISOString().slice(0, 10);
      const messageCount = session.history.length;
      const label = meta3.title ? `${dateStr} - ${truncate5(meta3.title, 60)} (${messageCount} messages)` : `${dateStr} (${messageCount} messages)`;
      // Flat per-message guess; history bodies are not measured here.
      const estimatedTokens = messageCount * 500;
      sessions.push({
        id: meta3.sessionId,
        label,
        startedAt: ts,
        lastActivityAt: ts,
        estimatedTokens,
        messageCount
      });
    }
    const sessionsDir = join12(continueDir(), "sessions");
    if (existsSync9(sessionsDir)) {
      const existingIds = new Set(sessions.map((s) => s.id));
      let entries;
      try {
        entries = readdirSync5(sessionsDir);
      } catch {
        entries = [];
      }
      const suffix = ".json";
      for (const entry of entries) {
        if (!entry.endsWith(suffix) || entry === "sessions.json") continue;
        // Strip only the trailing extension; replace(".json", "") would
        // remove the first occurrence anywhere in the name.
        const sessionId = entry.slice(0, -suffix.length);
        if (existingIds.has(sessionId)) continue;
        const session = loadSession(sessionId);
        if (!session) continue;
        if (session.workspaceDirectory !== projectPath) continue;
        if (!session.history || session.history.length < 3) continue;
        // No creation date is known on this path, so the label carries the
        // title (or an id prefix) and the session is stamped with "now".
        const titleOrId = session.title ? truncate5(session.title, 60) : sessionId.slice(0, 8);
        const now = Date.now();
        sessions.push({
          id: sessionId,
          label: `${titleOrId} (${session.history.length} messages)`,
          startedAt: now,
          lastActivityAt: now,
          estimatedTokens: session.history.length * 500,
          messageCount: session.history.length
        });
      }
    }
    return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
  },
  /**
   * Render the given sessions as text chunks of at most roughly `maxTokens` tokens.
   *
   * @param {string} _projectPath - Unused.
   * @param {Iterable<string>} sessionIds - Session ids (the `id` values from `detect`).
   * @param {number} [maxTokens] - Soft per-chunk budget; a single oversized
   *   message still becomes its own chunk.
   * @returns {Array<{label: string, text: string, estimatedTokens: number, timestamp: number}>}
   */
  readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS5) {
    const chunks = [];
    // Creation times from the index, so chunks carry the session's own date
    // rather than the import time whenever the index knows the session.
    const createdAtById = new Map();
    for (const meta3 of loadSessionIndex()) {
      const ts = parseContinueTimestamp(meta3?.dateCreated);
      if (ts !== null && meta3?.sessionId !== undefined && meta3?.sessionId !== null) {
        createdAtById.set(meta3.sessionId, ts);
      }
    }
    for (const sessionId of sessionIds) {
      const session = loadSession(sessionId);
      if (!session || !session.history) continue;
      const textMessages = [];
      for (const item of session.history) {
        const text4 = historyItemToText(item);
        if (text4) textMessages.push({ text: text4 });
      }
      if (textMessages.length === 0) continue;
      const sessionTimestamp = createdAtById.get(sessionId) ?? Date.now();
      let currentTexts = [];
      let currentTokens = 0;
      let chunkIndex = 0;
      const flushChunk = () => {
        if (currentTexts.length === 0) return;
        chunkIndex++;
        const text4 = currentTexts.join("\n\n");
        chunks.push({
          label: `Continue ${session.title || sessionId.slice(0, 8)} (${chunkIndex})`,
          text: text4,
          estimatedTokens: estimateTokens8(text4),
          timestamp: sessionTimestamp
        });
        currentTexts = [];
        currentTokens = 0;
      };
      for (const msg of textMessages) {
        const msgTokens = estimateTokens8(msg.text);
        // Flush before the message that would overflow the budget.
        if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
          flushChunk();
        }
        currentTexts.push(msg.text);
        currentTokens += msgTokens;
      }
      flushChunk();
    }
    return chunks;
  }
};
32909
+ registerProvider(continueProvider);
32910
+
32911
+ // src/import/providers/pi.ts
32912
+ import { readdirSync as readdirSync6, readFileSync as readFileSync8, statSync as statSync8 } from "fs";
32913
+ import { join as join13 } from "path";
32914
+ import { homedir as homedir7 } from "os";
32915
+ var PI_DIR = join13(homedir7(), ".pi", "agent", "sessions");
32916
+ var MAX_TOOL_OUTPUT_CHARS6 = 500;
32917
+ var DEFAULT_MAX_TOKENS6 = 12288;
32918
/**
 * Cheap token estimate: every three characters count as one token (rounded up).
 *
 * @param {string} text4 - Text to measure.
 * @returns {number} Estimated token count.
 */
function estimateTokens9(text4) {
  const CHARS_PER_TOKEN = 3;
  return Math.ceil(text4.length / CHARS_PER_TOKEN);
}
30365
- function backfillMetrics() {
30366
- const rows = db().query(
30367
- "SELECT id, source_ids, token_count FROM distillations WHERE r_compression IS NULL"
30368
- ).all();
30369
- if (!rows.length) return 0;
30370
- const update2 = db().prepare(
30371
- "UPDATE distillations SET r_compression = ?, c_norm = ? WHERE id = ?"
30372
- );
30373
- let updated = 0;
30374
- for (const row of rows) {
30375
- const sourceIds = parseSourceIds(row.source_ids);
30376
- if (!sourceIds.length) continue;
30377
- const placeholders = sourceIds.map(() => "?").join(",");
30378
- const sources = db().query(
30379
- `SELECT tokens, created_at FROM temporal_messages WHERE id IN (${placeholders})`
30380
- ).all(...sourceIds);
30381
- if (!sources.length) continue;
30382
- const sourceTokens = sources.reduce((sum, s) => sum + s.tokens, 0);
30383
- const timestamps = sources.map((s) => s.created_at);
30384
- const rComp = compressionRatio(row.token_count, sourceTokens);
30385
- const cNorm = temporalCnorm(timestamps);
30386
- update2.run(rComp, cNorm, row.id);
30387
- updated++;
30388
- }
30389
- if (updated > 0) {
30390
- info(
30391
- `backfilled metrics for ${updated} distillations (${rows.length - updated} skipped \u2014 missing sources)`
30392
- );
30393
- }
30394
- return updated;
32921
/**
 * Keep at most `max` leading characters of `text4`; a cut is marked with "...".
 *
 * @param {string} text4 - Text to shorten.
 * @param {number} max - Number of leading characters to keep.
 * @returns {string} The original text when it fits, else the prefix plus "...".
 */
function truncate6(text4, max) {
  return text4.length > max ? `${text4.slice(0, max)}...` : text4;
}
30396
-
30397
- // src/curator.ts
30398
- var curator_exports = {};
30399
- __export(curator_exports, {
30400
- consolidate: () => consolidate,
30401
- resetCurationTracker: () => resetCurationTracker,
30402
- run: () => run2
30403
- });
30404
- var MAX_ENTRY_CONTENT_LENGTH = 1200;
30405
- function parseOps(text4) {
30406
- const cleaned = text4.trim().replace(/^```json?\s*/i, "").replace(/\s*```$/i, "");
32925
/**
 * Encode a working directory as a session directory name.
 *
 * One leading separator is dropped, every remaining `/`, `\` or `:` becomes
 * `-`, and the result is wrapped in `--`. For POSIX paths without colons this
 * matches the previous behavior; handling `\` and `:` additionally lets
 * Windows-style paths map to a single directory name.
 * NOTE(review): the `\` and `:` handling is assumed to mirror the naming the
 * session writer uses — confirm against its session directory layout.
 *
 * @param {string} cwd - Absolute working directory.
 * @returns {string} Directory name such as `--home-user-project--`.
 */
function encodeCwd(cwd) {
  const encoded = cwd.replace(/^[/\\]/, "").replace(/[/\\:]/g, "-");
  return `--${encoded}--`;
}
32929
/**
 * Parse a JSONL file into an array of values.
 *
 * Blank lines and lines that are not valid JSON are skipped.
 *
 * @param {string} filePath - Path of the JSONL file.
 * @returns {Array<*>} One entry per parseable line, in file order; [] when
 *   the file cannot be read.
 */
function parseJSONL3(filePath) {
  let raw;
  try {
    raw = readFileSync8(filePath, "utf-8");
  } catch {
    return [];
  }
  return raw.split("\n").reduce((records, line) => {
    if (line.trim()) {
      try {
        records.push(JSON.parse(line));
      } catch {
        // Ignore lines that are not valid JSON.
      }
    }
    return records;
  }, []);
}
30417
- var lastCuratedAt = /* @__PURE__ */ new Map();
30418
- async function run2(input) {
30419
- const cfg = config2();
30420
- if (!cfg.curator.enabled) return { created: 0, updated: 0, deleted: 0 };
30421
- const all3 = bySession(input.projectPath, input.sessionID);
30422
- const sessionCuratedAt = lastCuratedAt.get(input.sessionID) ?? 0;
30423
- const recent = all3.filter((m) => m.created_at > sessionCuratedAt);
30424
- if (recent.length < 3) return { created: 0, updated: 0, deleted: 0 };
30425
- const text4 = recent.map((m) => `[${m.role}] ${m.content}`).join("\n\n");
30426
- const existing = forProject(input.projectPath, false);
30427
- const existingForPrompt = existing.map((e) => ({
30428
- id: e.id,
30429
- category: e.category,
30430
- title: e.title,
30431
- content: e.content
30432
- }));
30433
- const userContent = curatorUser({
30434
- messages: text4,
30435
- existing: existingForPrompt
30436
- });
30437
- const model = input.model ?? cfg.model;
30438
- const responseText = await input.llm.prompt(
30439
- CURATOR_SYSTEM,
30440
- userContent,
30441
- { model, workerID: "lore-curator", thinking: false, sessionID: input.sessionID, maxTokens: 2048 }
30442
- );
30443
- if (!responseText) return { created: 0, updated: 0, deleted: 0 };
30444
- const ops = parseOps(responseText);
30445
- let created = 0;
30446
- let updated = 0;
30447
- let deleted = 0;
30448
- const idsToSync = [];
30449
- for (const op of ops) {
30450
- if (op.op === "create") {
30451
- const content3 = op.content.length > MAX_ENTRY_CONTENT_LENGTH ? op.content.slice(0, MAX_ENTRY_CONTENT_LENGTH) + " [truncated \u2014 entry too long]" : op.content;
30452
- const id = create({
30453
- projectPath: op.scope === "project" ? input.projectPath : void 0,
30454
- category: op.category,
30455
- title: op.title,
30456
- content: content3,
30457
- session: input.sessionID,
30458
- scope: op.scope,
30459
- crossProject: op.crossProject ?? true
32946
/**
 * Flatten a tree of session lines into a single branch.
 *
 * Lines link to their parent through `parentId`. Starting at the "session"
 * header line, the walk repeatedly follows the LAST child recorded for the
 * current line, so where the conversation forks, the branch that appears
 * latest in the file is the one kept.
 *
 * @param {Array<object>} lines - Parsed session lines.
 * @returns {Array<object>} The lines along the chosen branch, header
 *   excluded. When there is no header (or it has no `id`), all lines with
 *   `type === "message"` are returned in file order.
 */
function linearize(lines) {
  if (lines.length === 0) return [];
  const children = /* @__PURE__ */ new Map();
  let rootLine = null;
  for (const line of lines) {
    if (line.type === "session") {
      // A later header replaces an earlier one.
      rootLine = line;
      continue;
    }
    if (!line.id) continue;
    const pid = line.parentId;
    if (pid) {
      const siblings = children.get(pid) ?? [];
      siblings.push(line);
      children.set(pid, siblings);
    }
  }
  if (!rootLine || !rootLine.id) return lines.filter((l) => l.type === "message");
  const result = [];
  // Ids already on the path. Without this guard, parent links that form a
  // cycle (or reuse an id) would make the walk loop forever.
  const visited = /* @__PURE__ */ new Set([rootLine.id]);
  let currentId = rootLine.id;
  while (currentId) {
    const kids = children.get(currentId);
    if (!kids || kids.length === 0) break;
    const next = kids[kids.length - 1];
    if (visited.has(next.id)) break;
    visited.add(next.id);
    result.push(next);
    currentId = next.id;
  }
  return result;
}
32977
/**
 * Read summary metadata for one Pi session file (JSONL).
 *
 * @param {string} filePath - Path of the session file.
 * @returns {{id: *, cwd: *, timestamp: number, messageCount: number, fileSize: number} | null}
 *   Metadata taken from the first line, or null when the file has no
 *   parseable lines or its first line is not of type "session". `timestamp`
 *   falls back to the current time when the header's value is unparseable;
 *   `fileSize` is 0 when the file cannot be stat'ed.
 */
function getSessionMeta2(filePath) {
  const lines = parseJSONL3(filePath);
  if (lines.length === 0) return null;
  const [session] = lines;
  if (session.type !== "session") return null;
  let messageCount = 0;
  for (const l of lines) {
    if (l.type === "message") messageCount++;
  }
  let fileSize = 0;
  try {
    fileSize = statSync8(filePath).size;
  } catch {
    fileSize = 0;
  }
  const parsed = new Date(session.timestamp).getTime();
  const timestamp = Number.isNaN(parsed) ? Date.now() : parsed;
  return {
    id: session.id,
    cwd: session.cwd,
    timestamp,
    messageCount,
    fileSize
  };
}
32999
/**
 * Reduce Pi message content to plain text.
 *
 * @param {*} content3 - A string, an array of content blocks, or anything else.
 * @returns {string} The string itself; for arrays, the `text` of every block
 *   with `type === "text"`, joined by newlines; otherwise the value's string
 *   form ("" for falsy values).
 */
function piContentToText(content3) {
  if (typeof content3 === "string") return content3;
  if (Array.isArray(content3)) {
    return content3
      .filter((block) => block && block.type === "text" && typeof block.text === "string")
      .map((block) => block.text)
      .join("\n");
  }
  return content3 ? String(content3) : "";
}

/**
 * Import provider for Pi agent sessions stored as JSONL files in a
 * per-project directory.
 *
 * `detect` lists the session files of a project; `readChunks` renders
 * selected files as text chunks bounded by a token budget.
 */
var piProvider = {
  name: "pi",
  displayName: "Pi",
  /**
   * List importable sessions found in the directory derived from `projectPath`.
   *
   * @param {string} projectPath - Project directory; encoded into the session folder name.
   * @returns {Array<object>} Session descriptors, most recent first; [] when
   *   the folder cannot be read. Sessions with fewer than 3 messages are skipped.
   */
  detect(projectPath) {
    const encoded = encodeCwd(projectPath);
    const dir = join13(PI_DIR, encoded);
    let entries;
    try {
      entries = readdirSync6(dir);
    } catch {
      return [];
    }
    const sessions = [];
    for (const entry of entries) {
      if (!entry.endsWith(".jsonl")) continue;
      const filePath = join13(dir, entry);
      const meta3 = getSessionMeta2(filePath);
      if (!meta3) continue;
      if (meta3.messageCount < 3) continue;
      const dateStr = new Date(meta3.timestamp).toISOString().slice(0, 10);
      const estimatedTokens = Math.ceil(meta3.fileSize / 5);
      sessions.push({
        id: filePath,
        label: `${dateStr} (${meta3.messageCount} messages)`,
        startedAt: meta3.timestamp,
        lastActivityAt: meta3.timestamp,
        estimatedTokens,
        messageCount: meta3.messageCount
      });
    }
    return sessions.sort((a, b) => b.lastActivityAt - a.lastActivityAt);
  },
  /**
   * Render the given session files as text chunks of at most roughly
   * `maxTokens` tokens, following a single branch of each session tree.
   *
   * @param {string} _projectPath - Unused.
   * @param {Iterable<string>} sessionIds - Session file paths (the `id` values from `detect`).
   * @param {number} [maxTokens] - Soft per-chunk budget; a single oversized
   *   message still becomes its own chunk.
   * @returns {Array<{label: string, text: string, estimatedTokens: number, timestamp: number}>}
   */
  readChunks(_projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS6) {
    const chunks = [];
    for (const filePath of sessionIds) {
      const allLines = parseJSONL3(filePath);
      const linearLines = linearize(allLines);
      let sessionTimestamp = Date.now();
      const header = allLines.find((l) => l.type === "session");
      if (header?.type === "session") {
        const ts = new Date(header.timestamp).getTime();
        if (!Number.isNaN(ts)) sessionTimestamp = ts;
      }
      const messages = [];
      for (const line of linearLines) {
        if (line.type === "message") {
          // A line without a `message` object has nothing to render.
          const inner = line.message;
          if (!inner) continue;
          // Array content used to be interpolated directly, producing
          // "[object Object]"; flatten its text blocks instead.
          const content3 = piContentToText(inner.content);
          if (!content3) continue;
          const ts = new Date(line.timestamp).getTime();
          messages.push({
            text: `[${inner.role}] ${content3}`,
            timestamp: Number.isNaN(ts) ? sessionTimestamp : ts
          });
        } else if (line.type === "compaction") {
          if (line.summary) {
            messages.push({
              text: `[summary] ${truncate6(line.summary, MAX_TOOL_OUTPUT_CHARS6 * 2)}`,
              timestamp: sessionTimestamp
            });
          }
        }
      }
      if (messages.length === 0) continue;
      let currentTexts = [];
      let currentTokens = 0;
      let chunkIndex = 0;
      const flushChunk = () => {
        if (currentTexts.length === 0) return;
        chunkIndex++;
        const text4 = currentTexts.join("\n\n");
        chunks.push({
          label: `Pi ${new Date(sessionTimestamp).toISOString().slice(0, 10)} (${chunkIndex})`,
          text: text4,
          estimatedTokens: estimateTokens9(text4),
          timestamp: sessionTimestamp
        });
        currentTexts = [];
        currentTokens = 0;
      };
      for (const msg of messages) {
        const msgTokens = estimateTokens9(msg.text);
        // Flush before the message that would overflow the budget.
        if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
          flushChunk();
        }
        currentTexts.push(msg.text);
        currentTokens += msgTokens;
      }
      flushChunk();
    }
    return chunks;
  }
};
33094
+ registerProvider(piProvider);
33095
+
33096
+ // src/import/providers/aider.ts
33097
+ import { existsSync as existsSync11, readFileSync as readFileSync9, statSync as statSync9 } from "fs";
33098
+ import { join as join14 } from "path";
33099
+ var HISTORY_FILE = ".aider.chat.history.md";
33100
+ var DEFAULT_MAX_TOKENS7 = 12288;
33101
+ var ROLE_HEADER_RE = /^####\s+(user|assistant|system)\s*$/i;
33102
+ function estimateTokens10(text4) {
33103
+ return Math.ceil(text4.length / 3);
30484
33104
  }
30485
- function resetCurationTracker(sessionID) {
30486
- if (sessionID) {
30487
- lastCuratedAt.delete(sessionID);
30488
- } else {
30489
- lastCuratedAt.clear();
33105
+ function parseAiderHistory(content3) {
33106
+ const lines = content3.split("\n");
33107
+ const messages = [];
33108
+ let currentRole = null;
33109
+ let currentLines = [];
33110
+ const flush = () => {
33111
+ if (currentRole && currentLines.length > 0) {
33112
+ const text4 = currentLines.join("\n").trim();
33113
+ if (text4) {
33114
+ messages.push({ role: currentRole, text: text4 });
33115
+ }
33116
+ }
33117
+ currentLines = [];
33118
+ };
33119
+ for (const line of lines) {
33120
+ const match = ROLE_HEADER_RE.exec(line);
33121
+ if (match) {
33122
+ flush();
33123
+ currentRole = match[1].toLowerCase();
33124
+ continue;
33125
+ }
33126
+ if (line.trim() === "---") {
33127
+ flush();
33128
+ currentRole = null;
33129
+ continue;
33130
+ }
33131
+ if (currentRole) {
33132
+ currentLines.push(line);
33133
+ }
30490
33134
  }
33135
+ flush();
33136
+ return messages;
30491
33137
  }
30492
- async function consolidate(input) {
30493
- const cfg = config2();
30494
- if (!cfg.curator.enabled) return { updated: 0, deleted: 0 };
30495
- const entries = forProject(input.projectPath, false);
30496
- if (entries.length <= cfg.curator.maxEntries) return { updated: 0, deleted: 0 };
30497
- const entriesForPrompt = entries.map((e) => ({
30498
- id: e.id,
30499
- category: e.category,
30500
- title: e.title,
30501
- content: e.content
30502
- }));
30503
- const userContent = consolidationUser({
30504
- entries: entriesForPrompt,
30505
- targetMax: cfg.curator.maxEntries
30506
- });
30507
- const model = input.model ?? cfg.model;
30508
- const responseText = await input.llm.prompt(
30509
- CONSOLIDATION_SYSTEM,
30510
- userContent,
30511
- { model, workerID: "lore-curator", thinking: false, sessionID: input.sessionID, maxTokens: 4096 }
30512
- );
30513
- if (!responseText) return { updated: 0, deleted: 0 };
30514
- const ops = parseOps(responseText);
30515
- let updated = 0;
30516
- let deleted = 0;
30517
- for (const op of ops) {
30518
- if (op.op === "update") {
30519
- const entry = get(op.id);
30520
- if (entry) {
30521
- const content3 = op.content !== void 0 && op.content.length > MAX_ENTRY_CONTENT_LENGTH ? op.content.slice(0, MAX_ENTRY_CONTENT_LENGTH) + " [truncated \u2014 entry too long]" : op.content;
30522
- update(op.id, { content: content3, confidence: op.confidence });
30523
- updated++;
33138
+ var aiderProvider = {
33139
+ name: "aider",
33140
+ displayName: "Aider",
33141
+ detect(projectPath) {
33142
+ const filePath = join14(projectPath, HISTORY_FILE);
33143
+ if (!existsSync11(filePath)) return [];
33144
+ let stat;
33145
+ try {
33146
+ stat = statSync9(filePath);
33147
+ } catch {
33148
+ return [];
33149
+ }
33150
+ if (!stat.isFile() || stat.size === 0) return [];
33151
+ let content3;
33152
+ try {
33153
+ content3 = readFileSync9(filePath, "utf-8");
33154
+ } catch {
33155
+ return [];
33156
+ }
33157
+ const messages = parseAiderHistory(content3);
33158
+ if (messages.length < 3) return [];
33159
+ const estimatedTokens = estimateTokens10(content3);
33160
+ return [
33161
+ {
33162
+ id: filePath,
33163
+ label: `Chat history (${messages.length} messages, ${Math.round(stat.size / 1024)}KB)`,
33164
+ startedAt: stat.birthtimeMs || stat.ctimeMs,
33165
+ lastActivityAt: stat.mtimeMs,
33166
+ estimatedTokens,
33167
+ messageCount: messages.length
33168
+ }
33169
+ ];
33170
+ },
33171
+ readChunks(projectPath, sessionIds, maxTokens = DEFAULT_MAX_TOKENS7) {
33172
+ const chunks = [];
33173
+ for (const filePath of sessionIds) {
33174
+ let content3;
33175
+ try {
33176
+ content3 = readFileSync9(filePath, "utf-8");
33177
+ } catch {
33178
+ continue;
30524
33179
  }
30525
- } else if (op.op === "delete") {
30526
- const entry = get(op.id);
30527
- if (entry) {
30528
- remove(op.id);
30529
- deleted++;
33180
+ const messages = parseAiderHistory(content3);
33181
+ if (messages.length === 0) continue;
33182
+ let fileTimestamp;
33183
+ try {
33184
+ fileTimestamp = statSync9(filePath).mtimeMs;
33185
+ } catch {
33186
+ fileTimestamp = Date.now();
33187
+ }
33188
+ let currentTexts = [];
33189
+ let currentTokens = 0;
33190
+ let chunkIndex = 0;
33191
+ const flushChunk = () => {
33192
+ if (currentTexts.length === 0) return;
33193
+ chunkIndex++;
33194
+ const text4 = currentTexts.join("\n\n");
33195
+ chunks.push({
33196
+ label: `Aider history (${chunkIndex})`,
33197
+ text: text4,
33198
+ estimatedTokens: estimateTokens10(text4),
33199
+ timestamp: fileTimestamp
33200
+ });
33201
+ currentTexts = [];
33202
+ currentTokens = 0;
33203
+ };
33204
+ for (const msg of messages) {
33205
+ const formatted = `[${msg.role}] ${msg.text}`;
33206
+ const msgTokens = estimateTokens10(formatted);
33207
+ if (currentTokens > 0 && currentTokens + msgTokens > maxTokens) {
33208
+ flushChunk();
33209
+ }
33210
+ currentTexts.push(formatted);
33211
+ currentTokens += msgTokens;
30530
33212
  }
33213
+ flushChunk();
30531
33214
  }
33215
+ return chunks;
30532
33216
  }
30533
- return { updated, deleted };
30534
- }
33217
+ };
33218
+ registerProvider(aiderProvider);
30535
33219
 
30536
33220
  // src/recall.ts
30537
33221
  function getTaggedText(tagged) {
@@ -30797,7 +33481,10 @@ async function searchRecall(input) {
30797
33481
  info("recall: query expansion failed, using original:", err);
30798
33482
  }
30799
33483
  }
33484
+ const queryTermCount = filterTerms(query).length;
33485
+ const vectorWeight = queryTermCount >= (searchConfig?.vectorBoostMinTerms ?? 3) ? searchConfig?.vectorBoostWeight ?? 1.5 : 1;
30800
33486
  const allRrfLists = [];
33487
+ let primaryListEnd = 0;
30801
33488
  for (const q of queries) {
30802
33489
  const knowledgeResults = [];
30803
33490
  if (knowledgeEnabled && scope !== "session") {
@@ -30874,7 +33561,11 @@ async function searchRecall(input) {
30874
33561
  key: (r) => `t:${r.item.id}`
30875
33562
  });
30876
33563
  }
33564
+ if (primaryListEnd === 0) {
33565
+ primaryListEnd = allRrfLists.length;
33566
+ }
30877
33567
  }
33568
+ const perQueryListEnd = allRrfLists.length;
30878
33569
  if (isAvailable() && scope !== "session") {
30879
33570
  try {
30880
33571
  const [queryVec] = await embed([query], "query");
@@ -30893,7 +33584,8 @@ async function searchRecall(input) {
30893
33584
  if (vectorTagged.length) {
30894
33585
  allRrfLists.push({
30895
33586
  items: vectorTagged,
30896
- key: (r) => `k:${r.item.id}`
33587
+ key: (r) => `k:${r.item.id}`,
33588
+ weight: vectorWeight
30897
33589
  });
30898
33590
  }
30899
33591
  }
@@ -30912,7 +33604,8 @@ async function searchRecall(input) {
30912
33604
  if (distVectorTagged.length) {
30913
33605
  allRrfLists.push({
30914
33606
  items: distVectorTagged,
30915
- key: (r) => `d:${r.item.id}`
33607
+ key: (r) => `d:${r.item.id}`,
33608
+ weight: vectorWeight
30916
33609
  });
30917
33610
  }
30918
33611
  }
@@ -30936,7 +33629,8 @@ async function searchRecall(input) {
30936
33629
  if (temporalVectorTagged.length) {
30937
33630
  allRrfLists.push({
30938
33631
  items: temporalVectorTagged,
30939
- key: (r) => `t:${r.item.id}`
33632
+ key: (r) => `t:${r.item.id}`,
33633
+ weight: vectorWeight
30940
33634
  });
30941
33635
  }
30942
33636
  }
@@ -31039,6 +33733,15 @@ async function searchRecall(input) {
31039
33733
  });
31040
33734
  }
31041
33735
  }
33736
+ const MAX_RRF_LISTS = 10;
33737
+ if (allRrfLists.length > MAX_RRF_LISTS) {
33738
+ const primary = allRrfLists.slice(0, primaryListEnd);
33739
+ const expanded = allRrfLists.slice(primaryListEnd, perQueryListEnd);
33740
+ const supplemental = allRrfLists.slice(perQueryListEnd);
33741
+ const budget = Math.max(0, MAX_RRF_LISTS - primary.length - supplemental.length);
33742
+ allRrfLists.length = 0;
33743
+ allRrfLists.push(...primary, ...expanded.slice(0, budget), ...supplemental);
33744
+ }
31042
33745
  const fused = reciprocalRankFusion(allRrfLists);
31043
33746
  const maxResults = limit * 3;
31044
33747
  return fused.slice(0, maxResults);
@@ -31108,9 +33811,6 @@ async function runRecall(input) {
31108
33811
  if (input.id) {
31109
33812
  return recallById(input.id);
31110
33813
  }
31111
- if (ftsQuery(input.query) === EMPTY_QUERY) {
31112
- return "Query too vague \u2014 try using specific keywords, file names, or technical terms.";
31113
- }
31114
33814
  const fused = await searchRecall(input);
31115
33815
  const recallCfg = input.searchConfig?.recall;
31116
33816
  return formatFusedResults(fused, {
@@ -31157,9 +33857,11 @@ export {
31157
33857
  config2 as config,
31158
33858
  consolidationUser,
31159
33859
  consumeCameOutOfIdle,
33860
+ import_exports as conversationImport,
31160
33861
  curator_exports as curator,
31161
33862
  curatorUser,
31162
33863
  data_exports as data,
33864
+ dataDir,
31163
33865
  db,
31164
33866
  dbPath,
31165
33867
  distillation_exports as distillation,
@@ -31179,6 +33881,8 @@ export {
31179
33881
  ftsQueryRelaxed,
31180
33882
  getGitRemote,
31181
33883
  getInstanceId,
33884
+ getKV,
33885
+ getLastImportAt,
31182
33886
  getLastTransformEstimate,
31183
33887
  getLastTransformedCount,
31184
33888
  getLastTurnAt,
@@ -31191,6 +33895,7 @@ export {
31191
33895
  importLoreFile,
31192
33896
  inline,
31193
33897
  inspectSessionState,
33898
+ instruction_detect_exports as instructionDetect,
31194
33899
  isFirstRun,
31195
33900
  isReasoningPart,
31196
33901
  isTextPart,
@@ -31202,7 +33907,9 @@ export {
31202
33907
  load,
31203
33908
  loadAllSessionCosts,
31204
33909
  loadForceMinLayer,
33910
+ loadHeaderSessionIndex,
31205
33911
  loadSessionCosts,
33912
+ loadSessionTracking,
31206
33913
  log_exports as log,
31207
33914
  loreFileExists,
31208
33915
  ltm_exports as ltm,
@@ -31223,10 +33930,14 @@ export {
31223
33930
  runRecall,
31224
33931
  sanitizeSurrogates,
31225
33932
  saveForceMinLayer,
33933
+ saveGradientState,
31226
33934
  saveSessionCosts,
33935
+ saveSessionTracking,
31227
33936
  searchRecall,
31228
33937
  serialize,
31229
33938
  setForceMinLayer,
33939
+ setKV,
33940
+ setLastImportAt,
31230
33941
  setLastTurnAtForTest,
31231
33942
  setLtmTokens,
31232
33943
  setMaxContextTokens,