@loreai/core 0.17.1 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235)
  1. package/dist/bun/agents-file.d.ts +4 -0
  2. package/dist/bun/agents-file.d.ts.map +1 -1
  3. package/dist/bun/config.d.ts +2 -0
  4. package/dist/bun/config.d.ts.map +1 -1
  5. package/dist/bun/curator.d.ts +45 -0
  6. package/dist/bun/curator.d.ts.map +1 -1
  7. package/dist/bun/data-dir.d.ts +18 -0
  8. package/dist/bun/data-dir.d.ts.map +1 -0
  9. package/dist/bun/db.d.ts +12 -0
  10. package/dist/bun/db.d.ts.map +1 -1
  11. package/dist/bun/distillation.d.ts.map +1 -1
  12. package/dist/bun/embedding-vendor.d.ts +22 -38
  13. package/dist/bun/embedding-vendor.d.ts.map +1 -1
  14. package/dist/bun/embedding-worker-types.d.ts +17 -12
  15. package/dist/bun/embedding-worker-types.d.ts.map +1 -1
  16. package/dist/bun/embedding-worker.d.ts +9 -2
  17. package/dist/bun/embedding-worker.d.ts.map +1 -1
  18. package/dist/bun/embedding-worker.js +38864 -33
  19. package/dist/bun/embedding-worker.js.map +4 -4
  20. package/dist/bun/embedding.d.ts +30 -22
  21. package/dist/bun/embedding.d.ts.map +1 -1
  22. package/dist/bun/gradient.d.ts +8 -1
  23. package/dist/bun/gradient.d.ts.map +1 -1
  24. package/dist/bun/import/detect.d.ts +14 -0
  25. package/dist/bun/import/detect.d.ts.map +1 -0
  26. package/dist/bun/import/extract.d.ts +43 -0
  27. package/dist/bun/import/extract.d.ts.map +1 -0
  28. package/dist/bun/import/history.d.ts +40 -0
  29. package/dist/bun/import/history.d.ts.map +1 -0
  30. package/dist/bun/import/index.d.ts +17 -0
  31. package/dist/bun/import/index.d.ts.map +1 -0
  32. package/dist/bun/import/providers/aider.d.ts +2 -0
  33. package/dist/bun/import/providers/aider.d.ts.map +1 -0
  34. package/dist/bun/import/providers/claude-code.d.ts +2 -0
  35. package/dist/bun/import/providers/claude-code.d.ts.map +1 -0
  36. package/dist/bun/import/providers/cline.d.ts +2 -0
  37. package/dist/bun/import/providers/cline.d.ts.map +1 -0
  38. package/dist/bun/import/providers/codex.d.ts +2 -0
  39. package/dist/bun/import/providers/codex.d.ts.map +1 -0
  40. package/dist/bun/import/providers/continue.d.ts +2 -0
  41. package/dist/bun/import/providers/continue.d.ts.map +1 -0
  42. package/dist/bun/import/providers/index.d.ts +19 -0
  43. package/dist/bun/import/providers/index.d.ts.map +1 -0
  44. package/dist/bun/import/providers/opencode.d.ts +2 -0
  45. package/dist/bun/import/providers/opencode.d.ts.map +1 -0
  46. package/dist/bun/import/providers/pi.d.ts +2 -0
  47. package/dist/bun/import/providers/pi.d.ts.map +1 -0
  48. package/dist/bun/import/types.d.ts +82 -0
  49. package/dist/bun/import/types.d.ts.map +1 -0
  50. package/dist/bun/index.d.ts +4 -1
  51. package/dist/bun/index.d.ts.map +1 -1
  52. package/dist/bun/index.js +2217 -224
  53. package/dist/bun/index.js.map +4 -4
  54. package/dist/bun/instruction-detect.d.ts +66 -0
  55. package/dist/bun/instruction-detect.d.ts.map +1 -0
  56. package/dist/bun/log.d.ts +9 -0
  57. package/dist/bun/log.d.ts.map +1 -1
  58. package/dist/bun/ltm.d.ts +40 -0
  59. package/dist/bun/ltm.d.ts.map +1 -1
  60. package/dist/bun/pattern-extract.d.ts +7 -0
  61. package/dist/bun/pattern-extract.d.ts.map +1 -1
  62. package/dist/bun/prompt.d.ts +1 -1
  63. package/dist/bun/prompt.d.ts.map +1 -1
  64. package/dist/bun/recall.d.ts.map +1 -1
  65. package/dist/bun/search.d.ts +5 -3
  66. package/dist/bun/search.d.ts.map +1 -1
  67. package/dist/bun/temporal.d.ts.map +1 -1
  68. package/dist/bun/types.d.ts +1 -1
  69. package/dist/node/agents-file.d.ts +4 -0
  70. package/dist/node/agents-file.d.ts.map +1 -1
  71. package/dist/node/config.d.ts +2 -0
  72. package/dist/node/config.d.ts.map +1 -1
  73. package/dist/node/curator.d.ts +45 -0
  74. package/dist/node/curator.d.ts.map +1 -1
  75. package/dist/node/data-dir.d.ts +18 -0
  76. package/dist/node/data-dir.d.ts.map +1 -0
  77. package/dist/node/db.d.ts +12 -0
  78. package/dist/node/db.d.ts.map +1 -1
  79. package/dist/node/distillation.d.ts.map +1 -1
  80. package/dist/node/embedding-vendor.d.ts +22 -38
  81. package/dist/node/embedding-vendor.d.ts.map +1 -1
  82. package/dist/node/embedding-worker-types.d.ts +17 -12
  83. package/dist/node/embedding-worker-types.d.ts.map +1 -1
  84. package/dist/node/embedding-worker.d.ts +9 -2
  85. package/dist/node/embedding-worker.d.ts.map +1 -1
  86. package/dist/node/embedding-worker.js +38864 -33
  87. package/dist/node/embedding-worker.js.map +4 -4
  88. package/dist/node/embedding.d.ts +30 -22
  89. package/dist/node/embedding.d.ts.map +1 -1
  90. package/dist/node/gradient.d.ts +8 -1
  91. package/dist/node/gradient.d.ts.map +1 -1
  92. package/dist/node/import/detect.d.ts +14 -0
  93. package/dist/node/import/detect.d.ts.map +1 -0
  94. package/dist/node/import/extract.d.ts +43 -0
  95. package/dist/node/import/extract.d.ts.map +1 -0
  96. package/dist/node/import/history.d.ts +40 -0
  97. package/dist/node/import/history.d.ts.map +1 -0
  98. package/dist/node/import/index.d.ts +17 -0
  99. package/dist/node/import/index.d.ts.map +1 -0
  100. package/dist/node/import/providers/aider.d.ts +2 -0
  101. package/dist/node/import/providers/aider.d.ts.map +1 -0
  102. package/dist/node/import/providers/claude-code.d.ts +2 -0
  103. package/dist/node/import/providers/claude-code.d.ts.map +1 -0
  104. package/dist/node/import/providers/cline.d.ts +2 -0
  105. package/dist/node/import/providers/cline.d.ts.map +1 -0
  106. package/dist/node/import/providers/codex.d.ts +2 -0
  107. package/dist/node/import/providers/codex.d.ts.map +1 -0
  108. package/dist/node/import/providers/continue.d.ts +2 -0
  109. package/dist/node/import/providers/continue.d.ts.map +1 -0
  110. package/dist/node/import/providers/index.d.ts +19 -0
  111. package/dist/node/import/providers/index.d.ts.map +1 -0
  112. package/dist/node/import/providers/opencode.d.ts +2 -0
  113. package/dist/node/import/providers/opencode.d.ts.map +1 -0
  114. package/dist/node/import/providers/pi.d.ts +2 -0
  115. package/dist/node/import/providers/pi.d.ts.map +1 -0
  116. package/dist/node/import/types.d.ts +82 -0
  117. package/dist/node/import/types.d.ts.map +1 -0
  118. package/dist/node/index.d.ts +4 -1
  119. package/dist/node/index.d.ts.map +1 -1
  120. package/dist/node/index.js +2217 -224
  121. package/dist/node/index.js.map +4 -4
  122. package/dist/node/instruction-detect.d.ts +66 -0
  123. package/dist/node/instruction-detect.d.ts.map +1 -0
  124. package/dist/node/log.d.ts +9 -0
  125. package/dist/node/log.d.ts.map +1 -1
  126. package/dist/node/ltm.d.ts +40 -0
  127. package/dist/node/ltm.d.ts.map +1 -1
  128. package/dist/node/pattern-extract.d.ts +7 -0
  129. package/dist/node/pattern-extract.d.ts.map +1 -1
  130. package/dist/node/prompt.d.ts +1 -1
  131. package/dist/node/prompt.d.ts.map +1 -1
  132. package/dist/node/recall.d.ts.map +1 -1
  133. package/dist/node/search.d.ts +5 -3
  134. package/dist/node/search.d.ts.map +1 -1
  135. package/dist/node/temporal.d.ts.map +1 -1
  136. package/dist/node/types.d.ts +1 -1
  137. package/dist/types/agents-file.d.ts +4 -0
  138. package/dist/types/agents-file.d.ts.map +1 -1
  139. package/dist/types/config.d.ts +2 -0
  140. package/dist/types/config.d.ts.map +1 -1
  141. package/dist/types/curator.d.ts +45 -0
  142. package/dist/types/curator.d.ts.map +1 -1
  143. package/dist/types/data-dir.d.ts +18 -0
  144. package/dist/types/data-dir.d.ts.map +1 -0
  145. package/dist/types/db.d.ts +12 -0
  146. package/dist/types/db.d.ts.map +1 -1
  147. package/dist/types/distillation.d.ts.map +1 -1
  148. package/dist/types/embedding-vendor.d.ts +22 -38
  149. package/dist/types/embedding-vendor.d.ts.map +1 -1
  150. package/dist/types/embedding-worker-types.d.ts +17 -12
  151. package/dist/types/embedding-worker-types.d.ts.map +1 -1
  152. package/dist/types/embedding-worker.d.ts +9 -2
  153. package/dist/types/embedding-worker.d.ts.map +1 -1
  154. package/dist/types/embedding.d.ts +30 -22
  155. package/dist/types/embedding.d.ts.map +1 -1
  156. package/dist/types/gradient.d.ts +8 -1
  157. package/dist/types/gradient.d.ts.map +1 -1
  158. package/dist/types/import/detect.d.ts +14 -0
  159. package/dist/types/import/detect.d.ts.map +1 -0
  160. package/dist/types/import/extract.d.ts +43 -0
  161. package/dist/types/import/extract.d.ts.map +1 -0
  162. package/dist/types/import/history.d.ts +40 -0
  163. package/dist/types/import/history.d.ts.map +1 -0
  164. package/dist/types/import/index.d.ts +17 -0
  165. package/dist/types/import/index.d.ts.map +1 -0
  166. package/dist/types/import/providers/aider.d.ts +2 -0
  167. package/dist/types/import/providers/aider.d.ts.map +1 -0
  168. package/dist/types/import/providers/claude-code.d.ts +2 -0
  169. package/dist/types/import/providers/claude-code.d.ts.map +1 -0
  170. package/dist/types/import/providers/cline.d.ts +2 -0
  171. package/dist/types/import/providers/cline.d.ts.map +1 -0
  172. package/dist/types/import/providers/codex.d.ts +2 -0
  173. package/dist/types/import/providers/codex.d.ts.map +1 -0
  174. package/dist/types/import/providers/continue.d.ts +2 -0
  175. package/dist/types/import/providers/continue.d.ts.map +1 -0
  176. package/dist/types/import/providers/index.d.ts +19 -0
  177. package/dist/types/import/providers/index.d.ts.map +1 -0
  178. package/dist/types/import/providers/opencode.d.ts +2 -0
  179. package/dist/types/import/providers/opencode.d.ts.map +1 -0
  180. package/dist/types/import/providers/pi.d.ts +2 -0
  181. package/dist/types/import/providers/pi.d.ts.map +1 -0
  182. package/dist/types/import/types.d.ts +82 -0
  183. package/dist/types/import/types.d.ts.map +1 -0
  184. package/dist/types/index.d.ts +4 -1
  185. package/dist/types/index.d.ts.map +1 -1
  186. package/dist/types/instruction-detect.d.ts +66 -0
  187. package/dist/types/instruction-detect.d.ts.map +1 -0
  188. package/dist/types/log.d.ts +9 -0
  189. package/dist/types/log.d.ts.map +1 -1
  190. package/dist/types/ltm.d.ts +40 -0
  191. package/dist/types/ltm.d.ts.map +1 -1
  192. package/dist/types/pattern-extract.d.ts +7 -0
  193. package/dist/types/pattern-extract.d.ts.map +1 -1
  194. package/dist/types/prompt.d.ts +1 -1
  195. package/dist/types/prompt.d.ts.map +1 -1
  196. package/dist/types/recall.d.ts.map +1 -1
  197. package/dist/types/search.d.ts +5 -3
  198. package/dist/types/search.d.ts.map +1 -1
  199. package/dist/types/temporal.d.ts.map +1 -1
  200. package/dist/types/types.d.ts +1 -1
  201. package/package.json +2 -4
  202. package/src/agents-file.ts +41 -13
  203. package/src/config.ts +31 -18
  204. package/src/curator.ts +111 -75
  205. package/src/data-dir.ts +76 -0
  206. package/src/db.ts +110 -11
  207. package/src/distillation.ts +10 -2
  208. package/src/embedding-vendor.ts +23 -40
  209. package/src/embedding-worker-types.ts +19 -11
  210. package/src/embedding-worker.ts +111 -47
  211. package/src/embedding.ts +196 -171
  212. package/src/gradient.ts +9 -1
  213. package/src/import/detect.ts +37 -0
  214. package/src/import/extract.ts +137 -0
  215. package/src/import/history.ts +99 -0
  216. package/src/import/index.ts +45 -0
  217. package/src/import/providers/aider.ts +207 -0
  218. package/src/import/providers/claude-code.ts +339 -0
  219. package/src/import/providers/cline.ts +324 -0
  220. package/src/import/providers/codex.ts +369 -0
  221. package/src/import/providers/continue.ts +304 -0
  222. package/src/import/providers/index.ts +32 -0
  223. package/src/import/providers/opencode.ts +272 -0
  224. package/src/import/providers/pi.ts +332 -0
  225. package/src/import/types.ts +91 -0
  226. package/src/index.ts +5 -0
  227. package/src/instruction-detect.ts +275 -0
  228. package/src/log.ts +91 -3
  229. package/src/ltm.ts +316 -3
  230. package/src/pattern-extract.ts +41 -0
  231. package/src/prompt.ts +7 -1
  232. package/src/recall.ts +43 -5
  233. package/src/search.ts +7 -5
  234. package/src/temporal.ts +8 -6
  235. package/src/types.ts +1 -1
package/src/curator.ts CHANGED
@@ -3,6 +3,7 @@ import * as temporal from "./temporal";
3
3
  import * as ltm from "./ltm";
4
4
  import * as log from "./log";
5
5
  import { CURATOR_SYSTEM, curatorUser, CONSOLIDATION_SYSTEM, consolidationUser } from "./prompt";
6
+ import { detectAndFormat } from "./instruction-detect";
6
7
  import type { LLMClient } from "./types";
7
8
 
8
9
  /**
@@ -11,9 +12,9 @@ import type { LLMClient } from "./types";
11
12
  * The curator prompt also instructs the model to stay within this limit,
12
13
  * so truncation is a last-resort safety net.
13
14
  */
14
- const MAX_ENTRY_CONTENT_LENGTH = 1200;
15
+ export const MAX_ENTRY_CONTENT_LENGTH = 1200;
15
16
 
16
- type CuratorOp =
17
+ export type CuratorOp =
17
18
  | {
18
19
  op: "create";
19
20
  category: string;
@@ -25,7 +26,11 @@ type CuratorOp =
25
26
  | { op: "update"; id: string; content?: string; confidence?: number }
26
27
  | { op: "delete"; id: string; reason: string };
27
28
 
28
- function parseOps(text: string): CuratorOp[] {
29
+ /**
30
+ * Parse the LLM's JSON response into typed curator ops.
31
+ * Handles markdown fences and filters invalid entries.
32
+ */
33
+ export function parseOps(text: string): CuratorOp[] {
29
34
  const cleaned = text
30
35
  .trim()
31
36
  .replace(/^```json?\s*/i, "")
@@ -45,58 +50,29 @@ function parseOps(text: string): CuratorOp[] {
45
50
  }
46
51
  }
47
52
 
48
- // Track which messages we've already curated — per session to prevent
49
- // cross-session leaking (curation on session A advancing the timestamp
50
- // past session B's messages, causing B's curation to find < 3 recent).
51
- const lastCuratedAt = new Map<string, number>();
52
-
53
- export async function run(input: {
54
- llm: LLMClient;
55
- projectPath: string;
56
- sessionID: string;
57
- model?: { providerID: string; modelID: string };
58
- }): Promise<{ created: number; updated: number; deleted: number }> {
59
- const cfg = config();
60
- if (!cfg.curator.enabled) return { created: 0, updated: 0, deleted: 0 };
61
-
62
- // Get recent messages since last curation
63
- const all = temporal.bySession(input.projectPath, input.sessionID);
64
- const sessionCuratedAt = lastCuratedAt.get(input.sessionID) ?? 0;
65
- const recent = all.filter((m) => m.created_at > sessionCuratedAt);
66
- if (recent.length < 3) return { created: 0, updated: 0, deleted: 0 };
67
-
68
- const text = recent.map((m) => `[${m.role}] ${m.content}`).join("\n\n");
69
- const existing = ltm.forProject(input.projectPath, false);
70
- const existingForPrompt = existing.map((e) => ({
71
- id: e.id,
72
- category: e.category,
73
- title: e.title,
74
- content: e.content,
75
- }));
76
-
77
- const userContent = curatorUser({
78
- messages: text,
79
- existing: existingForPrompt,
80
- });
81
- const model = input.model ?? cfg.model;
82
- const responseText = await input.llm.prompt(
83
- CURATOR_SYSTEM,
84
- userContent,
85
- { model, workerID: "lore-curator", thinking: false, sessionID: input.sessionID, maxTokens: 2048 },
86
- );
87
- if (!responseText) return { created: 0, updated: 0, deleted: 0 };
88
-
89
- const ops = parseOps(responseText);
53
+ /**
54
+ * Apply a list of curator ops (create/update/delete) to the knowledge DB.
55
+ * Shared by both the live curator and the conversation import system.
56
+ *
57
+ * @returns Counts of applied operations.
58
+ */
59
+ export function applyOps(
60
+ ops: CuratorOp[],
61
+ input: {
62
+ projectPath?: string;
63
+ sessionID?: string;
64
+ /** If true, skip "create" ops (used by consolidation). */
65
+ skipCreate?: boolean;
66
+ },
67
+ ): { created: number; updated: number; deleted: number } {
90
68
  let created = 0;
91
69
  let updated = 0;
92
70
  let deleted = 0;
93
-
94
71
  const idsToSync: string[] = [];
95
72
 
96
73
  for (const op of ops) {
97
74
  if (op.op === "create") {
98
- // Truncate oversized content — the model should stay within the prompt's
99
- // 500-word limit, but enforce it here as a hard safety net.
75
+ if (input.skipCreate) continue;
100
76
  const content =
101
77
  op.content.length > MAX_ENTRY_CONTENT_LENGTH
102
78
  ? op.content.slice(0, MAX_ENTRY_CONTENT_LENGTH) +
@@ -139,10 +115,90 @@ export async function run(input: {
139
115
  ltm.syncRefs(id);
140
116
  }
141
117
 
142
- lastCuratedAt.set(input.sessionID, Date.now());
143
118
  return { created, updated, deleted };
144
119
  }
145
120
 
121
+ // Track which messages we've already curated — per session to prevent
122
+ // cross-session leaking (curation on session A advancing the timestamp
123
+ // past session B's messages, causing B's curation to find < 3 recent).
124
+ const lastCuratedAt = new Map<string, number>();
125
+
126
+ export async function run(input: {
127
+ llm: LLMClient;
128
+ projectPath: string;
129
+ sessionID: string;
130
+ model?: { providerID: string; modelID: string };
131
+ }): Promise<{ created: number; updated: number; deleted: number }> {
132
+ const cfg = config();
133
+ if (!cfg.curator.enabled) return { created: 0, updated: 0, deleted: 0 };
134
+
135
+ // Get recent messages since last curation
136
+ const all = temporal.bySession(input.projectPath, input.sessionID);
137
+ const sessionCuratedAt = lastCuratedAt.get(input.sessionID) ?? 0;
138
+ const recent = all.filter((m) => m.created_at > sessionCuratedAt);
139
+ if (recent.length < 3) return { created: 0, updated: 0, deleted: 0 };
140
+
141
+ const text = recent.map((m) => `[${m.role}] ${m.content}`).join("\n\n");
142
+ const existing = ltm.forProject(input.projectPath, false);
143
+ const existingForPrompt = existing.map((e) => ({
144
+ id: e.id,
145
+ category: e.category,
146
+ title: e.title,
147
+ content: e.content,
148
+ }));
149
+
150
+ const baseUserContent = curatorUser({
151
+ messages: text,
152
+ existing: existingForPrompt,
153
+ });
154
+
155
+ // Detect repeated instructions across prior sessions and append as
156
+ // additional context for the curator. This is async (may embed candidates)
157
+ // but fast — typically <250ms for 5 candidates with local embeddings.
158
+ let crossSessionContext = "";
159
+ try {
160
+ crossSessionContext = await detectAndFormat({
161
+ projectPath: input.projectPath,
162
+ sessionID: input.sessionID,
163
+ });
164
+ } catch (err) {
165
+ log.warn("instruction-detect failed (non-fatal):", err);
166
+ }
167
+
168
+ const userContent = baseUserContent + crossSessionContext;
169
+ const model = input.model ?? cfg.model;
170
+ const responseText = await input.llm.prompt(
171
+ CURATOR_SYSTEM,
172
+ userContent,
173
+ { model, workerID: "lore-curator", thinking: false, sessionID: input.sessionID, maxTokens: 2048 },
174
+ );
175
+ if (!responseText) return { created: 0, updated: 0, deleted: 0 };
176
+
177
+ const ops = parseOps(responseText);
178
+ const result = applyOps(ops, {
179
+ projectPath: input.projectPath,
180
+ sessionID: input.sessionID,
181
+ });
182
+
183
+ // Post-curation dedup sweep: if the curator created new entries, check for
184
+ // and auto-merge any semantic duplicates it introduced. Uses embedding-based
185
+ // similarity when available, falls back to word-overlap.
186
+ if (result.created > 0) {
187
+ try {
188
+ const dupes = await ltm.deduplicate(input.projectPath, { dryRun: false });
189
+ if (dupes.totalRemoved > 0) {
190
+ log.info(`post-curation dedup: merged ${dupes.totalRemoved} duplicate entries`);
191
+ result.deleted += dupes.totalRemoved;
192
+ }
193
+ } catch (err) {
194
+ log.warn("post-curation dedup failed (non-fatal):", err);
195
+ }
196
+ }
197
+
198
+ lastCuratedAt.set(input.sessionID, Date.now());
199
+ return result;
200
+ }
201
+
146
202
  export function resetCurationTracker(sessionID?: string) {
147
203
  if (sessionID) {
148
204
  lastCuratedAt.delete(sessionID);
@@ -190,31 +246,11 @@ export async function consolidate(input: {
190
246
  if (!responseText) return { updated: 0, deleted: 0 };
191
247
 
192
248
  const ops = parseOps(responseText);
193
- let updated = 0;
194
- let deleted = 0;
195
-
196
- for (const op of ops) {
197
- // Consolidation only applies update and delete — never create.
198
- if (op.op === "update") {
199
- const entry = ltm.get(op.id);
200
- if (entry) {
201
- const content =
202
- op.content !== undefined && op.content.length > MAX_ENTRY_CONTENT_LENGTH
203
- ? op.content.slice(0, MAX_ENTRY_CONTENT_LENGTH) +
204
- " [truncated — entry too long]"
205
- : op.content;
206
- ltm.update(op.id, { content, confidence: op.confidence });
207
- updated++;
208
- }
209
- } else if (op.op === "delete") {
210
- const entry = ltm.get(op.id);
211
- if (entry) {
212
- ltm.remove(op.id);
213
- deleted++;
214
- }
215
- }
216
- // "create" ops are silently ignored — consolidation must not add entries.
217
- }
249
+ const result = applyOps(ops, {
250
+ projectPath: input.projectPath,
251
+ sessionID: input.sessionID,
252
+ skipCreate: true, // Consolidation must not add entries.
253
+ });
218
254
 
219
- return { updated, deleted };
255
+ return { updated: result.updated, deleted: result.deleted };
220
256
  }
@@ -0,0 +1,76 @@
1
+ /**
2
+ * Shared data-directory path resolution with one-time migration from the
3
+ * legacy `opencode-lore` directory name to `lore`.
4
+ *
5
+ * Both `db.ts` and `log.ts` need the data directory path. This module
6
+ * provides a single source of truth so the path logic is not duplicated.
7
+ */
8
+
9
+ import { existsSync, renameSync } from "node:fs";
10
+ import { join } from "node:path";
11
+ import { homedir } from "node:os";
12
+
13
+ const OLD_DIR_NAME = "opencode-lore";
14
+ const NEW_DIR_NAME = "lore";
15
+
16
+ let migrationAttempted = false;
17
+
18
+ /**
19
+ * Compute the XDG-compliant base directory for lore data.
20
+ * Respects `$XDG_DATA_HOME`, defaults to `~/.local/share`.
21
+ */
22
+ function baseDir(): string {
23
+ return process.env.XDG_DATA_HOME || join(homedir(), ".local", "share");
24
+ }
25
+
26
+ /**
27
+ * Attempt a one-time migration of the legacy data directory.
28
+ *
29
+ * - Old exists, new does not → atomic `renameSync` (same filesystem).
30
+ * - Both exist → keep new (user already migrated or has fresh data).
31
+ * - Neither exists → no-op; callers create the dir via `mkdirSync`.
32
+ *
33
+ * Runs at most once per process. Errors are swallowed — migration
34
+ * failure is not fatal because callers create the directory anyway.
35
+ */
36
+ function migrateDataDir(): void {
37
+ if (migrationAttempted) return;
38
+ migrationAttempted = true;
39
+
40
+ // Tests use LORE_DB_PATH pointed at a temp dir; never touch the real
41
+ // data directory.
42
+ if (process.env.NODE_ENV === "test") return;
43
+
44
+ const base = baseDir();
45
+ const oldDir = join(base, OLD_DIR_NAME);
46
+ const newDir = join(base, NEW_DIR_NAME);
47
+
48
+ try {
49
+ if (existsSync(oldDir) && !existsSync(newDir)) {
50
+ renameSync(oldDir, newDir);
51
+ // Can't use the lore logger here (circular dep), so use stderr.
52
+ console.error(`[lore] migrated data directory: ${oldDir} → ${newDir}`);
53
+ }
54
+ } catch {
55
+ // Permission error, cross-device rename, concurrent process already
56
+ // renamed it, etc. Not fatal — dataDir() returns the new path and
57
+ // callers create the directory if it doesn't exist yet.
58
+ }
59
+ }
60
+
61
+ /**
62
+ * Return the resolved data directory path (`~/.local/share/lore` by
63
+ * default), running the legacy-directory migration on the first call.
64
+ *
65
+ * **Callers are responsible for creating the directory** — this function
66
+ * does not call `mkdirSync`.
67
+ */
68
+ export function dataDir(): string {
69
+ migrateDataDir();
70
+ return join(baseDir(), NEW_DIR_NAME);
71
+ }
72
+
73
+ /** @internal Visible for testing only — resets the one-shot guard. */
74
+ export function _resetMigrationFlag(): void {
75
+ migrationAttempted = false;
76
+ }
package/src/db.ts CHANGED
@@ -1,8 +1,8 @@
1
1
  import { Database } from "#db/driver";
2
2
  import { join, dirname } from "path";
3
3
  import { mkdirSync } from "fs";
4
- import { homedir } from "os";
5
4
  import { getGitRemote } from "./git";
5
+ import { dataDir } from "./data-dir";
6
6
 
7
7
  /**
8
8
  * Extract the repository name from a normalized git remote URL.
@@ -453,14 +453,70 @@ const MIGRATIONS: string[] = [
453
453
  ALTER TABLE session_state ADD COLUMN ttl_hits INTEGER NOT NULL DEFAULT 0;
454
454
  ALTER TABLE session_state ADD COLUMN batch_savings REAL NOT NULL DEFAULT 0;
455
455
  `,
456
+ `
457
+ -- Version 19: Import history for conversation import idempotency.
458
+ -- Tracks which external agent sessions have been imported to prevent
459
+ -- re-importing unchanged sources and to record user-declined imports.
460
+ CREATE TABLE IF NOT EXISTS import_history (
461
+ id TEXT PRIMARY KEY,
462
+ project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
463
+ agent_name TEXT NOT NULL,
464
+ source_id TEXT NOT NULL,
465
+ source_hash TEXT NOT NULL,
466
+ entries_created INTEGER NOT NULL DEFAULT 0,
467
+ entries_updated INTEGER NOT NULL DEFAULT 0,
468
+ imported_at INTEGER NOT NULL,
469
+ UNIQUE(project_id, agent_name, source_id)
470
+ );
471
+ CREATE INDEX IF NOT EXISTS idx_import_history_project ON import_history(project_id);
472
+ `,
473
+ `
474
+ -- Version 20: Purge worker boilerplate from temporal messages.
475
+ -- Legacy gateway/plugin worker calls (distillation observer, curator,
476
+ -- consolidation, reflector, eval) stored their full system prompts
477
+ -- (containing entire conversation transcripts, up to 1.6MB each) as
478
+ -- temporal messages. These pollute FTS search results by matching
479
+ -- virtually any domain keyword. Safe to delete: their actual output
480
+ -- (distillations, knowledge entries) is stored in dedicated tables.
481
+ DELETE FROM temporal_messages WHERE content LIKE '%You are a memory observer.%'
482
+ OR content LIKE '%You are a long-term memory curator.%'
483
+ OR content LIKE '%You are a long-term memory curator performing a consolidation pass.%'
484
+ OR content LIKE '%You are a memory reflector.%'
485
+ OR content LIKE '%You are evaluating distillation quality.%';
486
+ `,
487
+ `
488
+ -- Version 21: Persist avoided compaction data from live sessions.
489
+ -- Historical estimates previously re-simulated avoided compactions from
490
+ -- temporal message token estimates (chars/3), missing system prompt and
491
+ -- tool definition overhead. Persisting the live session's real shadow
492
+ -- context tracking (from actual API-reported total input tokens) gives
493
+ -- accurate post-restart historical estimates.
494
+ ALTER TABLE session_state ADD COLUMN avoided_compactions INTEGER NOT NULL DEFAULT 0;
495
+ ALTER TABLE session_state ADD COLUMN avoided_compaction_cost REAL NOT NULL DEFAULT 0;
496
+ `,
497
+ `
498
+ -- Version 22: Track when conversation import was last offered/run.
499
+ -- NULL means import has never been offered for this project.
500
+ -- Used by auto-import to avoid re-prompting, and by explicit
501
+ -- \`lore import\` for incremental imports (only newer conversations).
502
+ ALTER TABLE projects ADD COLUMN last_import_at INTEGER;
503
+
504
+ -- Backfill: migrate legacy __declined__ sentinel rows so existing
505
+ -- users who previously declined are not re-prompted after upgrading.
506
+ UPDATE projects SET last_import_at = (
507
+ SELECT ih.imported_at FROM import_history ih
508
+ WHERE ih.project_id = projects.id
509
+ AND ih.source_id = '__declined__'
510
+ LIMIT 1
511
+ )
512
+ WHERE EXISTS (
513
+ SELECT 1 FROM import_history ih
514
+ WHERE ih.project_id = projects.id
515
+ AND ih.source_id = '__declined__'
516
+ );
517
+ `,
456
518
  ];
457
519
 
458
- function dataDir() {
459
- const xdg = process.env.XDG_DATA_HOME;
460
- const base = xdg || join(homedir(), ".local", "share");
461
- return join(base, "opencode-lore");
462
- }
463
-
464
520
  /** Return the resolved path of the SQLite database file. */
465
521
  export function dbPath(): string {
466
522
  const envPath = process.env.LORE_DB_PATH;
@@ -807,6 +863,33 @@ export function isFirstRun(): boolean {
807
863
  return row.count === 0;
808
864
  }
809
865
 
866
+ // ---------------------------------------------------------------------------
867
+ // Conversation import tracking
868
+ // ---------------------------------------------------------------------------
869
+
870
+ /**
871
+ * Get the timestamp of the last conversation import offer/run for a project.
872
+ * Returns null if import has never been offered for this project.
873
+ */
874
+ export function getLastImportAt(projectPath: string): number | null {
875
+ const id = ensureProject(projectPath);
876
+ const row = db()
877
+ .query("SELECT last_import_at FROM projects WHERE id = ?")
878
+ .get(id) as { last_import_at: number | null } | null;
879
+ return row?.last_import_at ?? null;
880
+ }
881
+
882
+ /**
883
+ * Record that conversation import was offered/run for a project.
884
+ * Prevents auto-import from re-prompting, and enables incremental imports.
885
+ */
886
+ export function setLastImportAt(projectPath: string, timestamp: number): void {
887
+ const id = ensureProject(projectPath);
888
+ db()
889
+ .query("UPDATE projects SET last_import_at = ? WHERE id = ?")
890
+ .run(timestamp, id);
891
+ }
892
+
810
893
  // ---------------------------------------------------------------------------
811
894
  // Persistent session state (error recovery)
812
895
  // ---------------------------------------------------------------------------
@@ -851,6 +934,8 @@ export type SessionCostSnapshot = {
851
934
  ttlSavings: number;
852
935
  ttlHits: number;
853
936
  batchSavings: number;
937
+ avoidedCompactions: number;
938
+ avoidedCompactionCost: number;
854
939
  };
855
940
 
856
941
  /**
@@ -863,8 +948,9 @@ export function saveSessionCosts(sessionID: string, costs: SessionCostSnapshot):
863
948
  `INSERT INTO session_state (session_id, force_min_layer, updated_at,
864
949
  conversation_cost, worker_cost, conversation_turns,
865
950
  cache_read_tokens, cache_write_tokens,
866
- warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings)
867
- VALUES (?, COALESCE((SELECT force_min_layer FROM session_state WHERE session_id = ?), 0), ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
951
+ warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings,
952
+ avoided_compactions, avoided_compaction_cost)
953
+ VALUES (?, COALESCE((SELECT force_min_layer FROM session_state WHERE session_id = ?), 0), ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
868
954
  ON CONFLICT(session_id) DO UPDATE SET
869
955
  conversation_cost = excluded.conversation_cost,
870
956
  worker_cost = excluded.worker_cost,
@@ -876,6 +962,8 @@ export function saveSessionCosts(sessionID: string, costs: SessionCostSnapshot):
876
962
  ttl_savings = excluded.ttl_savings,
877
963
  ttl_hits = excluded.ttl_hits,
878
964
  batch_savings = excluded.batch_savings,
965
+ avoided_compactions = excluded.avoided_compactions,
966
+ avoided_compaction_cost = excluded.avoided_compaction_cost,
879
967
  updated_at = excluded.updated_at`,
880
968
  )
881
969
  .run(
@@ -883,6 +971,7 @@ export function saveSessionCosts(sessionID: string, costs: SessionCostSnapshot):
883
971
  costs.conversationCost, costs.workerCost, costs.conversationTurns,
884
972
  costs.cacheReadTokens, costs.cacheWriteTokens,
885
973
  costs.warmupSavings, costs.warmupHits, costs.ttlSavings, costs.ttlHits, costs.batchSavings,
974
+ costs.avoidedCompactions, costs.avoidedCompactionCost,
886
975
  );
887
976
  }
888
977
 
@@ -895,7 +984,8 @@ export function loadSessionCosts(sessionID: string): SessionCostSnapshot | null
895
984
  .query(
896
985
  `SELECT conversation_cost, worker_cost, conversation_turns,
897
986
  cache_read_tokens, cache_write_tokens,
898
- warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings
987
+ warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings,
988
+ avoided_compactions, avoided_compaction_cost
899
989
  FROM session_state WHERE session_id = ?`,
900
990
  )
901
991
  .get(sessionID) as {
@@ -909,6 +999,8 @@ export function loadSessionCosts(sessionID: string): SessionCostSnapshot | null
909
999
  ttl_savings: number;
910
1000
  ttl_hits: number;
911
1001
  batch_savings: number;
1002
+ avoided_compactions: number;
1003
+ avoided_compaction_cost: number;
912
1004
  } | null;
913
1005
  if (!row) return null;
914
1006
  return {
@@ -922,6 +1014,8 @@ export function loadSessionCosts(sessionID: string): SessionCostSnapshot | null
922
1014
  ttlSavings: row.ttl_savings,
923
1015
  ttlHits: row.ttl_hits,
924
1016
  batchSavings: row.batch_savings,
1017
+ avoidedCompactions: row.avoided_compactions,
1018
+ avoidedCompactionCost: row.avoided_compaction_cost,
925
1019
  };
926
1020
  }
927
1021
 
@@ -934,7 +1028,8 @@ export function loadAllSessionCosts(): Map<string, SessionCostSnapshot> {
934
1028
  .query(
935
1029
  `SELECT session_id, conversation_cost, worker_cost, conversation_turns,
936
1030
  cache_read_tokens, cache_write_tokens,
937
- warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings
1031
+ warmup_savings, warmup_hits, ttl_savings, ttl_hits, batch_savings,
1032
+ avoided_compactions, avoided_compaction_cost
938
1033
  FROM session_state
939
1034
  WHERE conversation_turns > 0 OR warmup_savings > 0 OR ttl_savings > 0 OR batch_savings > 0`,
940
1035
  )
@@ -950,6 +1045,8 @@ export function loadAllSessionCosts(): Map<string, SessionCostSnapshot> {
950
1045
  ttl_savings: number;
951
1046
  ttl_hits: number;
952
1047
  batch_savings: number;
1048
+ avoided_compactions: number;
1049
+ avoided_compaction_cost: number;
953
1050
  }>;
954
1051
  const result = new Map<string, SessionCostSnapshot>();
955
1052
  for (const row of rows) {
@@ -964,6 +1061,8 @@ export function loadAllSessionCosts(): Map<string, SessionCostSnapshot> {
964
1061
  ttlSavings: row.ttl_savings,
965
1062
  ttlHits: row.ttl_hits,
966
1063
  batchSavings: row.batch_savings,
1064
+ avoidedCompactions: row.avoided_compactions,
1065
+ avoidedCompactionCost: row.avoided_compaction_cost,
967
1066
  });
968
1067
  }
969
1068
  return result;
@@ -811,7 +811,8 @@ async function distillSegment(input: {
811
811
 
812
812
  // Fire-and-forget: extract decision/preference patterns → knowledge entries
813
813
  if (config().knowledge.enabled) {
814
- for (const pat of extractPatterns(result.observations)) {
814
+ const patterns = extractPatterns(result.observations);
815
+ for (const pat of patterns) {
815
816
  try {
816
817
  ltm.create({
817
818
  projectPath: input.projectPath,
@@ -825,6 +826,9 @@ async function distillSegment(input: {
825
826
  // Dedup guard in ltm.create() handles duplicates — swallow errors
826
827
  }
827
828
  }
829
+ if (patterns.length > 0) {
830
+ log.info(`pattern extraction: ${patterns.length} entries from distillation`);
831
+ }
828
832
  }
829
833
 
830
834
  return result;
@@ -928,7 +932,8 @@ export async function metaDistill(input: {
928
932
 
929
933
  // Fire-and-forget: extract decision/preference patterns → knowledge entries
930
934
  if (config().knowledge.enabled) {
931
- for (const pat of extractPatterns(result.observations)) {
935
+ const patterns = extractPatterns(result.observations);
936
+ for (const pat of patterns) {
932
937
  try {
933
938
  ltm.create({
934
939
  projectPath: input.projectPath,
@@ -942,6 +947,9 @@ export async function metaDistill(input: {
942
947
  // Dedup guard in ltm.create() handles duplicates — swallow errors
943
948
  }
944
949
  }
950
+ if (patterns.length > 0) {
951
+ log.info(`pattern extraction: ${patterns.length} entries from meta-distillation`);
952
+ }
945
953
  }
946
954
 
947
955
  return result;
@@ -1,31 +1,19 @@
1
1
  /**
2
- * Vendored bge-small registration for the standalone Lore binary.
2
+ * Vendored model registration for the standalone Lore binary.
3
3
  *
4
- * The Bun-compiled `lore` binary uses `bun build --compile` to bundle
5
- * `fastembed` + `onnxruntime-node` + `@anush008/tokenizers-<platform>`
6
- * directly into the executable including the platform-specific
7
- * `.node` addons which Bun embeds and dlopens from `$bunfs` at runtime.
4
+ * The Bun-compiled `lore` binary bundles `@huggingface/transformers`
5
+ * (which includes ONNX Runtime) into the executable. The model weights
6
+ * and tokenizer files (nomic-embed-text-v1.5, INT8 quantized) are
7
+ * embedded as Bun assets and extracted to a cache dir on first run.
8
8
  *
9
- * Two pieces don't fit into Bun's automatic bundling and need our help:
10
- *
11
- * 1. **Side-load shared libraries**. `onnxruntime_binding.node` does a
12
- * runtime `dlopen("libonnxruntime.so.1")` (or the .dylib / .dll
13
- * equivalent) for the actual ONNX Runtime computation library. Bun
14
- * doesn't follow this kind of dependency. The binary's wrapper
15
- * pre-loads these libs via `bun:ffi` *before* fastembed evaluates,
16
- * so when the addon's dlopen fires it finds the cached handle.
17
- *
18
- * 2. **Model weights + tokenizer**. fastembed downloads from the HF
19
- * Hub on first use; we want zero network on first run. The wrapper
20
- * embeds the bge-small INT8 files as Bun assets, writes them to a
21
- * real disk dir on first run, and sets `globalThis.__LORE_VENDOR_MODEL__`
22
- * to that path. This module exposes that registration to the
23
- * LocalProvider so it can hand the path to fastembed's CUSTOM-mode
24
- * init (`modelAbsoluteDirPath` + `modelName`).
9
+ * The binary's wrapper sets `globalThis.__LORE_VENDOR_MODEL__` to the
10
+ * extraction path. This module exposes that registration to the
11
+ * `LocalProvider` so it can configure the transformers.js `env` to
12
+ * load from the local path instead of downloading from HuggingFace Hub.
25
13
  *
26
14
  * In npm-mode usage from `@loreai/opencode` / `@loreai/pi` the global
27
- * is unset and `vendorModelInfo()` returns `null`, so the LocalProvider
28
- * falls through to fastembed's default Qdrant repo + cache.
15
+ * is unset and `vendorModelInfo()` returns `null`, so transformers.js
16
+ * downloads the model from HF Hub on first use and caches it locally.
29
17
  */
30
18
 
31
19
  // ---------------------------------------------------------------------------
@@ -34,13 +22,10 @@
34
22
 
35
23
  /** What the binary wrapper writes to globalThis after extracting model files. */
36
24
  export interface VendorRegistration {
37
- /** Absolute path to the dir containing the bge-small files
38
- * (config.json, tokenizer.json, model_quantized.onnx, …). Pass to
39
- * fastembed as `modelAbsoluteDirPath` in CUSTOM init. */
40
- modelAbsoluteDirPath: string;
41
- /** Filename of the ONNX weights inside that dir. Pass to fastembed
42
- * as `modelName` in CUSTOM init. */
43
- modelName: string;
25
+ /** Absolute path to the dir containing the model files in HF layout
26
+ * (config.json, tokenizer.json, onnx/model_quantized.onnx, …).
27
+ * Passed to transformers.js as `env.localModelPath`. */
28
+ localModelPath: string;
44
29
  /** Target identifier the binary was built for, e.g. "linux-x64".
45
30
  * Diagnostic only — the runtime doesn't branch on it. */
46
31
  target: string;
@@ -69,25 +54,23 @@ export function _setVendorRegistration(reg: VendorRegistration | null): void {
69
54
  // Public entry
70
55
  // ---------------------------------------------------------------------------
71
56
 
72
- /** Subset of the registration fastembed needs. Stripped of the
73
- * diagnostic fields so the LocalProvider has exactly what it should
74
- * hand to `FlagEmbedding.init`. */
57
+ /** Subset of the registration the LocalProvider needs. */
75
58
  export interface VendorModelInfo {
76
- modelAbsoluteDirPath: string;
77
- modelName: string;
59
+ /** Absolute path to the dir containing the model in HF layout.
60
+ * Set as `env.localModelPath` in the worker thread. */
61
+ localModelPath: string;
78
62
  }
79
63
 
80
64
  /**
81
- * Resolve the bundled-model arguments for fastembed CUSTOM init. Returns
82
- * `null` when no vendor is registered (npm-mode), so the caller can fall
83
- * through to fastembed's default cacheDir/HF Hub flow.
65
+ * Resolve the vendored model path for transformers.js local loading.
66
+ * Returns `null` when no vendor is registered (npm-mode), so the caller
67
+ * falls through to transformers.js's default HF Hub download + cache.
84
68
  */
85
69
  export function vendorModelInfo(): VendorModelInfo | null {
86
70
  const reg = getRegistration();
87
71
  if (!reg) return null;
88
72
  return {
89
- modelAbsoluteDirPath: reg.modelAbsoluteDirPath,
90
- modelName: reg.modelName,
73
+ localModelPath: reg.localModelPath,
91
74
  };
92
75
  }
93
76