@traqr/memory 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. package/README.md +135 -0
  2. package/dist/index.d.ts +34 -0
  3. package/dist/index.d.ts.map +1 -0
  4. package/dist/index.js +38 -0
  5. package/dist/index.js.map +1 -0
  6. package/dist/lib/auth.d.ts +18 -0
  7. package/dist/lib/auth.d.ts.map +1 -0
  8. package/dist/lib/auth.js +31 -0
  9. package/dist/lib/auth.js.map +1 -0
  10. package/dist/lib/auto-derive.d.ts +35 -0
  11. package/dist/lib/auto-derive.js +261 -0
  12. package/dist/lib/auto-derive.js.map +1 -0
  13. package/dist/lib/borderline.d.ts +26 -0
  14. package/dist/lib/borderline.js +121 -0
  15. package/dist/lib/borderline.js.map +1 -0
  16. package/dist/lib/client.d.ts +28 -0
  17. package/dist/lib/client.d.ts.map +1 -0
  18. package/dist/lib/client.js +60 -0
  19. package/dist/lib/client.js.map +1 -0
  20. package/dist/lib/context.d.ts +38 -0
  21. package/dist/lib/context.d.ts.map +1 -0
  22. package/dist/lib/context.js +334 -0
  23. package/dist/lib/context.js.map +1 -0
  24. package/dist/lib/embeddings.d.ts +60 -0
  25. package/dist/lib/embeddings.d.ts.map +1 -0
  26. package/dist/lib/embeddings.js +229 -0
  27. package/dist/lib/embeddings.js.map +1 -0
  28. package/dist/lib/entity-pipeline.d.ts +23 -0
  29. package/dist/lib/entity-pipeline.js +151 -0
  30. package/dist/lib/entity-pipeline.js.map +1 -0
  31. package/dist/lib/formatting.d.ts +13 -0
  32. package/dist/lib/formatting.d.ts.map +1 -0
  33. package/dist/lib/formatting.js +60 -0
  34. package/dist/lib/formatting.js.map +1 -0
  35. package/dist/lib/learning-extractor.d.ts +144 -0
  36. package/dist/lib/learning-extractor.d.ts.map +1 -0
  37. package/dist/lib/learning-extractor.js +921 -0
  38. package/dist/lib/learning-extractor.js.map +1 -0
  39. package/dist/lib/lifecycle.d.ts +45 -0
  40. package/dist/lib/lifecycle.js +84 -0
  41. package/dist/lib/lifecycle.js.map +1 -0
  42. package/dist/lib/memory.d.ts +128 -0
  43. package/dist/lib/memory.d.ts.map +1 -0
  44. package/dist/lib/memory.js +590 -0
  45. package/dist/lib/memory.js.map +1 -0
  46. package/dist/lib/quality-gate.d.ts +32 -0
  47. package/dist/lib/quality-gate.js +158 -0
  48. package/dist/lib/quality-gate.js.map +1 -0
  49. package/dist/lib/quality-gate.test.d.ts +7 -0
  50. package/dist/lib/quality-gate.test.js +75 -0
  51. package/dist/lib/quality-gate.test.js.map +1 -0
  52. package/dist/lib/rerank.d.ts +22 -0
  53. package/dist/lib/rerank.js +61 -0
  54. package/dist/lib/rerank.js.map +1 -0
  55. package/dist/lib/retrieval.d.ts +75 -0
  56. package/dist/lib/retrieval.js +380 -0
  57. package/dist/lib/retrieval.js.map +1 -0
  58. package/dist/migrate.d.ts +17 -0
  59. package/dist/migrate.d.ts.map +1 -0
  60. package/dist/migrate.js +81 -0
  61. package/dist/migrate.js.map +1 -0
  62. package/dist/routes/analyze-codebase.d.ts +9 -0
  63. package/dist/routes/analyze-codebase.d.ts.map +1 -0
  64. package/dist/routes/analyze-codebase.js +70 -0
  65. package/dist/routes/analyze-codebase.js.map +1 -0
  66. package/dist/routes/analyze-voice.d.ts +9 -0
  67. package/dist/routes/analyze-voice.d.ts.map +1 -0
  68. package/dist/routes/analyze-voice.js +63 -0
  69. package/dist/routes/analyze-voice.js.map +1 -0
  70. package/dist/routes/assemble-context.d.ts +9 -0
  71. package/dist/routes/assemble-context.d.ts.map +1 -0
  72. package/dist/routes/assemble-context.js +68 -0
  73. package/dist/routes/assemble-context.js.map +1 -0
  74. package/dist/routes/bootstrap.d.ts +12 -0
  75. package/dist/routes/bootstrap.d.ts.map +1 -0
  76. package/dist/routes/bootstrap.js +102 -0
  77. package/dist/routes/bootstrap.js.map +1 -0
  78. package/dist/routes/browse.d.ts +11 -0
  79. package/dist/routes/browse.js +85 -0
  80. package/dist/routes/browse.js.map +1 -0
  81. package/dist/routes/capture-thought.d.ts +13 -0
  82. package/dist/routes/capture-thought.d.ts.map +1 -0
  83. package/dist/routes/capture-thought.js +178 -0
  84. package/dist/routes/capture-thought.js.map +1 -0
  85. package/dist/routes/capture.d.ts +13 -0
  86. package/dist/routes/capture.d.ts.map +1 -0
  87. package/dist/routes/capture.js +86 -0
  88. package/dist/routes/capture.js.map +1 -0
  89. package/dist/routes/cite.d.ts +9 -0
  90. package/dist/routes/cite.d.ts.map +1 -0
  91. package/dist/routes/cite.js +49 -0
  92. package/dist/routes/cite.js.map +1 -0
  93. package/dist/routes/crud.d.ts +11 -0
  94. package/dist/routes/crud.d.ts.map +1 -0
  95. package/dist/routes/crud.js +176 -0
  96. package/dist/routes/crud.js.map +1 -0
  97. package/dist/routes/dashboard.d.ts +9 -0
  98. package/dist/routes/dashboard.d.ts.map +1 -0
  99. package/dist/routes/dashboard.js +85 -0
  100. package/dist/routes/dashboard.js.map +1 -0
  101. package/dist/routes/entity-cron.d.ts +8 -0
  102. package/dist/routes/entity-cron.js +31 -0
  103. package/dist/routes/entity-cron.js.map +1 -0
  104. package/dist/routes/export.d.ts +8 -0
  105. package/dist/routes/export.d.ts.map +1 -0
  106. package/dist/routes/export.js +69 -0
  107. package/dist/routes/export.js.map +1 -0
  108. package/dist/routes/extract-pr-learnings.d.ts +12 -0
  109. package/dist/routes/extract-pr-learnings.d.ts.map +1 -0
  110. package/dist/routes/extract-pr-learnings.js +127 -0
  111. package/dist/routes/extract-pr-learnings.js.map +1 -0
  112. package/dist/routes/forget-cron.d.ts +9 -0
  113. package/dist/routes/forget-cron.js +30 -0
  114. package/dist/routes/forget-cron.js.map +1 -0
  115. package/dist/routes/learnings.d.ts +9 -0
  116. package/dist/routes/learnings.d.ts.map +1 -0
  117. package/dist/routes/learnings.js +237 -0
  118. package/dist/routes/learnings.js.map +1 -0
  119. package/dist/routes/pulse.d.ts +9 -0
  120. package/dist/routes/pulse.d.ts.map +1 -0
  121. package/dist/routes/pulse.js +133 -0
  122. package/dist/routes/pulse.js.map +1 -0
  123. package/dist/routes/search.d.ts +8 -0
  124. package/dist/routes/search.d.ts.map +1 -0
  125. package/dist/routes/search.js +107 -0
  126. package/dist/routes/search.js.map +1 -0
  127. package/dist/routes/store.d.ts +8 -0
  128. package/dist/routes/store.d.ts.map +1 -0
  129. package/dist/routes/store.js +89 -0
  130. package/dist/routes/store.js.map +1 -0
  131. package/dist/routes/sync.d.ts +12 -0
  132. package/dist/routes/sync.d.ts.map +1 -0
  133. package/dist/routes/sync.js +83 -0
  134. package/dist/routes/sync.js.map +1 -0
  135. package/dist/routes/voice-profile.d.ts +9 -0
  136. package/dist/routes/voice-profile.d.ts.map +1 -0
  137. package/dist/routes/voice-profile.js +124 -0
  138. package/dist/routes/voice-profile.js.map +1 -0
  139. package/dist/server.d.ts +37 -0
  140. package/dist/server.d.ts.map +1 -0
  141. package/dist/server.js +99 -0
  142. package/dist/server.js.map +1 -0
  143. package/dist/vectordb/index.d.ts +17 -0
  144. package/dist/vectordb/index.d.ts.map +1 -0
  145. package/dist/vectordb/index.js +39 -0
  146. package/dist/vectordb/index.js.map +1 -0
  147. package/dist/vectordb/supabase.d.ts +62 -0
  148. package/dist/vectordb/supabase.d.ts.map +1 -0
  149. package/dist/vectordb/supabase.js +711 -0
  150. package/dist/vectordb/supabase.js.map +1 -0
  151. package/dist/vectordb/types.d.ts +217 -0
  152. package/dist/vectordb/types.d.ts.map +1 -0
  153. package/dist/vectordb/types.js +28 -0
  154. package/dist/vectordb/types.js.map +1 -0
  155. package/package.json +49 -0
  156. package/setup.sql +1037 -0
@@ -0,0 +1,921 @@
1
+ /**
2
+ * Learning Extractor — Portable
3
+ *
4
+ * Auto-extract learnings from PRs, sessions, codebases, and voice samples.
5
+ * Uses OpenAI for extraction, stores in @traqr/memory.
6
+ *
7
+ * Portable version: no NookTraqr-specific imports. Uses package-internal
8
+ * memory operations and env-based project identity.
9
+ */
10
+ import OpenAI from 'openai';
11
+ import { storeMemory, storeWithDedup, searchMemories } from './memory.js';
12
+ import { passesQualityGate } from './quality-gate.js';
13
/**
 * Build an OpenAI client from the environment.
 *
 * @returns {OpenAI|null} A configured client, or null when OPENAI_API_KEY
 *   is unset (callers treat null as "skip extraction").
 */
function getOpenAIClient() {
    const apiKey = process.env.OPENAI_API_KEY;
    return apiKey ? new OpenAI({ apiKey }) : null;
}
19
+ // ============================================================
20
+ // Project Identity
21
+ // ============================================================
22
+ /**
23
+ * Resolve the current project slug for memory tagging.
24
+ * Checks env var first (set by daemon/templates), falls back to 'default'.
25
+ */
26
/**
 * Resolve the project slug used to tag stored memories.
 *
 * Precedence: TRAQR_SOURCE_PROJECT, then NEXT_PUBLIC_TRAQR_PROJECT_SLUG,
 * then the literal 'default'. Uses `||` deliberately so an empty-string env
 * var also falls through to the next candidate.
 */
export function getSourceProject() {
    const { TRAQR_SOURCE_PROJECT, NEXT_PUBLIC_TRAQR_PROJECT_SLUG } = process.env;
    return TRAQR_SOURCE_PROJECT || NEXT_PUBLIC_TRAQR_PROJECT_SLUG || 'default';
}
29
+ // ============================================================
30
+ // Constants
31
+ // ============================================================
32
// Prompt template for PR-based extraction. `{{name}}` placeholders and
// `{{#if name}}…{{/if}}` sections are substituted by callClaudeForExtraction
// (optional sections are wrapped in HTML comments when their value is absent).
// Fix: the JSON example's category list previously omitted
// "preference"/"convention", contradicting the prose above it and
// VALID_CATEGORIES, which biased the model away from two requested categories.
const EXTRACTION_PROMPT = `You are analyzing a completed pull request to extract learnings that will help with future development.

PR Details:
- Number: #{{prNumber}}
- Title: {{title}}
- Files Changed: {{filesChanged}}
{{#if diffSummary}}
- Diff Summary: {{diffSummary}}
{{/if}}
{{#if description}}
- Description: {{description}}
{{/if}}
{{#if templatePath}}
- Template/Domain: {{templatePath}}
{{/if}}

Extract 1-3 learnings MAX. Quality over quantity. Return empty array if nothing notable — most PRs have ZERO novel learnings. Only extract if a future agent would genuinely do something WRONG without this knowledge.

Focus on:
1. **Gotchas**: Things that could trip someone up in the future
2. **Patterns**: Approaches that worked well and should be repeated
3. **Fixes**: Solutions to specific problems
4. **Insights**: Non-obvious realizations about the codebase or domain
5. **Preferences**: Developer style choices, design decisions, how things are preferred
6. **Conventions**: Naming patterns, file structure rules, project conventions
7. **Identity**: What does this PR reveal about the developer's priorities, decision-making, target audience, or values?

For each learning, provide:
- content: A clear, actionable statement (1-2 sentences)
- category: One of "gotcha", "pattern", "fix", "insight", "preference", "convention"
- tags: 2-4 relevant tags (lowercase, no spaces)
- confidence: How confident you are this is valuable (0.5-1.0)

Rules:
- Be specific, not generic. "Use thread_ts not ts for Slack replies" is good. "Be careful with Slack" is bad.
- Include context about WHY, not just WHAT
- Reference specific APIs, functions, or patterns when relevant
- Only extract learnings that would genuinely help future development
- If there's nothing notable to learn, return an empty array

ANTI-PATTERNS (auto-reject):
- "Be careful with..." → too vague
- "Remember to..." → no specificity
- "Always make sure..." → generic advice
- "Consider..." → not actionable

REQUIRED for every learning:
- Specific file path, function name, or API quirk
- Answer: "What would a future agent do DIFFERENTLY because of this?"
- Minimum 50 characters

THE ACID TEST: Would a future agent say "I would have done that wrong without knowing this"? If no, don't extract it.

IMPORTANT: You are working with LIMITED context (PR metadata only, not full diff). If you cannot be brutally specific, return FEWER learnings or an empty array. 0 learnings is better than 3 vague ones.

Respond with JSON only:
{
  "learnings": [
    {
      "content": "...",
      "category": "gotcha|pattern|fix|insight|preference|convention",
      "tags": ["tag1", "tag2"],
      "confidence": 0.8
    }
  ]
}
`;
99
+ // ============================================================
100
+ // Validation & Quality Gate
101
+ // ============================================================
102
// Categories accepted from the model. A superset of what the prompt examples
// show; 'question' is tolerated here even though no prompt requests it.
const VALID_CATEGORIES = [
    'gotcha', 'pattern', 'fix', 'insight', 'question', 'preference', 'convention',
];
/**
 * Runtime shape-check for a model-emitted learning candidate.
 * Accepts any object with a string `content` longer than 10 characters and a
 * `category` from VALID_CATEGORIES. Tags and confidence are normalized later.
 */
function isValidLearning(l) {
    if (l === null || typeof l !== 'object') {
        return false;
    }
    const { content, category } = l;
    if (typeof content !== 'string' || content.length <= 10) {
        return false;
    }
    return typeof category === 'string' && VALID_CATEGORIES.includes(category);
}
114
+ // BANNED_PHRASES, SPECIFICITY_MARKERS, FLUFF_PATTERNS, and passesQualityGate
115
+ // are imported from ./quality-gate.ts (shared with ingestion routes)
116
+ // ============================================================
117
+ // Layer Classification
118
+ // ============================================================
119
/**
 * Decide which memory layer a learning belongs in.
 *
 * Layers:
 *  - 'claude_memory': personal preference — suggest for Claude /memory
 *  - 'claude_md':     stable project rule/convention — suggest for CLAUDE.md
 *  - 'vector_db':     everything else (situational; the default)
 *
 * NOTE(review): the /\bSean\b/i check hardcodes a personal name in an
 * otherwise portable package — confirm whether this should be configurable.
 */
function classifyLayer(learning) {
    const { content, category, tags } = learning;

    const IDENTITY_TAGS = new Set([
        'identity', 'identity:value', 'identity:preference', 'identity:priority',
        'identity:thinking-style', 'identity:communication', 'identity:audience',
    ]);

    const looksPersonal = tags.some((t) => IDENTITY_TAGS.has(t)) || category === 'preference';
    if (looksPersonal
        && (/\bSean\b/i.test(content) || /\b(prefers?|values?|prioritizes?|hates?)\b/i.test(content))) {
        return { layer: 'claude_memory', reason: 'Personal preference — suggest for Claude /memory' };
    }

    if (category === 'convention' || tags.includes('convention')) {
        return { layer: 'claude_md', reason: 'Project convention — suggest for CLAUDE.md' };
    }

    if (category !== 'gotcha' && /\b(always|never|must)\s+(use|import|name|prefix)\b/i.test(content)) {
        return { layer: 'claude_md', reason: 'Stable rule — suggest for CLAUDE.md' };
    }

    return { layer: 'vector_db', reason: 'Situational learning — store in vector DB' };
}
136
+ // ============================================================
137
+ // Preflight Dedup Check
138
+ // ============================================================
139
/**
 * Preflight dedup: skip an expensive LLM extraction when the memory store
 * already holds several highly-similar entries for this search text.
 * Best-effort — a failing search never blocks extraction.
 *
 * @param {string} searchText - Concatenated title/file/commit text to probe with.
 * @returns {Promise<boolean>} true when 3+ memories match at >0.8 similarity.
 */
async function shouldSkipExtraction(searchText) {
    try {
        const matches = await searchMemories(searchText, {
            limit: 5,
            similarityThreshold: 0.8,
        });
        if (matches.length < 3) {
            return false;
        }
        console.log(`[learning-extractor] Preflight dedup: ${matches.length} memories at >0.8 similarity, skipping extraction`);
        return true;
    }
    catch {
        // Search failure is non-fatal; proceed with extraction.
        return false;
    }
}
155
+ // ============================================================
156
+ // Main Extraction Function
157
+ // ============================================================
158
/**
 * Extract learnings from a completed PR and persist them to memory.
 *
 * Pipeline: preflight dedup check → LLM extraction → layer classification
 * → dedup-aware storage via storeWithDedup.
 *
 * @param {object} context - PR metadata: prNumber, title, filesChanged,
 *   and optional diffSummary/description/templatePath.
 * @returns {Promise<object>} Aggregate result: memoriesStored,
 *   memoriesDeduplicated, learnings, errors, layerSuggestions.
 *   Never throws — all failures are collected into result.errors.
 */
export async function extractLearningsFromPR(context) {
    const result = {
        memoriesStored: 0,
        memoriesDeduplicated: 0,
        learnings: [],
        errors: [],
        layerSuggestions: [],
    };
    try {
        // Preflight: skip the LLM call entirely when similar memories already exist.
        const searchText = `${context.title} ${context.filesChanged.join(' ')}`;
        if (await shouldSkipExtraction(searchText)) {
            return result;
        }
        const learnings = await callClaudeForExtraction(context);
        if (!learnings || learnings.length === 0) {
            return result;
        }
        result.learnings = learnings;
        for (const learning of learnings) {
            // Suggest a non-default layer (CLAUDE.md / Claude memory) when applicable.
            // The learning is still stored in the vector DB either way, tagged with
            // the suggestion so consumers can promote it later.
            const { layer, reason } = classifyLayer(learning);
            learning.suggestedLayer = layer;
            if (layer !== 'vector_db') {
                result.layerSuggestions.push({ content: learning.content, layer, reason });
                learning.tags = [...learning.tags, `suggested-layer:${layer}`];
            }
            try {
                const { deduplicated } = await storeWithDedup({
                    content: learning.content,
                    category: learning.category,
                    tags: learning.tags,
                    sourceType: 'pr',
                    sourceTool: 'learning-extractor',
                    sourceRef: `PR #${context.prNumber}: ${context.title}`,
                    sourceProject: getSourceProject(),
                    confidence: learning.confidence,
                    contextTags: context.templatePath
                        ? [context.templatePath]
                        : undefined,
                });
                if (deduplicated) {
                    result.memoriesDeduplicated++;
                }
                else {
                    result.memoriesStored++;
                }
            }
            catch (err) {
                // Per-learning failures are collected; one bad store doesn't abort the rest.
                result.errors.push(`Failed to store learning: ${err instanceof Error ? err.message : 'Unknown error'}`);
            }
        }
    }
    catch (err) {
        result.errors.push(`Extraction failed: ${err instanceof Error ? err.message : 'Unknown error'}`);
    }
    return result;
}
214
+ // ============================================================
215
+ // Claude API Call
216
+ // ============================================================
217
/**
 * Run the PR extraction prompt through the LLM and normalize its response.
 * (Name is historical — the implementation now calls OpenAI, not Claude.)
 *
 * @param {object} context - PR metadata (see extractLearningsFromPR).
 * @returns {Promise<Array>} Validated, quality-gated learnings; [] on any
 *   failure (missing API key, empty/unparsable response, API error).
 */
async function callClaudeForExtraction(context) {
    const openai = getOpenAIClient();
    if (!openai) {
        console.warn('[learning-extractor] No OPENAI_API_KEY, skipping extraction');
        return [];
    }
    // Fix: dynamic values use function replacements so `$&`/`$'`-style
    // sequences in PR titles or diff text are inserted literally instead of
    // being interpreted as String.replace special replacement patterns.
    // Optional {{#if}} sections are disabled by wrapping them in HTML comments.
    const prompt = EXTRACTION_PROMPT
        .replace('{{prNumber}}', () => String(context.prNumber))
        .replace('{{title}}', () => context.title)
        .replace('{{filesChanged}}', () => context.filesChanged.join(', '))
        .replace('{{#if diffSummary}}', context.diffSummary ? '' : '<!--')
        .replace('{{/if}}', context.diffSummary ? '' : '-->')
        .replace('{{diffSummary}}', () => context.diffSummary || '')
        .replace('{{#if description}}', context.description ? '' : '<!--')
        .replace('{{/if}}', context.description ? '' : '-->')
        .replace('{{description}}', () => context.description || '')
        .replace('{{#if templatePath}}', context.templatePath ? '' : '<!--')
        .replace('{{/if}}', context.templatePath ? '' : '-->')
        .replace('{{templatePath}}', () => context.templatePath || '');
    try {
        const response = await openai.chat.completions.create({
            model: 'gpt-4o',
            max_tokens: 2048,
            messages: [{ role: 'user', content: prompt }],
        });
        const textContent = response.choices[0]?.message?.content;
        if (!textContent)
            return [];
        // Tolerate prose around the JSON: grab the outermost {...} span.
        const jsonMatch = textContent.match(/\{[\s\S]*\}/);
        if (!jsonMatch)
            return [];
        const parsed = JSON.parse(jsonMatch[0]);
        if (!parsed.learnings || !Array.isArray(parsed.learnings))
            return [];
        return parsed.learnings
            .filter((l) => isValidLearning(l))
            .map((l) => ({
                content: l.content.trim(),
                category: l.category,
                // Normalize tags to lowercase, trimmed strings.
                tags: Array.isArray(l.tags)
                    ? l.tags.map((t) => String(t).toLowerCase().trim())
                    : [],
                // Clamp confidence into [0.5, 1]; default 0.7 when missing/NaN.
                confidence: Math.min(1, Math.max(0.5, Number(l.confidence) || 0.7)),
            }))
            .filter((l) => passesQualityGate(l));
    }
    catch (err) {
        console.error('[learning-extractor] OpenAI API error:', err);
        return [];
    }
}
268
+ // ============================================================
269
+ // Batch Extraction
270
+ // ============================================================
271
/**
 * Sequentially extract learnings for a list of PRs, pausing `delayMs`
 * between calls for rate limiting. Reports progress after each PR.
 *
 * @param {Array} prs - PR contexts accepted by extractLearningsFromPR.
 * @param {object} [options] - { delayMs = 1000, onProgress(done, total) }.
 * @returns {Promise<{totalMemories: number, prResults: Map}>}
 */
export async function batchExtractLearnings(prs, options = {}) {
    const { delayMs = 1000, onProgress } = options;
    const prResults = new Map();
    let totalMemories = 0;
    let completed = 0;
    for (const pr of prs) {
        const result = await extractLearningsFromPR(pr);
        prResults.set(pr.prNumber, result);
        totalMemories += result.memoriesStored;
        completed += 1;
        onProgress?.(completed, prs.length);
        const moreToDo = completed < prs.length;
        if (moreToDo && delayMs > 0) {
            await new Promise((resolve) => setTimeout(resolve, delayMs));
        }
    }
    return { totalMemories, prResults };
}
287
+ // ============================================================
288
+ // Manual Learning Submission
289
+ // ============================================================
290
/**
 * Store a manually-submitted learning at fixed high confidence (0.9).
 * Never throws — failures are reported via the returned object.
 *
 * @param {string} content - The learning text.
 * @param {string} category - Learning category.
 * @param {string[]} tags - Tags to attach.
 * @param {object} [context] - Optional { sessionId, templatePath }.
 * @returns {Promise<{success: boolean, memoryId?: string, error?: string}>}
 */
export async function submitManualLearning(content, category, tags, context) {
    const sourceRef = context?.sessionId
        ? `Advisor session: ${context.sessionId}`
        : 'Manual submission';
    try {
        const memory = await storeMemory({
            content,
            category,
            tags,
            sourceType: 'manual',
            sourceTool: 'learning-extractor',
            sourceRef,
            sourceProject: getSourceProject(),
            contextTags: context?.templatePath ? [context.templatePath] : undefined,
            confidence: 0.9,
        });
        return { success: true, memoryId: memory.id };
    }
    catch (err) {
        const message = err instanceof Error ? err.message : 'Unknown error';
        return { success: false, error: message };
    }
}
314
+ // ============================================================
315
+ // Extraction from Conversation
316
+ // ============================================================
317
/**
 * Extract technical and identity learnings from an advisor conversation.
 * Returns validated, quality-gated learnings only — storage is the caller's
 * responsibility. Returns [] on any failure (no API key, API error, bad JSON).
 *
 * @param {string} conversationContent - Raw conversation transcript.
 * @param {object} [context] - Optional { templatePath, topic }.
 * @returns {Promise<Array>}
 */
export async function extractFromConversation(conversationContent, context) {
    const openai = getOpenAIClient();
    if (!openai) {
        return [];
    }
    const prompt = `Analyze this advisor conversation and extract valuable learnings. Extract TWO types:

**TECHNICAL LEARNINGS** — code patterns, gotchas, architecture decisions
**IDENTITY LEARNINGS** — who the user is, how they think, what they prioritize, their decision-making patterns

Conversation:
${conversationContent}

${context?.templatePath ? `Domain: ${context.templatePath}` : ''}
${context?.topic ? `Topic: ${context.topic}` : ''}

Extract 1-3 learnings MAX. Quality over quantity. Return empty array if nothing notable.

THE ACID TEST: Would knowing this change how an AI assists the user? If not, don't extract it.

Respond with JSON:
{
  "learnings": [
    {
      "content": "...",
      "category": "gotcha|pattern|fix|insight|preference|convention",
      "tags": ["tag1", "tag2"],
      "confidence": 0.8
    }
  ]
}
`;
    let raw;
    try {
        const response = await openai.chat.completions.create({
            model: 'gpt-4o',
            max_tokens: 2048,
            messages: [{ role: 'user', content: prompt }],
        });
        raw = response.choices[0]?.message?.content;
    }
    catch {
        return [];
    }
    if (!raw) {
        return [];
    }
    // Tolerate prose around the JSON: grab the outermost {...} span.
    const jsonMatch = raw.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
        return [];
    }
    let candidates;
    try {
        const parsed = JSON.parse(jsonMatch[0]);
        candidates = Array.isArray(parsed.learnings) ? parsed.learnings : [];
    }
    catch {
        return [];
    }
    return candidates
        .filter((l) => isValidLearning(l))
        .map((l) => ({
            content: l.content.trim(),
            category: l.category,
            tags: Array.isArray(l.tags) ? l.tags.map((t) => String(t).toLowerCase().trim()) : [],
            confidence: Math.min(1, Math.max(0.5, Number(l.confidence) || 0.7)),
        }))
        .filter((l) => passesQualityGate(l));
}
375
// Prompt template for session-diff extraction. `{{name}}` placeholders and
// `{{#if name}}…{{/if}}` sections are substituted by extractFromSessionDiff
// (optional sections are wrapped in HTML comments when their value is absent).
const SESSION_EXTRACTION_PROMPT = `You are analyzing a completed coding session to extract learnings for future development.

Session Context:
- Slot: {{slot}}
- Branch: {{branch}}
{{#if task}}- Task: {{task}}{{/if}}
- Files Changed: {{filesChanged}}
- Commits: {{commitMessages}}

Diff Summary:
{{diffSummary}}

{{#if diffContent}}
Diff Content (truncated):
{{diffContent}}
{{/if}}

Extract 1-3 learnings MAX. Quality over quantity. Return empty array if nothing notable.

THE ACID TEST: Would a future agent say "I would have done that wrong without knowing this"? If no, don't extract it.

Respond with JSON only:
{
  "learnings": [
    {
      "content": "...",
      "category": "gotcha|pattern|fix|insight",
      "tags": ["tag1", "tag2"],
      "confidence": 0.8
    }
  ]
}
`;
408
/**
 * Extract learnings from a completed coding session's diff and persist them.
 *
 * Pipeline: preflight dedup check → LLM extraction → validation/quality gate
 * → layer classification → contradiction detection → dedup-aware storage.
 *
 * @param {object} context - { slot, branch, task?, filesChanged,
 *   commitMessages, diffSummary, diffContent? }.
 * @returns {Promise<object>} Result with memoriesStored, memoriesDeduplicated,
 *   learnings, errors, layerSuggestions, and optional contradictions.
 *   Never throws — failures are collected into result.errors.
 */
export async function extractFromSessionDiff(context) {
    const result = {
        memoriesStored: 0,
        memoriesDeduplicated: 0,
        learnings: [],
        errors: [],
        layerSuggestions: [],
    };
    const openai = getOpenAIClient();
    if (!openai) {
        result.errors.push('No OPENAI_API_KEY configured');
        return result;
    }
    // Preflight: skip the LLM call entirely when similar memories already exist.
    const searchText = `${context.task || ''} ${context.filesChanged.join(' ')} ${context.commitMessages.join(' ')}`;
    if (await shouldSkipExtraction(searchText)) {
        return result;
    }
    try {
        // Fix: dynamic values use function replacements so `$&`/`$'`-style
        // sequences in commit messages or diff text are inserted literally
        // instead of being interpreted as String.replace special patterns.
        const prompt = SESSION_EXTRACTION_PROMPT
            .replace('{{slot}}', () => context.slot)
            .replace('{{branch}}', () => context.branch)
            .replace('{{#if task}}', context.task ? '' : '<!--')
            .replace('{{/if}}', context.task ? '' : '-->')
            .replace('{{task}}', () => context.task || '')
            .replace('{{filesChanged}}', () => context.filesChanged.join(', '))
            .replace('{{commitMessages}}', () => context.commitMessages.join('\n'))
            .replace('{{diffSummary}}', () => context.diffSummary)
            .replace('{{#if diffContent}}', context.diffContent ? '' : '<!--')
            .replace('{{/if}}', context.diffContent ? '' : '-->')
            .replace('{{diffContent}}', () => context.diffContent || '');
        const response = await openai.chat.completions.create({
            model: 'gpt-4o',
            max_tokens: 2048,
            messages: [{ role: 'user', content: prompt }],
        });
        const textContent = response.choices[0]?.message?.content;
        if (!textContent)
            return result;
        // Tolerate prose around the JSON: grab the outermost {...} span.
        const jsonMatch = textContent.match(/\{[\s\S]*\}/);
        if (!jsonMatch)
            return result;
        const parsed = JSON.parse(jsonMatch[0]);
        if (!parsed.learnings || !Array.isArray(parsed.learnings))
            return result;
        const learnings = parsed.learnings
            .filter((l) => isValidLearning(l))
            .map((l) => ({
                content: l.content.trim(),
                category: l.category,
                tags: Array.isArray(l.tags) ? l.tags.map((t) => String(t).toLowerCase().trim()) : [],
                confidence: Math.min(1, Math.max(0.5, Number(l.confidence) || 0.7)),
            }))
            .filter((l) => passesQualityGate(l));
        result.learnings = learnings;
        const sourceRef = `session:${context.slot}:${new Date().toISOString().split('T')[0]}`;
        for (const learning of learnings) {
            try {
                const { layer, reason } = classifyLayer(learning);
                learning.suggestedLayer = layer;
                if (layer !== 'vector_db') {
                    result.layerSuggestions.push({ content: learning.content, layer, reason });
                    learning.tags = [...learning.tags, `suggested-layer:${layer}`];
                }
                // NOTE(review): detectContradictions is not among this module's
                // visible imports — confirm it is defined later in the file.
                const contradiction = await detectContradictions(learning.content);
                const { deduplicated } = await storeWithDedup({
                    content: learning.content,
                    category: learning.category,
                    tags: learning.tags,
                    sourceType: 'session',
                    sourceTool: 'learning-extractor',
                    sourceRef,
                    sourceProject: getSourceProject(),
                    confidence: learning.confidence,
                    isContradiction: contradiction.isContradiction,
                    relatedTo: contradiction.conflictId ? [contradiction.conflictId] : undefined,
                });
                if (contradiction.isContradiction) {
                    result.contradictions = result.contradictions || [];
                    result.contradictions.push({
                        newLearning: learning.content,
                        conflictsWith: contradiction.conflictsWith || '',
                        conflictId: contradiction.conflictId,
                    });
                }
                if (deduplicated) {
                    result.memoriesDeduplicated++;
                }
                else {
                    result.memoriesStored++;
                }
            }
            catch (err) {
                // Per-learning failures are collected; one bad store doesn't abort the rest.
                result.errors.push(`Failed to store learning: ${err instanceof Error ? err.message : 'Unknown error'}`);
            }
        }
    }
    catch (err) {
        result.errors.push(`Session extraction failed: ${err instanceof Error ? err.message : 'Unknown error'}`);
    }
    return result;
}
509
// Prompt template for plan post-mortem extraction. `{{name}}` placeholders
// and the inline `{{#if prNumber}}…{{/if}}` section are substituted by
// extractLearningsFromPlan (the optional section is wrapped in HTML comments
// when no PR number is present).
const PLAN_EXTRACTION_PROMPT = `You are analyzing a completed implementation plan to extract learnings for future planning.

Plan Details:
- Plan ID: {{planId}}
- Title: {{title}}
- Outcome: {{outcome}}
- Build Passed: {{buildPassed}}
- Files Planned: {{filesPlanned}}
- Files Actually Changed: {{filesActual}}
{{#if prNumber}}- PR: #{{prNumber}}{{/if}}

Plan Content:
{{planContent}}

Extract 1-3 learnings MAX. Quality over quantity. Return empty array if nothing notable.

Respond with JSON only:
{
  "learnings": [
    {
      "content": "...",
      "category": "gotcha|pattern|fix|insight|preference|convention",
      "tags": ["tag1", "tag2"],
      "confidence": 0.8
    }
  ]
}
`;
537
/**
 * Extract learnings from a completed implementation plan and persist them.
 *
 * @param {object} context - { planId, title, outcome, buildPassed,
 *   filesPlanned, filesActual, planContent, prNumber? }.
 * @returns {Promise<object>} Result with memoriesStored, memoriesDeduplicated,
 *   learnings, errors, layerSuggestions. Never throws — failures are
 *   collected into result.errors.
 */
export async function extractLearningsFromPlan(context) {
    const result = {
        memoriesStored: 0,
        memoriesDeduplicated: 0,
        learnings: [],
        errors: [],
        layerSuggestions: [],
    };
    const openai = getOpenAIClient();
    if (!openai) {
        result.errors.push('No OPENAI_API_KEY configured');
        return result;
    }
    try {
        // Keep the prompt bounded; plan documents can be arbitrarily long.
        const truncatedPlan = context.planContent.slice(0, 6000);
        // Fix: dynamic values use function replacements so `$&`/`$'`-style
        // sequences in plan text or titles are inserted literally instead of
        // being interpreted as String.replace special replacement patterns.
        const prompt = PLAN_EXTRACTION_PROMPT
            .replace('{{planId}}', () => context.planId)
            .replace('{{title}}', () => context.title)
            .replace('{{outcome}}', () => context.outcome)
            .replace('{{buildPassed}}', String(context.buildPassed))
            .replace('{{filesPlanned}}', () => context.filesPlanned.join(', '))
            .replace('{{filesActual}}', () => context.filesActual.join(', '))
            .replace('{{#if prNumber}}', context.prNumber ? '' : '<!--')
            .replace('{{/if}}', context.prNumber ? '' : '-->')
            .replace('{{prNumber}}', String(context.prNumber || ''))
            .replace('{{planContent}}', () => truncatedPlan);
        const response = await openai.chat.completions.create({
            model: 'gpt-4o',
            max_tokens: 2048,
            messages: [{ role: 'user', content: prompt }],
        });
        const textContent = response.choices[0]?.message?.content;
        if (!textContent)
            return result;
        // Tolerate prose around the JSON: grab the outermost {...} span.
        const jsonMatch = textContent.match(/\{[\s\S]*\}/);
        if (!jsonMatch)
            return result;
        const parsed = JSON.parse(jsonMatch[0]);
        if (!parsed.learnings || !Array.isArray(parsed.learnings))
            return result;
        const learnings = parsed.learnings
            .filter((l) => isValidLearning(l))
            .map((l) => ({
                content: l.content.trim(),
                category: l.category,
                tags: Array.isArray(l.tags) ? l.tags.map((t) => String(t).toLowerCase().trim()) : [],
                confidence: Math.min(1, Math.max(0.5, Number(l.confidence) || 0.7)),
            }))
            .filter((l) => passesQualityGate(l));
        result.learnings = learnings;
        const sourceRef = `plan:${context.planId}${context.prNumber ? `:PR#${context.prNumber}` : ''}`;
        for (const learning of learnings) {
            try {
                const { deduplicated } = await storeWithDedup({
                    content: learning.content,
                    category: learning.category,
                    tags: learning.tags,
                    sourceType: 'plan',
                    sourceTool: 'learning-extractor',
                    sourceRef,
                    sourceProject: getSourceProject(),
                    confidence: learning.confidence,
                });
                if (deduplicated) {
                    result.memoriesDeduplicated++;
                }
                else {
                    result.memoriesStored++;
                }
            }
            catch (err) {
                // Per-learning failures are collected; one bad store doesn't abort the rest.
                result.errors.push(`Failed to store plan learning: ${err instanceof Error ? err.message : 'Unknown error'}`);
            }
        }
    }
    catch (err) {
        result.errors.push(`Plan extraction failed: ${err instanceof Error ? err.message : 'Unknown error'}`);
    }
    return result;
}
617
// Prompt template for codebase-convention mining. `{{name}}` placeholders and
// the `{{#if configFiles}}…{{/if}}` section are substituted by
// extractFromCodebaseAnalysis (the optional section is wrapped in HTML
// comments when no config files are supplied). Unlike the other prompts,
// this one asks for 5-15 learnings rather than a 1-3 maximum.
const CODEBASE_ANALYSIS_PROMPT = `You are analyzing source code files to extract the developer's design preferences and coding conventions.

Analysis Type: {{analysisType}}

{{#if configFiles}}
Config Files:
{{configFiles}}
{{/if}}

Source Files:
{{fileSamples}}

Extract the developer's design preferences and coding conventions from these files. Focus on CHOICES — what they chose over alternatives, and WHY it matters.

Extract 5-15 learnings. Be exhaustive — every color choice, naming convention, animation parameter, component pattern, and style decision is worth capturing.

Categories to use:
- **preference**: Coding style, design choices, how the developer likes things done
- **convention**: Project rules, naming patterns, file structure conventions
- **pattern**: Reusable architectural patterns
- **insight**: Non-obvious design decisions and their rationale

For each learning, provide:
- content: A specific, actionable statement (1-2 sentences). Include exact values when possible.
- category: One of "preference", "convention", "pattern", "insight"
- tags: 2-4 relevant tags (lowercase, no spaces)
- confidence: How confident (0.5-1.0)

Respond with JSON only:
{
  "learnings": [
    {
      "content": "...",
      "category": "preference|convention|pattern|insight",
      "tags": ["tag1", "tag2"],
      "confidence": 0.8
    }
  ]
}
`;
657
/**
 * Extracts design preferences and coding conventions from sampled source
 * files via an OpenAI chat completion, then persists each surviving learning
 * with deduplication.
 *
 * Never throws: failures are accumulated in `result.errors`.
 *
 * @param {{ analysisType: string,
 *           fileSamples: Array<{ path: string, content: string }>,
 *           configFiles?: Array<{ path: string, content: string }> }} context
 * @returns {Promise<{ memoriesStored: number, memoriesDeduplicated: number,
 *                     learnings: Array<object>, errors: string[],
 *                     analysisType: string }>}
 */
export async function extractFromCodebaseAnalysis(context) {
    const result = {
        memoriesStored: 0,
        memoriesDeduplicated: 0,
        learnings: [],
        errors: [],
        analysisType: context.analysisType,
    };
    const openai = getOpenAIClient();
    if (!openai) {
        result.errors.push('No OPENAI_API_KEY configured');
        return result;
    }
    try {
        // Truncate each file so the prompt stays within the model's context.
        const fileSamplesText = context.fileSamples
            .map(f => `### ${f.path}\n\`\`\`\n${f.content.slice(0, 4500)}\n\`\`\``)
            .join('\n\n');
        const configFilesText = context.configFiles
            ? context.configFiles
                .map(f => `### ${f.path}\n\`\`\`\n${f.content.slice(0, 3000)}\n\`\`\``)
                .join('\n\n')
            : '';
        // Fill the template. Two fixes over the previous substitution:
        //  - when there are no config files, the {{#if configFiles}}...{{/if}}
        //    section is removed outright (previously it was wrapped in stray
        //    "<!--"/"-->" markers that were still sent to the model);
        //  - replacer callbacks are used for file content so `$` sequences
        //    (e.g. "$&", "$'") are inserted literally instead of being
        //    interpreted as String.replace substitution patterns.
        const prompt = CODEBASE_ANALYSIS_PROMPT
            .replace('{{analysisType}}', () => context.analysisType)
            .replace(/\{\{#if configFiles\}\}\n?([\s\S]*?)\{\{\/if\}\}\n?/, configFilesText ? '$1' : '')
            .replace('{{configFiles}}', () => configFilesText)
            .replace('{{fileSamples}}', () => fileSamplesText);
        const response = await openai.chat.completions.create({
            model: 'gpt-4o',
            max_tokens: 2048,
            messages: [{ role: 'user', content: prompt }],
        });
        const textContent = response.choices[0]?.message?.content;
        if (!textContent)
            return result;
        // The model is asked for JSON only, but may wrap it in prose;
        // grab the outermost {...} span.
        const jsonMatch = textContent.match(/\{[\s\S]*\}/);
        if (!jsonMatch)
            return result;
        const parsed = JSON.parse(jsonMatch[0]);
        if (!parsed.learnings || !Array.isArray(parsed.learnings))
            return result;
        // Normalize: trim content, lowercase tags, clamp confidence to
        // [0.5, 1] (defaulting to 0.7 when absent/NaN), then quality-gate.
        const learnings = parsed.learnings
            .filter((l) => isValidLearning(l))
            .map((l) => ({
                content: l.content.trim(),
                category: l.category,
                tags: Array.isArray(l.tags) ? l.tags.map((t) => String(t).toLowerCase().trim()) : [],
                confidence: Math.min(1, Math.max(0.5, Number(l.confidence) || 0.7)),
            }))
            .filter((l) => passesQualityGate(l));
        result.learnings = learnings;
        const sourceRef = `codebase-analysis:${context.analysisType}:${new Date().toISOString().split('T')[0]}`;
        // Store sequentially so dedup sees each prior insert.
        for (const learning of learnings) {
            try {
                const { deduplicated } = await storeWithDedup({
                    content: learning.content,
                    category: learning.category,
                    tags: [...learning.tags, context.analysisType],
                    sourceType: 'codebase_analysis',
                    sourceTool: 'learning-extractor',
                    sourceRef,
                    sourceProject: getSourceProject(),
                    confidence: learning.confidence,
                });
                if (deduplicated) {
                    result.memoriesDeduplicated++;
                }
                else {
                    result.memoriesStored++;
                }
            }
            catch (err) {
                result.errors.push(`Failed to store analysis learning: ${err instanceof Error ? err.message : 'Unknown error'}`);
            }
        }
    }
    catch (err) {
        result.errors.push(`Codebase analysis extraction failed: ${err instanceof Error ? err.message : 'Unknown error'}`);
    }
    return result;
}
// ============================================================
// Contradiction Detection
// ============================================================
// Heuristic negation/reversal markers. A new learning whose negation
// "polarity" differs from a highly similar existing memory is flagged as a
// potential contradiction. None of these regexes carry the /g flag, so
// .test() is stateless and safe to reuse.
const NEGATION_PATTERNS = [
    /\bnot\b/i, /\bnever\b/i, /\bdon'?t\b/i, /\bwon'?t\b/i, /\bcan'?t\b/i,
    /\bshouldn'?t\b/i, /\binstead of\b/i, /\brather than\b/i, /\bno longer\b/i,
    /\bremoved?\b/i, /\breplaced?\b/i, /\bdeprecated?\b/i, /\bavoid\b/i,
    /\bwrong\b/i, /\bincorrect\b/i, /\bbroken\b/i, /\bfixed\b/i,
];
/**
 * Flags a potential contradiction between a new learning and stored memories.
 *
 * A contradiction is reported for the first existing memory that is highly
 * similar (similarity >= 0.8) while exactly one of the two texts contains a
 * negation marker — i.e. the statements look alike but point in opposite
 * directions.
 *
 * @param {string} newLearning - Content of the candidate learning.
 * @param {Array<{ id: *, content: string, similarity: number }>} [existingMemories]
 *        Pre-fetched candidates; when omitted, similar memories are fetched
 *        via searchMemories (limit 5, similarityThreshold 0.7).
 * @returns {Promise<{ isContradiction: boolean, conflictsWith?: string,
 *                     conflictId?: *, similarity?: number }>}
 */
export async function detectContradictions(newLearning, existingMemories) {
    const memories = existingMemories ?? await searchMemories(newLearning, {
        limit: 5,
        similarityThreshold: 0.7,
    });
    // Loop-invariant: compute the new learning's negation polarity once
    // instead of re-scanning every pattern for each candidate memory.
    const newHasNegation = NEGATION_PATTERNS.some(p => p.test(newLearning));
    for (const existing of memories) {
        if (existing.similarity < 0.8)
            continue;
        const existingHasNegation = NEGATION_PATTERNS.some(p => p.test(existing.content));
        if (newHasNegation !== existingHasNegation) {
            return {
                isContradiction: true,
                conflictsWith: existing.content,
                conflictId: existing.id,
                similarity: existing.similarity,
            };
        }
    }
    return { isContradiction: false };
}
// Prompt template for extracting communication voice/style traits from
// writing samples. The {{analysisType}} and {{writingSamples}} placeholders
// are substituted with plain String.replace calls in extractVoiceTraits;
// this is NOT a real Handlebars template.
const VOICE_EXTRACTION_PROMPT = `You are analyzing writing samples to extract a developer/founder's communication voice and style patterns.

Analysis Type: {{analysisType}}

Writing Samples:
{{writingSamples}}

Extract the writer's voice traits, communication patterns, and style preferences. Focus on CHOICES — what they chose over alternatives, and how their voice shifts across audiences.

Extract 5-15 voice traits. Be exhaustive.

Categories to use:
- **preference**: Voice choices, tone decisions, structural preferences
- **convention**: Consistent patterns that should be replicated
- **pattern**: Reusable rhetorical structures
- **insight**: Non-obvious voice characteristics and their effect

For each learning, provide:
- content: A specific, actionable statement (1-2 sentences). Include exact phrases when possible.
- category: One of "preference", "convention", "pattern", "insight"
- tags: 2-4 relevant tags — always include "voice" and an audience tag like "audience:user-facing", etc.
- confidence: How confident (0.5-1.0)

Respond with JSON only:
{
  "learnings": [
    {
      "content": "...",
      "category": "preference|convention|pattern|insight",
      "tags": ["voice", "audience:user-facing", "tone"],
      "confidence": 0.8
    }
  ]
}
`;
/**
 * Runs the voice/style analysis prompt over the supplied writing samples and
 * stores each extracted voice trait as a memory (with deduplication). Every
 * stored trait is guaranteed to carry the "voice" tag plus the analysis type.
 *
 * Never throws: failures are accumulated in the returned `errors` array.
 *
 * @param {{ analysisType: string,
 *           writingSamples: Array<{ label: string, content: string }> }} context
 * @returns {Promise<{ memoriesStored: number, memoriesDeduplicated: number,
 *                     learnings: Array<object>, errors: string[],
 *                     analysisType: string }>}
 */
export async function extractVoiceTraits(context) {
    const result = {
        memoriesStored: 0,
        memoriesDeduplicated: 0,
        learnings: [],
        errors: [],
        analysisType: context.analysisType,
    };
    const openai = getOpenAIClient();
    if (!openai) {
        result.errors.push('No OPENAI_API_KEY configured');
        return result;
    }
    try {
        // Each sample is truncated to keep the prompt within context limits.
        const sampleBlocks = context.writingSamples.map((sample) => `### ${sample.label}\n\`\`\`\n${sample.content.slice(0, 4500)}\n\`\`\``);
        const samplesText = sampleBlocks.join('\n\n');
        const prompt = VOICE_EXTRACTION_PROMPT
            .replace('{{analysisType}}', context.analysisType)
            .replace('{{writingSamples}}', samplesText);
        const response = await openai.chat.completions.create({
            model: 'gpt-4o',
            max_tokens: 2048,
            messages: [{ role: 'user', content: prompt }],
        });
        const rawText = response.choices[0]?.message?.content;
        if (!rawText)
            return result;
        // Tolerate prose around the JSON: take the outermost {...} span.
        const jsonMatch = rawText.match(/\{[\s\S]*\}/);
        if (!jsonMatch)
            return result;
        const parsed = JSON.parse(jsonMatch[0]);
        if (!Array.isArray(parsed.learnings))
            return result;
        // Normalize each candidate and keep only those passing validation
        // and the quality gate (confidence clamped to [0.5, 1]).
        const accepted = [];
        for (const candidate of parsed.learnings) {
            if (!isValidLearning(candidate))
                continue;
            const entry = {
                content: candidate.content.trim(),
                category: candidate.category,
                tags: Array.isArray(candidate.tags)
                    ? candidate.tags.map((tag) => String(tag).toLowerCase().trim())
                    : [],
                confidence: Math.min(1, Math.max(0.5, Number(candidate.confidence) || 0.7)),
            };
            if (passesQualityGate(entry)) {
                accepted.push(entry);
            }
        }
        result.learnings = accepted;
        const today = new Date().toISOString().split('T')[0];
        const sourceRef = `voice-analysis:${context.analysisType}:${today}`;
        // Store sequentially so deduplication sees each prior insert.
        for (const learning of accepted) {
            try {
                // Guarantee the "voice" tag without duplicating it.
                const baseTags = learning.tags.includes('voice')
                    ? learning.tags
                    : ['voice', ...learning.tags];
                const { deduplicated } = await storeWithDedup({
                    content: learning.content,
                    category: learning.category,
                    tags: [...baseTags, context.analysisType],
                    sourceType: 'codebase_analysis',
                    sourceTool: 'learning-extractor',
                    sourceRef,
                    sourceProject: getSourceProject(),
                    confidence: learning.confidence,
                });
                if (deduplicated) {
                    result.memoriesDeduplicated++;
                }
                else {
                    result.memoriesStored++;
                }
            }
            catch (err) {
                result.errors.push(`Failed to store voice learning: ${err instanceof Error ? err.message : 'Unknown error'}`);
            }
        }
    }
    catch (err) {
        result.errors.push(`Voice analysis extraction failed: ${err instanceof Error ? err.message : 'Unknown error'}`);
    }
    return result;
}
// ============================================================
// Formatting
// ============================================================
/**
 * Renders an extraction result as a short markdown summary: a header line,
 * a numbered list of learnings with category markers and tags, and any
 * accumulated errors.
 *
 * @param {{ memoriesStored: number, memoriesDeduplicated: number,
 *           learnings: Array<{ category: string, content: string, tags: string[] }>,
 *           errors: string[] }} result
 * @returns {string} Markdown summary text.
 */
export function formatExtractionResult(result) {
    // Nothing stored, nothing confirmed, nothing failed: report an empty run.
    // The message is source-agnostic — this formatter serves plan,
    // codebase-analysis and voice extractions as well, not just PRs.
    if (result.memoriesStored === 0 && result.memoriesDeduplicated === 0 && result.errors.length === 0) {
        return 'No learnings extracted.';
    }
    // ASCII marker per category; anything unlisted falls back to '*'.
    // Object.hasOwn guards against prototype keys (e.g. "constructor").
    const markers = { gotcha: '|!|', pattern: '->', fix: '[+]' };
    const lines = [];
    if (result.memoriesStored > 0 || result.memoriesDeduplicated > 0) {
        if (result.memoriesStored > 0 && result.memoriesDeduplicated > 0) {
            lines.push(`**${result.memoriesStored} new learnings stored (${result.memoriesDeduplicated} confirmed existing):**`);
        }
        else if (result.memoriesStored > 0) {
            lines.push(`**${result.memoriesStored} new learnings extracted and stored:**`);
        }
        else {
            lines.push(`**${result.memoriesDeduplicated} learnings confirmed (all matched existing knowledge)**`);
        }
        lines.push('');
        result.learnings.forEach((l, i) => {
            const marker = Object.hasOwn(markers, l.category) ? markers[l.category] : '*';
            lines.push(`${i + 1}. ${marker} **${l.category}**: ${l.content}`);
            lines.push(` Tags: ${l.tags.map((t) => `\`${t}\``).join(' ')}`);
        });
    }
    if (result.errors.length > 0) {
        lines.push('');
        lines.push('**Errors:**');
        result.errors.forEach((e) => {
            lines.push(`- ${e}`);
        });
    }
    return lines.join('\n');
}
+ //# sourceMappingURL=learning-extractor.js.map