@shadowforge0/aquifer-memory 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,230 @@
1
+ 'use strict';
2
+
3
+ const http = require('http');
4
+ const https = require('https');
5
+
6
+ // ---------------------------------------------------------------------------
7
+ // HTTP helpers
8
+ // ---------------------------------------------------------------------------
9
+
10
/**
 * Minimal promise-based HTTP(S) JSON helper.
 *
 * @param {string} url - absolute URL; the protocol selects http vs https
 * @param {object} options - Node http.request options; `timeout` (ms), when
 *   set, arms a manual timer that destroys the request and rejects
 * @param {object} [body] - optional JSON-serializable request body
 * @returns {Promise<object>} parsed JSON response body
 * @rejects {Error} on network error, response-stream error, timeout,
 *   non-2xx status, or invalid JSON
 */
function httpRequest(url, options, body) {
  return new Promise((resolve, reject) => {
    const parsedUrl = new URL(url);
    const transport = parsedUrl.protocol === 'https:' ? https : http;

    // M8 fix: settle-once guard — the timeout timer, request 'error', and
    // the response handler can race; only the first outcome wins.
    let settled = false;
    const finish = (fn, val) => { if (!settled) { settled = true; fn(val); } };

    const req = transport.request(parsedUrl, options, (res) => {
      const chunks = [];
      res.on('data', (chunk) => chunks.push(chunk));
      // Fix: a response-stream error previously left the promise pending
      // forever ('end' never fires); only a configured timeout would have
      // eventually rejected it.
      res.on('error', (err) => {
        if (timer) clearTimeout(timer);
        finish(reject, err);
      });
      res.on('end', () => {
        if (timer) clearTimeout(timer);
        const raw = Buffer.concat(chunks).toString();
        if (res.statusCode < 200 || res.statusCode >= 300) {
          finish(reject, new Error(`HTTP ${res.statusCode}: ${raw.slice(0, 500)}`));
          return;
        }
        try {
          finish(resolve, JSON.parse(raw));
        } catch (e) {
          finish(reject, new Error(`Invalid JSON response: ${raw.slice(0, 200)}`));
        }
      });
    });

    // Manual timeout: destroy the socket and reject. `timer` is only read
    // inside callbacks, which cannot run before this assignment completes.
    const timer = options.timeout
      ? setTimeout(() => { req.destroy(); finish(reject, new Error('Request timeout')); }, options.timeout)
      : null;

    req.on('error', (err) => { if (timer) clearTimeout(timer); finish(reject, err); });
    if (body) req.write(JSON.stringify(body));
    req.end();
  });
}
46
+
47
+ // ---------------------------------------------------------------------------
48
+ // Retry wrapper
49
+ // ---------------------------------------------------------------------------
50
+
51
/**
 * Run an async function with exponential-backoff retries.
 *
 * @param {Function} fn - zero-arg async function to attempt
 * @param {object} [opts]
 * @param {number} [opts.maxRetries=3] - total attempts (clamped to >= 1)
 * @param {number} [opts.initialBackoffMs=2000] - first retry delay in ms;
 *   doubles on each subsequent retry
 * @returns {Promise<*>} result of the first successful attempt
 * @throws the last error once every attempt has failed
 */
async function withRetry(fn, { maxRetries = 3, initialBackoffMs = 2000 } = {}) {
  // Fix: clamp to at least one attempt — maxRetries <= 0 previously ran fn
  // zero times and then `throw lastErr` threw `undefined`.
  const attempts = Math.max(1, maxRetries);
  let lastErr;
  for (let attempt = 0; attempt < attempts; attempt++) {
    try {
      return await fn();
    } catch (err) {
      lastErr = err;
      // No sleep after the final attempt.
      if (attempt < attempts - 1) {
        const delay = initialBackoffMs * Math.pow(2, attempt);
        await new Promise((resolve) => setTimeout(resolve, delay));
      }
    }
  }
  throw lastErr;
}
66
+
67
+ // ---------------------------------------------------------------------------
68
+ // Ollama adapter
69
+ // ---------------------------------------------------------------------------
70
+
71
/**
 * Embedder backed by an Ollama server's /api/embed endpoint.
 * Texts are sent in chunks; the vector dimension is detected lazily from
 * the first embedding returned.
 *
 * @param {object} config - { ollamaUrl, model, chunkSize, timeout,
 *   maxRetries, initialBackoffMs } (all optional; `||` defaults guard
 *   against zero/empty values)
 * @returns {{embed: Function, embedBatch: Function, dim: ?number}}
 */
function createOllamaEmbedder(config) {
  const url = config.ollamaUrl || 'http://localhost:11434';
  const model = config.model || 'bge-m3';
  const chunkSize = config.chunkSize || 32;
  const timeout = config.timeout || 120000;
  const maxRetries = config.maxRetries || 3;
  const initialBackoffMs = config.initialBackoffMs || 2000;
  let detectedDim = null;

  // POST a single chunk of texts to Ollama, with retry/backoff.
  function requestChunk(chunk) {
    return withRetry(
      () => httpRequest(
        `${url}/api/embed`,
        {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          timeout,
        },
        { model, input: chunk }
      ),
      { maxRetries, initialBackoffMs }
    );
  }

  // Embed all texts, chunkSize at a time, preserving input order.
  async function embedBatchRaw(texts) {
    const collected = [];
    for (let start = 0; start < texts.length; start += chunkSize) {
      const response = await requestChunk(texts.slice(start, start + chunkSize));
      const vectors = response.embeddings || [];
      for (const vector of vectors) collected.push(vector);
      if (!detectedDim && vectors.length > 0 && vectors[0]) {
        detectedDim = vectors[0].length;
      }
    }
    return collected;
  }

  return {
    async embed(text) {
      const [first] = await embedBatchRaw([text]);
      return first || [];
    },
    async embedBatch(texts) {
      if (!texts || texts.length === 0) return [];
      return embedBatchRaw(texts);
    },
    // null until the first successful embedding reveals the dimension.
    get dim() { return detectedDim; },
  };
}
121
+
122
+ // ---------------------------------------------------------------------------
123
+ // OpenAI adapter
124
+ // ---------------------------------------------------------------------------
125
+
126
/**
 * Embedder backed by the OpenAI embeddings API.
 *
 * @param {object} config - requires `openaiApiKey`; optional `openaiModel`,
 *   `openaiDimensions`, `chunkSize`, `timeout`, `maxRetries`,
 *   `initialBackoffMs`
 * @returns {{embed: Function, embedBatch: Function, dim: number}}
 * @throws {Error} when no API key is configured
 */
function createOpenAIEmbedder(config) {
  const apiKey = config.openaiApiKey;
  if (!apiKey) throw new Error('openaiApiKey is required for OpenAI embedder');

  const model = config.openaiModel || 'text-embedding-3-small';
  const dimensions = config.openaiDimensions || 1536;
  const maxRetries = config.maxRetries || 3;
  const initialBackoffMs = config.initialBackoffMs || 2000;
  const timeout = config.timeout || 120000;
  const chunkSize = config.chunkSize || 100; // M7: batch chunking for OpenAI

  // POST a single chunk of texts to the embeddings endpoint, with retries.
  function requestChunk(chunk) {
    return withRetry(
      () => httpRequest(
        'https://api.openai.com/v1/embeddings',
        {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
            'Authorization': `Bearer ${apiKey}`,
          },
          timeout,
        },
        { model, input: chunk, dimensions }
      ),
      { maxRetries, initialBackoffMs }
    );
  }

  // Embed all texts, chunkSize at a time, re-ordering each response by its
  // `index` field so output order matches input order.
  async function embedBatchRaw(texts) {
    const collected = [];
    for (let start = 0; start < texts.length; start += chunkSize) {
      const response = await requestChunk(texts.slice(start, start + chunkSize));
      const rows = response.data || [];
      rows.sort((a, b) => a.index - b.index);
      for (const row of rows) collected.push(row.embedding);
    }
    return collected;
  }

  return {
    async embed(text) {
      const [first] = await embedBatchRaw([text]);
      return first || [];
    },
    async embedBatch(texts) {
      if (!texts || texts.length === 0) return [];
      return embedBatchRaw(texts);
    },
    // Fixed by configuration, not detected from responses.
    get dim() { return dimensions; },
  };
}
177
+
178
+ // ---------------------------------------------------------------------------
179
+ // Custom adapter
180
+ // ---------------------------------------------------------------------------
181
+
182
/**
 * Embedder that delegates to a user-supplied batch function.
 *
 * @param {object} config - requires `fn`: async (texts[]) => vectors[][]
 * @returns {{embed: Function, embedBatch: Function, dim: ?number}}
 * @throws {Error} when `fn` is missing
 */
function createCustomEmbedder(config) {
  const fn = config.fn;
  if (!fn) throw new Error('fn is required for custom embedder');

  let detectedDim = null;

  return {
    async embed(text) {
      const batch = await fn([text]);
      const vector = batch[0] || [];
      // Learn the dimension from the first non-empty vector seen.
      if (!detectedDim && vector.length > 0) detectedDim = vector.length;
      return vector;
    },
    async embedBatch(texts) {
      if (!texts || texts.length === 0) return [];
      const batch = await fn(texts);
      if (!detectedDim && batch.length > 0 && batch[0]) {
        detectedDim = batch[0].length;
      }
      return batch;
    },
    // null until a vector has been observed.
    get dim() { return detectedDim; },
  };
}
206
+
207
+ // ---------------------------------------------------------------------------
208
+ // Factory
209
+ // ---------------------------------------------------------------------------
210
+
211
/**
 * Factory: build an embedder for the configured provider.
 *
 * @param {object} [config] - provider-specific config; `provider` is one of
 *   'ollama' (default), 'openai', or 'custom'
 * @returns {{embed: Function, embedBatch: Function, dim: ?number}}
 * @throws {Error} for an unrecognized provider
 */
function createEmbedder(config = {}) {
  const provider = config.provider || 'ollama';

  if (provider === 'ollama') return createOllamaEmbedder(config);
  if (provider === 'openai') return createOpenAIEmbedder(config);
  if (provider === 'custom') return createCustomEmbedder(config);

  throw new Error(`Unknown embedding provider: ${provider}`);
}
225
+
226
+ // ---------------------------------------------------------------------------
227
+ // Exports
228
+ // ---------------------------------------------------------------------------
229
+
230
// Public API: only the provider-selecting factory is exported; the
// individual adapter constructors stay module-private.
module.exports = { createEmbedder };
@@ -0,0 +1,73 @@
1
+ 'use strict';
2
+
3
+ const { parseEntityOutput } = require('../core/entity');
4
+
5
+ // ---------------------------------------------------------------------------
6
+ // defaultEntityPrompt
7
+ // ---------------------------------------------------------------------------
8
+
9
/**
 * Build the default entity-extraction prompt from a conversation.
 *
 * @param {Array<object>} messages - { role, content } pairs; non-string
 *   content is JSON-stringified
 * @param {object} [opts] - accepted for signature parity; unused here
 * @returns {string} the full prompt text
 */
function defaultEntityPrompt(messages, opts = {}) {
  const lines = [];
  for (const msg of messages) {
    const body = typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
    lines.push(`[${msg.role}] ${body}`);
  }
  const conversation = lines.join('\n');

  return `Extract named entities from the following conversation.

Output in this exact format (one entity per block, separated by ---):

[ENTITIES]
name: <display name, original casing>
type: <person|project|concept|tool|metric|org|place|event|doc|task|topic|other>
aliases: <comma-separated alternative names, or empty>
---

Rules:
- Only extract entities discussed substantively (not just mentioned in passing)
- Normalize aliases (e.g., "React.js" and "React" are aliases)
- Choose the most specific type
- Minimum 0, maximum 15 entities
- If no entities found, output only: [ENTITIES]\n(none)

Example:
[ENTITIES]
name: PostgreSQL
type: tool
aliases: Postgres, PG
---
name: Alice Chen
type: person
aliases: Alice
---

---
CONVERSATION:
${conversation}`;
}
46
+
47
+ // ---------------------------------------------------------------------------
48
+ // extractEntities
49
+ // ---------------------------------------------------------------------------
50
+
51
/**
 * Extract entities from a conversation via an LLM call.
 *
 * @param {Array<object>} messages - conversation messages
 * @param {object} [opts]
 * @param {Function} [opts.llmFn] - async (prompt) => raw LLM output; when
 *   absent no extraction is attempted
 * @param {Function} [opts.promptFn] - custom prompt builder; defaults to
 *   defaultEntityPrompt
 * @returns {Promise<Array>} parsed entities; [] when no LLM or on failure
 */
async function extractEntities(messages, {
  llmFn,
  promptFn,
} = {}) {
  if (!llmFn) return [];

  const buildPrompt = promptFn || defaultEntityPrompt;

  try {
    const response = await llmFn(buildPrompt(messages, {}));
    return parseEntityOutput(response);
  } catch (err) {
    // Best-effort by design: an LLM failure yields no entities, never an error.
    return [];
  }
}
68
+
69
+ // ---------------------------------------------------------------------------
70
+ // Exports
71
+ // ---------------------------------------------------------------------------
72
+
73
// Public API; parseEntityOutput is re-exported from ../core/entity for convenience.
module.exports = { defaultEntityPrompt, extractEntities, parseEntityOutput };
@@ -0,0 +1,245 @@
1
+ 'use strict';
2
+
3
+ // ---------------------------------------------------------------------------
4
+ // defaultSummarizePrompt
5
+ // ---------------------------------------------------------------------------
6
+
7
/**
 * Build the default summarization prompt from a conversation.
 *
 * @param {Array<object>} messages - { role, content } pairs; non-string
 *   content is JSON-stringified
 * @param {object} [opts]
 * @param {boolean} [opts.mergeEntities] - when true, append instructions to
 *   also emit an [ENTITIES] section after the summary
 * @returns {string} the full prompt text
 */
function defaultSummarizePrompt(messages, opts = {}) {
  const lines = [];
  for (const msg of messages) {
    const body = typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
    lines.push(`[${msg.role}] ${body}`);
  }
  const conversation = lines.join('\n');

  const entitySection = opts.mergeEntities
    ? `

Also extract named entities from this conversation. Output them after the summary in this exact format:

[ENTITIES]
name: <display name, original casing>
type: <person|project|concept|tool|metric|org|place|event|doc|task|topic|other>
aliases: <comma-separated alternative names, or empty>
---
(repeat for each entity)

Rules for entities:
- Only extract entities that are discussed substantively (not just mentioned in passing)
- Normalize aliases (e.g., "React.js" and "React" are aliases of the same entity)
- Choose the most specific type that fits
- Minimum 0, maximum 15 entities per session`
    : '';

  return `Summarize the following conversation concisely. Focus on:
1. What was discussed (main topics)
2. What was decided or concluded
3. What actions were taken or planned
4. Any unresolved questions or open loops

Output a structured summary in this exact format (follow spacing and prefixes precisely):

TITLE: <one-line title>
OVERVIEW: <2-3 sentence overview>
TOPICS:
- <topic name>: <brief summary>
DECISIONS:
- <decision>: <reason>
OPEN_LOOPS:
- <unresolved item>
IMPORTANT_FACTS:
- <key fact>

Example:
TITLE: Database migration strategy discussion
OVERVIEW: Team discussed migrating from MySQL to PostgreSQL. Decided to use pgloader for data transfer. Timeline set for next sprint.
TOPICS:
- Migration tooling: Evaluated pgloader vs custom scripts, chose pgloader for reliability
- Schema changes: Need to convert ENUM types to CHECK constraints
DECISIONS:
- Use pgloader: Handles type conversion automatically, proven in production
OPEN_LOOPS:
- Need to benchmark query performance on new schema
IMPORTANT_FACTS:
- Current DB size is 45GB with 12M rows in largest table
${entitySection}

---
CONVERSATION:
${conversation}`;
}
69
+
70
+ // ---------------------------------------------------------------------------
71
+ // parseStructuredSummary — extract fields from LLM output
72
+ // ---------------------------------------------------------------------------
73
+
74
// Lookahead that terminates a section: the next known header, the [ENTITIES]
// marker, or true end-of-input. `(?![\s\S])` is used instead of `$` because
// under the `m` flag `$` also matches at every line end, which made the lazy
// capture stop after the FIRST line of each section — multi-bullet sections
// were silently truncated to one item (bug fix).
const _SECTION_END = String.raw`(?=\n(?:TOPICS|DECISIONS|OPEN_LOOPS|IMPORTANT_FACTS|\[ENTITIES\])|(?![\s\S]))`;

// Return the raw body of `HEADER:` up to the next section, or '' if absent.
function _sectionBody(text, header) {
  const re = new RegExp(`^${header}:\\s*\\n([\\s\\S]*?)${_SECTION_END}`, 'm');
  const m = text.match(re);
  return m ? m[1] : '';
}

// Extract "- item" lines from a section body, stripped of the dash prefix.
function _bulletItems(sectionText) {
  return sectionText
    .split('\n')
    .filter((line) => line.trim().startsWith('-'))
    .map((line) => line.replace(/^\s*-\s*/, '').trim());
}

// Split "key: value" on the first colon; a missing colon yields empty value.
function _splitNameValue(item, keyField, valueField) {
  const colonIdx = item.indexOf(':');
  if (colonIdx > 0) {
    return {
      [keyField]: item.slice(0, colonIdx).trim(),
      [valueField]: item.slice(colonIdx + 1).trim(),
    };
  }
  return { [keyField]: item, [valueField]: '' };
}

/**
 * Parse the structured summary format produced by defaultSummarizePrompt
 * (TITLE / OVERVIEW / TOPICS / DECISIONS / OPEN_LOOPS / IMPORTANT_FACTS).
 *
 * @param {string} text - raw LLM output
 * @returns {object|null} parsed fields (missing sections become empty), or
 *   null for empty/absent input
 */
function _parseStructuredSummary(text) {
  if (!text) return null;

  const result = {
    title: '',
    overview: '',
    topics: [],
    decisions: [],
    open_loops: [],
    important_facts: [],
  };

  // TITLE is single-line by contract.
  const titleMatch = text.match(/^TITLE:\s*(.+)/m);
  if (titleMatch) result.title = titleMatch[1].trim();

  // OVERVIEW runs to the next section header (may span multiple lines).
  const overviewMatch = text.match(new RegExp(`^OVERVIEW:\\s*([\\s\\S]*?)${_SECTION_END}`, 'm'));
  if (overviewMatch) result.overview = overviewMatch[1].trim();

  for (const item of _bulletItems(_sectionBody(text, 'TOPICS'))) {
    result.topics.push(_splitNameValue(item, 'name', 'summary'));
  }

  for (const item of _bulletItems(_sectionBody(text, 'DECISIONS'))) {
    result.decisions.push(_splitNameValue(item, 'decision', 'reason'));
  }

  for (const item of _bulletItems(_sectionBody(text, 'OPEN_LOOPS'))) {
    result.open_loops.push({ item });
  }

  for (const fact of _bulletItems(_sectionBody(text, 'IMPORTANT_FACTS'))) {
    if (fact) result.important_facts.push(fact);
  }

  return result;
}
151
+
152
+ // ---------------------------------------------------------------------------
153
+ // extractiveFallback
154
+ // ---------------------------------------------------------------------------
155
+
156
/**
 * Build a crude no-LLM summary: keep user messages only, select up to the
 * first three and last three (deduplicated), join with --- separators, and
 * cap the result at 2000 characters.
 *
 * @param {Array<object>|null} messages - conversation messages
 * @returns {object} { summaryText, structuredSummary: null, entityRaw: null,
 *   isExtractive: true }
 */
function extractiveFallback(messages) {
  const texts = [];
  for (const msg of messages || []) {
    if (msg.role !== 'user') continue;
    let text = '';
    if (typeof msg.content === 'string') {
      text = msg.content.trim();
    } else if (Array.isArray(msg.content)) {
      // Multi-part content: keep only the text parts.
      text = msg.content
        .filter((part) => part.type === 'text')
        .map((part) => part.text)
        .join('\n')
        .trim();
    }
    if (text) texts.push(text);
  }

  let selected = texts;
  if (texts.length > 6) {
    const head = texts.slice(0, 3);
    const seen = new Set(head);
    // Drop any tail message that duplicates a head message.
    selected = head.concat(texts.slice(-3).filter((t) => !seen.has(t)));
  }

  return {
    summaryText: selected.join('\n---\n').slice(0, 2000),
    structuredSummary: null,
    entityRaw: null,
    isExtractive: true,
  };
}
190
+
191
+ // ---------------------------------------------------------------------------
192
+ // summarize
193
+ // ---------------------------------------------------------------------------
194
+
195
/**
 * Summarize a conversation with an LLM, degrading to an extractive summary
 * when no LLM is configured or the LLM call fails.
 *
 * @param {Array<object>} messages - conversation messages ({ role, content })
 * @param {object} [opts]
 * @param {Function} [opts.llmFn] - async (prompt) => raw response text
 * @param {Function} [opts.promptFn] - custom prompt builder; defaults to
 *   defaultSummarizePrompt
 * @param {boolean} [opts.mergeEntities=false] - also request entity
 *   extraction in the same LLM call
 * @returns {Promise<object>} { summaryText, structuredSummary, entityRaw,
 *   isExtractive }
 */
async function summarize(messages, {
  llmFn,
  promptFn,
  mergeEntities = false,
} = {}) {
  if (!llmFn) {
    return extractiveFallback(messages);
  }

  const buildPrompt = promptFn || defaultSummarizePrompt;

  try {
    const prompt = buildPrompt(messages, { mergeEntities });
    const response = await llmFn(prompt);

    // Parse structured fields from the raw response.
    const structuredSummary = _parseStructuredSummary(response);

    // Locate the [ENTITIES] section once (previously computed twice): capture
    // it verbatim for downstream parsing, and strip it from the stored
    // summary text (M6 fix).
    let entityRaw = null;
    let cleanSummary = response;
    if (mergeEntities) {
      const idx = response.indexOf('[ENTITIES]');
      if (idx !== -1) {
        entityRaw = response.slice(idx);
        cleanSummary = response.slice(0, idx).trim();
      }
    }

    return {
      summaryText: cleanSummary,
      structuredSummary,
      entityRaw,
      isExtractive: false,
    };
  } catch (err) {
    // LLM failure: fall back to extractive rather than throwing.
    return extractiveFallback(messages);
  }
}
240
+
241
+ // ---------------------------------------------------------------------------
242
+ // Exports
243
+ // ---------------------------------------------------------------------------
244
+
245
// Public API; _parseStructuredSummary stays module-private.
module.exports = { defaultSummarizePrompt, summarize, extractiveFallback };