@shadowforge0/aquifer-memory 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +354 -0
- package/consumers/cli.js +314 -0
- package/consumers/mcp.js +135 -0
- package/consumers/openclaw-plugin.js +235 -0
- package/consumers/shared/config.js +143 -0
- package/consumers/shared/factory.js +77 -0
- package/consumers/shared/llm.js +119 -0
- package/core/aquifer.js +634 -0
- package/core/entity.js +360 -0
- package/core/hybrid-rank.js +166 -0
- package/core/storage.js +550 -0
- package/index.js +6 -0
- package/package.json +57 -0
- package/pipeline/embed.js +230 -0
- package/pipeline/extract-entities.js +73 -0
- package/pipeline/summarize.js +245 -0
- package/schema/001-base.sql +180 -0
- package/schema/002-entities.sql +120 -0
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const http = require('http');
|
|
4
|
+
const https = require('https');
|
|
5
|
+
|
|
6
|
+
// ---------------------------------------------------------------------------
|
|
7
|
+
// HTTP helpers
|
|
8
|
+
// ---------------------------------------------------------------------------
|
|
9
|
+
|
|
10
|
+
/**
 * Performs an HTTP(S) request and resolves with the parsed JSON response body.
 *
 * @param {string} url - Absolute URL; protocol selects the http or https transport.
 * @param {object} options - Node `http.request` options; `options.timeout` (ms),
 *   when truthy, arms a manual timeout that destroys the request.
 * @param {object} [body] - Optional request body; JSON-serialized when provided.
 * @returns {Promise<object>} Parsed JSON on 2xx; rejects with an Error on non-2xx
 *   status, invalid JSON, socket error, or timeout.
 */
function httpRequest(url, options, body) {
  return new Promise((resolve, reject) => {
    const parsedUrl = new URL(url);
    const transport = parsedUrl.protocol === 'https:' ? https : http;

    // M8 fix: settled flag to prevent double-settle on timeout race
    // (e.g. timeout fires, req.destroy() then emits 'error' — only the first
    // finish() call wins; later calls are no-ops).
    let settled = false;
    const finish = (fn, val) => { if (!settled) { settled = true; fn(val); } };

    const req = transport.request(parsedUrl, options, (res) => {
      const chunks = [];
      res.on('data', (chunk) => chunks.push(chunk));
      res.on('end', () => {
        // `timer` is declared below with const, but this callback only runs
        // after the synchronous body of the executor has finished, so no TDZ.
        if (timer) clearTimeout(timer);
        const raw = Buffer.concat(chunks).toString();
        // Non-2xx: reject with a truncated body excerpt for diagnostics.
        if (res.statusCode < 200 || res.statusCode >= 300) {
          finish(reject, new Error(`HTTP ${res.statusCode}: ${raw.slice(0, 500)}`));
          return;
        }
        try {
          finish(resolve, JSON.parse(raw));
        } catch (e) {
          finish(reject, new Error(`Invalid JSON response: ${raw.slice(0, 200)}`));
        }
      });
    });

    // Manual timeout: destroys the in-flight request and rejects. Only armed
    // when options.timeout is truthy.
    const timer = options.timeout
      ? setTimeout(() => { req.destroy(); finish(reject, new Error('Request timeout')); }, options.timeout)
      : null;

    req.on('error', (err) => { if (timer) clearTimeout(timer); finish(reject, err); });
    if (body) req.write(JSON.stringify(body));
    req.end();
  });
}
|
|
46
|
+
|
|
47
|
+
// ---------------------------------------------------------------------------
|
|
48
|
+
// Retry wrapper
|
|
49
|
+
// ---------------------------------------------------------------------------
|
|
50
|
+
|
|
51
|
+
/**
 * Runs `fn`, retrying on failure with exponential backoff.
 *
 * Backoff before attempt k (0-based) is `initialBackoffMs * 2**k`; no delay
 * follows the final failed attempt.
 *
 * @param {() => Promise<*>} fn - Operation to attempt.
 * @param {object} [opts]
 * @param {number} [opts.maxRetries=3] - Total number of attempts (must be >= 1).
 * @param {number} [opts.initialBackoffMs=2000] - Base backoff delay in ms.
 * @returns {Promise<*>} Result of the first successful attempt.
 * @throws {Error} The last attempt's error, or an Error when maxRetries < 1.
 */
async function withRetry(fn, { maxRetries = 3, initialBackoffMs = 2000 } = {}) {
  // Bug fix: with maxRetries <= 0 the loop never ran and `throw lastErr`
  // threw `undefined`, which is useless to callers. Fail loudly instead.
  if (maxRetries < 1) {
    throw new Error(`maxRetries must be >= 1 (got ${maxRetries})`);
  }

  let lastErr;
  for (let attempt = 0; attempt < maxRetries; attempt++) {
    try {
      return await fn();
    } catch (err) {
      lastErr = err;
      // Sleep only between attempts, never after the last one.
      if (attempt < maxRetries - 1) {
        const delay = initialBackoffMs * Math.pow(2, attempt);
        await new Promise((resolve) => setTimeout(resolve, delay));
      }
    }
  }
  throw lastErr;
}
|
|
66
|
+
|
|
67
|
+
// ---------------------------------------------------------------------------
|
|
68
|
+
// Ollama adapter
|
|
69
|
+
// ---------------------------------------------------------------------------
|
|
70
|
+
|
|
71
|
+
/**
 * Builds an embedder backed by a local Ollama server (`/api/embed`).
 *
 * Inputs are sent in chunks of `chunkSize` texts per request; each request is
 * retried with exponential backoff. The vector dimension is detected lazily
 * from the first non-empty response (`dim` is null until then).
 *
 * @param {object} config - ollamaUrl, model, chunkSize, timeout, maxRetries,
 *   initialBackoffMs (all optional, with defaults below).
 * @returns {{embed: Function, embedBatch: Function, dim: ?number}}
 */
function createOllamaEmbedder(config) {
  const baseUrl = config.ollamaUrl || 'http://localhost:11434';
  const model = config.model || 'bge-m3';
  const batchLimit = config.chunkSize || 32;
  const timeout = config.timeout || 120000;
  const retryOpts = {
    maxRetries: config.maxRetries || 3,
    initialBackoffMs: config.initialBackoffMs || 2000,
  };
  let detectedDim = null;

  // One retried POST to /api/embed for a single chunk of texts.
  const postChunk = (input) => withRetry(
    () => httpRequest(
      `${baseUrl}/api/embed`,
      {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        timeout,
      },
      { model, input }
    ),
    retryOpts
  );

  // Embeds every text, batchLimit at a time, preserving input order.
  async function embedAll(texts) {
    const collected = [];
    for (let start = 0; start < texts.length; start += batchLimit) {
      const result = await postChunk(texts.slice(start, start + batchLimit));
      const vectors = result.embeddings || [];
      collected.push(...vectors);
      // Remember the dimension the first time we see a non-empty vector.
      if (!detectedDim && vectors.length > 0 && vectors[0]) {
        detectedDim = vectors[0].length;
      }
    }
    return collected;
  }

  return {
    embed: async (text) => {
      const vectors = await embedAll([text]);
      return vectors[0] || [];
    },
    embedBatch: async (texts) => {
      if (!texts || texts.length === 0) return [];
      return embedAll(texts);
    },
    get dim() { return detectedDim; },
  };
}
|
|
121
|
+
|
|
122
|
+
// ---------------------------------------------------------------------------
|
|
123
|
+
// OpenAI adapter
|
|
124
|
+
// ---------------------------------------------------------------------------
|
|
125
|
+
|
|
126
|
+
/**
 * Builds an embedder backed by the OpenAI embeddings API.
 *
 * Texts are chunked (`chunkSize`, default 100) per request, each request is
 * retried with backoff, and response rows are re-sorted by `index` so the
 * output order matches the input order. `dim` is the configured dimension.
 *
 * @param {object} config - openaiApiKey (required), openaiModel,
 *   openaiDimensions, chunkSize, timeout, maxRetries, initialBackoffMs.
 * @returns {{embed: Function, embedBatch: Function, dim: number}}
 * @throws {Error} When openaiApiKey is missing.
 */
function createOpenAIEmbedder(config) {
  const apiKey = config.openaiApiKey;
  if (!apiKey) throw new Error('openaiApiKey is required for OpenAI embedder');

  const model = config.openaiModel || 'text-embedding-3-small';
  const dimensions = config.openaiDimensions || 1536;
  const timeout = config.timeout || 120000;
  const chunkSize = config.chunkSize || 100; // M7: batch chunking for OpenAI
  const retryOpts = {
    maxRetries: config.maxRetries || 3,
    initialBackoffMs: config.initialBackoffMs || 2000,
  };

  // Embeds every text, chunkSize at a time, preserving input order.
  async function embedAll(texts) {
    const vectors = [];
    for (let offset = 0; offset < texts.length; offset += chunkSize) {
      const input = texts.slice(offset, offset + chunkSize);
      const result = await withRetry(
        () => httpRequest(
          'https://api.openai.com/v1/embeddings',
          {
            method: 'POST',
            headers: {
              'Content-Type': 'application/json',
              'Authorization': `Bearer ${apiKey}`,
            },
            timeout,
          },
          { model, input, dimensions }
        ),
        retryOpts
      );

      // API may return rows out of order; restore input order via `index`.
      const rows = result.data || [];
      rows.sort((a, b) => a.index - b.index);
      for (const row of rows) vectors.push(row.embedding);
    }
    return vectors;
  }

  return {
    embed: async (text) => {
      const vectors = await embedAll([text]);
      return vectors[0] || [];
    },
    embedBatch: async (texts) => {
      if (!texts || texts.length === 0) return [];
      return embedAll(texts);
    },
    get dim() { return dimensions; },
  };
}
|
|
177
|
+
|
|
178
|
+
// ---------------------------------------------------------------------------
|
|
179
|
+
// Custom adapter
|
|
180
|
+
// ---------------------------------------------------------------------------
|
|
181
|
+
|
|
182
|
+
/**
 * Builds an embedder around a user-supplied batch embedding function.
 *
 * `config.fn` receives an array of strings and must resolve to an array of
 * vectors in the same order. The dimension is detected lazily from the first
 * non-empty result (`dim` is null until then).
 *
 * @param {object} config - Must contain `fn: (texts: string[]) => Promise<number[][]>`.
 * @returns {{embed: Function, embedBatch: Function, dim: ?number}}
 * @throws {Error} When config.fn is missing.
 */
function createCustomEmbedder(config) {
  const fn = config.fn;
  if (!fn) throw new Error('fn is required for custom embedder');

  let detectedDim = null;

  // Single-text convenience path: delegates to fn with a one-element batch.
  async function embedOne(text) {
    const results = await fn([text]);
    const vec = results[0] || [];
    if (!detectedDim && vec.length > 0) detectedDim = vec.length;
    return vec;
  }

  // Batch path: empty input short-circuits without invoking fn.
  async function embedMany(texts) {
    if (!texts || texts.length === 0) return [];
    const results = await fn(texts);
    if (!detectedDim && results.length > 0 && results[0]) {
      detectedDim = results[0].length;
    }
    return results;
  }

  return {
    embed: embedOne,
    embedBatch: embedMany,
    get dim() { return detectedDim; },
  };
}
|
|
206
|
+
|
|
207
|
+
// ---------------------------------------------------------------------------
|
|
208
|
+
// Factory
|
|
209
|
+
// ---------------------------------------------------------------------------
|
|
210
|
+
|
|
211
|
+
function createEmbedder(config = {}) {
|
|
212
|
+
const provider = config.provider || 'ollama';
|
|
213
|
+
|
|
214
|
+
switch (provider) {
|
|
215
|
+
case 'ollama':
|
|
216
|
+
return createOllamaEmbedder(config);
|
|
217
|
+
case 'openai':
|
|
218
|
+
return createOpenAIEmbedder(config);
|
|
219
|
+
case 'custom':
|
|
220
|
+
return createCustomEmbedder(config);
|
|
221
|
+
default:
|
|
222
|
+
throw new Error(`Unknown embedding provider: ${provider}`);
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
// ---------------------------------------------------------------------------
|
|
227
|
+
// Exports
|
|
228
|
+
// ---------------------------------------------------------------------------
|
|
229
|
+
|
|
230
|
+
module.exports = { createEmbedder };
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { parseEntityOutput } = require('../core/entity');
|
|
4
|
+
|
|
5
|
+
// ---------------------------------------------------------------------------
|
|
6
|
+
// defaultEntityPrompt
|
|
7
|
+
// ---------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
/**
 * Builds the default entity-extraction prompt for a conversation.
 *
 * @param {Array<{role: string, content: *}>} messages - Conversation turns;
 *   non-string content is JSON-serialized into the transcript.
 * @param {object} [opts] - Unused; kept for prompt-builder signature parity.
 * @returns {string} Prompt text ending with the rendered conversation.
 */
function defaultEntityPrompt(messages, opts = {}) {
  // Render each turn as "[role] text"; structured content is JSON-encoded.
  const lines = [];
  for (const msg of messages) {
    const text = typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
    lines.push(`[${msg.role}] ${text}`);
  }
  const transcript = lines.join('\n');

  return `Extract named entities from the following conversation.

Output in this exact format (one entity per block, separated by ---):

[ENTITIES]
name: <display name, original casing>
type: <person|project|concept|tool|metric|org|place|event|doc|task|topic|other>
aliases: <comma-separated alternative names, or empty>
---

Rules:
- Only extract entities discussed substantively (not just mentioned in passing)
- Normalize aliases (e.g., "React.js" and "React" are aliases)
- Choose the most specific type
- Minimum 0, maximum 15 entities
- If no entities found, output only: [ENTITIES]\n(none)

Example:
[ENTITIES]
name: PostgreSQL
type: tool
aliases: Postgres, PG
---
name: Alice Chen
type: person
aliases: Alice
---

---
CONVERSATION:
${transcript}`;
}
|
|
46
|
+
|
|
47
|
+
// ---------------------------------------------------------------------------
|
|
48
|
+
// extractEntities
|
|
49
|
+
// ---------------------------------------------------------------------------
|
|
50
|
+
|
|
51
|
+
/**
 * Extracts entities from a conversation via an injected LLM call.
 *
 * Best-effort by design: with no `llmFn`, or on any LLM/parse failure, it
 * resolves to an empty array and never throws.
 *
 * @param {Array<object>} messages - Conversation turns.
 * @param {object} [opts]
 * @param {(prompt: string) => Promise<string>} [opts.llmFn] - LLM completion call.
 * @param {Function} [opts.promptFn] - Custom prompt builder; defaults to
 *   defaultEntityPrompt.
 * @returns {Promise<Array>} Parsed entities, or [] on failure.
 */
async function extractEntities(messages, { llmFn, promptFn } = {}) {
  if (!llmFn) return [];

  const buildPrompt = promptFn ? promptFn : defaultEntityPrompt;

  try {
    const response = await llmFn(buildPrompt(messages, {}));
    return parseEntityOutput(response);
  } catch (err) {
    // Extraction is best-effort: any LLM or parse failure yields no entities.
    return [];
  }
}
|
|
68
|
+
|
|
69
|
+
// ---------------------------------------------------------------------------
|
|
70
|
+
// Exports
|
|
71
|
+
// ---------------------------------------------------------------------------
|
|
72
|
+
|
|
73
|
+
module.exports = { defaultEntityPrompt, extractEntities, parseEntityOutput };
|
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
// defaultSummarizePrompt
|
|
5
|
+
// ---------------------------------------------------------------------------
|
|
6
|
+
|
|
7
|
+
/**
 * Builds the default summarization prompt for a conversation.
 *
 * @param {Array<{role: string, content: *}>} messages - Conversation turns;
 *   non-string content is JSON-serialized into the transcript.
 * @param {object} [opts]
 * @param {boolean} [opts.mergeEntities] - When truthy, appends instructions
 *   asking the model to also emit an [ENTITIES] section after the summary.
 * @returns {string} Prompt text ending with the rendered conversation.
 */
function defaultSummarizePrompt(messages, opts = {}) {
  // Render each turn as "[role] text"; structured content is JSON-encoded.
  const conversation = messages
    .map(m => `[${m.role}] ${typeof m.content === 'string' ? m.content : JSON.stringify(m.content)}`)
    .join('\n');

  // Optional combined entity-extraction instructions (spliced into the main
  // prompt below via ${entitySection}; empty string when mergeEntities is off).
  let entitySection = '';
  if (opts.mergeEntities) {
    entitySection = `

Also extract named entities from this conversation. Output them after the summary in this exact format:

[ENTITIES]
name: <display name, original casing>
type: <person|project|concept|tool|metric|org|place|event|doc|task|topic|other>
aliases: <comma-separated alternative names, or empty>
---
(repeat for each entity)

Rules for entities:
- Only extract entities that are discussed substantively (not just mentioned in passing)
- Normalize aliases (e.g., "React.js" and "React" are aliases of the same entity)
- Choose the most specific type that fits
- Minimum 0, maximum 15 entities per session`;
  }

  // The TITLE/OVERVIEW/TOPICS/... layout below must stay in sync with the
  // section regexes in _parseStructuredSummary.
  return `Summarize the following conversation concisely. Focus on:
1. What was discussed (main topics)
2. What was decided or concluded
3. What actions were taken or planned
4. Any unresolved questions or open loops

Output a structured summary in this exact format (follow spacing and prefixes precisely):

TITLE: <one-line title>
OVERVIEW: <2-3 sentence overview>
TOPICS:
- <topic name>: <brief summary>
DECISIONS:
- <decision>: <reason>
OPEN_LOOPS:
- <unresolved item>
IMPORTANT_FACTS:
- <key fact>

Example:
TITLE: Database migration strategy discussion
OVERVIEW: Team discussed migrating from MySQL to PostgreSQL. Decided to use pgloader for data transfer. Timeline set for next sprint.
TOPICS:
- Migration tooling: Evaluated pgloader vs custom scripts, chose pgloader for reliability
- Schema changes: Need to convert ENUM types to CHECK constraints
DECISIONS:
- Use pgloader: Handles type conversion automatically, proven in production
OPEN_LOOPS:
- Need to benchmark query performance on new schema
IMPORTANT_FACTS:
- Current DB size is 45GB with 12M rows in largest table
${entitySection}

---
CONVERSATION:
${conversation}`;
}
|
|
69
|
+
|
|
70
|
+
// ---------------------------------------------------------------------------
|
|
71
|
+
// parseStructuredSummary — extract fields from LLM output
|
|
72
|
+
// ---------------------------------------------------------------------------
|
|
73
|
+
|
|
74
|
+
/**
 * Parses the structured summary emitted by defaultSummarizePrompt
 * (TITLE / OVERVIEW / TOPICS / DECISIONS / OPEN_LOOPS / IMPORTANT_FACTS)
 * into a plain object. Tolerates missing sections and a trailing [ENTITIES]
 * block.
 *
 * Bug fix: the section regexes previously ended in `(?=...|$)` under the `m`
 * flag. A multiline `$` matches at the end of EVERY line, so each lazy
 * capture stopped at its first line break — multi-line sections lost all but
 * their first bullet, and multi-line overviews were truncated. The lookaheads
 * now use `(?![\s\S])`, which asserts true end-of-input.
 *
 * @param {string} text - Raw LLM output.
 * @returns {?{title: string, overview: string, topics: Array, decisions: Array,
 *   open_loops: Array, important_facts: Array}} Parsed fields, or null for
 *   empty/missing input.
 */
function _parseStructuredSummary(text) {
  if (!text) return null;

  // Captures a section with `re`, returning its '- ' bullet lines with the
  // leading dash stripped and whitespace trimmed ([] when absent).
  const bulletLines = (re) => {
    const m = text.match(re);
    if (!m) return [];
    return m[1]
      .split('\n')
      .filter((l) => l.trim().startsWith('-'))
      .map((l) => l.replace(/^\s*-\s*/, '').trim());
  };

  // Splits a "Key: value" bullet into a two-field object; bullets without a
  // colon get the whole text as the key and an empty value.
  const keyed = (entry, keyName, valName) => {
    const colonIdx = entry.indexOf(':');
    if (colonIdx > 0) {
      return {
        [keyName]: entry.slice(0, colonIdx).trim(),
        [valName]: entry.slice(colonIdx + 1).trim(),
      };
    }
    return { [keyName]: entry, [valName]: '' };
  };

  const result = {
    title: '',
    overview: '',
    topics: [],
    decisions: [],
    open_loops: [],
    important_facts: [],
  };

  // TITLE is single-line by contract ('.' excludes newlines).
  const titleMatch = text.match(/^TITLE:\s*(.+)/m);
  if (titleMatch) result.title = titleMatch[1].trim();

  // OVERVIEW runs until the next section header or end of input.
  const overviewMatch = text.match(/^OVERVIEW:\s*([\s\S]*?)(?=\n(?:TOPICS|DECISIONS|OPEN_LOOPS|IMPORTANT_FACTS|\[ENTITIES\])|(?![\s\S]))/m);
  if (overviewMatch) result.overview = overviewMatch[1].trim();

  result.topics = bulletLines(/^TOPICS:\s*\n([\s\S]*?)(?=\n(?:DECISIONS|OPEN_LOOPS|IMPORTANT_FACTS|\[ENTITIES\])|(?![\s\S]))/m)
    .map((entry) => keyed(entry, 'name', 'summary'));

  result.decisions = bulletLines(/^DECISIONS:\s*\n([\s\S]*?)(?=\n(?:OPEN_LOOPS|IMPORTANT_FACTS|\[ENTITIES\])|(?![\s\S]))/m)
    .map((entry) => keyed(entry, 'decision', 'reason'));

  result.open_loops = bulletLines(/^OPEN_LOOPS:\s*\n([\s\S]*?)(?=\n(?:IMPORTANT_FACTS|\[ENTITIES\])|(?![\s\S]))/m)
    .map((item) => ({ item }));

  // Empty facts are dropped (matches the original `if (fact)` guard).
  result.important_facts = bulletLines(/^IMPORTANT_FACTS:\s*\n([\s\S]*?)(?=\n\[ENTITIES\]|(?![\s\S]))/m)
    .filter(Boolean);

  return result;
}
|
|
151
|
+
|
|
152
|
+
// ---------------------------------------------------------------------------
|
|
153
|
+
// extractiveFallback
|
|
154
|
+
// ---------------------------------------------------------------------------
|
|
155
|
+
|
|
156
|
+
/**
 * LLM-free fallback summary: stitches together raw user-message text.
 *
 * Takes every user turn's text (string content, or the joined `text` parts of
 * array content), keeps at most the first 3 and last 3 turns (deduplicated),
 * joins them with '---' separators, and caps the result at 2000 chars.
 *
 * @param {?Array<object>} messages - Conversation turns (null/undefined ok).
 * @returns {{summaryText: string, structuredSummary: null, entityRaw: null,
 *   isExtractive: true}}
 */
function extractiveFallback(messages) {
  // Collect non-empty text from user turns only.
  const texts = [];
  for (const m of messages || []) {
    if (m.role !== 'user') continue;

    let text = '';
    if (typeof m.content === 'string') {
      text = m.content.trim();
    } else if (Array.isArray(m.content)) {
      text = m.content
        .filter((p) => p.type === 'text')
        .map((p) => p.text)
        .join('\n')
        .trim();
    }
    if (text) texts.push(text);
  }

  // Keep everything when short; otherwise head(3) + tail(3), skipping tail
  // items already present in the head.
  let selected;
  if (texts.length > 6) {
    const head = texts.slice(0, 3);
    const seen = new Set(head);
    selected = head.concat(texts.slice(-3).filter((t) => !seen.has(t)));
  } else {
    selected = texts;
  }

  return {
    summaryText: selected.join('\n---\n').slice(0, 2000),
    structuredSummary: null,
    entityRaw: null,
    isExtractive: true,
  };
}
|
|
190
|
+
|
|
191
|
+
// ---------------------------------------------------------------------------
|
|
192
|
+
// summarize
|
|
193
|
+
// ---------------------------------------------------------------------------
|
|
194
|
+
|
|
195
|
+
/**
 * Summarizes a conversation via an injected LLM call, with extractive fallback.
 *
 * With no `llmFn`, or when the LLM call throws, returns the extractive
 * fallback instead of throwing. When `mergeEntities` is set and the response
 * contains an [ENTITIES] marker, the marker and everything after it are moved
 * into `entityRaw` and stripped from `summaryText` (M6).
 *
 * @param {Array<object>} messages - Conversation turns.
 * @param {object} [opts]
 * @param {(prompt: string) => Promise<string>} [opts.llmFn] - LLM completion call.
 * @param {Function} [opts.promptFn] - Custom prompt builder; defaults to
 *   defaultSummarizePrompt.
 * @param {boolean} [opts.mergeEntities=false] - Ask for and split out entities.
 * @returns {Promise<{summaryText: string, structuredSummary: ?object,
 *   entityRaw: ?string, isExtractive: boolean}>}
 */
async function summarize(messages, {
  llmFn,
  promptFn,
  mergeEntities = false,
} = {}) {
  if (!llmFn) return extractiveFallback(messages);

  const buildPrompt = promptFn || defaultSummarizePrompt;

  try {
    const response = await llmFn(buildPrompt(messages, { mergeEntities }));
    const structuredSummary = _parseStructuredSummary(response);

    // Split the [ENTITIES] tail (if requested and present) away from the
    // summary text in one pass.
    let entityRaw = null;
    let summaryText = response;
    if (mergeEntities) {
      const markerIdx = response.indexOf('[ENTITIES]');
      if (markerIdx !== -1) {
        entityRaw = response.slice(markerIdx);
        // M6 fix: keep the entity block out of the stored summary text.
        summaryText = response.slice(0, markerIdx).trim();
      }
    }

    return {
      summaryText,
      structuredSummary,
      entityRaw,
      isExtractive: false,
    };
  } catch (err) {
    // LLM failure is non-fatal: degrade to the extractive summary.
    return extractiveFallback(messages);
  }
}
|
|
240
|
+
|
|
241
|
+
// ---------------------------------------------------------------------------
|
|
242
|
+
// Exports
|
|
243
|
+
// ---------------------------------------------------------------------------
|
|
244
|
+
|
|
245
|
+
module.exports = { defaultSummarizePrompt, summarize, extractiveFallback };
|