@yeaft/webchat-agent 0.1.399 → 0.1.409

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,315 @@
1
+ /**
2
+ * chat-completions.js — OpenAI Chat Completions API adapter
3
+ *
4
+ * Covers ALL OpenAI-compatible backends via baseUrl:
5
+ * - https://api.openai.com/v1 → OpenAI direct
6
+ * - https://api.deepseek.com → DeepSeek
7
+ * - http://localhost:6628/v1 → CopilotProxy
8
+ * - Azure, Ollama, LMStudio, etc.
9
+ *
10
+ * Key translation responsibilities:
11
+ * Request: UnifiedToolDef → { type: "function", function: { name, description, parameters } }
12
+ * Response: delta.tool_calls[i].function.arguments (JSON string) → accumulate → JSON.parse → UnifiedToolCall
13
+ * Result: UnifiedToolResult → { role: "tool", tool_call_id, content }
14
+ * Finish: "tool_calls" → "tool_use", "stop" → "end_turn", "length" → "max_tokens"
15
+ */
16
+
17
+ import {
18
+ LLMAdapter,
19
+ LLMRateLimitError,
20
+ LLMAuthError,
21
+ LLMContextError,
22
+ LLMServerError,
23
+ LLMAbortError,
24
+ } from './adapter.js';
25
+
26
+ /**
27
+ * ChatCompletionsAdapter — Talks to OpenAI Chat Completions API and compatibles.
28
+ */
29
+ export class ChatCompletionsAdapter extends LLMAdapter {
30
+ #apiKey;
31
+ #baseUrl;
32
+
33
+ /**
34
+ * @param {{ apiKey: string, baseUrl: string }} config
35
+ */
36
+ constructor({ apiKey, baseUrl }) {
37
+ super({ apiKey, baseUrl });
38
+ this.#apiKey = apiKey;
39
+ this.#baseUrl = baseUrl.replace(/\/+$/, ''); // strip trailing slash
40
+ }
41
+
42
+ /**
43
+ * Translate UnifiedToolDef[] → Chat Completions tool format.
44
+ * @param {import('./adapter.js').UnifiedToolDef[]} tools
45
+ * @returns {object[]|undefined}
46
+ */
47
+ #translateTools(tools) {
48
+ if (!tools || tools.length === 0) return undefined;
49
+ return tools.map(t => ({
50
+ type: 'function',
51
+ function: {
52
+ name: t.name,
53
+ description: t.description,
54
+ parameters: t.parameters,
55
+ },
56
+ }));
57
+ }
58
+
59
+ /**
60
+ * Translate UnifiedMessage[] → Chat Completions message format.
61
+ * @param {string} system — System prompt
62
+ * @param {import('./adapter.js').UnifiedMessage[]} messages
63
+ * @returns {object[]}
64
+ */
65
+ #translateMessages(system, messages) {
66
+ const result = [];
67
+
68
+ // System message first
69
+ if (system) {
70
+ result.push({ role: 'system', content: system });
71
+ }
72
+
73
+ for (const msg of messages) {
74
+ if (msg.role === 'system') {
75
+ result.push({ role: 'system', content: msg.content });
76
+ } else if (msg.role === 'user') {
77
+ result.push({ role: 'user', content: msg.content });
78
+ } else if (msg.role === 'assistant') {
79
+ const entry = { role: 'assistant' };
80
+ // Some OpenAI-compatible APIs require `content: null` when tool_calls are present
81
+ entry.content = msg.content || null;
82
+ if (msg.toolCalls && msg.toolCalls.length > 0) {
83
+ entry.tool_calls = msg.toolCalls.map(tc => ({
84
+ id: tc.id,
85
+ type: 'function',
86
+ function: {
87
+ name: tc.name,
88
+ arguments: JSON.stringify(tc.input),
89
+ },
90
+ }));
91
+ }
92
+ result.push(entry);
93
+ } else if (msg.role === 'tool') {
94
+ result.push({
95
+ role: 'tool',
96
+ tool_call_id: msg.toolCallId,
97
+ content: msg.content,
98
+ });
99
+ }
100
+ }
101
+ return result;
102
+ }
103
+
104
+ /**
105
+ * Classify HTTP errors.
106
+ * @param {number} status
107
+ * @param {string} body
108
+ */
109
+ #classifyError(status, body) {
110
+ if (status === 401 || status === 403) {
111
+ return new LLMAuthError(`Auth error: ${body}`, status);
112
+ }
113
+ if (status === 429) {
114
+ return new LLMRateLimitError(`Rate limit: ${body}`, status);
115
+ }
116
+ if (status === 529) {
117
+ return new LLMRateLimitError(`Overloaded: ${body}`, status);
118
+ }
119
+ if (status === 413 || body.includes('context_length_exceeded') || body.includes('maximum context length')) {
120
+ return new LLMContextError(`Context too long: ${body}`);
121
+ }
122
+ if (status >= 500) {
123
+ return new LLMServerError(`Server error: ${body}`, status);
124
+ }
125
+ return new Error(`API error ${status}: ${body}`);
126
+ }
127
+
128
+ /**
129
+ * Map Chat Completions finish_reason → unified stop reason.
130
+ * @param {string|null} reason
131
+ * @returns {'end_turn' | 'tool_use' | 'max_tokens'}
132
+ */
133
+ #mapFinishReason(reason) {
134
+ switch (reason) {
135
+ case 'tool_calls': return 'tool_use';
136
+ case 'stop': return 'end_turn';
137
+ case 'length': return 'max_tokens';
138
+ default: return 'end_turn';
139
+ }
140
+ }
141
+
142
+ /**
143
+ * @param {{ model: string, system: string, messages: import('./adapter.js').UnifiedMessage[], tools?: import('./adapter.js').UnifiedToolDef[], maxTokens?: number, signal?: AbortSignal }} params
144
+ * @returns {AsyncGenerator<import('./adapter.js').StreamEvent>}
145
+ */
146
+ async *stream({ model, system, messages, tools, maxTokens = 16384, signal }) {
147
+ if (signal?.aborted) throw new LLMAbortError();
148
+
149
+ const body = {
150
+ model,
151
+ messages: this.#translateMessages(system, messages),
152
+ max_tokens: maxTokens,
153
+ stream: true,
154
+ stream_options: { include_usage: true },
155
+ };
156
+
157
+ const translatedTools = this.#translateTools(tools);
158
+ if (translatedTools) body.tools = translatedTools;
159
+
160
+ const response = await fetch(`${this.#baseUrl}/chat/completions`, {
161
+ method: 'POST',
162
+ headers: {
163
+ 'Content-Type': 'application/json',
164
+ 'Authorization': `Bearer ${this.#apiKey}`,
165
+ },
166
+ body: JSON.stringify(body),
167
+ signal,
168
+ });
169
+
170
+ if (!response.ok) {
171
+ const errorBody = await response.text();
172
+ throw this.#classifyError(response.status, errorBody);
173
+ }
174
+
175
+ // Parse SSE stream
176
+ const reader = response.body.getReader();
177
+ const decoder = new TextDecoder();
178
+ let buffer = '';
179
+
180
+ // Tool call accumulation — Chat Completions sends tool args as fragments
181
+ // keyed by index within the delta.tool_calls array
182
+ /** @type {Map<number, { id: string, name: string, arguments: string }>} */
183
+ const toolCallAccum = new Map();
184
+
185
+ try {
186
+ while (true) {
187
+ const { done, value } = await reader.read();
188
+ if (done) break;
189
+
190
+ buffer += decoder.decode(value, { stream: true });
191
+ const lines = buffer.split('\n');
192
+ buffer = lines.pop() || '';
193
+
194
+ for (const line of lines) {
195
+ if (!line.startsWith('data: ')) continue;
196
+ const data = line.slice(6).trim();
197
+ if (data === '[DONE]') continue;
198
+
199
+ let chunk;
200
+ try {
201
+ chunk = JSON.parse(data);
202
+ } catch {
203
+ continue;
204
+ }
205
+
206
+ // Usage (from stream_options: include_usage)
207
+ if (chunk.usage) {
208
+ yield {
209
+ type: 'usage',
210
+ inputTokens: chunk.usage.prompt_tokens || 0,
211
+ outputTokens: chunk.usage.completion_tokens || 0,
212
+ cacheReadTokens: chunk.usage.prompt_tokens_details?.cached_tokens || 0,
213
+ cacheWriteTokens: 0,
214
+ };
215
+ }
216
+
217
+ const choice = chunk.choices?.[0];
218
+ if (!choice) continue;
219
+
220
+ const delta = choice.delta;
221
+ if (!delta) continue;
222
+
223
+ // Text content
224
+ if (delta.content) {
225
+ yield { type: 'text_delta', text: delta.content };
226
+ }
227
+
228
+ // Tool calls (streamed as fragments)
229
+ if (delta.tool_calls) {
230
+ for (const tc of delta.tool_calls) {
231
+ const idx = tc.index;
232
+ if (!toolCallAccum.has(idx)) {
233
+ toolCallAccum.set(idx, {
234
+ id: tc.id || '',
235
+ name: tc.function?.name || '',
236
+ arguments: '',
237
+ });
238
+ }
239
+ const accum = toolCallAccum.get(idx);
240
+ if (tc.id) accum.id = tc.id;
241
+ if (tc.function?.name) accum.name = tc.function.name;
242
+ if (tc.function?.arguments) accum.arguments += tc.function.arguments;
243
+ }
244
+ }
245
+
246
+ // Finish reason
247
+ if (choice.finish_reason) {
248
+ // Emit accumulated tool calls before stop
249
+ for (const [, accum] of toolCallAccum) {
250
+ let parsedInput = {};
251
+ try {
252
+ parsedInput = accum.arguments ? JSON.parse(accum.arguments) : {};
253
+ } catch {
254
+ parsedInput = {};
255
+ }
256
+ yield {
257
+ type: 'tool_call',
258
+ id: accum.id,
259
+ name: accum.name,
260
+ input: parsedInput,
261
+ };
262
+ }
263
+ toolCallAccum.clear();
264
+
265
+ yield {
266
+ type: 'stop',
267
+ stopReason: this.#mapFinishReason(choice.finish_reason),
268
+ };
269
+ }
270
+ }
271
+ }
272
+ } finally {
273
+ reader.releaseLock();
274
+ }
275
+ }
276
+
277
+ /**
278
+ * Non-streaming call for side queries.
279
+ */
280
+ async call({ model, system, messages, maxTokens = 4096, signal }) {
281
+ if (signal?.aborted) throw new LLMAbortError();
282
+
283
+ const body = {
284
+ model,
285
+ messages: this.#translateMessages(system, messages),
286
+ max_tokens: maxTokens,
287
+ };
288
+
289
+ const response = await fetch(`${this.#baseUrl}/chat/completions`, {
290
+ method: 'POST',
291
+ headers: {
292
+ 'Content-Type': 'application/json',
293
+ 'Authorization': `Bearer ${this.#apiKey}`,
294
+ },
295
+ body: JSON.stringify(body),
296
+ signal,
297
+ });
298
+
299
+ if (!response.ok) {
300
+ const errorBody = await response.text();
301
+ throw this.#classifyError(response.status, errorBody);
302
+ }
303
+
304
+ const result = await response.json();
305
+ const text = result.choices?.[0]?.message?.content || '';
306
+
307
+ return {
308
+ text,
309
+ usage: {
310
+ inputTokens: result.usage?.prompt_tokens || 0,
311
+ outputTokens: result.usage?.completion_tokens || 0,
312
+ },
313
+ };
314
+ }
315
+ }
@@ -0,0 +1,187 @@
1
+ /**
2
+ * consolidate.js — Consolidate = compact + extract (one LLM call)
3
+ *
4
+ * Triggered when hot_tokens > MESSAGE_TOKEN_BUDGET.
5
+ * One LLM call does two things simultaneously:
6
+ * 1. Generate compact summary → append to compact.md ("short-term memory")
7
+ * 2. Extract memory entries → write to entries/ ("long-term memory")
8
+ *
9
+ * After consolidation:
10
+ * - Processed messages moved from messages/ to cold/
11
+ * - index.md + scopes.md updated
12
+ *
13
+ * Reference: yeaft-unify-core-systems.md §3.1, §4.2
14
+ * yeaft-unify-design.md §6.1
15
+ */
16
+
17
+ import { extractMemories } from './extract.js';
18
+
19
// ─── Constants ──────────────────────────────────────────────────

/** Default MESSAGE_TOKEN_BUDGET (context * 4%, default ~8192). */
export const DEFAULT_MESSAGE_TOKEN_BUDGET = 8192;

/** After compact, keep this fraction of the budget. */
export const COMPACT_KEEP_RATIO = 0.4;

/** Minimum messages to keep hot (newest). */
const MIN_KEEP_MESSAGES = 3;

// ─── Consolidate ────────────────────────────────────────────────

/**
 * Check if consolidation should be triggered.
 *
 * @param {import('../conversation/persist.js').ConversationStore} conversationStore
 * @param {number} [budget] — MESSAGE_TOKEN_BUDGET
 * @returns {boolean} — true when hot tokens exceed the budget
 */
export function shouldConsolidate(conversationStore, budget = DEFAULT_MESSAGE_TOKEN_BUDGET) {
  return conversationStore.hotTokens() > budget;
}

/**
 * Determine which messages to archive (move to cold).
 * Strategy: from oldest, accumulate tokens until remaining ≤ budget * 40%.
 * Always keep at least MIN_KEEP_MESSAGES.
 *
 * @param {object[]} messages — all hot messages, sorted chronologically
 * @param {number} budget — MESSAGE_TOKEN_BUDGET
 * @returns {{ toArchive: object[], toKeep: object[] }}
 */
export function partitionMessages(messages, budget = DEFAULT_MESSAGE_TOKEN_BUDGET) {
  // Nothing to archive when the conversation is already minimal.
  if (messages.length <= MIN_KEEP_MESSAGES) {
    return { toArchive: [], toKeep: messages };
  }

  const keepBudget = Math.floor(budget * COMPACT_KEEP_RATIO);

  // Scan newest → oldest, accumulating token cost until the keep budget
  // would overflow; everything older than the cut point gets archived.
  let accumulated = 0;
  let cut = messages.length;

  for (let i = messages.length - 1; i >= 0; i--) {
    const cost = messages[i].tokens_est || 0;
    const wouldOverflow = accumulated + cost > keepBudget;
    const haveEnough = messages.length - i >= MIN_KEEP_MESSAGES;
    if (wouldOverflow && haveEnough) {
      cut = i + 1;
      break;
    }
    accumulated += cost;
    if (i === 0) cut = 0; // whole history fits — keep everything
  }

  // Clamp: never keep fewer than MIN_KEEP_MESSAGES, never cut below zero.
  cut = Math.max(0, Math.min(cut, messages.length - MIN_KEEP_MESSAGES));

  return {
    toArchive: messages.slice(0, cut),
    toKeep: messages.slice(cut),
  };
}
83
+
84
/**
 * Generate a compact summary of messages.
 *
 * Each message is rendered as "[Role]: <first 500 chars>" and the result is
 * summarized by a single LLM call. On any LLM failure, a degraded but
 * non-empty fallback summary is returned instead of throwing.
 *
 * @param {object[]} messages — messages to summarize
 * @param {object} adapter — LLM adapter with .call()
 * @param {object} config — { model }
 * @returns {Promise<string>} — compact summary text
 */
async function generateSummary(messages, adapter, config) {
  const roleLabel = (role) =>
    role === 'user' ? 'User' : role === 'assistant' ? 'Assistant' : role;

  const conversation = messages
    .map((m) => `[${roleLabel(m.role)}]: ${(m.content || '').slice(0, 500)}`)
    .join('\n\n');

  const system = 'You are a conversation summarizer. Summarize the conversation concisely in 2-3 paragraphs, preserving key decisions, facts, and context. Write in the same language as the conversation.';

  try {
    const result = await adapter.call({
      model: config.model,
      system,
      messages: [{ role: 'user', content: `Summarize this conversation:\n\n${conversation}` }],
      maxTokens: 1024,
    });
    return result.text.trim();
  } catch {
    // Fallback: simple concatenation of first/last messages
    const first = messages[0]?.content?.slice(0, 200) || '';
    const last = messages[messages.length - 1]?.content?.slice(0, 200) || '';
    return `[Auto-summary failed] Started with: ${first}... Ended with: ${last}`;
  }
}
115
+
116
/**
 * Run the full Consolidate pipeline.
 *
 * 1. Partition messages (what to archive vs keep)
 * 2. Generate compact summary (LLM call)
 * 3. Extract memory entries (LLM call)
 * 4. Move archived messages to cold/
 * 5. Update compact.md, index.md, scopes.md
 *
 * @param {{
 *   conversationStore: import('../conversation/persist.js').ConversationStore,
 *   memoryStore: import('./store.js').MemoryStore,
 *   adapter: object,
 *   config: object,
 *   budget?: number
 * }} params
 * @returns {Promise<{ compactSummary: string, extractedEntries: string[], archivedCount: number }>}
 */
export async function consolidate({ conversationStore, memoryStore, adapter, config, budget = DEFAULT_MESSAGE_TOKEN_BUDGET }) {
  // A fresh no-op result per early-exit path (avoids shared mutable state).
  const emptyResult = () => ({ compactSummary: '', extractedEntries: [], archivedCount: 0 });

  // Load all hot messages
  const messages = conversationStore.loadAll();
  if (messages.length <= MIN_KEEP_MESSAGES) {
    return emptyResult();
  }

  // Step 1: Partition into archive/keep sets
  const { toArchive, toKeep } = partitionMessages(messages, budget);
  if (toArchive.length === 0) {
    return emptyResult();
  }

  // Step 2: Generate compact summary ("short-term memory")
  const compactSummary = await generateSummary(toArchive, adapter, config);

  // Step 3: Extract memory entries ("long-term memory")
  const extracted = await extractMemories({ messages: toArchive, adapter, config });

  // Step 4: Move archived messages to cold storage
  const archiveIds = toArchive.map((m) => m.id).filter(Boolean);
  conversationStore.moveToColdBatch(archiveIds);

  // Step 5a: Update compact.md
  if (compactSummary) {
    conversationStore.updateCompactSummary(compactSummary);
  }

  // Step 5b: Write extracted memory entries, collecting their slugs
  const extractedEntries = extracted.map((entry) => memoryStore.writeEntry(entry));

  // Step 5c: Update index.md to point at the newest kept message
  const newestKept = toKeep.at(-1);
  conversationStore.updateIndex({
    lastMessageId: newestKept?.id || null,
  });

  // Step 5d: Rebuild scopes.md only when new entries were written
  if (extractedEntries.length > 0) {
    memoryStore.rebuildScopes();
  }

  return {
    compactSummary,
    extractedEntries,
    archivedCount: archiveIds.length,
  };
}
@@ -0,0 +1,97 @@
1
+ /**
2
+ * extract.js — Extract memory-worthy entries from conversation
3
+ *
4
+ * Called by consolidate.js during the Consolidate lifecycle.
5
+ * Uses a single LLM call to identify facts, preferences, skills,
6
+ * lessons, contexts, and relations from conversation messages.
7
+ *
8
+ * Reference: yeaft-unify-core-systems.md §3.1, yeaft-unify-design.md §6.1
9
+ */
10
+
11
+ import { MEMORY_KINDS } from './store.js';
12
+
13
/**
 * Build the extraction prompt.
 *
 * Renders each message as "[Role]: content" (unknown roles become "System")
 * and embeds the transcript plus extraction rules into a single prompt.
 *
 * @param {object[]} messages — conversation messages to analyze
 * @returns {string}
 */
function buildExtractionPrompt(messages) {
  const roleLabel = (role) =>
    role === 'user' ? 'User' : role === 'assistant' ? 'Assistant' : 'System';

  const conversation = messages
    .map((m) => `[${roleLabel(m.role)}]: ${m.content}`)
    .join('\n\n');

  return `Analyze the following conversation and extract any memorable information worth saving to long-term memory.

For each memory, provide:
- **name**: A short slug-friendly name (e.g., "user-prefers-typescript", "project-uses-vue3")
- **kind**: One of: ${MEMORY_KINDS.join(', ')}
- **scope**: A tree path (e.g., "global", "tech/typescript", "work/project-name")
- **tags**: Relevant keywords as an array
- **importance**: "high", "normal", or "low"
- **content**: 1-3 sentences describing the memory

Memory kinds explained:
- fact: Objective facts (project structure, tech stack)
- preference: User preferences (coding style, tools)
- skill: How to do something (patterns, techniques)
- lesson: Lessons learned (bugs, pitfalls)
- context: Temporal context (current OKR, progress)
- relation: People and relationships (teammates, roles)

Do NOT extract:
- Specific code snippets (too large, will become stale)
- Temporary debugging information
- Trivial greetings or small talk

Return a JSON array of memory objects. If nothing is worth remembering, return an empty array [].

Conversation:
${conversation}`;
}
52
+
53
/**
 * Extract memory entries from a set of conversation messages.
 *
 * Sends one LLM call, parses the first JSON array found in the response,
 * then validates and normalizes each entry (unknown kinds fall back to
 * 'fact', missing scope to 'global', missing importance to 'normal').
 * Any LLM or parse failure yields an empty array — extraction is
 * intentionally best-effort and never throws.
 *
 * @param {{ messages: object[], adapter: object, config: object }} params
 * @returns {Promise<object[]>} — extracted memory entries
 */
export async function extractMemories({ messages, adapter, config }) {
  if (!messages || messages.length === 0) return [];

  const system = 'You are a memory extraction assistant. Analyze conversations and extract important facts, preferences, and lessons. Return ONLY a valid JSON array, no other text.';

  const extractionPrompt = buildExtractionPrompt(messages);

  try {
    const { text: rawText } = await adapter.call({
      model: config.model,
      system,
      messages: [{ role: 'user', content: extractionPrompt }],
      maxTokens: 2048,
    });

    // Locate the JSON array inside the (possibly chatty) response.
    const arrayMatch = rawText.trim().match(/\[[\s\S]*\]/);
    if (!arrayMatch) return [];

    const parsed = JSON.parse(arrayMatch[0]);

    // Validate and normalize every entry that has the required fields.
    const normalized = [];
    for (const e of parsed) {
      if (!e || typeof e !== 'object' || !e.name || !e.content) continue;
      normalized.push({
        name: String(e.name).slice(0, 80),
        kind: MEMORY_KINDS.includes(e.kind) ? e.kind : 'fact',
        scope: String(e.scope || 'global'),
        tags: Array.isArray(e.tags) ? e.tags.map(String) : [],
        importance: ['high', 'normal', 'low'].includes(e.importance) ? e.importance : 'normal',
        content: String(e.content),
      });
    }
    return normalized;
  } catch {
    // LLM failure — return empty (non-critical operation)
    return [];
  }
}