metame-cli 1.4.15 → 1.4.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,7 +21,7 @@ const LOCK_FILE = path.join(HOME, '.metame', 'distill.lock');
21
21
 
22
22
  const { hasKey, isLocked, getTier, getWritableKeysForPrompt, estimateTokens, TOKEN_BUDGET } = require('./schema');
23
23
  const { loadPending, savePending, upsertPending, getPromotable, removePromoted, expireStale } = require('./pending-traits');
24
- const { writeBrainFileSafe } = require('./utils');
24
+ const { writeBrainFileSafe, normalizeProjectPath, deriveProjectInfo } = require('./utils');
25
25
 
26
26
  // Session analytics — local skeleton extraction (zero API cost)
27
27
  let sessionAnalytics = null;
@@ -37,6 +37,64 @@ try {
37
37
  distillEnv = buildDistillEnv();
38
38
  } catch { /* providers not configured — use defaults */ }
39
39
 
40
/**
 * Split buffered signal lines into the batch to process now and the lines to keep.
 *
 * Each line is parsed as JSON. Lines that fail to parse, or that parse to a
 * non-object value, are discarded and appear in neither output list.
 * The anchor is the LAST parsed entry carrying a truthy `session` or `cwd`.
 * Entries are then grouped around it:
 *   - anchor has a session id: an entry joins the batch when its session id
 *     matches, or when it has no session id and its normalized cwd equals the
 *     anchor's normalized cwd (legacy signals recorded without a session);
 *   - anchor has only a cwd: an entry joins the batch when its normalized cwd matches;
 *   - no anchor at all: every parsed entry joins the batch.
 *
 * @param {Iterable<string>} lines - Raw buffer lines, one JSON document per line.
 * @returns {{batchEntries: object[], remainingLines: string[], anchorSessionId: (string|null), anchorCwd: *}}
 *   `batchEntries` are parsed entries in input order; `remainingLines` are the
 *   raw lines of the entries left out of the batch; `anchorCwd` is the anchor's
 *   cwd after `normalizeProjectPath` (null when nothing could be parsed).
 */
function selectSignalBatch(lines) {
  const rows = [];
  for (const rawLine of lines) {
    let entry;
    try {
      entry = JSON.parse(rawLine);
    } catch {
      // Malformed lines are non-recoverable noise: drop them.
      continue;
    }
    if (entry && typeof entry === 'object') rows.push({ rawLine, entry });
  }

  if (rows.length === 0) {
    return {
      batchEntries: [],
      remainingLines: [],
      anchorSessionId: null,
      anchorCwd: null,
    };
  }

  // Scan from the newest entry backwards; the first one that identifies a
  // session or a working directory becomes the anchor for this batch.
  const anchorRow = [...rows].reverse().find(({ entry }) => entry.session || entry.cwd);
  const anchorSessionId = anchorRow && anchorRow.entry.session ? String(anchorRow.entry.session) : null;
  const anchorCwd = anchorRow && anchorRow.entry.cwd ? anchorRow.entry.cwd : null;
  const normalizedAnchorCwd = normalizeProjectPath(anchorCwd);

  const belongsToBatch = (sessionId, cwd) => {
    if (anchorSessionId) {
      // Primary binding is the session id; cwd is only a fallback for
      // entries that carry no session id at all.
      if (sessionId === anchorSessionId) return true;
      return !sessionId && !!normalizedAnchorCwd && cwd === normalizedAnchorCwd;
    }
    if (normalizedAnchorCwd) return cwd === normalizedAnchorCwd;
    return true;
  };

  const batchEntries = [];
  const remainingLines = [];
  for (const { rawLine, entry } of rows) {
    const sessionId = entry.session ? String(entry.session) : null;
    const cwd = normalizeProjectPath(entry.cwd);
    if (belongsToBatch(sessionId, cwd)) batchEntries.push(entry);
    else remainingLines.push(rawLine);
  }

  return {
    batchEntries,
    remainingLines,
    anchorSessionId,
    anchorCwd: normalizedAnchorCwd,
  };
}
97
+
40
98
  /**
41
99
  * Main distillation process.
42
100
  * Returns { updated: boolean, summary: string }
@@ -84,13 +142,21 @@ async function distill() {
84
142
  }
85
143
  }
86
144
 
145
+ let remainingSignalLines = lines;
146
+ let ackSignals = false;
147
+ const finalize = () => cleanup({ ack: ackSignals, remainingLines: remainingSignalLines });
148
+
87
149
  try {
88
150
  // 3. Parse signals (preserve confidence + type from signal-capture)
151
+ const batch = selectSignalBatch(lines);
152
+ remainingSignalLines = batch.remainingLines;
153
+ const signalAnchorSessionId = batch.anchorSessionId;
154
+ const signalProjectInfo = deriveProjectInfo(batch.anchorCwd);
155
+
89
156
  const signals = [];
90
157
  let highConfidenceCount = 0;
91
- for (const line of lines) {
158
+ for (const entry of batch.batchEntries) {
92
159
  try {
93
- const entry = JSON.parse(line);
94
160
  if (entry.prompt) {
95
161
  signals.push({ text: entry.prompt, type: entry.type || 'implicit' });
96
162
  if (entry.confidence === 'high') highConfidenceCount++;
@@ -101,7 +167,8 @@ async function distill() {
101
167
  }
102
168
 
103
169
  if (signals.length === 0) {
104
- cleanup();
170
+ ackSignals = true;
171
+ finalize();
105
172
  return { updated: false, behavior: null, summary: 'No valid signals.' };
106
173
  }
107
174
 
@@ -111,18 +178,50 @@ async function distill() {
111
178
  let sessionSummary = null;
112
179
  if (sessionAnalytics) {
113
180
  try {
114
- const latest = sessionAnalytics.findLatestUnanalyzedSession();
115
- if (latest) {
116
- skeleton = sessionAnalytics.extractSkeleton(latest.path);
181
+ let targetSession = null;
182
+ if (signalAnchorSessionId && typeof sessionAnalytics.findSessionById === 'function') {
183
+ targetSession = sessionAnalytics.findSessionById(signalAnchorSessionId);
184
+ if (!targetSession) {
185
+ console.log(`[distill] signal session ${signalAnchorSessionId.slice(0, 8)} not found — skip session context to avoid cross-session mismatch`);
186
+ }
187
+ } else {
188
+ targetSession = sessionAnalytics.findLatestUnanalyzedSession();
189
+ }
190
+ if (targetSession) {
191
+ skeleton = sessionAnalytics.extractSkeleton(targetSession.path);
117
192
  sessionContext = sessionAnalytics.formatForPrompt(skeleton);
118
193
  // For long sessions, extract pivot points
119
- sessionSummary = sessionAnalytics.summarizeSession(skeleton, latest.path);
194
+ sessionSummary = sessionAnalytics.summarizeSession(skeleton, targetSession.path);
120
195
  }
121
196
  } catch (e) {
122
197
  console.log(`[distill] session context extraction failed: ${e.message}`);
123
198
  }
124
199
  }
125
200
 
201
+ // 3c. Recall relevant long-term facts as additional cognition context (read-only).
202
+ let memorySection = '';
203
+ try {
204
+ const memory = require('./memory');
205
+ const searchFn = memory.searchFactsAsync || memory.searchFacts;
206
+ const signalTail = signals.slice(-6).map(s => s.text).join(' ').slice(0, 260);
207
+ const outcomeHint = sessionSummary && sessionSummary.outcome ? ` outcome:${sessionSummary.outcome}` : '';
208
+ const recallQuery = (signalTail + outcomeHint).trim();
209
+ if (recallQuery) {
210
+ const recallProject = (skeleton && skeleton.project) ? skeleton.project : signalProjectInfo.project;
211
+ const recallScope = (skeleton && skeleton.project_id) ? skeleton.project_id : signalProjectInfo.project_id;
212
+ const facts = await Promise.resolve(searchFn(recallQuery, {
213
+ limit: 4,
214
+ project: recallProject || undefined,
215
+ scope: recallScope || undefined,
216
+ }));
217
+ if (facts && facts.length > 0) {
218
+ const factLines = facts.map((f, i) => `${i + 1}. [${f.relation}] ${f.value}`).join('\n');
219
+ memorySection = `\nRECALLED LONG-TERM FACTS (context only, do not restate verbatim):\n${factLines}\n`;
220
+ }
221
+ }
222
+ memory.close();
223
+ } catch { /* memory optional */ }
224
+
126
225
  // 4. Read current profile
127
226
  let currentProfile = '';
128
227
  try {
@@ -147,7 +246,7 @@ async function distill() {
147
246
 
148
247
  // Build session context (lower priority — truncate first)
149
248
  let sessionSection = sessionContext
150
- ? `\nSESSION CONTEXT (what actually happened in the latest coding session):\n${sessionContext}\n`
249
+ ? `\nSESSION CONTEXT (what happened in the same session/cwd as current signals):\n${sessionContext}\n`
151
250
  : '';
152
251
 
153
252
  if (sessionSummary) {
@@ -163,21 +262,44 @@ async function distill() {
163
262
  }
164
263
  let goalSection = goalContext ? `\n${goalContext}\n` : '';
165
264
 
166
- // Allocate remaining budget: user messages get priority over session context
167
- const sessionTokens = estimateTokens(sessionSection + goalSection);
168
- let budgetForMessages = availableForContent - sessionTokens;
265
+ // Allocate remaining budget: user messages get priority.
266
+ // Context priority when tight: memorySection -> sessionSection/goalSection -> user message trimming.
267
+ const MEMORY_TOKEN_CAP = Math.max(120, Math.floor(availableForContent * 0.35));
268
+ if (memorySection && estimateTokens(memorySection) > MEMORY_TOKEN_CAP) {
269
+ let compactFacts = '';
270
+ try {
271
+ const lines = memorySection.split('\n').filter(Boolean).slice(0, 4);
272
+ compactFacts = lines.join('\n').slice(0, 900);
273
+ } catch { /* keep original if split fails */ }
274
+ memorySection = compactFacts || memorySection.slice(0, 900);
275
+ }
276
+
277
+ let contextTokens = estimateTokens(sessionSection + goalSection + memorySection);
278
+ let budgetForMessages = availableForContent - contextTokens;
169
279
 
170
- // If not enough room, drop session context first, then trim messages
280
+ // If not enough room, drop memory context first, then session/goal, then trim messages.
281
+ if (budgetForMessages < 100) {
282
+ memorySection = '';
283
+ contextTokens = estimateTokens(sessionSection + goalSection);
284
+ budgetForMessages = availableForContent - contextTokens;
285
+ }
171
286
  if (budgetForMessages < 100) {
172
287
  sessionSection = '';
173
288
  goalSection = '';
174
289
  budgetForMessages = availableForContent;
175
290
  }
176
291
 
292
+ const HARD_SIGNAL_CHAR_CAP = 900;
293
+ const clampSignalText = (text, maxChars = HARD_SIGNAL_CHAR_CAP) => {
294
+ const s = String(text || '').trim();
295
+ if (!s) return '';
296
+ return s.length > maxChars ? s.slice(0, maxChars) : s;
297
+ };
298
+
177
299
  // Format signals: tag metacognitive and correction signals so Haiku treats them differently
178
300
  const formatSignal = (s, i) => {
179
301
  const tag = s.type === 'metacognitive' ? ' [META]' : s.type === 'correction' ? ' [CORRECTION]' : '';
180
- return `${i + 1}. "${s.text}"${tag}`;
302
+ return `${i + 1}. "${clampSignalText(s.text)}"${tag}`;
181
303
  };
182
304
 
183
305
  // Truncate user messages to fit budget (keep most recent, they're more relevant)
@@ -193,7 +315,15 @@ async function distill() {
193
315
  userMessages = truncatedSignals.map(formatSignal).join('\n');
194
316
  }
195
317
 
196
- const distillPrompt = `You are a MetaMe cognitive profile distiller. Extract COGNITIVE TRAITS and PREFERENCES — how the user thinks, decides, and communicates. NOT a memory system. Do NOT store facts.
318
+ // Hard fallback for single-oversized signal: always enforce budget even when only one message remains.
319
+ if (estimateTokens(userMessages) > budgetForMessages && truncatedSignals.length > 0) {
320
+ const last = truncatedSignals[truncatedSignals.length - 1];
321
+ const dynamicCap = Math.max(120, Math.min(HARD_SIGNAL_CHAR_CAP, Math.floor(budgetForMessages * 3)));
322
+ truncatedSignals = [{ ...last, text: clampSignalText(last.text, dynamicCap) }];
323
+ userMessages = truncatedSignals.map(formatSignal).join('\n');
324
+ }
325
+
326
+ const distillPrompt = `You are a MetaMe cognitive profile distiller. Extract COGNITIVE TRAITS and PREFERENCES — how the user thinks, decides, and communicates. You are not a fact archiver.
197
327
 
198
328
  CURRENT PROFILE:
199
329
  \`\`\`yaml
@@ -205,7 +335,7 @@ ${writableKeys}
205
335
 
206
336
  RECENT USER MESSAGES:
207
337
  ${userMessages}
208
- ${sessionSection}${goalSection}
338
+ ${sessionSection}${goalSection}${memorySection}
209
339
  RULES:
210
340
  1. Extract ONLY cognitive traits, preferences, behavioral patterns — NOT facts or events.
211
341
  2. IGNORE task-specific messages. Only extract what persists across ALL sessions.
@@ -215,6 +345,7 @@ RULES:
215
345
  6. Messages tagged [META] are metacognitive signals (self-reflection, strategy shifts, error awareness). These are HIGH VALUE for cognition fields — extract decision_style, error_response, receptive_to_challenge, and behavioral patterns from them.
216
346
  7. Add _confidence and _source blocks mapping field keys to confidence level and triggering quote.
217
347
  8. NEVER extract agent identity or role definitions. Messages like "你是贾维斯/你的角色是.../you are Jarvis" define the AGENT, not the USER. The profile is about the USER's cognition only.
348
+ 9. Recalled long-term facts are context signals only. Use them to support/deny persistent cognition, never copy them as factual output.
218
349
 
219
350
  BIAS PREVENTION:
220
351
  - Single observation = STATE, not TRAIT. T3 cognition needs 3+ observations.
@@ -253,21 +384,24 @@ Do NOT repeat existing unchanged values.`;
253
384
 
254
385
  // 7. Parse result
255
386
  if (!result || result === 'NO_UPDATE') {
256
- cleanup();
387
+ ackSignals = true;
388
+ finalize();
257
389
  return { updated: false, behavior: null, summary: `Analyzed ${signals.length} messages — no persistent insights found.` };
258
390
  }
259
391
 
260
392
  // Extract YAML block from response — require explicit code block, no fallback
261
393
  const yamlMatch = result.match(/```yaml\n([\s\S]*?)```/) || result.match(/```\n([\s\S]*?)```/);
262
394
  if (!yamlMatch) {
263
- cleanup();
264
- return { updated: false, behavior: null, summary: `Analyzed ${signals.length} messages — no persistent insights found.` };
395
+ ackSignals = false;
396
+ finalize();
397
+ return { updated: false, behavior: null, summary: 'Distiller returned malformed output. Signals preserved for retry.' };
265
398
  }
266
399
  const yamlContent = yamlMatch[1].trim();
267
400
 
268
401
  if (!yamlContent) {
269
- cleanup();
270
- return { updated: false, behavior: null, summary: 'Distiller returned empty result.' };
402
+ ackSignals = false;
403
+ finalize();
404
+ return { updated: false, behavior: null, summary: 'Distiller returned empty result. Signals preserved for retry.' };
271
405
  }
272
406
 
273
407
  // 8. Validate against schema + merge into profile
@@ -275,8 +409,9 @@ Do NOT repeat existing unchanged values.`;
275
409
  const yaml = require('js-yaml');
276
410
  const updates = yaml.load(yamlContent);
277
411
  if (!updates || typeof updates !== 'object') {
278
- cleanup();
279
- return { updated: false, behavior: null, summary: 'Distiller returned invalid data.' };
412
+ ackSignals = false;
413
+ finalize();
414
+ return { updated: false, behavior: null, summary: 'Distiller returned invalid data. Signals preserved for retry.' };
280
415
  }
281
416
 
282
417
  // Extract _behavior block before filtering (it's not a profile field)
@@ -286,13 +421,15 @@ Do NOT repeat existing unchanged values.`;
286
421
  // Schema whitelist filter: drop any keys not in schema or locked
287
422
  const filtered = filterBySchema(updates);
288
423
  if (Object.keys(filtered).length === 0 && !behavior) {
289
- cleanup();
424
+ ackSignals = true;
425
+ finalize();
290
426
  return { updated: false, behavior: null, summary: `Analyzed ${signals.length} messages — all extracted fields rejected by schema.` };
291
427
  }
292
428
 
293
429
  // If only behavior detected but no profile updates
294
430
  if (Object.keys(filtered).length === 0 && behavior) {
295
- cleanup();
431
+ ackSignals = true;
432
+ finalize();
296
433
  if (skeleton && sessionAnalytics) {
297
434
  try { sessionAnalytics.markAnalyzed(skeleton.session_id); } catch { }
298
435
  }
@@ -356,7 +493,8 @@ Do NOT repeat existing unchanged values.`;
356
493
  const alert = { ts: new Date().toISOString(), type: 'budget_exceeded', tokens, budget: TOKEN_BUDGET };
357
494
  fs.appendFileSync(alertFile, JSON.stringify(alert) + '\n', 'utf8');
358
495
  } catch { /* non-fatal */ }
359
- cleanup();
496
+ ackSignals = true;
497
+ finalize();
360
498
  return { updated: false, behavior, signalCount: signals.length, summary: `Profile too large (${tokens} tokens > ${TOKEN_BUDGET}). Write rejected to prevent bloat.` };
361
499
  }
362
500
 
@@ -367,7 +505,8 @@ Do NOT repeat existing unchanged values.`;
367
505
  try { sessionAnalytics.markAnalyzed(skeleton.session_id); } catch { }
368
506
  }
369
507
 
370
- cleanup();
508
+ ackSignals = true;
509
+ finalize();
371
510
  return {
372
511
  updated: true,
373
512
  behavior,
@@ -378,13 +517,15 @@ Do NOT repeat existing unchanged values.`;
378
517
  };
379
518
 
380
519
  } catch (err) {
381
- cleanup();
382
- return { updated: false, behavior: null, summary: `Profile merge failed: ${err.message}` };
520
+ ackSignals = false;
521
+ finalize();
522
+ return { updated: false, behavior: null, summary: `Profile merge failed: ${err.message}. Signals preserved for retry.` };
383
523
  }
384
524
 
385
525
  } catch (err) {
386
- cleanup();
387
- return { updated: false, behavior: null, summary: `Distillation error: ${err.message}` };
526
+ ackSignals = false;
527
+ finalize();
528
+ return { updated: false, behavior: null, summary: `Distillation error: ${err.message}. Signals preserved for retry.` };
388
529
  }
389
530
  }
390
531
 
@@ -606,10 +747,19 @@ function truncateArrays(obj) {
606
747
 
607
748
 
608
749
  /**
609
- * Clean up: remove buffer and lock
750
+ * Clean up: when ack=true, commit consumed buffer state; otherwise keep buffer intact.
751
+ * Always releases lock.
610
752
  */
611
- function cleanup() {
612
- try { fs.unlinkSync(BUFFER_FILE); } catch { }
753
+ function cleanup({ ack = false, remainingLines = null } = {}) {
754
+ try {
755
+ if (ack) {
756
+ if (Array.isArray(remainingLines) && remainingLines.length > 0) {
757
+ fs.writeFileSync(BUFFER_FILE, remainingLines.join('\n') + '\n', 'utf8');
758
+ } else {
759
+ fs.unlinkSync(BUFFER_FILE);
760
+ }
761
+ }
762
+ } catch { /* non-fatal */ }
613
763
  try { fs.unlinkSync(LOCK_FILE); } catch { }
614
764
  }
615
765
 
@@ -121,7 +121,7 @@ async function extractFacts(skeleton, evidence, distillEnv) {
121
121
  ]);
122
122
  } catch (e) {
123
123
  console.log(`[memory-extract] Haiku call failed: ${e.message} | code:${e.code} killed:${e.killed} stdout:${String(e.stdout || '').slice(0, 100)} stderr:${String(e.stderr || '').slice(0, 100)}`);
124
- return { facts: [], session_name: "未命名会话" };
124
+ return { ok: false, facts: [], session_name: "未命名会话" };
125
125
  }
126
126
 
127
127
  let parsed;
@@ -129,7 +129,7 @@ async function extractFacts(skeleton, evidence, distillEnv) {
129
129
  const cleaned = raw.replace(/```json\n?/g, '').replace(/```\n?/g, '').trim();
130
130
  parsed = JSON.parse(cleaned);
131
131
  } catch {
132
- return { facts: [], session_name: "未命名会话" };
132
+ return { ok: false, facts: [], session_name: "未命名会话" };
133
133
  }
134
134
 
135
135
  let facts = Array.isArray(parsed.facts) ? parsed.facts : [];
@@ -144,7 +144,7 @@ async function extractFacts(skeleton, evidence, distillEnv) {
144
144
  return true;
145
145
  });
146
146
 
147
- return { facts: filteredFacts, session_name };
147
+ return { ok: true, facts: filteredFacts, session_name };
148
148
  }
149
149
 
150
150
  /**
@@ -225,13 +225,21 @@ async function run() {
225
225
  evidence = sessionAnalytics.extractEvidence(session.path, 3000);
226
226
  } catch { /* non-fatal */ }
227
227
 
228
- const { facts, session_name } = await extractFacts(skeleton, evidence, distillEnv);
228
+ const { ok, facts, session_name } = await extractFacts(skeleton, evidence, distillEnv);
229
+ if (!ok) {
230
+ console.log(`[memory-extract] Session ${skeleton.session_id.slice(0, 8)}: extraction failed, will retry later`);
231
+ continue;
232
+ }
229
233
 
230
234
  if (facts.length > 0) {
235
+ const fallbackScope = skeleton.session_id
236
+ ? `sess_${String(skeleton.session_id).replace(/[^a-zA-Z0-9_-]/g, '').slice(0, 24)}`
237
+ : null;
231
238
  const { saved, skipped, superseded } = memory.saveFacts(
232
239
  skeleton.session_id,
233
240
  skeleton.project || 'unknown',
234
- facts
241
+ facts,
242
+ { scope: skeleton.project_id || fallbackScope }
235
243
  );
236
244
  totalSaved += saved;
237
245
  totalSkipped += skipped;