metame-cli 1.6.1 → 1.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,6 +8,7 @@
8
8
 
9
9
  const fs = require('fs');
10
10
  const path = require('path');
11
+ const dns = require('dns');
11
12
 
12
13
  let Lark;
13
14
  function _tryRequireLark() {
@@ -58,6 +59,40 @@ function withTimeout(promise, ms = 10000) {
58
59
  return Promise.race([promise, timeout]).finally(() => clearTimeout(timer));
59
60
  }
60
61
 
62
/**
 * Wait until DNS can resolve `hostname`, retrying with exponential backoff.
 *
 * Used after system wake / before reconnect: the OS may report clock/events
 * restored before WiFi+DNS are actually usable. Waits between attempts follow
 * 0s, 1s, 2s, 4s, 8s (then 8s repeatedly) and are clamped so the time spent
 * sleeping never pushes past the total budget.
 *
 * @param {string} hostname Host name to resolve.
 * @param {object} [opts]
 * @param {function} [opts.log] Called as log(level, message); defaults to a no-op.
 * @param {number} [opts.totalBudgetMs=60000] Overall time budget in milliseconds.
 * @param {function} [opts.lookup] Promise-returning resolver; defaults to dns.promises.lookup.
 * @param {function} [opts.sleep] Promise-returning delay taking milliseconds.
 * @returns {Promise<{ok: boolean, attempts: number, elapsed: number, error?: string}>}
 *   Resolves (never rejects on lookup failure) with the outcome of the wait.
 */
async function waitForNetworkReady(hostname, opts = {}) {
  // An explicit `null` bypasses the parameter default, so normalize it here.
  const options = opts || {};
  const log = options.log || (() => {});
  const totalBudget = Number.isFinite(options.totalBudgetMs) ? options.totalBudgetMs : 60000;
  const lookup = options.lookup || dns.promises.lookup;
  const sleep = options.sleep || ((ms) => new Promise((r) => setTimeout(r, ms)));
  const startedAt = Date.now();
  let attempt = 0;
  let lastError = null;
  // Delay applied before attempt N (index N); the last entry repeats.
  const backoff = [0, 1000, 2000, 4000, 8000];
  // Always make at least one attempt; subsequent attempts are budget-gated.
  do {
    const scheduled = backoff[Math.min(attempt, backoff.length - 1)];
    // Clamp the sleep to what is left of the budget so the cap is honored
    // instead of being overshot by a full backoff step.
    const remaining = totalBudget - (Date.now() - startedAt);
    const wait = Math.min(scheduled, Math.max(remaining, 0));
    if (wait > 0) await sleep(wait);
    attempt += 1;
    try {
      await lookup(hostname);
      return { ok: true, attempts: attempt, elapsed: Date.now() - startedAt };
    } catch (err) {
      lastError = err;
      log('DEBUG', `[net-ready] ${hostname} attempt ${attempt} failed: ${err.code || err.message}`);
    }
  } while (Date.now() - startedAt < totalBudget);
  return {
    ok: false,
    attempts: attempt,
    elapsed: Date.now() - startedAt,
    error: lastError && (lastError.message || String(lastError)),
  };
}
95
+
61
96
  // Max chars per lark_md element (Feishu limit ~4000)
62
97
  const MAX_CHUNK = 3800;
63
98
 
@@ -101,12 +136,25 @@ function createBot(config) {
101
136
  return { ok: true };
102
137
  } catch (err) {
103
138
  const msg = err && err.message || String(err);
104
- const isAuthError = /invalid|unauthorized|forbidden|token|credential|app_id|app_secret|permission|99991663|99991664|99991665/i.test(msg);
139
+ // Only flag as auth error when we have strong evidence: known Feishu
140
+ // auth error codes, HTTP 401/403, or explicit 'invalid app_id/secret'.
141
+ // Previously a loose /token/ regex false-positived on SDK-internal
142
+ // messages like "Cannot destructure 'tenant_access_token' of undefined"
143
+ // (which is really a network/empty-response failure) and caused the
144
+ // bridge to refuse to start across a lid-close/wake cycle.
145
+ const authPatterns = [
146
+ /\b(99991663|99991664|99991665)\b/, // Feishu token invalid codes
147
+ /\b(401|403)\b/, // HTTP 401/403
148
+ /invalid\s+(app_?id|app_?secret|tenant_access_token|access_?token)/i,
149
+ /unauthorized/i,
150
+ /\bforbidden\b/i,
151
+ ];
152
+ const isAuthError = authPatterns.some((p) => p.test(msg));
105
153
  return {
106
154
  ok: false,
107
155
  error: isAuthError
108
156
  ? `Feishu credential validation failed (app_id/app_secret may be incorrect): ${msg}`
109
- : `Feishu API probe failed (network or config issue): ${msg}`,
157
+ : `Feishu API probe failed (network or transient issue): ${msg}`,
110
158
  isAuthError,
111
159
  };
112
160
  }
@@ -381,6 +429,82 @@ function createBot(config) {
381
429
  }
382
430
  },
383
431
 
432
+ /**
433
+ * Create a new Feishu group chat. The bot is automatically a member of
434
+ * any chat it creates; pass `inviteOpenIds` to add humans at creation time.
435
+ * Requires the app to have `im:chat` (and `im:chat.member` for invitees)
436
+ * permission. Returns { ok, chatId, error }; never throws — callers can
437
+ * fall back to the manual /activate flow on failure.
438
+ *
439
+ * @param {object} opts
440
+ * @param {string} opts.name Chat name shown in user's chat list.
441
+ * @param {string} [opts.description] Optional description.
442
+ * @param {string[]} [opts.inviteOpenIds] open_ids of humans to add now.
443
+ * @param {string} [opts.ownerOpenId] open_id to mark as chat owner.
444
+ */
445
+ async createChat({ name, description = '', inviteOpenIds = [], ownerOpenId = null }) {
446
+ if (!name) return { ok: false, error: 'name is required' };
447
+ try {
448
+ const data = {
449
+ name: String(name).slice(0, 60),
450
+ description: String(description).slice(0, 256),
451
+ chat_mode: 'group',
452
+ chat_type: 'private',
453
+ // Owner is required by the API; default to the inviter if not given.
454
+ ...(ownerOpenId ? { owner_id: ownerOpenId } : {}),
455
+ ...(inviteOpenIds.length > 0 ? { user_id_list: inviteOpenIds.slice(0, 50) } : {}),
456
+ };
457
+ const res = await withTimeout(
458
+ client.im.chat.create({ params: { user_id_type: 'open_id' }, data }),
459
+ 15000
460
+ );
461
+ const chatId = res?.data?.chat_id || null;
462
+ if (!chatId) {
463
+ return { ok: false, error: `chat.create returned no chat_id: ${JSON.stringify(res?.data || res)}` };
464
+ }
465
+ return { ok: true, chatId };
466
+ } catch (err) {
467
+ const errDetail = err?.response?.data || err;
468
+ const code = errDetail?.code;
469
+ const msg = errDetail?.msg || errDetail?.message || String(err);
470
+ // Permission denied is the common first-time failure — surface a hint.
471
+ if (code === 99991663 || /permission|forbidden|scope/i.test(msg)) {
472
+ return { ok: false, error: `飞书应用缺少 im:chat 权限(${msg})`, code };
473
+ }
474
+ return { ok: false, error: msg, code };
475
+ }
476
+ },
477
+
478
+ /**
479
+ * Invite humans to an existing chat by open_id. Returns invalid_id_list
480
+ * so the caller can decide whether to surface the partial-failure case.
481
+ */
482
+ async inviteToChat(chatId, openIds = []) {
483
+ if (!chatId) return { ok: false, error: 'chatId is required' };
484
+ const list = (Array.isArray(openIds) ? openIds : [openIds]).filter(Boolean).slice(0, 50);
485
+ if (list.length === 0) return { ok: true, invalid: [] };
486
+ try {
487
+ const res = await withTimeout(
488
+ client.im.chat.members.create({
489
+ path: { chat_id: chatId },
490
+ params: { member_id_type: 'open_id' },
491
+ data: { id_list: list },
492
+ }),
493
+ 15000
494
+ );
495
+ const invalid = res?.data?.invalid_id_list || [];
496
+ return { ok: true, invalid };
497
+ } catch (err) {
498
+ const errDetail = err?.response?.data || err;
499
+ const code = errDetail?.code;
500
+ const msg = errDetail?.msg || errDetail?.message || String(err);
501
+ if (code === 99991663 || /permission|forbidden|scope/i.test(msg)) {
502
+ return { ok: false, error: `飞书应用缺少 im:chat.member 权限(${msg})`, code };
503
+ }
504
+ return { ok: false, error: msg, code };
505
+ }
506
+ },
507
+
384
508
  /**
385
509
  * Start WebSocket long connection to receive messages (with auto-reconnect)
386
510
  * @param {function} onMessage - callback(chatId, text, event)
@@ -395,15 +519,22 @@ function createBot(config) {
395
519
  let healthTimer = null;
396
520
  let sleepWakeTimer = null;
397
521
  let reconnectTimer = null;
398
- let reconnectDelay = 5000; // start 5s, doubles up to 60s
522
+ let aliveTimer = null;
523
+ let reconnectScheduled = false; // dedup flag: true while a reconnect is pending
524
+ let wsEpoch = 0; // increments each connect(); underlying-ws hooks capture their own epoch
525
+ const INITIAL_RECONNECT_DELAY = 5000;
399
526
  const MAX_RECONNECT_DELAY = 60000;
400
- const HEALTH_CHECK_INTERVAL = 90000; // check every 90s
401
- const SILENT_THRESHOLD = 300000; // 5 min no SDK activity → suspect dead
402
- const SLEEP_DETECT_INTERVAL = 5000; // tick every 5s to detect clock jump
403
- const SLEEP_JUMP_THRESHOLD = 30000; // clock jump >30s = system was sleeping
527
+ let reconnectDelay = INITIAL_RECONNECT_DELAY;
528
+ const HEALTH_CHECK_INTERVAL = 30000; // tighter bottom-line probe (was 90s)
529
+ const SILENT_THRESHOLD = 90000; // 90s no SDK activity probe (was 300s)
530
+ const SLEEP_DETECT_INTERVAL = 5000;
531
+ const SLEEP_JUMP_THRESHOLD = 30000; // clock jump >30s = was sleeping
532
+ const ALIVE_CHECK_WINDOW = 15000; // after connect, must see activity within 15s
533
+ const FEISHU_HOST = 'open.feishu.cn';
404
534
 
405
535
  // Track last SDK activity (any event received = alive)
406
536
  let _lastActivityAt = Date.now();
537
+ let _connectedAt = 0; // when the current WSClient was (re)started
407
538
  function touchActivity() { _lastActivityAt = Date.now(); }
408
539
 
409
540
  // Dedup: track recent message_ids (Feishu may redeliver on slow ack)
@@ -490,58 +621,162 @@ function createBot(config) {
490
621
  });
491
622
  }
492
623
 
624
/**
 * Wrap `wsClient.wsConfig.setWSInstance` so every WebSocket instance handed
 * to it gets 'close' and 'error' listeners, giving immediate notification of
 * a close instead of waiting for silence detection.
 *
 * Defensive by design: if `wsConfig` / `setWSInstance` is missing or anything
 * throws, a warning is logged and nothing is hooked, leaving the
 * silence/health/sleep checks as the fallback.
 *
 * @param {object} wsClient WSClient whose config is wrapped.
 * @param {number} epoch Value of `wsEpoch` for this connect(); listeners
 *   ignore events once a newer connect() has changed `wsEpoch`.
 */
function hookUnderlyingWs(wsClient, epoch) {
  try {
    const wsConfig = wsClient?.wsConfig;
    if (!wsConfig || typeof wsConfig.setWSInstance !== 'function') {
      return;
    }
    const originalSetter = wsConfig.setWSInstance.bind(wsConfig);

    const onClose = () => {
      if (stopped) return;
      if (epoch !== wsEpoch) return; // stale: a newer connect() has superseded this one
      _log('INFO', 'Feishu underlying WS closed — scheduling reconnect');
      scheduleReconnect({ immediate: true, reason: 'ws-close' });
    };
    const onError = (e) => {
      if (epoch !== wsEpoch) return;
      _log('WARN', `Feishu underlying WS error: ${e && e.message || e}`);
    };

    wsConfig.setWSInstance = (instance) => {
      originalSetter(instance);
      // Skip empty values and instances that already carry our listeners.
      if (!instance || instance._metameHooked) return;
      instance._metameHooked = true;
      try {
        instance.on('close', onClose);
        instance.on('error', onError);
      } catch (hookErr) {
        _log('WARN', `Feishu ws event hook failed: ${hookErr.message}`);
      }
    };
  } catch (err) {
    _log('WARN', `Feishu SDK hook unavailable (${err.message}) — falling back to silence/sleep detection`);
  }
}
657
+
493
658
  function connect() {
494
659
  if (stopped) return;
660
+ clearTimeout(aliveTimer);
661
+ wsEpoch += 1;
662
+ const myEpoch = wsEpoch;
663
+ let ws;
495
664
  try {
496
- currentWs = new Lark.WSClient({
665
+ ws = new Lark.WSClient({
497
666
  appId: app_id,
498
667
  appSecret: app_secret,
499
668
  loggerLevel: Lark.LoggerLevel.info,
669
+ autoReconnect: false, // we own the reconnect lifecycle
500
670
  });
671
+ currentWs = ws;
672
+ hookUnderlyingWs(ws, myEpoch);
501
673
  const eventDispatcher = buildDispatcher();
502
- currentWs.start({ eventDispatcher });
674
+ const startResult = ws.start({ eventDispatcher });
675
+ _connectedAt = Date.now();
503
676
  touchActivity();
504
- reconnectDelay = 5000; // reset backoff on successful start
505
677
  _log('INFO', 'Feishu WebSocket connecting...');
678
+ startAliveCheck();
679
+ // start() may return a Promise. Surface async failures into the reconnect pipeline
680
+ // so we don't depend solely on the 15s alive-check to recover.
681
+ if (startResult && typeof startResult.then === 'function') {
682
+ startResult.catch((err) => {
683
+ if (stopped) return;
684
+ if (myEpoch !== wsEpoch) return; // superseded
685
+ _log('ERROR', `Feishu WSClient.start rejected: ${err && err.message || err}`);
686
+ scheduleReconnect({ immediate: true, reason: 'start-rejected', failed: true });
687
+ });
688
+ }
506
689
  } catch (err) {
507
690
  _log('ERROR', `Feishu WSClient.start failed: ${err.message}`);
508
- scheduleReconnect();
691
+ scheduleReconnect({ immediate: true, reason: 'start-failed', failed: true });
509
692
  }
510
693
  }
511
694
 
512
/**
 * Single entry point for all reconnect signals (ws close, probe failures,
 * system wake, manual reconnect).
 *
 * Signals are de-duplicated through `reconnectScheduled`, which stays set
 * from the moment a reconnect is scheduled until the network wait has
 * finished and connect() (or the retry) is about to run. Releasing it any
 * earlier lets a second signal start a parallel reconnect chain while the
 * first is still awaiting DNS, ending in two connect() calls.
 *
 * @param {object} [opts]
 * @param {boolean} [opts.immediate=false] Use a 0ms delay instead of `reconnectDelay`.
 * @param {string} [opts.reason] Label used in log lines.
 * @param {boolean} [opts.failed=false] When true, doubles `reconnectDelay`
 *   (capped at MAX_RECONNECT_DELAY) before the delay is computed.
 */
function scheduleReconnect({ immediate = false, reason = '', failed = false } = {}) {
  if (stopped) return;
  if (reconnectScheduled) {
    _log('DEBUG', `Feishu reconnect already scheduled — dropping duplicate (reason: ${reason})`);
    return;
  }
  reconnectScheduled = true;
  clearTimeout(reconnectTimer);
  clearTimeout(aliveTimer);
  try { currentWs?.stop?.(); } catch { /* ignore */ }
  currentWs = null;
  if (failed) {
    reconnectDelay = Math.min(reconnectDelay * 2, MAX_RECONNECT_DELAY);
  }
  // NOTE(review): every failure caller visible here passes immediate:true, so
  // the grown reconnectDelay is only used by the network-wait retry below.
  const delay = immediate ? 0 : reconnectDelay;
  _log('INFO', `Feishu reconnect in ${Math.round(delay / 1000)}s (reason: ${reason || 'unspecified'})`);
  reconnectTimer = setTimeout(async () => {
    if (stopped) {
      reconnectScheduled = false;
      return;
    }
    let net;
    try {
      net = await waitForNetworkReady(FEISHU_HOST, { log: _log });
    } catch (err) {
      // Keep the flag from getting stuck if the wait itself blows up.
      net = { ok: false, attempts: 0, elapsed: 0, error: err && err.message || String(err) };
    }
    // Release the dedup flag only now, right before handing over to
    // connect() or to the retry, both of which may need to schedule again.
    reconnectScheduled = false;
    if (stopped) return;
    if (!net.ok) {
      _log('WARN', `Feishu network still down after ${Math.round(net.elapsed / 1000)}s (${net.error || 'unknown'}) — retrying`);
      scheduleReconnect({ immediate: false, reason: 'network-wait-timeout', failed: true });
      return;
    }
    if (net.attempts > 1) {
      _log('INFO', `Feishu network ready after ${net.attempts} attempts (${Math.round(net.elapsed / 1000)}s)`);
    }
    connect();
  }, delay);
}
522
732
 
523
- // Health check: detect silent WebSocket death via API probe
733
/**
 * Alive check: ALIVE_CHECK_WINDOW ms after a connect, require either SDK
 * activity recorded after the connect or a successful API probe; otherwise
 * reconnect. This covers the case where WSClient.start returned but the
 * underlying socket is dead.
 *
 * The probe is awaited, so its result can arrive after stop() or after a
 * newer connect(); such stale results are ignored so an old probe failure
 * cannot tear down a fresh connection.
 */
function startAliveCheck() {
  clearTimeout(aliveTimer);
  const connectedAt = _connectedAt;
  const epoch = wsEpoch; // identifies the connection this check belongs to
  aliveTimer = setTimeout(async () => {
    if (stopped) return;
    if (_lastActivityAt > connectedAt) {
      // Strictly greater: connect() stamps activity at connect time, so
      // equality means nothing has been received since.
      reconnectDelay = INITIAL_RECONNECT_DELAY;
      return;
    }
    let probeError = null;
    try {
      await withTimeout(client.im.chat.list({ params: { page_size: 1 } }), 8000);
    } catch (err) {
      probeError = err;
    }
    // Discard the outcome if we were stopped or superseded while probing.
    if (stopped || epoch !== wsEpoch) return;
    if (!probeError) {
      touchActivity();
      reconnectDelay = INITIAL_RECONNECT_DELAY;
      _log('INFO', 'Feishu alive probe ok');
      return;
    }
    _log('WARN', `Feishu alive probe failed: ${probeError.message} — reconnecting`);
    scheduleReconnect({ immediate: true, reason: 'alive-probe-failed', failed: true });
  }, ALIVE_CHECK_WINDOW);
}
761
+
762
/**
 * Health check: bottom-line probe for silently dead sockets the hooks missed.
 *
 * Every HEALTH_CHECK_INTERVAL ms, if no activity has been recorded for at
 * least SILENT_THRESHOLD ms, issue a lightweight API call. Success counts as
 * activity; failure schedules a reconnect — unless the bot was stopped or a
 * newer connect() happened while the probe was in flight, in which case the
 * stale failure is ignored.
 */
function startHealthCheck() {
  clearInterval(healthTimer);
  healthTimer = setInterval(async () => {
    if (stopped) return;
    const silentMs = Date.now() - _lastActivityAt;
    if (silentMs < SILENT_THRESHOLD) return;
    const epoch = wsEpoch; // connection generation at probe start
    try {
      await withTimeout(client.im.chat.list({ params: { page_size: 1 } }), 8000);
      touchActivity();
    } catch (err) {
      // A reconnect that completed during the probe supersedes this result.
      if (stopped || epoch !== wsEpoch) return;
      _log('WARN', `Feishu health check failed after ${Math.round(silentMs / 1000)}s silence: ${err.message} — reconnecting`);
      scheduleReconnect({ immediate: true, reason: 'health-probe-failed', failed: true });
    }
  }, HEALTH_CHECK_INTERVAL);
}
543
778
 
544
- // Sleep/wake detector: if the JS clock jumps >30s, system was sleeping → force reconnect
779
+ // Sleep/wake detector: a JS clock jump >30s means the system was suspended.
545
780
  function startSleepWakeDetector() {
546
781
  let _lastTickAt = Date.now();
547
782
  sleepWakeTimer = setInterval(() => {
@@ -550,13 +785,9 @@ function createBot(config) {
550
785
  const elapsed = now - _lastTickAt;
551
786
  _lastTickAt = now;
552
787
  if (elapsed > SLEEP_JUMP_THRESHOLD) {
553
- _log('INFO', `System wake detected (${Math.round(elapsed / 1000)}s gap) — forcing reconnect`);
554
- reconnectDelay = 5000;
555
- clearTimeout(reconnectTimer);
556
- try { currentWs?.stop?.(); } catch { /* ignore */ }
557
- currentWs = null;
558
- touchActivity(); // reset silence counter so health check doesn't double-fire
559
- connect();
788
+ _log('INFO', `Feishu system wake detected (${Math.round(elapsed / 1000)}s gap) — reconnecting`);
789
+ reconnectDelay = INITIAL_RECONNECT_DELAY; // wake is a known cause, not a failure
790
+ scheduleReconnect({ immediate: true, reason: 'system-wake' });
560
791
  }
561
792
  }, SLEEP_DETECT_INTERVAL);
562
793
  }
@@ -570,17 +801,16 @@ function createBot(config) {
570
801
  stop() {
571
802
  stopped = true;
572
803
  clearTimeout(reconnectTimer);
804
+ clearTimeout(aliveTimer);
573
805
  clearInterval(healthTimer);
574
806
  clearInterval(sleepWakeTimer);
807
+ try { currentWs?.stop?.(); } catch { /* ignore */ }
575
808
  currentWs = null;
576
809
  },
577
810
  reconnect() {
578
811
  _log('INFO', 'Feishu manual reconnect triggered');
579
- reconnectDelay = 5000;
580
- clearTimeout(reconnectTimer);
581
- try { currentWs?.stop?.(); } catch { /* ignore */ }
582
- currentWs = null;
583
- connect();
812
+ reconnectDelay = INITIAL_RECONNECT_DELAY;
813
+ scheduleReconnect({ immediate: true, reason: 'manual' });
584
814
  },
585
815
  isAlive() {
586
816
  return !stopped && (Date.now() - _lastActivityAt) < SILENT_THRESHOLD;
@@ -592,4 +822,4 @@ function createBot(config) {
592
822
  };
593
823
  }
594
824
 
595
- module.exports = { createBot };
825
+ module.exports = { createBot, _internal: { waitForNetworkReady } };
@@ -15,6 +15,7 @@
15
15
  const fs = require('fs');
16
16
  const path = require('path');
17
17
  const os = require('os');
18
+ const crypto = require('crypto');
18
19
  const { callHaiku, buildDistillEnv } = require('./providers');
19
20
 
20
21
  const HOME = os.homedir();
@@ -115,6 +116,63 @@ const VAGUE_PATTERNS = [
115
116
  ];
116
117
  const ALLOWED_FLAT = new Set(['王总', 'system', 'user']);
117
118
 
119
/**
 * Compute the hex-encoded SHA-256 digest of a file, reading it synchronously
 * in 64 KiB chunks.
 *
 * @param {string} filePath Path of the file to hash.
 * @returns {string|null} Hex digest, or null when `filePath` is falsy or the
 *   file cannot be opened/read.
 */
function hashFile(filePath) {
  if (!filePath) {
    return null;
  }
  try {
    const digest = crypto.createHash('sha256');
    const handle = fs.openSync(filePath, 'r');
    try {
      const chunk = Buffer.alloc(64 * 1024);
      for (;;) {
        // A null position reads from the current file position onward.
        const count = fs.readSync(handle, chunk, 0, chunk.length, null);
        if (count <= 0) break;
        digest.update(chunk.subarray(0, count));
      }
    } finally {
      fs.closeSync(handle);
    }
    return digest.digest('hex');
  } catch {
    return null;
  }
}
139
+
140
/**
 * Size of a file in bytes.
 *
 * @param {string} filePath Path to stat.
 * @returns {number} The size reported by fs.statSync, or 0 when `filePath`
 *   is falsy or the stat call fails.
 */
function statSize(filePath) {
  if (!filePath) {
    return 0;
  }
  try {
    const { size } = fs.statSync(filePath);
    return size;
  } catch {
    return 0;
  }
}
147
+
148
/**
 * Record (or update) the provenance row for a session transcript through
 * `memory.saveSessionSource`.
 *
 * Returns null without saving when `memory` lacks a saveSessionSource
 * function, when `skeleton` is missing, or when the source file cannot be
 * hashed. Errors thrown by the save are logged and also yield null.
 *
 * @param {object} memory Store exposing saveSessionSource(record).
 * @param {string} engine Engine label stored with the record.
 * @param {string} sourcePath Path of the transcript file.
 * @param {object} skeleton Session summary supplying ids, timestamps and counts.
 * @param {string} [status='indexed'] Status stored with the record.
 * @param {string|null} [errorMessage=null] Error text stored with the record.
 * @returns {*} Whatever memory.saveSessionSource returns, or null.
 */
function saveSessionSource(memory, engine, sourcePath, skeleton, status = 'indexed', errorMessage = null) {
  const canSave = Boolean(memory) && typeof memory.saveSessionSource === 'function';
  if (!canSave || !skeleton) {
    return null;
  }
  const sourceHash = hashFile(sourcePath);
  if (!sourceHash) {
    return null;
  }
  try {
    const record = {
      engine,
      sessionId: skeleton.session_id,
      project: skeleton.project || 'unknown',
      scope: skeleton.project_id || null,
      cwd: skeleton.project_path || null,
      sourcePath,
      sourceHash,
      sourceSize: statSize(sourcePath),
      firstTs: skeleton.first_ts || null,
      lastTs: skeleton.last_ts || null,
      messageCount: skeleton.message_count || 0,
      toolCallCount: skeleton.total_tool_calls || 0,
      toolErrorCount: skeleton.tool_error_count || 0,
      status,
      errorMessage,
    };
    return memory.saveSessionSource(record);
  } catch (e) {
    console.log(`[memory-extract] session source save failed: ${e.message}`);
    return null;
  }
}
175
+
118
176
  /**
119
177
  * Extract atomic facts from session skeleton + evidence via Haiku.
120
178
  * Returns filtered fact array (may be empty).
@@ -212,8 +270,6 @@ async function run() {
212
270
  const sessions = sessionAnalytics.findAllUnextractedSessions(3);
213
271
  if (sessions.length === 0) {
214
272
  console.log('[memory-extract] No unanalyzed sessions found.');
215
- memory.close();
216
- return { sessionsProcessed: 0, factsSaved: 0, factsSkipped: 0 };
217
273
  }
218
274
 
219
275
  let totalSaved = 0;
@@ -223,9 +279,11 @@ async function run() {
223
279
  for (const session of sessions) {
224
280
  try {
225
281
  const skeleton = sessionAnalytics.extractSkeleton(session.path);
282
+ const sourceRow = saveSessionSource(memory, 'claude', session.path, skeleton);
226
283
 
227
284
  // Skip trivial sessions
228
285
  if (skeleton.message_count < 2 && skeleton.duration_min < 1) {
286
+ if (sourceRow) saveSessionSource(memory, 'claude', session.path, skeleton, 'archived');
229
287
  sessionAnalytics.markFactsExtracted(skeleton.session_id);
230
288
  continue;
231
289
  }
@@ -237,6 +295,7 @@ async function run() {
237
295
 
238
296
  const { ok, facts, session_name } = await extractFacts(skeleton, evidence, distillEnv);
239
297
  if (!ok) {
298
+ if (sourceRow) saveSessionSource(memory, 'claude', session.path, skeleton, 'error', 'fact extraction failed');
240
299
  console.log(`[memory-extract] Session ${skeleton.session_id.slice(0, 8)}: extraction failed, will retry later`);
241
300
  continue;
242
301
  }
@@ -249,7 +308,7 @@ async function run() {
249
308
  skeleton.session_id,
250
309
  skeleton.project || 'unknown',
251
310
  facts,
252
- { scope: skeleton.project_id || fallbackScope }
311
+ { scope: skeleton.project_id || fallbackScope, source_id: sourceRow ? sourceRow.id : skeleton.session_id }
253
312
  );
254
313
  totalSaved += saved;
255
314
  totalSkipped += skipped;
@@ -276,6 +335,7 @@ async function run() {
276
335
 
277
336
  // P2-A: persist session name + tags to session_tags.json
278
337
  saveSessionTag(skeleton.session_id, session_name, facts);
338
+ if (sourceRow) saveSessionSource(memory, 'claude', session.path, skeleton, 'extracted');
279
339
 
280
340
  processed++;
281
341
  } catch (e) {
@@ -294,15 +354,18 @@ async function run() {
294
354
  for (const cs of codexSessions) {
295
355
  try {
296
356
  const { skeleton, evidence } = sessionAnalytics.buildCodexInput(cs.path, historyMap);
357
+ const sourceRow = saveSessionSource(memory, 'codex', cs.path, skeleton);
297
358
 
298
359
  // Skip trivial sessions with no user messages
299
360
  if (skeleton.message_count < 1) {
361
+ if (sourceRow) saveSessionSource(memory, 'codex', cs.path, skeleton, 'archived');
300
362
  sessionAnalytics.markCodexFactsExtracted(cs.session_id);
301
363
  continue;
302
364
  }
303
365
 
304
366
  const { ok, facts, session_name } = await extractFacts(skeleton, evidence, distillEnv);
305
367
  if (!ok) {
368
+ if (sourceRow) saveSessionSource(memory, 'codex', cs.path, skeleton, 'error', 'fact extraction failed');
306
369
  console.log(`[memory-extract] Codex ${cs.session_id.slice(0, 8)}: extraction failed, will retry later`);
307
370
  continue;
308
371
  }
@@ -313,7 +376,11 @@ async function run() {
313
376
  cs.session_id,
314
377
  skeleton.project || 'unknown',
315
378
  facts,
316
- { scope: skeleton.project_id || fallbackScope, source_type: 'codex' }
379
+ {
380
+ scope: skeleton.project_id || fallbackScope,
381
+ source_type: 'codex',
382
+ source_id: sourceRow ? sourceRow.id : cs.session_id,
383
+ }
317
384
  );
318
385
  totalSaved += saved;
319
386
  totalSkipped += skipped;
@@ -339,6 +406,7 @@ async function run() {
339
406
  }
340
407
 
341
408
  sessionAnalytics.markCodexFactsExtracted(cs.session_id);
409
+ if (sourceRow) saveSessionSource(memory, 'codex', cs.path, skeleton, 'extracted');
342
410
  processed++;
343
411
  } catch (e) {
344
412
  console.log(`[memory-extract] Codex session error: ${e.message}`);
@@ -130,6 +130,37 @@ function applyWikiSchema(db) {
130
130
  )
131
131
  `);
132
132
 
133
+ // ── session_sources (raw transcript provenance, L0) ───────────────────────
134
+ db.exec(`
135
+ CREATE TABLE IF NOT EXISTS session_sources (
136
+ id TEXT PRIMARY KEY,
137
+ engine TEXT NOT NULL DEFAULT 'unknown'
138
+ CHECK (engine IN ('claude','codex','unknown')),
139
+ session_id TEXT NOT NULL,
140
+ project TEXT DEFAULT '*',
141
+ scope TEXT,
142
+ agent_key TEXT,
143
+ cwd TEXT,
144
+ source_path TEXT,
145
+ source_hash TEXT NOT NULL,
146
+ source_size INTEGER DEFAULT 0,
147
+ first_ts TEXT,
148
+ last_ts TEXT,
149
+ message_count INTEGER DEFAULT 0,
150
+ tool_call_count INTEGER DEFAULT 0,
151
+ tool_error_count INTEGER DEFAULT 0,
152
+ status TEXT DEFAULT 'indexed'
153
+ CHECK (status IN ('indexed','summarized','extracted','error','archived')),
154
+ error_message TEXT,
155
+ created_at TEXT DEFAULT (datetime('now')),
156
+ updated_at TEXT DEFAULT (datetime('now')),
157
+ UNIQUE(engine, session_id, source_hash)
158
+ )
159
+ `);
160
+ db.exec('CREATE INDEX IF NOT EXISTS idx_session_sources_session ON session_sources(session_id)');
161
+ db.exec('CREATE INDEX IF NOT EXISTS idx_session_sources_project ON session_sources(project, scope, last_ts)');
162
+ db.exec('CREATE INDEX IF NOT EXISTS idx_session_sources_agent ON session_sources(agent_key, last_ts)');
163
+
133
164
  // ── doc_sources ───────────────────────────────────────────────────────────
134
165
  db.exec(`
135
166
  CREATE TABLE IF NOT EXISTS doc_sources (
package/scripts/memory.js CHANGED
@@ -177,6 +177,11 @@ function saveMemoryItem(item) {
177
177
  return { ok: true, id };
178
178
  }
179
179
 
180
/**
 * Upsert a session-source record into the memory database.
 * The session-source-db module is required at call time rather than at
 * module load.
 *
 * @param {object} source Record passed through to upsertSessionSource.
 * @returns {*} Whatever upsertSessionSource returns.
 */
function saveSessionSource(source) {
  const { upsertSessionSource: upsert } = require('./core/session-source-db');
  const db = getDb();
  return upsert(db, source);
}
184
+
180
185
  function searchMemoryItems(query, { kind = null, scope = null, project = null, state = 'active', limit = 20 } = {}) {
181
186
  const db = getDb();
182
187
  const conditions = [];
@@ -335,7 +340,7 @@ function saveSession({ sessionId, project, scope = null, summary, keywords = ''
335
340
  });
336
341
  }
337
342
 
338
- function saveFacts(sessionId, project, facts, { scope = null, source_type = null } = {}) {
343
+ function saveFacts(sessionId, project, facts, { scope = null, source_type = null, source_id = null } = {}) {
339
344
  if (!Array.isArray(facts) || facts.length === 0) return { saved: 0, skipped: 0, superseded: 0, savedFacts: [] };
340
345
  const normalizedProject = project === '*' ? '*' : String(project || 'unknown');
341
346
  let saved = 0;
@@ -363,7 +368,7 @@ function saveFacts(sessionId, project, facts, { scope = null, source_type = null
363
368
  scope: scope || null,
364
369
  session_id: sessionId,
365
370
  source_type: f.source_type || source_type || 'session',
366
- source_id: sessionId,
371
+ source_id: f.source_id || source_id || sessionId,
367
372
  relation: f.relation,
368
373
  tags,
369
374
  });
@@ -564,6 +569,7 @@ async function hybridSearchWiki(query, { ftsOnly = false, expand = false, trackS
564
569
  module.exports = {
565
570
  // core
566
571
  saveMemoryItem,
572
+ saveSessionSource,
567
573
  searchMemoryItems,
568
574
  promoteItem,
569
575
  archiveItem,