@rubytech/create-maxy 1.0.831 → 1.0.832

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. package/package.json +1 -1
  2. package/payload/platform/neo4j/migrations/004-project-admin-agent.ts +36 -3
  3. package/payload/platform/plugins/memory/bin/conversation-archive-ingest.mjs +253 -110
  4. package/payload/platform/plugins/memory/bin/conversation-archive-ingest.sh +41 -14
  5. package/payload/platform/plugins/memory/mcp/dist/lib/uuid.d.ts +3 -0
  6. package/payload/platform/plugins/memory/mcp/dist/lib/uuid.d.ts.map +1 -0
  7. package/payload/platform/plugins/memory/mcp/dist/lib/uuid.js +12 -0
  8. package/payload/platform/plugins/memory/mcp/dist/lib/uuid.js.map +1 -0
  9. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts +11 -0
  10. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts.map +1 -1
  11. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js +17 -9
  12. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js.map +1 -1
  13. package/payload/platform/plugins/memory/skills/conversation-archive/SKILL.md +41 -9
  14. package/payload/platform/scripts/lib/resolve-account-dir.sh +19 -1
  15. package/payload/server/chunk-25QDCOE5.js +1116 -0
  16. package/payload/server/chunk-35YZS3KL.js +328 -0
  17. package/payload/server/chunk-7CBRZKZS.js +654 -0
  18. package/payload/server/chunk-IXOPV36P.js +2305 -0
  19. package/payload/server/chunk-ZKGAYLAK.js +10097 -0
  20. package/payload/server/client-pool-NBVGONQL.js +32 -0
  21. package/payload/server/cloudflare-task-tracker-R4FIORFL.js +19 -0
  22. package/payload/server/maxy-edge.js +4 -4
  23. package/payload/server/neo4j-migrations-3A6K2EB5.js +552 -0
  24. package/payload/server/server.js +19 -7
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@rubytech/create-maxy",
3
- "version": "1.0.831",
3
+ "version": "1.0.832",
4
4
  "description": "Install Maxy — AI for Productive People",
5
5
  "bin": {
6
6
  "create-maxy": "./dist/index.js"
@@ -46,11 +46,45 @@
46
46
  * final totals line. Both entry points emit identical log lines.
47
47
  */
48
48
 
49
- import { existsSync, readdirSync } from "node:fs";
49
+ import { existsSync, readFileSync, readdirSync } from "node:fs";
50
50
  import { resolve } from "node:path";
51
51
  import { projectAgent, getSession } from "../../ui/app/lib/neo4j-store";
52
52
  import { ACCOUNTS_DIR } from "../../ui/app/lib/claude-agent/account";
53
53
 
54
+ /**
55
+ * Account-json filter (Task 900 sub-scope F) — admits only directories whose
56
+ * `account.json` parses. Stub directories (e.g. `0dbf29ef-…/logs/` left
57
+ * behind after install 1's `account.json` was lost) are SKIPPED with a
58
+ * one-line `[platform] accounts-state STUB-DIR id=<dir>` log. Mirrors
59
+ * `listValidAccounts()` in `account.ts`; copied here because this migration
60
+ * runs at boot from a separate node_modules-resolution context (see the
61
+ * structural-typing rationale at the top of `004-prune-alien-accounts.ts`).
62
+ */
63
+ function readValidAccountDirs(accountsDir: string): string[] {
64
+ const valid: string[] = [];
65
+ const entries = readdirSync(accountsDir, { withFileTypes: true })
66
+ .filter((e) => e.isDirectory())
67
+ .filter((e) => !e.name.startsWith("."));
68
+ for (const e of entries) {
69
+ const configPath = resolve(accountsDir, e.name, "account.json");
70
+ if (!existsSync(configPath)) {
71
+ console.error(
72
+ `[platform] accounts-state STUB-DIR id=${e.name} — onboarding leak; remove or repair`,
73
+ );
74
+ continue;
75
+ }
76
+ try {
77
+ JSON.parse(readFileSync(configPath, "utf-8"));
78
+ valid.push(e.name);
79
+ } catch {
80
+ console.error(
81
+ `[platform] accounts-state CORRUPT-JSON id=${e.name} — account.json failed to parse`,
82
+ );
83
+ }
84
+ }
85
+ return valid;
86
+ }
87
+
54
88
  /**
55
89
  * Structural alias for the `Driver` instance the runner passes in. Same
56
90
  * rationale as `004-prune-alien-accounts.ts`: this file lives outside
@@ -209,8 +243,7 @@ export async function applyAdminAgentBackfill(
209
243
  return;
210
244
  }
211
245
 
212
- const accountEntries = readdirSync(accountsDir, { withFileTypes: true })
213
- .filter((e) => e.isDirectory());
246
+ const accountEntries = readValidAccountDirs(accountsDir).map((name) => ({ name }));
214
247
 
215
248
  console.error(
216
249
  `[admin-agent-graph-backfill] start accounts=${accountEntries.length}`,
@@ -1,7 +1,9 @@
1
1
  #!/usr/bin/env node
2
2
  // =============================================================================
3
3
  // conversation-archive-ingest.mjs — in-process orchestrator for
4
- // conversation-archive-ingest.sh (Task 894 — supersedes whatsapp-import bin).
4
+ // conversation-archive-ingest.sh (Task 894 — supersedes whatsapp-import bin;
5
+ // Task 900 — env-only identity, fsync-per-line progress sink, per-session
6
+ // checkpointing).
5
7
  //
6
8
  // Source-agnostic. The same pipeline runs for every conversation source;
7
9
  // `--source <enum>` selects the normaliser at the top of the pipeline:
@@ -9,26 +11,44 @@
9
11
  // normalise → bind canonical senders → derive conversationIdentity
10
12
  // → look up prior :ConversationArchive (delta cursor)
11
13
  // → sessionize delta at gap-hours boundaries
12
- // → for each session: classify (mode='chat') → collect chunks
13
- // memoryIngest(parentLabel='ConversationArchive', source=<enum>)
14
+ // → for each session: classify (mode='chat') → memoryIngest immediately
15
+ // advance lastIngestedMessageHash + lastIngestedMessageAt
16
+ //
17
+ // Identity (Task 900): `process.env.ACCOUNT_ID` and `process.env.USER_ID` are
18
+ // the only inputs. Both are plumbed by `spawn-env.ts` into every Bash
19
+ // subprocess; missing/malformed → LOUD-FAIL at phase=argv. `--account-id` and
20
+ // `--owner-element-id` flags are GONE; the writer derives the AdminUser
21
+ // elementId from (accountId, userId) one Cypher hop away.
14
22
  //
15
23
  // Argv (positional): <archive-path>
16
- // Argv (flags): --source <whatsapp|telegram|signal|linkedin-messages|zoom-transcript|meeting-minutes|imessage|slack|other>
17
- // --owner-element-id <id>
24
+ // Argv (flags): --source <enum>
18
25
  // --participant-person-ids <csv>
19
26
  // --scope <admin|public>
20
27
  // [--session-gap-hours <N>] (default 12)
21
- // [--account-id <accountId>]
22
28
  // [--timezone <iana>]
23
29
  // [--date-format <DD/MM/YY|MM/DD/YY|DD/MM/YYYY|MM/DD/YYYY>]
24
30
  // [--session-id <id>]
25
31
  //
26
32
  // Stdout (success): one JSON line — counters the skill needs to formulate
27
33
  // the three operator-facing messages. See SKILL.md for the shape.
28
- // Stderr: one [conversation-archive] FAIL line on failure, exit non-zero.
34
+ // Stderr + progress file: `[conversation-archive] ...` lines, fsync per write.
35
+ // Progress path: data/accounts/<accountId>/logs/conversation-archive-<sessionId>.log
36
+ // Wrapper sh prints `[conversation-archive] progress-file=<absolute-path>` on
37
+ // stdout before exec node, so the agent computes nothing.
29
38
  // =============================================================================
30
39
 
31
- import { existsSync, mkdtempSync, readdirSync, rmSync, statSync } from "node:fs";
40
+ import {
41
+ closeSync,
42
+ existsSync,
43
+ fsyncSync,
44
+ mkdirSync,
45
+ mkdtempSync,
46
+ openSync,
47
+ readdirSync,
48
+ rmSync,
49
+ statSync,
50
+ writeSync,
51
+ } from "node:fs";
32
52
  import { join, resolve, dirname } from "node:path";
33
53
  import { tmpdir } from "node:os";
34
54
  import { spawnSync } from "node:child_process";
@@ -89,13 +109,60 @@ const MEMORY_INGEST_PATH = resolve(
89
109
  "tools",
90
110
  "memory-ingest.js",
91
111
  );
112
+ const UUID_LIB_PATH = resolve(
113
+ platformRoot,
114
+ "plugins",
115
+ "memory",
116
+ "mcp",
117
+ "dist",
118
+ "lib",
119
+ "uuid.js",
120
+ );
92
121
 
93
122
  // ---------------------------------------------------------------------------
94
- // 2. Logger
123
+ // 2. Progress sink + logger fan-out (Task 900 sub-scope A).
124
+ // `log()` and `fail()` write to stderr AND the progress file with fsync per
125
+ // write. Parent Bash returns nothing until child exits; the progress file
126
+ // is the only surface the agent's heartbeat poll can read mid-run.
95
127
  // ---------------------------------------------------------------------------
128
+ let progressFd = -1;
129
+ const startedMs = Date.now();
130
+
131
+ function openProgressSink(absolutePath) {
132
+ mkdirSync(dirname(absolutePath), { recursive: true });
133
+ progressFd = openSync(absolutePath, "a");
134
+ }
135
+
136
+ function writeProgressLine(line) {
137
+ // stderr first — preserves existing observability for parents that read
138
+ // stderr after the child exits, AND makes the line visible via the
139
+ // mcp-spawn-tee'd device server.log without a separate sink.
140
+ process.stderr.write(line);
141
+ if (progressFd >= 0) {
142
+ try {
143
+ writeSync(progressFd, line);
144
+ fsyncSync(progressFd);
145
+ } catch {
146
+ // Disk pressure / EBADF: don't kill the run — stderr already carries
147
+ // the line.
148
+ }
149
+ }
150
+ }
151
+
96
152
  function log(line) {
97
- process.stderr.write(`[conversation-archive] ${line}\n`);
153
+ writeProgressLine(`[conversation-archive] ${line}\n`);
98
154
  }
155
+
156
+ function endRun(exitCode) {
157
+ const totalMs = Date.now() - startedMs;
158
+ writeProgressLine(`[conversation-archive] end exit-code=${exitCode} ms=${totalMs}\n`);
159
+ if (progressFd >= 0) {
160
+ try { closeSync(progressFd); } catch { /* fd already closed */ }
161
+ progressFd = -1;
162
+ }
163
+ process.exit(exitCode);
164
+ }
165
+
99
166
  function fail(phase, fields) {
100
167
  const fieldStr = Object.entries(fields)
101
168
  .map(([k, v]) =>
@@ -104,8 +171,8 @@ function fail(phase, fields) {
104
171
  : `${k}=${v ?? "-"}`,
105
172
  )
106
173
  .join(" ");
107
- process.stderr.write(`[conversation-archive] FAIL phase=${phase} ${fieldStr}\n`);
108
- process.exit(1);
174
+ writeProgressLine(`[conversation-archive] FAIL phase=${phase} ${fieldStr}\n`);
175
+ endRun(1);
109
176
  }
110
177
 
111
178
  // ---------------------------------------------------------------------------
@@ -144,7 +211,6 @@ function parseArgv(argv) {
144
211
  if (!VALID_SOURCES.has(flags.source)) {
145
212
  fail("argv", { reason: `invalid --source "${flags.source}" (whatsapp|telegram|signal|linkedin-messages|zoom-transcript|meeting-minutes|imessage|slack|other)` });
146
213
  }
147
- if (!flags.ownerElementId) fail("argv", { reason: "missing --owner-element-id" });
148
214
  if (!flags.participantPersonIds) {
149
215
  fail("argv", {
150
216
  reason: "missing --participant-person-ids (csv of operator-confirmed :Person/:AdminUser elementIds, owner excluded)",
@@ -216,35 +282,41 @@ function findChatTxt(dir) {
216
282
  }
217
283
 
218
284
  // ---------------------------------------------------------------------------
219
- // 5. Account resolution (Phase 0 = single account)
220
- // ---------------------------------------------------------------------------
221
- function resolveAccountId(flags) {
222
- if (flags.accountId && flags.accountId.trim()) return flags.accountId.trim();
223
- const installDir = resolve(platformRoot, "..");
224
- const accountsDir = join(installDir, "data", "accounts");
225
- if (!existsSync(accountsDir)) {
226
- fail("argv", { reason: `accounts dir not found: ${accountsDir}; pass --account-id explicitly` });
227
- }
228
- const dirs = readdirSync(accountsDir, { withFileTypes: true })
229
- .filter((d) => d.isDirectory() && !d.name.startsWith("."))
230
- .map((d) => d.name);
231
- if (dirs.length === 0) fail("argv", { reason: `no accounts found under ${accountsDir}` });
232
- if (dirs.length > 1) {
233
- fail("argv", {
234
- reason: `multiple accounts under ${accountsDir} (${dirs.join(",")}); pass --account-id explicitly`,
235
- });
236
- }
237
- return dirs[0];
238
- }
239
-
240
- // ---------------------------------------------------------------------------
241
- // 6. Main
285
+ // 5. Main
242
286
  // ---------------------------------------------------------------------------
243
287
  async function main() {
244
- const startedMs = Date.now();
245
288
  const { archive, flags } = parseArgv(process.argv);
246
289
  const source = flags.source;
247
- const ownerElementId = flags.ownerElementId;
290
+
291
+ // Identity is env-only (Task 900 sub-scope C). spawn-env.ts plumbs both
292
+ // ACCOUNT_ID and USER_ID into every Bash subprocess; the bin LOUD-FAILs if
293
+ // either is absent or malformed. The disk-scan resolver and the
294
+ // --account-id / --owner-element-id flags are gone.
295
+ let UUID_REGEX;
296
+ try {
297
+ ({ UUID_REGEX } = await import(UUID_LIB_PATH));
298
+ } catch (err) {
299
+ fail("import", {
300
+ reason: "failed to import compiled uuid lib",
301
+ detail: err instanceof Error ? err.message : String(err),
302
+ });
303
+ }
304
+
305
+ const accountId = process.env.ACCOUNT_ID?.trim();
306
+ if (!accountId) {
307
+ fail("argv", { reason: "ACCOUNT_ID env missing — bin must be invoked under an authenticated agent context" });
308
+ }
309
+ if (!UUID_REGEX.test(accountId)) {
310
+ fail("argv", { reason: `ACCOUNT_ID env malformed (not a UUID): "${accountId}"` });
311
+ }
312
+ const userId = process.env.USER_ID?.trim();
313
+ if (!userId) {
314
+ fail("argv", { reason: "USER_ID env missing — bin must be invoked under an authenticated user session" });
315
+ }
316
+ if (!UUID_REGEX.test(userId)) {
317
+ fail("argv", { reason: `USER_ID env malformed (not a UUID): "${userId}"` });
318
+ }
319
+
248
320
  const participantElementIds = flags.participantPersonIds
249
321
  .split(",")
250
322
  .map((s) => s.trim())
@@ -253,7 +325,6 @@ async function main() {
253
325
  fail("argv", { reason: "--participant-person-ids must list at least one elementId" });
254
326
  }
255
327
  const scope = flags.scope;
256
- const accountId = resolveAccountId(flags);
257
328
  const timezone = flags.timezone || "Europe/London";
258
329
  const dateFormat = flags.dateFormat;
259
330
  const sessionGapHours = flags.sessionGapHours
@@ -266,6 +337,20 @@ async function main() {
266
337
  flags.sessionId ||
267
338
  `conversation-archive:${source}:${Date.now()}:${Math.random().toString(36).slice(2, 10)}`;
268
339
 
340
+ // ---------------------------------------------------------------------------
341
+ // Open progress sink BEFORE any await. Path is deterministic from
342
+ // (accountId, sessionId); the wrapper sh prints the same path on stdout so
343
+ // the agent's heartbeat polls the right file.
344
+ // ---------------------------------------------------------------------------
345
+ const accountsDir = resolve(platformRoot, "..", "data", "accounts");
346
+ const logsDir = resolve(accountsDir, accountId, "logs");
347
+ const progressFilePath = resolve(logsDir, `conversation-archive-${sessionId}.log`);
348
+ openProgressSink(progressFilePath);
349
+
350
+ log(
351
+ `start session=${sessionId} source=${source} archive=${archive} accountId=${accountId} userId=${userId} participants=${participantElementIds.length} scope=${scope} pid=${process.pid}`,
352
+ );
353
+
269
354
  // Imports — fail loudly if any compiled dist missing
270
355
  let getNormaliser;
271
356
  let sessionize, toTurnText, findDeltaCursor;
@@ -290,6 +375,36 @@ async function main() {
290
375
  });
291
376
  }
292
377
 
378
+ // Resolve owner AdminUser elementId from (accountId, userId). One Cypher
379
+ // hop replaces the deleted --owner-element-id flag — owner identity is env
380
+ // input, never an out-of-band flag.
381
+ let ownerElementId;
382
+ {
383
+ const lookupSession = getSession();
384
+ try {
385
+ const r = await lookupSession.run(
386
+ `MATCH (au:AdminUser { accountId: $accountId, userId: $userId })
387
+ RETURN elementId(au) AS ownerElementId LIMIT 1`,
388
+ { accountId, userId },
389
+ );
390
+ const row = r.records[0]?.get("ownerElementId");
391
+ if (typeof row !== "string" || !row) {
392
+ await lookupSession.close().catch(() => {});
393
+ fail("argv", {
394
+ reason: `USER_ID does not resolve to an AdminUser under ACCOUNT_ID (accountId=${accountId.slice(0, 8)}, userId=${userId.slice(0, 8)})`,
395
+ });
396
+ }
397
+ ownerElementId = row;
398
+ } catch (err) {
399
+ await lookupSession.close().catch(() => {});
400
+ fail("argv", {
401
+ reason: `AdminUser lookup failed: ${err instanceof Error ? err.message : String(err)}`,
402
+ });
403
+ }
404
+ await lookupSession.close().catch(() => {});
405
+ }
406
+ log(`owner-resolved ownerElementId=${ownerElementId.slice(0, 12)} userId=${userId.slice(0, 8)}`);
407
+
293
408
  // 6a. Resolve source file
294
409
  const { sourceFile, cleanup } = resolveSourceFile(archive, source);
295
410
 
@@ -342,8 +457,8 @@ async function main() {
342
457
  await session.close().catch(() => {});
343
458
  cleanup();
344
459
  if (err && err.userFacing) {
345
- process.stderr.write(`[conversation-archive] FAIL ${err.message}\n`);
346
- process.exit(1);
460
+ writeProgressLine(`[conversation-archive] FAIL ${err.message}\n`);
461
+ endRun(1);
347
462
  }
348
463
  fail("argv", { reason: err instanceof Error ? err.message : String(err) });
349
464
  }
@@ -414,7 +529,7 @@ async function main() {
414
529
  ms: totalMs,
415
530
  priorLastIngestedMessageAt: priorArchive.lastAt,
416
531
  }) + "\n");
417
- process.exit(0);
532
+ endRun(0);
418
533
  }
419
534
  deltaStart = cursor.deltaStart;
420
535
  deltaKind = "delta";
@@ -430,15 +545,24 @@ async function main() {
430
545
  `sessionize source=${source} archiveSha256=${archiveSha256.slice(0, 12)} messages=${deltaLines.length} sessions=${sessions.length} gap-hours=${sessionGapHours}`,
431
546
  );
432
547
 
433
- // 6g. Classify each session via Haiku (mode='chat'). Task 897: classifier
434
- // failure on one session does not abort the whole ingest. The session
435
- // becomes an unenriched raw chunk (timestamps + body, no summary or
436
- // keywords) and a follow-up pass can re-classify it. One bad Haiku call
437
- // must not lose 61 good sessions.
438
- const allChunks = [];
548
+ // 6g. Per-session classify + immediate memoryIngest (Task 900 sub-scope E).
549
+ // Each session commits atomically: chunks + cursor advance happen together
550
+ // via memoryIngest. A kill mid-loop leaves a partial archive whose cursor
551
+ // is at session N-1's last message; the next invocation slices from there
552
+ // and re-classifies only session N onward. degrade-on-error from Task 897
553
+ // is preserved: a Haiku error on session N still emits an unenriched chunk
554
+ // and advances the cursor, so resumption never re-attempts the same Haiku
555
+ // error.
439
556
  const allKeywords = new Set();
440
557
  let sessionsUnenriched = 0;
441
- for (const s of sessions) {
558
+ let totalChunksWritten = 0;
559
+ let totalNextEdges = 0;
560
+ let participantsLinked = 0;
561
+ let archiveElementIdLatest = priorArchive ? priorArchive.elemId : "";
562
+
563
+ for (let sIdx = 0; sIdx < sessions.length; sIdx++) {
564
+ const s = sessions[sIdx];
565
+ const isFirstSessionInRun = sIdx === 0;
442
566
  const sessionStart = Date.now();
443
567
  const text = toTurnText(s);
444
568
  const result = await classifyDocument({
@@ -449,12 +573,15 @@ async function main() {
449
573
  naturalEdgeMap: "",
450
574
  documentText: text,
451
575
  });
576
+
577
+ let sessionChunks;
578
+ let sessionKeywords = [];
452
579
  if (result.kind === "error") {
453
580
  sessionsUnenriched += 1;
454
581
  log(`session-unenriched session=${s.index + 1}/${sessions.length} reason="${result.reason.replace(/"/g, '\\"')}"`);
455
582
  // Emit one raw :Section:Conversation chunk covering the whole session.
456
583
  // No summary, no topicKeywords — a follow-up pass can re-enrich.
457
- allChunks.push({
584
+ sessionChunks = [{
458
585
  kind: "Conversation",
459
586
  title: `Unenriched session ${s.index + 1}`,
460
587
  body: text,
@@ -469,75 +596,91 @@ async function main() {
469
596
  messageCount: s.messages.length,
470
597
  },
471
598
  anchorEdge: null,
472
- });
473
- continue;
599
+ }];
600
+ } else {
601
+ const chunkCount = result.output.sections.length;
602
+ log(
603
+ `classify-session sessionIndex=${s.index + 1}/${sessions.length} messages=${s.messages.length} chars=${text.length} chunks=${chunkCount} ms=${Date.now() - sessionStart}`,
604
+ );
605
+ if (chunkCount === 0 && s.messages.length > 0) {
606
+ cleanup();
607
+ fail("classify", {
608
+ reason: `session ${s.index} of ${s.messages.length} messages produced zero chunks (classifier-prompt regression)`,
609
+ });
610
+ }
611
+ sessionChunks = result.output.sections;
612
+ sessionKeywords = result.output.documentKeywords ?? [];
613
+ for (const kw of sessionKeywords) allKeywords.add(kw);
474
614
  }
475
- const chunkCount = result.output.sections.length;
476
- log(
477
- `classify-session sessionIndex=${s.index + 1}/${sessions.length} messages=${s.messages.length} chars=${text.length} chunks=${chunkCount} ms=${Date.now() - sessionStart}`,
478
- );
479
- if (chunkCount === 0 && s.messages.length > 0) {
615
+
616
+ // Per-session cursor: advance to THIS session's last message. Atomic with
617
+ // chunk writes inside memoryIngest's MERGE/CREATE Cypher transaction.
618
+ const lastSessionLine = s.messages[s.messages.length - 1];
619
+ const sessionLastHash = deriveMessageContentHash({
620
+ dateSent: lastSessionLine.dateSent,
621
+ body: lastSessionLine.body,
622
+ });
623
+ const sessionLastAt = lastSessionLine.dateSent;
624
+ const sessionDocumentSummary = `Session ${s.index + 1}/${sessions.length}: ${s.messages.length} messages, ${sessionChunks.length} chunks.`;
625
+
626
+ let ingestResult;
627
+ const ingestStart = Date.now();
628
+ try {
629
+ ingestResult = await memoryIngest({
630
+ accountId,
631
+ attachmentId: conversationIdentity,
632
+ parentLabel: "ConversationArchive",
633
+ source,
634
+ documentSummary: sessionDocumentSummary,
635
+ anchorNodeId: ownerElementId,
636
+ anchorLabel: "AdminUser",
637
+ sections: sessionChunks,
638
+ scope,
639
+ sessionId,
640
+ documentKeywords: Array.from(allKeywords),
641
+ archiveSha256,
642
+ archiveSourceFile,
643
+ lastIngestedMessageHash: sessionLastHash,
644
+ lastIngestedMessageAt: sessionLastAt,
645
+ participantElementIds: [ownerElementId, ...participantElementIds],
646
+ // First session of THIS run cleans prior chunks for matching
647
+ // archiveSha256 (re-run-with-same-bytes idempotency); subsequent
648
+ // sessions in the same run skip cleanup or they would delete the
649
+ // chunks just written by sessions above them.
650
+ cleanupPriorChunks: isFirstSessionInRun,
651
+ });
652
+ } catch (err) {
480
653
  cleanup();
481
- fail("classify", {
482
- reason: `session ${s.index} of ${s.messages.length} messages produced zero chunks (classifier-prompt regression)`,
654
+ fail("memory-ingest", {
655
+ reason: err instanceof Error ? err.message : String(err),
656
+ sessionIndex: s.index + 1,
657
+ sessionsTotal: sessions.length,
483
658
  });
484
659
  }
485
- for (const sec of result.output.sections) allChunks.push(sec);
486
- for (const kw of result.output.documentKeywords) allKeywords.add(kw);
660
+
661
+ archiveElementIdLatest = ingestResult.documentNodeId;
662
+ totalChunksWritten += ingestResult.sectionCount;
663
+ totalNextEdges += ingestResult.edgeBreakdown.NEXT ?? 0;
664
+ if (isFirstSessionInRun) {
665
+ participantsLinked = ingestResult.edgeBreakdown.PARTICIPANT_IN ?? 0;
666
+ }
667
+ log(
668
+ `session-committed sessionIndex=${s.index + 1}/${sessions.length} chunks-written=${ingestResult.sectionCount} cursor-at="${sessionLastAt}" ms=${Date.now() - ingestStart}`,
669
+ );
487
670
  }
671
+
488
672
  if (sessionsUnenriched > 0) {
489
673
  log(`classify-summary sessions=${sessions.length} unenriched=${sessionsUnenriched} enriched=${sessions.length - sessionsUnenriched}`);
490
674
  }
491
675
 
492
- // 6h. Compute lastIngestedMessageHash from the last delta line
493
- const lastLine = deltaLines[deltaLines.length - 1];
494
- const lastIngestedMessageHash = deriveMessageContentHash({
495
- dateSent: lastLine.dateSent,
496
- body: lastLine.body,
497
- });
498
- const lastIngestedMessageAt = lastLine.dateSent;
499
-
500
- // 6i. Aggregate document-level summary across sessions
501
- const documentSummary = sessions.length === 1
502
- ? `${deltaLines.length} messages in 1 session, ${allChunks.length} chunks.`
503
- : `${deltaLines.length} messages in ${sessions.length} sessions, ${allChunks.length} chunks.`;
504
-
505
- // 6j. Call memoryIngest with parentLabel='ConversationArchive'
506
- let ingestResult;
507
- const ingestStart = Date.now();
508
- try {
509
- ingestResult = await memoryIngest({
510
- accountId,
511
- attachmentId: conversationIdentity,
512
- parentLabel: "ConversationArchive",
513
- source,
514
- documentSummary,
515
- anchorNodeId: ownerElementId,
516
- anchorLabel: "AdminUser",
517
- sections: allChunks,
518
- scope,
519
- sessionId,
520
- documentKeywords: Array.from(allKeywords),
521
- archiveSha256,
522
- archiveSourceFile,
523
- lastIngestedMessageHash,
524
- lastIngestedMessageAt,
525
- participantElementIds: [ownerElementId, ...participantElementIds],
526
- });
527
- } catch (err) {
528
- cleanup();
529
- fail("memory-ingest", { reason: err instanceof Error ? err.message : String(err) });
530
- }
531
- log(
532
- `source=${source} file=${archiveSourceFile} conversationIdentity=${conversationIdentity.slice(0, 12)} archiveElementId=${ingestResult.documentNodeId} chunks-written=${ingestResult.sectionCount} next-edges=${ingestResult.edgeBreakdown.NEXT ?? 0} participants=${ingestResult.edgeBreakdown.PARTICIPANT_IN ?? 0} ms=${Date.now() - ingestStart}`,
533
- );
534
-
535
676
  cleanup();
536
677
  const totalMs = Date.now() - startedMs;
537
- log(`done source=${source} conversationIdentity=${conversationIdentity.slice(0, 12)} total-ms=${totalMs} exit=0`);
678
+ log(
679
+ `done source=${source} conversationIdentity=${conversationIdentity.slice(0, 12)} archiveElementId=${archiveElementIdLatest} chunks-written=${totalChunksWritten} sessions=${sessions.length} total-ms=${totalMs}`,
680
+ );
538
681
 
539
682
  process.stdout.write(JSON.stringify({
540
- archiveElementId: ingestResult.documentNodeId,
683
+ archiveElementId: archiveElementIdLatest,
541
684
  conversationIdentity,
542
685
  archiveSha256,
543
686
  archiveSourceFile,
@@ -547,16 +690,16 @@ async function main() {
547
690
  systemSkipped: normaliserResult.counters.systemSkipped,
548
691
  delta: { kind: deltaKind, deltaStart, deltaMessages: deltaLines.length },
549
692
  sessions: sessions.length,
550
- chunks: ingestResult.sectionCount,
551
- nextEdgesCreated: ingestResult.edgeBreakdown.NEXT ?? 0,
552
- participantsLinked: ingestResult.edgeBreakdown.PARTICIPANT_IN ?? 0,
693
+ chunks: totalChunksWritten,
694
+ nextEdgesCreated: totalNextEdges,
695
+ participantsLinked,
553
696
  dateRange: { first: allLines[0].dateSent, last: allLines[allLines.length - 1].dateSent },
554
697
  parsedLineCount: allLines.length,
555
698
  sessionsUnenriched,
556
699
  topicKeywords: Array.from(allKeywords),
557
700
  ms: totalMs,
558
701
  }) + "\n");
559
- process.exit(0);
702
+ endRun(0);
560
703
  }
561
704
 
562
705
  main().catch((err) => {