@rubytech/create-maxy 1.0.830 → 1.0.832

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. package/package.json +1 -1
  2. package/payload/platform/lib/oauth-llm/dist/index.d.ts +8 -1
  3. package/payload/platform/lib/oauth-llm/dist/index.d.ts.map +1 -1
  4. package/payload/platform/lib/oauth-llm/dist/index.js +5 -1
  5. package/payload/platform/lib/oauth-llm/dist/index.js.map +1 -1
  6. package/payload/platform/lib/oauth-llm/src/index.ts +19 -4
  7. package/payload/platform/neo4j/migrations/004-project-admin-agent.ts +36 -3
  8. package/payload/platform/neo4j/migrations/008-adminuser-accountid-backfill.ts +85 -0
  9. package/payload/platform/plugins/admin/mcp/dist/index.js +25 -3
  10. package/payload/platform/plugins/admin/mcp/dist/index.js.map +1 -1
  11. package/payload/platform/plugins/docs/references/internals.md +1 -1
  12. package/payload/platform/plugins/memory/bin/conversation-archive-ingest.mjs +293 -127
  13. package/payload/platform/plugins/memory/bin/conversation-archive-ingest.sh +41 -14
  14. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/timestamp-scanner.d.ts +49 -0
  15. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/timestamp-scanner.d.ts.map +1 -0
  16. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/timestamp-scanner.js +35 -0
  17. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/timestamp-scanner.js.map +1 -0
  18. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/types.d.ts +6 -7
  19. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/types.d.ts.map +1 -1
  20. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/types.js +9 -1
  21. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/types.js.map +1 -1
  22. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/whatsapp-text.d.ts.map +1 -1
  23. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/whatsapp-text.js +49 -131
  24. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/whatsapp-text.js.map +1 -1
  25. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/delta-cursor.d.ts.map +1 -1
  26. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/delta-cursor.js +0 -1
  27. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/delta-cursor.js.map +1 -1
  28. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.d.ts +0 -2
  29. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.d.ts.map +1 -1
  30. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.js +10 -11
  31. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.js.map +1 -1
  32. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.d.ts +3 -4
  33. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.d.ts.map +1 -1
  34. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.js +11 -42
  35. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.js.map +1 -1
  36. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/to-turn-text.d.ts.map +1 -1
  37. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/to-turn-text.js +10 -8
  38. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/to-turn-text.js.map +1 -1
  39. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts +10 -8
  40. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts.map +1 -1
  41. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js +118 -43
  42. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js.map +1 -1
  43. package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.d.ts +3 -2
  44. package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.d.ts.map +1 -1
  45. package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.js +40 -18
  46. package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.js.map +1 -1
  47. package/payload/platform/plugins/memory/mcp/dist/lib/uuid.d.ts +3 -0
  48. package/payload/platform/plugins/memory/mcp/dist/lib/uuid.d.ts.map +1 -0
  49. package/payload/platform/plugins/memory/mcp/dist/lib/uuid.js +12 -0
  50. package/payload/platform/plugins/memory/mcp/dist/lib/uuid.js.map +1 -0
  51. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-source-agnosticism.test.js +4 -6
  52. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-source-agnosticism.test.js.map +1 -1
  53. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-whatsapp-text.test.js +59 -17
  54. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-whatsapp-text.test.js.map +1 -1
  55. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts +11 -0
  56. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts.map +1 -1
  57. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js +17 -9
  58. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js.map +1 -1
  59. package/payload/platform/plugins/memory/mcp/dist/tools/memory-rank.js +2 -2
  60. package/payload/platform/plugins/memory/mcp/dist/tools/memory-rank.js.map +1 -1
  61. package/payload/platform/plugins/memory/skills/conversation-archive/SKILL.md +41 -9
  62. package/payload/platform/scripts/lib/resolve-account-dir.sh +19 -1
  63. package/payload/server/chunk-25QDCOE5.js +1116 -0
  64. package/payload/server/chunk-35YZS3KL.js +328 -0
  65. package/payload/server/chunk-7CBRZKZS.js +654 -0
  66. package/payload/server/chunk-BCFM2UPH.js +2305 -0
  67. package/payload/server/chunk-CV3HPX46.js +10097 -0
  68. package/payload/server/chunk-IXOPV36P.js +2305 -0
  69. package/payload/server/chunk-J6YWEJBN.js +1116 -0
  70. package/payload/server/chunk-OCPJGZ6S.js +654 -0
  71. package/payload/server/chunk-ZKGAYLAK.js +10097 -0
  72. package/payload/server/client-pool-NBVGONQL.js +32 -0
  73. package/payload/server/client-pool-ZNGN66GN.js +32 -0
  74. package/payload/server/cloudflare-task-tracker-MHALDN54.js +19 -0
  75. package/payload/server/cloudflare-task-tracker-R4FIORFL.js +19 -0
  76. package/payload/server/maxy-edge.js +4 -4
  77. package/payload/server/neo4j-migrations-3A6K2EB5.js +552 -0
  78. package/payload/server/neo4j-migrations-6RW423E2.js +530 -0
  79. package/payload/server/server.js +43 -20
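
The diff below is file 12 in the list above (the only hunk set whose net growth, +293 −127, matches):

package/payload/platform/plugins/memory/bin/conversation-archive-ingest.mjs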
@@ -1,7 +1,9 @@
 #!/usr/bin/env node
 // =============================================================================
 // conversation-archive-ingest.mjs — in-process orchestrator for
-// conversation-archive-ingest.sh (Task 894 — supersedes whatsapp-import bin).
+// conversation-archive-ingest.sh (Task 894 — supersedes whatsapp-import bin;
+// Task 900 — env-only identity, fsync-per-line progress sink, per-session
+// checkpointing).
 //
 // Source-agnostic. The same pipeline runs for every conversation source;
 // `--source <enum>` selects the normaliser at the top of the pipeline:
@@ -9,26 +11,44 @@
 // normalise → bind canonical senders → derive conversationIdentity
 // → look up prior :ConversationArchive (delta cursor)
 // → sessionize delta at gap-hours boundaries
-// → for each session: classify (mode='chat') → collect chunks
-// memoryIngest(parentLabel='ConversationArchive', source=<enum>)
+// → for each session: classify (mode='chat') → memoryIngest immediately
+//   advance lastIngestedMessageHash + lastIngestedMessageAt
+//
+// Identity (Task 900): `process.env.ACCOUNT_ID` and `process.env.USER_ID` are
+// the only inputs. Both are plumbed by `spawn-env.ts` into every Bash
+// subprocess; missing/malformed → LOUD-FAIL at phase=argv. `--account-id` and
+// `--owner-element-id` flags are GONE; the writer derives the AdminUser
+// elementId from (accountId, userId) one Cypher hop away.
 //
 // Argv (positional): <archive-path>
-// Argv (flags): --source <whatsapp|telegram|signal|linkedin-messages|zoom-transcript|meeting-minutes|imessage|slack|other>
-//               --owner-element-id <id>
+// Argv (flags): --source <enum>
 //               --participant-person-ids <csv>
 //               --scope <admin|public>
 //               [--session-gap-hours <N>] (default 12)
-//               [--account-id <accountId>]
 //               [--timezone <iana>]
 //               [--date-format <DD/MM/YY|MM/DD/YY|DD/MM/YYYY|MM/DD/YYYY>]
 //               [--session-id <id>]
 //
 // Stdout (success): one JSON line — counters the skill needs to formulate
 // the three operator-facing messages. See SKILL.md for the shape.
-// Stderr: one [conversation-archive] FAIL line on failure, exit non-zero.
+// Stderr + progress file: `[conversation-archive] ...` lines, fsync per write.
+// Progress path: data/accounts/<accountId>/logs/conversation-archive-<sessionId>.log
+// Wrapper sh prints `[conversation-archive] progress-file=<absolute-path>` on
+// stdout before exec node, so the agent computes nothing.
 // =============================================================================
 
-import { existsSync, mkdtempSync, readdirSync, rmSync, statSync } from "node:fs";
+import {
+  closeSync,
+  existsSync,
+  fsyncSync,
+  mkdirSync,
+  mkdtempSync,
+  openSync,
+  readdirSync,
+  rmSync,
+  statSync,
+  writeSync,
+} from "node:fs";
 import { join, resolve, dirname } from "node:path";
 import { tmpdir } from "node:os";
 import { spawnSync } from "node:child_process";
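
The header comments above pin down the new invocation contract: identity travels in env, never in flags. A minimal caller sketch under that contract — the path, element IDs, and UUIDs below are placeholders for illustration, not values from this package:

```js
// Hypothetical caller sketch (Task 900 contract): ACCOUNT_ID / USER_ID in env,
// no --account-id or --owner-element-id flags. All literals are placeholders.
import { spawnSync } from "node:child_process";

const run = spawnSync(
  "node",
  [
    "platform/plugins/memory/bin/conversation-archive-ingest.mjs",
    "/tmp/chat-export.zip",                    // positional <archive-path>
    "--source", "whatsapp",
    "--participant-person-ids", "elemA,elemB", // operator-confirmed, owner excluded
    "--scope", "admin",
  ],
  {
    env: {
      ...process.env,
      ACCOUNT_ID: "11111111-1111-4111-8111-111111111111", // placeholder UUID
      USER_ID: "22222222-2222-4222-8222-222222222222",    // placeholder UUID
    },
    encoding: "utf8",
  },
);

// Success contract: exactly one JSON line of counters on stdout.
const counters = run.status === 0 ? JSON.parse(run.stdout.trim()) : null;
```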
@@ -89,13 +109,60 @@ const MEMORY_INGEST_PATH = resolve(
   "tools",
   "memory-ingest.js",
 );
+const UUID_LIB_PATH = resolve(
+  platformRoot,
+  "plugins",
+  "memory",
+  "mcp",
+  "dist",
+  "lib",
+  "uuid.js",
+);
 
 // ---------------------------------------------------------------------------
-// 2. Logger
+// 2. Progress sink + logger fan-out (Task 900 sub-scope A).
+// `log()` and `fail()` write to stderr AND the progress file with fsync per
+// write. Parent Bash returns nothing until child exits; the progress file
+// is the only surface the agent's heartbeat poll can read mid-run.
 // ---------------------------------------------------------------------------
+let progressFd = -1;
+const startedMs = Date.now();
+
+function openProgressSink(absolutePath) {
+  mkdirSync(dirname(absolutePath), { recursive: true });
+  progressFd = openSync(absolutePath, "a");
+}
+
+function writeProgressLine(line) {
+  // stderr first — preserves existing observability for parents that read
+  // stderr after the child exits, AND makes the line visible via the
+  // mcp-spawn-tee'd device server.log without a separate sink.
+  process.stderr.write(line);
+  if (progressFd >= 0) {
+    try {
+      writeSync(progressFd, line);
+      fsyncSync(progressFd);
+    } catch {
+      // Disk pressure / EBADF: don't kill the run — stderr already carries
+      // the line.
+    }
+  }
+}
+
 function log(line) {
-  process.stderr.write(`[conversation-archive] ${line}\n`);
+  writeProgressLine(`[conversation-archive] ${line}\n`);
 }
+
+function endRun(exitCode) {
+  const totalMs = Date.now() - startedMs;
+  writeProgressLine(`[conversation-archive] end exit-code=${exitCode} ms=${totalMs}\n`);
+  if (progressFd >= 0) {
+    try { closeSync(progressFd); } catch { /* fd already closed */ }
+    progressFd = -1;
+  }
+  process.exit(exitCode);
+}
+
 function fail(phase, fields) {
   const fieldStr = Object.entries(fields)
     .map(([k, v]) =>
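
The hunk above is the write side of the progress sink; the read side the comments anticipate is a poll loop over the progress file. A sketch of such a heartbeat reader — a hypothetical helper, not part of this package — leaning on the fsync-per-line guarantee:

```js
// Hypothetical heartbeat reader. Because the writer fsyncs after every line,
// a poller sees each `[conversation-archive]` line as soon as it is written,
// even while the parent Bash call is still blocked on the child process.
import { existsSync, readFileSync } from "node:fs";

function tailProgress(progressFilePath, intervalMs = 2000) {
  let offset = 0;
  const timer = setInterval(() => {
    if (!existsSync(progressFilePath)) return; // sink not opened yet
    const content = readFileSync(progressFilePath, "utf8");
    const fresh = content.slice(offset);
    offset = content.length;
    if (fresh) process.stdout.write(fresh);
    // endRun() always appends a terminal "end exit-code=<n> ms=<n>" line.
    if (/ end exit-code=\d+ ms=\d+/.test(fresh)) clearInterval(timer);
  }, intervalMs);
  return timer;
}
```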
@@ -104,8 +171,8 @@ function fail(phase, fields) {
         : `${k}=${v ?? "-"}`,
     )
     .join(" ");
-  process.stderr.write(`[conversation-archive] FAIL phase=${phase} ${fieldStr}\n`);
-  process.exit(1);
+  writeProgressLine(`[conversation-archive] FAIL phase=${phase} ${fieldStr}\n`);
+  endRun(1);
 }
 
 // ---------------------------------------------------------------------------
@@ -144,7 +211,6 @@ function parseArgv(argv) {
   if (!VALID_SOURCES.has(flags.source)) {
     fail("argv", { reason: `invalid --source "${flags.source}" (whatsapp|telegram|signal|linkedin-messages|zoom-transcript|meeting-minutes|imessage|slack|other)` });
   }
-  if (!flags.ownerElementId) fail("argv", { reason: "missing --owner-element-id" });
   if (!flags.participantPersonIds) {
     fail("argv", {
       reason: "missing --participant-person-ids (csv of operator-confirmed :Person/:AdminUser elementIds, owner excluded)",
@@ -216,35 +282,41 @@ function findChatTxt(dir) {
 }
 
 // ---------------------------------------------------------------------------
-// 5. Account resolution (Phase 0 = single account)
-// ---------------------------------------------------------------------------
-function resolveAccountId(flags) {
-  if (flags.accountId && flags.accountId.trim()) return flags.accountId.trim();
-  const installDir = resolve(platformRoot, "..");
-  const accountsDir = join(installDir, "data", "accounts");
-  if (!existsSync(accountsDir)) {
-    fail("argv", { reason: `accounts dir not found: ${accountsDir}; pass --account-id explicitly` });
-  }
-  const dirs = readdirSync(accountsDir, { withFileTypes: true })
-    .filter((d) => d.isDirectory() && !d.name.startsWith("."))
-    .map((d) => d.name);
-  if (dirs.length === 0) fail("argv", { reason: `no accounts found under ${accountsDir}` });
-  if (dirs.length > 1) {
-    fail("argv", {
-      reason: `multiple accounts under ${accountsDir} (${dirs.join(",")}); pass --account-id explicitly`,
-    });
-  }
-  return dirs[0];
-}
-
-// ---------------------------------------------------------------------------
-// 6. Main
+// 5. Main
 // ---------------------------------------------------------------------------
 async function main() {
-  const startedMs = Date.now();
   const { archive, flags } = parseArgv(process.argv);
   const source = flags.source;
-  const ownerElementId = flags.ownerElementId;
+
+  // Identity is env-only (Task 900 sub-scope C). spawn-env.ts plumbs both
+  // ACCOUNT_ID and USER_ID into every Bash subprocess; the bin LOUD-FAILs if
+  // either is absent or malformed. The disk-scan resolver and the
+  // --account-id / --owner-element-id flags are gone.
+  let UUID_REGEX;
+  try {
+    ({ UUID_REGEX } = await import(UUID_LIB_PATH));
+  } catch (err) {
+    fail("import", {
+      reason: "failed to import compiled uuid lib",
+      detail: err instanceof Error ? err.message : String(err),
+    });
+  }
+
+  const accountId = process.env.ACCOUNT_ID?.trim();
+  if (!accountId) {
+    fail("argv", { reason: "ACCOUNT_ID env missing — bin must be invoked under an authenticated agent context" });
+  }
+  if (!UUID_REGEX.test(accountId)) {
+    fail("argv", { reason: `ACCOUNT_ID env malformed (not a UUID): "${accountId}"` });
+  }
+  const userId = process.env.USER_ID?.trim();
+  if (!userId) {
+    fail("argv", { reason: "USER_ID env missing — bin must be invoked under an authenticated user session" });
+  }
+  if (!UUID_REGEX.test(userId)) {
+    fail("argv", { reason: `USER_ID env malformed (not a UUID): "${userId}"` });
+  }
+
   const participantElementIds = flags.participantPersonIds
     .split(",")
     .map((s) => s.trim())
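
The compiled `lib/uuid.js` this hunk imports is new in this release (+12 lines, file 49 in the list) but its body is not shown. Only the `UUID_REGEX` export is confirmed by the destructuring above; a plausible shape, offered purely as a guess for illustration:

```js
// Speculative sketch of lib/uuid.js — NOT the package's actual source.
// Only the existence of a UUID_REGEX export is confirmed by this diff.
export const UUID_REGEX =
  /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
```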
@@ -253,7 +325,6 @@ async function main() {
     fail("argv", { reason: "--participant-person-ids must list at least one elementId" });
   }
   const scope = flags.scope;
-  const accountId = resolveAccountId(flags);
   const timezone = flags.timezone || "Europe/London";
   const dateFormat = flags.dateFormat;
   const sessionGapHours = flags.sessionGapHours
@@ -266,20 +337,34 @@ async function main() {
     flags.sessionId ||
     `conversation-archive:${source}:${Date.now()}:${Math.random().toString(36).slice(2, 10)}`;
 
+  // ---------------------------------------------------------------------------
+  // Open progress sink BEFORE any await. Path is deterministic from
+  // (accountId, sessionId); the wrapper sh prints the same path on stdout so
+  // the agent's heartbeat polls the right file.
+  // ---------------------------------------------------------------------------
+  const accountsDir = resolve(platformRoot, "..", "data", "accounts");
+  const logsDir = resolve(accountsDir, accountId, "logs");
+  const progressFilePath = resolve(logsDir, `conversation-archive-${sessionId}.log`);
+  openProgressSink(progressFilePath);
+
+  log(
+    `start session=${sessionId} source=${source} archive=${archive} accountId=${accountId} userId=${userId} participants=${participantElementIds.length} scope=${scope} pid=${process.pid}`,
+  );
+
   // Imports — fail loudly if any compiled dist missing
   let getNormaliser;
   let sessionize, toTurnText, findDeltaCursor;
-  let normaliseSenderName, deriveConversationIdentity, deriveMessageContentHash;
-  let bindCanonicalSenders;
+  let deriveConversationIdentity, deriveMessageContentHash;
+  let verifyParticipantElementIds;
   let getSession, classifyDocument, memoryIngest;
   try {
     ({ getNormaliser } = await import(NORMALISERS_PATH));
     ({ sessionize } = await import(join(PIPELINE_PATH, "sessionize.js")));
     ({ toTurnText } = await import(join(PIPELINE_PATH, "to-turn-text.js")));
     ({ findDeltaCursor } = await import(join(PIPELINE_PATH, "delta-cursor.js")));
-    ({ normaliseSenderName, deriveConversationIdentity, deriveMessageContentHash } =
+    ({ deriveConversationIdentity, deriveMessageContentHash } =
       await import(join(PIPELINE_PATH, "derive-keys.js")));
-    ({ bindCanonicalSenders } = await import(join(PIPELINE_PATH, "sender-bind.js")));
+    ({ verifyParticipantElementIds } = await import(join(PIPELINE_PATH, "sender-bind.js")));
     ({ getSession } = await import(NEO4J_LIB_PATH));
     ({ classifyDocument } = await import(LLM_CLASSIFIER_PATH));
     ({ memoryIngest } = await import(MEMORY_INGEST_PATH));
@@ -290,6 +375,36 @@ async function main() {
     });
   }
 
+  // Resolve owner AdminUser elementId from (accountId, userId). One Cypher
+  // hop replaces the deleted --owner-element-id flag — owner identity is env
+  // input, never an out-of-band flag.
+  let ownerElementId;
+  {
+    const lookupSession = getSession();
+    try {
+      const r = await lookupSession.run(
+        `MATCH (au:AdminUser { accountId: $accountId, userId: $userId })
+         RETURN elementId(au) AS ownerElementId LIMIT 1`,
+        { accountId, userId },
+      );
+      const row = r.records[0]?.get("ownerElementId");
+      if (typeof row !== "string" || !row) {
+        await lookupSession.close().catch(() => {});
+        fail("argv", {
+          reason: `USER_ID does not resolve to an AdminUser under ACCOUNT_ID (accountId=${accountId.slice(0, 8)}, userId=${userId.slice(0, 8)})`,
+        });
+      }
+      ownerElementId = row;
+    } catch (err) {
+      await lookupSession.close().catch(() => {});
+      fail("argv", {
+        reason: `AdminUser lookup failed: ${err instanceof Error ? err.message : String(err)}`,
+      });
+    }
+    await lookupSession.close().catch(() => {});
+  }
+  log(`owner-resolved ownerElementId=${ownerElementId.slice(0, 12)} userId=${userId.slice(0, 8)}`);
+
   // 6a. Resolve source file
   const { sourceFile, cleanup } = resolveSourceFile(archive, source);
 
@@ -316,29 +431,34 @@ async function main() {
   log(
     `parsed lines=${normaliserResult.counters.parsed} media-skipped=${normaliserResult.counters.mediaSkipped} system-skipped=${normaliserResult.counters.systemSkipped}`,
   );
+  // Task 897: zero matched timestamps means this isn't a chat archive — the
+  // file should route through document-ingest instead. The agent's
+  // database-operator skill picks the right entry; this LOUD-FAIL surfaces
+  // misroute as an actionable signal rather than a silent empty ingest.
   if (allLines.length === 0) {
     cleanup();
-    fail("parse", { reason: "zero parsed lines after walking archive" });
+    fail("parse", {
+      reason: `no timestamp prefixes detected — this archive contains no chat messages with the expected source=${source} grammar; route through document-ingest with mode='document', not conversation-archive-ingest`,
+    });
   }
 
-  // 6c. Bind canonical senders against the confirmed set
-  const distinctSenderNames = Array.from(new Set(allLines.map((l) => l.senderName)));
-  const senderHistogram = computeSenderHistogram(allLines);
+  // 6c. Verify participant elementIds resolve to graph nodes with the right
+  // accountId and labels. Per-line senderName binding is gone (Task 897);
+  // operator-confirmed participants attach to the parent :ConversationArchive.
   let session = getSession();
   try {
-    await bindCanonicalSenders({
+    await verifyParticipantElementIds({
       session,
       accountId,
       ownerElementId,
       participantElementIds,
-      senderNames: distinctSenderNames,
     });
   } catch (err) {
     await session.close().catch(() => {});
     cleanup();
     if (err && err.userFacing) {
-      process.stderr.write(`[conversation-archive] FAIL ${err.message}\n`);
-      process.exit(1);
+      writeProgressLine(`[conversation-archive] FAIL ${err.message}\n`);
+      endRun(1);
     }
     fail("argv", { reason: err instanceof Error ? err.message : String(err) });
   }
@@ -403,12 +523,13 @@ async function main() {
       nextEdgesCreated: 0,
       participantsLinked: 0,
       dateRange: { first: allLines[0].dateSent, last: allLines[allLines.length - 1].dateSent },
-      senderHistogram,
+      parsedLineCount: allLines.length,
+      sessionsUnenriched: 0,
       topicKeywords: [],
       ms: totalMs,
       priorLastIngestedMessageAt: priorArchive.lastAt,
     }) + "\n");
-    process.exit(0);
+    endRun(0);
   }
   deltaStart = cursor.deltaStart;
   deltaKind = "delta";
@@ -424,10 +545,24 @@ async function main() {
     `sessionize source=${source} archiveSha256=${archiveSha256.slice(0, 12)} messages=${deltaLines.length} sessions=${sessions.length} gap-hours=${sessionGapHours}`,
   );
 
-  // 6g. Classify each session via Haiku (mode='chat')
-  const allChunks = [];
+  // 6g. Per-session classify + immediate memoryIngest (Task 900 sub-scope E).
+  // Each session commits atomically: chunks + cursor advance happen together
+  // via memoryIngest. A kill mid-loop leaves a partial archive whose cursor
+  // is at session N-1's last message; the next invocation slices from there
+  // and re-classifies only session N onward. degrade-on-error from Task 897
+  // is preserved: a Haiku error on session N still emits an unenriched chunk
+  // and advances the cursor, so resumption never re-attempts the same Haiku
+  // error.
   const allKeywords = new Set();
-  for (const s of sessions) {
+  let sessionsUnenriched = 0;
+  let totalChunksWritten = 0;
+  let totalNextEdges = 0;
+  let participantsLinked = 0;
+  let archiveElementIdLatest = priorArchive ? priorArchive.elemId : "";
+
+  for (let sIdx = 0; sIdx < sessions.length; sIdx++) {
+    const s = sessions[sIdx];
+    const isFirstSessionInRun = sIdx === 0;
     const sessionStart = Date.now();
     const text = toTurnText(s);
     const result = await classifyDocument({
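
The resume semantics the 6g comment describes reduce to a hash cursor over the normalised line stream. A toy sketch of that slicing — hypothetical shapes; the real logic lives in `conversation-pipeline/delta-cursor.js`:

```js
// Toy sketch of the cursor semantics, not this package's implementation.
// After a mid-run kill, the stored hash points at session N-1's last message,
// so the next run re-classifies only session N onward.
function sliceAfterCursor(allLines, lastIngestedMessageHash, hashOfLine) {
  const cursorIdx = allLines.findIndex(
    (line) => hashOfLine(line) === lastIngestedMessageHash,
  );
  // Cursor not found (edited or truncated re-export): treat the whole
  // archive as the delta rather than silently dropping messages.
  return cursorIdx === -1 ? allLines : allLines.slice(cursorIdx + 1);
}
```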
@@ -438,74 +573,114 @@ async function main() {
       naturalEdgeMap: "",
       documentText: text,
     });
-    if (result.kind === "fallback") {
-      cleanup();
-      fail("classify", { reason: `Haiku fallback on session ${s.index}: ${result.reason}` });
+
+    let sessionChunks;
+    let sessionKeywords = [];
+    if (result.kind === "error") {
+      sessionsUnenriched += 1;
+      log(`session-unenriched session=${s.index + 1}/${sessions.length} reason="${result.reason.replace(/"/g, '\\"')}"`);
+      // Emit one raw :Section:Conversation chunk covering the whole session.
+      // No summary, no topicKeywords — a follow-up pass can re-enrich.
+      sessionChunks = [{
+        kind: "Conversation",
+        title: `Unenriched session ${s.index + 1}`,
+        body: text,
+        summary: "",
+        sourceStart: 0,
+        sourceEnd: text.length,
+        properties: {
+          unenriched: true,
+          unenrichedReason: result.reason,
+          firstMessageAt: s.firstMessageAt,
+          lastMessageAt: s.lastMessageAt,
+          messageCount: s.messages.length,
+        },
+        anchorEdge: null,
+      }];
+    } else {
+      const chunkCount = result.output.sections.length;
+      log(
+        `classify-session sessionIndex=${s.index + 1}/${sessions.length} messages=${s.messages.length} chars=${text.length} chunks=${chunkCount} ms=${Date.now() - sessionStart}`,
+      );
+      if (chunkCount === 0 && s.messages.length > 0) {
+        cleanup();
+        fail("classify", {
+          reason: `session ${s.index} of ${s.messages.length} messages produced zero chunks (classifier-prompt regression)`,
+        });
+      }
+      sessionChunks = result.output.sections;
+      sessionKeywords = result.output.documentKeywords ?? [];
+      for (const kw of sessionKeywords) allKeywords.add(kw);
     }
-    const chunkCount = result.output.sections.length;
-    log(
-      `classify-session sessionIndex=${s.index + 1}/${sessions.length} messages=${s.messages.length} chars=${text.length} chunks=${chunkCount} ms=${Date.now() - sessionStart}`,
-    );
-    if (chunkCount === 0 && s.messages.length > 0) {
+
+    // Per-session cursor: advance to THIS session's last message. Atomic with
+    // chunk writes inside memoryIngest's MERGE/CREATE Cypher transaction.
+    const lastSessionLine = s.messages[s.messages.length - 1];
+    const sessionLastHash = deriveMessageContentHash({
+      dateSent: lastSessionLine.dateSent,
+      body: lastSessionLine.body,
+    });
+    const sessionLastAt = lastSessionLine.dateSent;
+    const sessionDocumentSummary = `Session ${s.index + 1}/${sessions.length}: ${s.messages.length} messages, ${sessionChunks.length} chunks.`;
+
+    let ingestResult;
+    const ingestStart = Date.now();
+    try {
+      ingestResult = await memoryIngest({
+        accountId,
+        attachmentId: conversationIdentity,
+        parentLabel: "ConversationArchive",
+        source,
+        documentSummary: sessionDocumentSummary,
+        anchorNodeId: ownerElementId,
+        anchorLabel: "AdminUser",
+        sections: sessionChunks,
+        scope,
+        sessionId,
+        documentKeywords: Array.from(allKeywords),
+        archiveSha256,
+        archiveSourceFile,
+        lastIngestedMessageHash: sessionLastHash,
+        lastIngestedMessageAt: sessionLastAt,
+        participantElementIds: [ownerElementId, ...participantElementIds],
+        // First session of THIS run cleans prior chunks for matching
+        // archiveSha256 (re-run-with-same-bytes idempotency); subsequent
+        // sessions in the same run skip cleanup or they would delete the
+        // chunks just written by sessions above them.
+        cleanupPriorChunks: isFirstSessionInRun,
+      });
+    } catch (err) {
       cleanup();
-      fail("classify", {
-        reason: `session ${s.index} of ${s.messages.length} messages produced zero chunks (classifier-prompt regression)`,
+      fail("memory-ingest", {
+        reason: err instanceof Error ? err.message : String(err),
+        sessionIndex: s.index + 1,
+        sessionsTotal: sessions.length,
       });
     }
-    for (const sec of result.output.sections) allChunks.push(sec);
-    for (const kw of result.output.documentKeywords) allKeywords.add(kw);
-  }
 
-  // 6h. Compute lastIngestedMessageHash from the last delta line
-  const lastLine = deltaLines[deltaLines.length - 1];
-  const lastIngestedMessageHash = deriveMessageContentHash({
-    dateSent: lastLine.dateSent,
-    senderName: lastLine.senderName,
-    body: lastLine.body,
-  });
-  const lastIngestedMessageAt = lastLine.dateSent;
-
-  // 6i. Aggregate document-level summary across sessions
-  const documentSummary = sessions.length === 1
-    ? `${deltaLines.length} messages in 1 session, ${allChunks.length} chunks.`
-    : `${deltaLines.length} messages in ${sessions.length} sessions, ${allChunks.length} chunks.`;
+    archiveElementIdLatest = ingestResult.documentNodeId;
+    totalChunksWritten += ingestResult.sectionCount;
+    totalNextEdges += ingestResult.edgeBreakdown.NEXT ?? 0;
+    if (isFirstSessionInRun) {
+      participantsLinked = ingestResult.edgeBreakdown.PARTICIPANT_IN ?? 0;
+    }
+    log(
+      `session-committed sessionIndex=${s.index + 1}/${sessions.length} chunks-written=${ingestResult.sectionCount} cursor-at="${sessionLastAt}" ms=${Date.now() - ingestStart}`,
+    );
+  }
 
-  // 6j. Call memoryIngest with parentLabel='ConversationArchive'
-  let ingestResult;
-  const ingestStart = Date.now();
-  try {
-    ingestResult = await memoryIngest({
-      accountId,
-      attachmentId: conversationIdentity,
-      parentLabel: "ConversationArchive",
-      source,
-      documentSummary,
-      anchorNodeId: ownerElementId,
-      anchorLabel: "AdminUser",
-      sections: allChunks,
-      scope,
-      sessionId,
-      documentKeywords: Array.from(allKeywords),
-      archiveSha256,
-      archiveSourceFile,
-      lastIngestedMessageHash,
-      lastIngestedMessageAt,
-      participantElementIds: [ownerElementId, ...participantElementIds],
-    });
-  } catch (err) {
-    cleanup();
-    fail("memory-ingest", { reason: err instanceof Error ? err.message : String(err) });
+  if (sessionsUnenriched > 0) {
+    log(`classify-summary sessions=${sessions.length} unenriched=${sessionsUnenriched} enriched=${sessions.length - sessionsUnenriched}`);
   }
-  log(
-    `source=${source} file=${archiveSourceFile} conversationIdentity=${conversationIdentity.slice(0, 12)} archiveElementId=${ingestResult.documentNodeId} chunks-written=${ingestResult.sectionCount} next-edges=${ingestResult.edgeBreakdown.NEXT ?? 0} participants=${ingestResult.edgeBreakdown.PARTICIPANT_IN ?? 0} ms=${Date.now() - ingestStart}`,
-  );
 
   cleanup();
   const totalMs = Date.now() - startedMs;
-  log(`done source=${source} conversationIdentity=${conversationIdentity.slice(0, 12)} total-ms=${totalMs} exit=0`);
+  log(
+    `done source=${source} conversationIdentity=${conversationIdentity.slice(0, 12)} archiveElementId=${archiveElementIdLatest} chunks-written=${totalChunksWritten} sessions=${sessions.length} total-ms=${totalMs}`,
+  );
 
   process.stdout.write(JSON.stringify({
-    archiveElementId: ingestResult.documentNodeId,
+    archiveElementId: archiveElementIdLatest,
     conversationIdentity,
     archiveSha256,
@@ -515,25 +690,16 @@ async function main() {
     archiveSourceFile,
     systemSkipped: normaliserResult.counters.systemSkipped,
     delta: { kind: deltaKind, deltaStart, deltaMessages: deltaLines.length },
-    chunks: ingestResult.sectionCount,
-    nextEdgesCreated: ingestResult.edgeBreakdown.NEXT ?? 0,
-    participantsLinked: ingestResult.edgeBreakdown.PARTICIPANT_IN ?? 0,
+    chunks: totalChunksWritten,
+    nextEdgesCreated: totalNextEdges,
+    participantsLinked,
     dateRange: { first: allLines[0].dateSent, last: allLines[allLines.length - 1].dateSent },
-    senderHistogram,
+    parsedLineCount: allLines.length,
+    sessionsUnenriched,
     topicKeywords: Array.from(allKeywords),
     ms: totalMs,
   }) + "\n");
-  process.exit(0);
-}
-
-function computeSenderHistogram(lines) {
-  const counts = new Map();
-  for (const l of lines) {
-    counts.set(l.senderName, (counts.get(l.senderName) ?? 0) + 1);
-  }
-  return Array.from(counts.entries())
-    .map(([name, count]) => ({ name, count }))
-    .sort((a, b) => b.count - a.count);
+  endRun(0);
 }
 
 main().catch((err) => {
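
For reference, the stdout success line's shape as visible in this diff. The hunk above elides a few counter fields between `archiveSourceFile` and `systemSkipped`, so treat the typedef as approximate; types are inferred from usage, not declared by the package:

```js
// Approximate shape of the one-line success JSON, reconstructed from the
// fields visible in this diff. `deltaStart`'s type is not shown, so it is
// left as `*`; elided context lines carry additional counters.
/**
 * @typedef {object} ConversationArchiveCounters
 * @property {string} archiveElementId
 * @property {string} conversationIdentity
 * @property {string} archiveSha256
 * @property {string} archiveSourceFile
 * @property {number} systemSkipped
 * @property {{ kind: string, deltaStart: *, deltaMessages: number }} delta
 * @property {number} sessions
 * @property {number} chunks
 * @property {number} nextEdgesCreated
 * @property {number} participantsLinked
 * @property {{ first: string, last: string }} dateRange
 * @property {number} parsedLineCount
 * @property {number} sessionsUnenriched
 * @property {string[]} topicKeywords
 * @property {number} ms
 */

/** @returns {ConversationArchiveCounters} */
function parseCountersLine(stdoutLine) {
  return JSON.parse(stdoutLine);
}
```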