@rubytech/create-maxy 1.0.799 → 1.0.801

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23) hide show
  1. package/dist/index.js +7 -1
  2. package/package.json +1 -1
  3. package/payload/platform/plugins/admin/hooks/__tests__/archive-ingest-surface-gate.test.sh +191 -0
  4. package/payload/platform/plugins/admin/hooks/archive-ingest-surface-gate.sh +207 -0
  5. package/payload/platform/plugins/cloudflare/references/manual-setup.md +12 -0
  6. package/payload/platform/plugins/cloudflare/scripts/_cdp-authorize-matcher.mjs +74 -0
  7. package/payload/platform/plugins/cloudflare/scripts/_cdp-authorize.mjs +60 -50
  8. package/payload/platform/plugins/cloudflare/scripts/setup-tunnel.sh +118 -22
  9. package/payload/platform/plugins/cloudflare/skills/setup-tunnel/SKILL.md +4 -0
  10. package/payload/platform/plugins/docs/references/plugins-guide.md +1 -1
  11. package/payload/platform/plugins/whatsapp-import/PLUGIN.md +2 -2
  12. package/payload/platform/plugins/whatsapp-import/bin/ingest.mjs +732 -0
  13. package/payload/platform/plugins/whatsapp-import/bin/whatsapp-ingest.sh +102 -0
  14. package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/SKILL.md +49 -97
  15. package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/references/export-parse.md +1 -1
  16. package/payload/platform/scripts/seed-neo4j.sh +24 -15
  17. package/payload/platform/templates/specialists/agents/database-operator.md +13 -3
  18. package/payload/server/public/assets/{admin-C0lKk6WM.js → admin-Sa301b8q.js} +6 -6
  19. package/payload/server/public/index.html +1 -1
  20. package/payload/platform/plugins/admin/hooks/__tests__/archive-ingest-gate.test.sh +0 -166
  21. package/payload/platform/plugins/admin/hooks/archive-ingest-gate.sh +0 -147
  22. package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/references/conversation-and-messages.md +0 -99
  23. package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/references/insight-extraction.md +0 -121
@@ -0,0 +1,732 @@
1
+ #!/usr/bin/env node
2
+ // =============================================================================
3
+ // ingest.mjs — in-process orchestrator for whatsapp-ingest.sh (Task 855).
4
+ //
5
+ // Collapses parse → archive-write → insight into one Node process so the
6
+ // 357K-char MCP-envelope ceiling no longer exists between steps. The
7
+ // database-operator subagent's only handle on this pipeline is the wrapper
8
+ // shell script; the gate at platform/plugins/admin/hooks/
9
+ // archive-ingest-surface-gate.sh blocks the legacy MCP tools mechanically.
10
+ //
11
+ // Argv (positional): <archive-path>
12
+ // Argv (flags): --owner-element-id <id> --scope <admin|public>
13
+ // [--account-id <accountId>] [--timezone <iana>]
14
+ // [--date-format <DD/MM/YY|MM/DD/YY|DD/MM/YYYY|MM/DD/YYYY>]
15
+ // [--no-insight]
16
+ //
17
+ // Stdout (success): one JSON line
18
+ // {conversationId, parsed, mediaSkipped, systemSkipped,
19
+ // createdMessages, insightCounters, ms}
20
+ //
21
+ // Stderr (failure): one [whatsapp-ingest] FAIL line, exit non-zero.
22
+ // =============================================================================
23
+
24
+ import {
25
+ createReadStream,
26
+ existsSync,
27
+ mkdtempSync,
28
+ readdirSync,
29
+ readFileSync,
30
+ rmSync,
31
+ statSync,
32
+ writeFileSync,
33
+ } from "node:fs";
34
+ import { join, resolve, dirname, basename } from "node:path";
35
+ import { tmpdir } from "node:os";
36
+ import { spawnSync } from "node:child_process";
37
+ import { fileURLToPath } from "node:url";
38
+
39
// Absolute directory of this script; anchor for the fallback root lookup below.
const __dirname = dirname(fileURLToPath(import.meta.url));

// ---------------------------------------------------------------------------
// 1. Resolve dist paths from the platform install root.
// ---------------------------------------------------------------------------
// A non-blank MAXY_PLATFORM_ROOT (whitespace-trimmed) takes precedence. When it
// is unset or blank, the root defaults to the directory three levels above
// this file (bin/ -> whatsapp-import/ -> plugins/ -> platform root).

const envPlatformRoot = process.env.MAXY_PLATFORM_ROOT?.trim();
const platformRoot = envPlatformRoot
  ? envPlatformRoot
  : resolve(__dirname, "..", "..", "..");

// Resolves path segments against the platform root into an absolute path.
const fromPlatformRoot = (...segments) => resolve(platformRoot, ...segments);

// Compiled modules that main() loads with dynamic import().
const PARSE_EXPORT_PATH = fromPlatformRoot(
  "plugins",
  "whatsapp-import",
  "lib",
  "dist",
  "index.js",
);
const ARCHIVE_WRITE_PATH = fromPlatformRoot(
  "plugins",
  "memory",
  "mcp",
  "dist",
  "tools",
  "memory-archive-write.js",
);
const NEO4J_LIB_PATH = fromPlatformRoot(
  "plugins",
  "memory",
  "mcp",
  "dist",
  "lib",
  "neo4j.js",
);
const OAUTH_LLM_PATH = fromPlatformRoot("lib", "oauth-llm", "dist", "index.js");
const MODELS_PATH = fromPlatformRoot("lib", "models", "dist", "index.js");
93
+
94
+ // ---------------------------------------------------------------------------
95
+ // 2. Logger — every line `[whatsapp-ingest]`-prefixed to stderr (server.log).
96
+ // ---------------------------------------------------------------------------
97
+
98
/**
 * Writes one diagnostic line to stderr, prefixed with `[whatsapp-ingest]`
 * and terminated by a newline.
 *
 * @param {string} line  Message text (interpolated as-is).
 */
function log(line) {
  const prefixed = `[whatsapp-ingest] ${line}\n`;
  process.stderr.write(prefixed);
}
101
+
102
/**
 * Writes the single `[whatsapp-ingest] FAIL phase=<phase> k=v ...` line to
 * stderr and terminates the process with exit code 1.
 *
 * Each field value is stringified; null/undefined render as `-`. A value that
 * contains whitespace, `=` or `"` is wrapped in double quotes. Inside quotes,
 * backslashes, double quotes, CR and LF are escaped so a multi-line error
 * message can never split the failure record across several lines.
 *
 * @param {string} phase  Name of the pipeline phase that failed.
 * @param {Record<string, unknown>} fields  Extra key/value details.
 */
function fail(phase, fields) {
  const renderField = ([key, value]) => {
    if (value == null) return `${key}=-`;
    const text = String(value);
    if (!/[\s="]/.test(text)) return `${key}=${text}`;
    // Backslash must be escaped first so the later escapes stay unambiguous.
    const escaped = text
      .replace(/\\/g, "\\\\")
      .replace(/"/g, '\\"')
      .replace(/\r/g, "\\r")
      .replace(/\n/g, "\\n");
    return `${key}="${escaped}"`;
  };
  const fieldStr = Object.entries(fields ?? {}).map(renderField).join(" ");
  process.stderr.write(`[whatsapp-ingest] FAIL phase=${phase} ${fieldStr}\n`);
  process.exit(1);
}
114
+
115
+ // ---------------------------------------------------------------------------
116
+ // 3. Argv parsing.
117
+ // ---------------------------------------------------------------------------
118
+
119
/**
 * Parses the process argument vector.
 *
 * The first two entries of `argv` are skipped. Exactly one positional
 * argument (the archive path) is accepted. `--no-insight` is a boolean
 * switch; every other `--flag` consumes the next argument as its value and is
 * stored under the camelCased flag name. A following token that itself
 * starts with `--` is rejected as a value, so a forgotten value cannot
 * silently swallow the next flag.
 *
 * Any violation is reported through fail("argv", ...), which ends the process.
 *
 * @param {string[]} argv  Typically `process.argv`.
 * @returns {{archive: string, flags: Record<string, string | boolean>}}
 */
function parseArgv(argv) {
  const args = argv.slice(2);
  let archive = null;
  const flags = {};

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];

    if (!arg.startsWith("--")) {
      if (archive == null) archive = arg;
      else fail("argv", { reason: `unexpected positional argument "${arg}"` });
      continue;
    }

    const key = arg.slice(2);
    if (key === "no-insight") {
      flags.noInsight = true;
      continue;
    }

    const value = args[i + 1];
    if (value == null || value.startsWith("--")) {
      fail("argv", { reason: `flag --${key} requires a value` });
    }
    flags[camelCase(key)] = value;
    i += 1; // the value has been consumed
  }

  if (!archive) fail("argv", { reason: "missing positional <archive>" });
  if (!flags.ownerElementId) fail("argv", { reason: "missing --owner-element-id" });
  if (!flags.scope) fail("argv", { reason: "missing --scope" });
  if (flags.scope !== "admin" && flags.scope !== "public") {
    fail("argv", { reason: `invalid --scope "${flags.scope}" (admin|public)` });
  }
  return { archive, flags };
}
147
+
148
/**
 * Converts a dash-separated flag name to camelCase. Only a dash that is
 * immediately followed by a lowercase ASCII letter is folded; any other dash
 * is left in place.
 *
 * @param {string} s  e.g. "owner-element-id".
 * @returns {string}  e.g. "ownerElementId".
 */
function camelCase(s) {
  const upperAfterDash = (dashAndLetter) => dashAndLetter[1].toUpperCase();
  return s.replace(/-[a-z]/g, upperAfterDash);
}
151
+
152
+ // ---------------------------------------------------------------------------
153
+ // 4. Archive resolution — accept .zip / dir / _chat.txt; return absolute
154
+ // path to _chat.txt and a cleanup callback for any tmp dir we created.
155
+ // ---------------------------------------------------------------------------
156
+
157
/**
 * Resolves the archive argument to the absolute path of the chat text file.
 *
 * Accepted shapes:
 *  - a directory: searched with findChatTxt();
 *  - a regular file whose name ends in ".zip" (any letter case): extracted
 *    with the external `unzip` command into a fresh temp directory, which is
 *    then searched with findChatTxt();
 *  - any other regular file: used as the chat text file itself.
 *
 * Every failure goes through fail("argv", ...), which ends the process. A
 * temp directory created here is removed before failing.
 *
 * @param {string} archivePath  Path given on the command line.
 * @returns {{chatTxt: string, cleanup: () => void}}  `cleanup` removes the
 *   temp directory created for a zip and is a no-op for the other shapes.
 */
function resolveChatTxt(archivePath) {
  const abs = resolve(archivePath);
  if (!existsSync(abs)) {
    fail("argv", { reason: `archive path not found: ${abs}` });
  }
  const st = statSync(abs);
  const noCleanup = () => {};

  if (st.isDirectory()) {
    const chat = findChatTxt(abs);
    if (!chat) fail("argv", { reason: "_chat.txt not found in directory", archive: abs });
    return { chatTxt: chat, cleanup: noCleanup };
  }

  if (!st.isFile()) {
    fail("argv", { reason: `unsupported archive shape: ${abs}` });
    // unreachable, but keeps tools happy
    return { chatTxt: abs, cleanup: noCleanup };
  }

  // Case-insensitive so "Export.ZIP" is extracted rather than parsed as text.
  if (!abs.toLowerCase().endsWith(".zip")) {
    return { chatTxt: abs, cleanup: noCleanup };
  }

  const tmp = mkdtempSync(join(tmpdir(), "whatsapp-ingest-"));
  const removeTmp = () => rmSync(tmp, { recursive: true, force: true });
  const unzip = spawnSync("unzip", ["-q", "-o", abs, "-d", tmp], {
    encoding: "utf8",
  });
  // `error` is set when the command could not be spawned at all (status is
  // null in that case), e.g. when unzip is not installed.
  if (unzip.error || unzip.status !== 0) {
    removeTmp();
    const details = {
      reason: "unzip failed",
      archive: abs,
      stderr: (unzip.stderr || "").slice(0, 200),
    };
    if (unzip.error) details.detail = unzip.error.message;
    fail("argv", details);
  }
  const chat = findChatTxt(tmp);
  if (!chat) {
    removeTmp();
    fail("argv", { reason: "_chat.txt not found in zip", archive: abs });
  }
  return { chatTxt: chat, cleanup: removeTmp };
}
199
+
200
/**
 * Searches `dir` and its subdirectories for a regular file named exactly
 * "_chat.txt" and returns the first match.
 *
 * The walk is depth-first: a directory's own files are checked before any of
 * its subdirectories, and subdirectories are visited in listing order.
 *
 * @param {string} dir  Directory to start from.
 * @returns {string | null}  Path of the match, or null when none exists.
 */
function findChatTxt(dir) {
  // Explicit stack instead of recursion; children are pushed in reverse so
  // they are popped in their original listing order.
  const pending = [dir];
  while (pending.length > 0) {
    const current = pending.pop();
    const listing = readdirSync(current, { withFileTypes: true });

    const hit = listing.find((entry) => entry.isFile() && entry.name === "_chat.txt");
    if (hit) return join(current, hit.name);

    const subdirs = listing
      .filter((entry) => entry.isDirectory())
      .map((entry) => join(current, entry.name));
    for (let k = subdirs.length - 1; k >= 0; k -= 1) {
      pending.push(subdirs[k]);
    }
  }
  return null;
}
213
+
214
+ // ---------------------------------------------------------------------------
215
+ // 5. Account resolution — Phase 0 has one account dir. The wrapper passes
216
+ // --account-id when it is known; otherwise we pick the single dir under
217
+ // {install}/data/accounts/.
218
+ // ---------------------------------------------------------------------------
219
+
220
/**
 * Determines the account id for this run.
 *
 * A non-blank `flags.accountId` is returned trimmed. Otherwise the id is the
 * name of the only non-hidden directory under `<install>/data/accounts`,
 * where `<install>` is the parent of the platform root. A missing accounts
 * directory, no candidate, or more than one candidate is reported through
 * fail("argv", ...), which ends the process.
 *
 * @param {{accountId?: string}} flags  Parsed command-line flags.
 * @returns {string}
 */
function resolveAccountId(flags) {
  const explicitId = flags.accountId ? flags.accountId.trim() : "";
  if (explicitId) return explicitId;

  const accountsDir = join(resolve(platformRoot, ".."), "data", "accounts");
  if (!existsSync(accountsDir)) {
    fail("argv", {
      reason: `accounts dir not found: ${accountsDir}; pass --account-id explicitly`,
    });
  }

  const candidates = [];
  for (const entry of readdirSync(accountsDir, { withFileTypes: true })) {
    if (entry.isDirectory() && !entry.name.startsWith(".")) {
      candidates.push(entry.name);
    }
  }

  if (candidates.length === 0) {
    fail("argv", { reason: `no accounts found under ${accountsDir}` });
  }
  if (candidates.length > 1) {
    fail("argv", {
      reason: `multiple accounts under ${accountsDir} (${candidates.join(",")}); pass --account-id explicitly`,
    });
  }
  return candidates[0];
}
242
+
243
+ // ---------------------------------------------------------------------------
244
+ // 6. Auto-create participants — one :Person node per distinct senderName,
245
+ // the owner's display name included (owner reconciliation is deferred).
246
+ // Provenance + participantStatus='auto-created' let a later semantic-enrichment pass (Task 856) promote/disambiguate.
247
+ // ---------------------------------------------------------------------------
248
+
249
// One MERGE per sender name, keyed on (accountId, source 'whatsapp', name).
// Provenance properties are written only when the node is newly created.
const PARTICIPANT_UPSERT_CYPHER = `
UNWIND $names AS senderName
MERGE (p:Person {accountId: $accountId, source: 'whatsapp', name: senderName})
ON CREATE SET
p.createdByAgent = 'whatsapp-import',
p.createdBySource = 'whatsapp-import',
p.createdBySession = $sessionId,
p.createdAt = datetime(),
p.scope = $scope,
p.participantStatus = 'auto-created'
RETURN elementId(p) AS elemId, senderName AS name
`;

/**
 * Upserts a :Person node for every given sender name inside one write
 * transaction and reports the element id assigned to each name.
 *
 * No name is filtered out here. Per the original note, owner-as-sender
 * reconciliation is deferred to Task 856: when the owner's display name
 * appears as a sender it gets an auto-created :Person like everyone else.
 *
 * @param {object} args
 * @param {object} args.session  Session exposing `executeWrite(work)`.
 * @param {string} args.accountId
 * @param {string} args.scope
 * @param {string} args.sessionId
 * @param {string[]} args.senderNames  Distinct sender display names.
 * @returns {Promise<Map<string, string>>}  Sender name -> element id; an
 *   empty Map (and no database call) when `senderNames` is empty.
 */
async function upsertAutoParticipants({
  session,
  accountId,
  scope,
  sessionId,
  senderNames,
}) {
  if (senderNames.length === 0) return new Map();

  const params = { names: senderNames, accountId, scope, sessionId };
  const idsByName = await session.executeWrite(async (tx) => {
    const { records } = await tx.run(PARTICIPANT_UPSERT_CYPHER, params);
    return new Map(records.map((rec) => [rec.get("name"), rec.get("elemId")]));
  });
  return idsByName;
}
288
+
289
+ // ---------------------------------------------------------------------------
290
+ // 7. Insight pass — chunked Haiku call, in-process Cypher writes.
291
+ // One :Observation node per extracted item, OBSERVED_IN edge to the
292
+ // Conversation. Edge wiring to specific :Person/:Task/etc. is Task 856.
293
+ // ---------------------------------------------------------------------------
294
+
295
// Number of parsed messages sent per LLM call (the original note budgets
// this at roughly 75K tokens).
const INSIGHT_CHUNK_SIZE = 1500;

// System prompt for the extraction call. Its "no sender names" rule applies
// to the returned snippets.
const INSIGHT_SYSTEM_PROMPT = `You extract structured insights from a chunk of a WhatsApp conversation.

Return STRICT JSON via the provided tool. No prose, no commentary. Only items with concrete, verbatim evidence in the chunk. Empty arrays are valid; prefer omission to invention.

Definitions:
- "mention": a person, organisation, place, or named topic referred to by name.
- "task": something a participant committed to do or asked another to do (imperative or future-tense).
- "preference": stated like, dislike, opinion, or rule of behaviour.
- "observedRelationship": an explicit relational claim (works at, is married to, manages, etc.).

Snippets must be ≤80 characters of the original message body, no sender names, no timestamps.`;

// JSON-schema fragment: an array of objects whose listed fields are all
// required strings.
const requiredStringRecords = (...fieldNames) => ({
  type: "array",
  items: {
    type: "object",
    properties: Object.fromEntries(
      fieldNames.map((field) => [field, { type: "string" }]),
    ),
    required: fieldNames,
  },
});

// Tool definition the model is forced to call; all four lists are required.
const INSIGHT_TOOL = {
  name: "submit_insights",
  description: "Submit the structured insights extracted from the chunk.",
  input_schema: {
    type: "object",
    properties: {
      mentions: requiredStringRecords("name", "snippet"),
      tasks: requiredStringRecords("task", "snippet"),
      preferences: requiredStringRecords("subject", "preference"),
      observedRelationships: requiredStringRecords("from", "to", "relationship"),
    },
    required: ["mentions", "tasks", "preferences", "observedRelationships"],
  },
};

// Creates one :Observation:WhatsAppObservation per entry of $observations and
// links each to the matched conversation with an OBSERVED_IN relationship.
const INSIGHT_WRITE_CYPHER = `
MATCH (c:Conversation:WhatsAppConversation {conversationId: $conversationId})
UNWIND $observations AS obs
CREATE (o:Observation)
SET
o:WhatsAppObservation,
o.accountId = $accountId,
o.kind = obs.kind,
o.summary = obs.summary,
o.snippet = obs.snippet,
o.subject = obs.subject,
o.from = obs.from,
o.to = obs.to,
o.source = 'whatsapp',
o.createdByAgent = 'whatsapp-import',
o.createdBySource = 'whatsapp-import',
o.createdBySession = $sessionId,
o.createdAt = datetime(),
o.scope = $scope,
o.insightPass = true,
o.observationStatus = 'auto-extracted'
MERGE (o)-[r:OBSERVED_IN]->(c)
ON CREATE SET r.source = 'whatsapp', r.createdAt = datetime()
RETURN count(o) AS created
`;
390
+
391
/** Stringifies a value ("" for null/undefined) and clips it to 200 characters. */
function clipInsightText(value) {
  return String(value ?? "").slice(0, 200);
}

/**
 * Returns the object entries of an LLM-supplied list. Non-arrays yield [];
 * null and primitive entries are dropped so one malformed item cannot throw
 * while its fields are being read.
 */
function insightItems(value) {
  return asArray(value).filter((item) => item !== null && typeof item === "object");
}

/**
 * Runs the insight extraction over the parsed messages, chunk by chunk.
 *
 * The messages are split into chunks of INSIGHT_CHUNK_SIZE. Each chunk is
 * rendered as a numbered "[n] sender: body" transcript and sent to the LLM
 * with the insight tool forced. The returned mentions, tasks, preferences
 * and observed relationships are converted into observation rows (text
 * fields clipped to 200 characters) and written with INSIGHT_WRITE_CYPHER in
 * one write transaction per chunk.
 *
 * A chunk whose LLM call throws, returns kind "fallback", or returns any kind
 * other than "ok-tool" is logged and skipped. A failed write is logged and
 * the loop continues with the next chunk.
 *
 * @param {object} args
 * @param {Function} args.callOauthLlm  LLM client; called with model, system,
 *   userMessage, maxTokens, timeoutMs, tools and toolChoiceName.
 * @param {string} args.HAIKU_MODEL  Model identifier passed to the client.
 * @param {object} args.session  Session exposing `executeWrite(work)`.
 * @param {string} args.conversationId
 * @param {string} args.accountId
 * @param {string} args.scope
 * @param {string} args.sessionId
 * @param {Array<{senderName: string, body: string}>} args.parsedLines
 * @returns {Promise<{chunks: number, mentions: number, tasks: number,
 *   preferences: number, observedRelationships: number}>}  Counts of items
 *   extracted by the LLM. They are incremented before the write is attempted,
 *   so they do not prove the rows were persisted.
 */
async function runInsightPass({
  callOauthLlm,
  HAIKU_MODEL,
  session,
  conversationId,
  accountId,
  scope,
  sessionId,
  parsedLines,
}) {
  const counters = {
    chunks: 0,
    mentions: 0,
    tasks: 0,
    preferences: 0,
    observedRelationships: 0,
  };

  if (parsedLines.length === 0) return counters;

  // Sender names stay in the transcript: the model needs them to attribute
  // tasks, preferences and relationships. The prompt's "no sender names"
  // rule constrains only the returned snippet field.
  const chunks = [];
  for (let i = 0; i < parsedLines.length; i += INSIGHT_CHUNK_SIZE) {
    chunks.push(parsedLines.slice(i, i + INSIGHT_CHUNK_SIZE));
  }
  counters.chunks = chunks.length;

  for (let chunkIdx = 0; chunkIdx < chunks.length; chunkIdx++) {
    const chunkLabel = `${chunkIdx + 1}/${chunks.length}`;
    const lines = chunks[chunkIdx]
      .map((l, j) => `[${j + 1}] ${l.senderName}: ${l.body}`)
      .join("\n");

    let llmResult;
    try {
      llmResult = await callOauthLlm({
        model: HAIKU_MODEL,
        system: INSIGHT_SYSTEM_PROMPT,
        userMessage: lines,
        maxTokens: 8192,
        timeoutMs: 180_000,
        tools: [INSIGHT_TOOL],
        toolChoiceName: INSIGHT_TOOL.name,
      });
    } catch (err) {
      log(
        `insight-pass chunk=${chunkLabel} threw=${err instanceof Error ? err.message : String(err)}`,
      );
      continue;
    }

    if (llmResult.kind === "fallback") {
      log(
        `insight-pass chunk=${chunkLabel} fallback cause=${llmResult.cause} reason="${llmResult.reason}"`,
      );
      continue;
    }
    if (llmResult.kind !== "ok-tool") {
      log(`insight-pass chunk=${chunkLabel} unexpected-result kind=${llmResult.kind}`);
      continue;
    }

    const input = llmResult.input ?? {};
    const mentions = insightItems(input.mentions);
    const tasks = insightItems(input.tasks);
    const preferences = insightItems(input.preferences);
    const relationships = insightItems(input.observedRelationships);

    const observations = [
      ...mentions.map((m) => ({
        kind: "mention",
        summary: clipInsightText(m.name),
        snippet: clipInsightText(m.snippet),
        subject: null,
        from: null,
        to: null,
      })),
      ...tasks.map((t) => ({
        kind: "task",
        summary: clipInsightText(t.task),
        snippet: clipInsightText(t.snippet),
        subject: null,
        from: null,
        to: null,
      })),
      ...preferences.map((p) => ({
        kind: "preference",
        summary: clipInsightText(p.preference),
        snippet: null,
        subject: clipInsightText(p.subject),
        from: null,
        to: null,
      })),
      ...relationships.map((r) => ({
        kind: "observed-relationship",
        summary: clipInsightText(r.relationship),
        snippet: null,
        subject: null,
        from: clipInsightText(r.from),
        to: clipInsightText(r.to),
      })),
    ];

    counters.mentions += mentions.length;
    counters.tasks += tasks.length;
    counters.preferences += preferences.length;
    counters.observedRelationships += relationships.length;

    if (observations.length === 0) continue;

    try {
      await session.executeWrite(async (tx) => {
        await tx.run(INSIGHT_WRITE_CYPHER, {
          conversationId,
          accountId,
          scope,
          sessionId,
          observations,
        });
      });
    } catch (err) {
      log(
        `insight-pass chunk=${chunkLabel} write-failed reason="${err instanceof Error ? err.message : String(err)}"`,
      );
    }
  }

  return counters;
}
527
+
528
/**
 * Returns `v` itself when it is an array, otherwise a new empty array.
 *
 * @param {unknown} v
 * @returns {Array}
 */
function asArray(v) {
  if (!Array.isArray(v)) {
    return [];
  }
  return v;
}
531
+
532
+ // ---------------------------------------------------------------------------
533
+ // 8. Main.
534
+ // ---------------------------------------------------------------------------
535
+
536
/**
 * Orchestrates one ingest run: argv -> dynamic imports -> archive resolution
 * -> parse -> participant upsert -> archive write -> optional insight pass ->
 * JSON summary on stdout, then exit 0.
 *
 * Failures in the import, parse, participant-upsert and archive-write steps
 * go through fail(), which terminates the process. A failure of the insight
 * pass is logged and does not change the exit code.
 *
 * Temp-directory cleanup is registered as a process 'exit' listener right
 * after the archive is resolved, so it runs on every exit path, including
 * the ones that leave through fail() -> process.exit(1).
 *
 * @returns {Promise<void>}
 */
async function main() {
  const startedMs = Date.now();
  const { archive, flags } = parseArgv(process.argv);
  const ownerElementId = flags.ownerElementId;
  const scope = flags.scope;
  const accountId = resolveAccountId(flags);
  const timezone = flags.timezone || "Europe/London";
  const dateFormat = flags.dateFormat;
  const sessionId =
    flags.sessionId || `whatsapp-ingest:${Date.now()}:${Math.random().toString(36).slice(2, 10)}`;

  let parseExport, memoryArchiveWrite, getSession, callOauthLlm, HAIKU_MODEL;
  try {
    ({ parseExport } = await import(PARSE_EXPORT_PATH));
    ({ memoryArchiveWrite } = await import(ARCHIVE_WRITE_PATH));
    ({ getSession } = await import(NEO4J_LIB_PATH));
    ({ callOauthLlm } = await import(OAUTH_LLM_PATH));
    ({ HAIKU_MODEL } = await import(MODELS_PATH));
  } catch (err) {
    fail("import", {
      reason: `failed to import compiled dist`,
      detail: err instanceof Error ? err.message : String(err),
    });
  }

  // 8a. Resolve _chat.txt
  const { chatTxt, cleanup } = resolveChatTxt(archive);
  // 'exit' listeners run synchronously for process.exit() as well, so the
  // extracted temp dir is removed even when a later step fails or throws.
  process.once("exit", () => {
    try {
      cleanup();
    } catch (err) {
      log(`cleanup failed reason="${err instanceof Error ? err.message : String(err)}"`);
    }
  });
  const archiveBytes = statSync(chatTxt).size;
  log(
    `start file=${chatTxt} owner=${ownerElementId} scope=${scope} accountId=${accountId} archive-bytes=${archiveBytes}`,
  );

  // 8b. Parse
  let parseResult;
  const parseStart = Date.now();
  try {
    parseResult = parseExport({ filePath: chatTxt, accountId, timezone, dateFormat });
  } catch (err) {
    fail("parse", { reason: err instanceof Error ? err.message : String(err) });
  }
  const parseMs = Date.now() - parseStart;
  // NOTE(review): with zero parsed lines both timestamps are the placeholder
  // "-", which is also passed on as firstMessageAt/lastMessageAt below —
  // confirm memoryArchiveWrite tolerates that.
  const firstTs = parseResult.parsedLines[0]?.dateSent || "-";
  const lastTs = parseResult.parsedLines[parseResult.parsedLines.length - 1]?.dateSent || "-";
  log(
    `parsed lines=${parseResult.counters.parsed} media-skipped=${parseResult.counters.mediaSkipped} system-skipped=${parseResult.counters.systemSkipped} first-line-ts=${firstTs} last-line-ts=${lastTs} ms=${parseMs}`,
  );

  // 8c. Auto-create participants and build rows
  const session = getSession();
  let participantIds = new Map();
  const distinctSenderNames = Array.from(
    new Set(parseResult.parsedLines.map((l) => l.senderName)),
  );

  try {
    participantIds = await upsertAutoParticipants({
      session,
      accountId,
      scope,
      sessionId,
      senderNames: distinctSenderNames,
    });
  } catch (err) {
    await session.close().catch(() => {});
    // NOTE(review): this emits a second `phase=` key on the FAIL line
    // (phase=archive-write phase=participant-upsert); kept unchanged because
    // log consumers may match on it.
    fail("archive-write", {
      phase: "participant-upsert",
      reason: err instanceof Error ? err.message : String(err),
    });
  }

  if (participantIds.size !== distinctSenderNames.length) {
    log(
      `participant-upsert mismatch expected=${distinctSenderNames.length} got=${participantIds.size}`,
    );
  }

  const conversationId = parseResult.conversationId;
  const rows = parseResult.parsedLines.map((l, idx) => ({
    messageId: `whatsapp-export:${conversationId}:${l.sequenceIndex}:${hashLine(l.body, l.dateSent)}`,
    conversationId,
    // Senders missing from the upsert result fall back to the owner node.
    senderNodeId: participantIds.get(l.senderName) || ownerElementId,
    senderName: l.senderName,
    dateSent: l.dateSent,
    body: l.body,
    sequenceIndex: idx,
  }));

  const conversationBlock = {
    conversationId,
    archiveSourceFile: parseResult.archiveSourceFile,
    firstMessageAt: firstTs,
    lastMessageAt: lastTs,
    participantCount: distinctSenderNames.length,
    messageCount: rows.length,
  };

  // Close the session before archive-write — memoryArchiveWrite opens its
  // own session and closes it in finally.
  await session.close().catch(() => {});

  // 8d. Archive-write
  let archiveResult;
  const writeStart = Date.now();
  try {
    archiveResult = await memoryArchiveWrite({
      archiveType: "whatsapp-export",
      ownerNodeId: ownerElementId,
      accountId,
      conversation: conversationBlock,
      participantNodeIds: Array.from(participantIds.values()),
      rows,
      sessionId,
      archiveFilePath: chatTxt,
    });
  } catch (err) {
    fail("archive-write", {
      reason: err instanceof Error ? err.message : String(err),
    });
  }
  const writeMs = Date.now() - writeStart;
  const createdMessages = archiveResult.counters.createdMessages || 0;
  log(
    `archive-write created-conversations=${archiveResult.counters.createdConversations || 0} created-messages=${createdMessages} created-sent-edges=${archiveResult.counters.createdSentEdges || 0} created-participant-edges=${archiveResult.counters.createdParticipantEdges || 0} ms=${writeMs}`,
  );

  // 8e. Insight pass (best-effort: a failure is logged, never fatal)
  let insightCounters = {
    chunks: 0,
    mentions: 0,
    tasks: 0,
    preferences: 0,
    observedRelationships: 0,
  };
  if (!flags.noInsight) {
    const insightStart = Date.now();
    const insightSession = getSession();
    try {
      insightCounters = await runInsightPass({
        callOauthLlm,
        HAIKU_MODEL,
        session: insightSession,
        conversationId,
        accountId,
        scope,
        sessionId,
        parsedLines: parseResult.parsedLines,
      });
    } catch (err) {
      log(
        `insight-pass FAILED reason="${err instanceof Error ? err.message : String(err)}"`,
      );
    } finally {
      await insightSession.close().catch(() => {});
    }
    const insightMs = Date.now() - insightStart;
    log(
      `insight-pass model=haiku chunks=${insightCounters.chunks} mentions=${insightCounters.mentions} tasks=${insightCounters.tasks} preferences=${insightCounters.preferences} observed-relationships=${insightCounters.observedRelationships} ms=${insightMs}`,
    );
  }

  cleanup();

  const totalMs = Date.now() - startedMs;
  log(`done conversationId=${conversationId} total-ms=${totalMs} exit=0`);

  const summary =
    JSON.stringify({
      conversationId,
      parsed: parseResult.counters.parsed,
      mediaSkipped: parseResult.counters.mediaSkipped,
      systemSkipped: parseResult.counters.systemSkipped,
      createdMessages,
      insightCounters,
      ms: totalMs,
    }) + "\n";
  // Exit only once the summary has been handed to stdout: process.exit()
  // drops writes that are still pending.
  process.stdout.write(summary, () => process.exit(0));
}
717
+
718
/**
 * Produces the short hash used as the tail of a messageId.
 *
 * This is a 32-bit polynomial hash (multiplier 31) over the UTF-16 code
 * units of `dateSent` followed by `body`. It is deterministic and
 * non-cryptographic.
 *
 * @param {string} body  Message text.
 * @param {string} dateSent  Message timestamp as produced by the parser.
 * @returns {string}  Eight lowercase hex digits.
 */
function hashLine(body, dateSent) {
  const text = `${dateSent}${body}`;
  let acc = 0;
  let pos = 0;
  while (pos < text.length) {
    // Math.imul keeps the multiplication in 32-bit integer arithmetic.
    acc = (Math.imul(acc, 31) + text.charCodeAt(pos)) | 0;
    pos += 1;
  }
  const unsigned = acc >>> 0;
  return unsigned.toString(16).padStart(8, "0");
}
729
+
730
// Entry point: any rejection that escapes main() is reported as a FAIL line
// in phase "uncaught" (fail() also terminates the process).
main().catch((err) => {
  const reason = err instanceof Error ? err.message : String(err);
  fail("uncaught", { reason });
});