@rubytech/create-maxy 1.0.807 → 1.0.809

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/package.json +1 -1
  2. package/payload/platform/plugins/admin/skills/onboarding/SKILL.md +2 -0
  3. package/payload/platform/plugins/docs/references/cloudflare.md +1 -0
  4. package/payload/platform/plugins/docs/references/memory-guide.md +4 -0
  5. package/payload/platform/plugins/docs/references/troubleshooting.md +19 -1
  6. package/payload/platform/plugins/memory/mcp/dist/index.js +86 -0
  7. package/payload/platform/plugins/memory/mcp/dist/index.js.map +1 -1
  8. package/payload/platform/plugins/memory/mcp/dist/tools/profile-read.d.ts.map +1 -1
  9. package/payload/platform/plugins/memory/mcp/dist/tools/profile-read.js +19 -0
  10. package/payload/platform/plugins/memory/mcp/dist/tools/profile-read.js.map +1 -1
  11. package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-insight-pass.d.ts +23 -0
  12. package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-insight-pass.d.ts.map +1 -0
  13. package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-insight-pass.js +401 -0
  14. package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-insight-pass.js.map +1 -0
  15. package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-preview.d.ts +28 -0
  16. package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-preview.d.ts.map +1 -0
  17. package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-preview.js +34 -0
  18. package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-preview.js.map +1 -0
  19. package/payload/platform/plugins/memory/references/schema-base.md +12 -0
  20. package/payload/platform/plugins/whatsapp/PLUGIN.md +3 -1
  21. package/payload/platform/plugins/whatsapp-import/bin/ingest.mjs +225 -346
  22. package/payload/platform/plugins/whatsapp-import/bin/whatsapp-ingest.sh +28 -10
  23. package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.d.ts +21 -0
  24. package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.d.ts.map +1 -0
  25. package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.js +41 -0
  26. package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.js.map +1 -0
  27. package/payload/platform/plugins/whatsapp-import/lib/dist/filter.d.ts +29 -0
  28. package/payload/platform/plugins/whatsapp-import/lib/dist/filter.d.ts.map +1 -0
  29. package/payload/platform/plugins/whatsapp-import/lib/dist/filter.js +123 -0
  30. package/payload/platform/plugins/whatsapp-import/lib/dist/filter.js.map +1 -0
  31. package/payload/platform/plugins/whatsapp-import/lib/dist/index.d.ts +4 -0
  32. package/payload/platform/plugins/whatsapp-import/lib/dist/index.d.ts.map +1 -1
  33. package/payload/platform/plugins/whatsapp-import/lib/dist/index.js +9 -1
  34. package/payload/platform/plugins/whatsapp-import/lib/dist/index.js.map +1 -1
  35. package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/filter-gate.test.ts +170 -0
  36. package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/ingest-idempotence.test.ts +141 -0
  37. package/payload/platform/plugins/whatsapp-import/lib/src/derive-keys.ts +59 -0
  38. package/payload/platform/plugins/whatsapp-import/lib/src/filter.ts +136 -0
  39. package/payload/platform/plugins/whatsapp-import/lib/src/index.ts +12 -0
  40. package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/SKILL.md +80 -25
  41. package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import-enrich/SKILL.md +22 -3
  42. package/payload/platform/templates/agents/admin/IDENTITY.md +1 -0
  43. package/payload/platform/templates/agents/admin/SOUL.md +2 -0
  44. package/payload/platform/templates/specialists/agents/database-operator.md +9 -4
  45. package/payload/server/chunk-CRWLE6BZ.js +3511 -0
  46. package/payload/server/chunk-V3VLAL7N.js +10009 -0
  47. package/payload/server/client-pool-N2Y57223.js +31 -0
  48. package/payload/server/maxy-edge.js +5 -4
  49. package/payload/server/public/assets/admin-Bwrd2DBq.js +352 -0
  50. package/payload/server/public/index.html +1 -1
  51. package/payload/server/server.js +596 -250
  52. package/payload/server/public/assets/admin-CTM9Vb-j.js +0 -352
@@ -1,37 +1,43 @@
1
1
  #!/usr/bin/env node
2
2
  // =============================================================================
3
- // ingest.mjs — in-process orchestrator for whatsapp-ingest.sh (Task 855).
3
+ // ingest.mjs — in-process orchestrator for whatsapp-ingest.sh.
4
4
  //
5
- // Collapses parse → archive-write → insight into one Node process so the
6
- // 357K-char MCP-envelope ceiling no longer exists between steps. The
7
- // database-operator subagent's only handle on this pipeline is the wrapper
8
- // shell script; the gate at platform/plugins/admin/hooks/
9
- // archive-ingest-surface-gate.sh blocks the legacy MCP tools mechanically.
5
+ // Phase 1 of the two-phase WhatsApp ingest contract (Task 855 / Task 871).
6
+ // Deterministic only: parse → operator-supplied filter → archive-write
7
+ // (Conversation + Messages + auto-Person participants + NEXT chronology).
8
+ // No LLM in the per-message decision path. The Haiku insight pass moved to
9
+ // Phase 2 (`mcp__memory__whatsapp-export-insight-pass`), invoked consciously
10
+ // by the operator via the `whatsapp-import-enrich` skill.
10
11
  //
11
12
  // Argv (positional): <archive-path>
12
13
  // Argv (flags): --owner-element-id <id> --scope <admin|public>
14
+ // --filter <all|senders=<csv>|date-range=<isoFrom>..<isoTo>>
13
15
  // [--account-id <accountId>] [--timezone <iana>]
14
16
  // [--date-format <DD/MM/YY|MM/DD/YY|DD/MM/YYYY|MM/DD/YYYY>]
15
- // [--no-insight]
16
17
  //
17
- // Stdout (success): one JSON line
18
- // {conversationId, parsed, mediaSkipped, systemSkipped,
19
- // createdMessages, insightCounters, ms}
18
+ // Stdout (success): one JSON line — Honest counters per Task 871.5.
19
+ // {conversationElementId, conversationId,
20
+ // parsed, mediaSkipped, systemSkipped,
21
+ // filtered,
22
+ // written, messagesAlreadyExisted,
23
+ // nextEdgesProcessed, nextEdgesCreated,
24
+ // participantsAlreadyExisted,
25
+ // ms}
26
+ // The skill (`whatsapp-import` SKILL.md) maps this verbose-diagnostic shape
27
+ // to the agent-return short shape per Task 871.6 (`alreadyExisted` etc.)
28
+ // when surfacing the result to the admin agent.
20
29
  //
21
30
  // Stderr (failure): one [whatsapp-ingest] FAIL line, exit non-zero.
22
31
  // =============================================================================
23
32
 
24
33
  import {
25
- createReadStream,
26
34
  existsSync,
27
35
  mkdtempSync,
28
36
  readdirSync,
29
- readFileSync,
30
37
  rmSync,
31
38
  statSync,
32
- writeFileSync,
33
39
  } from "node:fs";
34
- import { join, resolve, dirname, basename } from "node:path";
40
+ import { join, resolve, dirname } from "node:path";
35
41
  import { tmpdir } from "node:os";
36
42
  import { spawnSync } from "node:child_process";
37
43
  import { fileURLToPath } from "node:url";
@@ -76,19 +82,14 @@ const NEO4J_LIB_PATH = resolve(
76
82
  "lib",
77
83
  "neo4j.js",
78
84
  );
79
- const OAUTH_LLM_PATH = resolve(
80
- platformRoot,
81
- "lib",
82
- "oauth-llm",
83
- "dist",
84
- "index.js",
85
- );
86
- const MODELS_PATH = resolve(
85
+ // Task 870: pure key-derivation functions ship in the whatsapp-import lib.
86
+ const DERIVE_KEYS_PATH = resolve(
87
87
  platformRoot,
88
+ "plugins",
89
+ "whatsapp-import",
88
90
  "lib",
89
- "models",
90
91
  "dist",
91
- "index.js",
92
+ "derive-keys.js",
92
93
  );
93
94
 
94
95
  // ---------------------------------------------------------------------------
@@ -128,10 +129,6 @@ function parseArgv(argv) {
128
129
  continue;
129
130
  }
130
131
  const key = a.slice(2);
131
- if (key === "no-insight") {
132
- flags.noInsight = true;
133
- continue;
134
- }
135
132
  const v = args[++i];
136
133
  if (v == null) fail("argv", { reason: `flag --${key} requires a value` });
137
134
  flags[camelCase(key)] = v;
@@ -142,6 +139,15 @@ function parseArgv(argv) {
142
139
  if (flags.scope !== "admin" && flags.scope !== "public") {
143
140
  fail("argv", { reason: `invalid --scope "${flags.scope}" (admin|public)` });
144
141
  }
142
+ // Task 871: --filter is mandatory. The deterministic Bash entry refuses
143
+ // bulk archive writes without an operator-supplied filter — closes the
144
+ // doctrine gap named in feedback_compress_at_ingest_for_bulk_archives.md.
145
+ if (!flags.filter || !flags.filter.trim()) {
146
+ process.stderr.write(
147
+ `[whatsapp-ingest] FAIL filter-required reason="bulk-archive-gate (Task 871) — operator must specify --filter (one of all, senders=<csv>, date-range=<isoFrom>..<isoTo>)"\n`,
148
+ );
149
+ fail("argv", { reason: "--filter is required (one of all, senders=<csv>, date-range=<isoFrom>..<isoTo>)" });
150
+ }
145
151
  return { archive, flags };
146
152
  }
147
153
 
@@ -241,11 +247,31 @@ function resolveAccountId(flags) {
241
247
  }
242
248
 
243
249
  // ---------------------------------------------------------------------------
244
- // 6. Auto-create participants — one :Person node per distinct senderName
245
- // minus the owner. Provenance + participantStatus='auto-created' so a
246
- // later semantic-enrichment pass (Task 856) can promote/disambiguate.
250
+ // 6. Resolve participants — match each sender against canonical
251
+ // (:Person|:AdminUser {accountId}) by NFKC-trim-lower normalisation
252
+ // before falling through to auto-Person creation (Task 870). Per-sender
253
+ // `participant-resolved` log line so re-imports surface canonical reuse.
254
+ // Auto-Person fallback retains the legacy `{accountId, source:'whatsapp',
255
+ // name}` shape; switching to givenName/familyName is Task 874.
247
256
  // ---------------------------------------------------------------------------
248
257
 
258
+ const CANONICAL_FETCH_CYPHER = `
259
+ MATCH (p:Person {accountId: $accountId})
260
+ WHERE coalesce(p.participantStatus, '') <> 'auto-created'
261
+ RETURN elementId(p) AS elemId,
262
+ coalesce(p.givenName, '') AS givenName,
263
+ coalesce(p.familyName, '') AS familyName,
264
+ NULL AS adminName,
265
+ 'Person' AS label
266
+ UNION
267
+ MATCH (u:AdminUser {accountId: $accountId})
268
+ RETURN elementId(u) AS elemId,
269
+ '' AS givenName,
270
+ '' AS familyName,
271
+ coalesce(u.name, '') AS adminName,
272
+ 'AdminUser' AS label
273
+ `;
274
+
249
275
  const PARTICIPANT_UPSERT_CYPHER = `
250
276
  UNWIND $names AS senderName
251
277
  MERGE (p:Person {accountId: $accountId, source: 'whatsapp', name: senderName})
@@ -259,278 +285,109 @@ MERGE (p:Person {accountId: $accountId, source: 'whatsapp', name: senderName})
259
285
  RETURN elementId(p) AS elemId, senderName AS name
260
286
  `;
261
287
 
262
- async function upsertAutoParticipants({
263
- session,
264
- accountId,
265
- scope,
266
- sessionId,
267
- senderNames,
268
- }) {
269
- if (senderNames.length === 0) return new Map();
270
- // Owner-as-sender reconciliation is deferred to Task 856 — when the
271
- // owner's display name appears as a sender, the messages SENT-edge from
272
- // the auto-created :Person, not from the operator's :AdminUser. The
273
- // post-load enrichment skill rewrites those edges on operator confirm.
274
- return await session.executeWrite(async (tx) => {
275
- const res = await tx.run(PARTICIPANT_UPSERT_CYPHER, {
276
- names: senderNames,
277
- accountId,
278
- scope,
279
- sessionId,
280
- });
281
- const map = new Map();
282
- for (const r of res.records) {
283
- map.set(r.get("name"), r.get("elemId"));
288
+ async function fetchCanonicalIndex({ session, accountId, normaliseSenderName }) {
289
+ // Returns Map<normalisedName, {elemId, label}> for canonical participants.
290
+ // Excludes auto-created Persons so we never reuse our own shadow nodes.
291
+ const res = await session.executeRead(async (tx) =>
292
+ tx.run(CANONICAL_FETCH_CYPHER, { accountId }),
293
+ );
294
+ const index = new Map();
295
+ for (const r of res.records) {
296
+ const elemId = r.get("elemId");
297
+ const label = r.get("label");
298
+ const candidates = [];
299
+ if (label === "Person") {
300
+ const given = r.get("givenName") || "";
301
+ const family = r.get("familyName") || "";
302
+ if (given || family) candidates.push(`${given} ${family}`.trim());
303
+ if (given) candidates.push(given);
304
+ if (family) candidates.push(family);
305
+ } else {
306
+ const adminName = r.get("adminName") || "";
307
+ if (adminName) candidates.push(adminName);
284
308
  }
285
- return map;
286
- });
309
+ for (const c of candidates) {
310
+ const norm = normaliseSenderName(c);
311
+ if (!norm) continue;
312
+ // First write wins — Person before AdminUser per UNION order.
313
+ if (!index.has(norm)) index.set(norm, { elemId, label });
314
+ }
315
+ }
316
+ return index;
287
317
  }
288
318
 
289
- // ---------------------------------------------------------------------------
290
- // 7. Insight pass — chunked Haiku call, in-process Cypher writes.
291
- // One :Observation node per extracted item, OBSERVED_IN edge to the
292
- // Conversation. Edge wiring to specific :Person/:Task/etc. is Task 856.
293
- // ---------------------------------------------------------------------------
294
-
295
- const INSIGHT_CHUNK_SIZE = 1500; // messages per Haiku call (≈75K tokens budget)
296
-
297
- const INSIGHT_SYSTEM_PROMPT = `You extract structured insights from a chunk of a WhatsApp conversation.
298
-
299
- Return STRICT JSON via the provided tool. No prose, no commentary. Only items with concrete, verbatim evidence in the chunk. Empty arrays are valid; prefer omission to invention.
300
-
301
- Definitions:
302
- - "mention": a person, organisation, place, or named topic referred to by name.
303
- - "task": something a participant committed to do or asked another to do (imperative or future-tense).
304
- - "preference": stated like, dislike, opinion, or rule of behaviour.
305
- - "observedRelationship": an explicit relational claim (works at, is married to, manages, etc.).
306
-
307
- Snippets must be ≤80 characters of the original message body, no sender names, no timestamps.`;
308
-
309
- const INSIGHT_TOOL = {
310
- name: "submit_insights",
311
- description: "Submit the structured insights extracted from the chunk.",
312
- input_schema: {
313
- type: "object",
314
- properties: {
315
- mentions: {
316
- type: "array",
317
- items: {
318
- type: "object",
319
- properties: {
320
- name: { type: "string" },
321
- snippet: { type: "string" },
322
- },
323
- required: ["name", "snippet"],
324
- },
325
- },
326
- tasks: {
327
- type: "array",
328
- items: {
329
- type: "object",
330
- properties: {
331
- task: { type: "string" },
332
- snippet: { type: "string" },
333
- },
334
- required: ["task", "snippet"],
335
- },
336
- },
337
- preferences: {
338
- type: "array",
339
- items: {
340
- type: "object",
341
- properties: {
342
- subject: { type: "string" },
343
- preference: { type: "string" },
344
- },
345
- required: ["subject", "preference"],
346
- },
347
- },
348
- observedRelationships: {
349
- type: "array",
350
- items: {
351
- type: "object",
352
- properties: {
353
- from: { type: "string" },
354
- to: { type: "string" },
355
- relationship: { type: "string" },
356
- },
357
- required: ["from", "to", "relationship"],
358
- },
359
- },
360
- },
361
- required: ["mentions", "tasks", "preferences", "observedRelationships"],
362
- },
363
- };
364
-
365
- const INSIGHT_WRITE_CYPHER = `
366
- MATCH (c:Conversation:WhatsAppConversation {conversationId: $conversationId})
367
- UNWIND $observations AS obs
368
- CREATE (o:Observation)
369
- SET
370
- o:WhatsAppObservation,
371
- o.accountId = $accountId,
372
- o.kind = obs.kind,
373
- o.summary = obs.summary,
374
- o.snippet = obs.snippet,
375
- o.subject = obs.subject,
376
- o.from = obs.from,
377
- o.to = obs.to,
378
- o.source = 'whatsapp',
379
- o.createdByAgent = 'whatsapp-import',
380
- o.createdBySource = 'whatsapp-import',
381
- o.createdBySession = $sessionId,
382
- o.createdAt = datetime(),
383
- o.scope = $scope,
384
- o.insightPass = true,
385
- o.observationStatus = 'auto-extracted'
386
- MERGE (o)-[r:OBSERVED_IN]->(c)
387
- ON CREATE SET r.source = 'whatsapp', r.createdAt = datetime()
388
- RETURN count(o) AS created
389
- `;
390
-
391
- async function runInsightPass({
392
- callOauthLlm,
393
- HAIKU_MODEL,
319
+ async function resolveParticipants({
394
320
  session,
395
- conversationId,
396
321
  accountId,
397
322
  scope,
398
323
  sessionId,
399
- parsedLines,
324
+ senderNames,
325
+ normaliseSenderName,
400
326
  }) {
401
- const counters = {
402
- chunks: 0,
403
- mentions: 0,
404
- tasks: 0,
405
- preferences: 0,
406
- observedRelationships: 0,
407
- };
408
-
409
- if (parsedLines.length === 0) return counters;
410
-
411
- // Build chunks. Each chunk is rendered as a numbered transcript including
412
- // sender-names — the LLM needs them to attribute tasks/preferences/observed-
413
- // relationships. The system prompt's "no sender names" rule constrains the
414
- // returned snippet field only, not the input transcript.
415
- const chunks = [];
416
- for (let i = 0; i < parsedLines.length; i += INSIGHT_CHUNK_SIZE) {
417
- chunks.push(parsedLines.slice(i, i + INSIGHT_CHUNK_SIZE));
327
+ if (senderNames.length === 0) {
328
+ return { idsByName: new Map(), participantsAlreadyExisted: 0 };
418
329
  }
419
- counters.chunks = chunks.length;
420
-
421
- for (let chunkIdx = 0; chunkIdx < chunks.length; chunkIdx++) {
422
- const chunk = chunks[chunkIdx];
423
- const lines = chunk
424
- .map((l, j) => `[${j + 1}] ${l.senderName}: ${l.body}`)
425
- .join("\n");
426
-
427
- let llmResult;
428
- try {
429
- llmResult = await callOauthLlm({
430
- model: HAIKU_MODEL,
431
- system: INSIGHT_SYSTEM_PROMPT,
432
- userMessage: lines,
433
- maxTokens: 8192,
434
- timeoutMs: 180_000,
435
- tools: [INSIGHT_TOOL],
436
- toolChoiceName: INSIGHT_TOOL.name,
437
- });
438
- } catch (err) {
439
- log(
440
- `insight-pass chunk=${chunkIdx + 1}/${chunks.length} threw=${err instanceof Error ? err.message : String(err)}`,
441
- );
442
- continue;
443
- }
444
330
 
445
- if (llmResult.kind === "fallback") {
446
- log(
447
- `insight-pass chunk=${chunkIdx + 1}/${chunks.length} fallback cause=${llmResult.cause} reason="${llmResult.reason}"`,
448
- );
449
- continue;
450
- }
451
- if (llmResult.kind !== "ok-tool") {
331
+ const canonicalIndex = await fetchCanonicalIndex({
332
+ session,
333
+ accountId,
334
+ normaliseSenderName,
335
+ });
336
+
337
+ const idsByName = new Map();
338
+ const fallbackSenders = [];
339
+ let canonicalMatches = 0;
340
+ for (const senderName of senderNames) {
341
+ const norm = normaliseSenderName(senderName);
342
+ const hit = canonicalIndex.get(norm);
343
+ if (hit) {
344
+ idsByName.set(senderName, hit.elemId);
345
+ canonicalMatches++;
452
346
  log(
453
- `insight-pass chunk=${chunkIdx + 1}/${chunks.length} unexpected-result kind=${llmResult.kind}`,
347
+ `participant-resolved senderName="${senderName}" matched=canonical nodeId=${hit.elemId} label=${hit.label}`,
454
348
  );
455
- continue;
456
- }
457
-
458
- const input = llmResult.input ?? {};
459
- const observations = [];
460
- for (const m of asArray(input.mentions)) {
461
- observations.push({
462
- kind: "mention",
463
- summary: String(m.name ?? "").slice(0, 200),
464
- snippet: String(m.snippet ?? "").slice(0, 200),
465
- subject: null,
466
- from: null,
467
- to: null,
468
- });
469
- }
470
- for (const t of asArray(input.tasks)) {
471
- observations.push({
472
- kind: "task",
473
- summary: String(t.task ?? "").slice(0, 200),
474
- snippet: String(t.snippet ?? "").slice(0, 200),
475
- subject: null,
476
- from: null,
477
- to: null,
478
- });
479
- }
480
- for (const p of asArray(input.preferences)) {
481
- observations.push({
482
- kind: "preference",
483
- summary: String(p.preference ?? "").slice(0, 200),
484
- snippet: null,
485
- subject: String(p.subject ?? "").slice(0, 200),
486
- from: null,
487
- to: null,
488
- });
489
- }
490
- for (const r of asArray(input.observedRelationships)) {
491
- observations.push({
492
- kind: "observed-relationship",
493
- summary: String(r.relationship ?? "").slice(0, 200),
494
- snippet: null,
495
- subject: null,
496
- from: String(r.from ?? "").slice(0, 200),
497
- to: String(r.to ?? "").slice(0, 200),
498
- });
349
+ } else {
350
+ fallbackSenders.push(senderName);
499
351
  }
352
+ }
500
353
 
501
- counters.mentions += asArray(input.mentions).length;
502
- counters.tasks += asArray(input.tasks).length;
503
- counters.preferences += asArray(input.preferences).length;
504
- counters.observedRelationships += asArray(input.observedRelationships).length;
505
-
506
- if (observations.length === 0) continue;
507
-
508
- try {
509
- await session.executeWrite(async (tx) => {
510
- await tx.run(INSIGHT_WRITE_CYPHER, {
511
- conversationId,
512
- accountId,
513
- scope,
514
- sessionId,
515
- observations,
516
- });
354
+ let autoCreated = 0;
355
+ if (fallbackSenders.length > 0) {
356
+ const result = await session.executeWrite(async (tx) => {
357
+ const res = await tx.run(PARTICIPANT_UPSERT_CYPHER, {
358
+ names: fallbackSenders,
359
+ accountId,
360
+ scope,
361
+ sessionId,
517
362
  });
518
- } catch (err) {
363
+ const m = new Map();
364
+ for (const r of res.records) {
365
+ m.set(r.get("name"), r.get("elemId"));
366
+ }
367
+ const stats = res.summary.counters.updates();
368
+ return { m, created: stats.nodesCreated };
369
+ });
370
+ autoCreated = result.created;
371
+ for (const senderName of fallbackSenders) {
372
+ const elemId = result.m.get(senderName);
373
+ if (!elemId) continue;
374
+ idsByName.set(senderName, elemId);
519
375
  log(
520
- `insight-pass chunk=${chunkIdx + 1}/${chunks.length} write-failed reason="${err instanceof Error ? err.message : String(err)}"`,
376
+ `participant-resolved senderName="${senderName}" matched=auto nodeId=${elemId} label=Person`,
521
377
  );
522
378
  }
523
379
  }
524
380
 
525
- return counters;
526
- }
527
-
528
- function asArray(v) {
529
- return Array.isArray(v) ? v : [];
381
+ // participantsAlreadyExisted = canonical hits + auto-Persons that were already in graph.
382
+ const autoAlreadyExisted = fallbackSenders.length - autoCreated;
383
+ return {
384
+ idsByName,
385
+ participantsAlreadyExisted: canonicalMatches + autoAlreadyExisted,
386
+ };
530
387
  }
531
388
 
532
389
  // ---------------------------------------------------------------------------
533
- // 8. Main.
390
+ // 7. Main.
534
391
  // ---------------------------------------------------------------------------
535
392
 
536
393
  async function main() {
@@ -544,13 +401,13 @@ async function main() {
544
401
  const sessionId =
545
402
  flags.sessionId || `whatsapp-ingest:${Date.now()}:${Math.random().toString(36).slice(2, 10)}`;
546
403
 
547
- let parseExport, memoryArchiveWrite, getSession, callOauthLlm, HAIKU_MODEL;
404
+ let parseExport, parseFilterArg, applyFilter, memoryArchiveWrite, getSession;
405
+ let normaliseSenderName, deriveMessageId;
548
406
  try {
549
- ({ parseExport } = await import(PARSE_EXPORT_PATH));
407
+ ({ parseExport, parseFilterArg, applyFilter } = await import(PARSE_EXPORT_PATH));
550
408
  ({ memoryArchiveWrite } = await import(ARCHIVE_WRITE_PATH));
551
409
  ({ getSession } = await import(NEO4J_LIB_PATH));
552
- ({ callOauthLlm } = await import(OAUTH_LLM_PATH));
553
- ({ HAIKU_MODEL } = await import(MODELS_PATH));
410
+ ({ normaliseSenderName, deriveMessageId } = await import(DERIVE_KEYS_PATH));
554
411
  } catch (err) {
555
412
  fail("import", {
556
413
  reason: `failed to import compiled dist`,
@@ -558,14 +415,23 @@ async function main() {
558
415
  });
559
416
  }
560
417
 
561
- // 8a. Resolve _chat.txt
418
+ // 7a. Parse the operator-supplied filter ahead of any IO so a malformed
419
+ // value LOUD-FAILs before unzip / Neo4j connection.
420
+ let filter;
421
+ try {
422
+ filter = parseFilterArg(flags.filter);
423
+ } catch (err) {
424
+ fail("argv", { reason: err instanceof Error ? err.message : String(err) });
425
+ }
426
+
427
+ // 7b. Resolve _chat.txt
562
428
  const { chatTxt, cleanup } = resolveChatTxt(archive);
563
429
  const archiveBytes = statSync(chatTxt).size;
564
430
  log(
565
- `start file=${chatTxt} owner=${ownerElementId} scope=${scope} accountId=${accountId} archive-bytes=${archiveBytes}`,
431
+ `start file=${chatTxt} owner=${ownerElementId} scope=${scope} accountId=${accountId} archive-bytes=${archiveBytes} filter=${flags.filter}`,
566
432
  );
567
433
 
568
- // 8b. Parse
434
+ // 7c. Parse
569
435
  let parseResult;
570
436
  const parseStart = Date.now();
571
437
  try {
@@ -575,51 +441,84 @@ async function main() {
575
441
  fail("parse", { reason: err instanceof Error ? err.message : String(err) });
576
442
  }
577
443
  const parseMs = Date.now() - parseStart;
578
- const firstTs = parseResult.parsedLines[0]?.dateSent || "-";
579
- const lastTs = parseResult.parsedLines[parseResult.parsedLines.length - 1]?.dateSent || "-";
580
444
  log(
581
- `parsed lines=${parseResult.counters.parsed} media-skipped=${parseResult.counters.mediaSkipped} system-skipped=${parseResult.counters.systemSkipped} first-line-ts=${firstTs} last-line-ts=${lastTs} ms=${parseMs}`,
445
+ `parsed lines=${parseResult.counters.parsed} media-skipped=${parseResult.counters.mediaSkipped} system-skipped=${parseResult.counters.systemSkipped} ms=${parseMs}`,
446
+ );
447
+
448
+ // 7d. Apply filter
449
+ const filteredLines = applyFilter(parseResult.parsedLines, filter);
450
+ log(
451
+ `filter-applied kind=${filter.kind} parsed=${parseResult.parsedLines.length} filtered=${filteredLines.length}`,
582
452
  );
453
+ if (filteredLines.length === 0) {
454
+ cleanup();
455
+ fail("filter", {
456
+ reason: `filter "${flags.filter}" matched zero rows from ${parseResult.parsedLines.length} parsed lines — refusing to write an empty conversation`,
457
+ });
458
+ }
459
+
460
+ const firstTs = filteredLines[0].dateSent;
461
+ const lastTs = filteredLines[filteredLines.length - 1].dateSent;
583
462
 
584
- // 8c. Auto-create participants and build rows
463
+ // 7e. Auto-create participants and build rows
585
464
  let session = getSession();
586
- let participantIds = new Map();
465
+ let participantUpsert = { idsByName: new Map(), participantsAlreadyExisted: 0 };
587
466
  const distinctSenderNames = Array.from(
588
- new Set(parseResult.parsedLines.map((l) => l.senderName)),
467
+ new Set(filteredLines.map((l) => l.senderName)),
589
468
  );
590
469
 
591
470
  try {
592
- participantIds = await upsertAutoParticipants({
471
+ participantUpsert = await resolveParticipants({
593
472
  session,
594
473
  accountId,
595
474
  scope,
596
475
  sessionId,
597
476
  senderNames: distinctSenderNames,
477
+ normaliseSenderName,
598
478
  });
599
479
  } catch (err) {
600
480
  await session.close().catch(() => {});
601
481
  cleanup();
602
482
  fail("archive-write", {
603
- phase: "participant-upsert",
483
+ phase: "participant-resolve",
604
484
  reason: err instanceof Error ? err.message : String(err),
605
485
  });
606
486
  }
607
487
 
488
+ const participantIds = participantUpsert.idsByName;
489
+ const participantsAlreadyExisted = participantUpsert.participantsAlreadyExisted;
490
+
608
491
  if (participantIds.size !== distinctSenderNames.length) {
609
492
  log(
610
- `participant-upsert mismatch expected=${distinctSenderNames.length} got=${participantIds.size}`,
493
+ `participant-resolve mismatch expected=${distinctSenderNames.length} got=${participantIds.size}`,
611
494
  );
612
495
  }
613
496
 
614
497
  const conversationId = parseResult.conversationId;
615
- const rows = parseResult.parsedLines.map((l, idx) => ({
616
- messageId: `whatsapp-export:${conversationId}:${l.sequenceIndex}:${hashLine(l.body, l.dateSent)}`,
498
+ // Task 870: messageId derives from a content-derived natural key. Stable
499
+ // across re-imports — same archive bytes + same operator timezone produce
500
+ // identical messageIds, so the existing MERGE in memory-archive-write is
501
+ // naturally idempotent.
502
+ const conversationSha256 = parseResult.archiveSourceFile.replace(
503
+ /^whatsapp-export:/,
504
+ "",
505
+ );
506
+ const rows = filteredLines.map((l) => ({
507
+ messageId: deriveMessageId({
508
+ conversationSha256,
509
+ dateSent: l.dateSent,
510
+ senderName: l.senderName,
511
+ body: l.body,
512
+ }),
617
513
  conversationId,
618
514
  senderNodeId: participantIds.get(l.senderName) || ownerElementId,
619
515
  senderName: l.senderName,
620
516
  dateSent: l.dateSent,
621
517
  body: l.body,
622
- sequenceIndex: idx,
518
+ // sequenceIndex preserved on the Message node for chain ordering tiebreaker
519
+ // in WHATSAPP_NEXT_CHAIN_CYPHER. NOT part of messageId — that would couple
520
+ // identity to array position and re-introduce shadowing.
521
+ sequenceIndex: l.sequenceIndex,
623
522
  }));
624
523
 
625
524
  const conversationBlock = {
@@ -635,7 +534,7 @@ async function main() {
635
534
  // own session and closes it in finally.
636
535
  await session.close().catch(() => {});
637
536
 
638
- // 8d. Archive-write
537
+ // 7f. Archive-write
639
538
  let archiveResult;
640
539
  const writeStart = Date.now();
641
540
  try {
@@ -657,43 +556,29 @@ async function main() {
657
556
  }
658
557
  const writeMs = Date.now() - writeStart;
659
558
  const createdMessages = archiveResult.counters.createdMessages || 0;
559
+ const nextEdgesProcessed = archiveResult.counters.nextEdgesProcessed || 0;
560
+ const nextEdgesCreated = archiveResult.counters.nextEdgesCreated || 0;
561
+ const messagesAlreadyExisted = rows.length - createdMessages;
660
562
  log(
661
- `archive-write created-conversations=${archiveResult.counters.createdConversations || 0} created-messages=${createdMessages} created-sent-edges=${archiveResult.counters.createdSentEdges || 0} created-participant-edges=${archiveResult.counters.createdParticipantEdges || 0} ms=${writeMs}`,
563
+ `archive-write created-messages=${createdMessages} messages-already-existed=${messagesAlreadyExisted} next-edges-processed=${nextEdgesProcessed} next-edges-created=${nextEdgesCreated} participants-already-existed=${participantsAlreadyExisted} ms=${writeMs}`,
662
564
  );
663
565
 
664
- // 8e. Insight pass
665
- let insightCounters = {
666
- chunks: 0,
667
- mentions: 0,
668
- tasks: 0,
669
- preferences: 0,
670
- observedRelationships: 0,
671
- };
672
- if (!flags.noInsight) {
673
- const insightStart = Date.now();
674
- let insightSession = getSession();
675
- try {
676
- insightCounters = await runInsightPass({
677
- callOauthLlm,
678
- HAIKU_MODEL,
679
- session: insightSession,
680
- conversationId,
681
- accountId,
682
- scope,
683
- sessionId,
684
- parsedLines: parseResult.parsedLines,
685
- });
686
- } catch (err) {
687
- log(
688
- `insight-pass FAILED reason="${err instanceof Error ? err.message : String(err)}"`,
689
- );
690
- } finally {
691
- await insightSession.close().catch(() => {});
692
- }
693
- const insightMs = Date.now() - insightStart;
694
- log(
695
- `insight-pass model=haiku chunks=${insightCounters.chunks} mentions=${insightCounters.mentions} tasks=${insightCounters.tasks} preferences=${insightCounters.preferences} observed-relationships=${insightCounters.observedRelationships} ms=${insightMs}`,
566
+ // 7g. Resolve conversationElementId for the agent-return shape (Task 871.6).
567
+ // One small read-after-write — the operator's database-operator subagent
568
+ // surfaces this as the canonical handle for downstream Phase 2 calls.
569
+ let conversationElementId = null;
570
+ const ridSession = getSession();
571
+ try {
572
+ const res = await ridSession.run(
573
+ `MATCH (c:Conversation:WhatsAppConversation {conversationId: $cid})
574
+ RETURN elementId(c) AS elemId LIMIT 1`,
575
+ { cid: conversationId },
696
576
  );
577
+ conversationElementId = res.records[0]?.get("elemId") ?? null;
578
+ } catch (err) {
579
+ log(`conversation-elementid-lookup failed reason="${err instanceof Error ? err.message : String(err)}"`);
580
+ } finally {
581
+ await ridSession.close().catch(() => {});
697
582
  }
698
583
 
699
584
  cleanup();
@@ -703,29 +588,23 @@ async function main() {
703
588
 
704
589
  process.stdout.write(
705
590
  JSON.stringify({
591
+ conversationElementId,
706
592
  conversationId,
707
593
  parsed: parseResult.counters.parsed,
708
594
  mediaSkipped: parseResult.counters.mediaSkipped,
709
595
  systemSkipped: parseResult.counters.systemSkipped,
710
- createdMessages,
711
- insightCounters,
596
+ filtered: filteredLines.length,
597
+ written: createdMessages,
598
+ messagesAlreadyExisted,
599
+ nextEdgesProcessed,
600
+ nextEdgesCreated,
601
+ participantsAlreadyExisted,
712
602
  ms: totalMs,
713
603
  }) + "\n",
714
604
  );
715
605
  process.exit(0);
716
606
  }
717
607
 
718
- function hashLine(body, dateSent) {
719
- // Stable per-line hash for messageId. The parser already hashed the file
720
- // bytes for conversationId; per-message we hash (body+dateSent) to get a
721
- // collision-resistant tail without crypto module overhead per line.
722
- let h = 0;
723
- const s = `${dateSent}${body}`;
724
- for (let i = 0; i < s.length; i++) {
725
- h = (h * 31 + s.charCodeAt(i)) | 0;
726
- }
727
- return (h >>> 0).toString(16).padStart(8, "0");
728
- }
729
608
 
730
609
  main().catch((err) => {
731
610
  fail("uncaught", { reason: err instanceof Error ? err.message : String(err) });