@rubytech/create-realagent 1.0.826 → 1.0.828

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/package.json +1 -1
  2. package/payload/platform/neo4j/schema.cypher +34 -2
  3. package/payload/platform/plugins/admin/hooks/archive-ingest-surface-gate.sh +19 -13
  4. package/payload/platform/plugins/admin/skills/onboarding/SKILL.md +5 -5
  5. package/payload/platform/plugins/docs/references/cloudflare.md +1 -1
  6. package/payload/platform/plugins/docs/references/plugins-guide.md +1 -1
  7. package/payload/platform/plugins/docs/references/troubleshooting.md +1 -0
  8. package/payload/platform/plugins/memory/PLUGIN.md +1 -1
  9. package/payload/platform/plugins/memory/mcp/dist/index.js +6 -41
  10. package/payload/platform/plugins/memory/mcp/dist/index.js.map +1 -1
  11. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/llm-classifier.test.js +51 -0
  12. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/llm-classifier.test.js.map +1 -1
  13. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts +19 -4
  14. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts.map +1 -1
  15. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js +139 -56
  16. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js.map +1 -1
  17. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.d.ts +2 -0
  18. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.d.ts.map +1 -0
  19. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.js +61 -0
  20. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.js.map +1 -0
  21. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts +34 -0
  22. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts.map +1 -1
  23. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js +241 -0
  24. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js.map +1 -1
  25. package/payload/platform/plugins/memory/references/schema-base.md +5 -2
  26. package/payload/platform/plugins/whatsapp-import/PLUGIN.md +17 -15
  27. package/payload/platform/plugins/whatsapp-import/bin/ingest.mjs +313 -366
  28. package/payload/platform/plugins/whatsapp-import/bin/whatsapp-ingest.sh +27 -60
  29. package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.d.ts +18 -0
  30. package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.d.ts.map +1 -0
  31. package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.js +31 -0
  32. package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.js.map +1 -0
  33. package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.d.ts +27 -12
  34. package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.d.ts.map +1 -1
  35. package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.js +40 -20
  36. package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.js.map +1 -1
  37. package/payload/platform/plugins/whatsapp-import/lib/dist/index.d.ts +7 -4
  38. package/payload/platform/plugins/whatsapp-import/lib/dist/index.d.ts.map +1 -1
  39. package/payload/platform/plugins/whatsapp-import/lib/dist/index.js +9 -6
  40. package/payload/platform/plugins/whatsapp-import/lib/dist/index.js.map +1 -1
  41. package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.d.ts +25 -0
  42. package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.d.ts.map +1 -0
  43. package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.js +48 -0
  44. package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.js.map +1 -0
  45. package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.d.ts +3 -0
  46. package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.d.ts.map +1 -0
  47. package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.js +47 -0
  48. package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.js.map +1 -0
  49. package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/delta-append.test.ts +163 -0
  50. package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/sessionize.test.ts +91 -0
  51. package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/to-classifier-input.test.ts +59 -0
  52. package/payload/platform/plugins/whatsapp-import/lib/src/delta-cursor.ts +54 -0
  53. package/payload/platform/plugins/whatsapp-import/lib/src/derive-keys.ts +55 -32
  54. package/payload/platform/plugins/whatsapp-import/lib/src/index.ts +9 -6
  55. package/payload/platform/plugins/whatsapp-import/lib/src/sessionize.ts +81 -0
  56. package/payload/platform/plugins/whatsapp-import/lib/src/to-classifier-input.ts +48 -0
  57. package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/SKILL.md +66 -73
  58. package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/references/conversation-archive-shape.md +143 -0
  59. package/payload/platform/templates/specialists/agents/database-operator.md +10 -11
  60. package/payload/server/chunk-T2OPNP3L.js +654 -0
  61. package/payload/server/cloudflare-task-tracker-CR6TL4VL.js +19 -0
  62. package/payload/server/public/assets/{admin-DOkUspG1.js → admin-BNwPsMhJ.js} +2 -2
  63. package/payload/server/public/assets/{graph-LLMJa4Ch.js → graph-N_Bw-8oT.js} +1 -1
  64. package/payload/server/public/assets/{page-DoaF3DB0.js → page-BKLGP-th.js} +1 -1
  65. package/payload/server/public/graph.html +2 -2
  66. package/payload/server/public/index.html +2 -2
  67. package/payload/server/server.js +277 -164
  68. package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/filter-gate.test.ts +0 -172
  69. package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/ingest-idempotence.test.ts +0 -141
  70. package/payload/platform/plugins/whatsapp-import/lib/src/filter.ts +0 -136
  71. package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import-enrich/SKILL.md +0 -333
@@ -1,52 +1,38 @@
1
1
  #!/usr/bin/env node
2
2
  // =============================================================================
3
- // ingest.mjs — in-process orchestrator for whatsapp-ingest.sh.
3
+ // ingest.mjs — in-process orchestrator for whatsapp-ingest.sh (Task 891).
4
4
  //
5
- // Phase 1 of the two-phase WhatsApp ingest contract (Task 855 / Task 871).
6
- // Deterministic only: parse → operator-supplied filter → archive-write
7
- // (Conversation + Messages + auto-Person participants + NEXT chronology).
8
- // No LLM in the per-message decision path. The Haiku insight pass moved to
9
- // Phase 2 (`mcp__memory__whatsapp-export-insight-pass`), invoked consciously
10
- // by the operator via the `whatsapp-import-enrich` skill.
5
+ // Pipeline (single phase — Phase 2 insight derivation deferred to its own
6
+ // follow-up task):
7
+ //
8
+ // parse → bind canonical senders → derive conversationIdentity
9
+ // → look up prior :ConversationArchive (delta cursor)
10
+ // → sessionize delta at gapHours boundaries
11
+ // → for each session: classify (mode='chat') → collect chunks
12
+ // → memoryIngest(parentLabel='ConversationArchive')
11
13
  //
12
14
  // Argv (positional): <archive-path>
13
- // Argv (flags): --owner-element-id <id> --scope <admin|public>
14
- // --subject-person-id <id>
15
- // --filter <all|senders=<csv>|date-range=<isoFrom>..<isoTo>>
16
- // [--account-id <accountId>] [--timezone <iana>]
15
+ // Argv (flags): --owner-element-id <id>
16
+ // --participant-person-ids <csv>
17
+ // --scope <admin|public>
18
+ // [--session-gap-hours <N>] (default 12)
19
+ // [--account-id <accountId>]
20
+ // [--timezone <iana>]
17
21
  // [--date-format <DD/MM/YY|MM/DD/YY|DD/MM/YYYY|MM/DD/YYYY>]
18
22
  //
19
- // Task 887 §A0 — `--subject-person-id` is required. DM scope: the operator
20
- // confirms the third party's :Person elementId from the preview histogram
21
- // before invoking. The writer accepts EXACTLY the senderNames carried by
22
- // {owner, subject}; any parsed senderName outside that set LOUD-FAILs with
23
- // `parser-miss reason="senderName=<verbatim> not in preview histogram
24
- // (parser failure — re-export or report)"`. Bounds writer cardinality to
25
- // the deterministic preview output — closes the auto-Person leak structurally.
26
- //
27
- // Stdout (success): one JSON line — Honest counters per Task 871.5.
28
- // {conversationElementId, conversationId,
29
- // parsed, mediaSkipped, systemSkipped,
30
- // filtered,
31
- // written, messagesAlreadyExisted,
32
- // nextEdgesProcessed, nextEdgesCreated,
33
- // participantsAlreadyExisted,
34
- // ms}
35
- // The skill (`whatsapp-import` SKILL.md) maps this verbose-diagnostic shape
36
- // to the agent-return short shape per Task 871.6 (`alreadyExisted` etc.)
37
- // when surfacing the result to the admin agent.
23
+ // Owner + participants form the closed sender set; any parsed senderName
24
+ // outside that set LOUD-FAILs with `parser-miss` (preserves Task 887 §A0).
25
+ // `--subject-person-id` and `--filter` are gone (the chunked archive shape
26
+ // makes per-message filtering obsolete — chunking bounds operator surface).
38
27
  //
39
- // Stderr (failure): one [whatsapp-ingest] FAIL line, exit non-zero.
28
+ // Stdout (success): one JSON line — all the counters the skill needs to
29
+ // formulate the three operator-facing messages. See SKILL.md for the shape.
30
+ // Stderr: one [whatsapp-import] FAIL line on failure, exit non-zero.
40
31
  // =============================================================================
41
32
 
42
- import {
43
- existsSync,
44
- mkdtempSync,
45
- readdirSync,
46
- rmSync,
47
- statSync,
48
- } from "node:fs";
49
- import { join, resolve, dirname } from "node:path";
33
+ import { existsSync, mkdtempSync, readdirSync, rmSync, statSync, createReadStream } from "node:fs";
34
+ import { createHash } from "node:crypto";
35
+ import { join, resolve, dirname, basename } from "node:path";
50
36
  import { tmpdir } from "node:os";
51
37
  import { spawnSync } from "node:child_process";
52
38
  import { fileURLToPath } from "node:url";
@@ -54,13 +40,8 @@ import { fileURLToPath } from "node:url";
54
40
  const __dirname = dirname(fileURLToPath(import.meta.url));
55
41
 
56
42
  // ---------------------------------------------------------------------------
57
- // 1. Resolve dist paths from the platform install root.
43
+ // 1. Resolve dist paths.
58
44
  // ---------------------------------------------------------------------------
59
- // MAXY_PLATFORM_ROOT is set by the wrapper script (and by claude-agent.ts in
60
- // production). Fall back to walking up from this file's location: the bin/
61
- // directory sits at platform/plugins/whatsapp-import/bin/, so platform/ is
62
- // three levels up.
63
-
64
45
  const platformRoot =
65
46
  process.env.MAXY_PLATFORM_ROOT?.trim() ||
66
47
  resolve(__dirname, "..", "..", "..");
@@ -73,44 +54,41 @@ const PARSE_EXPORT_PATH = resolve(
73
54
  "dist",
74
55
  "index.js",
75
56
  );
76
- const ARCHIVE_WRITE_PATH = resolve(
57
+ const NEO4J_LIB_PATH = resolve(
77
58
  platformRoot,
78
59
  "plugins",
79
60
  "memory",
80
61
  "mcp",
81
62
  "dist",
82
- "tools",
83
- "memory-archive-write.js",
63
+ "lib",
64
+ "neo4j.js",
84
65
  );
85
- const NEO4J_LIB_PATH = resolve(
66
+ const LLM_CLASSIFIER_PATH = resolve(
86
67
  platformRoot,
87
68
  "plugins",
88
69
  "memory",
89
70
  "mcp",
90
71
  "dist",
91
72
  "lib",
92
- "neo4j.js",
73
+ "llm-classifier.js",
93
74
  );
94
- // Task 870: pure key-derivation functions ship in the whatsapp-import lib.
95
- const DERIVE_KEYS_PATH = resolve(
75
+ const MEMORY_INGEST_PATH = resolve(
96
76
  platformRoot,
97
77
  "plugins",
98
- "whatsapp-import",
99
- "lib",
78
+ "memory",
79
+ "mcp",
100
80
  "dist",
101
- "derive-keys.js",
81
+ "tools",
82
+ "memory-ingest.js",
102
83
  );
103
84
 
104
85
  // ---------------------------------------------------------------------------
105
- // 2. Logger — every line `[whatsapp-ingest]`-prefixed to stderr (server.log).
86
+ // 2. Logger
106
87
  // ---------------------------------------------------------------------------
107
-
108
88
  function log(line) {
109
- process.stderr.write(`[whatsapp-ingest] ${line}\n`);
89
+ process.stderr.write(`[whatsapp-import] ${line}\n`);
110
90
  }
111
-
112
91
  function fail(phase, fields) {
113
- // Single failure line. Used as the only stderr line on non-zero exit.
114
92
  const fieldStr = Object.entries(fields)
115
93
  .map(([k, v]) =>
116
94
  typeof v === "string" && (v.includes(" ") || v.includes("="))
@@ -118,14 +96,13 @@ function fail(phase, fields) {
118
96
  : `${k}=${v ?? "-"}`,
119
97
  )
120
98
  .join(" ");
121
- process.stderr.write(`[whatsapp-ingest] FAIL phase=${phase} ${fieldStr}\n`);
99
+ process.stderr.write(`[whatsapp-import] FAIL phase=${phase} ${fieldStr}\n`);
122
100
  process.exit(1);
123
101
  }
124
102
 
125
103
  // ---------------------------------------------------------------------------
126
- // 3. Argv parsing.
104
+ // 3. Argv parsing
127
105
  // ---------------------------------------------------------------------------
128
-
129
106
  function parseArgv(argv) {
130
107
  const args = argv.slice(2);
131
108
  let archive = null;
@@ -144,52 +121,33 @@ function parseArgv(argv) {
144
121
  }
145
122
  if (!archive) fail("argv", { reason: "missing positional <archive>" });
146
123
  if (!flags.ownerElementId) fail("argv", { reason: "missing --owner-element-id" });
124
+ if (!flags.participantPersonIds) {
125
+ fail("argv", {
126
+ reason: "missing --participant-person-ids (csv of operator-confirmed :Person/:AdminUser elementIds, owner excluded)",
127
+ });
128
+ }
147
129
  if (!flags.scope) fail("argv", { reason: "missing --scope" });
148
130
  if (flags.scope !== "admin" && flags.scope !== "public") {
149
131
  fail("argv", { reason: `invalid --scope "${flags.scope}" (admin|public)` });
150
132
  }
151
- // Task 887 §A0 — DM scope: a single `--subject-person-id` identifies the
152
- // third party in the conversation. The owner + subject pair is the
153
- // canonical sender set; the writer rejects any other senderName as
154
- // parser-miss. Group-chat ingest (>2 distinct senders) is a future task.
155
- if (!flags.subjectPersonId) {
156
- fail("argv", { reason: "missing --subject-person-id (Task 887: operator-confirmed third-party :Person elementId from preview histogram)" });
157
- }
158
- // Task 871: --filter is mandatory. The deterministic Bash entry refuses
159
- // bulk archive writes without an operator-supplied filter — closes the
160
- // doctrine gap named in feedback_compress_at_ingest_for_bulk_archives.md.
161
- if (!flags.filter || !flags.filter.trim()) {
162
- process.stderr.write(
163
- `[whatsapp-ingest] FAIL filter-required reason="bulk-archive-gate (Task 871) — operator must specify --filter (one of all, senders=<csv>, date-range=<isoFrom>..<isoTo>)"\n`,
164
- );
165
- fail("argv", { reason: "--filter is required (one of all, senders=<csv>, date-range=<isoFrom>..<isoTo>)" });
166
- }
167
133
  return { archive, flags };
168
134
  }
169
-
170
135
  function camelCase(s) {
171
136
  return s.replace(/-([a-z])/g, (_m, c) => c.toUpperCase());
172
137
  }
173
138
 
174
139
  // ---------------------------------------------------------------------------
175
- // 4. Archive resolution — accept .zip / dir / _chat.txt; return absolute
176
- // path to _chat.txt and a cleanup callback for any tmp dir we created.
140
+ // 4. Archive resolution
177
141
  // ---------------------------------------------------------------------------
178
-
179
142
  function resolveChatTxt(archivePath) {
180
143
  const abs = resolve(archivePath);
181
- if (!existsSync(abs)) {
182
- fail("argv", { reason: `archive path not found: ${abs}` });
183
- }
144
+ if (!existsSync(abs)) fail("argv", { reason: `archive path not found: ${abs}` });
184
145
  const st = statSync(abs);
185
-
186
146
  if (st.isFile() && abs.endsWith(".zip")) {
187
147
  const tmp = mkdtempSync(join(tmpdir(), "whatsapp-ingest-"));
188
- const unzip = spawnSync("unzip", ["-q", "-o", abs, "-d", tmp], {
189
- encoding: "utf8",
190
- });
148
+ const unzip = spawnSync("unzip", ["-q", "-o", abs, "-d", tmp], { encoding: "utf8" });
191
149
  if (unzip.status !== 0) {
192
- rmSync(tmp, { recursive: true, force: true });
150
+ rmSync(tmp, { recursive: true });
193
151
  fail("argv", {
194
152
  reason: "unzip failed",
195
153
  archive: abs,
@@ -198,27 +156,22 @@ function resolveChatTxt(archivePath) {
198
156
  }
199
157
  const chat = findChatTxt(tmp);
200
158
  if (!chat) {
201
- rmSync(tmp, { recursive: true, force: true });
159
+ rmSync(tmp, { recursive: true });
202
160
  fail("argv", { reason: "_chat.txt not found in zip", archive: abs });
203
161
  }
204
- return { chatTxt: chat, cleanup: () => rmSync(tmp, { recursive: true, force: true }) };
162
+ return { chatTxt: chat, cleanup: () => rmSync(tmp, { recursive: true }) };
205
163
  }
206
-
207
164
  if (st.isDirectory()) {
208
165
  const chat = findChatTxt(abs);
209
166
  if (!chat) fail("argv", { reason: "_chat.txt not found in directory", archive: abs });
210
167
  return { chatTxt: chat, cleanup: () => {} };
211
168
  }
212
-
213
169
  if (st.isFile()) {
214
170
  return { chatTxt: abs, cleanup: () => {} };
215
171
  }
216
-
217
172
  fail("argv", { reason: `unsupported archive shape: ${abs}` });
218
- // unreachable, but keeps tools happy
219
173
  return { chatTxt: abs, cleanup: () => {} };
220
174
  }
221
-
222
175
  function findChatTxt(dir) {
223
176
  const entries = readdirSync(dir, { withFileTypes: true });
224
177
  for (const e of entries) {
@@ -234,26 +187,19 @@ function findChatTxt(dir) {
234
187
  }
235
188
 
236
189
  // ---------------------------------------------------------------------------
237
- // 5. Account resolution — Phase 0 has one account dir. The wrapper passes
238
- // --account-id when it is known; otherwise we pick the single dir under
239
- // {install}/data/accounts/.
190
+ // 5. Account resolution (Phase 0 = single account)
240
191
  // ---------------------------------------------------------------------------
241
-
242
192
  function resolveAccountId(flags) {
243
193
  if (flags.accountId && flags.accountId.trim()) return flags.accountId.trim();
244
194
  const installDir = resolve(platformRoot, "..");
245
195
  const accountsDir = join(installDir, "data", "accounts");
246
196
  if (!existsSync(accountsDir)) {
247
- fail("argv", {
248
- reason: `accounts dir not found: ${accountsDir}; pass --account-id explicitly`,
249
- });
197
+ fail("argv", { reason: `accounts dir not found: ${accountsDir}; pass --account-id explicitly` });
250
198
  }
251
199
  const dirs = readdirSync(accountsDir, { withFileTypes: true })
252
200
  .filter((d) => d.isDirectory() && !d.name.startsWith("."))
253
201
  .map((d) => d.name);
254
- if (dirs.length === 0) {
255
- fail("argv", { reason: `no accounts found under ${accountsDir}` });
256
- }
202
+ if (dirs.length === 0) fail("argv", { reason: `no accounts found under ${accountsDir}` });
257
203
  if (dirs.length > 1) {
258
204
  fail("argv", {
259
205
  reason: `multiple accounts under ${accountsDir} (${dirs.join(",")}); pass --account-id explicitly`,
@@ -263,26 +209,12 @@ function resolveAccountId(flags) {
263
209
  }
264
210
 
265
211
  // ---------------------------------------------------------------------------
266
- // 6. Bind senders to canonical {owner, subject} pair (Task 887 §A0).
212
+ // 6. Bind canonical sender set (Task 887 §A0 preserved).
267
213
  //
268
- // Pre-887 behaviour: a `resolveParticipants` step ran each parsed senderName
269
- // against an account-wide canonical index, falling through to auto-create a
270
- // `:Person {participantStatus:'auto-created'}` node for any miss. That
271
- // fallback path was the structural defect: any parser failure that produced
272
- // a polluted senderName (Task 845's LRM-prefixed body glued onto the prior
273
- // header → senderName="\"Adam Mackay:\\n‎[04/02/2026, 11:52:16] Adam Mackay\"")
274
- // leaked one bogus :Person per distinct miss. 23 leaked from the Adam Mackay
275
- // archive in a single ingest.
276
- //
277
- // 887 §A0 closes the leak by deleting the fallback. The operator now passes
278
- // `--subject-person-id` (third-party Person elementId from preview), the
279
- // dispatch passes `--owner-element-id`, and the writer accepts EXACTLY the
280
- // name candidates of those two nodes. Any parsed senderName outside that
281
- // closed set LOUD-FAILs the ingest with the verbatim string — surfacing a
282
- // parser bug as a hard exit instead of as graph pollution.
214
+ // All distinct parsed senderNames must resolve to {owner, participants...}.
215
+ // Any miss LOUD-FAILs with `parser-miss reason="senderName=<...>"`.
283
216
  // ---------------------------------------------------------------------------
284
-
285
- const CANONICAL_PAIR_FETCH_CYPHER = `
217
+ const CANONICAL_FETCH_CYPHER = `
286
218
  UNWIND $ids AS id
287
219
  MATCH (n) WHERE elementId(n) = id
288
220
  RETURN elementId(n) AS elemId,
@@ -293,12 +225,6 @@ RETURN elementId(n) AS elemId,
293
225
  coalesce(n.accountId, '') AS accountId
294
226
  `;
295
227
 
296
- /**
297
- * Sentinel error class so `main()`'s try/catch can recognise an operator
298
- * LOUD-FAIL (already-emitted FAIL line) and exit cleanly with cleanup.
299
- * Plain `process.exit(1)` from inside `bindCanonicalSenders` would skip
300
- * `main()`'s `cleanup()` (unzip tmp dir) and `session.close()` paths.
301
- */
302
228
  class IngestUserFacingError extends Error {
303
229
  constructor(message) {
304
230
  super(message);
@@ -311,61 +237,36 @@ async function bindCanonicalSenders({
311
237
  session,
312
238
  accountId,
313
239
  ownerElementId,
314
- subjectPersonId,
240
+ participantElementIds,
315
241
  senderNames,
316
242
  normaliseSenderName,
317
243
  }) {
318
- // Distinctness pre-flight — operator passing the same id for both flags
319
- // collapses to a single bound participant; the drift between
320
- // `participantCount=2` (from distinct senderNames) and
321
- // `participantsAlreadyExisted=1` would propagate to the JSON summary
322
- // silently. Refuse early, name the cause.
323
- if (ownerElementId === subjectPersonId) {
244
+ const allIds = [ownerElementId, ...participantElementIds];
245
+ const distinctIds = Array.from(new Set(allIds));
246
+ if (distinctIds.length !== allIds.length) {
324
247
  throw new IngestUserFacingError(
325
- `--owner-element-id and --subject-person-id must be distinct elementIds (both supplied as "${ownerElementId}")`,
248
+ `participant id list contains duplicates (owner appears in --participant-person-ids?)`,
326
249
  );
327
250
  }
328
-
329
- const ids = [ownerElementId, subjectPersonId];
330
- const res = await session.executeRead(async (tx) =>
331
- tx.run(CANONICAL_PAIR_FETCH_CYPHER, { ids }),
332
- );
333
-
334
- // Build normalised-name → elementId index from owner + subject candidates.
335
- // For each node we accept the full name, given name, family name, and
336
- // "given family" composite as match candidates so an export header that
337
- // says "Adam" or "Adam Mackay" both resolve to the same node.
338
- const index = new Map();
251
+ const res = await session.executeRead(async (tx) => tx.run(CANONICAL_FETCH_CYPHER, { ids: distinctIds }));
339
252
  const seenIds = new Set();
340
253
  const labelByElemId = new Map();
254
+ const index = new Map();
341
255
  for (const r of res.records) {
342
256
  const elemId = r.get("elemId");
343
257
  const labels = r.get("labels") || [];
344
258
  const acct = r.get("accountId") || "";
345
- // Empty accountId on a canonical Person/AdminUser is a graph-data
346
- // defect (migration 004 normally prunes account-less nodes). Refuse
347
- // rather than silently accept — bound-pair correctness depends on
348
- // accountId being present and matching.
349
259
  if (!acct) {
350
- throw new IngestUserFacingError(
351
- `node ${elemId} has no accountId — corrupt canonical Person/AdminUser; refusing ingest`,
352
- );
260
+ throw new IngestUserFacingError(`node ${elemId} has no accountId — corrupt canonical Person/AdminUser`);
353
261
  }
354
262
  if (acct !== accountId) {
355
- throw new IngestUserFacingError(
356
- `node ${elemId} belongs to account ${acct}, not ${accountId} — refusing cross-account ingest`,
357
- );
263
+ throw new IngestUserFacingError(`node ${elemId} belongs to account ${acct}, not ${accountId}`);
358
264
  }
359
265
  if (!labels.includes("Person") && !labels.includes("AdminUser")) {
360
- throw new IngestUserFacingError(
361
- `node ${elemId} has labels [${labels.join(",")}]; expected :Person or :AdminUser`,
362
- );
266
+ throw new IngestUserFacingError(`node ${elemId} has labels [${labels.join(",")}]; expected :Person or :AdminUser`);
363
267
  }
364
268
  seenIds.add(elemId);
365
- labelByElemId.set(
366
- elemId,
367
- labels.includes("Person") ? "Person" : "AdminUser",
368
- );
269
+ labelByElemId.set(elemId, labels.includes("Person") ? "Person" : "AdminUser");
369
270
  const candidates = [];
370
271
  const name = r.get("name") || "";
371
272
  const given = r.get("givenName") || "";
@@ -380,290 +281,336 @@ async function bindCanonicalSenders({
380
281
  if (!index.has(norm)) index.set(norm, elemId);
381
282
  }
382
283
  }
383
-
384
- // Both ids must resolve. A typo'd or stale id is operator error; LOUD-FAIL
385
- // before any parser work touches the graph.
386
- if (!seenIds.has(ownerElementId)) {
387
- throw new IngestUserFacingError(
388
- `--owner-element-id ${ownerElementId} not found in graph`,
389
- );
390
- }
391
- if (!seenIds.has(subjectPersonId)) {
392
- throw new IngestUserFacingError(
393
- `--subject-person-id ${subjectPersonId} not found in graph`,
394
- );
395
- }
396
- // Subject must specifically be a `:Person` — the third party in a DM is
397
- // never the operator's `:AdminUser`. (Owner can be either; both Adam and
398
- // an external collaborator owning an export are operator-curated cases.)
399
- if (labelByElemId.get(subjectPersonId) !== "Person") {
400
- throw new IngestUserFacingError(
401
- `--subject-person-id ${subjectPersonId} resolves to a :${labelByElemId.get(subjectPersonId)} — subject must be a :Person`,
402
- );
403
- }
404
-
405
- // Group-chat early-detect: the singular `--subject-person-id` flag is
406
- // DM-scoped. A `_chat.txt` carrying ≥3 distinct senders is an unsupported
407
- // scope, NOT a parser bug. Emit a distinct reason so the operator does
408
- // not chase a phantom parser regression. Group-chat support is the
409
- // separate Task 889 lane.
410
- if (senderNames.length > 2) {
411
- throw new IngestUserFacingError(
412
- `unsupported-scope reason="archive carries ${senderNames.length} distinct senders; --subject-person-id is DM-only (≤2 senders) — group-chat ingest is the separate Task 889 lane"`,
413
- );
284
+ for (const id of distinctIds) {
285
+ if (!seenIds.has(id)) {
286
+ throw new IngestUserFacingError(`elementId ${id} not found in graph`);
287
+ }
414
288
  }
415
-
416
- // Validate every distinct parsed senderName against the closed candidate
417
- // set. The first miss is the LOUD-FAIL — operators see one parser-miss
418
- // line per bad import, not 23.
419
- const idsByName = new Map();
289
+ // Validate every distinct parsed senderName against the closed candidate set.
420
290
  for (const senderName of senderNames) {
421
291
  const norm = normaliseSenderName(senderName);
422
292
  const hit = index.get(norm);
423
293
  if (!hit) {
424
294
  throw new IngestUserFacingError(
425
- `parser-miss reason="senderName=${senderName} not in preview histogram (parser failure re-export or report)"`,
295
+ `parser-miss reason="senderName=${senderName} not in confirmed participant set (${distinctIds.length} confirmed elementIds; re-run with the missing :Person elementId in --participant-person-ids)"`,
426
296
  );
427
297
  }
428
- idsByName.set(senderName, hit);
429
- log(
430
- `participant-resolved senderName="${senderName}" matched=canonical nodeId=${hit}`,
431
- );
432
298
  }
433
- // participantsAlreadyExisted = always 2 (owner + subject) under the
434
- // bound-pair contract; expose it for the JSON summary's existing field.
435
- return { idsByName, participantsAlreadyExisted: seenIds.size };
299
+ return { participantsResolved: seenIds.size };
300
+ }
301
+
302
+ // ---------------------------------------------------------------------------
303
+ // 7. SHA-256 of file bytes (for archiveSha256)
304
+ // ---------------------------------------------------------------------------
305
/**
 * Compute the hex-encoded SHA-256 digest of a file's bytes.
 *
 * The file is streamed chunk by chunk into the hash, so it is never read
 * into memory in one piece.
 *
 * @param {string} filePath - Path of the file to hash.
 * @returns {Promise<string>} Hex-encoded SHA-256 digest of the file contents.
 * @throws Rejects with the read-stream error if the file cannot be opened
 *   or read.
 */
async function fileSha256(filePath) {
  const hash = createHash("sha256");
  // Async iteration replaces the hand-wired data/end/error listeners: a
  // stream error (or a throw while hashing a chunk) rejects the returned
  // promise instead of needing an explicit Promise constructor.
  for await (const chunk of createReadStream(filePath)) {
    hash.update(chunk);
  }
  return hash.digest("hex");
}
437
314
 
438
315
  // ---------------------------------------------------------------------------
439
- // 7. Main.
316
+ // 8. Build natural-edge map for chat-mode classifier.
317
+ // Chat mode doesn't use it (the chat prompt drops edge proposals), but
318
+ // classifyDocument's signature still takes the param. Pass the empty string.
440
319
  // ---------------------------------------------------------------------------
441
320
 
321
+ // ---------------------------------------------------------------------------
322
+ // 9. Main
323
+ // ---------------------------------------------------------------------------
442
324
  async function main() {
443
325
  const startedMs = Date.now();
444
326
  const { archive, flags } = parseArgv(process.argv);
445
327
  const ownerElementId = flags.ownerElementId;
446
- const subjectPersonId = flags.subjectPersonId;
328
+ const participantElementIds = flags.participantPersonIds
329
+ .split(",")
330
+ .map((s) => s.trim())
331
+ .filter((s) => s.length > 0);
332
+ if (participantElementIds.length === 0) {
333
+ fail("argv", { reason: "--participant-person-ids must list at least one elementId" });
334
+ }
447
335
  const scope = flags.scope;
448
336
  const accountId = resolveAccountId(flags);
449
337
  const timezone = flags.timezone || "Europe/London";
450
338
  const dateFormat = flags.dateFormat;
339
+ const sessionGapHours = flags.sessionGapHours
340
+ ? parseFloat(flags.sessionGapHours)
341
+ : 12;
342
+ if (!Number.isFinite(sessionGapHours) || sessionGapHours <= 0) {
343
+ fail("argv", { reason: `invalid --session-gap-hours "${flags.sessionGapHours}" (must be positive number)` });
344
+ }
451
345
  const sessionId =
452
- flags.sessionId || `whatsapp-ingest:${Date.now()}:${Math.random().toString(36).slice(2, 10)}`;
346
+ flags.sessionId || `whatsapp-import:${Date.now()}:${Math.random().toString(36).slice(2, 10)}`;
453
347
 
454
- let parseExport, parseFilterArg, applyFilter, memoryArchiveWrite, getSession;
455
- let normaliseSenderName, deriveMessageId;
348
+ // Imports fail loudly if any compiled dist missing
349
+ let parseExport, sessionize, toClassifierInput, findDeltaCursor;
350
+ let normaliseSenderName, deriveConversationIdentity, deriveMessageContentHash;
351
+ let getSession, classifyDocument, memoryIngest;
456
352
  try {
457
- ({ parseExport, parseFilterArg, applyFilter } = await import(PARSE_EXPORT_PATH));
458
- ({ memoryArchiveWrite } = await import(ARCHIVE_WRITE_PATH));
353
+ ({
354
+ parseExport,
355
+ sessionize,
356
+ toClassifierInput,
357
+ findDeltaCursor,
358
+ normaliseSenderName,
359
+ deriveConversationIdentity,
360
+ deriveMessageContentHash,
361
+ } = await import(PARSE_EXPORT_PATH));
459
362
  ({ getSession } = await import(NEO4J_LIB_PATH));
460
- ({ normaliseSenderName, deriveMessageId } = await import(DERIVE_KEYS_PATH));
363
+ ({ classifyDocument } = await import(LLM_CLASSIFIER_PATH));
364
+ ({ memoryIngest } = await import(MEMORY_INGEST_PATH));
461
365
  } catch (err) {
462
366
  fail("import", {
463
- reason: `failed to import compiled dist`,
367
+ reason: "failed to import compiled dist",
464
368
  detail: err instanceof Error ? err.message : String(err),
465
369
  });
466
370
  }
467
371
 
468
- // 7a. Parse the operator-supplied filter ahead of any IO so a malformed
469
- // value LOUD-FAILs before unzip / Neo4j connection.
470
- let filter;
471
- try {
472
- filter = parseFilterArg(flags.filter);
473
- } catch (err) {
474
- fail("argv", { reason: err instanceof Error ? err.message : String(err) });
475
- }
476
-
477
- // 7b. Resolve _chat.txt
372
+ // 9a. Resolve _chat.txt + sha256
478
373
  const { chatTxt, cleanup } = resolveChatTxt(archive);
479
- const archiveBytes = statSync(chatTxt).size;
374
+ const archiveSha256 = await fileSha256(chatTxt);
375
+ const archiveSourceFile = basename(chatTxt);
480
376
  log(
481
- `start file=${chatTxt} owner=${ownerElementId} scope=${scope} accountId=${accountId} archive-bytes=${archiveBytes} filter=${flags.filter}`,
377
+ `start file=${archiveSourceFile} owner=${ownerElementId} participants=${participantElementIds.length} scope=${scope} accountId=${accountId} archiveSha256=${archiveSha256.slice(0, 12)} session-gap-hours=${sessionGapHours}`,
482
378
  );
483
379
 
484
- // 7c. Parse
380
+ // 9b. Parse
485
381
  let parseResult;
486
- const parseStart = Date.now();
487
382
  try {
488
383
  parseResult = parseExport({ filePath: chatTxt, accountId, timezone, dateFormat });
489
384
  } catch (err) {
490
385
  cleanup();
491
386
  fail("parse", { reason: err instanceof Error ? err.message : String(err) });
492
387
  }
493
- const parseMs = Date.now() - parseStart;
494
- log(
495
- `parsed lines=${parseResult.counters.parsed} media-skipped=${parseResult.counters.mediaSkipped} system-skipped=${parseResult.counters.systemSkipped} ms=${parseMs}`,
496
- );
497
-
498
- // 7d. Apply filter
499
- const filteredLines = applyFilter(parseResult.parsedLines, filter);
388
+ const allLines = parseResult.parsedLines;
500
389
  log(
501
- `filter-applied kind=${filter.kind} parsed=${parseResult.parsedLines.length} filtered=${filteredLines.length}`,
390
+ `parsed lines=${parseResult.counters.parsed} media-skipped=${parseResult.counters.mediaSkipped} system-skipped=${parseResult.counters.systemSkipped}`,
502
391
  );
503
- if (filteredLines.length === 0) {
392
+ if (allLines.length === 0) {
504
393
  cleanup();
505
- fail("filter", {
506
- reason: `filter "${flags.filter}" matched zero rows from ${parseResult.parsedLines.length} parsed lines — refusing to write an empty conversation`,
507
- });
394
+ fail("parse", { reason: "zero parsed lines after walking archive" });
508
395
  }
509
396
 
510
- const firstTs = filteredLines[0].dateSent;
511
- const lastTs = filteredLines[filteredLines.length - 1].dateSent;
512
-
513
- // 7e. Auto-create participants and build rows
397
+ // 9c. Bind canonical senders against the confirmed set
398
+ const distinctSenderNames = Array.from(new Set(allLines.map((l) => l.senderName)));
399
+ const senderHistogram = computeSenderHistogram(allLines);
514
400
  let session = getSession();
515
- let participantUpsert = { idsByName: new Map(), participantsAlreadyExisted: 0 };
516
- const distinctSenderNames = Array.from(
517
- new Set(filteredLines.map((l) => l.senderName)),
518
- );
519
-
520
401
  try {
521
- participantUpsert = await bindCanonicalSenders({
402
+ await bindCanonicalSenders({
522
403
  session,
523
404
  accountId,
524
405
  ownerElementId,
525
- subjectPersonId,
406
+ participantElementIds,
526
407
  senderNames: distinctSenderNames,
527
408
  normaliseSenderName,
528
409
  });
529
410
  } catch (err) {
530
411
  await session.close().catch(() => {});
531
412
  cleanup();
532
- // IngestUserFacingError carries a brief-shaped FAIL line (parser-miss /
533
- // unsupported-scope / argv mismatch) that the operator's grep recipes
534
- // already match on. Preserve it verbatim instead of wrapping in
535
- // phase=archive-write — wrapping would defeat
536
- // `grep '\[whatsapp-ingest\] FAIL parser-miss'` and friends.
537
413
  if (err && err.userFacing) {
538
- process.stderr.write(`[whatsapp-ingest] FAIL ${err.message}\n`);
414
+ process.stderr.write(`[whatsapp-import] FAIL ${err.message}\n`);
539
415
  process.exit(1);
540
416
  }
541
- fail("archive-write", {
542
- phase: "bind-canonical-senders",
543
- reason: err instanceof Error ? err.message : String(err),
544
- });
417
+ fail("argv", { reason: err instanceof Error ? err.message : String(err) });
545
418
  }
546
419
 
547
- const participantIds = participantUpsert.idsByName;
548
- const participantsAlreadyExisted = participantUpsert.participantsAlreadyExisted;
549
-
550
- if (participantIds.size !== distinctSenderNames.length) {
551
- log(
552
- `participant-resolve mismatch expected=${distinctSenderNames.length} got=${participantIds.size}`,
420
+ // 9d. Derive conversationIdentity and look up prior :ConversationArchive
421
+ const conversationIdentity = deriveConversationIdentity({
422
+ accountId,
423
+ participantElementIds: [ownerElementId, ...participantElementIds],
424
+ });
425
+ let priorArchive = null;
426
+ try {
427
+ const r = await session.run(
428
+ `MATCH (a:ConversationArchive { conversationIdentity: $cid })
429
+ RETURN elementId(a) AS elemId,
430
+ a.lastIngestedMessageHash AS lastHash,
431
+ a.lastIngestedMessageAt AS lastAt LIMIT 1`,
432
+ { cid: conversationIdentity },
553
433
  );
434
+ if (r.records[0]) {
435
+ priorArchive = {
436
+ elemId: r.records[0].get("elemId"),
437
+ lastHash: r.records[0].get("lastHash"),
438
+ lastAt: r.records[0].get("lastAt"),
439
+ };
440
+ }
441
+ } catch (err) {
442
+ await session.close().catch(() => {});
443
+ cleanup();
444
+ fail("delta-cursor-missing", { reason: `conversationArchive lookup failed: ${err instanceof Error ? err.message : String(err)}` });
554
445
  }
446
+ await session.close().catch(() => {});
555
447
 
556
- const conversationId = parseResult.conversationId;
557
- // Task 870: messageId derives from a content-derived natural key. Stable
558
- // across re-imports — same archive bytes + same operator timezone produce
559
- // identical messageIds, so the existing MERGE in memory-archive-write is
560
- // naturally idempotent.
561
- const conversationSha256 = parseResult.archiveSourceFile.replace(
562
- /^whatsapp-export:/,
563
- "",
448
+ // 9e. Compute deltaStart
449
+ let deltaStart = 0;
450
+ let deltaKind = "first-ingest";
451
+ if (priorArchive && priorArchive.lastHash) {
452
+ const cursor = findDeltaCursor(allLines, priorArchive.lastHash);
453
+ if (cursor.kind === "missing") {
454
+ cleanup();
455
+ fail("delta-cursor-missing", {
456
+ reason: `prior cursor not found in re-export (operator deleted prior messages, or this is a different chat archive)`,
457
+ priorArchive: priorArchive.elemId,
458
+ lastIngestedMessageAt: priorArchive.lastAt,
459
+ });
460
+ }
461
+ if (cursor.kind === "empty") {
462
+ log(`noop reason="no new messages since ${priorArchive.lastAt}"`);
463
+ cleanup();
464
+ const totalMs = Date.now() - startedMs;
465
+ process.stdout.write(JSON.stringify({
466
+ archiveElementId: priorArchive.elemId,
467
+ conversationIdentity,
468
+ archiveSha256,
469
+ archiveSourceFile,
470
+ parsed: parseResult.counters.parsed,
471
+ mediaSkipped: parseResult.counters.mediaSkipped,
472
+ systemSkipped: parseResult.counters.systemSkipped,
473
+ delta: { kind: "empty-delta", deltaStart: allLines.length, deltaMessages: 0 },
474
+ sessions: 0,
475
+ chunks: 0,
476
+ nextEdgesCreated: 0,
477
+ participantsLinked: 0,
478
+ dateRange: { first: allLines[0].dateSent, last: allLines[allLines.length - 1].dateSent },
479
+ senderHistogram,
480
+ topicKeywords: [],
481
+ ms: totalMs,
482
+ priorLastIngestedMessageAt: priorArchive.lastAt,
483
+ }) + "\n");
484
+ process.exit(0);
485
+ }
486
+ deltaStart = cursor.deltaStart;
487
+ deltaKind = "delta";
488
+ }
489
+ const deltaLines = allLines.slice(deltaStart);
490
+ log(
491
+ `delta cursor=${priorArchive ? priorArchive.lastHash.slice(0, 12) : "(first-ingest)"} cursor-line=${deltaStart} delta-messages=${deltaLines.length}`,
564
492
  );
565
- const rows = filteredLines.map((l) => ({
566
- messageId: deriveMessageId({
567
- conversationSha256,
568
- dateSent: l.dateSent,
569
- senderName: l.senderName,
570
- body: l.body,
571
- }),
572
- conversationId,
573
- senderNodeId: participantIds.get(l.senderName) || ownerElementId,
574
- senderName: l.senderName,
575
- dateSent: l.dateSent,
576
- body: l.body,
577
- // sequenceIndex preserved on the Message node for chain ordering tiebreaker
578
- // in WHATSAPP_NEXT_CHAIN_CYPHER. NOT part of messageId — that would couple
579
- // identity to array position and re-introduce shadowing.
580
- sequenceIndex: l.sequenceIndex,
581
- }));
582
493
 
583
- const conversationBlock = {
584
- conversationId,
585
- archiveSourceFile: parseResult.archiveSourceFile,
586
- firstMessageAt: firstTs,
587
- lastMessageAt: lastTs,
588
- participantCount: distinctSenderNames.length,
589
- messageCount: rows.length,
590
- };
494
+ // 9f. Sessionize delta
495
+ const sessions = sessionize(deltaLines, sessionGapHours);
496
+ log(
497
+ `sessionize file=${archiveSourceFile} archiveSha256=${archiveSha256.slice(0, 12)} messages=${deltaLines.length} sessions=${sessions.length} gap-hours=${sessionGapHours}`,
498
+ );
591
499
 
592
- // Close the session before archive-write — memoryArchiveWrite opens its
593
- // own session and closes it in finally.
594
- await session.close().catch(() => {});
500
+ // 9g. Classify each session via Haiku (mode='chat')
501
+ const allChunks = [];
502
+ const allKeywords = new Set();
503
+ for (const s of sessions) {
504
+ const sessionStart = Date.now();
505
+ const text = toClassifierInput(s);
506
+ const result = await classifyDocument({
507
+ accountId,
508
+ mode: "chat",
509
+ anchorDescription: `WhatsApp conversation between ${[ownerElementId, ...participantElementIds].length} participants (session ${s.index + 1} of ${sessions.length})`,
510
+ ontologyLabels: new Set([]),
511
+ naturalEdgeMap: "",
512
+ documentText: text,
513
+ });
514
+ if (result.kind === "fallback") {
515
+ cleanup();
516
+ fail("classify", { reason: `Haiku fallback on session ${s.index}: ${result.reason}` });
517
+ }
518
+ const chunkCount = result.output.sections.length;
519
+ log(
520
+ `classify-session sessionIndex=${s.index + 1}/${sessions.length} messages=${s.messages.length} chars=${text.length} chunks=${chunkCount} ms=${Date.now() - sessionStart}`,
521
+ );
522
+ if (chunkCount === 0 && s.messages.length > 0) {
523
+ cleanup();
524
+ fail("classify", {
525
+ reason: `session ${s.index} of ${s.messages.length} messages produced zero chunks (classifier-prompt regression)`,
526
+ });
527
+ }
528
+ for (const sec of result.output.sections) {
529
+ allChunks.push(sec);
530
+ }
531
+ for (const kw of result.output.documentKeywords) {
532
+ allKeywords.add(kw);
533
+ }
534
+ }
595
535
 
596
- // 7f. Archive-write
597
- let archiveResult;
598
- const writeStart = Date.now();
536
+ // 9h. Compute lastIngestedMessageHash from the last delta line
537
+ const lastLine = deltaLines[deltaLines.length - 1];
538
+ const lastIngestedMessageHash = deriveMessageContentHash({
539
+ dateSent: lastLine.dateSent,
540
+ senderName: lastLine.senderName,
541
+ body: lastLine.body,
542
+ });
543
+ const lastIngestedMessageAt = lastLine.dateSent;
544
+
545
+ // 9i. Aggregate document-level summary across sessions (concatenate first
546
+ // two-three classifier summaries; the parent's summary is informational)
547
+ const documentSummary = sessions.length === 1
548
+ ? `${deltaLines.length} messages in 1 session, ${allChunks.length} chunks.`
549
+ : `${deltaLines.length} messages in ${sessions.length} sessions, ${allChunks.length} chunks.`;
550
+
551
+ // 9j. Call memoryIngest with parentLabel='ConversationArchive'
552
+ let ingestResult;
553
+ const ingestStart = Date.now();
599
554
  try {
600
- archiveResult = await memoryArchiveWrite({
601
- archiveType: "whatsapp-export",
602
- ownerNodeId: ownerElementId,
555
+ ingestResult = await memoryIngest({
603
556
  accountId,
604
- conversation: conversationBlock,
605
- participantNodeIds: Array.from(participantIds.values()),
606
- rows,
557
+ attachmentId: conversationIdentity, // semantic rename per parentLabel
558
+ parentLabel: "ConversationArchive",
559
+ documentSummary,
560
+ anchorNodeId: ownerElementId,
561
+ anchorLabel: "AdminUser", // unused in chat path but required by IngestParams
562
+ sections: allChunks,
563
+ scope,
607
564
  sessionId,
608
- archiveFilePath: chatTxt,
565
+ documentKeywords: Array.from(allKeywords),
566
+ archiveSha256,
567
+ archiveSourceFile,
568
+ lastIngestedMessageHash,
569
+ lastIngestedMessageAt,
570
+ participantElementIds: [ownerElementId, ...participantElementIds],
609
571
  });
610
572
  } catch (err) {
611
573
  cleanup();
612
- fail("archive-write", {
613
- reason: err instanceof Error ? err.message : String(err),
614
- });
574
+ fail("memory-ingest", { reason: err instanceof Error ? err.message : String(err) });
615
575
  }
616
- const writeMs = Date.now() - writeStart;
617
- const createdMessages = archiveResult.counters.createdMessages || 0;
618
- const nextEdgesProcessed = archiveResult.counters.nextEdgesProcessed || 0;
619
- const nextEdgesCreated = archiveResult.counters.nextEdgesCreated || 0;
620
- const messagesAlreadyExisted = rows.length - createdMessages;
621
576
  log(
622
- `archive-write created-messages=${createdMessages} messages-already-existed=${messagesAlreadyExisted} next-edges-processed=${nextEdgesProcessed} next-edges-created=${nextEdgesCreated} participants-already-existed=${participantsAlreadyExisted} ms=${writeMs}`,
577
+ `file=${archiveSourceFile} conversationIdentity=${conversationIdentity.slice(0, 12)} archiveElementId=${ingestResult.documentNodeId} chunks-written=${ingestResult.sectionCount} next-edges=${ingestResult.edgeBreakdown.NEXT ?? 0} participants=${ingestResult.edgeBreakdown.PARTICIPANT_IN ?? 0} ms=${Date.now() - ingestStart}`,
623
578
  );
624
579
 
625
- // 7g. Resolve conversationElementId for the agent-return shape (Task 871.6).
626
- // One small read-after-write — the operator's database-operator subagent
627
- // surfaces this as the canonical handle for downstream Phase 2 calls.
628
- let conversationElementId = null;
629
- const ridSession = getSession();
630
- try {
631
- const res = await ridSession.run(
632
- `MATCH (c:Conversation:WhatsAppConversation {conversationId: $cid})
633
- RETURN elementId(c) AS elemId LIMIT 1`,
634
- { cid: conversationId },
635
- );
636
- conversationElementId = res.records[0]?.get("elemId") ?? null;
637
- } catch (err) {
638
- log(`conversation-elementid-lookup failed reason="${err instanceof Error ? err.message : String(err)}"`);
639
- } finally {
640
- await ridSession.close().catch(() => {});
641
- }
642
-
643
580
  cleanup();
644
-
645
581
  const totalMs = Date.now() - startedMs;
646
- log(`done conversationId=${conversationId} total-ms=${totalMs} exit=0`);
647
-
648
- process.stdout.write(
649
- JSON.stringify({
650
- conversationElementId,
651
- conversationId,
652
- parsed: parseResult.counters.parsed,
653
- mediaSkipped: parseResult.counters.mediaSkipped,
654
- systemSkipped: parseResult.counters.systemSkipped,
655
- filtered: filteredLines.length,
656
- written: createdMessages,
657
- messagesAlreadyExisted,
658
- nextEdgesProcessed,
659
- nextEdgesCreated,
660
- participantsAlreadyExisted,
661
- ms: totalMs,
662
- }) + "\n",
663
- );
582
+ log(`done conversationIdentity=${conversationIdentity.slice(0, 12)} total-ms=${totalMs} exit=0`);
583
+
584
+ process.stdout.write(JSON.stringify({
585
+ archiveElementId: ingestResult.documentNodeId,
586
+ conversationIdentity,
587
+ archiveSha256,
588
+ archiveSourceFile,
589
+ parsed: parseResult.counters.parsed,
590
+ mediaSkipped: parseResult.counters.mediaSkipped,
591
+ systemSkipped: parseResult.counters.systemSkipped,
592
+ delta: { kind: deltaKind, deltaStart, deltaMessages: deltaLines.length },
593
+ sessions: sessions.length,
594
+ chunks: ingestResult.sectionCount,
595
+ nextEdgesCreated: ingestResult.edgeBreakdown.NEXT ?? 0,
596
+ participantsLinked: ingestResult.edgeBreakdown.PARTICIPANT_IN ?? 0,
597
+ dateRange: { first: allLines[0].dateSent, last: allLines[allLines.length - 1].dateSent },
598
+ senderHistogram,
599
+ topicKeywords: Array.from(allKeywords),
600
+ ms: totalMs,
601
+ }) + "\n");
664
602
  process.exit(0);
665
603
  }
666
604
 
605
/**
 * Tally how many parsed lines each sender contributed.
 *
 * @param {Iterable<{senderName: string}>} lines - Parsed chat lines.
 * @returns {Array<{name: string, count: number}>} One entry per distinct
 *   senderName, ordered by count from highest to lowest.
 */
function computeSenderHistogram(lines) {
  const tally = new Map();
  for (const line of lines) {
    const sender = line.senderName;
    const seenSoFar = tally.get(sender) ?? 0;
    tally.set(sender, seenSoFar + 1);
  }
  const histogram = [];
  for (const [name, count] of tally) {
    histogram.push({ name, count });
  }
  histogram.sort((left, right) => right.count - left.count);
  return histogram;
}
667
614
 
668
615
  main().catch((err) => {
669
616
  fail("uncaught", { reason: err instanceof Error ? err.message : String(err) });