@rubytech/create-realagent 1.0.826 → 1.0.829

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. package/package.json +1 -1
  2. package/payload/platform/neo4j/schema.cypher +35 -2
  3. package/payload/platform/package.json +2 -2
  4. package/payload/platform/plugins/admin/hooks/__tests__/archive-ingest-surface-gate.test.sh +39 -54
  5. package/payload/platform/plugins/admin/hooks/archive-ingest-surface-gate.sh +26 -52
  6. package/payload/platform/plugins/admin/skills/onboarding/SKILL.md +7 -7
  7. package/payload/platform/plugins/docs/references/cloudflare.md +1 -1
  8. package/payload/platform/plugins/docs/references/plugins-guide.md +1 -1
  9. package/payload/platform/plugins/docs/references/troubleshooting.md +1 -0
  10. package/payload/platform/plugins/memory/PLUGIN.md +5 -5
  11. package/payload/platform/plugins/memory/mcp/dist/index.js +18 -253
  12. package/payload/platform/plugins/memory/mcp/dist/index.js.map +1 -1
  13. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/llm-classifier.test.js +51 -0
  14. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/llm-classifier.test.js.map +1 -1
  15. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-validator.test.js +103 -0
  16. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-validator.test.js.map +1 -1
  17. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts +19 -4
  18. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts.map +1 -1
  19. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js +149 -56
  20. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js.map +1 -1
  21. package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.d.ts +16 -1
  22. package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.d.ts.map +1 -1
  23. package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.js +12 -3
  24. package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.js.map +1 -1
  25. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-archive-write.test.js +2 -138
  26. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-archive-write.test.js.map +1 -1
  27. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.d.ts +2 -0
  28. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.d.ts.map +1 -0
  29. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.js +66 -0
  30. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.js.map +1 -0
  31. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/profile-update-personfields-open.test.d.ts +2 -0
  32. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/profile-update-personfields-open.test.d.ts.map +1 -0
  33. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/profile-update-personfields-open.test.js +148 -0
  34. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/profile-update-personfields-open.test.js.map +1 -0
  35. package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.d.ts +1 -64
  36. package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.d.ts.map +1 -1
  37. package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.js +6 -336
  38. package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.js.map +1 -1
  39. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts +30 -0
  40. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts.map +1 -1
  41. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js +231 -0
  42. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js.map +1 -1
  43. package/payload/platform/plugins/memory/mcp/dist/tools/profile-update.d.ts +21 -17
  44. package/payload/platform/plugins/memory/mcp/dist/tools/profile-update.d.ts.map +1 -1
  45. package/payload/platform/plugins/memory/mcp/dist/tools/profile-update.js +77 -37
  46. package/payload/platform/plugins/memory/mcp/dist/tools/profile-update.js.map +1 -1
  47. package/payload/platform/plugins/memory/references/schema-base.md +7 -2
  48. package/payload/platform/plugins/memory/skills/document-ingest/SKILL.md +54 -4
  49. package/payload/platform/plugins/whatsapp/PLUGIN.md +1 -1
  50. package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.d.ts +18 -0
  51. package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.d.ts.map +1 -0
  52. package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.js +31 -0
  53. package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.js.map +1 -0
  54. package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.d.ts +27 -12
  55. package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.d.ts.map +1 -1
  56. package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.js +40 -20
  57. package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.js.map +1 -1
  58. package/payload/platform/plugins/whatsapp-import/lib/dist/index.d.ts +7 -4
  59. package/payload/platform/plugins/whatsapp-import/lib/dist/index.d.ts.map +1 -1
  60. package/payload/platform/plugins/whatsapp-import/lib/dist/index.js +9 -6
  61. package/payload/platform/plugins/whatsapp-import/lib/dist/index.js.map +1 -1
  62. package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.d.ts +25 -0
  63. package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.d.ts.map +1 -0
  64. package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.js +48 -0
  65. package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.js.map +1 -0
  66. package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.d.ts +3 -0
  67. package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.d.ts.map +1 -0
  68. package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.js +47 -0
  69. package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.js.map +1 -0
  70. package/payload/platform/scripts/seed-neo4j.sh +15 -14
  71. package/payload/platform/templates/specialists/agents/database-operator.md +10 -17
  72. package/payload/server/chunk-CUSH3UXP.js +2305 -0
  73. package/payload/server/chunk-IWNDVGKT.js +10077 -0
  74. package/payload/server/chunk-KC7NUABI.js +654 -0
  75. package/payload/server/chunk-T2OPNP3L.js +654 -0
  76. package/payload/server/chunk-WUVXPZIV.js +1116 -0
  77. package/payload/server/client-pool-3TM3SRIA.js +32 -0
  78. package/payload/server/cloudflare-task-tracker-4NIODMGL.js +19 -0
  79. package/payload/server/cloudflare-task-tracker-CR6TL4VL.js +19 -0
  80. package/payload/server/maxy-edge.js +3 -3
  81. package/payload/server/neo4j-migrations-XTQ4WEV6.js +428 -0
  82. package/payload/server/public/assets/{admin-DOkUspG1.js → admin-BNwPsMhJ.js} +2 -2
  83. package/payload/server/public/assets/{graph-LLMJa4Ch.js → graph-N_Bw-8oT.js} +1 -1
  84. package/payload/server/public/assets/{page-DoaF3DB0.js → page-BKLGP-th.js} +1 -1
  85. package/payload/server/public/graph.html +2 -2
  86. package/payload/server/public/index.html +2 -2
  87. package/payload/server/server.js +281 -168
  88. package/payload/platform/plugins/whatsapp-import/PLUGIN.md +0 -46
  89. package/payload/platform/plugins/whatsapp-import/bin/ingest.mjs +0 -670
  90. package/payload/platform/plugins/whatsapp-import/bin/whatsapp-ingest.sh +0 -131
  91. package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/filter-gate.test.ts +0 -172
  92. package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/ingest-idempotence.test.ts +0 -141
  93. package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/parse-export-lrm.test.ts +0 -83
  94. package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/parse-export.test.ts +0 -678
  95. package/payload/platform/plugins/whatsapp-import/lib/src/derive-keys.ts +0 -59
  96. package/payload/platform/plugins/whatsapp-import/lib/src/filter.ts +0 -136
  97. package/payload/platform/plugins/whatsapp-import/lib/src/index.ts +0 -19
  98. package/payload/platform/plugins/whatsapp-import/lib/src/parse-export.ts +0 -471
  99. package/payload/platform/plugins/whatsapp-import/lib/tsconfig.json +0 -9
  100. package/payload/platform/plugins/whatsapp-import/lib/vitest.config.ts +0 -9
  101. package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/SKILL.md +0 -131
  102. package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/references/export-parse.md +0 -109
  103. package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import-enrich/SKILL.md +0 -333
@@ -1,670 +0,0 @@
1
- #!/usr/bin/env node
2
- // =============================================================================
3
- // ingest.mjs — in-process orchestrator for whatsapp-ingest.sh.
4
- //
5
- // Phase 1 of the two-phase WhatsApp ingest contract (Task 855 / Task 871).
6
- // Deterministic only: parse → operator-supplied filter → archive-write
7
- // (Conversation + Messages + auto-Person participants + NEXT chronology).
8
- // No LLM in the per-message decision path. The Haiku insight pass moved to
9
- // Phase 2 (`mcp__memory__whatsapp-export-insight-pass`), invoked consciously
10
- // by the operator via the `whatsapp-import-enrich` skill.
11
- //
12
- // Argv (positional): <archive-path>
13
- // Argv (flags): --owner-element-id <id> --scope <admin|public>
14
- // --subject-person-id <id>
15
- // --filter <all|senders=<csv>|date-range=<isoFrom>..<isoTo>>
16
- // [--account-id <accountId>] [--timezone <iana>]
17
- // [--date-format <DD/MM/YY|MM/DD/YY|DD/MM/YYYY|MM/DD/YYYY>]
18
- //
19
- // Task 887 §A0 — `--subject-person-id` is required. DM scope: the operator
20
- // confirms the third party's :Person elementId from the preview histogram
21
- // before invoking. The writer accepts EXACTLY the senderNames carried by
22
- // {owner, subject}; any parsed senderName outside that set LOUD-FAILs with
23
- // `parser-miss reason="senderName=<verbatim> not in preview histogram
24
- // (parser failure — re-export or report)"`. Bounds writer cardinality to
25
- // the deterministic preview output — closes the auto-Person leak structurally.
26
- //
27
- // Stdout (success): one JSON line — Honest counters per Task 871.5.
28
- // {conversationElementId, conversationId,
29
- // parsed, mediaSkipped, systemSkipped,
30
- // filtered,
31
- // written, messagesAlreadyExisted,
32
- // nextEdgesProcessed, nextEdgesCreated,
33
- // participantsAlreadyExisted,
34
- // ms}
35
- // The skill (`whatsapp-import` SKILL.md) maps this verbose-diagnostic shape
36
- // to the agent-return short shape per Task 871.6 (`alreadyExisted` etc.)
37
- // when surfacing the result to the admin agent.
38
- //
39
- // Stderr (failure): one [whatsapp-ingest] FAIL line, exit non-zero.
40
- // =============================================================================
41
-
42
- import {
43
- existsSync,
44
- mkdtempSync,
45
- readdirSync,
46
- rmSync,
47
- statSync,
48
- } from "node:fs";
49
- import { join, resolve, dirname } from "node:path";
50
- import { tmpdir } from "node:os";
51
- import { spawnSync } from "node:child_process";
52
- import { fileURLToPath } from "node:url";
53
-
54
- const __dirname = dirname(fileURLToPath(import.meta.url));
55
-
56
- // ---------------------------------------------------------------------------
57
- // 1. Resolve dist paths from the platform install root.
58
- // ---------------------------------------------------------------------------
59
- // MAXY_PLATFORM_ROOT is set by the wrapper script (and by claude-agent.ts in
60
- // production). Fall back to walking up from this file's location: the bin/
61
- // directory sits at platform/plugins/whatsapp-import/bin/, so platform/ is
62
- // three levels up.
63
-
64
- const platformRoot =
65
- process.env.MAXY_PLATFORM_ROOT?.trim() ||
66
- resolve(__dirname, "..", "..", "..");
67
-
68
- const PARSE_EXPORT_PATH = resolve(
69
- platformRoot,
70
- "plugins",
71
- "whatsapp-import",
72
- "lib",
73
- "dist",
74
- "index.js",
75
- );
76
- const ARCHIVE_WRITE_PATH = resolve(
77
- platformRoot,
78
- "plugins",
79
- "memory",
80
- "mcp",
81
- "dist",
82
- "tools",
83
- "memory-archive-write.js",
84
- );
85
- const NEO4J_LIB_PATH = resolve(
86
- platformRoot,
87
- "plugins",
88
- "memory",
89
- "mcp",
90
- "dist",
91
- "lib",
92
- "neo4j.js",
93
- );
94
- // Task 870: pure key-derivation functions ship in the whatsapp-import lib.
95
- const DERIVE_KEYS_PATH = resolve(
96
- platformRoot,
97
- "plugins",
98
- "whatsapp-import",
99
- "lib",
100
- "dist",
101
- "derive-keys.js",
102
- );
103
-
104
- // ---------------------------------------------------------------------------
105
- // 2. Logger — every line `[whatsapp-ingest]`-prefixed to stderr (server.log).
106
- // ---------------------------------------------------------------------------
107
-
108
- function log(line) {
109
- process.stderr.write(`[whatsapp-ingest] ${line}\n`);
110
- }
111
-
112
- function fail(phase, fields) {
113
- // Single failure line. Used as the only stderr line on non-zero exit.
114
- const fieldStr = Object.entries(fields)
115
- .map(([k, v]) =>
116
- typeof v === "string" && (v.includes(" ") || v.includes("="))
117
- ? `${k}="${v.replace(/"/g, '\\"')}"`
118
- : `${k}=${v ?? "-"}`,
119
- )
120
- .join(" ");
121
- process.stderr.write(`[whatsapp-ingest] FAIL phase=${phase} ${fieldStr}\n`);
122
- process.exit(1);
123
- }
124
-
125
- // ---------------------------------------------------------------------------
126
- // 3. Argv parsing.
127
- // ---------------------------------------------------------------------------
128
-
129
- function parseArgv(argv) {
130
- const args = argv.slice(2);
131
- let archive = null;
132
- const flags = {};
133
- for (let i = 0; i < args.length; i++) {
134
- const a = args[i];
135
- if (!a.startsWith("--")) {
136
- if (archive == null) archive = a;
137
- else fail("argv", { reason: `unexpected positional argument "${a}"` });
138
- continue;
139
- }
140
- const key = a.slice(2);
141
- const v = args[++i];
142
- if (v == null) fail("argv", { reason: `flag --${key} requires a value` });
143
- flags[camelCase(key)] = v;
144
- }
145
- if (!archive) fail("argv", { reason: "missing positional <archive>" });
146
- if (!flags.ownerElementId) fail("argv", { reason: "missing --owner-element-id" });
147
- if (!flags.scope) fail("argv", { reason: "missing --scope" });
148
- if (flags.scope !== "admin" && flags.scope !== "public") {
149
- fail("argv", { reason: `invalid --scope "${flags.scope}" (admin|public)` });
150
- }
151
- // Task 887 §A0 — DM scope: a single `--subject-person-id` identifies the
152
- // third party in the conversation. The owner + subject pair is the
153
- // canonical sender set; the writer rejects any other senderName as
154
- // parser-miss. Group-chat ingest (>2 distinct senders) is a future task.
155
- if (!flags.subjectPersonId) {
156
- fail("argv", { reason: "missing --subject-person-id (Task 887: operator-confirmed third-party :Person elementId from preview histogram)" });
157
- }
158
- // Task 871: --filter is mandatory. The deterministic Bash entry refuses
159
- // bulk archive writes without an operator-supplied filter — closes the
160
- // doctrine gap named in feedback_compress_at_ingest_for_bulk_archives.md.
161
- if (!flags.filter || !flags.filter.trim()) {
162
- process.stderr.write(
163
- `[whatsapp-ingest] FAIL filter-required reason="bulk-archive-gate (Task 871) — operator must specify --filter (one of all, senders=<csv>, date-range=<isoFrom>..<isoTo>)"\n`,
164
- );
165
- fail("argv", { reason: "--filter is required (one of all, senders=<csv>, date-range=<isoFrom>..<isoTo>)" });
166
- }
167
- return { archive, flags };
168
- }
169
-
170
- function camelCase(s) {
171
- return s.replace(/-([a-z])/g, (_m, c) => c.toUpperCase());
172
- }
173
-
174
- // ---------------------------------------------------------------------------
175
- // 4. Archive resolution — accept .zip / dir / _chat.txt; return absolute
176
- // path to _chat.txt and a cleanup callback for any tmp dir we created.
177
- // ---------------------------------------------------------------------------
178
-
179
- function resolveChatTxt(archivePath) {
180
- const abs = resolve(archivePath);
181
- if (!existsSync(abs)) {
182
- fail("argv", { reason: `archive path not found: ${abs}` });
183
- }
184
- const st = statSync(abs);
185
-
186
- if (st.isFile() && abs.endsWith(".zip")) {
187
- const tmp = mkdtempSync(join(tmpdir(), "whatsapp-ingest-"));
188
- const unzip = spawnSync("unzip", ["-q", "-o", abs, "-d", tmp], {
189
- encoding: "utf8",
190
- });
191
- if (unzip.status !== 0) {
192
- rmSync(tmp, { recursive: true, force: true });
193
- fail("argv", {
194
- reason: "unzip failed",
195
- archive: abs,
196
- stderr: (unzip.stderr || "").slice(0, 200),
197
- });
198
- }
199
- const chat = findChatTxt(tmp);
200
- if (!chat) {
201
- rmSync(tmp, { recursive: true, force: true });
202
- fail("argv", { reason: "_chat.txt not found in zip", archive: abs });
203
- }
204
- return { chatTxt: chat, cleanup: () => rmSync(tmp, { recursive: true, force: true }) };
205
- }
206
-
207
- if (st.isDirectory()) {
208
- const chat = findChatTxt(abs);
209
- if (!chat) fail("argv", { reason: "_chat.txt not found in directory", archive: abs });
210
- return { chatTxt: chat, cleanup: () => {} };
211
- }
212
-
213
- if (st.isFile()) {
214
- return { chatTxt: abs, cleanup: () => {} };
215
- }
216
-
217
- fail("argv", { reason: `unsupported archive shape: ${abs}` });
218
- // unreachable, but keeps tools happy
219
- return { chatTxt: abs, cleanup: () => {} };
220
- }
221
-
222
- function findChatTxt(dir) {
223
- const entries = readdirSync(dir, { withFileTypes: true });
224
- for (const e of entries) {
225
- if (e.isFile() && e.name === "_chat.txt") return join(dir, e.name);
226
- }
227
- for (const e of entries) {
228
- if (e.isDirectory()) {
229
- const nested = findChatTxt(join(dir, e.name));
230
- if (nested) return nested;
231
- }
232
- }
233
- return null;
234
- }
235
-
236
- // ---------------------------------------------------------------------------
237
- // 5. Account resolution — Phase 0 has one account dir. The wrapper passes
238
- // --account-id when it is known; otherwise we pick the single dir under
239
- // {install}/data/accounts/.
240
- // ---------------------------------------------------------------------------
241
-
242
- function resolveAccountId(flags) {
243
- if (flags.accountId && flags.accountId.trim()) return flags.accountId.trim();
244
- const installDir = resolve(platformRoot, "..");
245
- const accountsDir = join(installDir, "data", "accounts");
246
- if (!existsSync(accountsDir)) {
247
- fail("argv", {
248
- reason: `accounts dir not found: ${accountsDir}; pass --account-id explicitly`,
249
- });
250
- }
251
- const dirs = readdirSync(accountsDir, { withFileTypes: true })
252
- .filter((d) => d.isDirectory() && !d.name.startsWith("."))
253
- .map((d) => d.name);
254
- if (dirs.length === 0) {
255
- fail("argv", { reason: `no accounts found under ${accountsDir}` });
256
- }
257
- if (dirs.length > 1) {
258
- fail("argv", {
259
- reason: `multiple accounts under ${accountsDir} (${dirs.join(",")}); pass --account-id explicitly`,
260
- });
261
- }
262
- return dirs[0];
263
- }
264
-
265
- // ---------------------------------------------------------------------------
266
- // 6. Bind senders to canonical {owner, subject} pair (Task 887 §A0).
267
- //
268
- // Pre-887 behaviour: a `resolveParticipants` step ran each parsed senderName
269
- // against an account-wide canonical index, falling through to auto-create a
270
- // `:Person {participantStatus:'auto-created'}` node for any miss. That
271
- // fallback path was the structural defect: any parser failure that produced
272
- // a polluted senderName (Task 845's LRM-prefixed body glued onto the prior
273
- // header → senderName="\"Adam Mackay:\\n‎[04/02/2026, 11:52:16] Adam Mackay\"")
274
- // leaked one bogus :Person per distinct miss. 23 leaked from the Adam Mackay
275
- // archive in a single ingest.
276
- //
277
- // 887 §A0 closes the leak by deleting the fallback. The operator now passes
278
- // `--subject-person-id` (third-party Person elementId from preview), the
279
- // dispatch passes `--owner-element-id`, and the writer accepts EXACTLY the
280
- // name candidates of those two nodes. Any parsed senderName outside that
281
- // closed set LOUD-FAILs the ingest with the verbatim string — surfacing a
282
- // parser bug as a hard exit instead of as graph pollution.
283
- // ---------------------------------------------------------------------------
284
-
285
- const CANONICAL_PAIR_FETCH_CYPHER = `
286
- UNWIND $ids AS id
287
- MATCH (n) WHERE elementId(n) = id
288
- RETURN elementId(n) AS elemId,
289
- labels(n) AS labels,
290
- coalesce(n.name, '') AS name,
291
- coalesce(n.givenName, '') AS givenName,
292
- coalesce(n.familyName, '') AS familyName,
293
- coalesce(n.accountId, '') AS accountId
294
- `;
295
-
296
- /**
297
- * Sentinel error class so `main()`'s try/catch can recognise an operator
298
- * LOUD-FAIL (already-emitted FAIL line) and exit cleanly with cleanup.
299
- * Plain `process.exit(1)` from inside `bindCanonicalSenders` would skip
300
- * `main()`'s `cleanup()` (unzip tmp dir) and `session.close()` paths.
301
- */
302
- class IngestUserFacingError extends Error {
303
- constructor(message) {
304
- super(message);
305
- this.name = "IngestUserFacingError";
306
- this.userFacing = true;
307
- }
308
- }
309
-
310
- async function bindCanonicalSenders({
311
- session,
312
- accountId,
313
- ownerElementId,
314
- subjectPersonId,
315
- senderNames,
316
- normaliseSenderName,
317
- }) {
318
- // Distinctness pre-flight — operator passing the same id for both flags
319
- // collapses to a single bound participant; the drift between
320
- // `participantCount=2` (from distinct senderNames) and
321
- // `participantsAlreadyExisted=1` would propagate to the JSON summary
322
- // silently. Refuse early, name the cause.
323
- if (ownerElementId === subjectPersonId) {
324
- throw new IngestUserFacingError(
325
- `--owner-element-id and --subject-person-id must be distinct elementIds (both supplied as "${ownerElementId}")`,
326
- );
327
- }
328
-
329
- const ids = [ownerElementId, subjectPersonId];
330
- const res = await session.executeRead(async (tx) =>
331
- tx.run(CANONICAL_PAIR_FETCH_CYPHER, { ids }),
332
- );
333
-
334
- // Build normalised-name → elementId index from owner + subject candidates.
335
- // For each node we accept the full name, given name, family name, and
336
- // "given family" composite as match candidates so an export header that
337
- // says "Adam" or "Adam Mackay" both resolve to the same node.
338
- const index = new Map();
339
- const seenIds = new Set();
340
- const labelByElemId = new Map();
341
- for (const r of res.records) {
342
- const elemId = r.get("elemId");
343
- const labels = r.get("labels") || [];
344
- const acct = r.get("accountId") || "";
345
- // Empty accountId on a canonical Person/AdminUser is a graph-data
346
- // defect (migration 004 normally prunes account-less nodes). Refuse
347
- // rather than silently accept — bound-pair correctness depends on
348
- // accountId being present and matching.
349
- if (!acct) {
350
- throw new IngestUserFacingError(
351
- `node ${elemId} has no accountId — corrupt canonical Person/AdminUser; refusing ingest`,
352
- );
353
- }
354
- if (acct !== accountId) {
355
- throw new IngestUserFacingError(
356
- `node ${elemId} belongs to account ${acct}, not ${accountId} — refusing cross-account ingest`,
357
- );
358
- }
359
- if (!labels.includes("Person") && !labels.includes("AdminUser")) {
360
- throw new IngestUserFacingError(
361
- `node ${elemId} has labels [${labels.join(",")}]; expected :Person or :AdminUser`,
362
- );
363
- }
364
- seenIds.add(elemId);
365
- labelByElemId.set(
366
- elemId,
367
- labels.includes("Person") ? "Person" : "AdminUser",
368
- );
369
- const candidates = [];
370
- const name = r.get("name") || "";
371
- const given = r.get("givenName") || "";
372
- const family = r.get("familyName") || "";
373
- if (name) candidates.push(name);
374
- if (given && family) candidates.push(`${given} ${family}`);
375
- if (given) candidates.push(given);
376
- if (family) candidates.push(family);
377
- for (const c of candidates) {
378
- const norm = normaliseSenderName(c);
379
- if (!norm) continue;
380
- if (!index.has(norm)) index.set(norm, elemId);
381
- }
382
- }
383
-
384
- // Both ids must resolve. A typo'd or stale id is operator error; LOUD-FAIL
385
- // before any parser work touches the graph.
386
- if (!seenIds.has(ownerElementId)) {
387
- throw new IngestUserFacingError(
388
- `--owner-element-id ${ownerElementId} not found in graph`,
389
- );
390
- }
391
- if (!seenIds.has(subjectPersonId)) {
392
- throw new IngestUserFacingError(
393
- `--subject-person-id ${subjectPersonId} not found in graph`,
394
- );
395
- }
396
- // Subject must specifically be a `:Person` — the third party in a DM is
397
- // never the operator's `:AdminUser`. (Owner can be either; both Adam and
398
- // an external collaborator owning an export are operator-curated cases.)
399
- if (labelByElemId.get(subjectPersonId) !== "Person") {
400
- throw new IngestUserFacingError(
401
- `--subject-person-id ${subjectPersonId} resolves to a :${labelByElemId.get(subjectPersonId)} — subject must be a :Person`,
402
- );
403
- }
404
-
405
- // Group-chat early-detect: the singular `--subject-person-id` flag is
406
- // DM-scoped. A `_chat.txt` carrying ≥3 distinct senders is an unsupported
407
- // scope, NOT a parser bug. Emit a distinct reason so the operator does
408
- // not chase a phantom parser regression. Group-chat support is the
409
- // separate Task 889 lane.
410
- if (senderNames.length > 2) {
411
- throw new IngestUserFacingError(
412
- `unsupported-scope reason="archive carries ${senderNames.length} distinct senders; --subject-person-id is DM-only (≤2 senders) — group-chat ingest is the separate Task 889 lane"`,
413
- );
414
- }
415
-
416
- // Validate every distinct parsed senderName against the closed candidate
417
- // set. The first miss is the LOUD-FAIL — operators see one parser-miss
418
- // line per bad import, not 23.
419
- const idsByName = new Map();
420
- for (const senderName of senderNames) {
421
- const norm = normaliseSenderName(senderName);
422
- const hit = index.get(norm);
423
- if (!hit) {
424
- throw new IngestUserFacingError(
425
- `parser-miss reason="senderName=${senderName} not in preview histogram (parser failure — re-export or report)"`,
426
- );
427
- }
428
- idsByName.set(senderName, hit);
429
- log(
430
- `participant-resolved senderName="${senderName}" matched=canonical nodeId=${hit}`,
431
- );
432
- }
433
- // participantsAlreadyExisted = always 2 (owner + subject) under the
434
- // bound-pair contract; expose it for the JSON summary's existing field.
435
- return { idsByName, participantsAlreadyExisted: seenIds.size };
436
- }
437
-
438
- // ---------------------------------------------------------------------------
439
- // 7. Main.
440
- // ---------------------------------------------------------------------------
441
-
442
- async function main() {
443
- const startedMs = Date.now();
444
- const { archive, flags } = parseArgv(process.argv);
445
- const ownerElementId = flags.ownerElementId;
446
- const subjectPersonId = flags.subjectPersonId;
447
- const scope = flags.scope;
448
- const accountId = resolveAccountId(flags);
449
- const timezone = flags.timezone || "Europe/London";
450
- const dateFormat = flags.dateFormat;
451
- const sessionId =
452
- flags.sessionId || `whatsapp-ingest:${Date.now()}:${Math.random().toString(36).slice(2, 10)}`;
453
-
454
- let parseExport, parseFilterArg, applyFilter, memoryArchiveWrite, getSession;
455
- let normaliseSenderName, deriveMessageId;
456
- try {
457
- ({ parseExport, parseFilterArg, applyFilter } = await import(PARSE_EXPORT_PATH));
458
- ({ memoryArchiveWrite } = await import(ARCHIVE_WRITE_PATH));
459
- ({ getSession } = await import(NEO4J_LIB_PATH));
460
- ({ normaliseSenderName, deriveMessageId } = await import(DERIVE_KEYS_PATH));
461
- } catch (err) {
462
- fail("import", {
463
- reason: `failed to import compiled dist`,
464
- detail: err instanceof Error ? err.message : String(err),
465
- });
466
- }
467
-
468
- // 7a. Parse the operator-supplied filter ahead of any IO so a malformed
469
- // value LOUD-FAILs before unzip / Neo4j connection.
470
- let filter;
471
- try {
472
- filter = parseFilterArg(flags.filter);
473
- } catch (err) {
474
- fail("argv", { reason: err instanceof Error ? err.message : String(err) });
475
- }
476
-
477
- // 7b. Resolve _chat.txt
478
- const { chatTxt, cleanup } = resolveChatTxt(archive);
479
- const archiveBytes = statSync(chatTxt).size;
480
- log(
481
- `start file=${chatTxt} owner=${ownerElementId} scope=${scope} accountId=${accountId} archive-bytes=${archiveBytes} filter=${flags.filter}`,
482
- );
483
-
484
- // 7c. Parse
485
- let parseResult;
486
- const parseStart = Date.now();
487
- try {
488
- parseResult = parseExport({ filePath: chatTxt, accountId, timezone, dateFormat });
489
- } catch (err) {
490
- cleanup();
491
- fail("parse", { reason: err instanceof Error ? err.message : String(err) });
492
- }
493
- const parseMs = Date.now() - parseStart;
494
- log(
495
- `parsed lines=${parseResult.counters.parsed} media-skipped=${parseResult.counters.mediaSkipped} system-skipped=${parseResult.counters.systemSkipped} ms=${parseMs}`,
496
- );
497
-
498
- // 7d. Apply filter
499
- const filteredLines = applyFilter(parseResult.parsedLines, filter);
500
- log(
501
- `filter-applied kind=${filter.kind} parsed=${parseResult.parsedLines.length} filtered=${filteredLines.length}`,
502
- );
503
- if (filteredLines.length === 0) {
504
- cleanup();
505
- fail("filter", {
506
- reason: `filter "${flags.filter}" matched zero rows from ${parseResult.parsedLines.length} parsed lines — refusing to write an empty conversation`,
507
- });
508
- }
509
-
510
- const firstTs = filteredLines[0].dateSent;
511
- const lastTs = filteredLines[filteredLines.length - 1].dateSent;
512
-
513
- // 7e. Auto-create participants and build rows
514
- let session = getSession();
515
- let participantUpsert = { idsByName: new Map(), participantsAlreadyExisted: 0 };
516
- const distinctSenderNames = Array.from(
517
- new Set(filteredLines.map((l) => l.senderName)),
518
- );
519
-
520
- try {
521
- participantUpsert = await bindCanonicalSenders({
522
- session,
523
- accountId,
524
- ownerElementId,
525
- subjectPersonId,
526
- senderNames: distinctSenderNames,
527
- normaliseSenderName,
528
- });
529
- } catch (err) {
530
- await session.close().catch(() => {});
531
- cleanup();
532
- // IngestUserFacingError carries a brief-shaped FAIL line (parser-miss /
533
- // unsupported-scope / argv mismatch) that the operator's grep recipes
534
- // already match on. Preserve it verbatim instead of wrapping in
535
- // phase=archive-write — wrapping would defeat
536
- // `grep '\[whatsapp-ingest\] FAIL parser-miss'` and friends.
537
- if (err && err.userFacing) {
538
- process.stderr.write(`[whatsapp-ingest] FAIL ${err.message}\n`);
539
- process.exit(1);
540
- }
541
- fail("archive-write", {
542
- phase: "bind-canonical-senders",
543
- reason: err instanceof Error ? err.message : String(err),
544
- });
545
- }
546
-
547
- const participantIds = participantUpsert.idsByName;
548
- const participantsAlreadyExisted = participantUpsert.participantsAlreadyExisted;
549
-
550
- if (participantIds.size !== distinctSenderNames.length) {
551
- log(
552
- `participant-resolve mismatch expected=${distinctSenderNames.length} got=${participantIds.size}`,
553
- );
554
- }
555
-
556
- const conversationId = parseResult.conversationId;
557
- // Task 870: messageId derives from a content-derived natural key. Stable
558
- // across re-imports — same archive bytes + same operator timezone produce
559
- // identical messageIds, so the existing MERGE in memory-archive-write is
560
- // naturally idempotent.
561
- const conversationSha256 = parseResult.archiveSourceFile.replace(
562
- /^whatsapp-export:/,
563
- "",
564
- );
565
- const rows = filteredLines.map((l) => ({
566
- messageId: deriveMessageId({
567
- conversationSha256,
568
- dateSent: l.dateSent,
569
- senderName: l.senderName,
570
- body: l.body,
571
- }),
572
- conversationId,
573
- senderNodeId: participantIds.get(l.senderName) || ownerElementId,
574
- senderName: l.senderName,
575
- dateSent: l.dateSent,
576
- body: l.body,
577
- // sequenceIndex preserved on the Message node for chain ordering tiebreaker
578
- // in WHATSAPP_NEXT_CHAIN_CYPHER. NOT part of messageId — that would couple
579
- // identity to array position and re-introduce shadowing.
580
- sequenceIndex: l.sequenceIndex,
581
- }));
582
-
583
- const conversationBlock = {
584
- conversationId,
585
- archiveSourceFile: parseResult.archiveSourceFile,
586
- firstMessageAt: firstTs,
587
- lastMessageAt: lastTs,
588
- participantCount: distinctSenderNames.length,
589
- messageCount: rows.length,
590
- };
591
-
592
- // Close the session before archive-write — memoryArchiveWrite opens its
593
- // own session and closes it in finally.
594
- await session.close().catch(() => {});
595
-
596
- // 7f. Archive-write
597
- let archiveResult;
598
- const writeStart = Date.now();
599
- try {
600
- archiveResult = await memoryArchiveWrite({
601
- archiveType: "whatsapp-export",
602
- ownerNodeId: ownerElementId,
603
- accountId,
604
- conversation: conversationBlock,
605
- participantNodeIds: Array.from(participantIds.values()),
606
- rows,
607
- sessionId,
608
- archiveFilePath: chatTxt,
609
- });
610
- } catch (err) {
611
- cleanup();
612
- fail("archive-write", {
613
- reason: err instanceof Error ? err.message : String(err),
614
- });
615
- }
616
- const writeMs = Date.now() - writeStart;
617
- const createdMessages = archiveResult.counters.createdMessages || 0;
618
- const nextEdgesProcessed = archiveResult.counters.nextEdgesProcessed || 0;
619
- const nextEdgesCreated = archiveResult.counters.nextEdgesCreated || 0;
620
- const messagesAlreadyExisted = rows.length - createdMessages;
621
- log(
622
- `archive-write created-messages=${createdMessages} messages-already-existed=${messagesAlreadyExisted} next-edges-processed=${nextEdgesProcessed} next-edges-created=${nextEdgesCreated} participants-already-existed=${participantsAlreadyExisted} ms=${writeMs}`,
623
- );
624
-
625
- // 7g. Resolve conversationElementId for the agent-return shape (Task 871.6).
626
- // One small read-after-write — the operator's database-operator subagent
627
- // surfaces this as the canonical handle for downstream Phase 2 calls.
628
- let conversationElementId = null;
629
- const ridSession = getSession();
630
- try {
631
- const res = await ridSession.run(
632
- `MATCH (c:Conversation:WhatsAppConversation {conversationId: $cid})
633
- RETURN elementId(c) AS elemId LIMIT 1`,
634
- { cid: conversationId },
635
- );
636
- conversationElementId = res.records[0]?.get("elemId") ?? null;
637
- } catch (err) {
638
- log(`conversation-elementid-lookup failed reason="${err instanceof Error ? err.message : String(err)}"`);
639
- } finally {
640
- await ridSession.close().catch(() => {});
641
- }
642
-
643
- cleanup();
644
-
645
- const totalMs = Date.now() - startedMs;
646
- log(`done conversationId=${conversationId} total-ms=${totalMs} exit=0`);
647
-
648
- process.stdout.write(
649
- JSON.stringify({
650
- conversationElementId,
651
- conversationId,
652
- parsed: parseResult.counters.parsed,
653
- mediaSkipped: parseResult.counters.mediaSkipped,
654
- systemSkipped: parseResult.counters.systemSkipped,
655
- filtered: filteredLines.length,
656
- written: createdMessages,
657
- messagesAlreadyExisted,
658
- nextEdgesProcessed,
659
- nextEdgesCreated,
660
- participantsAlreadyExisted,
661
- ms: totalMs,
662
- }) + "\n",
663
- );
664
- process.exit(0);
665
- }
666
-
667
-
668
- main().catch((err) => { // Top-level invocation of main(); a rejection that escapes it is handed to fail() under the "uncaught" label.
669
- fail("uncaught", { reason: err instanceof Error ? err.message : String(err) }); // Error instances contribute their message; any other rejected value is passed through String().
670
- });