@rubytech/create-maxy 1.0.807 → 1.0.809
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/payload/platform/plugins/admin/skills/onboarding/SKILL.md +2 -0
- package/payload/platform/plugins/docs/references/cloudflare.md +1 -0
- package/payload/platform/plugins/docs/references/memory-guide.md +4 -0
- package/payload/platform/plugins/docs/references/troubleshooting.md +19 -1
- package/payload/platform/plugins/memory/mcp/dist/index.js +86 -0
- package/payload/platform/plugins/memory/mcp/dist/index.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/profile-read.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/profile-read.js +19 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/profile-read.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-insight-pass.d.ts +23 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-insight-pass.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-insight-pass.js +401 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-insight-pass.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-preview.d.ts +28 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-preview.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-preview.js +34 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-preview.js.map +1 -0
- package/payload/platform/plugins/memory/references/schema-base.md +12 -0
- package/payload/platform/plugins/whatsapp/PLUGIN.md +3 -1
- package/payload/platform/plugins/whatsapp-import/bin/ingest.mjs +225 -346
- package/payload/platform/plugins/whatsapp-import/bin/whatsapp-ingest.sh +28 -10
- package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.d.ts +21 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.d.ts.map +1 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.js +41 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.js.map +1 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/filter.d.ts +29 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/filter.d.ts.map +1 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/filter.js +123 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/filter.js.map +1 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/index.d.ts +4 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/index.d.ts.map +1 -1
- package/payload/platform/plugins/whatsapp-import/lib/dist/index.js +9 -1
- package/payload/platform/plugins/whatsapp-import/lib/dist/index.js.map +1 -1
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/filter-gate.test.ts +170 -0
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/ingest-idempotence.test.ts +141 -0
- package/payload/platform/plugins/whatsapp-import/lib/src/derive-keys.ts +59 -0
- package/payload/platform/plugins/whatsapp-import/lib/src/filter.ts +136 -0
- package/payload/platform/plugins/whatsapp-import/lib/src/index.ts +12 -0
- package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/SKILL.md +80 -25
- package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import-enrich/SKILL.md +22 -3
- package/payload/platform/templates/agents/admin/IDENTITY.md +1 -0
- package/payload/platform/templates/agents/admin/SOUL.md +2 -0
- package/payload/platform/templates/specialists/agents/database-operator.md +9 -4
- package/payload/server/chunk-CRWLE6BZ.js +3511 -0
- package/payload/server/chunk-V3VLAL7N.js +10009 -0
- package/payload/server/client-pool-N2Y57223.js +31 -0
- package/payload/server/maxy-edge.js +5 -4
- package/payload/server/public/assets/admin-Bwrd2DBq.js +352 -0
- package/payload/server/public/index.html +1 -1
- package/payload/server/server.js +596 -250
- package/payload/server/public/assets/admin-CTM9Vb-j.js +0 -352
|
@@ -1,37 +1,43 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
// =============================================================================
|
|
3
|
-
// ingest.mjs — in-process orchestrator for whatsapp-ingest.sh
|
|
3
|
+
// ingest.mjs — in-process orchestrator for whatsapp-ingest.sh.
|
|
4
4
|
//
|
|
5
|
-
//
|
|
6
|
-
//
|
|
7
|
-
//
|
|
8
|
-
//
|
|
9
|
-
//
|
|
5
|
+
// Phase 1 of the two-phase WhatsApp ingest contract (Task 855 / Task 871).
|
|
6
|
+
// Deterministic only: parse → operator-supplied filter → archive-write
|
|
7
|
+
// (Conversation + Messages + auto-Person participants + NEXT chronology).
|
|
8
|
+
// No LLM in the per-message decision path. The Haiku insight pass moved to
|
|
9
|
+
// Phase 2 (`mcp__memory__whatsapp-export-insight-pass`), invoked consciously
|
|
10
|
+
// by the operator via the `whatsapp-import-enrich` skill.
|
|
10
11
|
//
|
|
11
12
|
// Argv (positional): <archive-path>
|
|
12
13
|
// Argv (flags): --owner-element-id <id> --scope <admin|public>
|
|
14
|
+
// --filter <all|senders=<csv>|date-range=<isoFrom>..<isoTo>>
|
|
13
15
|
// [--account-id <accountId>] [--timezone <iana>]
|
|
14
16
|
// [--date-format <DD/MM/YY|MM/DD/YY|DD/MM/YYYY|MM/DD/YYYY>]
|
|
15
|
-
// [--no-insight]
|
|
16
17
|
//
|
|
17
|
-
// Stdout (success): one JSON line
|
|
18
|
-
// {
|
|
19
|
-
//
|
|
18
|
+
// Stdout (success): one JSON line — Honest counters per Task 871.5.
|
|
19
|
+
// {conversationElementId, conversationId,
|
|
20
|
+
// parsed, mediaSkipped, systemSkipped,
|
|
21
|
+
// filtered,
|
|
22
|
+
// written, messagesAlreadyExisted,
|
|
23
|
+
// nextEdgesProcessed, nextEdgesCreated,
|
|
24
|
+
// participantsAlreadyExisted,
|
|
25
|
+
// ms}
|
|
26
|
+
// The skill (`whatsapp-import` SKILL.md) maps this verbose-diagnostic shape
|
|
27
|
+
// to the agent-return short shape per Task 871.6 (`alreadyExisted` etc.)
|
|
28
|
+
// when surfacing the result to the admin agent.
|
|
20
29
|
//
|
|
21
30
|
// Stderr (failure): one [whatsapp-ingest] FAIL line, exit non-zero.
|
|
22
31
|
// =============================================================================
|
|
23
32
|
|
|
24
33
|
import {
|
|
25
|
-
createReadStream,
|
|
26
34
|
existsSync,
|
|
27
35
|
mkdtempSync,
|
|
28
36
|
readdirSync,
|
|
29
|
-
readFileSync,
|
|
30
37
|
rmSync,
|
|
31
38
|
statSync,
|
|
32
|
-
writeFileSync,
|
|
33
39
|
} from "node:fs";
|
|
34
|
-
import { join, resolve, dirname
|
|
40
|
+
import { join, resolve, dirname } from "node:path";
|
|
35
41
|
import { tmpdir } from "node:os";
|
|
36
42
|
import { spawnSync } from "node:child_process";
|
|
37
43
|
import { fileURLToPath } from "node:url";
|
|
@@ -76,19 +82,14 @@ const NEO4J_LIB_PATH = resolve(
|
|
|
76
82
|
"lib",
|
|
77
83
|
"neo4j.js",
|
|
78
84
|
);
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
"lib",
|
|
82
|
-
"oauth-llm",
|
|
83
|
-
"dist",
|
|
84
|
-
"index.js",
|
|
85
|
-
);
|
|
86
|
-
const MODELS_PATH = resolve(
|
|
85
|
+
// Task 870: pure key-derivation functions ship in the whatsapp-import lib.
|
|
86
|
+
const DERIVE_KEYS_PATH = resolve(
|
|
87
87
|
platformRoot,
|
|
88
|
+
"plugins",
|
|
89
|
+
"whatsapp-import",
|
|
88
90
|
"lib",
|
|
89
|
-
"models",
|
|
90
91
|
"dist",
|
|
91
|
-
"
|
|
92
|
+
"derive-keys.js",
|
|
92
93
|
);
|
|
93
94
|
|
|
94
95
|
// ---------------------------------------------------------------------------
|
|
@@ -128,10 +129,6 @@ function parseArgv(argv) {
|
|
|
128
129
|
continue;
|
|
129
130
|
}
|
|
130
131
|
const key = a.slice(2);
|
|
131
|
-
if (key === "no-insight") {
|
|
132
|
-
flags.noInsight = true;
|
|
133
|
-
continue;
|
|
134
|
-
}
|
|
135
132
|
const v = args[++i];
|
|
136
133
|
if (v == null) fail("argv", { reason: `flag --${key} requires a value` });
|
|
137
134
|
flags[camelCase(key)] = v;
|
|
@@ -142,6 +139,15 @@ function parseArgv(argv) {
|
|
|
142
139
|
if (flags.scope !== "admin" && flags.scope !== "public") {
|
|
143
140
|
fail("argv", { reason: `invalid --scope "${flags.scope}" (admin|public)` });
|
|
144
141
|
}
|
|
142
|
+
// Task 871: --filter is mandatory. The deterministic Bash entry refuses
|
|
143
|
+
// bulk archive writes without an operator-supplied filter — closes the
|
|
144
|
+
// doctrine gap named in feedback_compress_at_ingest_for_bulk_archives.md.
|
|
145
|
+
if (!flags.filter || !flags.filter.trim()) {
|
|
146
|
+
process.stderr.write(
|
|
147
|
+
`[whatsapp-ingest] FAIL filter-required reason="bulk-archive-gate (Task 871) — operator must specify --filter (one of all, senders=<csv>, date-range=<isoFrom>..<isoTo>)"\n`,
|
|
148
|
+
);
|
|
149
|
+
fail("argv", { reason: "--filter is required (one of all, senders=<csv>, date-range=<isoFrom>..<isoTo>)" });
|
|
150
|
+
}
|
|
145
151
|
return { archive, flags };
|
|
146
152
|
}
|
|
147
153
|
|
|
@@ -241,11 +247,31 @@ function resolveAccountId(flags) {
|
|
|
241
247
|
}
|
|
242
248
|
|
|
243
249
|
// ---------------------------------------------------------------------------
|
|
244
|
-
// 6.
|
|
245
|
-
//
|
|
246
|
-
//
|
|
250
|
+
// 6. Resolve participants — match each sender against canonical
|
|
251
|
+
// (:Person|:AdminUser {accountId}) by NFKC-trim-lower normalisation
|
|
252
|
+
// before falling through to auto-Person creation (Task 870). Per-sender
|
|
253
|
+
// `participant-resolved` log line so re-imports surface canonical reuse.
|
|
254
|
+
// Auto-Person fallback retains the legacy `{accountId, source:'whatsapp',
|
|
255
|
+
// name}` shape; switching to givenName/familyName is Task 874.
|
|
247
256
|
// ---------------------------------------------------------------------------
|
|
248
257
|
|
|
258
|
+
const CANONICAL_FETCH_CYPHER = `
|
|
259
|
+
MATCH (p:Person {accountId: $accountId})
|
|
260
|
+
WHERE coalesce(p.participantStatus, '') <> 'auto-created'
|
|
261
|
+
RETURN elementId(p) AS elemId,
|
|
262
|
+
coalesce(p.givenName, '') AS givenName,
|
|
263
|
+
coalesce(p.familyName, '') AS familyName,
|
|
264
|
+
NULL AS adminName,
|
|
265
|
+
'Person' AS label
|
|
266
|
+
UNION
|
|
267
|
+
MATCH (u:AdminUser {accountId: $accountId})
|
|
268
|
+
RETURN elementId(u) AS elemId,
|
|
269
|
+
'' AS givenName,
|
|
270
|
+
'' AS familyName,
|
|
271
|
+
coalesce(u.name, '') AS adminName,
|
|
272
|
+
'AdminUser' AS label
|
|
273
|
+
`;
|
|
274
|
+
|
|
249
275
|
const PARTICIPANT_UPSERT_CYPHER = `
|
|
250
276
|
UNWIND $names AS senderName
|
|
251
277
|
MERGE (p:Person {accountId: $accountId, source: 'whatsapp', name: senderName})
|
|
@@ -259,278 +285,109 @@ MERGE (p:Person {accountId: $accountId, source: 'whatsapp', name: senderName})
|
|
|
259
285
|
RETURN elementId(p) AS elemId, senderName AS name
|
|
260
286
|
`;
|
|
261
287
|
|
|
262
|
-
async function
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
for (const r of res.records) {
|
|
283
|
-
map.set(r.get("name"), r.get("elemId"));
|
|
288
|
+
async function fetchCanonicalIndex({ session, accountId, normaliseSenderName }) {
|
|
289
|
+
// Returns Map<normalisedName, {elemId, label}> for canonical participants.
|
|
290
|
+
// Excludes auto-created Persons so we never reuse our own shadow nodes.
|
|
291
|
+
const res = await session.executeRead(async (tx) =>
|
|
292
|
+
tx.run(CANONICAL_FETCH_CYPHER, { accountId }),
|
|
293
|
+
);
|
|
294
|
+
const index = new Map();
|
|
295
|
+
for (const r of res.records) {
|
|
296
|
+
const elemId = r.get("elemId");
|
|
297
|
+
const label = r.get("label");
|
|
298
|
+
const candidates = [];
|
|
299
|
+
if (label === "Person") {
|
|
300
|
+
const given = r.get("givenName") || "";
|
|
301
|
+
const family = r.get("familyName") || "";
|
|
302
|
+
if (given || family) candidates.push(`${given} ${family}`.trim());
|
|
303
|
+
if (given) candidates.push(given);
|
|
304
|
+
if (family) candidates.push(family);
|
|
305
|
+
} else {
|
|
306
|
+
const adminName = r.get("adminName") || "";
|
|
307
|
+
if (adminName) candidates.push(adminName);
|
|
284
308
|
}
|
|
285
|
-
|
|
286
|
-
|
|
309
|
+
for (const c of candidates) {
|
|
310
|
+
const norm = normaliseSenderName(c);
|
|
311
|
+
if (!norm) continue;
|
|
312
|
+
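// First write wins — Person rows take precedence over AdminUser only if the query returns them first (first UNION branch); UNION itself does not promise row order.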
|
|
313
|
+
if (!index.has(norm)) index.set(norm, { elemId, label });
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
return index;
|
|
287
317
|
}
|
|
288
318
|
|
|
289
|
-
|
|
290
|
-
// 7. Insight pass — chunked Haiku call, in-process Cypher writes.
|
|
291
|
-
// One :Observation node per extracted item, OBSERVED_IN edge to the
|
|
292
|
-
// Conversation. Edge wiring to specific :Person/:Task/etc. is Task 856.
|
|
293
|
-
// ---------------------------------------------------------------------------
|
|
294
|
-
|
|
295
|
-
const INSIGHT_CHUNK_SIZE = 1500; // messages per Haiku call (≈75K tokens budget)
|
|
296
|
-
|
|
297
|
-
const INSIGHT_SYSTEM_PROMPT = `You extract structured insights from a chunk of a WhatsApp conversation.
|
|
298
|
-
|
|
299
|
-
Return STRICT JSON via the provided tool. No prose, no commentary. Only items with concrete, verbatim evidence in the chunk. Empty arrays are valid; prefer omission to invention.
|
|
300
|
-
|
|
301
|
-
Definitions:
|
|
302
|
-
- "mention": a person, organisation, place, or named topic referred to by name.
|
|
303
|
-
- "task": something a participant committed to do or asked another to do (imperative or future-tense).
|
|
304
|
-
- "preference": stated like, dislike, opinion, or rule of behaviour.
|
|
305
|
-
- "observedRelationship": an explicit relational claim (works at, is married to, manages, etc.).
|
|
306
|
-
|
|
307
|
-
Snippets must be ≤80 characters of the original message body, no sender names, no timestamps.`;
|
|
308
|
-
|
|
309
|
-
const INSIGHT_TOOL = {
|
|
310
|
-
name: "submit_insights",
|
|
311
|
-
description: "Submit the structured insights extracted from the chunk.",
|
|
312
|
-
input_schema: {
|
|
313
|
-
type: "object",
|
|
314
|
-
properties: {
|
|
315
|
-
mentions: {
|
|
316
|
-
type: "array",
|
|
317
|
-
items: {
|
|
318
|
-
type: "object",
|
|
319
|
-
properties: {
|
|
320
|
-
name: { type: "string" },
|
|
321
|
-
snippet: { type: "string" },
|
|
322
|
-
},
|
|
323
|
-
required: ["name", "snippet"],
|
|
324
|
-
},
|
|
325
|
-
},
|
|
326
|
-
tasks: {
|
|
327
|
-
type: "array",
|
|
328
|
-
items: {
|
|
329
|
-
type: "object",
|
|
330
|
-
properties: {
|
|
331
|
-
task: { type: "string" },
|
|
332
|
-
snippet: { type: "string" },
|
|
333
|
-
},
|
|
334
|
-
required: ["task", "snippet"],
|
|
335
|
-
},
|
|
336
|
-
},
|
|
337
|
-
preferences: {
|
|
338
|
-
type: "array",
|
|
339
|
-
items: {
|
|
340
|
-
type: "object",
|
|
341
|
-
properties: {
|
|
342
|
-
subject: { type: "string" },
|
|
343
|
-
preference: { type: "string" },
|
|
344
|
-
},
|
|
345
|
-
required: ["subject", "preference"],
|
|
346
|
-
},
|
|
347
|
-
},
|
|
348
|
-
observedRelationships: {
|
|
349
|
-
type: "array",
|
|
350
|
-
items: {
|
|
351
|
-
type: "object",
|
|
352
|
-
properties: {
|
|
353
|
-
from: { type: "string" },
|
|
354
|
-
to: { type: "string" },
|
|
355
|
-
relationship: { type: "string" },
|
|
356
|
-
},
|
|
357
|
-
required: ["from", "to", "relationship"],
|
|
358
|
-
},
|
|
359
|
-
},
|
|
360
|
-
},
|
|
361
|
-
required: ["mentions", "tasks", "preferences", "observedRelationships"],
|
|
362
|
-
},
|
|
363
|
-
};
|
|
364
|
-
|
|
365
|
-
const INSIGHT_WRITE_CYPHER = `
|
|
366
|
-
MATCH (c:Conversation:WhatsAppConversation {conversationId: $conversationId})
|
|
367
|
-
UNWIND $observations AS obs
|
|
368
|
-
CREATE (o:Observation)
|
|
369
|
-
SET
|
|
370
|
-
o:WhatsAppObservation,
|
|
371
|
-
o.accountId = $accountId,
|
|
372
|
-
o.kind = obs.kind,
|
|
373
|
-
o.summary = obs.summary,
|
|
374
|
-
o.snippet = obs.snippet,
|
|
375
|
-
o.subject = obs.subject,
|
|
376
|
-
o.from = obs.from,
|
|
377
|
-
o.to = obs.to,
|
|
378
|
-
o.source = 'whatsapp',
|
|
379
|
-
o.createdByAgent = 'whatsapp-import',
|
|
380
|
-
o.createdBySource = 'whatsapp-import',
|
|
381
|
-
o.createdBySession = $sessionId,
|
|
382
|
-
o.createdAt = datetime(),
|
|
383
|
-
o.scope = $scope,
|
|
384
|
-
o.insightPass = true,
|
|
385
|
-
o.observationStatus = 'auto-extracted'
|
|
386
|
-
MERGE (o)-[r:OBSERVED_IN]->(c)
|
|
387
|
-
ON CREATE SET r.source = 'whatsapp', r.createdAt = datetime()
|
|
388
|
-
RETURN count(o) AS created
|
|
389
|
-
`;
|
|
390
|
-
|
|
391
|
-
async function runInsightPass({
|
|
392
|
-
callOauthLlm,
|
|
393
|
-
HAIKU_MODEL,
|
|
319
|
+
async function resolveParticipants({
|
|
394
320
|
session,
|
|
395
|
-
conversationId,
|
|
396
321
|
accountId,
|
|
397
322
|
scope,
|
|
398
323
|
sessionId,
|
|
399
|
-
|
|
324
|
+
senderNames,
|
|
325
|
+
normaliseSenderName,
|
|
400
326
|
}) {
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
mentions: 0,
|
|
404
|
-
tasks: 0,
|
|
405
|
-
preferences: 0,
|
|
406
|
-
observedRelationships: 0,
|
|
407
|
-
};
|
|
408
|
-
|
|
409
|
-
if (parsedLines.length === 0) return counters;
|
|
410
|
-
|
|
411
|
-
// Build chunks. Each chunk is rendered as a numbered transcript including
|
|
412
|
-
// sender-names — the LLM needs them to attribute tasks/preferences/observed-
|
|
413
|
-
// relationships. The system prompt's "no sender names" rule constrains the
|
|
414
|
-
// returned snippet field only, not the input transcript.
|
|
415
|
-
const chunks = [];
|
|
416
|
-
for (let i = 0; i < parsedLines.length; i += INSIGHT_CHUNK_SIZE) {
|
|
417
|
-
chunks.push(parsedLines.slice(i, i + INSIGHT_CHUNK_SIZE));
|
|
327
|
+
if (senderNames.length === 0) {
|
|
328
|
+
return { idsByName: new Map(), participantsAlreadyExisted: 0 };
|
|
418
329
|
}
|
|
419
|
-
counters.chunks = chunks.length;
|
|
420
|
-
|
|
421
|
-
for (let chunkIdx = 0; chunkIdx < chunks.length; chunkIdx++) {
|
|
422
|
-
const chunk = chunks[chunkIdx];
|
|
423
|
-
const lines = chunk
|
|
424
|
-
.map((l, j) => `[${j + 1}] ${l.senderName}: ${l.body}`)
|
|
425
|
-
.join("\n");
|
|
426
|
-
|
|
427
|
-
let llmResult;
|
|
428
|
-
try {
|
|
429
|
-
llmResult = await callOauthLlm({
|
|
430
|
-
model: HAIKU_MODEL,
|
|
431
|
-
system: INSIGHT_SYSTEM_PROMPT,
|
|
432
|
-
userMessage: lines,
|
|
433
|
-
maxTokens: 8192,
|
|
434
|
-
timeoutMs: 180_000,
|
|
435
|
-
tools: [INSIGHT_TOOL],
|
|
436
|
-
toolChoiceName: INSIGHT_TOOL.name,
|
|
437
|
-
});
|
|
438
|
-
} catch (err) {
|
|
439
|
-
log(
|
|
440
|
-
`insight-pass chunk=${chunkIdx + 1}/${chunks.length} threw=${err instanceof Error ? err.message : String(err)}`,
|
|
441
|
-
);
|
|
442
|
-
continue;
|
|
443
|
-
}
|
|
444
330
|
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
331
|
+
const canonicalIndex = await fetchCanonicalIndex({
|
|
332
|
+
session,
|
|
333
|
+
accountId,
|
|
334
|
+
normaliseSenderName,
|
|
335
|
+
});
|
|
336
|
+
|
|
337
|
+
const idsByName = new Map();
|
|
338
|
+
const fallbackSenders = [];
|
|
339
|
+
let canonicalMatches = 0;
|
|
340
|
+
for (const senderName of senderNames) {
|
|
341
|
+
const norm = normaliseSenderName(senderName);
|
|
342
|
+
const hit = canonicalIndex.get(norm);
|
|
343
|
+
if (hit) {
|
|
344
|
+
idsByName.set(senderName, hit.elemId);
|
|
345
|
+
canonicalMatches++;
|
|
452
346
|
log(
|
|
453
|
-
`
|
|
347
|
+
`participant-resolved senderName="${senderName}" matched=canonical nodeId=${hit.elemId} label=${hit.label}`,
|
|
454
348
|
);
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
const input = llmResult.input ?? {};
|
|
459
|
-
const observations = [];
|
|
460
|
-
for (const m of asArray(input.mentions)) {
|
|
461
|
-
observations.push({
|
|
462
|
-
kind: "mention",
|
|
463
|
-
summary: String(m.name ?? "").slice(0, 200),
|
|
464
|
-
snippet: String(m.snippet ?? "").slice(0, 200),
|
|
465
|
-
subject: null,
|
|
466
|
-
from: null,
|
|
467
|
-
to: null,
|
|
468
|
-
});
|
|
469
|
-
}
|
|
470
|
-
for (const t of asArray(input.tasks)) {
|
|
471
|
-
observations.push({
|
|
472
|
-
kind: "task",
|
|
473
|
-
summary: String(t.task ?? "").slice(0, 200),
|
|
474
|
-
snippet: String(t.snippet ?? "").slice(0, 200),
|
|
475
|
-
subject: null,
|
|
476
|
-
from: null,
|
|
477
|
-
to: null,
|
|
478
|
-
});
|
|
479
|
-
}
|
|
480
|
-
for (const p of asArray(input.preferences)) {
|
|
481
|
-
observations.push({
|
|
482
|
-
kind: "preference",
|
|
483
|
-
summary: String(p.preference ?? "").slice(0, 200),
|
|
484
|
-
snippet: null,
|
|
485
|
-
subject: String(p.subject ?? "").slice(0, 200),
|
|
486
|
-
from: null,
|
|
487
|
-
to: null,
|
|
488
|
-
});
|
|
489
|
-
}
|
|
490
|
-
for (const r of asArray(input.observedRelationships)) {
|
|
491
|
-
observations.push({
|
|
492
|
-
kind: "observed-relationship",
|
|
493
|
-
summary: String(r.relationship ?? "").slice(0, 200),
|
|
494
|
-
snippet: null,
|
|
495
|
-
subject: null,
|
|
496
|
-
from: String(r.from ?? "").slice(0, 200),
|
|
497
|
-
to: String(r.to ?? "").slice(0, 200),
|
|
498
|
-
});
|
|
349
|
+
} else {
|
|
350
|
+
fallbackSenders.push(senderName);
|
|
499
351
|
}
|
|
352
|
+
}
|
|
500
353
|
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
await session.executeWrite(async (tx) => {
|
|
510
|
-
await tx.run(INSIGHT_WRITE_CYPHER, {
|
|
511
|
-
conversationId,
|
|
512
|
-
accountId,
|
|
513
|
-
scope,
|
|
514
|
-
sessionId,
|
|
515
|
-
observations,
|
|
516
|
-
});
|
|
354
|
+
let autoCreated = 0;
|
|
355
|
+
if (fallbackSenders.length > 0) {
|
|
356
|
+
const result = await session.executeWrite(async (tx) => {
|
|
357
|
+
const res = await tx.run(PARTICIPANT_UPSERT_CYPHER, {
|
|
358
|
+
names: fallbackSenders,
|
|
359
|
+
accountId,
|
|
360
|
+
scope,
|
|
361
|
+
sessionId,
|
|
517
362
|
});
|
|
518
|
-
|
|
363
|
+
const m = new Map();
|
|
364
|
+
for (const r of res.records) {
|
|
365
|
+
m.set(r.get("name"), r.get("elemId"));
|
|
366
|
+
}
|
|
367
|
+
const stats = res.summary.counters.updates();
|
|
368
|
+
return { m, created: stats.nodesCreated };
|
|
369
|
+
});
|
|
370
|
+
autoCreated = result.created;
|
|
371
|
+
for (const senderName of fallbackSenders) {
|
|
372
|
+
const elemId = result.m.get(senderName);
|
|
373
|
+
if (!elemId) continue;
|
|
374
|
+
idsByName.set(senderName, elemId);
|
|
519
375
|
log(
|
|
520
|
-
`
|
|
376
|
+
`participant-resolved senderName="${senderName}" matched=auto nodeId=${elemId} label=Person`,
|
|
521
377
|
);
|
|
522
378
|
}
|
|
523
379
|
}
|
|
524
380
|
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
381
|
+
// participantsAlreadyExisted = canonical hits + auto-Persons that were already in graph.
|
|
382
|
+
const autoAlreadyExisted = fallbackSenders.length - autoCreated;
|
|
383
|
+
return {
|
|
384
|
+
idsByName,
|
|
385
|
+
participantsAlreadyExisted: canonicalMatches + autoAlreadyExisted,
|
|
386
|
+
};
|
|
530
387
|
}
|
|
531
388
|
|
|
532
389
|
// ---------------------------------------------------------------------------
|
|
533
|
-
//
|
|
390
|
+
// 7. Main.
|
|
534
391
|
// ---------------------------------------------------------------------------
|
|
535
392
|
|
|
536
393
|
async function main() {
|
|
@@ -544,13 +401,13 @@ async function main() {
|
|
|
544
401
|
const sessionId =
|
|
545
402
|
flags.sessionId || `whatsapp-ingest:${Date.now()}:${Math.random().toString(36).slice(2, 10)}`;
|
|
546
403
|
|
|
547
|
-
let parseExport,
|
|
404
|
+
let parseExport, parseFilterArg, applyFilter, memoryArchiveWrite, getSession;
|
|
405
|
+
let normaliseSenderName, deriveMessageId;
|
|
548
406
|
try {
|
|
549
|
-
({ parseExport } = await import(PARSE_EXPORT_PATH));
|
|
407
|
+
({ parseExport, parseFilterArg, applyFilter } = await import(PARSE_EXPORT_PATH));
|
|
550
408
|
({ memoryArchiveWrite } = await import(ARCHIVE_WRITE_PATH));
|
|
551
409
|
({ getSession } = await import(NEO4J_LIB_PATH));
|
|
552
|
-
({
|
|
553
|
-
({ HAIKU_MODEL } = await import(MODELS_PATH));
|
|
410
|
+
({ normaliseSenderName, deriveMessageId } = await import(DERIVE_KEYS_PATH));
|
|
554
411
|
} catch (err) {
|
|
555
412
|
fail("import", {
|
|
556
413
|
reason: `failed to import compiled dist`,
|
|
@@ -558,14 +415,23 @@ async function main() {
|
|
|
558
415
|
});
|
|
559
416
|
}
|
|
560
417
|
|
|
561
|
-
//
|
|
418
|
+
// 7a. Parse the operator-supplied filter ahead of any IO so a malformed
|
|
419
|
+
// value LOUD-FAILs before unzip / Neo4j connection.
|
|
420
|
+
let filter;
|
|
421
|
+
try {
|
|
422
|
+
filter = parseFilterArg(flags.filter);
|
|
423
|
+
} catch (err) {
|
|
424
|
+
fail("argv", { reason: err instanceof Error ? err.message : String(err) });
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
// 7b. Resolve _chat.txt
|
|
562
428
|
const { chatTxt, cleanup } = resolveChatTxt(archive);
|
|
563
429
|
const archiveBytes = statSync(chatTxt).size;
|
|
564
430
|
log(
|
|
565
|
-
`start file=${chatTxt} owner=${ownerElementId} scope=${scope} accountId=${accountId} archive-bytes=${archiveBytes}`,
|
|
431
|
+
`start file=${chatTxt} owner=${ownerElementId} scope=${scope} accountId=${accountId} archive-bytes=${archiveBytes} filter=${flags.filter}`,
|
|
566
432
|
);
|
|
567
433
|
|
|
568
|
-
//
|
|
434
|
+
// 7c. Parse
|
|
569
435
|
let parseResult;
|
|
570
436
|
const parseStart = Date.now();
|
|
571
437
|
try {
|
|
@@ -575,51 +441,84 @@ async function main() {
|
|
|
575
441
|
fail("parse", { reason: err instanceof Error ? err.message : String(err) });
|
|
576
442
|
}
|
|
577
443
|
const parseMs = Date.now() - parseStart;
|
|
578
|
-
const firstTs = parseResult.parsedLines[0]?.dateSent || "-";
|
|
579
|
-
const lastTs = parseResult.parsedLines[parseResult.parsedLines.length - 1]?.dateSent || "-";
|
|
580
444
|
log(
|
|
581
|
-
`parsed lines=${parseResult.counters.parsed} media-skipped=${parseResult.counters.mediaSkipped} system-skipped=${parseResult.counters.systemSkipped}
|
|
445
|
+
`parsed lines=${parseResult.counters.parsed} media-skipped=${parseResult.counters.mediaSkipped} system-skipped=${parseResult.counters.systemSkipped} ms=${parseMs}`,
|
|
446
|
+
);
|
|
447
|
+
|
|
448
|
+
// 7d. Apply filter
|
|
449
|
+
const filteredLines = applyFilter(parseResult.parsedLines, filter);
|
|
450
|
+
log(
|
|
451
|
+
`filter-applied kind=${filter.kind} parsed=${parseResult.parsedLines.length} filtered=${filteredLines.length}`,
|
|
582
452
|
);
|
|
453
|
+
if (filteredLines.length === 0) {
|
|
454
|
+
cleanup();
|
|
455
|
+
fail("filter", {
|
|
456
|
+
reason: `filter "${flags.filter}" matched zero rows from ${parseResult.parsedLines.length} parsed lines — refusing to write an empty conversation`,
|
|
457
|
+
});
|
|
458
|
+
}
|
|
459
|
+
|
|
460
|
+
const firstTs = filteredLines[0].dateSent;
|
|
461
|
+
const lastTs = filteredLines[filteredLines.length - 1].dateSent;
|
|
583
462
|
|
|
584
|
-
//
|
|
463
|
+
// 7e. Resolve participants (canonical match first, auto-Person fallback) and build rows
|
|
585
464
|
let session = getSession();
|
|
586
|
-
let
|
|
465
|
+
let participantUpsert = { idsByName: new Map(), participantsAlreadyExisted: 0 };
|
|
587
466
|
const distinctSenderNames = Array.from(
|
|
588
|
-
new Set(
|
|
467
|
+
new Set(filteredLines.map((l) => l.senderName)),
|
|
589
468
|
);
|
|
590
469
|
|
|
591
470
|
try {
|
|
592
|
-
|
|
471
|
+
participantUpsert = await resolveParticipants({
|
|
593
472
|
session,
|
|
594
473
|
accountId,
|
|
595
474
|
scope,
|
|
596
475
|
sessionId,
|
|
597
476
|
senderNames: distinctSenderNames,
|
|
477
|
+
normaliseSenderName,
|
|
598
478
|
});
|
|
599
479
|
} catch (err) {
|
|
600
480
|
await session.close().catch(() => {});
|
|
601
481
|
cleanup();
|
|
602
482
|
fail("archive-write", {
|
|
603
|
-
phase: "participant-
|
|
483
|
+
phase: "participant-resolve",
|
|
604
484
|
reason: err instanceof Error ? err.message : String(err),
|
|
605
485
|
});
|
|
606
486
|
}
|
|
607
487
|
|
|
488
|
+
const participantIds = participantUpsert.idsByName;
|
|
489
|
+
const participantsAlreadyExisted = participantUpsert.participantsAlreadyExisted;
|
|
490
|
+
|
|
608
491
|
if (participantIds.size !== distinctSenderNames.length) {
|
|
609
492
|
log(
|
|
610
|
-
`participant-
|
|
493
|
+
`participant-resolve mismatch expected=${distinctSenderNames.length} got=${participantIds.size}`,
|
|
611
494
|
);
|
|
612
495
|
}
|
|
613
496
|
|
|
614
497
|
const conversationId = parseResult.conversationId;
|
|
615
|
-
|
|
616
|
-
|
|
498
|
+
// Task 870: messageId derives from a content-derived natural key. Stable
|
|
499
|
+
// across re-imports — same archive bytes + same operator timezone produce
|
|
500
|
+
// identical messageIds, so the existing MERGE in memory-archive-write is
|
|
501
|
+
// naturally idempotent.
|
|
502
|
+
const conversationSha256 = parseResult.archiveSourceFile.replace(
|
|
503
|
+
/^whatsapp-export:/,
|
|
504
|
+
"",
|
|
505
|
+
);
|
|
506
|
+
const rows = filteredLines.map((l) => ({
|
|
507
|
+
messageId: deriveMessageId({
|
|
508
|
+
conversationSha256,
|
|
509
|
+
dateSent: l.dateSent,
|
|
510
|
+
senderName: l.senderName,
|
|
511
|
+
body: l.body,
|
|
512
|
+
}),
|
|
617
513
|
conversationId,
|
|
618
514
|
senderNodeId: participantIds.get(l.senderName) || ownerElementId,
|
|
619
515
|
senderName: l.senderName,
|
|
620
516
|
dateSent: l.dateSent,
|
|
621
517
|
body: l.body,
|
|
622
|
-
sequenceIndex
|
|
518
|
+
// sequenceIndex preserved on the Message node for chain ordering tiebreaker
|
|
519
|
+
// in WHATSAPP_NEXT_CHAIN_CYPHER. NOT part of messageId — that would couple
|
|
520
|
+
// identity to array position and re-introduce shadowing.
|
|
521
|
+
sequenceIndex: l.sequenceIndex,
|
|
623
522
|
}));
|
|
624
523
|
|
|
625
524
|
const conversationBlock = {
|
|
@@ -635,7 +534,7 @@ async function main() {
|
|
|
635
534
|
// own session and closes it in finally.
|
|
636
535
|
await session.close().catch(() => {});
|
|
637
536
|
|
|
638
|
-
//
|
|
537
|
+
// 7f. Archive-write
|
|
639
538
|
let archiveResult;
|
|
640
539
|
const writeStart = Date.now();
|
|
641
540
|
try {
|
|
@@ -657,43 +556,29 @@ async function main() {
|
|
|
657
556
|
}
|
|
658
557
|
const writeMs = Date.now() - writeStart;
|
|
659
558
|
const createdMessages = archiveResult.counters.createdMessages || 0;
|
|
559
|
+
const nextEdgesProcessed = archiveResult.counters.nextEdgesProcessed || 0;
|
|
560
|
+
const nextEdgesCreated = archiveResult.counters.nextEdgesCreated || 0;
|
|
561
|
+
const messagesAlreadyExisted = rows.length - createdMessages;
|
|
660
562
|
log(
|
|
661
|
-
`archive-write created-
|
|
563
|
+
`archive-write created-messages=${createdMessages} messages-already-existed=${messagesAlreadyExisted} next-edges-processed=${nextEdgesProcessed} next-edges-created=${nextEdgesCreated} participants-already-existed=${participantsAlreadyExisted} ms=${writeMs}`,
|
|
662
564
|
);
|
|
663
565
|
|
|
664
|
-
//
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
let insightSession = getSession();
|
|
675
|
-
try {
|
|
676
|
-
insightCounters = await runInsightPass({
|
|
677
|
-
callOauthLlm,
|
|
678
|
-
HAIKU_MODEL,
|
|
679
|
-
session: insightSession,
|
|
680
|
-
conversationId,
|
|
681
|
-
accountId,
|
|
682
|
-
scope,
|
|
683
|
-
sessionId,
|
|
684
|
-
parsedLines: parseResult.parsedLines,
|
|
685
|
-
});
|
|
686
|
-
} catch (err) {
|
|
687
|
-
log(
|
|
688
|
-
`insight-pass FAILED reason="${err instanceof Error ? err.message : String(err)}"`,
|
|
689
|
-
);
|
|
690
|
-
} finally {
|
|
691
|
-
await insightSession.close().catch(() => {});
|
|
692
|
-
}
|
|
693
|
-
const insightMs = Date.now() - insightStart;
|
|
694
|
-
log(
|
|
695
|
-
`insight-pass model=haiku chunks=${insightCounters.chunks} mentions=${insightCounters.mentions} tasks=${insightCounters.tasks} preferences=${insightCounters.preferences} observed-relationships=${insightCounters.observedRelationships} ms=${insightMs}`,
|
|
566
|
+
// 7g. Resolve conversationElementId for the agent-return shape (Task 871.6).
|
|
567
|
+
// One small read-after-write — the operator's database-operator subagent
|
|
568
|
+
// surfaces this as the canonical handle for downstream Phase 2 calls.
|
|
569
|
+
let conversationElementId = null;
|
|
570
|
+
const ridSession = getSession();
|
|
571
|
+
try {
|
|
572
|
+
const res = await ridSession.run(
|
|
573
|
+
`MATCH (c:Conversation:WhatsAppConversation {conversationId: $cid})
|
|
574
|
+
RETURN elementId(c) AS elemId LIMIT 1`,
|
|
575
|
+
{ cid: conversationId },
|
|
696
576
|
);
|
|
577
|
+
conversationElementId = res.records[0]?.get("elemId") ?? null;
|
|
578
|
+
} catch (err) {
|
|
579
|
+
log(`conversation-elementid-lookup failed reason="${err instanceof Error ? err.message : String(err)}"`);
|
|
580
|
+
} finally {
|
|
581
|
+
await ridSession.close().catch(() => {});
|
|
697
582
|
}
|
|
698
583
|
|
|
699
584
|
cleanup();
|
|
@@ -703,29 +588,23 @@ async function main() {
|
|
|
703
588
|
|
|
704
589
|
process.stdout.write(
|
|
705
590
|
JSON.stringify({
|
|
591
|
+
conversationElementId,
|
|
706
592
|
conversationId,
|
|
707
593
|
parsed: parseResult.counters.parsed,
|
|
708
594
|
mediaSkipped: parseResult.counters.mediaSkipped,
|
|
709
595
|
systemSkipped: parseResult.counters.systemSkipped,
|
|
710
|
-
|
|
711
|
-
|
|
596
|
+
filtered: filteredLines.length,
|
|
597
|
+
written: createdMessages,
|
|
598
|
+
messagesAlreadyExisted,
|
|
599
|
+
nextEdgesProcessed,
|
|
600
|
+
nextEdgesCreated,
|
|
601
|
+
participantsAlreadyExisted,
|
|
712
602
|
ms: totalMs,
|
|
713
603
|
}) + "\n",
|
|
714
604
|
);
|
|
715
605
|
process.exit(0);
|
|
716
606
|
}
|
|
717
607
|
|
|
718
|
-
function hashLine(body, dateSent) {
|
|
719
|
-
// Stable per-line hash for messageId. The parser already hashed the file
|
|
720
|
-
// bytes for conversationId; per-message we hash (body+dateSent) to get a
|
|
721
|
-
// collision-resistant tail without crypto module overhead per line.
|
|
722
|
-
let h = 0;
|
|
723
|
-
const s = `${dateSent}${body}`;
|
|
724
|
-
for (let i = 0; i < s.length; i++) {
|
|
725
|
-
h = (h * 31 + s.charCodeAt(i)) | 0;
|
|
726
|
-
}
|
|
727
|
-
return (h >>> 0).toString(16).padStart(8, "0");
|
|
728
|
-
}
|
|
729
608
|
|
|
730
609
|
main().catch((err) => {
|
|
731
610
|
fail("uncaught", { reason: err instanceof Error ? err.message : String(err) });
|