@rubytech/create-maxy 1.0.830 → 1.0.832
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/payload/platform/lib/oauth-llm/dist/index.d.ts +8 -1
- package/payload/platform/lib/oauth-llm/dist/index.d.ts.map +1 -1
- package/payload/platform/lib/oauth-llm/dist/index.js +5 -1
- package/payload/platform/lib/oauth-llm/dist/index.js.map +1 -1
- package/payload/platform/lib/oauth-llm/src/index.ts +19 -4
- package/payload/platform/neo4j/migrations/004-project-admin-agent.ts +36 -3
- package/payload/platform/neo4j/migrations/008-adminuser-accountid-backfill.ts +85 -0
- package/payload/platform/plugins/admin/mcp/dist/index.js +25 -3
- package/payload/platform/plugins/admin/mcp/dist/index.js.map +1 -1
- package/payload/platform/plugins/docs/references/internals.md +1 -1
- package/payload/platform/plugins/memory/bin/conversation-archive-ingest.mjs +293 -127
- package/payload/platform/plugins/memory/bin/conversation-archive-ingest.sh +41 -14
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/timestamp-scanner.d.ts +49 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/timestamp-scanner.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/timestamp-scanner.js +35 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/timestamp-scanner.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/types.d.ts +6 -7
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/types.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/types.js +9 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/types.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/whatsapp-text.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/whatsapp-text.js +49 -131
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/whatsapp-text.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/delta-cursor.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/delta-cursor.js +0 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/delta-cursor.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.d.ts +0 -2
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.js +10 -11
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.d.ts +3 -4
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.js +11 -42
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/to-turn-text.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/to-turn-text.js +10 -8
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/to-turn-text.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts +10 -8
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js +118 -43
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.d.ts +3 -2
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.js +40 -18
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/uuid.d.ts +3 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/uuid.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/uuid.js +12 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/uuid.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-source-agnosticism.test.js +4 -6
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-source-agnosticism.test.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-whatsapp-text.test.js +59 -17
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-whatsapp-text.test.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts +11 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js +17 -9
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-rank.js +2 -2
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-rank.js.map +1 -1
- package/payload/platform/plugins/memory/skills/conversation-archive/SKILL.md +41 -9
- package/payload/platform/scripts/lib/resolve-account-dir.sh +19 -1
- package/payload/server/chunk-25QDCOE5.js +1116 -0
- package/payload/server/chunk-35YZS3KL.js +328 -0
- package/payload/server/chunk-7CBRZKZS.js +654 -0
- package/payload/server/chunk-BCFM2UPH.js +2305 -0
- package/payload/server/chunk-CV3HPX46.js +10097 -0
- package/payload/server/chunk-IXOPV36P.js +2305 -0
- package/payload/server/chunk-J6YWEJBN.js +1116 -0
- package/payload/server/chunk-OCPJGZ6S.js +654 -0
- package/payload/server/chunk-ZKGAYLAK.js +10097 -0
- package/payload/server/client-pool-NBVGONQL.js +32 -0
- package/payload/server/client-pool-ZNGN66GN.js +32 -0
- package/payload/server/cloudflare-task-tracker-MHALDN54.js +19 -0
- package/payload/server/cloudflare-task-tracker-R4FIORFL.js +19 -0
- package/payload/server/maxy-edge.js +4 -4
- package/payload/server/neo4j-migrations-3A6K2EB5.js +552 -0
- package/payload/server/neo4j-migrations-6RW423E2.js +530 -0
- package/payload/server/server.js +43 -20
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
// =============================================================================
|
|
3
3
|
// conversation-archive-ingest.mjs — in-process orchestrator for
|
|
4
|
-
// conversation-archive-ingest.sh (Task 894 — supersedes whatsapp-import bin
|
|
4
|
+
// conversation-archive-ingest.sh (Task 894 — supersedes whatsapp-import bin;
|
|
5
|
+
// Task 900 — env-only identity, fsync-per-line progress sink, per-session
|
|
6
|
+
// checkpointing).
|
|
5
7
|
//
|
|
6
8
|
// Source-agnostic. The same pipeline runs for every conversation source;
|
|
7
9
|
// `--source <enum>` selects the normaliser at the top of the pipeline:
|
|
@@ -9,26 +11,44 @@
|
|
|
9
11
|
// normalise → bind canonical senders → derive conversationIdentity
|
|
10
12
|
// → look up prior :ConversationArchive (delta cursor)
|
|
11
13
|
// → sessionize delta at gap-hours boundaries
|
|
12
|
-
// → for each session: classify (mode='chat') →
|
|
13
|
-
//
|
|
14
|
+
// → for each session: classify (mode='chat') → memoryIngest immediately
|
|
15
|
+
// → advance lastIngestedMessageHash + lastIngestedMessageAt
|
|
16
|
+
//
|
|
17
|
+
// Identity (Task 900): `process.env.ACCOUNT_ID` and `process.env.USER_ID` are
|
|
18
|
+
// the only inputs. Both are plumbed by `spawn-env.ts` into every Bash
|
|
19
|
+
// subprocess; missing/malformed → LOUD-FAIL at phase=argv. `--account-id` and
|
|
20
|
+
// `--owner-element-id` flags are GONE; the writer derives the AdminUser
|
|
21
|
+
// elementId from (accountId, userId) one Cypher hop away.
|
|
14
22
|
//
|
|
15
23
|
// Argv (positional): <archive-path>
|
|
16
|
-
// Argv (flags): --source <
|
|
17
|
-
// --owner-element-id <id>
|
|
24
|
+
// Argv (flags): --source <enum>
|
|
18
25
|
// --participant-person-ids <csv>
|
|
19
26
|
// --scope <admin|public>
|
|
20
27
|
// [--session-gap-hours <N>] (default 12)
|
|
21
|
-
// [--account-id <accountId>]
|
|
22
28
|
// [--timezone <iana>]
|
|
23
29
|
// [--date-format <DD/MM/YY|MM/DD/YY|DD/MM/YYYY|MM/DD/YYYY>]
|
|
24
30
|
// [--session-id <id>]
|
|
25
31
|
//
|
|
26
32
|
// Stdout (success): one JSON line — counters the skill needs to formulate
|
|
27
33
|
// the three operator-facing messages. See SKILL.md for the shape.
|
|
28
|
-
// Stderr:
|
|
34
|
+
// Stderr + progress file: `[conversation-archive] ...` lines, fsync per write.
|
|
35
|
+
// Progress path: data/accounts/<accountId>/logs/conversation-archive-<sessionId>.log
|
|
36
|
+
// Wrapper sh prints `[conversation-archive] progress-file=<absolute-path>` on
|
|
37
|
+
// stdout before exec node, so the agent computes nothing.
|
|
29
38
|
// =============================================================================
|
|
30
39
|
|
|
31
|
-
import {
|
|
40
|
+
import {
|
|
41
|
+
closeSync,
|
|
42
|
+
existsSync,
|
|
43
|
+
fsyncSync,
|
|
44
|
+
mkdirSync,
|
|
45
|
+
mkdtempSync,
|
|
46
|
+
openSync,
|
|
47
|
+
readdirSync,
|
|
48
|
+
rmSync,
|
|
49
|
+
statSync,
|
|
50
|
+
writeSync,
|
|
51
|
+
} from "node:fs";
|
|
32
52
|
import { join, resolve, dirname } from "node:path";
|
|
33
53
|
import { tmpdir } from "node:os";
|
|
34
54
|
import { spawnSync } from "node:child_process";
|
|
@@ -89,13 +109,60 @@ const MEMORY_INGEST_PATH = resolve(
|
|
|
89
109
|
"tools",
|
|
90
110
|
"memory-ingest.js",
|
|
91
111
|
);
|
|
112
|
+
const UUID_LIB_PATH = resolve(
|
|
113
|
+
platformRoot,
|
|
114
|
+
"plugins",
|
|
115
|
+
"memory",
|
|
116
|
+
"mcp",
|
|
117
|
+
"dist",
|
|
118
|
+
"lib",
|
|
119
|
+
"uuid.js",
|
|
120
|
+
);
|
|
92
121
|
|
|
93
122
|
// ---------------------------------------------------------------------------
|
|
94
|
-
// 2.
|
|
123
|
+
// 2. Progress sink + logger fan-out (Task 900 sub-scope A).
|
|
124
|
+
// `log()` and `fail()` write to stderr AND the progress file with fsync per
|
|
125
|
+
// write. Parent Bash returns nothing until child exits; the progress file
|
|
126
|
+
// is the only surface the agent's heartbeat poll can read mid-run.
|
|
95
127
|
// ---------------------------------------------------------------------------
|
|
128
|
+
let progressFd = -1;
|
|
129
|
+
const startedMs = Date.now();
|
|
130
|
+
|
|
131
|
+
function openProgressSink(absolutePath) {
|
|
132
|
+
mkdirSync(dirname(absolutePath), { recursive: true });
|
|
133
|
+
progressFd = openSync(absolutePath, "a");
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
function writeProgressLine(line) {
|
|
137
|
+
// stderr first — preserves existing observability for parents that read
|
|
138
|
+
// stderr after the child exits, AND makes the line visible via the
|
|
139
|
+
// mcp-spawn-tee'd device server.log without a separate sink.
|
|
140
|
+
process.stderr.write(line);
|
|
141
|
+
if (progressFd >= 0) {
|
|
142
|
+
try {
|
|
143
|
+
writeSync(progressFd, line);
|
|
144
|
+
fsyncSync(progressFd);
|
|
145
|
+
} catch {
|
|
146
|
+
// Disk pressure / EBADF: don't kill the run — stderr already carries
|
|
147
|
+
// the line.
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
|
|
96
152
|
function log(line) {
|
|
97
|
-
|
|
153
|
+
writeProgressLine(`[conversation-archive] ${line}\n`);
|
|
98
154
|
}
|
|
155
|
+
|
|
156
|
+
function endRun(exitCode) {
|
|
157
|
+
const totalMs = Date.now() - startedMs;
|
|
158
|
+
writeProgressLine(`[conversation-archive] end exit-code=${exitCode} ms=${totalMs}\n`);
|
|
159
|
+
if (progressFd >= 0) {
|
|
160
|
+
try { closeSync(progressFd); } catch { /* fd already closed */ }
|
|
161
|
+
progressFd = -1;
|
|
162
|
+
}
|
|
163
|
+
process.exit(exitCode);
|
|
164
|
+
}
|
|
165
|
+
|
|
99
166
|
function fail(phase, fields) {
|
|
100
167
|
const fieldStr = Object.entries(fields)
|
|
101
168
|
.map(([k, v]) =>
|
|
@@ -104,8 +171,8 @@ function fail(phase, fields) {
|
|
|
104
171
|
: `${k}=${v ?? "-"}`,
|
|
105
172
|
)
|
|
106
173
|
.join(" ");
|
|
107
|
-
|
|
108
|
-
|
|
174
|
+
writeProgressLine(`[conversation-archive] FAIL phase=${phase} ${fieldStr}\n`);
|
|
175
|
+
endRun(1);
|
|
109
176
|
}
|
|
110
177
|
|
|
111
178
|
// ---------------------------------------------------------------------------
|
|
@@ -144,7 +211,6 @@ function parseArgv(argv) {
|
|
|
144
211
|
if (!VALID_SOURCES.has(flags.source)) {
|
|
145
212
|
fail("argv", { reason: `invalid --source "${flags.source}" (whatsapp|telegram|signal|linkedin-messages|zoom-transcript|meeting-minutes|imessage|slack|other)` });
|
|
146
213
|
}
|
|
147
|
-
if (!flags.ownerElementId) fail("argv", { reason: "missing --owner-element-id" });
|
|
148
214
|
if (!flags.participantPersonIds) {
|
|
149
215
|
fail("argv", {
|
|
150
216
|
reason: "missing --participant-person-ids (csv of operator-confirmed :Person/:AdminUser elementIds, owner excluded)",
|
|
@@ -216,35 +282,41 @@ function findChatTxt(dir) {
|
|
|
216
282
|
}
|
|
217
283
|
|
|
218
284
|
// ---------------------------------------------------------------------------
|
|
219
|
-
// 5.
|
|
220
|
-
// ---------------------------------------------------------------------------
|
|
221
|
-
function resolveAccountId(flags) {
|
|
222
|
-
if (flags.accountId && flags.accountId.trim()) return flags.accountId.trim();
|
|
223
|
-
const installDir = resolve(platformRoot, "..");
|
|
224
|
-
const accountsDir = join(installDir, "data", "accounts");
|
|
225
|
-
if (!existsSync(accountsDir)) {
|
|
226
|
-
fail("argv", { reason: `accounts dir not found: ${accountsDir}; pass --account-id explicitly` });
|
|
227
|
-
}
|
|
228
|
-
const dirs = readdirSync(accountsDir, { withFileTypes: true })
|
|
229
|
-
.filter((d) => d.isDirectory() && !d.name.startsWith("."))
|
|
230
|
-
.map((d) => d.name);
|
|
231
|
-
if (dirs.length === 0) fail("argv", { reason: `no accounts found under ${accountsDir}` });
|
|
232
|
-
if (dirs.length > 1) {
|
|
233
|
-
fail("argv", {
|
|
234
|
-
reason: `multiple accounts under ${accountsDir} (${dirs.join(",")}); pass --account-id explicitly`,
|
|
235
|
-
});
|
|
236
|
-
}
|
|
237
|
-
return dirs[0];
|
|
238
|
-
}
|
|
239
|
-
|
|
240
|
-
// ---------------------------------------------------------------------------
|
|
241
|
-
// 6. Main
|
|
285
|
+
// 5. Main
|
|
242
286
|
// ---------------------------------------------------------------------------
|
|
243
287
|
async function main() {
|
|
244
|
-
const startedMs = Date.now();
|
|
245
288
|
const { archive, flags } = parseArgv(process.argv);
|
|
246
289
|
const source = flags.source;
|
|
247
|
-
|
|
290
|
+
|
|
291
|
+
// Identity is env-only (Task 900 sub-scope C). spawn-env.ts plumbs both
|
|
292
|
+
// ACCOUNT_ID and USER_ID into every Bash subprocess; the bin LOUD-FAILs if
|
|
293
|
+
// either is absent or malformed. The disk-scan resolver and the
|
|
294
|
+
// --account-id / --owner-element-id flags are gone.
|
|
295
|
+
let UUID_REGEX;
|
|
296
|
+
try {
|
|
297
|
+
({ UUID_REGEX } = await import(UUID_LIB_PATH));
|
|
298
|
+
} catch (err) {
|
|
299
|
+
fail("import", {
|
|
300
|
+
reason: "failed to import compiled uuid lib",
|
|
301
|
+
detail: err instanceof Error ? err.message : String(err),
|
|
302
|
+
});
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
const accountId = process.env.ACCOUNT_ID?.trim();
|
|
306
|
+
if (!accountId) {
|
|
307
|
+
fail("argv", { reason: "ACCOUNT_ID env missing — bin must be invoked under an authenticated agent context" });
|
|
308
|
+
}
|
|
309
|
+
if (!UUID_REGEX.test(accountId)) {
|
|
310
|
+
fail("argv", { reason: `ACCOUNT_ID env malformed (not a UUID): "${accountId}"` });
|
|
311
|
+
}
|
|
312
|
+
const userId = process.env.USER_ID?.trim();
|
|
313
|
+
if (!userId) {
|
|
314
|
+
fail("argv", { reason: "USER_ID env missing — bin must be invoked under an authenticated user session" });
|
|
315
|
+
}
|
|
316
|
+
if (!UUID_REGEX.test(userId)) {
|
|
317
|
+
fail("argv", { reason: `USER_ID env malformed (not a UUID): "${userId}"` });
|
|
318
|
+
}
|
|
319
|
+
|
|
248
320
|
const participantElementIds = flags.participantPersonIds
|
|
249
321
|
.split(",")
|
|
250
322
|
.map((s) => s.trim())
|
|
@@ -253,7 +325,6 @@ async function main() {
|
|
|
253
325
|
fail("argv", { reason: "--participant-person-ids must list at least one elementId" });
|
|
254
326
|
}
|
|
255
327
|
const scope = flags.scope;
|
|
256
|
-
const accountId = resolveAccountId(flags);
|
|
257
328
|
const timezone = flags.timezone || "Europe/London";
|
|
258
329
|
const dateFormat = flags.dateFormat;
|
|
259
330
|
const sessionGapHours = flags.sessionGapHours
|
|
@@ -266,20 +337,34 @@ async function main() {
|
|
|
266
337
|
flags.sessionId ||
|
|
267
338
|
`conversation-archive:${source}:${Date.now()}:${Math.random().toString(36).slice(2, 10)}`;
|
|
268
339
|
|
|
340
|
+
// ---------------------------------------------------------------------------
|
|
341
|
+
// Open progress sink BEFORE any await. Path is deterministic from
|
|
342
|
+
// (accountId, sessionId); the wrapper sh prints the same path on stdout so
|
|
343
|
+
// the agent's heartbeat polls the right file.
|
|
344
|
+
// ---------------------------------------------------------------------------
|
|
345
|
+
const accountsDir = resolve(platformRoot, "..", "data", "accounts");
|
|
346
|
+
const logsDir = resolve(accountsDir, accountId, "logs");
|
|
347
|
+
const progressFilePath = resolve(logsDir, `conversation-archive-${sessionId}.log`);
|
|
348
|
+
openProgressSink(progressFilePath);
|
|
349
|
+
|
|
350
|
+
log(
|
|
351
|
+
`start session=${sessionId} source=${source} archive=${archive} accountId=${accountId} userId=${userId} participants=${participantElementIds.length} scope=${scope} pid=${process.pid}`,
|
|
352
|
+
);
|
|
353
|
+
|
|
269
354
|
// Imports — fail loudly if any compiled dist missing
|
|
270
355
|
let getNormaliser;
|
|
271
356
|
let sessionize, toTurnText, findDeltaCursor;
|
|
272
|
-
let
|
|
273
|
-
let
|
|
357
|
+
let deriveConversationIdentity, deriveMessageContentHash;
|
|
358
|
+
let verifyParticipantElementIds;
|
|
274
359
|
let getSession, classifyDocument, memoryIngest;
|
|
275
360
|
try {
|
|
276
361
|
({ getNormaliser } = await import(NORMALISERS_PATH));
|
|
277
362
|
({ sessionize } = await import(join(PIPELINE_PATH, "sessionize.js")));
|
|
278
363
|
({ toTurnText } = await import(join(PIPELINE_PATH, "to-turn-text.js")));
|
|
279
364
|
({ findDeltaCursor } = await import(join(PIPELINE_PATH, "delta-cursor.js")));
|
|
280
|
-
({
|
|
365
|
+
({ deriveConversationIdentity, deriveMessageContentHash } =
|
|
281
366
|
await import(join(PIPELINE_PATH, "derive-keys.js")));
|
|
282
|
-
({
|
|
367
|
+
({ verifyParticipantElementIds } = await import(join(PIPELINE_PATH, "sender-bind.js")));
|
|
283
368
|
({ getSession } = await import(NEO4J_LIB_PATH));
|
|
284
369
|
({ classifyDocument } = await import(LLM_CLASSIFIER_PATH));
|
|
285
370
|
({ memoryIngest } = await import(MEMORY_INGEST_PATH));
|
|
@@ -290,6 +375,36 @@ async function main() {
|
|
|
290
375
|
});
|
|
291
376
|
}
|
|
292
377
|
|
|
378
|
+
// Resolve owner AdminUser elementId from (accountId, userId). One Cypher
|
|
379
|
+
// hop replaces the deleted --owner-element-id flag — owner identity is env
|
|
380
|
+
// input, never an out-of-band flag.
|
|
381
|
+
let ownerElementId;
|
|
382
|
+
{
|
|
383
|
+
const lookupSession = getSession();
|
|
384
|
+
try {
|
|
385
|
+
const r = await lookupSession.run(
|
|
386
|
+
`MATCH (au:AdminUser { accountId: $accountId, userId: $userId })
|
|
387
|
+
RETURN elementId(au) AS ownerElementId LIMIT 1`,
|
|
388
|
+
{ accountId, userId },
|
|
389
|
+
);
|
|
390
|
+
const row = r.records[0]?.get("ownerElementId");
|
|
391
|
+
if (typeof row !== "string" || !row) {
|
|
392
|
+
await lookupSession.close().catch(() => {});
|
|
393
|
+
fail("argv", {
|
|
394
|
+
reason: `USER_ID does not resolve to an AdminUser under ACCOUNT_ID (accountId=${accountId.slice(0, 8)}, userId=${userId.slice(0, 8)})`,
|
|
395
|
+
});
|
|
396
|
+
}
|
|
397
|
+
ownerElementId = row;
|
|
398
|
+
} catch (err) {
|
|
399
|
+
await lookupSession.close().catch(() => {});
|
|
400
|
+
fail("argv", {
|
|
401
|
+
reason: `AdminUser lookup failed: ${err instanceof Error ? err.message : String(err)}`,
|
|
402
|
+
});
|
|
403
|
+
}
|
|
404
|
+
await lookupSession.close().catch(() => {});
|
|
405
|
+
}
|
|
406
|
+
log(`owner-resolved ownerElementId=${ownerElementId.slice(0, 12)} userId=${userId.slice(0, 8)}`);
|
|
407
|
+
|
|
293
408
|
// 6a. Resolve source file
|
|
294
409
|
const { sourceFile, cleanup } = resolveSourceFile(archive, source);
|
|
295
410
|
|
|
@@ -316,29 +431,34 @@ async function main() {
|
|
|
316
431
|
log(
|
|
317
432
|
`parsed lines=${normaliserResult.counters.parsed} media-skipped=${normaliserResult.counters.mediaSkipped} system-skipped=${normaliserResult.counters.systemSkipped}`,
|
|
318
433
|
);
|
|
434
|
+
// Task 897: zero matched timestamps means this isn't a chat archive — the
|
|
435
|
+
// file should route through document-ingest instead. The agent's
|
|
436
|
+
// database-operator skill picks the right entry; this LOUD-FAIL surfaces
|
|
437
|
+
// misroute as an actionable signal rather than a silent empty ingest.
|
|
319
438
|
if (allLines.length === 0) {
|
|
320
439
|
cleanup();
|
|
321
|
-
fail("parse", {
|
|
440
|
+
fail("parse", {
|
|
441
|
+
reason: `no timestamp prefixes detected — this archive contains no chat messages with the expected source=${source} grammar; route through document-ingest with mode='document', not conversation-archive-ingest`,
|
|
442
|
+
});
|
|
322
443
|
}
|
|
323
444
|
|
|
324
|
-
// 6c.
|
|
325
|
-
|
|
326
|
-
|
|
445
|
+
// 6c. Verify participant elementIds resolve to graph nodes with the right
|
|
446
|
+
// accountId and labels. Per-line senderName binding is gone (Task 897);
|
|
447
|
+
// operator-confirmed participants attach to the parent :ConversationArchive.
|
|
327
448
|
let session = getSession();
|
|
328
449
|
try {
|
|
329
|
-
await
|
|
450
|
+
await verifyParticipantElementIds({
|
|
330
451
|
session,
|
|
331
452
|
accountId,
|
|
332
453
|
ownerElementId,
|
|
333
454
|
participantElementIds,
|
|
334
|
-
senderNames: distinctSenderNames,
|
|
335
455
|
});
|
|
336
456
|
} catch (err) {
|
|
337
457
|
await session.close().catch(() => {});
|
|
338
458
|
cleanup();
|
|
339
459
|
if (err && err.userFacing) {
|
|
340
|
-
|
|
341
|
-
|
|
460
|
+
writeProgressLine(`[conversation-archive] FAIL ${err.message}\n`);
|
|
461
|
+
endRun(1);
|
|
342
462
|
}
|
|
343
463
|
fail("argv", { reason: err instanceof Error ? err.message : String(err) });
|
|
344
464
|
}
|
|
@@ -403,12 +523,13 @@ async function main() {
|
|
|
403
523
|
nextEdgesCreated: 0,
|
|
404
524
|
participantsLinked: 0,
|
|
405
525
|
dateRange: { first: allLines[0].dateSent, last: allLines[allLines.length - 1].dateSent },
|
|
406
|
-
|
|
526
|
+
parsedLineCount: allLines.length,
|
|
527
|
+
sessionsUnenriched: 0,
|
|
407
528
|
topicKeywords: [],
|
|
408
529
|
ms: totalMs,
|
|
409
530
|
priorLastIngestedMessageAt: priorArchive.lastAt,
|
|
410
531
|
}) + "\n");
|
|
411
|
-
|
|
532
|
+
endRun(0);
|
|
412
533
|
}
|
|
413
534
|
deltaStart = cursor.deltaStart;
|
|
414
535
|
deltaKind = "delta";
|
|
@@ -424,10 +545,24 @@ async function main() {
|
|
|
424
545
|
`sessionize source=${source} archiveSha256=${archiveSha256.slice(0, 12)} messages=${deltaLines.length} sessions=${sessions.length} gap-hours=${sessionGapHours}`,
|
|
425
546
|
);
|
|
426
547
|
|
|
427
|
-
// 6g.
|
|
428
|
-
|
|
548
|
+
// 6g. Per-session classify + immediate memoryIngest (Task 900 sub-scope E).
|
|
549
|
+
// Each session commits atomically: chunks + cursor advance happen together
|
|
550
|
+
// via memoryIngest. A kill mid-loop leaves a partial archive whose cursor
|
|
551
|
+
// is at session N-1's last message; the next invocation slices from there
|
|
552
|
+
// and re-classifies only session N onward. degrade-on-error from Task 897
|
|
553
|
+
// is preserved: a Haiku error on session N still emits an unenriched chunk
|
|
554
|
+
// and advances the cursor, so resumption never re-attempts the same Haiku
|
|
555
|
+
// error.
|
|
429
556
|
const allKeywords = new Set();
|
|
430
|
-
|
|
557
|
+
let sessionsUnenriched = 0;
|
|
558
|
+
let totalChunksWritten = 0;
|
|
559
|
+
let totalNextEdges = 0;
|
|
560
|
+
let participantsLinked = 0;
|
|
561
|
+
let archiveElementIdLatest = priorArchive ? priorArchive.elemId : "";
|
|
562
|
+
|
|
563
|
+
for (let sIdx = 0; sIdx < sessions.length; sIdx++) {
|
|
564
|
+
const s = sessions[sIdx];
|
|
565
|
+
const isFirstSessionInRun = sIdx === 0;
|
|
431
566
|
const sessionStart = Date.now();
|
|
432
567
|
const text = toTurnText(s);
|
|
433
568
|
const result = await classifyDocument({
|
|
@@ -438,74 +573,114 @@ async function main() {
|
|
|
438
573
|
naturalEdgeMap: "",
|
|
439
574
|
documentText: text,
|
|
440
575
|
});
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
576
|
+
|
|
577
|
+
let sessionChunks;
|
|
578
|
+
let sessionKeywords = [];
|
|
579
|
+
if (result.kind === "error") {
|
|
580
|
+
sessionsUnenriched += 1;
|
|
581
|
+
log(`session-unenriched session=${s.index + 1}/${sessions.length} reason="${result.reason.replace(/"/g, '\\"')}"`);
|
|
582
|
+
// Emit one raw :Section:Conversation chunk covering the whole session.
|
|
583
|
+
// No summary, no topicKeywords — a follow-up pass can re-enrich.
|
|
584
|
+
sessionChunks = [{
|
|
585
|
+
kind: "Conversation",
|
|
586
|
+
title: `Unenriched session ${s.index + 1}`,
|
|
587
|
+
body: text,
|
|
588
|
+
summary: "",
|
|
589
|
+
sourceStart: 0,
|
|
590
|
+
sourceEnd: text.length,
|
|
591
|
+
properties: {
|
|
592
|
+
unenriched: true,
|
|
593
|
+
unenrichedReason: result.reason,
|
|
594
|
+
firstMessageAt: s.firstMessageAt,
|
|
595
|
+
lastMessageAt: s.lastMessageAt,
|
|
596
|
+
messageCount: s.messages.length,
|
|
597
|
+
},
|
|
598
|
+
anchorEdge: null,
|
|
599
|
+
}];
|
|
600
|
+
} else {
|
|
601
|
+
const chunkCount = result.output.sections.length;
|
|
602
|
+
log(
|
|
603
|
+
`classify-session sessionIndex=${s.index + 1}/${sessions.length} messages=${s.messages.length} chars=${text.length} chunks=${chunkCount} ms=${Date.now() - sessionStart}`,
|
|
604
|
+
);
|
|
605
|
+
if (chunkCount === 0 && s.messages.length > 0) {
|
|
606
|
+
cleanup();
|
|
607
|
+
fail("classify", {
|
|
608
|
+
reason: `session ${s.index} of ${s.messages.length} messages produced zero chunks (classifier-prompt regression)`,
|
|
609
|
+
});
|
|
610
|
+
}
|
|
611
|
+
sessionChunks = result.output.sections;
|
|
612
|
+
sessionKeywords = result.output.documentKeywords ?? [];
|
|
613
|
+
for (const kw of sessionKeywords) allKeywords.add(kw);
|
|
444
614
|
}
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
615
|
+
|
|
616
|
+
// Per-session cursor: advance to THIS session's last message. Atomic with
|
|
617
|
+
// chunk writes inside memoryIngest's MERGE/CREATE Cypher transaction.
|
|
618
|
+
const lastSessionLine = s.messages[s.messages.length - 1];
|
|
619
|
+
const sessionLastHash = deriveMessageContentHash({
|
|
620
|
+
dateSent: lastSessionLine.dateSent,
|
|
621
|
+
body: lastSessionLine.body,
|
|
622
|
+
});
|
|
623
|
+
const sessionLastAt = lastSessionLine.dateSent;
|
|
624
|
+
const sessionDocumentSummary = `Session ${s.index + 1}/${sessions.length}: ${s.messages.length} messages, ${sessionChunks.length} chunks.`;
|
|
625
|
+
|
|
626
|
+
let ingestResult;
|
|
627
|
+
const ingestStart = Date.now();
|
|
628
|
+
try {
|
|
629
|
+
ingestResult = await memoryIngest({
|
|
630
|
+
accountId,
|
|
631
|
+
attachmentId: conversationIdentity,
|
|
632
|
+
parentLabel: "ConversationArchive",
|
|
633
|
+
source,
|
|
634
|
+
documentSummary: sessionDocumentSummary,
|
|
635
|
+
anchorNodeId: ownerElementId,
|
|
636
|
+
anchorLabel: "AdminUser",
|
|
637
|
+
sections: sessionChunks,
|
|
638
|
+
scope,
|
|
639
|
+
sessionId,
|
|
640
|
+
documentKeywords: Array.from(allKeywords),
|
|
641
|
+
archiveSha256,
|
|
642
|
+
archiveSourceFile,
|
|
643
|
+
lastIngestedMessageHash: sessionLastHash,
|
|
644
|
+
lastIngestedMessageAt: sessionLastAt,
|
|
645
|
+
participantElementIds: [ownerElementId, ...participantElementIds],
|
|
646
|
+
// First session of THIS run cleans prior chunks for matching
|
|
647
|
+
// archiveSha256 (re-run-with-same-bytes idempotency); subsequent
|
|
648
|
+
// sessions in the same run skip cleanup or they would delete the
|
|
649
|
+
// chunks just written by sessions above them.
|
|
650
|
+
cleanupPriorChunks: isFirstSessionInRun,
|
|
651
|
+
});
|
|
652
|
+
} catch (err) {
|
|
450
653
|
cleanup();
|
|
451
|
-
fail("
|
|
452
|
-
reason:
|
|
654
|
+
fail("memory-ingest", {
|
|
655
|
+
reason: err instanceof Error ? err.message : String(err),
|
|
656
|
+
sessionIndex: s.index + 1,
|
|
657
|
+
sessionsTotal: sessions.length,
|
|
453
658
|
});
|
|
454
659
|
}
|
|
455
|
-
for (const sec of result.output.sections) allChunks.push(sec);
|
|
456
|
-
for (const kw of result.output.documentKeywords) allKeywords.add(kw);
|
|
457
|
-
}
|
|
458
660
|
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
const documentSummary = sessions.length === 1
|
|
470
|
-
? `${deltaLines.length} messages in 1 session, ${allChunks.length} chunks.`
|
|
471
|
-
: `${deltaLines.length} messages in ${sessions.length} sessions, ${allChunks.length} chunks.`;
|
|
661
|
+
archiveElementIdLatest = ingestResult.documentNodeId;
|
|
662
|
+
totalChunksWritten += ingestResult.sectionCount;
|
|
663
|
+
totalNextEdges += ingestResult.edgeBreakdown.NEXT ?? 0;
|
|
664
|
+
if (isFirstSessionInRun) {
|
|
665
|
+
participantsLinked = ingestResult.edgeBreakdown.PARTICIPANT_IN ?? 0;
|
|
666
|
+
}
|
|
667
|
+
log(
|
|
668
|
+
`session-committed sessionIndex=${s.index + 1}/${sessions.length} chunks-written=${ingestResult.sectionCount} cursor-at="${sessionLastAt}" ms=${Date.now() - ingestStart}`,
|
|
669
|
+
);
|
|
670
|
+
}
|
|
472
671
|
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
const ingestStart = Date.now();
|
|
476
|
-
try {
|
|
477
|
-
ingestResult = await memoryIngest({
|
|
478
|
-
accountId,
|
|
479
|
-
attachmentId: conversationIdentity,
|
|
480
|
-
parentLabel: "ConversationArchive",
|
|
481
|
-
source,
|
|
482
|
-
documentSummary,
|
|
483
|
-
anchorNodeId: ownerElementId,
|
|
484
|
-
anchorLabel: "AdminUser",
|
|
485
|
-
sections: allChunks,
|
|
486
|
-
scope,
|
|
487
|
-
sessionId,
|
|
488
|
-
documentKeywords: Array.from(allKeywords),
|
|
489
|
-
archiveSha256,
|
|
490
|
-
archiveSourceFile,
|
|
491
|
-
lastIngestedMessageHash,
|
|
492
|
-
lastIngestedMessageAt,
|
|
493
|
-
participantElementIds: [ownerElementId, ...participantElementIds],
|
|
494
|
-
});
|
|
495
|
-
} catch (err) {
|
|
496
|
-
cleanup();
|
|
497
|
-
fail("memory-ingest", { reason: err instanceof Error ? err.message : String(err) });
|
|
672
|
+
if (sessionsUnenriched > 0) {
|
|
673
|
+
log(`classify-summary sessions=${sessions.length} unenriched=${sessionsUnenriched} enriched=${sessions.length - sessionsUnenriched}`);
|
|
498
674
|
}
|
|
499
|
-
log(
|
|
500
|
-
`source=${source} file=${archiveSourceFile} conversationIdentity=${conversationIdentity.slice(0, 12)} archiveElementId=${ingestResult.documentNodeId} chunks-written=${ingestResult.sectionCount} next-edges=${ingestResult.edgeBreakdown.NEXT ?? 0} participants=${ingestResult.edgeBreakdown.PARTICIPANT_IN ?? 0} ms=${Date.now() - ingestStart}`,
|
|
501
|
-
);
|
|
502
675
|
|
|
503
676
|
cleanup();
|
|
504
677
|
const totalMs = Date.now() - startedMs;
|
|
505
|
-
log(
|
|
678
|
+
log(
|
|
679
|
+
`done source=${source} conversationIdentity=${conversationIdentity.slice(0, 12)} archiveElementId=${archiveElementIdLatest} chunks-written=${totalChunksWritten} sessions=${sessions.length} total-ms=${totalMs}`,
|
|
680
|
+
);
|
|
506
681
|
|
|
507
682
|
process.stdout.write(JSON.stringify({
|
|
508
|
-
archiveElementId:
|
|
683
|
+
archiveElementId: archiveElementIdLatest,
|
|
509
684
|
conversationIdentity,
|
|
510
685
|
archiveSha256,
|
|
511
686
|
archiveSourceFile,
|
|
@@ -515,25 +690,16 @@ async function main() {
|
|
|
515
690
|
systemSkipped: normaliserResult.counters.systemSkipped,
|
|
516
691
|
delta: { kind: deltaKind, deltaStart, deltaMessages: deltaLines.length },
|
|
517
692
|
sessions: sessions.length,
|
|
518
|
-
chunks:
|
|
519
|
-
nextEdgesCreated:
|
|
520
|
-
participantsLinked
|
|
693
|
+
chunks: totalChunksWritten,
|
|
694
|
+
nextEdgesCreated: totalNextEdges,
|
|
695
|
+
participantsLinked,
|
|
521
696
|
dateRange: { first: allLines[0].dateSent, last: allLines[allLines.length - 1].dateSent },
|
|
522
|
-
|
|
697
|
+
parsedLineCount: allLines.length,
|
|
698
|
+
sessionsUnenriched,
|
|
523
699
|
topicKeywords: Array.from(allKeywords),
|
|
524
700
|
ms: totalMs,
|
|
525
701
|
}) + "\n");
|
|
526
|
-
|
|
527
|
-
}
|
|
528
|
-
|
|
529
|
-
function computeSenderHistogram(lines) {
|
|
530
|
-
const counts = new Map();
|
|
531
|
-
for (const l of lines) {
|
|
532
|
-
counts.set(l.senderName, (counts.get(l.senderName) ?? 0) + 1);
|
|
533
|
-
}
|
|
534
|
-
return Array.from(counts.entries())
|
|
535
|
-
.map(([name, count]) => ({ name, count }))
|
|
536
|
-
.sort((a, b) => b.count - a.count);
|
|
702
|
+
endRun(0);
|
|
537
703
|
}
|
|
538
704
|
|
|
539
705
|
main().catch((err) => {
|