@rubytech/create-realagent 1.0.831 → 1.0.833
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +131 -9
- package/package.json +1 -1
- package/payload/platform/lib/admins-write/dist/index.d.ts +87 -0
- package/payload/platform/lib/admins-write/dist/index.d.ts.map +1 -0
- package/payload/platform/lib/admins-write/dist/index.js +248 -0
- package/payload/platform/lib/admins-write/dist/index.js.map +1 -0
- package/payload/platform/lib/admins-write/src/index.ts +311 -0
- package/payload/platform/lib/admins-write/tsconfig.json +8 -0
- package/payload/platform/neo4j/migrations/004-project-admin-agent.ts +36 -3
- package/payload/platform/neo4j/migrations/009-conversation-archive-title.ts +197 -0
- package/payload/platform/neo4j/schema.cypher +1 -1
- package/payload/platform/package.json +2 -2
- package/payload/platform/plugins/admin/PLUGIN.md +1 -1
- package/payload/platform/plugins/admin/mcp/dist/index.js +37 -44
- package/payload/platform/plugins/admin/mcp/dist/index.js.map +1 -1
- package/payload/platform/plugins/docs/references/internals.md +4 -3
- package/payload/platform/plugins/memory/bin/conversation-archive-ingest.mjs +455 -140
- package/payload/platform/plugins/memory/bin/conversation-archive-ingest.sh +47 -15
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/llm-classifier.test.js +75 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/llm-classifier.test.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts +16 -10
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js +155 -100
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.d.ts +13 -5
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.js +53 -59
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/uuid.d.ts +3 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/uuid.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/uuid.js +12 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/uuid.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts +28 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js +35 -9
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js.map +1 -1
- package/payload/platform/plugins/memory/skills/conversation-archive/SKILL.md +64 -12
- package/payload/platform/scripts/lib/resolve-account-dir.sh +22 -2
- package/payload/platform/scripts/migrate-import.sh +3 -1
- package/payload/platform/scripts/seed-neo4j.sh +13 -3
- package/payload/server/chunk-25QDCOE5.js +1116 -0
- package/payload/server/chunk-35YZS3KL.js +328 -0
- package/payload/server/chunk-7CBRZKZS.js +654 -0
- package/payload/server/chunk-CRAIGEXY.js +654 -0
- package/payload/server/chunk-I2NOLBQA.js +2123 -0
- package/payload/server/chunk-IVTESKFR.js +9961 -0
- package/payload/server/chunk-IXOPV36P.js +2305 -0
- package/payload/server/chunk-KD3XP4IK.js +1116 -0
- package/payload/server/chunk-OJZPS4BL.js +367 -0
- package/payload/server/chunk-ZKGAYLAK.js +10097 -0
- package/payload/server/client-pool-J5BCVVI2.js +32 -0
- package/payload/server/client-pool-NBVGONQL.js +32 -0
- package/payload/server/cloudflare-task-tracker-R4FIORFL.js +19 -0
- package/payload/server/cloudflare-task-tracker-XCUO4N74.js +19 -0
- package/payload/server/maxy-edge.js +6 -5
- package/payload/server/neo4j-migrations-3A6K2EB5.js +552 -0
- package/payload/server/neo4j-migrations-5AN2U3YO.js +664 -0
- package/payload/server/public/assets/{admin-BNwPsMhJ.js → admin-CgGQafDG.js} +1 -1
- package/payload/server/public/assets/{graph-N_Bw-8oT.js → graph-BlrcvwWP.js} +1 -1
- package/payload/server/public/assets/{page-BKLGP-th.js → page-ClTUYMK8.js} +1 -1
- package/payload/server/public/graph.html +2 -2
- package/payload/server/public/index.html +2 -2
- package/payload/server/server.js +385 -167
- package/payload/platform/plugins/admin/mcp/dist/lib/review-tools.d.ts +0 -31
- package/payload/platform/plugins/admin/mcp/dist/lib/review-tools.d.ts.map +0 -1
- package/payload/platform/plugins/admin/mcp/dist/lib/review-tools.js +0 -666
- package/payload/platform/plugins/admin/mcp/dist/lib/review-tools.js.map +0 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/semantic-chunker.d.ts +0 -61
- package/payload/platform/plugins/memory/mcp/dist/lib/semantic-chunker.d.ts.map +0 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/semantic-chunker.js +0 -266
- package/payload/platform/plugins/memory/mcp/dist/lib/semantic-chunker.js.map +0 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-insight-pass.d.ts +0 -27
- package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-insight-pass.d.ts.map +0 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-insight-pass.js +0 -477
- package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-insight-pass.js.map +0 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-insight-write.d.ts +0 -27
- package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-insight-write.d.ts.map +0 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-insight-write.js +0 -160
- package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-insight-write.js.map +0 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-parse.d.ts +0 -10
- package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-parse.d.ts.map +0 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-parse.js +0 -29
- package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-parse.js.map +0 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-preview.d.ts +0 -28
- package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-preview.d.ts.map +0 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-preview.js +0 -34
- package/payload/platform/plugins/memory/mcp/dist/tools/whatsapp-export-preview.js.map +0 -1
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
// =============================================================================
|
|
3
3
|
// conversation-archive-ingest.mjs — in-process orchestrator for
|
|
4
|
-
// conversation-archive-ingest.sh (Task 894 — supersedes whatsapp-import bin
|
|
4
|
+
// conversation-archive-ingest.sh (Task 894 — supersedes whatsapp-import bin;
|
|
5
|
+
// Task 900 — env-only identity, fsync-per-line progress sink, per-session
|
|
6
|
+
// checkpointing).
|
|
5
7
|
//
|
|
6
8
|
// Source-agnostic. The same pipeline runs for every conversation source;
|
|
7
9
|
// `--source <enum>` selects the normaliser at the top of the pipeline:
|
|
@@ -9,26 +11,49 @@
|
|
|
9
11
|
// normalise → bind canonical senders → derive conversationIdentity
|
|
10
12
|
// → look up prior :ConversationArchive (delta cursor)
|
|
11
13
|
// → sessionize delta at gap-hours boundaries
|
|
12
|
-
// → for each session: classify (mode='chat') →
|
|
13
|
-
//
|
|
14
|
+
// → for each session: classify (mode='chat') → memoryIngest immediately
|
|
15
|
+
// → advance lastIngestedMessageHash + lastIngestedMessageAt
|
|
16
|
+
//
|
|
17
|
+
// Identity (Task 900): `process.env.ACCOUNT_ID` and `process.env.USER_ID` are
|
|
18
|
+
// the only inputs. Both are plumbed by `spawn-env.ts` into every Bash
|
|
19
|
+
// subprocess; missing/malformed → LOUD-FAIL at phase=argv. `--account-id` and
|
|
20
|
+
// `--owner-element-id` flags are GONE; the writer derives the AdminUser
|
|
21
|
+
// elementId from (accountId, userId) one Cypher hop away.
|
|
14
22
|
//
|
|
15
23
|
// Argv (positional): <archive-path>
|
|
16
|
-
// Argv (flags): --source <
|
|
17
|
-
// --owner-element-id <id>
|
|
24
|
+
// Argv (flags): --source <enum>
|
|
18
25
|
// --participant-person-ids <csv>
|
|
19
26
|
// --scope <admin|public>
|
|
20
|
-
// [--
|
|
21
|
-
// [--account-id <accountId>]
|
|
27
|
+
// [--rebuild] (Task 902 — destructive rebuild)
|
|
22
28
|
// [--timezone <iana>]
|
|
23
29
|
// [--date-format <DD/MM/YY|MM/DD/YY|DD/MM/YYYY|MM/DD/YYYY>]
|
|
24
30
|
// [--session-id <id>]
|
|
25
31
|
//
|
|
32
|
+
// Sessions split at SESSION_GAP_HOURS=8 (fixed constant — Task 902). The flag
|
|
33
|
+
// `--session-gap-hours` is REMOVED; passing it FAILs at phase=argv. The
|
|
34
|
+
// chunked chat-mode classify path (Task 902 sub-scope C) absorbs oversize
|
|
35
|
+
// sessions internally — no operator lever required.
|
|
36
|
+
//
|
|
26
37
|
// Stdout (success): one JSON line — counters the skill needs to formulate
|
|
27
38
|
// the three operator-facing messages. See SKILL.md for the shape.
|
|
28
|
-
// Stderr:
|
|
39
|
+
// Stderr + progress file: `[conversation-archive] ...` lines, fsync per write.
|
|
40
|
+
// Progress path: data/accounts/<accountId>/logs/conversation-archive-<sessionId>.log
|
|
41
|
+
// Wrapper sh prints `[conversation-archive] progress-file=<absolute-path>` on
|
|
42
|
+
// stdout before exec node, so the agent computes nothing.
|
|
29
43
|
// =============================================================================
|
|
30
44
|
|
|
31
|
-
import {
|
|
45
|
+
import {
|
|
46
|
+
closeSync,
|
|
47
|
+
existsSync,
|
|
48
|
+
fsyncSync,
|
|
49
|
+
mkdirSync,
|
|
50
|
+
mkdtempSync,
|
|
51
|
+
openSync,
|
|
52
|
+
readdirSync,
|
|
53
|
+
rmSync,
|
|
54
|
+
statSync,
|
|
55
|
+
writeSync,
|
|
56
|
+
} from "node:fs";
|
|
32
57
|
import { join, resolve, dirname } from "node:path";
|
|
33
58
|
import { tmpdir } from "node:os";
|
|
34
59
|
import { spawnSync } from "node:child_process";
|
|
@@ -36,6 +61,16 @@ import { fileURLToPath } from "node:url";
|
|
|
36
61
|
|
|
37
62
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
38
63
|
|
|
64
|
+
// Task 902 sub-scope D — fixed session gap. Pre-902 this was a CLI flag with a
|
|
65
|
+
// 12h default; oversize sessions on dense archives drove operators to re-issue
|
|
66
|
+
// with a smaller gap, which combined with cleanup-by-archiveSha256 to silently
|
|
67
|
+
// destroy 138 chunks (Adam Mackay incident, 2026-05-04). The flag is gone;
|
|
68
|
+
// chunked chat-mode classify (sub-scope C) absorbs oversize sessions
|
|
69
|
+
// internally so the gap value is no longer an operational lever. 8h is a
|
|
70
|
+
// tighter default than 12h and matches the operator-decided post-incident
|
|
71
|
+
// constant.
|
|
72
|
+
const SESSION_GAP_HOURS = 8;
|
|
73
|
+
|
|
39
74
|
// ---------------------------------------------------------------------------
|
|
40
75
|
// 1. Resolve dist paths.
|
|
41
76
|
// ---------------------------------------------------------------------------
|
|
@@ -89,13 +124,60 @@ const MEMORY_INGEST_PATH = resolve(
|
|
|
89
124
|
"tools",
|
|
90
125
|
"memory-ingest.js",
|
|
91
126
|
);
|
|
127
|
+
// Absolute path of the compiled uuid helper inside the memory plugin's MCP
// dist tree; it is loaded with a dynamic import() so a missing build can be
// reported as a phase=import failure instead of a module-resolution crash.
const UUID_LIB_PATH = resolve(platformRoot, "plugins", "memory", "mcp", "dist", "lib", "uuid.js");
|
|
92
136
|
|
|
93
137
|
// ---------------------------------------------------------------------------
|
|
94
|
-
// 2.
|
|
138
|
+
// 2. Progress sink + logger fan-out (Task 900 sub-scope A).
|
|
139
|
+
// `log()` and `fail()` write to stderr AND the progress file with fsync per
|
|
140
|
+
// write. Parent Bash returns nothing until child exits; the progress file
|
|
141
|
+
// is the only surface the agent's heartbeat poll can read mid-run.
|
|
95
142
|
// ---------------------------------------------------------------------------
|
|
143
|
+
// File descriptor of the append-mode progress file, or -1 while no sink is open.
let progressFd = -1;
// Flips to true after the first failed progress-file write so the warning is
// emitted once per sink instead of once per log line.
let progressSinkWarned = false;
// Wall-clock start of the process; endRun() reports elapsed ms against it.
const startedMs = Date.now();

/**
 * Open the progress file in append mode, creating parent directories as needed.
 *
 * If a sink is already open it is closed only AFTER the new descriptor has
 * been obtained, so a failing open leaves the previous sink intact and a
 * successful one never leaks the old descriptor.
 *
 * @param {string} absolutePath - Absolute path of the progress file.
 * @throws Propagates mkdirSync/openSync errors to the caller.
 */
function openProgressSink(absolutePath) {
  mkdirSync(dirname(absolutePath), { recursive: true });
  const nextFd = openSync(absolutePath, "a");
  if (progressFd >= 0) {
    try {
      closeSync(progressFd);
    } catch {
      // Previous descriptor was already closed; nothing left to release.
    }
  }
  progressFd = nextFd;
  progressSinkWarned = false;
}

/**
 * Write one already-formatted line to stderr and, when a sink is open, to the
 * progress file followed by an fsync.
 *
 * Progress-file failures never abort the run (stderr already carries the
 * line), but the first failure is reported once on stderr so a stale
 * progress file is not a silent condition.
 *
 * @param {string} line - Full line including its trailing newline.
 */
function writeProgressLine(line) {
  // stderr first: it is the sink that always exists.
  process.stderr.write(line);
  if (progressFd < 0) return;
  try {
    writeSync(progressFd, line);
    fsyncSync(progressFd);
  } catch (err) {
    // Disk pressure / EBADF: keep going, but say so exactly once.
    if (!progressSinkWarned) {
      progressSinkWarned = true;
      const detail = err instanceof Error ? err.message : String(err);
      process.stderr.write(
        `[conversation-archive] progress-sink-error detail=${JSON.stringify(detail)}\n`,
      );
    }
  }
}

/**
 * Emit a `[conversation-archive] ` prefixed line through the progress fan-out.
 *
 * @param {string} line - Message body without prefix or newline.
 */
function log(line) {
  writeProgressLine(`[conversation-archive] ${line}\n`);
}

/**
 * Write the terminal `end` line, close the progress sink and exit the process.
 *
 * @param {number} exitCode - Process exit code; also echoed in the end line.
 */
function endRun(exitCode) {
  const totalMs = Date.now() - startedMs;
  writeProgressLine(`[conversation-archive] end exit-code=${exitCode} ms=${totalMs}\n`);
  if (progressFd >= 0) {
    try { closeSync(progressFd); } catch { /* fd already closed */ }
    progressFd = -1;
  }
  process.exit(exitCode);
}
|
|
180
|
+
|
|
99
181
|
function fail(phase, fields) {
|
|
100
182
|
const fieldStr = Object.entries(fields)
|
|
101
183
|
.map(([k, v]) =>
|
|
@@ -104,8 +186,8 @@ function fail(phase, fields) {
|
|
|
104
186
|
: `${k}=${v ?? "-"}`,
|
|
105
187
|
)
|
|
106
188
|
.join(" ");
|
|
107
|
-
|
|
108
|
-
|
|
189
|
+
writeProgressLine(`[conversation-archive] FAIL phase=${phase} ${fieldStr}\n`);
|
|
190
|
+
endRun(1);
|
|
109
191
|
}
|
|
110
192
|
|
|
111
193
|
// ---------------------------------------------------------------------------
|
|
@@ -123,6 +205,9 @@ const VALID_SOURCES = new Set([
|
|
|
123
205
|
"other",
|
|
124
206
|
]);
|
|
125
207
|
|
|
208
|
+
// Boolean flags carry no value; every other flag consumes the following arg.
|
|
209
|
+
const BOOLEAN_FLAGS = new Set(["rebuild"]);
|
|
210
|
+
|
|
126
211
|
function parseArgv(argv) {
|
|
127
212
|
const args = argv.slice(2);
|
|
128
213
|
let archive = null;
|
|
@@ -135,6 +220,18 @@ function parseArgv(argv) {
|
|
|
135
220
|
continue;
|
|
136
221
|
}
|
|
137
222
|
const key = a.slice(2);
|
|
223
|
+
// Task 902 sub-scope D — the flag re-introduced the surface that enabled
|
|
224
|
+
// the silent-partial-wipe failure (Defect B). Reject loudly with the
|
|
225
|
+
// remediation the operator should use instead.
|
|
226
|
+
if (key === "session-gap-hours") {
|
|
227
|
+
fail("argv", {
|
|
228
|
+
reason: `--session-gap-hours is no longer supported; gap is fixed at 8h. Use --rebuild on the prior export bytes if you need to re-classify.`,
|
|
229
|
+
});
|
|
230
|
+
}
|
|
231
|
+
if (BOOLEAN_FLAGS.has(camelCase(key))) {
|
|
232
|
+
flags[camelCase(key)] = true;
|
|
233
|
+
continue;
|
|
234
|
+
}
|
|
138
235
|
const v = args[++i];
|
|
139
236
|
if (v == null) fail("argv", { reason: `flag --${key} requires a value` });
|
|
140
237
|
flags[camelCase(key)] = v;
|
|
@@ -144,7 +241,6 @@ function parseArgv(argv) {
|
|
|
144
241
|
if (!VALID_SOURCES.has(flags.source)) {
|
|
145
242
|
fail("argv", { reason: `invalid --source "${flags.source}" (whatsapp|telegram|signal|linkedin-messages|zoom-transcript|meeting-minutes|imessage|slack|other)` });
|
|
146
243
|
}
|
|
147
|
-
if (!flags.ownerElementId) fail("argv", { reason: "missing --owner-element-id" });
|
|
148
244
|
if (!flags.participantPersonIds) {
|
|
149
245
|
fail("argv", {
|
|
150
246
|
reason: "missing --participant-person-ids (csv of operator-confirmed :Person/:AdminUser elementIds, owner excluded)",
|
|
@@ -216,35 +312,122 @@ function findChatTxt(dir) {
|
|
|
216
312
|
}
|
|
217
313
|
|
|
218
314
|
// ---------------------------------------------------------------------------
|
|
219
|
-
//
|
|
315
|
+
// 4b. Stable archive title (Task 902 sub-scope A).
|
|
316
|
+
//
|
|
317
|
+
// Format: `<source> · <owner> ↔ <other1>, <other2>, … · <YYYY-MM-DD>→<YYYY-MM-DD>`.
|
|
318
|
+
//
|
|
319
|
+
// The title is computed once per ingest from inputs that are immutable for
|
|
320
|
+
// the conversation (source, accountId, sortedParticipantElementIds, archive
|
|
321
|
+
// dateRange) so re-ingests produce the same string. Memory-ingest writes it
|
|
322
|
+
// to `:ConversationArchive.title` ON CREATE and COALESCE-on-MATCH so the UI
|
|
323
|
+
// (display-helpers.ts) reads a stable label without ever falling through to
|
|
324
|
+
// `summary`. Per-session counter strings stay in the progress file only.
|
|
325
|
+
//
|
|
326
|
+
// Names: AdminUser uses `displayName` then `slug`; Person composes
|
|
327
|
+
// `givenName + familyName`. When neither resolves, fall back to a short
|
|
328
|
+
// elementId prefix — same degraded format the migration uses on backfill,
|
|
329
|
+
// so a re-ingest after the migration produces the same title shape that
|
|
330
|
+
// already populated the node.
|
|
220
331
|
// ---------------------------------------------------------------------------
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
332
|
+
|
|
333
|
+
/**
 * Extract the leading `YYYY-MM-DD` of an ISO-style timestamp string.
 *
 * The prefix is copied textually rather than parsed into a Date, so the
 * calendar day never depends on the timezone of the machine running the bin.
 *
 * @param {unknown} iso - Candidate timestamp.
 * @returns {string} The ten-character date prefix, or "?" when `iso` is not a
 *   string or does not start with that shape.
 */
function isoToYmd(iso) {
  if (typeof iso !== "string") return "?";
  const hit = /^(\d{4}-\d{2}-\d{2})/.exec(iso);
  return hit === null ? "?" : hit[1];
}
|
|
340
|
+
|
|
341
|
+
/**
 * Choose a human-readable name for a graph node from its labels and properties.
 *
 * - `AdminUser`: trimmed `displayName`, else trimmed `slug`.
 * - `Person`: trimmed `givenName` and `familyName` joined by a space
 *   (either part may be absent).
 * A node carrying both labels falls through to the Person rule when the
 * AdminUser fields are empty.
 *
 * @param {unknown} labels - Node labels; anything that is not an array is
 *   treated as "no labels".
 * @param {object|null|undefined} props - Node properties; null/undefined is
 *   treated as an empty bag instead of throwing.
 * @returns {string} The resolved name, or "" when nothing usable is present.
 */
function pickNameForRow(labels, props) {
  const labelList = Array.isArray(labels) ? labels : [];
  const bag = props ?? {};
  // Non-string values (numbers, null, nested objects) count as absent.
  const text = (value) => (typeof value === "string" ? value.trim() : "");

  if (labelList.includes("AdminUser")) {
    const adminName = text(bag.displayName) || text(bag.slug);
    if (adminName) return adminName;
  }
  if (labelList.includes("Person")) {
    const full = [text(bag.givenName), text(bag.familyName)]
      .filter((part) => part.length > 0)
      .join(" ");
    if (full) return full;
  }
  return "";
}
|
|
356
|
+
|
|
357
|
+
/**
 * Build the archive title
 * `<source> · <owner> ↔ <other1>, <other2>, … · <YYYY-MM-DD>→<YYYY-MM-DD>`.
 *
 * One query fetches labels and properties for the owner and every participant
 * (restricted to nodes whose `accountId` matches). Ids that yield no name are
 * rendered as the first eight characters of the elementId. Participants appear
 * in the order they were passed in.
 *
 * @param {object} args
 * @param {string} args.source - Source enum value, used verbatim.
 * @param {string} args.accountId - Account the nodes must belong to.
 * @param {string} args.ownerElementId - elementId of the owning node.
 * @param {string[]} args.participantElementIds - elementIds of the other participants.
 * @param {string} args.firstMessageAt - Timestamp of the first message.
 * @param {string} args.lastMessageAt - Timestamp of the last message.
 * @param {Function} args.getSession - Returns a session exposing `run` and `close`.
 * @returns {Promise<string>} The composed title.
 * @throws Rejects with whatever `session.run` rejects with; the session is
 *   closed either way.
 */
async function computeArchiveTitle({
  source,
  accountId,
  ownerElementId,
  participantElementIds,
  firstMessageAt,
  lastMessageAt,
  getSession,
}) {
  const session = getSession();
  const names = new Map();
  try {
    const result = await session.run(
      `UNWIND $ids AS id
       MATCH (n) WHERE elementId(n) = id AND n.accountId = $accountId
       RETURN elementId(n) AS elemId, labels(n) AS labels, properties(n) AS props`,
      { ids: [ownerElementId, ...participantElementIds], accountId },
    );
    result.records.forEach((record) => {
      const elemId = record.get("elemId");
      const resolved = pickNameForRow(record.get("labels"), record.get("props") || {});
      if (resolved) names.set(elemId, resolved);
    });
  } finally {
    // Close failures are irrelevant to the title; ignore them.
    await session.close().catch(() => {});
  }

  // Unresolved ids degrade to a short elementId prefix.
  const display = (id) => names.get(id) || id.slice(0, 8);
  const owner = display(ownerElementId);
  const others = participantElementIds.map((id) => display(id)).join(", ");
  const range = `${isoToYmd(firstMessageAt)}→${isoToYmd(lastMessageAt)}`;
  return `${source} · ${owner} ↔ ${others} · ${range}`;
}
|
|
239
394
|
|
|
240
395
|
// ---------------------------------------------------------------------------
|
|
241
|
-
//
|
|
396
|
+
// 5. Main
|
|
242
397
|
// ---------------------------------------------------------------------------
|
|
243
398
|
async function main() {
|
|
244
|
-
const startedMs = Date.now();
|
|
245
399
|
const { archive, flags } = parseArgv(process.argv);
|
|
246
400
|
const source = flags.source;
|
|
247
|
-
|
|
401
|
+
|
|
402
|
+
// Identity is env-only (Task 900 sub-scope C). spawn-env.ts plumbs both
|
|
403
|
+
// ACCOUNT_ID and USER_ID into every Bash subprocess; the bin LOUD-FAILs if
|
|
404
|
+
// either is absent or malformed. The disk-scan resolver and the
|
|
405
|
+
// --account-id / --owner-element-id flags are gone.
|
|
406
|
+
let UUID_REGEX;
|
|
407
|
+
try {
|
|
408
|
+
({ UUID_REGEX } = await import(UUID_LIB_PATH));
|
|
409
|
+
} catch (err) {
|
|
410
|
+
fail("import", {
|
|
411
|
+
reason: "failed to import compiled uuid lib",
|
|
412
|
+
detail: err instanceof Error ? err.message : String(err),
|
|
413
|
+
});
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
const accountId = process.env.ACCOUNT_ID?.trim();
|
|
417
|
+
if (!accountId) {
|
|
418
|
+
fail("argv", { reason: "ACCOUNT_ID env missing — bin must be invoked under an authenticated agent context" });
|
|
419
|
+
}
|
|
420
|
+
if (!UUID_REGEX.test(accountId)) {
|
|
421
|
+
fail("argv", { reason: `ACCOUNT_ID env malformed (not a UUID): "${accountId}"` });
|
|
422
|
+
}
|
|
423
|
+
const userId = process.env.USER_ID?.trim();
|
|
424
|
+
if (!userId) {
|
|
425
|
+
fail("argv", { reason: "USER_ID env missing — bin must be invoked under an authenticated user session" });
|
|
426
|
+
}
|
|
427
|
+
if (!UUID_REGEX.test(userId)) {
|
|
428
|
+
fail("argv", { reason: `USER_ID env malformed (not a UUID): "${userId}"` });
|
|
429
|
+
}
|
|
430
|
+
|
|
248
431
|
const participantElementIds = flags.participantPersonIds
|
|
249
432
|
.split(",")
|
|
250
433
|
.map((s) => s.trim())
|
|
@@ -253,19 +436,32 @@ async function main() {
|
|
|
253
436
|
fail("argv", { reason: "--participant-person-ids must list at least one elementId" });
|
|
254
437
|
}
|
|
255
438
|
const scope = flags.scope;
|
|
256
|
-
const accountId = resolveAccountId(flags);
|
|
257
439
|
const timezone = flags.timezone || "Europe/London";
|
|
258
440
|
const dateFormat = flags.dateFormat;
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
441
|
+
// Task 902 sub-scope B — `--rebuild` is the single opt-in for destructive
|
|
442
|
+
// re-run-with-same-bytes. Without it, re-issuing the bin on the same export
|
|
443
|
+
// is a no-op (chunk-count summary, exit 0). With it, the bin treats the run
|
|
444
|
+
// as first-ingest (skips prior-cursor lookup) and the FIRST session's
|
|
445
|
+
// memoryIngest call cleans prior chunks for the archiveSha256.
|
|
446
|
+
const rebuild = flags.rebuild === true;
|
|
265
447
|
const sessionId =
|
|
266
448
|
flags.sessionId ||
|
|
267
449
|
`conversation-archive:${source}:${Date.now()}:${Math.random().toString(36).slice(2, 10)}`;
|
|
268
450
|
|
|
451
|
+
// ---------------------------------------------------------------------------
|
|
452
|
+
// Open progress sink BEFORE any await. Path is deterministic from
|
|
453
|
+
// (accountId, sessionId); the wrapper sh prints the same path on stdout so
|
|
454
|
+
// the agent's heartbeat polls the right file.
|
|
455
|
+
// ---------------------------------------------------------------------------
|
|
456
|
+
const accountsDir = resolve(platformRoot, "..", "data", "accounts");
|
|
457
|
+
const logsDir = resolve(accountsDir, accountId, "logs");
|
|
458
|
+
const progressFilePath = resolve(logsDir, `conversation-archive-${sessionId}.log`);
|
|
459
|
+
openProgressSink(progressFilePath);
|
|
460
|
+
|
|
461
|
+
log(
|
|
462
|
+
`start session=${sessionId} source=${source} archive=${archive} accountId=${accountId} userId=${userId} participants=${participantElementIds.length} scope=${scope} pid=${process.pid}`,
|
|
463
|
+
);
|
|
464
|
+
|
|
269
465
|
// Imports — fail loudly if any compiled dist missing
|
|
270
466
|
let getNormaliser;
|
|
271
467
|
let sessionize, toTurnText, findDeltaCursor;
|
|
@@ -290,6 +486,36 @@ async function main() {
|
|
|
290
486
|
});
|
|
291
487
|
}
|
|
292
488
|
|
|
489
|
+
// Resolve owner AdminUser elementId from (accountId, userId). One Cypher
|
|
490
|
+
// hop replaces the deleted --owner-element-id flag — owner identity is env
|
|
491
|
+
// input, never an out-of-band flag.
|
|
492
|
+
let ownerElementId;
|
|
493
|
+
{
|
|
494
|
+
const lookupSession = getSession();
|
|
495
|
+
try {
|
|
496
|
+
const r = await lookupSession.run(
|
|
497
|
+
`MATCH (au:AdminUser { accountId: $accountId, userId: $userId })
|
|
498
|
+
RETURN elementId(au) AS ownerElementId LIMIT 1`,
|
|
499
|
+
{ accountId, userId },
|
|
500
|
+
);
|
|
501
|
+
const row = r.records[0]?.get("ownerElementId");
|
|
502
|
+
if (typeof row !== "string" || !row) {
|
|
503
|
+
await lookupSession.close().catch(() => {});
|
|
504
|
+
fail("argv", {
|
|
505
|
+
reason: `USER_ID does not resolve to an AdminUser under ACCOUNT_ID (accountId=${accountId.slice(0, 8)}, userId=${userId.slice(0, 8)})`,
|
|
506
|
+
});
|
|
507
|
+
}
|
|
508
|
+
ownerElementId = row;
|
|
509
|
+
} catch (err) {
|
|
510
|
+
await lookupSession.close().catch(() => {});
|
|
511
|
+
fail("argv", {
|
|
512
|
+
reason: `AdminUser lookup failed: ${err instanceof Error ? err.message : String(err)}`,
|
|
513
|
+
});
|
|
514
|
+
}
|
|
515
|
+
await lookupSession.close().catch(() => {});
|
|
516
|
+
}
|
|
517
|
+
log(`owner-resolved ownerElementId=${ownerElementId.slice(0, 12)} userId=${userId.slice(0, 8)}`);
|
|
518
|
+
|
|
293
519
|
// 6a. Resolve source file
|
|
294
520
|
const { sourceFile, cleanup } = resolveSourceFile(archive, source);
|
|
295
521
|
|
|
@@ -311,7 +537,7 @@ async function main() {
|
|
|
311
537
|
const archiveSha256 = normaliserResult.archiveSha256;
|
|
312
538
|
const archiveSourceFile = normaliserResult.archiveSourceFile;
|
|
313
539
|
log(
|
|
314
|
-
`source=${source} file=${archiveSourceFile} owner=${ownerElementId} participants=${participantElementIds.length} scope=${scope} accountId=${accountId} archiveSha256=${archiveSha256.slice(0, 12)}
|
|
540
|
+
`source=${source} file=${archiveSourceFile} owner=${ownerElementId} participants=${participantElementIds.length} scope=${scope} accountId=${accountId} archiveSha256=${archiveSha256.slice(0, 12)} gap=${SESSION_GAP_HOURS} rebuild=${rebuild}`,
|
|
315
541
|
);
|
|
316
542
|
log(
|
|
317
543
|
`parsed lines=${normaliserResult.counters.parsed} media-skipped=${normaliserResult.counters.mediaSkipped} system-skipped=${normaliserResult.counters.systemSkipped}`,
|
|
@@ -342,44 +568,62 @@ async function main() {
|
|
|
342
568
|
await session.close().catch(() => {});
|
|
343
569
|
cleanup();
|
|
344
570
|
if (err && err.userFacing) {
|
|
345
|
-
|
|
346
|
-
|
|
571
|
+
writeProgressLine(`[conversation-archive] FAIL ${err.message}\n`);
|
|
572
|
+
endRun(1);
|
|
347
573
|
}
|
|
348
574
|
fail("argv", { reason: err instanceof Error ? err.message : String(err) });
|
|
349
575
|
}
|
|
350
576
|
|
|
351
|
-
// 6d. Derive conversationIdentity and look up prior :ConversationArchive
|
|
577
|
+
// 6d. Derive conversationIdentity and look up prior :ConversationArchive.
|
|
578
|
+
// Task 902 sub-scope B — under `--rebuild` we skip the prior-cursor lookup
|
|
579
|
+
// entirely; the run is treated as first-ingest, the first session cleans
|
|
580
|
+
// prior chunks (cleanupPriorChunks=true), and the cursor is overwritten
|
|
581
|
+
// by that session's MERGE. Without `--rebuild`, the prior cursor drives
|
|
582
|
+
// delta-append OR (Task 902 sub-scope B) the same-bytes no-op exit when
|
|
583
|
+
// chunks already exist for this archiveSha256.
|
|
352
584
|
const conversationIdentity = deriveConversationIdentity({
|
|
353
585
|
accountId,
|
|
354
586
|
participantElementIds: [ownerElementId, ...participantElementIds],
|
|
355
587
|
});
|
|
356
588
|
let priorArchive = null;
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
589
|
+
let priorChunkCountForSha = 0;
|
|
590
|
+
if (!rebuild) {
|
|
591
|
+
try {
|
|
592
|
+
const r = await session.run(
|
|
593
|
+
`MATCH (a:ConversationArchive { conversationIdentity: $cid })
|
|
594
|
+
OPTIONAL MATCH (a)-[:HAS_SECTION]->(c:Section:Conversation { archiveSha256: $sha })
|
|
595
|
+
RETURN elementId(a) AS elemId,
|
|
596
|
+
a.lastIngestedMessageHash AS lastHash,
|
|
597
|
+
a.lastIngestedMessageAt AS lastAt,
|
|
598
|
+
count(c) AS chunksForSha LIMIT 1`,
|
|
599
|
+
{ cid: conversationIdentity, sha: archiveSha256 },
|
|
600
|
+
);
|
|
601
|
+
if (r.records[0] && r.records[0].get("elemId")) {
|
|
602
|
+
priorArchive = {
|
|
603
|
+
elemId: r.records[0].get("elemId"),
|
|
604
|
+
lastHash: r.records[0].get("lastHash"),
|
|
605
|
+
lastAt: r.records[0].get("lastAt"),
|
|
606
|
+
};
|
|
607
|
+
const cs = r.records[0].get("chunksForSha");
|
|
608
|
+
priorChunkCountForSha =
|
|
609
|
+
typeof cs === "number" ? cs : cs?.toNumber?.() ?? 0;
|
|
610
|
+
}
|
|
611
|
+
} catch (err) {
|
|
612
|
+
await session.close().catch(() => {});
|
|
613
|
+
cleanup();
|
|
614
|
+
fail("delta-cursor-missing", { reason: `conversationArchive lookup failed: ${err instanceof Error ? err.message : String(err)}` });
|
|
371
615
|
}
|
|
372
|
-
} catch (err) {
|
|
373
|
-
await session.close().catch(() => {});
|
|
374
|
-
cleanup();
|
|
375
|
-
fail("delta-cursor-missing", { reason: `conversationArchive lookup failed: ${err instanceof Error ? err.message : String(err)}` });
|
|
376
616
|
}
|
|
377
617
|
await session.close().catch(() => {});
|
|
378
618
|
|
|
379
|
-
// 6e. Compute deltaStart
|
|
619
|
+
// 6e. Compute deltaStart. Three branches:
|
|
620
|
+
// (i) --rebuild: deltaStart=0, kind=rebuild, classify entire archive.
|
|
621
|
+
// (ii) no prior archive (or no cursor): first-ingest, deltaStart=0.
|
|
622
|
+
// (iii) prior archive + cursor: standard delta-append. Same-bytes (no
|
|
623
|
+
// cursor advance possible) AND chunks exist for this sha → no-op.
|
|
380
624
|
let deltaStart = 0;
|
|
381
|
-
let deltaKind = "first-ingest";
|
|
382
|
-
if (priorArchive && priorArchive.lastHash) {
|
|
625
|
+
let deltaKind = rebuild ? "rebuild" : "first-ingest";
|
|
626
|
+
if (!rebuild && priorArchive && priorArchive.lastHash) {
|
|
383
627
|
const cursor = findDeltaCursor(allLines, priorArchive.lastHash);
|
|
384
628
|
if (cursor.kind === "missing") {
|
|
385
629
|
cleanup();
|
|
@@ -390,7 +634,7 @@ async function main() {
|
|
|
390
634
|
});
|
|
391
635
|
}
|
|
392
636
|
if (cursor.kind === "empty") {
|
|
393
|
-
log(`noop reason="no new messages since ${priorArchive.lastAt}"`);
|
|
637
|
+
log(`noop reason="no new messages since ${priorArchive.lastAt}" prior-chunks=${priorChunkCountForSha}`);
|
|
394
638
|
cleanup();
|
|
395
639
|
const totalMs = Date.now() - startedMs;
|
|
396
640
|
process.stdout.write(JSON.stringify({
|
|
@@ -404,7 +648,7 @@ async function main() {
|
|
|
404
648
|
systemSkipped: normaliserResult.counters.systemSkipped,
|
|
405
649
|
delta: { kind: "empty-delta", deltaStart: allLines.length, deltaMessages: 0 },
|
|
406
650
|
sessions: 0,
|
|
407
|
-
chunks:
|
|
651
|
+
chunks: priorChunkCountForSha,
|
|
408
652
|
nextEdgesCreated: 0,
|
|
409
653
|
participantsLinked: 0,
|
|
410
654
|
dateRange: { first: allLines[0].dateSent, last: allLines[allLines.length - 1].dateSent },
|
|
@@ -414,31 +658,56 @@ async function main() {
|
|
|
414
658
|
ms: totalMs,
|
|
415
659
|
priorLastIngestedMessageAt: priorArchive.lastAt,
|
|
416
660
|
}) + "\n");
|
|
417
|
-
|
|
661
|
+
endRun(0);
|
|
418
662
|
}
|
|
419
663
|
deltaStart = cursor.deltaStart;
|
|
420
664
|
deltaKind = "delta";
|
|
421
665
|
}
|
|
422
666
|
const deltaLines = allLines.slice(deltaStart);
|
|
423
667
|
log(
|
|
424
|
-
`delta cursor=${priorArchive ? priorArchive.lastHash.slice(0, 12) : "(first-ingest)"} cursor-line=${deltaStart} delta-messages=${deltaLines.length}`,
|
|
668
|
+
`delta kind=${deltaKind} cursor=${priorArchive && priorArchive.lastHash && !rebuild ? priorArchive.lastHash.slice(0, 12) : "(first-ingest)"} cursor-line=${deltaStart} delta-messages=${deltaLines.length}`,
|
|
425
669
|
);
|
|
426
670
|
|
|
427
|
-
// 6f. Sessionize delta
|
|
428
|
-
const sessions = sessionize(deltaLines,
|
|
671
|
+
// 6f. Sessionize delta at the fixed 8h gap (Task 902 sub-scope D).
|
|
672
|
+
const sessions = sessionize(deltaLines, SESSION_GAP_HOURS);
|
|
429
673
|
log(
|
|
430
|
-
`sessionize source=${source} archiveSha256=${archiveSha256.slice(0, 12)} messages=${deltaLines.length} sessions=${sessions.length} gap-hours=${
|
|
674
|
+
`sessionize source=${source} archiveSha256=${archiveSha256.slice(0, 12)} messages=${deltaLines.length} sessions=${sessions.length} gap-hours=${SESSION_GAP_HOURS}`,
|
|
431
675
|
);
|
|
432
676
|
|
|
433
|
-
//
|
|
434
|
-
//
|
|
435
|
-
//
|
|
436
|
-
//
|
|
437
|
-
//
|
|
438
|
-
const
|
|
677
|
+
// Task 902 sub-scope A — compute the stable archive title once, before
|
|
678
|
+
// the per-session loop. The title is `<source> · <owner> ↔ <others> ·
|
|
679
|
+
// <YYYY-MM-DD>→<YYYY-MM-DD>`. Operator-confirmed participant names come
|
|
680
|
+
// from one Cypher hop; absent canonical names fall back to elementId
|
|
681
|
+
// prefixes so the migration's degraded-title path mirrors live behaviour.
|
|
682
|
+
const archiveTitle = await computeArchiveTitle({
|
|
683
|
+
source,
|
|
684
|
+
accountId,
|
|
685
|
+
ownerElementId,
|
|
686
|
+
participantElementIds,
|
|
687
|
+
firstMessageAt: allLines[0].dateSent,
|
|
688
|
+
lastMessageAt: allLines[allLines.length - 1].dateSent,
|
|
689
|
+
getSession,
|
|
690
|
+
});
|
|
691
|
+
log(`archive-title=${archiveTitle}`);
|
|
692
|
+
|
|
693
|
+
// 6g. Per-session classify + immediate memoryIngest (Task 900 sub-scope E).
|
|
694
|
+
// Each session commits atomically: chunks + cursor advance happen together
|
|
695
|
+
// via memoryIngest. A kill mid-loop leaves a partial archive whose cursor
|
|
696
|
+
// is at session N-1's last message; the next invocation slices from there
|
|
697
|
+
// and re-classifies only session N onward. degrade-on-error from Task 897
|
|
698
|
+
// is preserved: a Haiku error on session N still emits an unenriched chunk
|
|
699
|
+
// and advances the cursor, so resumption never re-attempts the same Haiku
|
|
700
|
+
// error.
|
|
439
701
|
const allKeywords = new Set();
|
|
440
702
|
let sessionsUnenriched = 0;
|
|
441
|
-
|
|
703
|
+
let totalChunksWritten = 0;
|
|
704
|
+
let totalNextEdges = 0;
|
|
705
|
+
let participantsLinked = 0;
|
|
706
|
+
let archiveElementIdLatest = priorArchive ? priorArchive.elemId : "";
|
|
707
|
+
|
|
708
|
+
for (let sIdx = 0; sIdx < sessions.length; sIdx++) {
|
|
709
|
+
const s = sessions[sIdx];
|
|
710
|
+
const isFirstSessionInRun = sIdx === 0;
|
|
442
711
|
const sessionStart = Date.now();
|
|
443
712
|
const text = toTurnText(s);
|
|
444
713
|
const result = await classifyDocument({
|
|
@@ -449,12 +718,15 @@ async function main() {
|
|
|
449
718
|
naturalEdgeMap: "",
|
|
450
719
|
documentText: text,
|
|
451
720
|
});
|
|
721
|
+
|
|
722
|
+
let sessionChunks;
|
|
723
|
+
let sessionKeywords = [];
|
|
452
724
|
if (result.kind === "error") {
|
|
453
725
|
sessionsUnenriched += 1;
|
|
454
726
|
log(`session-unenriched session=${s.index + 1}/${sessions.length} reason="${result.reason.replace(/"/g, '\\"')}"`);
|
|
455
727
|
// Emit one raw :Section:Conversation chunk covering the whole session.
|
|
456
728
|
// No summary, no topicKeywords — a follow-up pass can re-enrich.
|
|
457
|
-
|
|
729
|
+
sessionChunks = [{
|
|
458
730
|
kind: "Conversation",
|
|
459
731
|
title: `Unenriched session ${s.index + 1}`,
|
|
460
732
|
body: text,
|
|
@@ -469,75 +741,118 @@ async function main() {
|
|
|
469
741
|
messageCount: s.messages.length,
|
|
470
742
|
},
|
|
471
743
|
anchorEdge: null,
|
|
472
|
-
}
|
|
473
|
-
|
|
744
|
+
}];
|
|
745
|
+
} else {
|
|
746
|
+
const chunkCount = result.output.sections.length;
|
|
747
|
+
log(
|
|
748
|
+
`classify-session sessionIndex=${s.index + 1}/${sessions.length} messages=${s.messages.length} chars=${text.length} chunks=${chunkCount} ms=${Date.now() - sessionStart}`,
|
|
749
|
+
);
|
|
750
|
+
if (chunkCount === 0 && s.messages.length > 0) {
|
|
751
|
+
cleanup();
|
|
752
|
+
fail("classify", {
|
|
753
|
+
reason: `session ${s.index} of ${s.messages.length} messages produced zero chunks (classifier-prompt regression)`,
|
|
754
|
+
});
|
|
755
|
+
}
|
|
756
|
+
sessionChunks = result.output.sections;
|
|
757
|
+
sessionKeywords = result.output.documentKeywords ?? [];
|
|
758
|
+
for (const kw of sessionKeywords) allKeywords.add(kw);
|
|
474
759
|
}
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
760
|
+
|
|
761
|
+
// Per-session cursor: advance to THIS session's last message. Atomic with
|
|
762
|
+
// chunk writes inside memoryIngest's MERGE/CREATE Cypher transaction.
|
|
763
|
+
const lastSessionLine = s.messages[s.messages.length - 1];
|
|
764
|
+
const sessionLastHash = deriveMessageContentHash({
|
|
765
|
+
dateSent: lastSessionLine.dateSent,
|
|
766
|
+
body: lastSessionLine.body,
|
|
767
|
+
});
|
|
768
|
+
const sessionLastAt = lastSessionLine.dateSent;
|
|
769
|
+
|
|
770
|
+
let ingestResult;
|
|
771
|
+
const ingestStart = Date.now();
|
|
772
|
+
try {
|
|
773
|
+
ingestResult = await memoryIngest({
|
|
774
|
+
accountId,
|
|
775
|
+
attachmentId: conversationIdentity,
|
|
776
|
+
parentLabel: "ConversationArchive",
|
|
777
|
+
source,
|
|
778
|
+
// Task 902 sub-scope A — `archiveTitle` is the stable label written
|
|
779
|
+
// to `:ConversationArchive.title` (UI prefers it over `summary`).
|
|
780
|
+
// `documentSummary` is also the stable title rather than the
|
|
781
|
+
// per-session counter that previously locked the parent's first
|
|
782
|
+
// checkpoint into "Session 1/N: …".
|
|
783
|
+
archiveTitle,
|
|
784
|
+
documentSummary: archiveTitle,
|
|
785
|
+
anchorNodeId: ownerElementId,
|
|
786
|
+
anchorLabel: "AdminUser",
|
|
787
|
+
sections: sessionChunks,
|
|
788
|
+
scope,
|
|
789
|
+
sessionId,
|
|
790
|
+
documentKeywords: Array.from(allKeywords),
|
|
791
|
+
archiveSha256,
|
|
792
|
+
archiveSourceFile,
|
|
793
|
+
lastIngestedMessageHash: sessionLastHash,
|
|
794
|
+
lastIngestedMessageAt: sessionLastAt,
|
|
795
|
+
participantElementIds: [ownerElementId, ...participantElementIds],
|
|
796
|
+
// Task 902 sub-scope B — cleanup-by-archiveSha256 runs ONLY under
|
|
797
|
+
// `--rebuild`, AND only on the first session of the run (subsequent
|
|
798
|
+
// sessions skip cleanup or they would delete chunks the earlier
|
|
799
|
+
// sessions in the same run just wrote).
|
|
800
|
+
cleanupPriorChunks: rebuild && isFirstSessionInRun,
|
|
801
|
+
});
|
|
802
|
+
} catch (err) {
|
|
480
803
|
cleanup();
|
|
481
|
-
fail("
|
|
482
|
-
reason:
|
|
804
|
+
fail("memory-ingest", {
|
|
805
|
+
reason: err instanceof Error ? err.message : String(err),
|
|
806
|
+
sessionIndex: s.index + 1,
|
|
807
|
+
sessionsTotal: sessions.length,
|
|
483
808
|
});
|
|
484
809
|
}
|
|
485
|
-
|
|
486
|
-
|
|
810
|
+
|
|
811
|
+
archiveElementIdLatest = ingestResult.documentNodeId;
|
|
812
|
+
totalChunksWritten += ingestResult.sectionCount;
|
|
813
|
+
totalNextEdges += ingestResult.edgeBreakdown.NEXT ?? 0;
|
|
814
|
+
if (isFirstSessionInRun) {
|
|
815
|
+
participantsLinked = ingestResult.edgeBreakdown.PARTICIPANT_IN ?? 0;
|
|
816
|
+
// Task 902 sub-scope E — surface destructive cleanup as a WARN line in
|
|
817
|
+
// the progress sink (not just memory-ingest's stderr). The agent's
|
|
818
|
+
// heartbeat grep-matches this exact prefix and elevates to operator
|
|
819
|
+
// chat. Cleanup only ever fires under `--rebuild` per sub-scope B,
|
|
820
|
+
// so seeing the line outside of an operator-issued `--rebuild` is a
|
|
821
|
+
// doctrine violation worth surfacing loudly.
|
|
822
|
+
const cleaned = ingestResult.cleanedPriorChunks ?? 0;
|
|
823
|
+
if (cleaned > 0) {
|
|
824
|
+
log(
|
|
825
|
+
`WARN cleanup-dropped chunks=${cleaned} archiveSha256=${archiveSha256.slice(0, 12)} — prior data deleted, expected only under --rebuild`,
|
|
826
|
+
);
|
|
827
|
+
}
|
|
828
|
+
}
|
|
829
|
+
log(
|
|
830
|
+
`session-committed sessionIndex=${s.index + 1}/${sessions.length} chunks-written=${ingestResult.sectionCount} cursor-at="${sessionLastAt}" ms=${Date.now() - ingestStart}`,
|
|
831
|
+
);
|
|
832
|
+
// Task 902 sub-scope F — derived progress line. The agent's heartbeat
|
|
833
|
+
// surfaces only `progress` lines as chat tokens; raw `classify-session`
|
|
834
|
+
// and `session-committed` lines remain on disk for diagnosis. The render
|
|
835
|
+
// is computed in the bin (deterministic) instead of in SKILL.md prose
|
|
836
|
+
// (LLM drift) — sessionIndex/N → P% with running totals comes from one
|
|
837
|
+
// place, not from per-tick agent arithmetic.
|
|
838
|
+
const pct = Math.round((100 * (s.index + 1)) / sessions.length);
|
|
839
|
+
log(
|
|
840
|
+
`progress sessionIndex=${s.index + 1}/${sessions.length} pct=${pct} chunks-so-far=${totalChunksWritten} elapsed-ms=${Date.now() - startedMs}`,
|
|
841
|
+
);
|
|
487
842
|
}
|
|
843
|
+
|
|
488
844
|
if (sessionsUnenriched > 0) {
|
|
489
845
|
log(`classify-summary sessions=${sessions.length} unenriched=${sessionsUnenriched} enriched=${sessions.length - sessionsUnenriched}`);
|
|
490
846
|
}
|
|
491
847
|
|
|
492
|
-
// 6h. Compute lastIngestedMessageHash from the last delta line
|
|
493
|
-
const lastLine = deltaLines[deltaLines.length - 1];
|
|
494
|
-
const lastIngestedMessageHash = deriveMessageContentHash({
|
|
495
|
-
dateSent: lastLine.dateSent,
|
|
496
|
-
body: lastLine.body,
|
|
497
|
-
});
|
|
498
|
-
const lastIngestedMessageAt = lastLine.dateSent;
|
|
499
|
-
|
|
500
|
-
// 6i. Aggregate document-level summary across sessions
|
|
501
|
-
const documentSummary = sessions.length === 1
|
|
502
|
-
? `${deltaLines.length} messages in 1 session, ${allChunks.length} chunks.`
|
|
503
|
-
: `${deltaLines.length} messages in ${sessions.length} sessions, ${allChunks.length} chunks.`;
|
|
504
|
-
|
|
505
|
-
// 6j. Call memoryIngest with parentLabel='ConversationArchive'
|
|
506
|
-
let ingestResult;
|
|
507
|
-
const ingestStart = Date.now();
|
|
508
|
-
try {
|
|
509
|
-
ingestResult = await memoryIngest({
|
|
510
|
-
accountId,
|
|
511
|
-
attachmentId: conversationIdentity,
|
|
512
|
-
parentLabel: "ConversationArchive",
|
|
513
|
-
source,
|
|
514
|
-
documentSummary,
|
|
515
|
-
anchorNodeId: ownerElementId,
|
|
516
|
-
anchorLabel: "AdminUser",
|
|
517
|
-
sections: allChunks,
|
|
518
|
-
scope,
|
|
519
|
-
sessionId,
|
|
520
|
-
documentKeywords: Array.from(allKeywords),
|
|
521
|
-
archiveSha256,
|
|
522
|
-
archiveSourceFile,
|
|
523
|
-
lastIngestedMessageHash,
|
|
524
|
-
lastIngestedMessageAt,
|
|
525
|
-
participantElementIds: [ownerElementId, ...participantElementIds],
|
|
526
|
-
});
|
|
527
|
-
} catch (err) {
|
|
528
|
-
cleanup();
|
|
529
|
-
fail("memory-ingest", { reason: err instanceof Error ? err.message : String(err) });
|
|
530
|
-
}
|
|
531
|
-
log(
|
|
532
|
-
`source=${source} file=${archiveSourceFile} conversationIdentity=${conversationIdentity.slice(0, 12)} archiveElementId=${ingestResult.documentNodeId} chunks-written=${ingestResult.sectionCount} next-edges=${ingestResult.edgeBreakdown.NEXT ?? 0} participants=${ingestResult.edgeBreakdown.PARTICIPANT_IN ?? 0} ms=${Date.now() - ingestStart}`,
|
|
533
|
-
);
|
|
534
|
-
|
|
535
848
|
cleanup();
|
|
536
849
|
const totalMs = Date.now() - startedMs;
|
|
537
|
-
log(
|
|
850
|
+
log(
|
|
851
|
+
`done source=${source} conversationIdentity=${conversationIdentity.slice(0, 12)} archiveElementId=${archiveElementIdLatest} chunks-written=${totalChunksWritten} sessions=${sessions.length} total-ms=${totalMs}`,
|
|
852
|
+
);
|
|
538
853
|
|
|
539
854
|
process.stdout.write(JSON.stringify({
|
|
540
|
-
archiveElementId:
|
|
855
|
+
archiveElementId: archiveElementIdLatest,
|
|
541
856
|
conversationIdentity,
|
|
542
857
|
archiveSha256,
|
|
543
858
|
archiveSourceFile,
|
|
@@ -547,16 +862,16 @@ async function main() {
|
|
|
547
862
|
systemSkipped: normaliserResult.counters.systemSkipped,
|
|
548
863
|
delta: { kind: deltaKind, deltaStart, deltaMessages: deltaLines.length },
|
|
549
864
|
sessions: sessions.length,
|
|
550
|
-
chunks:
|
|
551
|
-
nextEdgesCreated:
|
|
552
|
-
participantsLinked
|
|
865
|
+
chunks: totalChunksWritten,
|
|
866
|
+
nextEdgesCreated: totalNextEdges,
|
|
867
|
+
participantsLinked,
|
|
553
868
|
dateRange: { first: allLines[0].dateSent, last: allLines[allLines.length - 1].dateSent },
|
|
554
869
|
parsedLineCount: allLines.length,
|
|
555
870
|
sessionsUnenriched,
|
|
556
871
|
topicKeywords: Array.from(allKeywords),
|
|
557
872
|
ms: totalMs,
|
|
558
873
|
}) + "\n");
|
|
559
|
-
|
|
874
|
+
endRun(0);
|
|
560
875
|
}
|
|
561
876
|
|
|
562
877
|
main().catch((err) => {
|