@rubytech/create-realagent 1.0.830 → 1.0.831
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/payload/platform/lib/oauth-llm/dist/index.d.ts +8 -1
- package/payload/platform/lib/oauth-llm/dist/index.d.ts.map +1 -1
- package/payload/platform/lib/oauth-llm/dist/index.js +5 -1
- package/payload/platform/lib/oauth-llm/dist/index.js.map +1 -1
- package/payload/platform/lib/oauth-llm/src/index.ts +19 -4
- package/payload/platform/neo4j/migrations/008-adminuser-accountid-backfill.ts +85 -0
- package/payload/platform/plugins/admin/mcp/dist/index.js +25 -3
- package/payload/platform/plugins/admin/mcp/dist/index.js.map +1 -1
- package/payload/platform/plugins/docs/references/internals.md +1 -1
- package/payload/platform/plugins/memory/bin/conversation-archive-ingest.mjs +50 -27
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/timestamp-scanner.d.ts +49 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/timestamp-scanner.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/timestamp-scanner.js +35 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/timestamp-scanner.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/types.d.ts +6 -7
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/types.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/types.js +9 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/types.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/whatsapp-text.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/whatsapp-text.js +49 -131
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/whatsapp-text.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/delta-cursor.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/delta-cursor.js +0 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/delta-cursor.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.d.ts +0 -2
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.js +10 -11
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.d.ts +3 -4
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.js +11 -42
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/to-turn-text.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/to-turn-text.js +10 -8
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/to-turn-text.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts +10 -8
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js +118 -43
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.d.ts +3 -2
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.js +40 -18
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-source-agnosticism.test.js +4 -6
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-source-agnosticism.test.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-whatsapp-text.test.js +59 -17
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-whatsapp-text.test.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-rank.js +2 -2
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-rank.js.map +1 -1
- package/payload/server/chunk-BCFM2UPH.js +2305 -0
- package/payload/server/chunk-CV3HPX46.js +10097 -0
- package/payload/server/chunk-J6YWEJBN.js +1116 -0
- package/payload/server/chunk-OCPJGZ6S.js +654 -0
- package/payload/server/client-pool-ZNGN66GN.js +32 -0
- package/payload/server/cloudflare-task-tracker-MHALDN54.js +19 -0
- package/payload/server/maxy-edge.js +3 -3
- package/payload/server/neo4j-migrations-6RW423E2.js +530 -0
- package/payload/server/server.js +30 -19
|
@@ -1,24 +1,24 @@
|
|
|
1
1
|
import { createHash } from "node:crypto";
|
|
2
2
|
// ---------------------------------------------------------------------------
|
|
3
3
|
// derive-keys — natural-key derivation for conversation-archive ingestion
|
|
4
|
-
// (Task 894
|
|
4
|
+
// (Task 894 / 897).
|
|
5
5
|
//
|
|
6
6
|
// Pure functions. No I/O. The whole point is that re-imports of the same
|
|
7
7
|
// archive collapse to the same identity regardless of release-level drift in
|
|
8
8
|
// chunk indices, hash widths, or arbitrary tiebreakers.
|
|
9
9
|
//
|
|
10
10
|
// conversationIdentity = sha256(accountId + ":" + sortedParticipantElementIds.join(","))
|
|
11
|
-
// messageContentHash = sha256(dateSent + "|" +
|
|
11
|
+
// messageContentHash = sha256(dateSent + "|" + body)
|
|
12
12
|
//
|
|
13
13
|
// `conversationIdentity` is stable across re-exports — same operator + same
|
|
14
|
-
// participant set → same identity, regardless of file bytes.
|
|
15
|
-
//
|
|
16
|
-
//
|
|
17
|
-
//
|
|
14
|
+
// participant set → same identity, regardless of file bytes.
|
|
15
|
+
//
|
|
16
|
+
// `messageContentHash` (Task 897): hashes `dateSent + body`. Per-line
|
|
17
|
+
// senderName is gone (the scanner is body-opaque); the body string still
|
|
18
|
+
// carries any `Sender:` prefix verbatim, so two senders posting different
|
|
19
|
+
// content at the same instant stay distinguishable. The cursor survives a
|
|
20
|
+
// fresh re-export as long as the source preserves the prefix in the body.
|
|
18
21
|
// ---------------------------------------------------------------------------
|
|
19
|
-
export function normaliseSenderName(name) {
|
|
20
|
-
return name.normalize("NFKC").trim().toLowerCase();
|
|
21
|
-
}
|
|
22
22
|
export function sha256Hex(input) {
|
|
23
23
|
return createHash("sha256").update(input).digest("hex");
|
|
24
24
|
}
|
|
@@ -33,7 +33,6 @@ export function deriveConversationIdentity(input) {
|
|
|
33
33
|
return sha256Hex(`${input.accountId}:${sorted.join(",")}`);
|
|
34
34
|
}
|
|
35
35
|
export function deriveMessageContentHash(input) {
|
|
36
|
-
|
|
37
|
-
return sha256Hex(`${input.dateSent}|${norm}|${input.body}`);
|
|
36
|
+
return sha256Hex(`${input.dateSent}|${input.body}`);
|
|
38
37
|
}
|
|
39
38
|
//# sourceMappingURL=derive-keys.js.map
|
package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"derive-keys.js","sourceRoot":"","sources":["../../../src/lib/conversation-pipeline/derive-keys.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzC,8EAA8E;AAC9E,0EAA0E;AAC1E,
|
|
1
|
+
{"version":3,"file":"derive-keys.js","sourceRoot":"","sources":["../../../src/lib/conversation-pipeline/derive-keys.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzC,8EAA8E;AAC9E,0EAA0E;AAC1E,oBAAoB;AACpB,EAAE;AACF,yEAAyE;AACzE,6EAA6E;AAC7E,wDAAwD;AACxD,EAAE;AACF,2FAA2F;AAC3F,yDAAyD;AACzD,EAAE;AACF,4EAA4E;AAC5E,6DAA6D;AAC7D,EAAE;AACF,sEAAsE;AACtE,yEAAyE;AACzE,0EAA0E;AAC1E,0EAA0E;AAC1E,0EAA0E;AAC1E,8EAA8E;AAE9E,MAAM,UAAU,SAAS,CAAC,KAAa;IACrC,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AAC1D,CAAC;AASD,MAAM,UAAU,0BAA0B,CACxC,KAAsC;IAEtC,IAAI,CAAC,KAAK,CAAC,SAAS,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,IAAI,EAAE,EAAE,CAAC;QAChD,MAAM,IAAI,KAAK,CAAC,mDAAmD,CAAC,CAAC;IACvE,CAAC;IACD,IAAI,KAAK,CAAC,qBAAqB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC7C,MAAM,IAAI,KAAK,CAAC,qEAAqE,CAAC,CAAC;IACzF,CAAC;IACD,MAAM,MAAM,GAAG,CAAC,GAAG,KAAK,CAAC,qBAAqB,CAAC,CAAC,IAAI,EAAE,CAAC;IACvD,OAAO,SAAS,CAAC,GAAG,KAAK,CAAC,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;AAC7D,CAAC;AAOD,MAAM,UAAU,wBAAwB,CACtC,KAAoC;IAEpC,OAAO,SAAS,CAAC,GAAG,KAAK,CAAC,QAAQ,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;AACtD,CAAC"}
|
|
@@ -3,15 +3,14 @@ export declare class IngestUserFacingError extends Error {
|
|
|
3
3
|
readonly userFacing = true;
|
|
4
4
|
constructor(message: string);
|
|
5
5
|
}
|
|
6
|
-
export interface
|
|
6
|
+
export interface VerifyParticipantElementIdsInput {
|
|
7
7
|
session: Neo4jSession;
|
|
8
8
|
accountId: string;
|
|
9
9
|
ownerElementId: string;
|
|
10
10
|
participantElementIds: readonly string[];
|
|
11
|
-
senderNames: readonly string[];
|
|
12
11
|
}
|
|
13
|
-
export interface
|
|
12
|
+
export interface VerifyParticipantElementIdsResult {
|
|
14
13
|
participantsResolved: number;
|
|
15
14
|
}
|
|
16
|
-
export declare function
|
|
15
|
+
export declare function verifyParticipantElementIds(input: VerifyParticipantElementIdsInput): Promise<VerifyParticipantElementIdsResult>;
|
|
17
16
|
//# sourceMappingURL=sender-bind.d.ts.map
|
package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"sender-bind.d.ts","sourceRoot":"","sources":["../../../src/lib/conversation-pipeline/sender-bind.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,IAAI,YAAY,EAAE,MAAM,cAAc,CAAC;
|
|
1
|
+
{"version":3,"file":"sender-bind.d.ts","sourceRoot":"","sources":["../../../src/lib/conversation-pipeline/sender-bind.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,IAAI,YAAY,EAAE,MAAM,cAAc,CAAC;AAgB5D,qBAAa,qBAAsB,SAAQ,KAAK;IAC9C,QAAQ,CAAC,UAAU,QAAQ;gBACf,OAAO,EAAE,MAAM;CAI5B;AAED,MAAM,WAAW,gCAAgC;IAC/C,OAAO,EAAE,YAAY,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;IAClB,cAAc,EAAE,MAAM,CAAC;IACvB,qBAAqB,EAAE,SAAS,MAAM,EAAE,CAAC;CAC1C;AAED,MAAM,WAAW,iCAAiC;IAChD,oBAAoB,EAAE,MAAM,CAAC;CAC9B;AAUD,wBAAsB,2BAA2B,CAC/C,KAAK,EAAE,gCAAgC,GACtC,OAAO,CAAC,iCAAiC,CAAC,CAkC5C"}
|
|
@@ -1,17 +1,15 @@
|
|
|
1
|
-
import { normaliseSenderName } from "./derive-keys.js";
|
|
2
1
|
// ---------------------------------------------------------------------------
|
|
3
|
-
// sender-bind —
|
|
4
|
-
// :Person / :AdminUser elementIds (Task 894, replaces Task 887 §A0 path).
|
|
2
|
+
// sender-bind — defensive participant-elementId verification (Task 894 / 897).
|
|
5
3
|
//
|
|
6
|
-
//
|
|
7
|
-
//
|
|
8
|
-
//
|
|
4
|
+
// Task 897 dropped per-line senderName, so the closed-set sender-resolution
|
|
5
|
+
// step that lived here is gone. What remains is a defensive read pass: every
|
|
6
|
+
// supplied participant elementId must exist in the graph, must carry the
|
|
7
|
+
// supplied accountId, and must be labelled :Person or :AdminUser. If any
|
|
8
|
+
// fails, the bin LOUD-FAILs before the writer attaches PARTICIPANT_IN edges
|
|
9
|
+
// to a stale or cross-account node.
|
|
9
10
|
//
|
|
10
|
-
//
|
|
11
|
-
//
|
|
12
|
-
// operator either re-runs with the missing elementId added or reports a
|
|
13
|
-
// parser bug. This guards `feedback_archives_are_not_documents.md` and the
|
|
14
|
-
// prior Task-887 leak fingerprint.
|
|
11
|
+
// The operator confirms participants up front; participants attach to the
|
|
12
|
+
// :ConversationArchive parent via :PARTICIPANT_IN, never per-message.
|
|
15
13
|
// ---------------------------------------------------------------------------
|
|
16
14
|
export class IngestUserFacingError extends Error {
|
|
17
15
|
userFacing = true;
|
|
@@ -25,13 +23,10 @@ UNWIND $ids AS id
|
|
|
25
23
|
MATCH (n) WHERE elementId(n) = id
|
|
26
24
|
RETURN elementId(n) AS elemId,
|
|
27
25
|
labels(n) AS labels,
|
|
28
|
-
coalesce(n.name, '') AS name,
|
|
29
|
-
coalesce(n.givenName, '') AS givenName,
|
|
30
|
-
coalesce(n.familyName, '') AS familyName,
|
|
31
26
|
coalesce(n.accountId, '') AS accountId
|
|
32
27
|
`;
|
|
33
|
-
export async function
|
|
34
|
-
const { session, accountId, ownerElementId, participantElementIds
|
|
28
|
+
export async function verifyParticipantElementIds(input) {
|
|
29
|
+
const { session, accountId, ownerElementId, participantElementIds } = input;
|
|
35
30
|
const allIds = [ownerElementId, ...participantElementIds];
|
|
36
31
|
const distinctIds = Array.from(new Set(allIds));
|
|
37
32
|
if (distinctIds.length !== allIds.length) {
|
|
@@ -39,7 +34,6 @@ export async function bindCanonicalSenders(input) {
|
|
|
39
34
|
}
|
|
40
35
|
const res = await session.executeRead(async (tx) => tx.run(CANONICAL_FETCH_CYPHER, { ids: distinctIds }));
|
|
41
36
|
const seenIds = new Set();
|
|
42
|
-
const index = new Map();
|
|
43
37
|
for (const r of res.records) {
|
|
44
38
|
const elemId = r.get("elemId");
|
|
45
39
|
const labels = r.get("labels") || [];
|
|
@@ -54,37 +48,12 @@ export async function bindCanonicalSenders(input) {
|
|
|
54
48
|
throw new IngestUserFacingError(`node ${elemId} has labels [${labels.join(",")}]; expected :Person or :AdminUser`);
|
|
55
49
|
}
|
|
56
50
|
seenIds.add(elemId);
|
|
57
|
-
const candidates = [];
|
|
58
|
-
const name = r.get("name") || "";
|
|
59
|
-
const given = r.get("givenName") || "";
|
|
60
|
-
const family = r.get("familyName") || "";
|
|
61
|
-
if (name)
|
|
62
|
-
candidates.push(name);
|
|
63
|
-
if (given && family)
|
|
64
|
-
candidates.push(`${given} ${family}`);
|
|
65
|
-
if (given)
|
|
66
|
-
candidates.push(given);
|
|
67
|
-
if (family)
|
|
68
|
-
candidates.push(family);
|
|
69
|
-
for (const c of candidates) {
|
|
70
|
-
const norm = normaliseSenderName(c);
|
|
71
|
-
if (!norm)
|
|
72
|
-
continue;
|
|
73
|
-
if (!index.has(norm))
|
|
74
|
-
index.set(norm, elemId);
|
|
75
|
-
}
|
|
76
51
|
}
|
|
77
52
|
for (const id of distinctIds) {
|
|
78
53
|
if (!seenIds.has(id)) {
|
|
79
54
|
throw new IngestUserFacingError(`elementId ${id} not found in graph`);
|
|
80
55
|
}
|
|
81
56
|
}
|
|
82
|
-
for (const senderName of senderNames) {
|
|
83
|
-
const norm = normaliseSenderName(senderName);
|
|
84
|
-
if (!index.has(norm)) {
|
|
85
|
-
throw new IngestUserFacingError(`parser-miss reason="senderName=${senderName} not in confirmed participant set (${distinctIds.length} confirmed elementIds; re-run with the missing :Person elementId in --participant-person-ids)"`);
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
57
|
return { participantsResolved: seenIds.size };
|
|
89
58
|
}
|
|
90
59
|
//# sourceMappingURL=sender-bind.js.map
|
package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"sender-bind.js","sourceRoot":"","sources":["../../../src/lib/conversation-pipeline/sender-bind.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"sender-bind.js","sourceRoot":"","sources":["../../../src/lib/conversation-pipeline/sender-bind.ts"],"names":[],"mappings":"AAEA,8EAA8E;AAC9E,+EAA+E;AAC/E,EAAE;AACF,4EAA4E;AAC5E,6EAA6E;AAC7E,yEAAyE;AACzE,yEAAyE;AACzE,4EAA4E;AAC5E,oCAAoC;AACpC,EAAE;AACF,0EAA0E;AAC1E,sEAAsE;AACtE,8EAA8E;AAE9E,MAAM,OAAO,qBAAsB,SAAQ,KAAK;IACrC,UAAU,GAAG,IAAI,CAAC;IAC3B,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,CAAC,CAAC;QACf,IAAI,CAAC,IAAI,GAAG,uBAAuB,CAAC;IACtC,CAAC;CACF;AAaD,MAAM,sBAAsB,GAAG;;;;;;CAM9B,CAAC;AAEF,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAC/C,KAAuC;IAEvC,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,cAAc,EAAE,qBAAqB,EAAE,GAAG,KAAK,CAAC;IAC5E,MAAM,MAAM,GAAG,CAAC,cAAc,EAAE,GAAG,qBAAqB,CAAC,CAAC;IAC1D,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC;IAChD,IAAI,WAAW,CAAC,MAAM,KAAK,MAAM,CAAC,MAAM,EAAE,CAAC;QACzC,MAAM,IAAI,qBAAqB,CAC7B,sFAAsF,CACvF,CAAC;IACJ,CAAC;IACD,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,WAAW,CAAC,KAAK,EAAE,EAAE,EAAE,EAAE,CACjD,EAAE,CAAC,GAAG,CAAC,sBAAsB,EAAE,EAAE,GAAG,EAAE,WAAW,EAAE,CAAC,CACrD,CAAC;IACF,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAClC,KAAK,MAAM,CAAC,IAAI,GAAG,CAAC,OAAO,EAAE,CAAC;QAC5B,MAAM,MAAM,GAAG,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAW,CAAC;QACzC,MAAM,MAAM,GAAI,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAc,IAAI,EAAE,CAAC;QACnD,MAAM,IAAI,GAAI,CAAC,CAAC,GAAG,CAAC,WAAW,CAAY,IAAI,EAAE,CAAC;QAClD,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,MAAM,IAAI,qBAAqB,CAAC,QAAQ,MAAM,wDAAwD,CAAC,CAAC;QAC1G,CAAC;QACD,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;YACvB,MAAM,IAAI,qBAAqB,CAAC,QAAQ,MAAM,uBAAuB,IAAI,SAAS,SAAS,EAAE,CAAC,CAAC;QACjG,CAAC;QACD,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;YAChE,MAAM,IAAI,qBAAqB,CAAC,QAAQ,MAAM,gBAAgB,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,mCAAmC,CAAC,CAAC;QACrH,CAAC;QACD,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IACtB,CAAC;IACD,KAAK,MAAM,EAAE,IAAI,WAAW,EAAE,CAAC;QAC7B,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;YACrB,MAAM,IAAI,qBAAqB,CAAC,aAAa,EAAE,qBAAqB,CAAC,CAAC;QACxE,CAAC;IACH,CAAC;IACD,OAAO,EAAE,oBAAoB,EAAE,OAAO,CAAC,IAAI,EAAE,CAAC;AAChD,CAAC"}
|
package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/to-turn-text.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"to-turn-text.d.ts","sourceRoot":"","sources":["../../../src/lib/conversation-pipeline/to-turn-text.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;
|
|
1
|
+
{"version":3,"file":"to-turn-text.d.ts","sourceRoot":"","sources":["../../../src/lib/conversation-pipeline/to-turn-text.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;AAgB3D,wBAAgB,UAAU,CAAC,OAAO,EAAE,mBAAmB,GAAG,MAAM,CAM/D"}
|
|
@@ -1,18 +1,20 @@
|
|
|
1
1
|
// ---------------------------------------------------------------------------
|
|
2
|
-
// to-turn-text — render one ConversationSession as a
|
|
3
|
-
//
|
|
4
|
-
//
|
|
2
|
+
// to-turn-text — render one ConversationSession as a timestamped text block
|
|
3
|
+
// for memory-classify (mode='chat'). Task 897: bodies are opaque; the
|
|
4
|
+
// scanner already preserved the source's `Sender:` segment inside `body`,
|
|
5
|
+
// so a WhatsApp session renders as:
|
|
5
6
|
//
|
|
6
|
-
// [YYYY-MM-DD HH:MM:SS ±HH:MM]
|
|
7
|
-
// [YYYY-MM-DD HH:MM:SS ±HH:MM]
|
|
8
|
-
// ...
|
|
7
|
+
// [YYYY-MM-DD HH:MM:SS ±HH:MM] Sender: body text
|
|
8
|
+
// [YYYY-MM-DD HH:MM:SS ±HH:MM] Sender: body text
|
|
9
9
|
//
|
|
10
|
-
//
|
|
10
|
+
// Other sources whose body shape is different (system message, vCard,
|
|
11
|
+
// forwarded block) flow through unchanged — the chat-mode prompt is
|
|
12
|
+
// instructed to keep these prefixes verbatim in chunk bodies.
|
|
11
13
|
// ---------------------------------------------------------------------------
|
|
12
14
|
export function toTurnText(session) {
|
|
13
15
|
const lines = [];
|
|
14
16
|
for (const m of session.messages) {
|
|
15
|
-
lines.push(`[${formatWallClock(m.dateSent)}] ${m.
|
|
17
|
+
lines.push(`[${formatWallClock(m.dateSent)}] ${m.body}`);
|
|
16
18
|
}
|
|
17
19
|
return lines.join("\n");
|
|
18
20
|
}
|
package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/to-turn-text.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"to-turn-text.js","sourceRoot":"","sources":["../../../src/lib/conversation-pipeline/to-turn-text.ts"],"names":[],"mappings":"AAEA,8EAA8E;AAC9E,0EAA0E;AAC1E,
|
|
1
|
+
{"version":3,"file":"to-turn-text.js","sourceRoot":"","sources":["../../../src/lib/conversation-pipeline/to-turn-text.ts"],"names":[],"mappings":"AAEA,8EAA8E;AAC9E,4EAA4E;AAC5E,sEAAsE;AACtE,0EAA0E;AAC1E,oCAAoC;AACpC,EAAE;AACF,mDAAmD;AACnD,mDAAmD;AACnD,EAAE;AACF,sEAAsE;AACtE,oEAAoE;AACpE,8DAA8D;AAC9D,8EAA8E;AAE9E,MAAM,UAAU,UAAU,CAAC,OAA4B;IACrD,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,MAAM,CAAC,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;QACjC,KAAK,CAAC,IAAI,CAAC,IAAI,eAAe,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAC3D,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,SAAS,eAAe,CAAC,GAAW;IAClC,MAAM,CAAC,GAAG,GAAG,CAAC,KAAK,CACjB,iFAAiF,CAClF,CAAC;IACF,IAAI,CAAC,CAAC;QAAE,OAAO,GAAG,CAAC;IACnB,MAAM,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC;IACtC,MAAM,WAAW,GAAG,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC;IACjD,OAAO,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,WAAW,EAAE,CAAC;AAC1D,CAAC"}
|
|
@@ -17,9 +17,10 @@
|
|
|
17
17
|
* Hallucination defence: every returned `kind` is verified against the
|
|
18
18
|
* loaded ontology label set. Sections whose `kind` is not a real label
|
|
19
19
|
* are tagged `UNMAPPED`. Failure of the LLM call (missing creds, network,
|
|
20
|
-
* malformed JSON) returns `{kind: "
|
|
21
|
-
*
|
|
22
|
-
*
|
|
20
|
+
* malformed JSON) returns `{kind: "error", reason}` (Task 897 vocabulary
|
|
21
|
+
* — was `kind: "fallback"`). The caller decides whether to abort the
|
|
22
|
+
* ingest or degrade-on-error per session; classifier never silently
|
|
23
|
+
* substitutes a degraded write.
|
|
23
24
|
*/
|
|
24
25
|
/** Direction of the anchor edge relative to the typed node. */
|
|
25
26
|
export type AnchorEdgeDirection = "from-anchor" | "to-anchor";
|
|
@@ -160,7 +161,7 @@ export type ClassifyResult = {
|
|
|
160
161
|
kind: "ok";
|
|
161
162
|
output: ClassifierOutput;
|
|
162
163
|
} | {
|
|
163
|
-
kind: "
|
|
164
|
+
kind: "error";
|
|
164
165
|
reason: string;
|
|
165
166
|
};
|
|
166
167
|
/**
|
|
@@ -229,10 +230,11 @@ export interface ClassifyParams {
|
|
|
229
230
|
* Sections the classifier could not natural-edge appear in
|
|
230
231
|
* `output.orphanCandidates`. The skill surfaces orphans loudly to
|
|
231
232
|
* the operator.
|
|
232
|
-
* { kind: "
|
|
233
|
-
* malformed JSON
|
|
234
|
-
*
|
|
235
|
-
*
|
|
233
|
+
* { kind: "error", reason } when the LLM is unavailable, returns
|
|
234
|
+
* malformed JSON, or hits an input-too-large guard. The caller
|
|
235
|
+
* decides whether to abort the ingest entirely (document mode) or
|
|
236
|
+
* degrade-on-error per session (chat mode, Task 897). Classifier
|
|
237
|
+
* never silently substitutes a degraded write.
|
|
236
238
|
*/
|
|
237
239
|
export declare function classifyDocument(params: ClassifyParams): Promise<ClassifyResult>;
|
|
238
240
|
//# sourceMappingURL=llm-classifier.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"llm-classifier.d.ts","sourceRoot":"","sources":["../../src/lib/llm-classifier.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"llm-classifier.d.ts","sourceRoot":"","sources":["../../src/lib/llm-classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAUH,+DAA+D;AAC/D,MAAM,MAAM,mBAAmB,GAAG,aAAa,GAAG,WAAW,CAAC;AAE9D,mEAAmE;AACnE,MAAM,MAAM,oBAAoB,GAAG,UAAU,GAAG,UAAU,CAAC;AAE3D,kFAAkF;AAClF,MAAM,WAAW,iBAAiB;IAChC,8DAA8D;IAC9D,IAAI,EAAE,MAAM,CAAC;IACb,sCAAsC;IACtC,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACpC,oDAAoD;IACpD,IAAI,EAAE;QACJ,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,oBAAoB,CAAC;QAChC,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KACtC,CAAC;IACF;;;;OAIG;IACH,KAAK,CAAC,EAAE,OAAO,CAAC;CACjB;AAED,oGAAoG;AACpG,MAAM,WAAW,iBAAiB;IAChC;;;;;;;;;;;;;;;;;;;;OAoBG;IACH,IAAI,EAAE,MAAM,CAAC;IACb,kDAAkD;IAClD,KAAK,EAAE,MAAM,CAAC;IACd;;;;;;OAMG;IACH,IAAI,EAAE,MAAM,CAAC;IACb;;;;;OAKG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;;;;;;OAOG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,iFAAiF;IACjF,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACpC;;;;OAIG;IACH,UAAU,EAAE;QACV,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,mBAAmB,CAAC;QAC/B,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KACtC,GAAG,IAAI,CAAC;IACT,oFAAoF;IACpF,OAAO,CAAC,EAAE,iBAAiB,EAAE,CAAC;IAC9B;;;;;;OAMG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED;;;;;GAKG;AACH,MAAM,WAAW,eAAe;IAC9B,2CAA2C;IAC3C,IAAI,EAAE,MAAM,CAAC;IACb,6EAA6E;IAC7E,KAAK,EAAE,MAAM,CAAC;IACd,sEAAsE;IACtE,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,oCAAoC;AACpC,MAAM,WAAW,gBAAgB;IAC/B,kDAAkD;IAClD,eAAe,EAAE,MAAM,CAAC;IACxB,kEAAkE;IAClE,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,6BAA6B;IAC7B,QAAQ,EAAE,iBAAiB,EAAE,CAAC;IAC9B,iFAAiF;IACjF,gBAAgB,EAAE,eAAe,EAAE,CAAC;IACpC;;+CAE2C;IAC3C,aAAa,CAAC,EAAE,KAAK,CAAC;QACpB,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,UAAU,GAAG,UAAU,CAAC;QACnC,UAAU,EAAE,MAAM,CAAC;QACnB,gBAAgB,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,wEAAwE;QACxE,KAAK,CAAC,EAAE,OAAO,CAAC;KACjB,CAAC,CAAC;IACH,mFAAmF;IACnF,mBAAmB,EAAE,MAAM,CAAC;CAC7B;AAED,MAAM,MAAM,cAAc,GACtB;IAAE,IAAI,EAAE,IAAI,CAAC;IAAC,MAAM,EAAE,gBAAgB,CAAA;CAAE,GACxC;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,MAAM,
EAAE,MAAM,CAAA;CAAE,CAAC;AA8EtC;;;;;;;;GAQG;AACH,eAAO,MAAM,kBAAkB,UAAU,CAAC;AAE1C,eAAO,MAAM,sBAAsB,wEAMzB,CAAC;AAEX,eAAO,MAAM,wBAAwB,yKAa3B,CAAC;AAEX,eAAO,MAAM,sBAAsB,4SAqBzB,CAAC;AAEX,8EAA8E;AAC9E,eAAO,MAAM,qBAAqB,sBAAuB,CAAC;AAE1D,eAAO,MAAM,iBAAiB,EAAE,WAAW,CAAC,MAAM,CAMhD,CAAC;AAsKH,MAAM,WAAW,cAAc;IAC7B,wCAAwC;IACxC,SAAS,EAAE,MAAM,CAAC;IAClB;;;;;;;OAOG;IACH,IAAI,CAAC,EAAE,UAAU,GAAG,MAAM,CAAC;IAC3B;;;;;;;;OAQG;IACH,iBAAiB,EAAE,MAAM,CAAC;IAC1B;;;;;;OAMG;IACH,cAAc,EAAE,WAAW,CAAC,MAAM,CAAC,CAAC;IACpC;;;;;;OAMG;IACH,cAAc,EAAE,MAAM,CAAC;IACvB,8EAA8E;IAC9E,YAAY,EAAE,MAAM,CAAC;CACtB;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,gBAAgB,CACpC,MAAM,EAAE,cAAc,GACrB,OAAO,CAAC,cAAc,CAAC,CA0WzB"}
|
|
@@ -17,9 +17,10 @@
|
|
|
17
17
|
* Hallucination defence: every returned `kind` is verified against the
|
|
18
18
|
* loaded ontology label set. Sections whose `kind` is not a real label
|
|
19
19
|
* are tagged `UNMAPPED`. Failure of the LLM call (missing creds, network,
|
|
20
|
-
* malformed JSON) returns `{kind: "
|
|
21
|
-
*
|
|
22
|
-
*
|
|
20
|
+
* malformed JSON) returns `{kind: "error", reason}` (Task 897 vocabulary
|
|
21
|
+
* — was `kind: "fallback"`). The caller decides whether to abort the
|
|
22
|
+
* ingest or degrade-on-error per session; classifier never silently
|
|
23
|
+
* substitutes a degraded write.
|
|
23
24
|
*/
|
|
24
25
|
import { callOauthLlm } from "../../../../../lib/oauth-llm/dist/index.js";
|
|
25
26
|
import { HAIKU_MODEL } from "../../../../../lib/models/dist/index.js";
|
|
@@ -29,13 +30,15 @@ import { chunkDocument, mergeOverlappingSections } from "./document-chunker.js";
|
|
|
29
30
|
// ---------------------------------------------------------------------------
|
|
30
31
|
const MAX_OUTPUT_TOKENS = 8192;
|
|
31
32
|
/**
|
|
32
|
-
* Per-section summary cap (Task 896 clause 1
|
|
33
|
-
*
|
|
34
|
-
*
|
|
35
|
-
*
|
|
36
|
-
*
|
|
33
|
+
* Per-section summary cap (Task 896 clause 1; tightened by Task 897 sub-scope F).
|
|
34
|
+
* Reduced 500 → 100 because the offsets-only schema's per-section JSON now
|
|
35
|
+
* has tiny structural overhead, and a 500-char summary made a 30-section
|
|
36
|
+
* document blow past the prompt's 6000-token output budget. The classifier
|
|
37
|
+
* prompt advertises this cap; the server truncates oversize summaries with
|
|
38
|
+
* an ellipsis. `summaryTruncated` counter on the haiku-ok line surfaces the
|
|
39
|
+
* rate of model misbehaviour without aborting.
|
|
37
40
|
*/
|
|
38
|
-
const SUMMARY_MAX_CHARS =
|
|
41
|
+
const SUMMARY_MAX_CHARS = 100;
|
|
39
42
|
/**
|
|
40
43
|
* Output budget the prompt advertises to the model. ≈6000 tokens leaves
|
|
41
44
|
* headroom under MAX_OUTPUT_TOKENS=8192 for a few hundred sections of
|
|
@@ -62,6 +65,33 @@ const CHUNK_OVERLAP_TOKENS = 5_000;
|
|
|
62
65
|
/** Target chunk char size for the prose chunker (Task 896 clause 3). */
|
|
63
66
|
const CHUNK_CHAR_SIZE = Math.floor(CHUNK_TOKEN_SIZE * CHARS_PER_TOKEN_ESTIMATE);
|
|
64
67
|
const CHUNK_OVERLAP_CHARS = Math.floor(CHUNK_OVERLAP_TOKENS * CHARS_PER_TOKEN_ESTIMATE);
|
|
68
|
+
// ---------------------------------------------------------------------------
|
|
69
|
+
// Task 897 sub-scope F — projected-output chunker gate.
|
|
70
|
+
//
|
|
71
|
+
// Pre-897 dispatch was `documentText.length > CHUNK_CHAR_SIZE` (≈525K chars).
|
|
72
|
+
// That gate fires only on >525K-char inputs, but a 251K-char Adam Mackay
|
|
73
|
+
// archive with ~62 dense chat sessions still produced an 8K-token-truncated
|
|
74
|
+
// response — the input wasn't oversize, the OUTPUT was. Task 896's
|
|
75
|
+
// offsets-only schema cut per-section JSON down dramatically, but a dense
|
|
76
|
+
// archive at avg ~1500 chars per section still emits ~170 sections, and at
|
|
77
|
+
// ~75 tokens per section that's ~12K output tokens — twice the prompt's
|
|
78
|
+
// advertised 6K budget.
|
|
79
|
+
//
|
|
80
|
+
// New gate: project output tokens before the call. With SUMMARY_MAX_CHARS=100
|
|
81
|
+
// the per-section JSON shape is roughly:
|
|
82
|
+
// { kind, title (≤120), summary (≤100), sourceStart, sourceEnd, properties }
|
|
83
|
+
// ≈ 250–300 chars ≈ 75 tokens worst case. Conservative chars-per-section is
|
|
84
|
+
// ~1500 (covers contract-clause documents and dense chat). Project sections
|
|
85
|
+
// from input length and force chunked dispatch when the projection exceeds
|
|
86
|
+
// PROMPT_OUTPUT_TOKEN_BUDGET. The threshold lands at ~120K chars input —
|
|
87
|
+
// well below CHUNK_CHAR_SIZE, which now serves as a defence-in-depth ceiling.
|
|
88
|
+
// ---------------------------------------------------------------------------
|
|
89
|
+
const PROJECTED_CHARS_PER_SECTION = 1_500;
|
|
90
|
+
const PROJECTED_TOKENS_PER_SECTION = 75;
|
|
91
|
+
function projectOutputTokens(documentTextLength) {
|
|
92
|
+
const estimatedSections = Math.ceil(documentTextLength / PROJECTED_CHARS_PER_SECTION);
|
|
93
|
+
return estimatedSections * PROJECTED_TOKENS_PER_SECTION;
|
|
94
|
+
}
|
|
65
95
|
/**
|
|
66
96
|
* Closed enumeration of section `kind` values. Each becomes a secondary
|
|
67
97
|
* label on the `:Section` node (e.g. `:Section:Position`). Anything outside
|
|
@@ -231,13 +261,32 @@ const SYSTEM_PROMPT = [
|
|
|
231
261
|
// ---------------------------------------------------------------------------
|
|
232
262
|
// Helpers
|
|
233
263
|
// ---------------------------------------------------------------------------
|
|
264
|
+
/**
|
|
265
|
+
* Strip a balanced markdown fence from a JSON response. Returns:
|
|
266
|
+
* { kind: "ok", json } when the text was bare JSON or fully fenced.
|
|
267
|
+
* { kind: "truncated", reason } when the text opens a fence but never closes
|
|
268
|
+
* it — a structural fingerprint of `max_tokens` mid-response (Task 897
|
|
269
|
+
* sub-scope G, replacing the pre-897 silent fall-through to JSON.parse
|
|
270
|
+
* which masked truncation as `malformed JSON`).
|
|
271
|
+
*/
|
|
234
272
|
function extractJson(raw) {
|
|
235
273
|
const trimmed = raw.trim();
|
|
236
|
-
const
|
|
237
|
-
|
|
274
|
+
const balanced = trimmed.match(/^```(?:json)?\s*([\s\S]*?)```$/);
|
|
275
|
+
if (balanced)
|
|
276
|
+
return { kind: "ok", json: balanced[1].trim() };
|
|
277
|
+
// One-sided fence — opens with ``` (optionally `json`) but no closing fence.
|
|
278
|
+
// This is the truncation fingerprint: the model started a fenced response
|
|
279
|
+
// and ran out of output tokens before closing it.
|
|
280
|
+
if (/^```(?:json)?/.test(trimmed) && !/```\s*$/.test(trimmed)) {
|
|
281
|
+
return {
|
|
282
|
+
kind: "truncated",
|
|
283
|
+
reason: "response-truncated: opening ``` fence with no closing — model likely hit max_tokens mid-response",
|
|
284
|
+
};
|
|
285
|
+
}
|
|
286
|
+
return { kind: "ok", json: trimmed };
|
|
238
287
|
}
|
|
239
|
-
function
|
|
240
|
-
process.stderr.write(`[memory-classify] [${accountId}]
|
|
288
|
+
/**
 * Write a single classifier error line to stderr, tagged with the account.
 *
 * @param {string} accountId - Account identifier placed in the log prefix.
 * @param {string} reason - Failure description, emitted verbatim inside
 *   the `reason="…"` field.
 */
function logError(accountId, reason) {
    const line = `[memory-classify] [${accountId}] ERROR reason="${reason}"\n`;
    process.stderr.write(line);
}
|
|
242
291
|
function asString(v) {
|
|
243
292
|
return typeof v === "string" ? v : null;
|
|
@@ -270,33 +319,45 @@ function asNonNegativeInt(v) {
|
|
|
270
319
|
* Sections the classifier could not natural-edge appear in
|
|
271
320
|
* `output.orphanCandidates`. The skill surfaces orphans loudly to
|
|
272
321
|
* the operator.
|
|
273
|
-
* { kind: "
|
|
274
|
-
* malformed JSON
|
|
275
|
-
*
|
|
276
|
-
*
|
|
322
|
+
* { kind: "error", reason } when the LLM is unavailable, returns
|
|
323
|
+
* malformed JSON, or hits an input-too-large guard. The caller
|
|
324
|
+
* decides whether to abort the ingest entirely (document mode) or
|
|
325
|
+
* degrade-on-error per session (chat mode, Task 897). Classifier
|
|
326
|
+
* never silently substitutes a degraded write.
|
|
277
327
|
*/
|
|
278
328
|
export async function classifyDocument(params) {
|
|
279
329
|
const { accountId, anchorDescription, ontologyLabels, naturalEdgeMap, documentText } = params;
|
|
280
330
|
const mode = params.mode ?? "document";
|
|
281
|
-
// Task 896 clause 3
|
|
282
|
-
//
|
|
331
|
+
// Task 896 clause 3 + Task 897 sub-scope F dispatch.
|
|
332
|
+
//
|
|
333
|
+
// Document mode forces the chunked path when EITHER:
|
|
334
|
+
// - the input exceeds CHUNK_CHAR_SIZE (input-size gate, defence-in-depth), OR
|
|
335
|
+
// - the projected output tokens exceed PROMPT_OUTPUT_TOKEN_BUDGET
|
|
336
|
+
// (output-size gate, the primary trigger post-Task-896 since the
|
|
337
|
+
// offsets-only schema means input size no longer predicts output size).
|
|
338
|
+
//
|
|
339
|
+
// Oversize chat mode loud-fails (sessionize must keep sessions under
|
|
283
340
|
// the ceiling, per eng review). Single-shot path stays unchanged below.
|
|
284
|
-
if (mode === "document"
|
|
285
|
-
|
|
341
|
+
if (mode === "document") {
|
|
342
|
+
const projected = projectOutputTokens(documentText.length);
|
|
343
|
+
if (documentText.length > CHUNK_CHAR_SIZE || projected > PROMPT_OUTPUT_TOKEN_BUDGET) {
|
|
344
|
+
process.stderr.write(`[memory-classify] [${accountId}] dispatch=chunked chars=${documentText.length} projectedOutputTokens=${projected} budget=${PROMPT_OUTPUT_TOKEN_BUDGET}\n`);
|
|
345
|
+
return classifyDocumentChunked(params);
|
|
346
|
+
}
|
|
286
347
|
}
|
|
287
348
|
if (documentText.length > INPUT_CHAR_CEILING) {
|
|
288
349
|
const overage = `chars=${documentText.length}, ceiling=${INPUT_CHAR_CEILING}`;
|
|
289
350
|
if (mode === "chat") {
|
|
290
|
-
|
|
351
|
+
logError(accountId, `input-too-large: chat session exceeds Haiku input ceiling (${overage}). Sessionize must split sessions before classify (Task 894).`);
|
|
291
352
|
}
|
|
292
353
|
else {
|
|
293
354
|
// Document mode > INPUT_CHAR_CEILING but ≤ CHUNK_CHAR_SIZE shouldn't
|
|
294
355
|
// happen since CHUNK_CHAR_SIZE < INPUT_CHAR_CEILING — kept as
|
|
295
356
|
// defence-in-depth in case constants drift.
|
|
296
|
-
|
|
357
|
+
logError(accountId, `input-too-large: document exceeds Haiku input ceiling without chunking (${overage}). Constants drift between CHUNK_CHAR_SIZE and INPUT_CHAR_CEILING.`);
|
|
297
358
|
}
|
|
298
359
|
return {
|
|
299
|
-
kind: "
|
|
360
|
+
kind: "error",
|
|
300
361
|
reason: `Input is ${documentText.length} chars; classifier ceiling is ${INPUT_CHAR_CEILING}.`,
|
|
301
362
|
};
|
|
302
363
|
}
|
|
@@ -343,33 +404,47 @@ export async function classifyDocument(params) {
|
|
|
343
404
|
});
|
|
344
405
|
const haikuMs = Date.now() - haikuStart;
|
|
345
406
|
if (llmResult.kind === "fallback") {
|
|
346
|
-
|
|
347
|
-
return { kind: "
|
|
407
|
+
logError(accountId, `${llmResult.cause}: ${llmResult.reason}`);
|
|
408
|
+
return { kind: "error", reason: llmResult.reason };
|
|
348
409
|
}
|
|
349
410
|
const responseText = llmResult.text;
|
|
411
|
+
// Task 897 sub-scope G — surface finish-reason + output-tokens per call so
|
|
412
|
+
// a `max_tokens` from Haiku appears directly in observability instead of
|
|
413
|
+
// surfacing downstream as `malformed JSON`.
|
|
414
|
+
const stopReason = llmResult.stopReason ?? "(unknown)";
|
|
415
|
+
const outputTokens = llmResult.outputTokens ?? -1;
|
|
416
|
+
process.stderr.write(`[memory-classify] [${accountId}] response-finish-reason=${stopReason} output-tokens=${outputTokens}\n`);
|
|
350
417
|
// --- Parse + validate ---
|
|
351
|
-
|
|
418
|
+
// Task 897 sub-scope G — fence handling is structural now. A one-sided
|
|
419
|
+
// fence (opening ``` with no closing) is the `max_tokens` truncation
|
|
420
|
+
// fingerprint; surface it as `response-truncated` instead of falling
|
|
421
|
+
// through to JSON.parse and reporting `malformed JSON`.
|
|
422
|
+
const fence = extractJson(responseText);
|
|
423
|
+
if (fence.kind === "truncated") {
|
|
424
|
+
logError(accountId, `${fence.reason} stop-reason=${stopReason} output-tokens=${outputTokens} len=${responseText.length} pre-strip-tail=${JSON.stringify(responseText.slice(-200))}`);
|
|
425
|
+
return { kind: "error", reason: "Haiku response truncated mid-fence (likely max_tokens hit)" };
|
|
426
|
+
}
|
|
427
|
+
const jsonText = fence.json;
|
|
352
428
|
let parsed;
|
|
353
429
|
try {
|
|
354
430
|
parsed = JSON.parse(jsonText);
|
|
355
431
|
}
|
|
356
432
|
catch (err) {
|
|
357
|
-
// Task 896 clause 5: surface diagnostics so a malformed-JSON
|
|
358
|
-
// distinguishes
|
|
359
|
-
//
|
|
360
|
-
//
|
|
361
|
-
// ingest bottomed out here with no visible cause.
|
|
433
|
+
// Task 896 clause 5: surface diagnostics so a malformed-JSON error
|
|
434
|
+
// distinguishes fence drift from genuine model junk. The truncation case
|
|
435
|
+
// is now caught above by `extractJson` returning `kind: "truncated"`,
|
|
436
|
+
// so this branch covers genuinely malformed JSON only.
|
|
362
437
|
const message = err instanceof Error ? err.message : String(err);
|
|
363
438
|
const fenceStripped = jsonText !== responseText;
|
|
364
|
-
|
|
439
|
+
logError(accountId, `malformed JSON: parse-error=${JSON.stringify(message)} len=${responseText.length} fence-stripped=${fenceStripped} stop-reason=${stopReason} ` +
|
|
365
440
|
`pre-strip-head=${JSON.stringify(responseText.slice(0, 200))} ` +
|
|
366
441
|
`pre-strip-tail=${JSON.stringify(responseText.slice(-200))}`);
|
|
367
|
-
return { kind: "
|
|
442
|
+
return { kind: "error", reason: "Haiku returned malformed JSON" };
|
|
368
443
|
}
|
|
369
444
|
const root = asObject(parsed);
|
|
370
445
|
if (!root) {
|
|
371
|
-
|
|
372
|
-
return { kind: "
|
|
446
|
+
logError(accountId, "response is not an object");
|
|
447
|
+
return { kind: "error", reason: "invalid response shape" };
|
|
373
448
|
}
|
|
374
449
|
const documentSummary = asString(root.documentSummary) ?? "";
|
|
375
450
|
const documentKeywords = Array.isArray(root.documentKeywords)
|
|
@@ -377,8 +452,8 @@ export async function classifyDocument(params) {
|
|
|
377
452
|
: [];
|
|
378
453
|
const rawSections = Array.isArray(root.sections) ? root.sections : null;
|
|
379
454
|
if (!rawSections) {
|
|
380
|
-
|
|
381
|
-
return { kind: "
|
|
455
|
+
logError(accountId, "missing sections array");
|
|
456
|
+
return { kind: "error", reason: "invalid response shape (no sections)" };
|
|
382
457
|
}
|
|
383
458
|
const sections = [];
|
|
384
459
|
let hallucinatedRelated = 0;
|
|
@@ -512,9 +587,9 @@ export async function classifyDocument(params) {
|
|
|
512
587
|
// sees the regression — typically caused by a model that ignored the new
|
|
513
588
|
// offset contract and reverted to emitting `body`.
|
|
514
589
|
if (rawSections.length > 0 && sections.length === 0) {
|
|
515
|
-
|
|
590
|
+
logError(accountId, `missing-offsets: every section failed offset validation (rawSections=${rawSections.length}, droppedForOffsets=${droppedForOffsets}). Likely cause: Haiku emitted body text instead of sourceStart/sourceEnd offsets, or the prompt update didn't reach the model.`);
|
|
516
591
|
return {
|
|
517
|
-
kind: "
|
|
592
|
+
kind: "error",
|
|
518
593
|
reason: "Haiku response had no parseable section offsets",
|
|
519
594
|
};
|
|
520
595
|
}
|
|
@@ -604,9 +679,9 @@ async function classifyDocumentChunked(params) {
|
|
|
604
679
|
// chunker bug or constants-drift — loud-fail instead of pretending.
|
|
605
680
|
for (const c of chunks) {
|
|
606
681
|
if (c.chunkText.length > INPUT_CHAR_CEILING) {
|
|
607
|
-
|
|
682
|
+
logError(accountId, `input-too-large: chunker emitted oversize chunk (chars=${c.chunkText.length}, ceiling=${INPUT_CHAR_CEILING}). Chunker invariant violated.`);
|
|
608
683
|
return {
|
|
609
|
-
kind: "
|
|
684
|
+
kind: "error",
|
|
610
685
|
reason: `Chunker produced an oversize chunk (${c.chunkText.length} > ${INPUT_CHAR_CEILING})`,
|
|
611
686
|
};
|
|
612
687
|
}
|
|
@@ -622,7 +697,7 @@ async function classifyDocumentChunked(params) {
|
|
|
622
697
|
// Recurse into the single-shot path — chunkSize < CHUNK_CHAR_SIZE is the
|
|
623
698
|
// dispatch threshold so the recursive call lands in the existing logic.
|
|
624
699
|
const chunkResult = await classifyDocument({ ...params, documentText: c.chunkText });
|
|
625
|
-
if (chunkResult.kind === "
|
|
700
|
+
if (chunkResult.kind === "error") {
|
|
626
701
|
// One chunk failure aborts the whole ingest (loud-failure doctrine).
|
|
627
702
|
return chunkResult;
|
|
628
703
|
}
|