@rubytech/create-realagent 1.0.830 → 1.0.831

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/package.json +1 -1
  2. package/payload/platform/lib/oauth-llm/dist/index.d.ts +8 -1
  3. package/payload/platform/lib/oauth-llm/dist/index.d.ts.map +1 -1
  4. package/payload/platform/lib/oauth-llm/dist/index.js +5 -1
  5. package/payload/platform/lib/oauth-llm/dist/index.js.map +1 -1
  6. package/payload/platform/lib/oauth-llm/src/index.ts +19 -4
  7. package/payload/platform/neo4j/migrations/008-adminuser-accountid-backfill.ts +85 -0
  8. package/payload/platform/plugins/admin/mcp/dist/index.js +25 -3
  9. package/payload/platform/plugins/admin/mcp/dist/index.js.map +1 -1
  10. package/payload/platform/plugins/docs/references/internals.md +1 -1
  11. package/payload/platform/plugins/memory/bin/conversation-archive-ingest.mjs +50 -27
  12. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/timestamp-scanner.d.ts +49 -0
  13. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/timestamp-scanner.d.ts.map +1 -0
  14. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/timestamp-scanner.js +35 -0
  15. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/timestamp-scanner.js.map +1 -0
  16. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/types.d.ts +6 -7
  17. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/types.d.ts.map +1 -1
  18. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/types.js +9 -1
  19. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/types.js.map +1 -1
  20. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/whatsapp-text.d.ts.map +1 -1
  21. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/whatsapp-text.js +49 -131
  22. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/whatsapp-text.js.map +1 -1
  23. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/delta-cursor.d.ts.map +1 -1
  24. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/delta-cursor.js +0 -1
  25. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/delta-cursor.js.map +1 -1
  26. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.d.ts +0 -2
  27. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.d.ts.map +1 -1
  28. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.js +10 -11
  29. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.js.map +1 -1
  30. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.d.ts +3 -4
  31. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.d.ts.map +1 -1
  32. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.js +11 -42
  33. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.js.map +1 -1
  34. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/to-turn-text.d.ts.map +1 -1
  35. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/to-turn-text.js +10 -8
  36. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/to-turn-text.js.map +1 -1
  37. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts +10 -8
  38. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts.map +1 -1
  39. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js +118 -43
  40. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js.map +1 -1
  41. package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.d.ts +3 -2
  42. package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.d.ts.map +1 -1
  43. package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.js +40 -18
  44. package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.js.map +1 -1
  45. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-source-agnosticism.test.js +4 -6
  46. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-source-agnosticism.test.js.map +1 -1
  47. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-whatsapp-text.test.js +59 -17
  48. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-whatsapp-text.test.js.map +1 -1
  49. package/payload/platform/plugins/memory/mcp/dist/tools/memory-rank.js +2 -2
  50. package/payload/platform/plugins/memory/mcp/dist/tools/memory-rank.js.map +1 -1
  51. package/payload/server/chunk-BCFM2UPH.js +2305 -0
  52. package/payload/server/chunk-CV3HPX46.js +10097 -0
  53. package/payload/server/chunk-J6YWEJBN.js +1116 -0
  54. package/payload/server/chunk-OCPJGZ6S.js +654 -0
  55. package/payload/server/client-pool-ZNGN66GN.js +32 -0
  56. package/payload/server/cloudflare-task-tracker-MHALDN54.js +19 -0
  57. package/payload/server/maxy-edge.js +3 -3
  58. package/payload/server/neo4j-migrations-6RW423E2.js +530 -0
  59. package/payload/server/server.js +30 -19
@@ -1,24 +1,24 @@
1
1
  import { createHash } from "node:crypto";
2
2
  // ---------------------------------------------------------------------------
3
3
  // derive-keys — natural-key derivation for conversation-archive ingestion
4
- // (Task 894, supersedes Task 891's whatsapp-import path).
4
+ // (Task 894 / 897).
5
5
  //
6
6
  // Pure functions. No I/O. The whole point is that re-imports of the same
7
7
  // archive collapse to the same identity regardless of release-level drift in
8
8
  // chunk indices, hash widths, or arbitrary tiebreakers.
9
9
  //
10
10
  // conversationIdentity = sha256(accountId + ":" + sortedParticipantElementIds.join(","))
11
- // messageContentHash = sha256(dateSent + "|" + NFKC-trim-lower(senderName) + "|" + body)
11
+ // messageContentHash = sha256(dateSent + "|" + body)
12
12
  //
13
13
  // `conversationIdentity` is stable across re-exports — same operator + same
14
- // participant set → same identity, regardless of file bytes. DM and group
15
- // follow the same formula; the only difference is the participant array
16
- // length. `messageContentHash` is content-only (no archive sha256, no chunk
17
- // index) so cursor lookup survives a fresh re-export of the same chat.
14
+ // participant set → same identity, regardless of file bytes.
15
+ //
16
+ // `messageContentHash` (Task 897): hashes `dateSent + body`. Per-line
17
+ // senderName is gone (the scanner is body-opaque); the body string still
18
+ // carries any `Sender:` prefix verbatim, so two senders posting different
19
+ // content at the same instant stay distinguishable. The cursor survives a
20
+ // fresh re-export as long as the source preserves the prefix in the body.
18
21
  // ---------------------------------------------------------------------------
19
- export function normaliseSenderName(name) {
20
- return name.normalize("NFKC").trim().toLowerCase();
21
- }
22
22
  export function sha256Hex(input) {
23
23
  return createHash("sha256").update(input).digest("hex");
24
24
  }
@@ -33,7 +33,6 @@ export function deriveConversationIdentity(input) {
33
33
  return sha256Hex(`${input.accountId}:${sorted.join(",")}`);
34
34
  }
35
35
  export function deriveMessageContentHash(input) {
36
- const norm = normaliseSenderName(input.senderName);
37
- return sha256Hex(`${input.dateSent}|${norm}|${input.body}`);
36
+ return sha256Hex(`${input.dateSent}|${input.body}`);
38
37
  }
39
38
  //# sourceMappingURL=derive-keys.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"derive-keys.js","sourceRoot":"","sources":["../../../src/lib/conversation-pipeline/derive-keys.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzC,8EAA8E;AAC9E,0EAA0E;AAC1E,0DAA0D;AAC1D,EAAE;AACF,yEAAyE;AACzE,6EAA6E;AAC7E,wDAAwD;AACxD,EAAE;AACF,2FAA2F;AAC3F,6FAA6F;AAC7F,EAAE;AACF,4EAA4E;AAC5E,0EAA0E;AAC1E,wEAAwE;AACxE,4EAA4E;AAC5E,uEAAuE;AACvE,8EAA8E;AAE9E,MAAM,UAAU,mBAAmB,CAAC,IAAY;IAC9C,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;AACrD,CAAC;AAED,MAAM,UAAU,SAAS,CAAC,KAAa;IACrC,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AAC1D,CAAC;AASD,MAAM,UAAU,0BAA0B,CACxC,KAAsC;IAEtC,IAAI,CAAC,KAAK,CAAC,SAAS,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,IAAI,EAAE,EAAE,CAAC;QAChD,MAAM,IAAI,KAAK,CAAC,mDAAmD,CAAC,CAAC;IACvE,CAAC;IACD,IAAI,KAAK,CAAC,qBAAqB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC7C,MAAM,IAAI,KAAK,CAAC,qEAAqE,CAAC,CAAC;IACzF,CAAC;IACD,MAAM,MAAM,GAAG,CAAC,GAAG,KAAK,CAAC,qBAAqB,CAAC,CAAC,IAAI,EAAE,CAAC;IACvD,OAAO,SAAS,CAAC,GAAG,KAAK,CAAC,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;AAC7D,CAAC;AAQD,MAAM,UAAU,wBAAwB,CACtC,KAAoC;IAEpC,MAAM,IAAI,GAAG,mBAAmB,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;IACnD,OAAO,SAAS,CAAC,GAAG,KAAK,CAAC,QAAQ,IAAI,IAAI,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;AAC9D,CAAC"}
1
+ {"version":3,"file":"derive-keys.js","sourceRoot":"","sources":["../../../src/lib/conversation-pipeline/derive-keys.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzC,8EAA8E;AAC9E,0EAA0E;AAC1E,oBAAoB;AACpB,EAAE;AACF,yEAAyE;AACzE,6EAA6E;AAC7E,wDAAwD;AACxD,EAAE;AACF,2FAA2F;AAC3F,yDAAyD;AACzD,EAAE;AACF,4EAA4E;AAC5E,6DAA6D;AAC7D,EAAE;AACF,sEAAsE;AACtE,yEAAyE;AACzE,0EAA0E;AAC1E,0EAA0E;AAC1E,0EAA0E;AAC1E,8EAA8E;AAE9E,MAAM,UAAU,SAAS,CAAC,KAAa;IACrC,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AAC1D,CAAC;AASD,MAAM,UAAU,0BAA0B,CACxC,KAAsC;IAEtC,IAAI,CAAC,KAAK,CAAC,SAAS,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,IAAI,EAAE,EAAE,CAAC;QAChD,MAAM,IAAI,KAAK,CAAC,mDAAmD,CAAC,CAAC;IACvE,CAAC;IACD,IAAI,KAAK,CAAC,qBAAqB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC7C,MAAM,IAAI,KAAK,CAAC,qEAAqE,CAAC,CAAC;IACzF,CAAC;IACD,MAAM,MAAM,GAAG,CAAC,GAAG,KAAK,CAAC,qBAAqB,CAAC,CAAC,IAAI,EAAE,CAAC;IACvD,OAAO,SAAS,CAAC,GAAG,KAAK,CAAC,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;AAC7D,CAAC;AAOD,MAAM,UAAU,wBAAwB,CACtC,KAAoC;IAEpC,OAAO,SAAS,CAAC,GAAG,KAAK,CAAC,QAAQ,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;AACtD,CAAC"}
@@ -3,15 +3,14 @@ export declare class IngestUserFacingError extends Error {
3
3
  readonly userFacing = true;
4
4
  constructor(message: string);
5
5
  }
6
- export interface BindCanonicalSendersInput {
6
+ export interface VerifyParticipantElementIdsInput {
7
7
  session: Neo4jSession;
8
8
  accountId: string;
9
9
  ownerElementId: string;
10
10
  participantElementIds: readonly string[];
11
- senderNames: readonly string[];
12
11
  }
13
- export interface BindCanonicalSendersResult {
12
+ export interface VerifyParticipantElementIdsResult {
14
13
  participantsResolved: number;
15
14
  }
16
- export declare function bindCanonicalSenders(input: BindCanonicalSendersInput): Promise<BindCanonicalSendersResult>;
15
+ export declare function verifyParticipantElementIds(input: VerifyParticipantElementIdsInput): Promise<VerifyParticipantElementIdsResult>;
17
16
  //# sourceMappingURL=sender-bind.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"sender-bind.d.ts","sourceRoot":"","sources":["../../../src/lib/conversation-pipeline/sender-bind.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,IAAI,YAAY,EAAE,MAAM,cAAc,CAAC;AAkB5D,qBAAa,qBAAsB,SAAQ,KAAK;IAC9C,QAAQ,CAAC,UAAU,QAAQ;gBACf,OAAO,EAAE,MAAM;CAI5B;AAED,MAAM,WAAW,yBAAyB;IACxC,OAAO,EAAE,YAAY,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;IAClB,cAAc,EAAE,MAAM,CAAC;IACvB,qBAAqB,EAAE,SAAS,MAAM,EAAE,CAAC;IACzC,WAAW,EAAE,SAAS,MAAM,EAAE,CAAC;CAChC;AAED,MAAM,WAAW,0BAA0B;IACzC,oBAAoB,EAAE,MAAM,CAAC;CAC9B;AAaD,wBAAsB,oBAAoB,CACxC,KAAK,EAAE,yBAAyB,GAC/B,OAAO,CAAC,0BAA0B,CAAC,CAsDrC"}
1
+ {"version":3,"file":"sender-bind.d.ts","sourceRoot":"","sources":["../../../src/lib/conversation-pipeline/sender-bind.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,IAAI,YAAY,EAAE,MAAM,cAAc,CAAC;AAgB5D,qBAAa,qBAAsB,SAAQ,KAAK;IAC9C,QAAQ,CAAC,UAAU,QAAQ;gBACf,OAAO,EAAE,MAAM;CAI5B;AAED,MAAM,WAAW,gCAAgC;IAC/C,OAAO,EAAE,YAAY,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;IAClB,cAAc,EAAE,MAAM,CAAC;IACvB,qBAAqB,EAAE,SAAS,MAAM,EAAE,CAAC;CAC1C;AAED,MAAM,WAAW,iCAAiC;IAChD,oBAAoB,EAAE,MAAM,CAAC;CAC9B;AAUD,wBAAsB,2BAA2B,CAC/C,KAAK,EAAE,gCAAgC,GACtC,OAAO,CAAC,iCAAiC,CAAC,CAkC5C"}
@@ -1,17 +1,15 @@
1
- import { normaliseSenderName } from "./derive-keys.js";
2
1
  // ---------------------------------------------------------------------------
3
- // sender-bind — closed-set sender resolution against operator-confirmed
4
- // :Person / :AdminUser elementIds (Task 894, replaces Task 887 §A0 path).
2
+ // sender-bind — defensive participant-elementId verification (Task 894 / 897).
5
3
  //
6
- // Every distinct senderName parsed from the source must resolve to one of
7
- // the supplied elementIds. Any miss LOUD-FAILs with a `parser-miss` shape
8
- // preserved verbatim from the prior whatsapp-import contract.
4
+ // Task 897 dropped per-line senderName, so the closed-set sender-resolution
5
+ // step that lived here is gone. What remains is a defensive read pass: every
6
+ // supplied participant elementId must exist in the graph, must carry the
7
+ // supplied accountId, and must be labelled :Person or :AdminUser. If any
8
+ // fails, the bin LOUD-FAILs before the writer attaches PARTICIPANT_IN edges
9
+ // to a stale or cross-account node.
9
10
  //
10
- // No auto-create. The operator confirms canonical participants before this
11
- // function ever runs; if a senderName falls outside the closed set, the
12
- // operator either re-runs with the missing elementId added or reports a
13
- // parser bug. This guards `feedback_archives_are_not_documents.md` and the
14
- // prior Task-887 leak fingerprint.
11
+ // The operator confirms participants up front; participants attach to the
12
+ // :ConversationArchive parent via :PARTICIPANT_IN, never per-message.
15
13
  // ---------------------------------------------------------------------------
16
14
  export class IngestUserFacingError extends Error {
17
15
  userFacing = true;
@@ -25,13 +23,10 @@ UNWIND $ids AS id
25
23
  MATCH (n) WHERE elementId(n) = id
26
24
  RETURN elementId(n) AS elemId,
27
25
  labels(n) AS labels,
28
- coalesce(n.name, '') AS name,
29
- coalesce(n.givenName, '') AS givenName,
30
- coalesce(n.familyName, '') AS familyName,
31
26
  coalesce(n.accountId, '') AS accountId
32
27
  `;
33
- export async function bindCanonicalSenders(input) {
34
- const { session, accountId, ownerElementId, participantElementIds, senderNames } = input;
28
+ export async function verifyParticipantElementIds(input) {
29
+ const { session, accountId, ownerElementId, participantElementIds } = input;
35
30
  const allIds = [ownerElementId, ...participantElementIds];
36
31
  const distinctIds = Array.from(new Set(allIds));
37
32
  if (distinctIds.length !== allIds.length) {
@@ -39,7 +34,6 @@ export async function bindCanonicalSenders(input) {
39
34
  }
40
35
  const res = await session.executeRead(async (tx) => tx.run(CANONICAL_FETCH_CYPHER, { ids: distinctIds }));
41
36
  const seenIds = new Set();
42
- const index = new Map();
43
37
  for (const r of res.records) {
44
38
  const elemId = r.get("elemId");
45
39
  const labels = r.get("labels") || [];
@@ -54,37 +48,12 @@ export async function bindCanonicalSenders(input) {
54
48
  throw new IngestUserFacingError(`node ${elemId} has labels [${labels.join(",")}]; expected :Person or :AdminUser`);
55
49
  }
56
50
  seenIds.add(elemId);
57
- const candidates = [];
58
- const name = r.get("name") || "";
59
- const given = r.get("givenName") || "";
60
- const family = r.get("familyName") || "";
61
- if (name)
62
- candidates.push(name);
63
- if (given && family)
64
- candidates.push(`${given} ${family}`);
65
- if (given)
66
- candidates.push(given);
67
- if (family)
68
- candidates.push(family);
69
- for (const c of candidates) {
70
- const norm = normaliseSenderName(c);
71
- if (!norm)
72
- continue;
73
- if (!index.has(norm))
74
- index.set(norm, elemId);
75
- }
76
51
  }
77
52
  for (const id of distinctIds) {
78
53
  if (!seenIds.has(id)) {
79
54
  throw new IngestUserFacingError(`elementId ${id} not found in graph`);
80
55
  }
81
56
  }
82
- for (const senderName of senderNames) {
83
- const norm = normaliseSenderName(senderName);
84
- if (!index.has(norm)) {
85
- throw new IngestUserFacingError(`parser-miss reason="senderName=${senderName} not in confirmed participant set (${distinctIds.length} confirmed elementIds; re-run with the missing :Person elementId in --participant-person-ids)"`);
86
- }
87
- }
88
57
  return { participantsResolved: seenIds.size };
89
58
  }
90
59
  //# sourceMappingURL=sender-bind.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"sender-bind.js","sourceRoot":"","sources":["../../../src/lib/conversation-pipeline/sender-bind.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,mBAAmB,EAAE,MAAM,kBAAkB,CAAC;AAEvD,8EAA8E;AAC9E,wEAAwE;AACxE,0EAA0E;AAC1E,EAAE;AACF,0EAA0E;AAC1E,0EAA0E;AAC1E,8DAA8D;AAC9D,EAAE;AACF,2EAA2E;AAC3E,wEAAwE;AACxE,wEAAwE;AACxE,2EAA2E;AAC3E,mCAAmC;AACnC,8EAA8E;AAE9E,MAAM,OAAO,qBAAsB,SAAQ,KAAK;IACrC,UAAU,GAAG,IAAI,CAAC;IAC3B,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,CAAC,CAAC;QACf,IAAI,CAAC,IAAI,GAAG,uBAAuB,CAAC;IACtC,CAAC;CACF;AAcD,MAAM,sBAAsB,GAAG;;;;;;;;;CAS9B,CAAC;AAEF,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,KAAgC;IAEhC,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,cAAc,EAAE,qBAAqB,EAAE,WAAW,EAAE,GAAG,KAAK,CAAC;IACzF,MAAM,MAAM,GAAG,CAAC,cAAc,EAAE,GAAG,qBAAqB,CAAC,CAAC;IAC1D,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC;IAChD,IAAI,WAAW,CAAC,MAAM,KAAK,MAAM,CAAC,MAAM,EAAE,CAAC;QACzC,MAAM,IAAI,qBAAqB,CAC7B,sFAAsF,CACvF,CAAC;IACJ,CAAC;IACD,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,WAAW,CAAC,KAAK,EAAE,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,GAAG,CAAC,sBAAsB,EAAE,EAAE,GAAG,EAAE,WAAW,EAAE,CAAC,CAAC,CAAC;IAC1G,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAClC,MAAM,KAAK,GAAG,IAAI,GAAG,EAAkB,CAAC;IACxC,KAAK,MAAM,CAAC,IAAI,GAAG,CAAC,OAAO,EAAE,CAAC;QAC5B,MAAM,MAAM,GAAG,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAW,CAAC;QACzC,MAAM,MAAM,GAAI,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAc,IAAI,EAAE,CAAC;QACnD,MAAM,IAAI,GAAI,CAAC,CAAC,GAAG,CAAC,WAAW,CAAY,IAAI,EAAE,CAAC;QAClD,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,MAAM,IAAI,qBAAqB,CAAC,QAAQ,MAAM,wDAAwD,CAAC,CAAC;QAC1G,CAAC;QACD,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;YACvB,MAAM,IAAI,qBAAqB,CAAC,QAAQ,MAAM,uBAAuB,IAAI,SAAS,SAAS,EAAE,CAAC,CAAC;QACjG,CAAC;QACD,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;YAChE,MAAM,IAAI,qBAAqB,CAAC,QAAQ,MAAM,gBAAgB,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,mCAAmC,CAAC,CAAC;QACrH,CAAC;QACD,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QACpB,MAAM,UAAU,GAAa,EAAE,CAAC;QAChC,MAAM,IAAI,GAAI,CAAC,CAAC,GAAG,CAAC,MAAM,CAAY,IAAI,EAAE,CAAC;QAC7C,MAAM,KAAK,GAAI,CAAC,CAAC,GAAG,CAA
C,WAAW,CAAY,IAAI,EAAE,CAAC;QACnD,MAAM,MAAM,GAAI,CAAC,CAAC,GAAG,CAAC,YAAY,CAAY,IAAI,EAAE,CAAC;QACrD,IAAI,IAAI;YAAE,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAChC,IAAI,KAAK,IAAI,MAAM;YAAE,UAAU,CAAC,IAAI,CAAC,GAAG,KAAK,IAAI,MAAM,EAAE,CAAC,CAAC;QAC3D,IAAI,KAAK;YAAE,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAClC,IAAI,MAAM;YAAE,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACpC,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;YAC3B,MAAM,IAAI,GAAG,mBAAmB,CAAC,CAAC,CAAC,CAAC;YACpC,IAAI,CAAC,IAAI;gBAAE,SAAS;YACpB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC;gBAAE,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;QAChD,CAAC;IACH,CAAC;IACD,KAAK,MAAM,EAAE,IAAI,WAAW,EAAE,CAAC;QAC7B,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;YACrB,MAAM,IAAI,qBAAqB,CAAC,aAAa,EAAE,qBAAqB,CAAC,CAAC;QACxE,CAAC;IACH,CAAC;IACD,KAAK,MAAM,UAAU,IAAI,WAAW,EAAE,CAAC;QACrC,MAAM,IAAI,GAAG,mBAAmB,CAAC,UAAU,CAAC,CAAC;QAC7C,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;YACrB,MAAM,IAAI,qBAAqB,CAC7B,kCAAkC,UAAU,sCAAsC,WAAW,CAAC,MAAM,gGAAgG,CACrM,CAAC;QACJ,CAAC;IACH,CAAC;IACD,OAAO,EAAE,oBAAoB,EAAE,OAAO,CAAC,IAAI,EAAE,CAAC;AAChD,CAAC"}
1
+ {"version":3,"file":"sender-bind.js","sourceRoot":"","sources":["../../../src/lib/conversation-pipeline/sender-bind.ts"],"names":[],"mappings":"AAEA,8EAA8E;AAC9E,+EAA+E;AAC/E,EAAE;AACF,4EAA4E;AAC5E,6EAA6E;AAC7E,yEAAyE;AACzE,yEAAyE;AACzE,4EAA4E;AAC5E,oCAAoC;AACpC,EAAE;AACF,0EAA0E;AAC1E,sEAAsE;AACtE,8EAA8E;AAE9E,MAAM,OAAO,qBAAsB,SAAQ,KAAK;IACrC,UAAU,GAAG,IAAI,CAAC;IAC3B,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,CAAC,CAAC;QACf,IAAI,CAAC,IAAI,GAAG,uBAAuB,CAAC;IACtC,CAAC;CACF;AAaD,MAAM,sBAAsB,GAAG;;;;;;CAM9B,CAAC;AAEF,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAC/C,KAAuC;IAEvC,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,cAAc,EAAE,qBAAqB,EAAE,GAAG,KAAK,CAAC;IAC5E,MAAM,MAAM,GAAG,CAAC,cAAc,EAAE,GAAG,qBAAqB,CAAC,CAAC;IAC1D,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC;IAChD,IAAI,WAAW,CAAC,MAAM,KAAK,MAAM,CAAC,MAAM,EAAE,CAAC;QACzC,MAAM,IAAI,qBAAqB,CAC7B,sFAAsF,CACvF,CAAC;IACJ,CAAC;IACD,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,WAAW,CAAC,KAAK,EAAE,EAAE,EAAE,EAAE,CACjD,EAAE,CAAC,GAAG,CAAC,sBAAsB,EAAE,EAAE,GAAG,EAAE,WAAW,EAAE,CAAC,CACrD,CAAC;IACF,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAClC,KAAK,MAAM,CAAC,IAAI,GAAG,CAAC,OAAO,EAAE,CAAC;QAC5B,MAAM,MAAM,GAAG,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAW,CAAC;QACzC,MAAM,MAAM,GAAI,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAc,IAAI,EAAE,CAAC;QACnD,MAAM,IAAI,GAAI,CAAC,CAAC,GAAG,CAAC,WAAW,CAAY,IAAI,EAAE,CAAC;QAClD,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,MAAM,IAAI,qBAAqB,CAAC,QAAQ,MAAM,wDAAwD,CAAC,CAAC;QAC1G,CAAC;QACD,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;YACvB,MAAM,IAAI,qBAAqB,CAAC,QAAQ,MAAM,uBAAuB,IAAI,SAAS,SAAS,EAAE,CAAC,CAAC;QACjG,CAAC;QACD,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;YAChE,MAAM,IAAI,qBAAqB,CAAC,QAAQ,MAAM,gBAAgB,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,mCAAmC,CAAC,CAAC;QACrH,CAAC;QACD,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IACtB,CAAC;IACD,KAAK,MAAM,EAAE,IAAI,WAAW,EAAE,CAAC;QAC7B,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;YACrB,MAAM,IAAI,qBAAqB,CAAC,aAAa,EAAE,qBAAqB,CAAC,CAAC;QACxE,CAAC;IACH,CAAC;IACD,OAAO,EAAE,oBAAoB,EAAE,OAAO,CAAC,IAAI,EAAE,CAAC;AAChD,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"to-turn-text.d.ts","sourceRoot":"","sources":["../../../src/lib/conversation-pipeline/to-turn-text.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;AAc3D,wBAAgB,UAAU,CAAC,OAAO,EAAE,mBAAmB,GAAG,MAAM,CAM/D"}
1
+ {"version":3,"file":"to-turn-text.d.ts","sourceRoot":"","sources":["../../../src/lib/conversation-pipeline/to-turn-text.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;AAgB3D,wBAAgB,UAAU,CAAC,OAAO,EAAE,mBAAmB,GAAG,MAAM,CAM/D"}
@@ -1,18 +1,20 @@
1
1
  // ---------------------------------------------------------------------------
2
- // to-turn-text — render one ConversationSession as a turn-attributed text
3
- // block ready for memory-classify (mode='chat'). Source-agnostic: every
4
- // normaliser feeds ParsedLine[] of the same shape.
2
+ // to-turn-text — render one ConversationSession as a timestamped text block
3
+ // for memory-classify (mode='chat'). Task 897: bodies are opaque; the
4
+ // scanner already preserved the source's `Sender:` segment inside `body`,
5
+ // so a WhatsApp session renders as:
5
6
  //
6
- // [YYYY-MM-DD HH:MM:SS ±HH:MM] <Sender>: <body>
7
- // [YYYY-MM-DD HH:MM:SS ±HH:MM] <Sender>: <body>
8
- // ...
7
+ // [YYYY-MM-DD HH:MM:SS ±HH:MM] Sender: body text
8
+ // [YYYY-MM-DD HH:MM:SS ±HH:MM] Sender: body text
9
9
  //
10
- // The classifier never sees the source format — only this rendered text.
10
+ // Other sources whose body shape is different (system message, vCard,
11
+ // forwarded block) flow through unchanged — the chat-mode prompt is
12
+ // instructed to keep these prefixes verbatim in chunk bodies.
11
13
  // ---------------------------------------------------------------------------
12
14
  export function toTurnText(session) {
13
15
  const lines = [];
14
16
  for (const m of session.messages) {
15
- lines.push(`[${formatWallClock(m.dateSent)}] ${m.senderName}: ${m.body}`);
17
+ lines.push(`[${formatWallClock(m.dateSent)}] ${m.body}`);
16
18
  }
17
19
  return lines.join("\n");
18
20
  }
@@ -1 +1 @@
1
- {"version":3,"file":"to-turn-text.js","sourceRoot":"","sources":["../../../src/lib/conversation-pipeline/to-turn-text.ts"],"names":[],"mappings":"AAEA,8EAA8E;AAC9E,0EAA0E;AAC1E,wEAAwE;AACxE,mDAAmD;AACnD,EAAE;AACF,kDAAkD;AAClD,kDAAkD;AAClD,QAAQ;AACR,EAAE;AACF,yEAAyE;AACzE,8EAA8E;AAE9E,MAAM,UAAU,UAAU,CAAC,OAA4B;IACrD,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,MAAM,CAAC,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;QACjC,KAAK,CAAC,IAAI,CAAC,IAAI,eAAe,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,UAAU,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAC5E,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,SAAS,eAAe,CAAC,GAAW;IAClC,MAAM,CAAC,GAAG,GAAG,CAAC,KAAK,CACjB,iFAAiF,CAClF,CAAC;IACF,IAAI,CAAC,CAAC;QAAE,OAAO,GAAG,CAAC;IACnB,MAAM,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC;IACtC,MAAM,WAAW,GAAG,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC;IACjD,OAAO,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,WAAW,EAAE,CAAC;AAC1D,CAAC"}
1
+ {"version":3,"file":"to-turn-text.js","sourceRoot":"","sources":["../../../src/lib/conversation-pipeline/to-turn-text.ts"],"names":[],"mappings":"AAEA,8EAA8E;AAC9E,4EAA4E;AAC5E,sEAAsE;AACtE,0EAA0E;AAC1E,oCAAoC;AACpC,EAAE;AACF,mDAAmD;AACnD,mDAAmD;AACnD,EAAE;AACF,sEAAsE;AACtE,oEAAoE;AACpE,8DAA8D;AAC9D,8EAA8E;AAE9E,MAAM,UAAU,UAAU,CAAC,OAA4B;IACrD,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,MAAM,CAAC,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;QACjC,KAAK,CAAC,IAAI,CAAC,IAAI,eAAe,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAC3D,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,SAAS,eAAe,CAAC,GAAW;IAClC,MAAM,CAAC,GAAG,GAAG,CAAC,KAAK,CACjB,iFAAiF,CAClF,CAAC;IACF,IAAI,CAAC,CAAC;QAAE,OAAO,GAAG,CAAC;IACnB,MAAM,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC;IACtC,MAAM,WAAW,GAAG,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC;IACjD,OAAO,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,WAAW,EAAE,CAAC;AAC1D,CAAC"}
@@ -17,9 +17,10 @@
17
17
  * Hallucination defence: every returned `kind` is verified against the
18
18
  * loaded ontology label set. Sections whose `kind` is not a real label
19
19
  * are tagged `UNMAPPED`. Failure of the LLM call (missing creds, network,
20
- * malformed JSON) returns `{kind: "fallback", reason}` — the calling
21
- * skill MUST treat this as terminal and abort the ingest with a loud
22
- * blocker. There is no longer a writer fallback path (Task 740).
20
+ * malformed JSON) returns `{kind: "error", reason}` (before Task 897 the vocabulary
21
+ * was `kind: "fallback"`). The caller decides whether to abort the
22
+ * ingest or degrade-on-error per session; classifier never silently
23
+ * substitutes a degraded write.
23
24
  */
24
25
  /** Direction of the anchor edge relative to the typed node. */
25
26
  export type AnchorEdgeDirection = "from-anchor" | "to-anchor";
@@ -160,7 +161,7 @@ export type ClassifyResult = {
160
161
  kind: "ok";
161
162
  output: ClassifierOutput;
162
163
  } | {
163
- kind: "fallback";
164
+ kind: "error";
164
165
  reason: string;
165
166
  };
166
167
  /**
@@ -229,10 +230,11 @@ export interface ClassifyParams {
229
230
  * Sections the classifier could not natural-edge appear in
230
231
  * `output.orphanCandidates`. The skill surfaces orphans loudly to
231
232
  * the operator.
232
- * { kind: "fallback", reason } when the LLM is unavailable or returns
233
- * malformed JSON. The skill MUST treat this as terminal: abort the
234
- * ingest entirely, no graph writes, surface the blocker to the
235
- * operator (Task 740 doctrine — no silent fallback writes).
233
+ * { kind: "error", reason } when the LLM is unavailable, returns
234
+ * malformed JSON, or hits an input-too-large guard. The caller
235
+ * decides whether to abort the ingest entirely (document mode) or
236
+ * degrade-on-error per session (chat mode, Task 897). Classifier
237
+ * never silently substitutes a degraded write.
236
238
  */
237
239
  export declare function classifyDocument(params: ClassifyParams): Promise<ClassifyResult>;
238
240
  //# sourceMappingURL=llm-classifier.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"llm-classifier.d.ts","sourceRoot":"","sources":["../../src/lib/llm-classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAUH,+DAA+D;AAC/D,MAAM,MAAM,mBAAmB,GAAG,aAAa,GAAG,WAAW,CAAC;AAE9D,mEAAmE;AACnE,MAAM,MAAM,oBAAoB,GAAG,UAAU,GAAG,UAAU,CAAC;AAE3D,kFAAkF;AAClF,MAAM,WAAW,iBAAiB;IAChC,8DAA8D;IAC9D,IAAI,EAAE,MAAM,CAAC;IACb,sCAAsC;IACtC,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACpC,oDAAoD;IACpD,IAAI,EAAE;QACJ,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,oBAAoB,CAAC;QAChC,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KACtC,CAAC;IACF;;;;OAIG;IACH,KAAK,CAAC,EAAE,OAAO,CAAC;CACjB;AAED,oGAAoG;AACpG,MAAM,WAAW,iBAAiB;IAChC;;;;;;;;;;;;;;;;;;;;OAoBG;IACH,IAAI,EAAE,MAAM,CAAC;IACb,kDAAkD;IAClD,KAAK,EAAE,MAAM,CAAC;IACd;;;;;;OAMG;IACH,IAAI,EAAE,MAAM,CAAC;IACb;;;;;OAKG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;;;;;;OAOG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,iFAAiF;IACjF,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACpC;;;;OAIG;IACH,UAAU,EAAE;QACV,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,mBAAmB,CAAC;QAC/B,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KACtC,GAAG,IAAI,CAAC;IACT,oFAAoF;IACpF,OAAO,CAAC,EAAE,iBAAiB,EAAE,CAAC;IAC9B;;;;;;OAMG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED;;;;;GAKG;AACH,MAAM,WAAW,eAAe;IAC9B,2CAA2C;IAC3C,IAAI,EAAE,MAAM,CAAC;IACb,6EAA6E;IAC7E,KAAK,EAAE,MAAM,CAAC;IACd,sEAAsE;IACtE,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,oCAAoC;AACpC,MAAM,WAAW,gBAAgB;IAC/B,kDAAkD;IAClD,eAAe,EAAE,MAAM,CAAC;IACxB,kEAAkE;IAClE,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,6BAA6B;IAC7B,QAAQ,EAAE,iBAAiB,EAAE,CAAC;IAC9B,iFAAiF;IACjF,gBAAgB,EAAE,eAAe,EAAE,CAAC;IACpC;;+CAE2C;IAC3C,aAAa,CAAC,EAAE,KAAK,CAAC;QACpB,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,UAAU,GAAG,UAAU,CAAC;QACnC,UAAU,EAAE,MAAM,CAAC;QACnB,gBAAgB,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,wEAAwE;QACxE,KAAK,CAAC,EAAE,OAAO,CAAC;KACjB,CAAC,CAAC;IACH,mFAAmF;IACnF,mBAAmB,EAAE,MAAM,CAAC;CAC7B;AAED,MAAM,MAAM,cAAc,GACtB;IAAE,IAAI,EAAE,IAAI,CAAC;IAAC,MAAM,EAAE,gBAAgB,CAAA;CAAE,GACxC;IAAE,IAAI,EAAE,UAAU,CAAC;IAAC,MAAM
,EAAE,MAAM,CAAA;CAAE,CAAC;AA8CzC;;;;;;;;GAQG;AACH,eAAO,MAAM,kBAAkB,UAAU,CAAC;AAE1C,eAAO,MAAM,sBAAsB,wEAMzB,CAAC;AAEX,eAAO,MAAM,wBAAwB,yKAa3B,CAAC;AAEX,eAAO,MAAM,sBAAsB,4SAqBzB,CAAC;AAEX,8EAA8E;AAC9E,eAAO,MAAM,qBAAqB,sBAAuB,CAAC;AAE1D,eAAO,MAAM,iBAAiB,EAAE,WAAW,CAAC,MAAM,CAMhD,CAAC;AAoJH,MAAM,WAAW,cAAc;IAC7B,wCAAwC;IACxC,SAAS,EAAE,MAAM,CAAC;IAClB;;;;;;;OAOG;IACH,IAAI,CAAC,EAAE,UAAU,GAAG,MAAM,CAAC;IAC3B;;;;;;;;OAQG;IACH,iBAAiB,EAAE,MAAM,CAAC;IAC1B;;;;;;OAMG;IACH,cAAc,EAAE,WAAW,CAAC,MAAM,CAAC,CAAC;IACpC;;;;;;OAMG;IACH,cAAc,EAAE,MAAM,CAAC;IACvB,8EAA8E;IAC9E,YAAY,EAAE,MAAM,CAAC;CACtB;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAsB,gBAAgB,CACpC,MAAM,EAAE,cAAc,GACrB,OAAO,CAAC,cAAc,CAAC,CA0UzB"}
1
+ {"version":3,"file":"llm-classifier.d.ts","sourceRoot":"","sources":["../../src/lib/llm-classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAUH,+DAA+D;AAC/D,MAAM,MAAM,mBAAmB,GAAG,aAAa,GAAG,WAAW,CAAC;AAE9D,mEAAmE;AACnE,MAAM,MAAM,oBAAoB,GAAG,UAAU,GAAG,UAAU,CAAC;AAE3D,kFAAkF;AAClF,MAAM,WAAW,iBAAiB;IAChC,8DAA8D;IAC9D,IAAI,EAAE,MAAM,CAAC;IACb,sCAAsC;IACtC,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACpC,oDAAoD;IACpD,IAAI,EAAE;QACJ,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,oBAAoB,CAAC;QAChC,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KACtC,CAAC;IACF;;;;OAIG;IACH,KAAK,CAAC,EAAE,OAAO,CAAC;CACjB;AAED,oGAAoG;AACpG,MAAM,WAAW,iBAAiB;IAChC;;;;;;;;;;;;;;;;;;;;OAoBG;IACH,IAAI,EAAE,MAAM,CAAC;IACb,kDAAkD;IAClD,KAAK,EAAE,MAAM,CAAC;IACd;;;;;;OAMG;IACH,IAAI,EAAE,MAAM,CAAC;IACb;;;;;OAKG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;;;;;;OAOG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,iFAAiF;IACjF,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACpC;;;;OAIG;IACH,UAAU,EAAE;QACV,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,mBAAmB,CAAC;QAC/B,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KACtC,GAAG,IAAI,CAAC;IACT,oFAAoF;IACpF,OAAO,CAAC,EAAE,iBAAiB,EAAE,CAAC;IAC9B;;;;;;OAMG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED;;;;;GAKG;AACH,MAAM,WAAW,eAAe;IAC9B,2CAA2C;IAC3C,IAAI,EAAE,MAAM,CAAC;IACb,6EAA6E;IAC7E,KAAK,EAAE,MAAM,CAAC;IACd,sEAAsE;IACtE,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,oCAAoC;AACpC,MAAM,WAAW,gBAAgB;IAC/B,kDAAkD;IAClD,eAAe,EAAE,MAAM,CAAC;IACxB,kEAAkE;IAClE,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,6BAA6B;IAC7B,QAAQ,EAAE,iBAAiB,EAAE,CAAC;IAC9B,iFAAiF;IACjF,gBAAgB,EAAE,eAAe,EAAE,CAAC;IACpC;;+CAE2C;IAC3C,aAAa,CAAC,EAAE,KAAK,CAAC;QACpB,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,UAAU,GAAG,UAAU,CAAC;QACnC,UAAU,EAAE,MAAM,CAAC;QACnB,gBAAgB,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,wEAAwE;QACxE,KAAK,CAAC,EAAE,OAAO,CAAC;KACjB,CAAC,CAAC;IACH,mFAAmF;IACnF,mBAAmB,EAAE,MAAM,CAAC;CAC7B;AAED,MAAM,MAAM,cAAc,GACtB;IAAE,IAAI,EAAE,IAAI,CAAC;IAAC,MAAM,EAAE,gBAAgB,CAAA;CAAE,GACxC;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,MAA
M,EAAE,MAAM,CAAA;CAAE,CAAC;AA8EtC;;;;;;;;GAQG;AACH,eAAO,MAAM,kBAAkB,UAAU,CAAC;AAE1C,eAAO,MAAM,sBAAsB,wEAMzB,CAAC;AAEX,eAAO,MAAM,wBAAwB,yKAa3B,CAAC;AAEX,eAAO,MAAM,sBAAsB,4SAqBzB,CAAC;AAEX,8EAA8E;AAC9E,eAAO,MAAM,qBAAqB,sBAAuB,CAAC;AAE1D,eAAO,MAAM,iBAAiB,EAAE,WAAW,CAAC,MAAM,CAMhD,CAAC;AAsKH,MAAM,WAAW,cAAc;IAC7B,wCAAwC;IACxC,SAAS,EAAE,MAAM,CAAC;IAClB;;;;;;;OAOG;IACH,IAAI,CAAC,EAAE,UAAU,GAAG,MAAM,CAAC;IAC3B;;;;;;;;OAQG;IACH,iBAAiB,EAAE,MAAM,CAAC;IAC1B;;;;;;OAMG;IACH,cAAc,EAAE,WAAW,CAAC,MAAM,CAAC,CAAC;IACpC;;;;;;OAMG;IACH,cAAc,EAAE,MAAM,CAAC;IACvB,8EAA8E;IAC9E,YAAY,EAAE,MAAM,CAAC;CACtB;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,gBAAgB,CACpC,MAAM,EAAE,cAAc,GACrB,OAAO,CAAC,cAAc,CAAC,CA0WzB"}
@@ -17,9 +17,10 @@
17
17
  * Hallucination defence: every returned `kind` is verified against the
18
18
  * loaded ontology label set. Sections whose `kind` is not a real label
19
19
  * are tagged `UNMAPPED`. Failure of the LLM call (missing creds, network,
20
- * malformed JSON) returns `{kind: "fallback", reason}` — the calling
21
- * skill MUST treat this as terminal and abort the ingest with a loud
22
- * blocker. There is no longer a writer fallback path (Task 740).
20
+ * malformed JSON) returns `{kind: "error", reason}` (Task 897 vocabulary
21
+ * was `kind: "fallback"`). The caller decides whether to abort the
22
+ * ingest or degrade-on-error per session; classifier never silently
23
+ * substitutes a degraded write.
23
24
  */
24
25
  import { callOauthLlm } from "../../../../../lib/oauth-llm/dist/index.js";
25
26
  import { HAIKU_MODEL } from "../../../../../lib/models/dist/index.js";
@@ -29,13 +30,15 @@ import { chunkDocument, mergeOverlappingSections } from "./document-chunker.js";
29
30
  // ---------------------------------------------------------------------------
30
31
  const MAX_OUTPUT_TOKENS = 8192;
31
32
  /**
32
- * Per-section summary cap (Task 896 clause 1). The classifier prompt asks
33
- * for 500 chars; the server truncates anything longer with an ellipsis
34
- * marker so a single overlong summary never inflates the output JSON
35
- * unbounded. Truncation is observable (logged once per oversize section)
36
- * but not fatal — Haiku usually respects the cap.
33
+ * Per-section summary cap (Task 896 clause 1; tightened by Task 897 sub-scope F).
34
+ * Reduced 500 → 100 because the offsets-only schema's per-section JSON now
35
+ * has tiny structural overhead, and a 500-char summary made a 30-section
36
+ * document blow past the prompt's 6000-token output budget. The classifier
37
+ * prompt advertises this cap; the server truncates oversize summaries with
38
+ * an ellipsis. `summaryTruncated` counter on the haiku-ok line surfaces the
39
+ * rate of model misbehaviour without aborting.
37
40
  */
38
- const SUMMARY_MAX_CHARS = 500;
41
+ const SUMMARY_MAX_CHARS = 100;
39
42
  /**
40
43
  * Output budget the prompt advertises to the model. ≈6000 tokens leaves
41
44
  * headroom under MAX_OUTPUT_TOKENS=8192 for a few hundred sections of
@@ -62,6 +65,33 @@ const CHUNK_OVERLAP_TOKENS = 5_000;
62
65
  /** Target chunk char size for the prose chunker (Task 896 clause 3). */
63
66
  const CHUNK_CHAR_SIZE = Math.floor(CHUNK_TOKEN_SIZE * CHARS_PER_TOKEN_ESTIMATE);
64
67
  const CHUNK_OVERLAP_CHARS = Math.floor(CHUNK_OVERLAP_TOKENS * CHARS_PER_TOKEN_ESTIMATE);
68
+ // ---------------------------------------------------------------------------
69
+ // Task 897 sub-scope F — projected-output chunker gate.
70
+ //
71
+ // Pre-897 dispatch was `documentText.length > CHUNK_CHAR_SIZE` (≈525K chars).
72
+ // That gate fires only on >525K-char inputs, but a 251K-char Adam Mackay
73
+ // archive with ~62 dense chat sessions still produced an 8K-token-truncated
74
+ // response — the input wasn't oversize, the OUTPUT was. Task 896's
75
+ // offsets-only schema cut per-section JSON down dramatically, but a dense
76
+ // archive at avg ~1500 chars per section still emits ~170 sections, and at
77
+ // ~75 tokens per section that's ~12K output tokens — twice the prompt's
78
+ // advertised 6K budget.
79
+ //
80
+ // New gate: project output tokens before the call. With SUMMARY_MAX_CHARS=100
81
+ // the per-section JSON shape is roughly:
82
+ // { kind, title (≤120), summary (≤100), sourceStart, sourceEnd, properties }
83
+ // ≈ 250–300 chars ≈ 75 tokens worst case. Conservative chars-per-section is
84
+ // ~1500 (covers contract-clause documents and dense chat). Project sections
85
+ // from input length and force chunked dispatch when the projection exceeds
86
+ // PROMPT_OUTPUT_TOKEN_BUDGET. The threshold lands at ~120K chars input —
87
+ // well below CHUNK_CHAR_SIZE, which now serves as a defence-in-depth ceiling.
88
+ // ---------------------------------------------------------------------------
89
+ const PROJECTED_CHARS_PER_SECTION = 1_500;
90
+ const PROJECTED_TOKENS_PER_SECTION = 75;
91
+ function projectOutputTokens(documentTextLength) {
92
+ const estimatedSections = Math.ceil(documentTextLength / PROJECTED_CHARS_PER_SECTION);
93
+ return estimatedSections * PROJECTED_TOKENS_PER_SECTION;
94
+ }
65
95
  /**
66
96
  * Closed enumeration of section `kind` values. Each becomes a secondary
67
97
  * label on the `:Section` node (e.g. `:Section:Position`). Anything outside
@@ -231,13 +261,32 @@ const SYSTEM_PROMPT = [
231
261
  // ---------------------------------------------------------------------------
232
262
  // Helpers
233
263
  // ---------------------------------------------------------------------------
264
+ /**
265
+ * Strip a balanced markdown fence from a JSON response. Returns:
266
+ * { kind: "ok", json } when the text was bare JSON or fully fenced.
267
+ * { kind: "truncated", reason } when the text opens a fence but never closes
268
+ * it — a structural fingerprint of `max_tokens` mid-response (Task 897
269
+ * sub-scope G, replacing the pre-897 silent fall-through to JSON.parse
270
+ * which masked truncation as `malformed JSON`).
271
+ */
234
272
  function extractJson(raw) {
235
273
  const trimmed = raw.trim();
236
- const fenceMatch = trimmed.match(/^```(?:json)?\s*([\s\S]*?)```$/);
237
- return fenceMatch ? fenceMatch[1].trim() : trimmed;
274
+ const balanced = trimmed.match(/^```(?:json)?\s*([\s\S]*?)```$/);
275
+ if (balanced)
276
+ return { kind: "ok", json: balanced[1].trim() };
277
+ // One-sided fence — opens with ``` (optionally `json`) but no closing fence.
278
+ // This is the truncation fingerprint: the model started a fenced response
279
+ // and ran out of output tokens before closing it.
280
+ if (/^```(?:json)?/.test(trimmed) && !/```\s*$/.test(trimmed)) {
281
+ return {
282
+ kind: "truncated",
283
+ reason: "response-truncated: opening ``` fence with no closing — model likely hit max_tokens mid-response",
284
+ };
285
+ }
286
+ return { kind: "ok", json: trimmed };
238
287
  }
239
- function logFallback(accountId, reason) {
240
- process.stderr.write(`[memory-classify] [${accountId}] fallback reason="${reason}"\n`);
288
+ function logError(accountId, reason) {
289
+ process.stderr.write(`[memory-classify] [${accountId}] ERROR reason="${reason}"\n`);
241
290
  }
242
291
  function asString(v) {
243
292
  return typeof v === "string" ? v : null;
@@ -270,33 +319,45 @@ function asNonNegativeInt(v) {
270
319
  * Sections the classifier could not natural-edge appear in
271
320
  * `output.orphanCandidates`. The skill surfaces orphans loudly to
272
321
  * the operator.
273
- * { kind: "fallback", reason } when the LLM is unavailable or returns
274
- * malformed JSON. The skill MUST treat this as terminal: abort the
275
- * ingest entirely, no graph writes, surface the blocker to the
276
- * operator (Task 740 doctrine — no silent fallback writes).
322
+ * { kind: "error", reason } when the LLM is unavailable, returns
323
+ * malformed JSON, or hits an input-too-large guard. The caller
324
+ * decides whether to abort the ingest entirely (document mode) or
325
+ * degrade-on-error per session (chat mode, Task 897). Classifier
326
+ * never silently substitutes a degraded write.
277
327
  */
278
328
  export async function classifyDocument(params) {
279
329
  const { accountId, anchorDescription, ontologyLabels, naturalEdgeMap, documentText } = params;
280
330
  const mode = params.mode ?? "document";
281
- // Task 896 clause 3 dispatch — oversize document mode goes to the chunked
282
- // path; oversize chat mode loud-fails (sessionize must keep sessions under
331
+ // Task 896 clause 3 + Task 897 sub-scope F dispatch.
332
+ //
333
+ // Document mode forces the chunked path when EITHER:
334
+ // - the input exceeds CHUNK_CHAR_SIZE (input-size gate, defence-in-depth), OR
335
+ // - the projected output tokens exceed PROMPT_OUTPUT_TOKEN_BUDGET
336
+ // (output-size gate, the primary trigger post-Task-896 since the
337
+ // offsets-only schema means input size no longer predicts output size).
338
+ //
339
+ // Oversize chat mode loud-fails (sessionize must keep sessions under
283
340
  // the ceiling, per eng review). Single-shot path stays unchanged below.
284
- if (mode === "document" && documentText.length > CHUNK_CHAR_SIZE) {
285
- return classifyDocumentChunked(params);
341
+ if (mode === "document") {
342
+ const projected = projectOutputTokens(documentText.length);
343
+ if (documentText.length > CHUNK_CHAR_SIZE || projected > PROMPT_OUTPUT_TOKEN_BUDGET) {
344
+ process.stderr.write(`[memory-classify] [${accountId}] dispatch=chunked chars=${documentText.length} projectedOutputTokens=${projected} budget=${PROMPT_OUTPUT_TOKEN_BUDGET}\n`);
345
+ return classifyDocumentChunked(params);
346
+ }
286
347
  }
287
348
  if (documentText.length > INPUT_CHAR_CEILING) {
288
349
  const overage = `chars=${documentText.length}, ceiling=${INPUT_CHAR_CEILING}`;
289
350
  if (mode === "chat") {
290
- logFallback(accountId, `input-too-large: chat session exceeds Haiku input ceiling (${overage}). Sessionize must split sessions before classify (Task 894).`);
351
+ logError(accountId, `input-too-large: chat session exceeds Haiku input ceiling (${overage}). Sessionize must split sessions before classify (Task 894).`);
291
352
  }
292
353
  else {
293
354
  // Document mode > INPUT_CHAR_CEILING but ≤ CHUNK_CHAR_SIZE shouldn't
294
355
  // happen since CHUNK_CHAR_SIZE < INPUT_CHAR_CEILING — kept as
295
356
  // defence-in-depth in case constants drift.
296
- logFallback(accountId, `input-too-large: document exceeds Haiku input ceiling without chunking (${overage}). Constants drift between CHUNK_CHAR_SIZE and INPUT_CHAR_CEILING.`);
357
+ logError(accountId, `input-too-large: document exceeds Haiku input ceiling without chunking (${overage}). Constants drift between CHUNK_CHAR_SIZE and INPUT_CHAR_CEILING.`);
297
358
  }
298
359
  return {
299
- kind: "fallback",
360
+ kind: "error",
300
361
  reason: `Input is ${documentText.length} chars; classifier ceiling is ${INPUT_CHAR_CEILING}.`,
301
362
  };
302
363
  }
@@ -343,33 +404,47 @@ export async function classifyDocument(params) {
343
404
  });
344
405
  const haikuMs = Date.now() - haikuStart;
345
406
  if (llmResult.kind === "fallback") {
346
- logFallback(accountId, `${llmResult.cause}: ${llmResult.reason}`);
347
- return { kind: "fallback", reason: llmResult.reason };
407
+ logError(accountId, `${llmResult.cause}: ${llmResult.reason}`);
408
+ return { kind: "error", reason: llmResult.reason };
348
409
  }
349
410
  const responseText = llmResult.text;
411
+ // Task 897 sub-scope G — surface finish-reason + output-tokens per call so
412
+ // a `max_tokens` from Haiku appears directly in observability instead of
413
+ // surfacing downstream as `malformed JSON`.
414
+ const stopReason = llmResult.stopReason ?? "(unknown)";
415
+ const outputTokens = llmResult.outputTokens ?? -1;
416
+ process.stderr.write(`[memory-classify] [${accountId}] response-finish-reason=${stopReason} output-tokens=${outputTokens}\n`);
350
417
  // --- Parse + validate ---
351
- const jsonText = extractJson(responseText);
418
+ // Task 897 sub-scope G — fence handling is structural now. A one-sided
419
+ // fence (opening ``` with no closing) is the `max_tokens` truncation
420
+ // fingerprint; surface it as `response-truncated` instead of falling
421
+ // through to JSON.parse and reporting `malformed JSON`.
422
+ const fence = extractJson(responseText);
423
+ if (fence.kind === "truncated") {
424
+ logError(accountId, `${fence.reason} stop-reason=${stopReason} output-tokens=${outputTokens} len=${responseText.length} pre-strip-tail=${JSON.stringify(responseText.slice(-200))}`);
425
+ return { kind: "error", reason: "Haiku response truncated mid-fence (likely max_tokens hit)" };
426
+ }
427
+ const jsonText = fence.json;
352
428
  let parsed;
353
429
  try {
354
430
  parsed = JSON.parse(jsonText);
355
431
  }
356
432
  catch (err) {
357
- // Task 896 clause 5: surface diagnostics so a malformed-JSON fallback
358
- // distinguishes truncation (output budget exceeded), fence drift, and
359
- // genuine model junk. Pre-Task-896 the fallback discarded the parser
360
- // error and 120 chars from the post-strip text — Adam Mackay's 251K-char
361
- // ingest bottomed out here with no visible cause.
433
+ // Task 896 clause 5: surface diagnostics so a malformed-JSON error
434
+ // distinguishes fence drift and genuine model junk. The truncation case
435
+ // is now caught above by `extractJson` returning `kind: "truncated"`,
436
+ // so this branch covers genuinely malformed JSON only.
362
437
  const message = err instanceof Error ? err.message : String(err);
363
438
  const fenceStripped = jsonText !== responseText;
364
- logFallback(accountId, `malformed JSON: parse-error=${JSON.stringify(message)} len=${responseText.length} fence-stripped=${fenceStripped} ` +
439
+ logError(accountId, `malformed JSON: parse-error=${JSON.stringify(message)} len=${responseText.length} fence-stripped=${fenceStripped} stop-reason=${stopReason} ` +
365
440
  `pre-strip-head=${JSON.stringify(responseText.slice(0, 200))} ` +
366
441
  `pre-strip-tail=${JSON.stringify(responseText.slice(-200))}`);
367
- return { kind: "fallback", reason: "Haiku returned malformed JSON" };
442
+ return { kind: "error", reason: "Haiku returned malformed JSON" };
368
443
  }
369
444
  const root = asObject(parsed);
370
445
  if (!root) {
371
- logFallback(accountId, "response is not an object");
372
- return { kind: "fallback", reason: "invalid response shape" };
446
+ logError(accountId, "response is not an object");
447
+ return { kind: "error", reason: "invalid response shape" };
373
448
  }
374
449
  const documentSummary = asString(root.documentSummary) ?? "";
375
450
  const documentKeywords = Array.isArray(root.documentKeywords)
@@ -377,8 +452,8 @@ export async function classifyDocument(params) {
377
452
  : [];
378
453
  const rawSections = Array.isArray(root.sections) ? root.sections : null;
379
454
  if (!rawSections) {
380
- logFallback(accountId, "missing sections array");
381
- return { kind: "fallback", reason: "invalid response shape (no sections)" };
455
+ logError(accountId, "missing sections array");
456
+ return { kind: "error", reason: "invalid response shape (no sections)" };
382
457
  }
383
458
  const sections = [];
384
459
  let hallucinatedRelated = 0;
@@ -512,9 +587,9 @@ export async function classifyDocument(params) {
512
587
  // sees the regression — typically caused by a model that ignored the new
513
588
  // offset contract and reverted to emitting `body`.
514
589
  if (rawSections.length > 0 && sections.length === 0) {
515
- logFallback(accountId, `missing-offsets: every section failed offset validation (rawSections=${rawSections.length}, droppedForOffsets=${droppedForOffsets}). Likely cause: Haiku emitted body text instead of sourceStart/sourceEnd offsets, or the prompt update didn't reach the model.`);
590
+ logError(accountId, `missing-offsets: every section failed offset validation (rawSections=${rawSections.length}, droppedForOffsets=${droppedForOffsets}). Likely cause: Haiku emitted body text instead of sourceStart/sourceEnd offsets, or the prompt update didn't reach the model.`);
516
591
  return {
517
- kind: "fallback",
592
+ kind: "error",
518
593
  reason: "Haiku response had no parseable section offsets",
519
594
  };
520
595
  }
@@ -604,9 +679,9 @@ async function classifyDocumentChunked(params) {
604
679
  // chunker bug or constants-drift — loud-fail instead of pretending.
605
680
  for (const c of chunks) {
606
681
  if (c.chunkText.length > INPUT_CHAR_CEILING) {
607
- logFallback(accountId, `input-too-large: chunker emitted oversize chunk (chars=${c.chunkText.length}, ceiling=${INPUT_CHAR_CEILING}). Chunker invariant violated.`);
682
+ logError(accountId, `input-too-large: chunker emitted oversize chunk (chars=${c.chunkText.length}, ceiling=${INPUT_CHAR_CEILING}). Chunker invariant violated.`);
608
683
  return {
609
- kind: "fallback",
684
+ kind: "error",
610
685
  reason: `Chunker produced an oversize chunk (${c.chunkText.length} > ${INPUT_CHAR_CEILING})`,
611
686
  };
612
687
  }
@@ -622,7 +697,7 @@ async function classifyDocumentChunked(params) {
622
697
  // Recurse into the single-shot path — chunkSize < CHUNK_CHAR_SIZE is the
623
698
  // dispatch threshold so the recursive call lands in the existing logic.
624
699
  const chunkResult = await classifyDocument({ ...params, documentText: c.chunkText });
625
- if (chunkResult.kind === "fallback") {
700
+ if (chunkResult.kind === "error") {
626
701
  // One chunk failure aborts the whole ingest (loud-failure doctrine).
627
702
  return chunkResult;
628
703
  }