@rubytech/create-realagent 1.0.829 → 1.0.831
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/payload/platform/config/brand.json +1 -1
- package/payload/platform/lib/oauth-llm/dist/index.d.ts +9 -2
- package/payload/platform/lib/oauth-llm/dist/index.d.ts.map +1 -1
- package/payload/platform/lib/oauth-llm/dist/index.js +26 -1
- package/payload/platform/lib/oauth-llm/dist/index.js.map +1 -1
- package/payload/platform/lib/oauth-llm/src/index.ts +43 -4
- package/payload/platform/neo4j/migrations/007-conversation-archive-source.ts +116 -0
- package/payload/platform/neo4j/migrations/008-adminuser-accountid-backfill.ts +85 -0
- package/payload/platform/neo4j/schema.cypher +12 -3
- package/payload/platform/plugins/admin/hooks/__tests__/archive-ingest-surface-gate.test.sh +54 -39
- package/payload/platform/plugins/admin/hooks/archive-ingest-surface-gate.sh +64 -26
- package/payload/platform/plugins/admin/mcp/dist/index.js +25 -3
- package/payload/platform/plugins/admin/mcp/dist/index.js.map +1 -1
- package/payload/platform/plugins/contacts/mcp/dist/index.js +5 -5
- package/payload/platform/plugins/contacts/mcp/dist/index.js.map +1 -1
- package/payload/platform/plugins/contacts/mcp/dist/tools/contact-create.d.ts +1 -1
- package/payload/platform/plugins/contacts/mcp/dist/tools/contact-create.d.ts.map +1 -1
- package/payload/platform/plugins/contacts/mcp/dist/tools/contact-create.js +29 -23
- package/payload/platform/plugins/contacts/mcp/dist/tools/contact-create.js.map +1 -1
- package/payload/platform/plugins/docs/references/internals.md +1 -1
- package/payload/platform/plugins/docs/references/plugins-guide.md +1 -1
- package/payload/platform/plugins/memory/PLUGIN.md +2 -1
- package/payload/platform/plugins/memory/bin/conversation-archive-ingest.mjs +564 -0
- package/payload/platform/plugins/memory/bin/conversation-archive-ingest.sh +106 -0
- package/payload/platform/plugins/memory/mcp/dist/index.js +30 -16
- package/payload/platform/plugins/memory/mcp/dist/index.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/llm-classifier.test.js +4 -3
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/llm-classifier.test.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-loader.test.js +11 -6
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-loader.test.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/index.d.ts +5 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/index.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/index.js +30 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/index.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/timestamp-scanner.d.ts +49 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/timestamp-scanner.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/timestamp-scanner.js +35 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/timestamp-scanner.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/types.d.ts +47 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/types.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/types.js +31 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/types.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/whatsapp-text.d.ts +3 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/whatsapp-text.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/whatsapp-text.js +155 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/whatsapp-text.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/delta-cursor.d.ts +11 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/delta-cursor.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/delta-cursor.js +20 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/delta-cursor.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.d.ts +14 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.js +38 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.d.ts +16 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.js +59 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sessionize.d.ts +9 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sessionize.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sessionize.js +32 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sessionize.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/to-turn-text.d.ts +3 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/to-turn-text.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/to-turn-text.js +29 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/to-turn-text.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/document-chunker.d.ts +45 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/document-chunker.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/document-chunker.js +125 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/document-chunker.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts +34 -9
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js +360 -35
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.d.ts +3 -2
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.js +46 -17
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-source-agnosticism.test.d.ts +2 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-source-agnosticism.test.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-source-agnosticism.test.js +73 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-source-agnosticism.test.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-whatsapp-text.test.d.ts +2 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-whatsapp-text.test.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-whatsapp-text.test.js +109 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-whatsapp-text.test.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.js +34 -3
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.d.ts +17 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.js +34 -13
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts +18 -7
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js +24 -8
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-rank.js +2 -2
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-rank.js.map +1 -1
- package/payload/platform/plugins/memory/references/schema-base.md +2 -2
- package/payload/platform/plugins/memory/skills/conversation-archive/SKILL.md +133 -0
- package/payload/platform/plugins/memory/skills/document-ingest/SKILL.md +5 -2
- package/payload/platform/plugins/whatsapp/PLUGIN.md +1 -1
- package/payload/platform/scripts/seed-neo4j.sh +15 -15
- package/payload/platform/templates/specialists/agents/database-operator.md +8 -9
- package/payload/server/chunk-7BO5HDJC.js +10093 -0
- package/payload/server/chunk-BCFM2UPH.js +2305 -0
- package/payload/server/chunk-CV3HPX46.js +10097 -0
- package/payload/server/chunk-EL4DZ56X.js +1116 -0
- package/payload/server/chunk-J6YWEJBN.js +1116 -0
- package/payload/server/chunk-OCPJGZ6S.js +654 -0
- package/payload/server/chunk-QOJ2D26Z.js +654 -0
- package/payload/server/chunk-RC46ZYGT.js +2305 -0
- package/payload/server/client-pool-7NTEFNVQ.js +32 -0
- package/payload/server/client-pool-ZNGN66GN.js +32 -0
- package/payload/server/cloudflare-task-tracker-MHALDN54.js +19 -0
- package/payload/server/cloudflare-task-tracker-WE77WXSI.js +19 -0
- package/payload/server/maxy-edge.js +3 -3
- package/payload/server/neo4j-migrations-4XPNJNM6.js +490 -0
- package/payload/server/neo4j-migrations-6RW423E2.js +530 -0
- package/payload/server/server.js +30 -19
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deterministic prose chunker for oversize document classification (Task 896).
|
|
3
|
+
*
|
|
4
|
+
* Splits a document into overlapping fixed-size chunks so each chunk fits
|
|
5
|
+
* inside Haiku's input context window. The chunker is purely mechanical —
|
|
6
|
+
* it makes no semantic claim about where chunk boundaries should fall.
|
|
7
|
+
* Ontological boundaries remain Haiku's job per Task 737 (the document
|
|
8
|
+
* chunker that *did* try to be semantic was deleted because it leaked
|
|
9
|
+
* sections at the boundaries it picked).
|
|
10
|
+
*
|
|
11
|
+
* Overlap exists so a section straddling a chunk boundary appears in BOTH
|
|
12
|
+
* surrounding chunks; the merge step then unions the same-kind ranges so
|
|
13
|
+
* the boundary section isn't double-counted in the writer.
|
|
14
|
+
*
|
|
15
|
+
* Char counts are estimated from token counts via a fixed 3.5 chars/token
|
|
16
|
+
* ratio (English prose average). The estimate is conservative — Haiku
|
|
17
|
+
* tokenises slightly differently per script, but 3.5 leaves ~10% headroom
|
|
18
|
+
* for non-English content before bumping into the model's hard ceiling.
|
|
19
|
+
*/
|
|
20
|
+
/**
 * Minimal section shape consumed by `mergeOverlappingSections`: a `kind`,
 * a half-open `[sourceStart, sourceEnd)` range, and a `summary`.
 */
export interface RangedSection {
|
|
21
|
+
/** Section kind from the classifier's closed enumeration. */
|
|
22
|
+
kind: string;
|
|
23
|
+
/** Inclusive whole-document start offset. */
|
|
24
|
+
sourceStart: number;
|
|
25
|
+
/** Exclusive whole-document end offset. */
|
|
26
|
+
sourceEnd: number;
|
|
27
|
+
/** Per-section summary; when two sections merge, the longer summary is kept (the earlier one on equal length). */
|
|
28
|
+
summary: string;
|
|
29
|
+
}
|
|
30
|
+
/** One window of the source document, as returned by `chunkDocument`. */
export interface DocumentChunk {
|
|
31
|
+
/** Substring of the source document covered by this chunk. */
|
|
32
|
+
chunkText: string;
|
|
33
|
+
/** Whole-document offset where this chunk's text begins. */
|
|
34
|
+
baseOffset: number;
|
|
35
|
+
}
|
|
36
|
+
/**
 * Window parameters for `chunkDocument`. The implementation throws unless
 * `chunkSize > 0` and `0 <= overlap < chunkSize`.
 */
export interface ChunkOptions {
|
|
37
|
+
/** Maximum chunk length in characters (already token→char converted). */
|
|
38
|
+
chunkSize: number;
|
|
39
|
+
/** Overlap in characters between consecutive chunks. */
|
|
40
|
+
overlap: number;
|
|
41
|
+
}
|
|
42
|
+
/** Splits `text` into overlapping fixed-size chunks in document order; returns `[]` for empty text. */
export declare function chunkDocument(text: string, opts: ChunkOptions): DocumentChunk[];
|
|
43
|
+
/** Same-kind sections merge only when their intersection exceeds this fraction of the smaller range. */
export declare const MERGE_OVERLAP_THRESHOLD = 0.5;
|
|
44
|
+
/** Unions materially overlapping same-kind sections and returns the result sorted by `sourceStart`. */
export declare function mergeOverlappingSections<T extends RangedSection>(input: T[]): T[];
|
|
45
|
+
//# sourceMappingURL=document-chunker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"document-chunker.d.ts","sourceRoot":"","sources":["../../src/lib/document-chunker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAQH,MAAM,WAAW,aAAa;IAC5B,6DAA6D;IAC7D,IAAI,EAAE,MAAM,CAAC;IACb,6CAA6C;IAC7C,WAAW,EAAE,MAAM,CAAC;IACpB,2CAA2C;IAC3C,SAAS,EAAE,MAAM,CAAC;IAClB,2DAA2D;IAC3D,OAAO,EAAE,MAAM,CAAC;CACjB;AAMD,MAAM,WAAW,aAAa;IAC5B,8DAA8D;IAC9D,SAAS,EAAE,MAAM,CAAC;IAClB,4DAA4D;IAC5D,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,YAAY;IAC3B,yEAAyE;IACzE,SAAS,EAAE,MAAM,CAAC;IAClB,wDAAwD;IACxD,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,YAAY,GAAG,aAAa,EAAE,CA4B/E;AAqBD,eAAO,MAAM,uBAAuB,MAAM,CAAC;AAE3C,wBAAgB,wBAAwB,CAAC,CAAC,SAAS,aAAa,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,CAAC,EAAE,CAqDjF"}
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deterministic prose chunker for oversize document classification (Task 896).
|
|
3
|
+
*
|
|
4
|
+
* Splits a document into overlapping fixed-size chunks so each chunk fits
|
|
5
|
+
* inside Haiku's input context window. The chunker is purely mechanical —
|
|
6
|
+
* it makes no semantic claim about where chunk boundaries should fall.
|
|
7
|
+
* Ontological boundaries remain Haiku's job per Task 737 (the document
|
|
8
|
+
* chunker that *did* try to be semantic was deleted because it leaked
|
|
9
|
+
* sections at the boundaries it picked).
|
|
10
|
+
*
|
|
11
|
+
* Overlap exists so a section straddling a chunk boundary appears in BOTH
|
|
12
|
+
* surrounding chunks; the merge step then unions the same-kind ranges so
|
|
13
|
+
* the boundary section isn't double-counted in the writer.
|
|
14
|
+
*
|
|
15
|
+
* Char counts are estimated from token counts via a fixed 3.5 chars/token
|
|
16
|
+
* ratio (English prose average). The estimate is conservative — Haiku
|
|
17
|
+
* tokenises slightly differently per script, but 3.5 leaves ~10% headroom
|
|
18
|
+
* for non-English content before bumping into the model's hard ceiling.
|
|
19
|
+
*/
|
|
20
|
+
/**
 * Split `text` into fixed-size windows where each window repeats the last
 * `overlap` characters of the previous one.
 *
 * @param {string} text - Document to split.
 * @param {{chunkSize: number, overlap: number}} opts - Window length and
 *   overlap, both in characters. Fractional values are floored so every
 *   `baseOffset` is an integer that matches the slice actually taken.
 * @returns {{chunkText: string, baseOffset: number}[]} Chunks in document
 *   order; `[]` for empty text, a single chunk when the text already fits.
 * @throws {Error} When `chunkSize` is not a positive number, `overlap` is not
 *   a non-negative number, or `overlap >= chunkSize`.
 */
export function chunkDocument(text, opts) {
    // Missing or NaN options make every ordering comparison false, so they
    // used to slip past the guards, turn the stride into NaN and silently
    // drop everything after the first window. Normalise to NaN and reject.
    const chunkSize = typeof opts.chunkSize === "number" ? Math.floor(opts.chunkSize) : NaN;
    const overlap = typeof opts.overlap === "number" ? Math.floor(opts.overlap) : NaN;
    if (!(chunkSize > 0)) {
        throw new Error(`chunkDocument: chunkSize must be positive, got ${opts.chunkSize}`);
    }
    if (!(overlap >= 0)) {
        throw new Error(`chunkDocument: overlap must be non-negative, got ${opts.overlap}`);
    }
    if (overlap >= chunkSize) {
        throw new Error(`chunkDocument: overlap (${overlap}) must be less than chunkSize (${chunkSize})`);
    }
    if (text.length === 0) {
        return [];
    }
    if (text.length <= chunkSize) {
        // One-chunk fast path: the input already fits in a single window.
        return [{ chunkText: text, baseOffset: 0 }];
    }
    // The guards above guarantee stride >= 1, so the loop always advances.
    const stride = chunkSize - overlap;
    const chunks = [];
    for (let start = 0; start < text.length; start += stride) {
        const end = Math.min(start + chunkSize, text.length);
        chunks.push({ chunkText: text.slice(start, end), baseOffset: start });
        // Stop once a window reaches the end; a further step would only emit
        // a window fully contained in this one.
        if (end >= text.length) {
            break;
        }
    }
    return chunks;
}
|
|
50
|
+
// ---------------------------------------------------------------------------
|
|
51
|
+
// mergeOverlappingSections — collates per-chunk classifier results.
|
|
52
|
+
//
|
|
53
|
+
// Algorithm: group sections by `kind`, sort by `sourceStart`, then walk and
|
|
54
|
+
// union consecutive same-kind ranges whose intersection covers more than
|
|
55
|
+
// MERGE_OVERLAP_THRESHOLD of the smaller range. The longer summary wins on
|
|
56
|
+
// merge — empirically Haiku's longer summary on a chunk that saw more
|
|
57
|
+
// surrounding context tends to be the better one.
|
|
58
|
+
//
|
|
59
|
+
// Cross-kind overlap is preserved: chunk A's `Position` and chunk B's
|
|
60
|
+
// `Other` covering the same range are kept as two distinct sections (per
|
|
61
|
+
// eng review). The classifier disagreed about kind; the writer's downstream
|
|
62
|
+
// :Section:Other surfacing will let the operator decide which one wins
|
|
63
|
+
// during ontology growth review.
|
|
64
|
+
//
|
|
65
|
+
// Disjoint same-kind sections are also preserved — only adjacent ranges
|
|
66
|
+
// with material overlap are merged.
|
|
67
|
+
// ---------------------------------------------------------------------------
|
|
68
|
+
export const MERGE_OVERLAP_THRESHOLD = 0.5;
|
|
69
|
+
export function mergeOverlappingSections(input) {
|
|
70
|
+
if (input.length <= 1)
|
|
71
|
+
return input.slice();
|
|
72
|
+
// Group by kind so we never accidentally merge across kinds.
|
|
73
|
+
const byKind = new Map();
|
|
74
|
+
for (const s of input) {
|
|
75
|
+
const arr = byKind.get(s.kind);
|
|
76
|
+
if (arr)
|
|
77
|
+
arr.push(s);
|
|
78
|
+
else
|
|
79
|
+
byKind.set(s.kind, [s]);
|
|
80
|
+
}
|
|
81
|
+
const merged = [];
|
|
82
|
+
for (const group of byKind.values()) {
|
|
83
|
+
group.sort((a, b) => a.sourceStart - b.sourceStart || a.sourceEnd - b.sourceEnd);
|
|
84
|
+
let current = null;
|
|
85
|
+
for (const s of group) {
|
|
86
|
+
if (current === null) {
|
|
87
|
+
current = { ...s };
|
|
88
|
+
continue;
|
|
89
|
+
}
|
|
90
|
+
const intersection = Math.max(0, Math.min(current.sourceEnd, s.sourceEnd) - Math.max(current.sourceStart, s.sourceStart));
|
|
91
|
+
if (intersection === 0) {
|
|
92
|
+
merged.push(current);
|
|
93
|
+
current = { ...s };
|
|
94
|
+
continue;
|
|
95
|
+
}
|
|
96
|
+
const currentLen = current.sourceEnd - current.sourceStart;
|
|
97
|
+
const sLen = s.sourceEnd - s.sourceStart;
|
|
98
|
+
const overlapFraction = intersection / Math.min(currentLen, sLen);
|
|
99
|
+
if (overlapFraction > MERGE_OVERLAP_THRESHOLD) {
|
|
100
|
+
// Union the range; the section with the longer body contributes its
|
|
101
|
+
// non-range fields (title, properties, anchorEdge, related, etc.) on
|
|
102
|
+
// the assumption that a wider classification window grounded its
|
|
103
|
+
// properties more reliably. Summary always picks the longer of the two.
|
|
104
|
+
const fieldsWinner = sLen > currentLen ? s : current;
|
|
105
|
+
current = {
|
|
106
|
+
...fieldsWinner,
|
|
107
|
+
sourceStart: Math.min(current.sourceStart, s.sourceStart),
|
|
108
|
+
sourceEnd: Math.max(current.sourceEnd, s.sourceEnd),
|
|
109
|
+
summary: s.summary.length > current.summary.length ? s.summary : current.summary,
|
|
110
|
+
};
|
|
111
|
+
}
|
|
112
|
+
else {
|
|
113
|
+
merged.push(current);
|
|
114
|
+
current = { ...s };
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
if (current !== null)
|
|
118
|
+
merged.push(current);
|
|
119
|
+
}
|
|
120
|
+
// Return in whole-document reading order so the writer's :NEXT chain
|
|
121
|
+
// maps to source order.
|
|
122
|
+
merged.sort((a, b) => a.sourceStart - b.sourceStart);
|
|
123
|
+
return merged;
|
|
124
|
+
}
|
|
125
|
+
//# sourceMappingURL=document-chunker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"document-chunker.js","sourceRoot":"","sources":["../../src/lib/document-chunker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAqCH,MAAM,UAAU,aAAa,CAAC,IAAY,EAAE,IAAkB;IAC5D,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,GAAG,IAAI,CAAC;IACpC,IAAI,SAAS,IAAI,CAAC,EAAE,CAAC;QACnB,MAAM,IAAI,KAAK,CAAC,kDAAkD,SAAS,EAAE,CAAC,CAAC;IACjF,CAAC;IACD,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;QAChB,MAAM,IAAI,KAAK,CAAC,oDAAoD,OAAO,EAAE,CAAC,CAAC;IACjF,CAAC;IACD,IAAI,OAAO,IAAI,SAAS,EAAE,CAAC;QACzB,MAAM,IAAI,KAAK,CAAC,2BAA2B,OAAO,kCAAkC,SAAS,GAAG,CAAC,CAAC;IACpG,CAAC;IACD,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IACjC,IAAI,IAAI,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;QAC7B,iEAAiE;QACjE,0DAA0D;QAC1D,OAAO,CAAC,EAAE,SAAS,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC,EAAE,CAAC,CAAC;IAC9C,CAAC;IAED,MAAM,MAAM,GAAoB,EAAE,CAAC;IACnC,MAAM,MAAM,GAAG,SAAS,GAAG,OAAO,CAAC;IACnC,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,OAAO,KAAK,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QAC3B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;QACrD,MAAM,CAAC,IAAI,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC,CAAC;QACtE,IAAI,GAAG,IAAI,IAAI,CAAC,MAAM;YAAE,MAAM;QAC9B,KAAK,IAAI,MAAM,CAAC;IAClB,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,8EAA8E;AAC9E,oEAAoE;AACpE,EAAE;AACF,4EAA4E;AAC5E,yEAAyE;AACzE,2EAA2E;AAC3E,sEAAsE;AACtE,kDAAkD;AAClD,EAAE;AACF,sEAAsE;AACtE,yEAAyE;AACzE,4EAA4E;AAC5E,uEAAuE;AACvE,iCAAiC;AACjC,EAAE;AACF,wEAAwE;AACxE,oCAAoC;AACpC,8EAA8E;AAE9E,MAAM,CAAC,MAAM,uBAAuB,GAAG,GAAG,CAAC;AAE3C,MAAM,UAAU,wBAAwB,CAA0B,KAAU;IAC1E,IAAI,KAAK,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC,KAAK,EAAE,CAAC;IAE5C,6DAA6D;IAC7D,MAAM,MAAM,GAAG,IAAI,GAAG,EAAe,CAAC;IACtC,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,GAAG,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAC/B,IAAI,GAAG;YAAE,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;;YAChB,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAC/B,CAAC;IAED,MAAM,MAAM,GAAQ,EAAE,CAAC;IACvB,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,EAAE,EAAE,CAAC;QACpC,KAA
K,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,GAAG,CAAC,CAAC,WAAW,IAAI,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC;QACjF,IAAI,OAAO,GAAa,IAAI,CAAC;QAC7B,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;YACtB,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;gBACrB,OAAO,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC;gBACnB,SAAS;YACX,CAAC;YACD,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,SAAS,EAAE,CAAC,CAAC,SAAS,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC;YAC1H,IAAI,YAAY,KAAK,CAAC,EAAE,CAAC;gBACvB,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBACrB,OAAO,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC;gBACnB,SAAS;YACX,CAAC;YACD,MAAM,UAAU,GAAW,OAAO,CAAC,SAAS,GAAG,OAAO,CAAC,WAAW,CAAC;YACnE,MAAM,IAAI,GAAW,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,WAAW,CAAC;YACjD,MAAM,eAAe,GAAG,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;YAClE,IAAI,eAAe,GAAG,uBAAuB,EAAE,CAAC;gBAC9C,oEAAoE;gBACpE,qEAAqE;gBACrE,iEAAiE;gBACjE,wEAAwE;gBACxE,MAAM,YAAY,GAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;gBACxD,OAAO,GAAG;oBACR,GAAG,YAAY;oBACf,WAAW,EAAE,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,WAAW,CAAC;oBACzD,SAAS,EAAE,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,SAAS,EAAE,CAAC,CAAC,SAAS,CAAC;oBACnD,OAAO,EAAE,CAAC,CAAC,OAAO,CAAC,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO;iBACjF,CAAC;YACJ,CAAC;iBAAM,CAAC;gBACN,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBACrB,OAAO,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC;YACrB,CAAC;QACH,CAAC;QACD,IAAI,OAAO,KAAK,IAAI;YAAE,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC7C,CAAC;IAED,qEAAqE;IACrE,wBAAwB;IACxB,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,GAAG,CAAC,CAAC,WAAW,CAAC,CAAC;IACrD,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -17,9 +17,10 @@
|
|
|
17
17
|
* Hallucination defence: every returned `kind` is verified against the
|
|
18
18
|
* loaded ontology label set. Sections whose `kind` is not a real label
|
|
19
19
|
* are tagged `UNMAPPED`. Failure of the LLM call (missing creds, network,
|
|
20
|
-
* malformed JSON) returns `{kind: "
|
|
21
|
-
*
|
|
22
|
-
*
|
|
20
|
+
* malformed JSON) returns `{kind: "error", reason}` (Task 897 vocabulary
|
|
21
|
+
* — was `kind: "fallback"`). The caller decides whether to abort the
|
|
22
|
+
* ingest or degrade-on-error per session; classifier never silently
|
|
23
|
+
* substitutes a degraded write.
|
|
23
24
|
*/
|
|
24
25
|
/** Direction of the anchor edge relative to the typed node. */
|
|
25
26
|
export type AnchorEdgeDirection = "from-anchor" | "to-anchor";
|
|
@@ -70,8 +71,31 @@ export interface ClassifiedSection {
|
|
|
70
71
|
kind: string;
|
|
71
72
|
/** Short human-readable title for the section. */
|
|
72
73
|
title: string;
|
|
73
|
-
/**
|
|
74
|
+
/**
|
|
75
|
+
* The section's body text — embedded and stored on the section node.
|
|
76
|
+
*
|
|
77
|
+
* Task 896: server-reconstructed via `documentText.slice(sourceStart, sourceEnd)`.
|
|
78
|
+
* The LLM emits offsets, never the body text — output size becomes O(sections),
|
|
79
|
+
* not O(input chars). Callers consume the same `body: string` shape as before.
|
|
80
|
+
*/
|
|
74
81
|
body: string;
|
|
82
|
+
/**
|
|
83
|
+
* 1-3 sentence summary of the section, ≤500 chars (server-validated).
|
|
84
|
+
* The LLM emits this; the server truncates if oversize. Stored as
|
|
85
|
+
* `properties.summary` on the section node so adjacency search can
|
|
86
|
+
* surface it without rehydrating the body.
|
|
87
|
+
*/
|
|
88
|
+
summary: string;
|
|
89
|
+
/**
|
|
90
|
+
* Whole-document character offsets — inclusive start, exclusive end.
|
|
91
|
+
* The LLM emits these; the server validates bounds and reconstructs
|
|
92
|
+
* `body` via `documentText.slice(sourceStart, sourceEnd)`. In the
|
|
93
|
+
* chunked-classify path these are translated from chunk-local to
|
|
94
|
+
* whole-document coordinates so the merge step can detect boundary
|
|
95
|
+
* straddlers across chunks.
|
|
96
|
+
*/
|
|
97
|
+
sourceStart: number;
|
|
98
|
+
sourceEnd: number;
|
|
75
99
|
/** Properties on the section node (excluding accountId/embedding/provenance). */
|
|
76
100
|
properties: Record<string, unknown>;
|
|
77
101
|
/**
|
|
@@ -137,7 +161,7 @@ export type ClassifyResult = {
|
|
|
137
161
|
kind: "ok";
|
|
138
162
|
output: ClassifierOutput;
|
|
139
163
|
} | {
|
|
140
|
-
kind: "
|
|
164
|
+
kind: "error";
|
|
141
165
|
reason: string;
|
|
142
166
|
};
|
|
143
167
|
/**
|
|
@@ -206,10 +230,11 @@ export interface ClassifyParams {
|
|
|
206
230
|
* Sections the classifier could not natural-edge appear in
|
|
207
231
|
* `output.orphanCandidates`. The skill surfaces orphans loudly to
|
|
208
232
|
* the operator.
|
|
209
|
-
* { kind: "
|
|
210
|
-
* malformed JSON
|
|
211
|
-
*
|
|
212
|
-
*
|
|
233
|
+
* { kind: "error", reason } when the LLM is unavailable, returns
|
|
234
|
+
* malformed JSON, or hits an input-too-large guard. The caller
|
|
235
|
+
* decides whether to abort the ingest entirely (document mode) or
|
|
236
|
+
* degrade-on-error per session (chat mode, Task 897). Classifier
|
|
237
|
+
* never silently substitutes a degraded write.
|
|
213
238
|
*/
|
|
214
239
|
export declare function classifyDocument(params: ClassifyParams): Promise<ClassifyResult>;
|
|
215
240
|
//# sourceMappingURL=llm-classifier.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"llm-classifier.d.ts","sourceRoot":"","sources":["../../src/lib/llm-classifier.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"llm-classifier.d.ts","sourceRoot":"","sources":["../../src/lib/llm-classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAUH,+DAA+D;AAC/D,MAAM,MAAM,mBAAmB,GAAG,aAAa,GAAG,WAAW,CAAC;AAE9D,mEAAmE;AACnE,MAAM,MAAM,oBAAoB,GAAG,UAAU,GAAG,UAAU,CAAC;AAE3D,kFAAkF;AAClF,MAAM,WAAW,iBAAiB;IAChC,8DAA8D;IAC9D,IAAI,EAAE,MAAM,CAAC;IACb,sCAAsC;IACtC,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACpC,oDAAoD;IACpD,IAAI,EAAE;QACJ,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,oBAAoB,CAAC;QAChC,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KACtC,CAAC;IACF;;;;OAIG;IACH,KAAK,CAAC,EAAE,OAAO,CAAC;CACjB;AAED,oGAAoG;AACpG,MAAM,WAAW,iBAAiB;IAChC;;;;;;;;;;;;;;;;;;;;OAoBG;IACH,IAAI,EAAE,MAAM,CAAC;IACb,kDAAkD;IAClD,KAAK,EAAE,MAAM,CAAC;IACd;;;;;;OAMG;IACH,IAAI,EAAE,MAAM,CAAC;IACb;;;;;OAKG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;;;;;;OAOG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,iFAAiF;IACjF,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACpC;;;;OAIG;IACH,UAAU,EAAE;QACV,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,mBAAmB,CAAC;QAC/B,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KACtC,GAAG,IAAI,CAAC;IACT,oFAAoF;IACpF,OAAO,CAAC,EAAE,iBAAiB,EAAE,CAAC;IAC9B;;;;;;OAMG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED;;;;;GAKG;AACH,MAAM,WAAW,eAAe;IAC9B,2CAA2C;IAC3C,IAAI,EAAE,MAAM,CAAC;IACb,6EAA6E;IAC7E,KAAK,EAAE,MAAM,CAAC;IACd,sEAAsE;IACtE,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,oCAAoC;AACpC,MAAM,WAAW,gBAAgB;IAC/B,kDAAkD;IAClD,eAAe,EAAE,MAAM,CAAC;IACxB,kEAAkE;IAClE,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,6BAA6B;IAC7B,QAAQ,EAAE,iBAAiB,EAAE,CAAC;IAC9B,iFAAiF;IACjF,gBAAgB,EAAE,eAAe,EAAE,CAAC;IACpC;;+CAE2C;IAC3C,aAAa,CAAC,EAAE,KAAK,CAAC;QACpB,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,UAAU,GAAG,UAAU,CAAC;QACnC,UAAU,EAAE,MAAM,CAAC;QACnB,gBAAgB,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,wEAAwE;QACxE,KAAK,CAAC,EAAE,OAAO,CAAC;KACjB,CAAC,CAAC;IACH,mFAAmF;IACnF,mBAAmB,EAAE,MAAM,CAAC;CAC7B;AAED,MAAM,MAAM,cAAc,GACtB;IAAE,IAAI,EAAE,IAAI,CAAC;IAAC,MAAM,EAAE,gBAAgB,CAAA;CAAE,GACxC;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,MAAM,
EAAE,MAAM,CAAA;CAAE,CAAC;AA8EtC;;;;;;;;GAQG;AACH,eAAO,MAAM,kBAAkB,UAAU,CAAC;AAE1C,eAAO,MAAM,sBAAsB,wEAMzB,CAAC;AAEX,eAAO,MAAM,wBAAwB,yKAa3B,CAAC;AAEX,eAAO,MAAM,sBAAsB,4SAqBzB,CAAC;AAEX,8EAA8E;AAC9E,eAAO,MAAM,qBAAqB,sBAAuB,CAAC;AAE1D,eAAO,MAAM,iBAAiB,EAAE,WAAW,CAAC,MAAM,CAMhD,CAAC;AAsKH,MAAM,WAAW,cAAc;IAC7B,wCAAwC;IACxC,SAAS,EAAE,MAAM,CAAC;IAClB;;;;;;;OAOG;IACH,IAAI,CAAC,EAAE,UAAU,GAAG,MAAM,CAAC;IAC3B;;;;;;;;OAQG;IACH,iBAAiB,EAAE,MAAM,CAAC;IAC1B;;;;;;OAMG;IACH,cAAc,EAAE,WAAW,CAAC,MAAM,CAAC,CAAC;IACpC;;;;;;OAMG;IACH,cAAc,EAAE,MAAM,CAAC;IACvB,8EAA8E;IAC9E,YAAY,EAAE,MAAM,CAAC;CACtB;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,gBAAgB,CACpC,MAAM,EAAE,cAAc,GACrB,OAAO,CAAC,cAAc,CAAC,CA0WzB"}
|