@rubytech/create-maxy 1.0.708 → 1.0.709
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/payload/platform/lib/oauth-llm/dist/index.d.ts +101 -0
- package/payload/platform/lib/oauth-llm/dist/index.d.ts.map +1 -0
- package/payload/platform/lib/oauth-llm/dist/index.js +353 -0
- package/payload/platform/lib/oauth-llm/dist/index.js.map +1 -0
- package/payload/platform/lib/oauth-llm/src/index.ts +526 -0
- package/payload/platform/lib/oauth-llm/tsconfig.json +8 -0
- package/payload/platform/neo4j/schema.cypher +37 -11
- package/payload/platform/package.json +2 -2
- package/payload/platform/plugins/email/mcp/dist/lib/screening.d.ts +3 -3
- package/payload/platform/plugins/email/mcp/dist/lib/screening.d.ts.map +1 -1
- package/payload/platform/plugins/email/mcp/dist/lib/screening.js +12 -12
- package/payload/platform/plugins/email/mcp/dist/lib/screening.js.map +1 -1
- package/payload/platform/plugins/email/mcp/dist/scripts/email-auto-respond.js +14 -28
- package/payload/platform/plugins/email/mcp/dist/scripts/email-auto-respond.js.map +1 -1
- package/payload/platform/plugins/email/mcp/dist/scripts/email-fetch.js +9 -19
- package/payload/platform/plugins/email/mcp/dist/scripts/email-fetch.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/index.js +46 -18
- package/payload/platform/plugins/memory/mcp/dist/index.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.js +22 -18
- package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts +98 -24
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js +176 -86
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.js +12 -46
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.js +24 -12
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts +27 -11
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js +276 -238
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js.map +1 -1
- package/payload/platform/plugins/memory/references/schema-base.md +66 -14
- package/payload/platform/plugins/memory/skills/document-ingest/SKILL.md +53 -20
- package/payload/platform/templates/specialists/agents/database-operator.md +18 -0
- package/payload/server/chunk-Y57ACANQ.js +12292 -0
- package/payload/server/maxy-edge.js +1 -1
- package/payload/server/server.js +25 -44
|
@@ -3,33 +3,51 @@ import { embed, embedBatch } from "../lib/embeddings.js";
|
|
|
3
3
|
import { extractCache } from "./memory-ingest-extract.js";
|
|
4
4
|
import { deleteDocumentChildren } from "../lib/document-hierarchy.js";
|
|
5
5
|
import { restoreNode } from "../../../../../lib/graph-trash/dist/index.js";
|
|
6
|
+
import { IDENTITY_SECTION_KINDS, STRUCTURAL_SECTION_KINDS, CONTRACT_SECTION_KINDS, STANDALONE_NODE_KINDS, SECTION_KIND_OTHER, } from "../lib/llm-classifier.js";
|
|
6
7
|
// ---------------------------------------------------------------------------
|
|
7
|
-
//
|
|
8
|
+
// Single-Section document ingestion (Task 740, replacing Task 737's typed-vs-
|
|
9
|
+
// UNMAPPED fork).
|
|
8
10
|
//
|
|
9
11
|
// CACHE LOOKUP --> MERGE/REVIVE DOC --> CLEANUP CHILDREN --> EMBED --> WRITE
|
|
10
|
-
// (by attachmentId) KnowledgeDocument delete prior batch
|
|
11
|
-
// (idempotent) Section/Chunk/typed Ollama +
|
|
12
|
-
// +
|
|
12
|
+
// (by attachmentId) KnowledgeDocument delete prior batch Section nodes
|
|
13
|
+
// (idempotent) Section/Chunk/typed Ollama + secondary labels
|
|
14
|
+
// + NEXT chain
|
|
15
|
+
// + anchor edges
|
|
16
|
+
// + related entities
|
|
17
|
+
// + KD-level edges
|
|
13
18
|
//
|
|
14
|
-
//
|
|
15
|
-
//
|
|
16
|
-
//
|
|
17
|
-
//
|
|
18
|
-
//
|
|
19
|
-
//
|
|
20
|
-
//
|
|
21
|
-
//
|
|
22
|
-
//
|
|
19
|
+
// Every classified section produces ONE `:Section` node. When the classifier
|
|
20
|
+
// recognises the kind (Position/Education/Chapter/Parties/etc.), the same
|
|
21
|
+
// node carries a secondary label (`:Section:Position`) and any structured
|
|
22
|
+
// properties; identity-kind anchor edges go to the multi-labeled node
|
|
23
|
+
// directly, killing the parallel Section-vs-typed-node concept.
|
|
24
|
+
//
|
|
25
|
+
// Special-case writers fire for two contract-clause kinds:
|
|
26
|
+
// * Parties: `(:KnowledgeDocument)-[:PARTY]->(:Person|:Organization)` from documentEdges.
|
|
27
|
+
// * Definitions: `(:Section:Definitions)-[:DEFINES]->(:DefinedTerm)` from related entries.
|
|
28
|
+
//
|
|
29
|
+
// `:Chunk` is gone. Sections carry their body directly. If a body legitimately
|
|
30
|
+
// exceeds Neo4j's property limit, that is a classifier-split-the-section
|
|
31
|
+
// concern, not a writer problem.
|
|
23
32
|
//
|
|
24
33
|
// Provenance properties (createdByAgent, createdBySession, source,
|
|
25
|
-
// sourceDocumentId) stamp every node and edge
|
|
26
|
-
//
|
|
34
|
+
// sourceDocumentId) stamp every node and edge so re-ingest cleanup finds
|
|
35
|
+
// them deterministically.
|
|
27
36
|
// ---------------------------------------------------------------------------
|
|
28
|
-
const MAX_CHUNK_SIZE = 2000;
|
|
29
37
|
const PREVIEW_LENGTH = 150;
|
|
30
|
-
const UNMAPPED = "UNMAPPED";
|
|
31
38
|
const PROVENANCE_AGENT = "document-ingest";
|
|
32
39
|
const PROVENANCE_SOURCE = "document";
|
|
40
|
+
/** Identity-kind anchor edge writer set (UserProfile → Section:Kind). */
|
|
41
|
+
const IDENTITY_KINDS_SET = new Set(IDENTITY_SECTION_KINDS);
|
|
42
|
+
/** Standalone (non-Section) node kinds (currently just Project). */
|
|
43
|
+
const STANDALONE_KINDS_SET = new Set(STANDALONE_NODE_KINDS);
|
|
44
|
+
/** All section-shaped kinds — anything that becomes a `:Section` node. */
|
|
45
|
+
const SECTION_LABEL_KINDS = new Set([
|
|
46
|
+
...IDENTITY_SECTION_KINDS,
|
|
47
|
+
...STRUCTURAL_SECTION_KINDS,
|
|
48
|
+
...CONTRACT_SECTION_KINDS,
|
|
49
|
+
SECTION_KIND_OTHER,
|
|
50
|
+
]);
|
|
33
51
|
// ---------------------------------------------------------------------------
|
|
34
52
|
// Helpers
|
|
35
53
|
// ---------------------------------------------------------------------------
|
|
@@ -38,15 +56,6 @@ function normaliseKeywords(arr) {
|
|
|
38
56
|
return [];
|
|
39
57
|
return arr.map((k) => k.toLowerCase().trim()).filter(Boolean);
|
|
40
58
|
}
|
|
41
|
-
function chunkBody(body) {
|
|
42
|
-
if (body.length <= MAX_CHUNK_SIZE)
|
|
43
|
-
return [body];
|
|
44
|
-
const chunks = [];
|
|
45
|
-
for (let i = 0; i < body.length; i += MAX_CHUNK_SIZE) {
|
|
46
|
-
chunks.push(body.slice(i, i + MAX_CHUNK_SIZE));
|
|
47
|
-
}
|
|
48
|
-
return chunks;
|
|
49
|
-
}
|
|
50
59
|
/**
|
|
51
60
|
* Identifying property for MERGE on a related node by kind.
|
|
52
61
|
* Choose a stable, human-recognisable key per label so the same real-world
|
|
@@ -78,11 +87,17 @@ function mergeKeyFor(kind, properties) {
|
|
|
78
87
|
return null;
|
|
79
88
|
}
|
|
80
89
|
}
|
|
90
|
+
/**
 * Increment the counter stored under `kind` in `breakdown`, starting from 0
 * when the key has not been counted yet.
 *
 * @param {Object<string, number>} breakdown - Tally object, mutated in place.
 * @param {string} kind - Key whose counter is incremented.
 * @returns {void}
 */
function bumpKind(breakdown, kind) {
    // Read only OWN properties: on a plain object a key such as "constructor"
    // or "toString" would otherwise resolve to the inherited Object.prototype
    // member, and `member + 1` would store a garbage string instead of 1.
    const hasOwnCount = Object.prototype.hasOwnProperty.call(breakdown, kind);
    const current = hasOwnCount ? breakdown[kind] : undefined;
    breakdown[kind] = (current ?? 0) + 1;
}
|
|
93
|
+
/**
 * Increment the counter stored under `edgeType` in `breakdown`, starting from
 * 0 when the edge type has not been counted yet.
 *
 * @param {Object<string, number>} breakdown - Tally object, mutated in place.
 * @param {string} edgeType - Relationship type whose counter is incremented.
 * @returns {void}
 */
function bumpEdge(breakdown, edgeType) {
    // Read only OWN properties so an edge type that happens to match an
    // Object.prototype member name (e.g. "constructor") starts at 0 instead of
    // picking up the inherited function and producing a garbage string.
    const hasOwnCount = Object.prototype.hasOwnProperty.call(breakdown, edgeType);
    const current = hasOwnCount ? breakdown[edgeType] : undefined;
    breakdown[edgeType] = (current ?? 0) + 1;
}
|
|
81
96
|
// ---------------------------------------------------------------------------
|
|
82
97
|
// Main entry point
|
|
83
98
|
// ---------------------------------------------------------------------------
|
|
84
99
|
export async function memoryIngest(params) {
|
|
85
|
-
const { accountId, attachmentId, documentSummary, anchorNodeId, anchorLabel, sections, scope, sourceUrl, sourceType, documentKeywords: rawDocKeywords, userKeywords: rawUserKeywords, sessionId, } = params;
|
|
100
|
+
const { accountId, attachmentId, documentSummary, anchorNodeId, anchorLabel, sections, documentEdges = [], orphanCandidates = [], scope, sourceUrl, sourceType, documentKeywords: rawDocKeywords, userKeywords: rawUserKeywords, sessionId, } = params;
|
|
86
101
|
if (!scope) {
|
|
87
102
|
throw new Error("scope is required — valid values: 'public', 'shared', 'admin', 'user:{identifier}'");
|
|
88
103
|
}
|
|
@@ -116,23 +131,23 @@ export async function memoryIngest(params) {
|
|
|
116
131
|
const embeddings = await embedBatch(textsToEmbed);
|
|
117
132
|
log("embedded", `${embeddings.length} vectors`);
|
|
118
133
|
const docEmbedding = embeddings[0];
|
|
119
|
-
const
|
|
134
|
+
const dbSession = getSession();
|
|
120
135
|
let documentNodeId = "";
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
let
|
|
124
|
-
let
|
|
136
|
+
const kindBreakdown = {};
|
|
137
|
+
const edgeBreakdown = {};
|
|
138
|
+
let relatedCount = 0;
|
|
139
|
+
let standaloneCount = 0;
|
|
125
140
|
try {
|
|
126
141
|
// 2. Trash-revival (Task 576) — restore a soft-deleted KnowledgeDocument
|
|
127
142
|
// with this attachmentId so MERGE finds the existing node.
|
|
128
|
-
const trashedDoc = await
|
|
143
|
+
const trashedDoc = await dbSession.run(`MATCH (d:KnowledgeDocument:Trashed)
|
|
129
144
|
WHERE d.accountId = $accountId
|
|
130
145
|
AND d._trashedKeys IS NOT NULL
|
|
131
146
|
AND d._trashedKeys CONTAINS $attachmentId
|
|
132
147
|
RETURN elementId(d) AS eid LIMIT 1`, { accountId, attachmentId });
|
|
133
148
|
if (trashedDoc.records.length > 0) {
|
|
134
149
|
const eid = trashedDoc.records[0].get("eid");
|
|
135
|
-
await restoreNode({ session, accountId, elementId: eid });
|
|
150
|
+
await restoreNode({ session: dbSession, accountId, elementId: eid });
|
|
136
151
|
log("revived", `restored trashed KnowledgeDocument elementId=${eid}`);
|
|
137
152
|
}
|
|
138
153
|
// 3. MERGE the KnowledgeDocument parent. Optional fields (sourceUrl,
|
|
@@ -153,7 +168,7 @@ export async function memoryIngest(params) {
|
|
|
153
168
|
optionalParams.keywords = keywords;
|
|
154
169
|
}
|
|
155
170
|
const optionalSetClause = optionalSets.length > 0 ? ", " + optionalSets.join(", ") : "";
|
|
156
|
-
const docResult = await
|
|
171
|
+
const docResult = await dbSession.run(`MERGE (d:KnowledgeDocument { attachmentId: $attachmentId })
|
|
157
172
|
SET d.accountId = $accountId,
|
|
158
173
|
d.name = $filename,
|
|
159
174
|
d.encodingFormat = $mimeType,
|
|
@@ -179,21 +194,21 @@ export async function memoryIngest(params) {
|
|
|
179
194
|
...optionalParams,
|
|
180
195
|
});
|
|
181
196
|
documentNodeId = docResult.records[0].get("nodeId");
|
|
182
|
-
|
|
183
|
-
//
|
|
184
|
-
//
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
cleanup.typed > 0 ||
|
|
189
|
-
cleanup.references > 0) {
|
|
197
|
+
console.error(`[memory-ingest] KnowledgeDocument.name=${JSON.stringify(filename)} attachmentId=${attachmentId.slice(0, 8)} docId=${documentNodeId.slice(0, 12)}`);
|
|
198
|
+
// 4. Cleanup prior children (idempotent re-ingest). Removes Section nodes
|
|
199
|
+
// (any secondary label) and any standalone nodes stamped with this
|
|
200
|
+
// attachmentId. MERGEd related entities (Organizations, Persons) are spared.
|
|
201
|
+
const cleanup = await deleteDocumentChildren(attachmentId, dbSession);
|
|
202
|
+
if (cleanup.sections > 0 || cleanup.chunks > 0 || cleanup.typed > 0 || cleanup.references > 0) {
|
|
190
203
|
log("cleanup", `deleted ${cleanup.sections} sections, ${cleanup.chunks} chunks, ${cleanup.typed} typed, ${cleanup.references} references`);
|
|
191
204
|
}
|
|
192
|
-
// 5. Per-section writes.
|
|
205
|
+
// 5. Per-section writes. Track the previous section's elementId so we can
|
|
206
|
+
// chain (:Section)-[:NEXT]->(:Section) in reading order.
|
|
207
|
+
let previousSectionId = null;
|
|
193
208
|
for (let i = 0; i < sections.length; i++) {
|
|
194
209
|
const section = sections[i];
|
|
195
210
|
const bodyEmbedding = embeddings[i + 1]; // +1 because index 0 is docEmbedding
|
|
196
|
-
const
|
|
211
|
+
const baseProps = {
|
|
197
212
|
accountId,
|
|
198
213
|
title: section.title,
|
|
199
214
|
body: section.body,
|
|
@@ -209,172 +224,91 @@ export async function memoryIngest(params) {
|
|
|
209
224
|
source: PROVENANCE_SOURCE,
|
|
210
225
|
sourceDocumentId: attachmentId,
|
|
211
226
|
};
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
const
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
createdBySession: $createdBySession,
|
|
227
|
-
source: $source,
|
|
228
|
-
sourceDocumentId: $sourceDocumentId
|
|
229
|
-
})
|
|
230
|
-
WITH s
|
|
231
|
-
MATCH (d:KnowledgeDocument) WHERE elementId(d) = $docId
|
|
232
|
-
CREATE (d)-[:HAS_SECTION]->(s)
|
|
233
|
-
RETURN elementId(s) AS nodeId`, { ...sectionParams, docId: documentNodeId });
|
|
234
|
-
const sectionId = sectionResult.records[0].get("nodeId");
|
|
235
|
-
// Body overflow → :Chunk children
|
|
236
|
-
if (section.body.length > MAX_CHUNK_SIZE) {
|
|
237
|
-
const overflowChunks = chunkBody(section.body);
|
|
238
|
-
const overflowEmbeddings = await embedBatch(overflowChunks);
|
|
239
|
-
for (let ci = 0; ci < overflowChunks.length; ci++) {
|
|
240
|
-
await session.run(`MATCH (s:Section) WHERE elementId(s) = $sectionId
|
|
241
|
-
CREATE (s)-[:HAS_CHUNK]->(c:Chunk {
|
|
242
|
-
accountId: $accountId,
|
|
243
|
-
content: $content,
|
|
244
|
-
summary: $summary,
|
|
245
|
-
position: $position,
|
|
246
|
-
scope: $scope,
|
|
247
|
-
embedding: $embedding,
|
|
248
|
-
createdAt: $createdAt,
|
|
249
|
-
updatedAt: $updatedAt,
|
|
250
|
-
createdByAgent: $createdByAgent,
|
|
251
|
-
createdBySource: $createdBySource,
|
|
252
|
-
createdBySession: $createdBySession,
|
|
253
|
-
source: $source,
|
|
254
|
-
sourceDocumentId: $sourceDocumentId
|
|
255
|
-
})`, {
|
|
256
|
-
sectionId,
|
|
257
|
-
accountId,
|
|
258
|
-
content: overflowChunks[ci],
|
|
259
|
-
summary: overflowChunks[ci].slice(0, PREVIEW_LENGTH),
|
|
260
|
-
position: ci,
|
|
261
|
-
scope,
|
|
262
|
-
embedding: overflowEmbeddings[ci],
|
|
263
|
-
createdAt: now,
|
|
264
|
-
updatedAt: now,
|
|
265
|
-
createdByAgent: PROVENANCE_AGENT,
|
|
266
|
-
createdBySource: PROVENANCE_AGENT,
|
|
267
|
-
createdBySession: sessionId ?? "",
|
|
268
|
-
source: PROVENANCE_SOURCE,
|
|
269
|
-
sourceDocumentId: attachmentId,
|
|
270
|
-
});
|
|
271
|
-
chunkCount += 1;
|
|
227
|
+
// 5a. Standalone node kind (currently just Project) — no `:Section` label.
|
|
228
|
+
if (STANDALONE_KINDS_SET.has(section.kind)) {
|
|
229
|
+
const standaloneId = await writeStandaloneNode(dbSession, section, baseProps, anchorNodeId, anchorLabel, attachmentId, sessionId ?? "", now);
|
|
230
|
+
standaloneCount += 1;
|
|
231
|
+
bumpKind(kindBreakdown, section.kind);
|
|
232
|
+
if (section.anchorEdge)
|
|
233
|
+
bumpEdge(edgeBreakdown, section.anchorEdge.type);
|
|
234
|
+
previousSectionId = null; // standalone breaks the section chain
|
|
235
|
+
// related entities for standalone (e.g. Project UNDER Organization)
|
|
236
|
+
if (section.related && section.related.length > 0) {
|
|
237
|
+
for (const related of section.related) {
|
|
238
|
+
await writeRelatedAndEdge(dbSession, standaloneId, related, accountId, now, sessionId ?? "", attachmentId);
|
|
239
|
+
relatedCount += 1;
|
|
240
|
+
bumpEdge(edgeBreakdown, related.edge.type);
|
|
272
241
|
}
|
|
273
242
|
}
|
|
274
|
-
unmappedCount += 1;
|
|
275
|
-
console.error(`[document-ingest] unmapped-section title="${section.title}" chars=${section.body.length}`);
|
|
276
243
|
continue;
|
|
277
244
|
}
|
|
278
|
-
// 5b.
|
|
279
|
-
//
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
245
|
+
// 5b. Section-shaped kind (everything else, including Other). One `:Section`
|
|
246
|
+
// node with optional secondary label.
|
|
247
|
+
const isKnownSectionKind = SECTION_LABEL_KINDS.has(section.kind);
|
|
248
|
+
const sectionLabels = isKnownSectionKind && section.kind !== SECTION_KIND_OTHER
|
|
249
|
+
? [`Section`, section.kind]
|
|
250
|
+
: section.kind === SECTION_KIND_OTHER
|
|
251
|
+
? [`Section`, `Other`]
|
|
252
|
+
: [`Section`]; // unrecognised kind — should never happen post-classifier
|
|
253
|
+
// Compose properties: classifier-supplied properties overlaid on system
|
|
254
|
+
// fields, system fields winning. For Section:Other, also stamp
|
|
255
|
+
// classifierReason so the ontology-growth query can surface it.
|
|
256
|
+
const sectionProps = {
|
|
283
257
|
...section.properties,
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
258
|
+
...baseProps,
|
|
259
|
+
...(section.kind === SECTION_KIND_OTHER && section.classifierReason
|
|
260
|
+
? { classifierReason: section.classifierReason }
|
|
261
|
+
: {}),
|
|
262
|
+
};
|
|
263
|
+
const labelClause = sectionLabels.map((l) => `\`${l}\``).join(":");
|
|
264
|
+
const sectionResult = await dbSession.run(`CREATE (s:${labelClause})
|
|
265
|
+
SET s = $props
|
|
266
|
+
WITH s
|
|
267
|
+
MATCH (d:KnowledgeDocument) WHERE elementId(d) = $docId
|
|
268
|
+
CREATE (d)-[hs:HAS_SECTION]->(s)
|
|
269
|
+
SET hs.createdByAgent = $createdByAgent,
|
|
270
|
+
hs.createdBySession = $createdBySession,
|
|
271
|
+
hs.source = $source,
|
|
272
|
+
hs.sourceDocumentId = $sourceDocumentId,
|
|
273
|
+
hs.createdAt = $createdAt
|
|
274
|
+
RETURN elementId(s) AS nodeId`, {
|
|
275
|
+
props: sectionProps,
|
|
276
|
+
docId: documentNodeId,
|
|
291
277
|
createdByAgent: PROVENANCE_AGENT,
|
|
292
|
-
createdBySource: PROVENANCE_AGENT,
|
|
293
278
|
createdBySession: sessionId ?? "",
|
|
294
279
|
source: PROVENANCE_SOURCE,
|
|
295
280
|
sourceDocumentId: attachmentId,
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
createdByAgent: $createdByAgent,
|
|
321
|
-
createdBySource: $createdBySource,
|
|
322
|
-
createdBySession: $createdBySession,
|
|
323
|
-
source: $source,
|
|
324
|
-
sourceDocumentId: $sourceDocumentId
|
|
325
|
-
})`, {
|
|
326
|
-
typedId: typedNodeId,
|
|
327
|
-
accountId,
|
|
328
|
-
content: overflowChunks[ci],
|
|
329
|
-
summary: overflowChunks[ci].slice(0, PREVIEW_LENGTH),
|
|
330
|
-
position: ci,
|
|
331
|
-
scope,
|
|
332
|
-
embedding: overflowEmbeddings[ci],
|
|
333
|
-
createdAt: now,
|
|
334
|
-
updatedAt: now,
|
|
335
|
-
createdByAgent: PROVENANCE_AGENT,
|
|
336
|
-
createdBySource: PROVENANCE_AGENT,
|
|
337
|
-
createdBySession: sessionId ?? "",
|
|
338
|
-
source: PROVENANCE_SOURCE,
|
|
339
|
-
sourceDocumentId: attachmentId,
|
|
340
|
-
});
|
|
341
|
-
chunkCount += 1;
|
|
342
|
-
}
|
|
343
|
-
}
|
|
344
|
-
// Related nodes (e.g. Position's employer Organization). MERGE when
|
|
345
|
-
// the related kind has a stable identifying property; CREATE when not.
|
|
346
|
-
// Provenance is stamped only on creation (ON CREATE SET) so shared
|
|
347
|
-
// entities don't get rewritten by subsequent ingestions.
|
|
348
|
-
if (section.related && section.related.length > 0) {
|
|
349
|
-
for (const related of section.related) {
|
|
350
|
-
const relatedNodeId = await writeRelatedNode({
|
|
351
|
-
session,
|
|
352
|
-
accountId,
|
|
353
|
-
related,
|
|
354
|
-
now,
|
|
355
|
-
sessionId,
|
|
356
|
-
});
|
|
357
|
-
// Edge from typed node to related node
|
|
358
|
-
await session.run(`MATCH (t) WHERE elementId(t) = $typedId
|
|
359
|
-
MATCH (r) WHERE elementId(r) = $relatedId
|
|
360
|
-
CREATE (t)-[edge:\`${related.edge.type}\`]->(r)
|
|
361
|
-
SET edge += $edgeProps`, {
|
|
362
|
-
typedId: related.edge.direction === "outgoing" ? typedNodeId : relatedNodeId,
|
|
363
|
-
relatedId: related.edge.direction === "outgoing" ? relatedNodeId : typedNodeId,
|
|
364
|
-
edgeProps: {
|
|
365
|
-
...(related.edge.properties ?? {}),
|
|
366
|
-
createdByAgent: PROVENANCE_AGENT,
|
|
367
|
-
createdBySession: sessionId ?? "",
|
|
368
|
-
source: PROVENANCE_SOURCE,
|
|
369
|
-
sourceDocumentId: attachmentId,
|
|
370
|
-
createdAt: now,
|
|
371
|
-
},
|
|
372
|
-
});
|
|
373
|
-
}
|
|
281
|
+
createdAt: now,
|
|
282
|
+
});
|
|
283
|
+
const sectionId = sectionResult.records[0].get("nodeId");
|
|
284
|
+
bumpKind(kindBreakdown, section.kind);
|
|
285
|
+
bumpEdge(edgeBreakdown, "HAS_SECTION");
|
|
286
|
+
// 5c. NEXT chain in reading order.
|
|
287
|
+
if (previousSectionId) {
|
|
288
|
+
await dbSession.run(`MATCH (a:Section), (b:Section)
|
|
289
|
+
WHERE elementId(a) = $prev AND elementId(b) = $cur
|
|
290
|
+
CREATE (a)-[n:NEXT]->(b)
|
|
291
|
+
SET n.createdByAgent = $createdByAgent,
|
|
292
|
+
n.createdBySession = $createdBySession,
|
|
293
|
+
n.source = $source,
|
|
294
|
+
n.sourceDocumentId = $sourceDocumentId,
|
|
295
|
+
n.createdAt = $createdAt`, {
|
|
296
|
+
prev: previousSectionId,
|
|
297
|
+
cur: sectionId,
|
|
298
|
+
createdByAgent: PROVENANCE_AGENT,
|
|
299
|
+
createdBySession: sessionId ?? "",
|
|
300
|
+
source: PROVENANCE_SOURCE,
|
|
301
|
+
sourceDocumentId: attachmentId,
|
|
302
|
+
createdAt: now,
|
|
303
|
+
});
|
|
304
|
+
bumpEdge(edgeBreakdown, "NEXT");
|
|
374
305
|
}
|
|
375
|
-
|
|
376
|
-
|
|
306
|
+
previousSectionId = sectionId;
|
|
307
|
+
// 5d. Anchor edge (identity kinds only). Writer applies the edge the
|
|
308
|
+
// classifier proposed; never invents.
|
|
309
|
+
if (section.anchorEdge && IDENTITY_KINDS_SET.has(section.kind)) {
|
|
377
310
|
const direction = section.anchorEdge.direction;
|
|
311
|
+
const edgeType = section.anchorEdge.type;
|
|
378
312
|
const edgeProps = {
|
|
379
313
|
...(section.anchorEdge.properties ?? {}),
|
|
380
314
|
createdByAgent: PROVENANCE_AGENT,
|
|
@@ -383,59 +317,163 @@ export async function memoryIngest(params) {
|
|
|
383
317
|
sourceDocumentId: attachmentId,
|
|
384
318
|
createdAt: now,
|
|
385
319
|
};
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
MATCH (
|
|
389
|
-
CREATE (a)-[edge:\`${
|
|
390
|
-
SET edge += $edgeProps
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
320
|
+
const cypher = direction === "from-anchor"
|
|
321
|
+
? `MATCH (a:\`${anchorLabel}\`) WHERE elementId(a) = $anchorId AND a.accountId = $accountId
|
|
322
|
+
MATCH (s:Section) WHERE elementId(s) = $sectionId
|
|
323
|
+
CREATE (a)-[edge:\`${edgeType}\`]->(s)
|
|
324
|
+
SET edge += $edgeProps`
|
|
325
|
+
: `MATCH (a:\`${anchorLabel}\`) WHERE elementId(a) = $anchorId AND a.accountId = $accountId
|
|
326
|
+
MATCH (s:Section) WHERE elementId(s) = $sectionId
|
|
327
|
+
CREATE (s)-[edge:\`${edgeType}\`]->(a)
|
|
328
|
+
SET edge += $edgeProps`;
|
|
329
|
+
await dbSession.run(cypher, { anchorId: anchorNodeId, sectionId, accountId, edgeProps });
|
|
330
|
+
bumpEdge(edgeBreakdown, edgeType);
|
|
331
|
+
}
|
|
332
|
+
// 5e. Related entities (Organization for AT, DefinedTerm for DEFINES on
|
|
333
|
+
// :Section:Definitions, etc.). Apply each as the classifier emitted.
|
|
334
|
+
if (section.related && section.related.length > 0) {
|
|
335
|
+
for (const related of section.related) {
|
|
336
|
+
await writeRelatedAndEdge(dbSession, sectionId, related, accountId, now, sessionId ?? "", attachmentId);
|
|
337
|
+
relatedCount += 1;
|
|
338
|
+
bumpEdge(edgeBreakdown, related.edge.type);
|
|
397
339
|
}
|
|
398
340
|
}
|
|
399
|
-
// (KnowledgeDocument)-[:REFERENCES]->(typed) for retrieval.
|
|
400
|
-
await session.run(`MATCH (d:KnowledgeDocument) WHERE elementId(d) = $docId
|
|
401
|
-
MATCH (t) WHERE elementId(t) = $typedId
|
|
402
|
-
CREATE (d)-[r:REFERENCES]->(t)
|
|
403
|
-
SET r.createdByAgent = $createdByAgent,
|
|
404
|
-
r.createdBySession = $createdBySession,
|
|
405
|
-
r.source = $source,
|
|
406
|
-
r.sourceDocumentId = $sourceDocumentId,
|
|
407
|
-
r.createdAt = $createdAt`, {
|
|
408
|
-
docId: documentNodeId,
|
|
409
|
-
typedId: typedNodeId,
|
|
410
|
-
createdByAgent: PROVENANCE_AGENT,
|
|
411
|
-
createdBySession: sessionId ?? "",
|
|
412
|
-
source: PROVENANCE_SOURCE,
|
|
413
|
-
sourceDocumentId: attachmentId,
|
|
414
|
-
createdAt: now,
|
|
415
|
-
});
|
|
416
|
-
entityLinks += 1;
|
|
417
|
-
typedCount += 1;
|
|
418
|
-
console.error(`[document-ingest] section kind=${section.kind} title="${section.title}" chars=${section.body.length}`);
|
|
419
341
|
}
|
|
420
|
-
|
|
421
|
-
|
|
342
|
+
// 6. Document-level edges (PARTY for contracts, etc.). Applied off the
|
|
343
|
+
// KnowledgeDocument; classifier proposes, writer applies.
|
|
344
|
+
if (documentEdges && documentEdges.length > 0) {
|
|
345
|
+
for (const docEdge of documentEdges) {
|
|
346
|
+
await writeDocumentEdge(dbSession, documentNodeId, docEdge, accountId, now, sessionId ?? "", attachmentId);
|
|
347
|
+
relatedCount += 1;
|
|
348
|
+
bumpEdge(edgeBreakdown, docEdge.type);
|
|
349
|
+
}
|
|
350
|
+
}
|
|
351
|
+
log("neo4j-done", `doc=${documentNodeId.slice(0, 12)} sections=${sections.length} kinds=${JSON.stringify(kindBreakdown)} edges=${JSON.stringify(edgeBreakdown)} related=${relatedCount} orphans=${orphanCandidates.length}`);
|
|
352
|
+
console.error(`[memory-ingest] sections=${sections.length} chain=${Math.max(0, sections.length - 1)} typed=${formatBreakdown(kindBreakdown)} edges=${formatBreakdown(edgeBreakdown)} orphans=${orphanCandidates.length} docId=${documentNodeId}`);
|
|
353
|
+
for (const orphan of orphanCandidates) {
|
|
354
|
+
console.error(`[document-ingest] orphan-candidate node=${orphan.kind} label=${JSON.stringify(orphan.label)} reason=${JSON.stringify(orphan.reason)}`);
|
|
355
|
+
}
|
|
422
356
|
return {
|
|
423
357
|
documentNodeId,
|
|
424
358
|
sectionCount: sections.length,
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
359
|
+
kindBreakdown,
|
|
360
|
+
edgeBreakdown,
|
|
361
|
+
relatedCount,
|
|
362
|
+
standaloneCount,
|
|
363
|
+
orphanCandidates,
|
|
429
364
|
documentSummary,
|
|
430
365
|
keywords,
|
|
431
366
|
};
|
|
432
367
|
}
|
|
433
368
|
finally {
|
|
434
369
|
extractCache.delete(attachmentId);
|
|
435
|
-
await
|
|
370
|
+
await dbSession.close();
|
|
436
371
|
log("complete");
|
|
437
372
|
}
|
|
438
373
|
}
|
|
374
|
+
// ---------------------------------------------------------------------------
|
|
375
|
+
// Helpers — standalone-node writer (Project), related-edge writer, document-
|
|
376
|
+
// edge writer.
|
|
377
|
+
// ---------------------------------------------------------------------------
|
|
378
|
+
/**
 * Create a standalone (non-`:Section`) node labelled with `section.kind` and,
 * when the section carries an `anchorEdge`, create that edge between the new
 * node and the anchor node.
 *
 * @param {object} dbSession - Object exposing `run(cypher, params)`.
 * @param {object} section - Classified section; `kind`, `properties` and
 *   `anchorEdge` ({ type, direction, properties }) are read.
 * @param {object} baseProps - System properties; they override
 *   `section.properties`. `baseProps.accountId` also scopes the anchor match.
 * @param {string} anchorNodeId - elementId the anchor node is matched by.
 * @param {string} anchorLabel - Label the anchor node is matched by.
 * @param {string} attachmentId - Stamped on the edge as `sourceDocumentId`.
 * @param {string} sessionId - Stamped on the edge as `createdBySession`.
 * @param {*} now - Stamped on the edge as `createdAt`.
 * @returns {Promise<string>} elementId of the created node.
 */
async function writeStandaloneNode(dbSession, section, baseProps, anchorNodeId, anchorLabel, attachmentId, sessionId, now) {
    // Labels and relationship types are spliced into the query text, so they
    // must be escaped: a backtick inside a backtick-quoted identifier is
    // written as two backticks. Without this, a value containing a backtick
    // terminates the identifier and the remainder is parsed as Cypher.
    // NOTE(review): some Neo4j versions are documented to decode the textual
    // escape \u0060 into a backtick before parsing, so it is normalised first —
    // confirm against the deployed server version.
    const quoteIdent = (name) => "`" + String(name).split("\\u0060").join("`").split("`").join("``") + "`";
    // System fields are spread last so they win over classifier-supplied ones.
    const props = { ...section.properties, ...baseProps };
    const created = await dbSession.run(`CREATE (n:${quoteIdent(section.kind)})
     SET n = $props
     RETURN elementId(n) AS nodeId`, { props });
    const nodeId = created.records[0].get("nodeId");
    if (!section.anchorEdge) {
        return nodeId;
    }
    const { direction, type: edgeType, properties: classifierEdgeProps } = section.anchorEdge;
    // Provenance keys are spread last so the classifier cannot override them.
    const edgeProps = {
        ...(classifierEdgeProps ?? {}),
        createdByAgent: PROVENANCE_AGENT,
        createdBySession: sessionId,
        source: PROVENANCE_SOURCE,
        sourceDocumentId: attachmentId,
        createdAt: now,
    };
    // "from-anchor" points anchor -> node; any other direction points node -> anchor.
    const edgePattern = direction === "from-anchor"
        ? `(a)-[edge:${quoteIdent(edgeType)}]->(n)`
        : `(n)-[edge:${quoteIdent(edgeType)}]->(a)`;
    await dbSession.run(`MATCH (a:${quoteIdent(anchorLabel)}) WHERE elementId(a) = $anchorId AND a.accountId = $accountId
     MATCH (n) WHERE elementId(n) = $nodeId
     CREATE ${edgePattern}
     SET edge += $edgeProps`, {
        anchorId: anchorNodeId,
        nodeId,
        accountId: baseProps.accountId,
        edgeProps,
    });
    return nodeId;
}
|
|
413
|
+
/**
 * Write a related entity via `writeRelatedNode`, then create the edge between
 * `fromNodeId` and that entity using the type and direction in `related.edge`.
 *
 * @param {object} dbSession - Object exposing `run(cypher, params)`.
 * @param {string} fromNodeId - elementId of the node the relation belongs to.
 * @param {object} related - Related-entity descriptor; `edge.type`,
 *   `edge.direction` and `edge.properties` are read here.
 * @param {string} accountId - Forwarded to `writeRelatedNode`.
 * @param {*} now - Stamped on the edge as `createdAt`.
 * @param {string} sessionId - Stamped on the edge as `createdBySession`.
 * @param {string} attachmentId - Stamped on the edge as `sourceDocumentId`.
 * @returns {Promise<void>}
 */
async function writeRelatedAndEdge(dbSession, fromNodeId, related, accountId, now, sessionId, attachmentId) {
    // The relationship type is spliced into the query text, so it must be
    // escaped: a backtick inside a backtick-quoted identifier is written as
    // two backticks. Without this, a type containing a backtick terminates the
    // identifier and the remainder is parsed as Cypher.
    // NOTE(review): some Neo4j versions are documented to decode the textual
    // escape \u0060 into a backtick before parsing, so it is normalised first —
    // confirm against the deployed server version.
    const quoteIdent = (name) => "`" + String(name).split("\\u0060").join("`").split("`").join("``") + "`";
    const relatedNodeId = await writeRelatedNode({
        session: dbSession,
        accountId,
        related,
        now,
        sessionId,
    });
    // "outgoing" means fromNode -> related; any other direction is reversed.
    const isOutgoing = related.edge.direction === "outgoing";
    await dbSession.run(`MATCH (a) WHERE elementId(a) = $a
     MATCH (b) WHERE elementId(b) = $b
     CREATE (a)-[edge:${quoteIdent(related.edge.type)}]->(b)
     SET edge += $edgeProps`, {
        a: isOutgoing ? fromNodeId : relatedNodeId,
        b: isOutgoing ? relatedNodeId : fromNodeId,
        // Provenance keys are spread last so the classifier cannot override them.
        edgeProps: {
            ...(related.edge.properties ?? {}),
            createdByAgent: PROVENANCE_AGENT,
            createdBySession: sessionId,
            source: PROVENANCE_SOURCE,
            sourceDocumentId: attachmentId,
            createdAt: now,
        },
    });
}
|
|
437
|
+
/**
 * Write the target of a document-level edge via `writeRelatedNode`, then
 * create an edge of type `docEdge.type` from the KnowledgeDocument to it.
 *
 * @param {object} dbSession - Object exposing `run(cypher, params)`.
 * @param {string} documentNodeId - elementId of the KnowledgeDocument node.
 * @param {object} docEdge - Edge descriptor; `type`, `direction`, `targetKind`,
 *   `targetProperties` and `merge` are read.
 * @param {string} accountId - Forwarded to `writeRelatedNode`.
 * @param {*} now - Stamped on the edge as `createdAt`.
 * @param {string} sessionId - Stamped on the edge as `createdBySession`.
 * @param {string} attachmentId - Stamped on the edge as `sourceDocumentId`.
 * @returns {Promise<void>}
 */
async function writeDocumentEdge(dbSession, documentNodeId, docEdge, accountId, now, sessionId, attachmentId) {
    // The relationship type is spliced into the query text, so it must be
    // escaped: a backtick inside a backtick-quoted identifier is written as
    // two backticks. Without this, a type containing a backtick terminates the
    // identifier and the remainder is parsed as Cypher.
    // NOTE(review): some Neo4j versions are documented to decode the textual
    // escape \u0060 into a backtick before parsing, so it is normalised first —
    // confirm against the deployed server version.
    const quoteIdent = (name) => "`" + String(name).split("\\u0060").join("`").split("`").join("``") + "`";
    // Build a synthetic related-entity descriptor so writeRelatedNode can be
    // reused for the target. `merge` defaults to true unless explicitly false.
    const synthetic = {
        kind: docEdge.targetKind,
        properties: docEdge.targetProperties,
        edge: { type: docEdge.type, direction: docEdge.direction },
        merge: docEdge.merge !== false,
    };
    const targetNodeId = await writeRelatedNode({
        session: dbSession,
        accountId,
        related: synthetic,
        now,
        sessionId,
    });
    // NOTE(review): the edge is always written document -> target;
    // `docEdge.direction` is forwarded in `synthetic` but not used for the edge
    // itself — confirm this is intended.
    await dbSession.run(`MATCH (d:KnowledgeDocument) WHERE elementId(d) = $docId
     MATCH (n) WHERE elementId(n) = $targetId
     CREATE (d)-[edge:${quoteIdent(docEdge.type)}]->(n)
     SET edge.createdByAgent = $createdByAgent,
         edge.createdBySession = $createdBySession,
         edge.source = $source,
         edge.sourceDocumentId = $sourceDocumentId,
         edge.createdAt = $createdAt`, {
        docId: documentNodeId,
        targetId: targetNodeId,
        createdByAgent: PROVENANCE_AGENT,
        createdBySession: sessionId,
        source: PROVENANCE_SOURCE,
        sourceDocumentId: attachmentId,
        createdAt: now,
    });
}
|
|
471
|
+
/**
 * Render a tally object as a compact `key:count` list for log lines.
 *
 * @param {Object<string, number>} b - Tally object to render.
 * @returns {string} Comma-separated `key:value` pairs in the object's own
 *   enumeration order, or the literal "{}" when the object has no entries.
 */
function formatBreakdown(b) {
    const parts = [];
    for (const [key, count] of Object.entries(b)) {
        parts.push(`${key}:${count}`);
    }
    return parts.length > 0 ? parts.join(",") : "{}";
}
|
|
439
477
|
async function writeRelatedNode(opts) {
|
|
440
478
|
const { session, accountId, related, now, sessionId } = opts;
|
|
441
479
|
// Compute embedding from a representative property string so the related
|