@rubytech/create-maxy 1.0.705 → 1.0.707
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/__tests__/apt-resolve.test.js +179 -0
- package/dist/apt-resolve.js +73 -0
- package/dist/index.js +48 -46
- package/package.json +3 -3
- package/payload/platform/lib/graph-mcp/dist/__tests__/schema-cypher-parser.test.d.ts +2 -0
- package/payload/platform/lib/graph-mcp/dist/__tests__/schema-cypher-parser.test.d.ts.map +1 -0
- package/payload/platform/lib/graph-mcp/dist/__tests__/schema-cypher-parser.test.js +89 -0
- package/payload/platform/lib/graph-mcp/dist/__tests__/schema-cypher-parser.test.js.map +1 -0
- package/payload/platform/lib/graph-mcp/dist/schema-cypher-parser.d.ts +42 -0
- package/payload/platform/lib/graph-mcp/dist/schema-cypher-parser.d.ts.map +1 -0
- package/payload/platform/lib/graph-mcp/dist/schema-cypher-parser.js +87 -0
- package/payload/platform/lib/graph-mcp/dist/schema-cypher-parser.js.map +1 -0
- package/payload/platform/lib/graph-mcp/src/__tests__/schema-cypher-parser.test.ts +99 -0
- package/payload/platform/lib/graph-mcp/src/schema-cypher-parser.ts +84 -0
- package/payload/platform/neo4j/schema.cypher +23 -0
- package/payload/platform/plugins/admin/PLUGIN.md +1 -0
- package/payload/platform/plugins/admin/mcp/dist/index.js +30 -0
- package/payload/platform/plugins/admin/mcp/dist/index.js.map +1 -1
- package/payload/platform/plugins/admin/skills/business-profile/SKILL.md +2 -2
- package/payload/platform/plugins/admin/skills/onboarding/SKILL.md +47 -6
- package/payload/platform/plugins/docs/references/adherence.md +1 -1
- package/payload/platform/plugins/memory/PLUGIN.md +25 -16
- package/payload/platform/plugins/memory/mcp/dist/index.js +146 -38
- package/payload/platform/plugins/memory/mcp/dist/index.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/live-schema-source.test.d.ts +2 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/live-schema-source.test.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/live-schema-source.test.js +92 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/live-schema-source.test.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-loader.test.d.ts +2 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-loader.test.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-loader.test.js +51 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-loader.test.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-validator.test.d.ts +2 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-validator.test.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-validator.test.js +222 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-validator.test.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.d.ts +1 -7
- package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.js +27 -14
- package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/graph-write-gate.d.ts +16 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/graph-write-gate.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/graph-write-gate.js +38 -11
- package/payload/platform/plugins/memory/mcp/dist/lib/graph-write-gate.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/live-schema-source.d.ts +136 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/live-schema-source.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/live-schema-source.js +180 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/live-schema-source.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts +126 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js +253 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/schema-loader.d.ts +11 -2
- package/payload/platform/plugins/memory/mcp/dist/lib/schema-loader.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/schema-loader.js +6 -3
- package/payload/platform/plugins/memory/mcp/dist/lib/schema-loader.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.d.ts +44 -22
- package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.js +94 -57
- package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.d.ts +34 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.js +46 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-edit-attachment.d.ts +1 -2
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-edit-attachment.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-edit-attachment.js +8 -9
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-edit-attachment.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-extract.d.ts +5 -17
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-extract.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-extract.js +26 -49
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-extract.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-web.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-web.js +4 -25
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-web.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts +23 -14
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js +410 -164
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-write.d.ts +7 -5
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-write.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-write.js +2 -2
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-write.js.map +1 -1
- package/payload/platform/plugins/memory/references/schema-base.md +33 -0
- package/payload/platform/plugins/memory/skills/document-ingest/SKILL.md +112 -0
- package/payload/platform/templates/agents/admin/IDENTITY.md +1 -2
- package/payload/platform/templates/specialists/agents/content-producer.md +10 -77
- package/payload/platform/templates/specialists/agents/database-operator.md +21 -13
- package/payload/server/chunk-PE76FPYP.js +12040 -0
- package/payload/server/maxy-edge.js +1 -1
- package/payload/server/public/assets/{Checkbox-B2Lk8F4X.js → Checkbox-CjbS9JcG.js} +1 -1
- package/payload/server/public/assets/{admin-agtgi48Q.js → admin-Ce9DbUuu.js} +1 -1
- package/payload/server/public/assets/{data-B7nsyBTV.js → data-C-SxjLC9.js} +1 -1
- package/payload/server/public/assets/{file-DHWTu8LP.js → file-D4cbAAuo.js} +1 -1
- package/payload/server/public/assets/{graph-ChDwqqhJ.js → graph-BRD96pKD.js} +8 -8
- package/payload/server/public/assets/{house-CfjnRPO6.js → house-CYsVygEQ.js} +1 -1
- package/payload/server/public/assets/{jsx-runtime-81wg0w0Q.css → jsx-runtime-DPXE45W9.css} +1 -1
- package/payload/server/public/assets/{public-CE1kyVnz.js → public-BTOF98iO.js} +1 -1
- package/payload/server/public/assets/{share-2-CAd1beVT.js → share-2-B-sbkB36.js} +1 -1
- package/payload/server/public/assets/{useVoiceRecorder-LSAU68Eo.js → useVoiceRecorder-DLVFx3ms.js} +1 -1
- package/payload/server/public/assets/{x-B0xK3Aoq.js → x-BNidzSAn.js} +1 -1
- package/payload/server/public/data.html +6 -6
- package/payload/server/public/graph.html +7 -7
- package/payload/server/public/index.html +8 -8
- package/payload/server/public/public.html +5 -5
- package/payload/server/server.js +6 -10
- /package/payload/server/public/assets/{jsx-runtime-DhzH26q8.js → jsx-runtime-BUs3sHtV.js} +0 -0
|
@@ -1,90 +1,143 @@
|
|
|
1
1
|
import { getSession } from "../lib/neo4j.js";
|
|
2
|
-
import { embedBatch } from "../lib/embeddings.js";
|
|
2
|
+
import { embed, embedBatch } from "../lib/embeddings.js";
|
|
3
3
|
import { extractCache } from "./memory-ingest-extract.js";
|
|
4
4
|
import { deleteDocumentChildren } from "../lib/document-hierarchy.js";
|
|
5
5
|
import { restoreNode } from "../../../../../lib/graph-trash/dist/index.js";
|
|
6
|
+
// ---------------------------------------------------------------------------
|
|
7
|
+
// Typed-node document ingestion (Task 737).
|
|
8
|
+
//
|
|
9
|
+
// CACHE LOOKUP --> MERGE/REVIVE DOC --> CLEANUP CHILDREN --> EMBED --> WRITE
|
|
10
|
+
// (by attachmentId) KnowledgeDocument delete prior batch typed nodes
|
|
11
|
+
// (idempotent) Section/Chunk/typed Ollama + anchor edges
|
|
12
|
+
// + REFERENCES
|
|
13
|
+
//
|
|
14
|
+
// Each classified section becomes either:
|
|
15
|
+
// - a typed graph node (Position, Service, Credential, etc.) anchored to
|
|
16
|
+
// UserProfile / LocalBusiness / Person / Organization via the natural
|
|
17
|
+
// ontology edge, plus a (KnowledgeDocument)-[:REFERENCES]->(typed) link
|
|
18
|
+
// for retrieval, plus optional related-entity nodes (e.g. Position's
|
|
19
|
+
// employer Organization, MERGEd by name).
|
|
20
|
+
// - a generic :Section node (UNMAPPED fallback) hanging off the document
|
|
21
|
+
// via the legacy (KnowledgeDocument)-[:HAS_SECTION] edge, with optional
|
|
22
|
+
// :Chunk overflow when the body exceeds MAX_CHUNK_SIZE.
|
|
23
|
+
//
|
|
24
|
+
// Provenance properties (createdByAgent, createdBySession, source,
|
|
25
|
+
// sourceDocumentId) stamp every node and edge the skill creates so the
|
|
26
|
+
// re-ingest cleanup can find and replace them deterministically.
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
28
|
+
// Size threshold, measured with String.length: a section body longer than this
// is additionally split by chunkBody() into :Chunk pieces of at most this size.
const MAX_CHUNK_SIZE = 2000;
|
|
29
|
+
// Number of leading characters of a section body / overflow chunk that is
// stored as that node's `summary` preview.
const PREVIEW_LENGTH = 150;
|
|
30
|
+
// Sentinel `section.kind` value: such a section is written as a generic
// :Section node under the document instead of a typed node.
const UNMAPPED = "UNMAPPED";
|
|
31
|
+
// Provenance stamp written to `createdByAgent` on the nodes and edges created
// in this file (and to `createdBySource` on the nodes).
const PROVENANCE_AGENT = "document-ingest";
|
|
32
|
+
// Provenance stamp written to the `source` property of the nodes and edges
// created in this file.
const PROVENANCE_SOURCE = "document";
|
|
33
|
+
// ---------------------------------------------------------------------------
|
|
34
|
+
// Helpers
|
|
35
|
+
// ---------------------------------------------------------------------------
|
|
36
|
+
/**
 * Normalise a caller-supplied keyword list: lower-case and trim every entry
 * and drop the ones that end up empty.
 *
 * The list comes straight from tool parameters, so it is treated as untrusted:
 * a missing or non-array value yields an empty list, and non-string entries
 * are skipped instead of raising a TypeError from `toLowerCase`.
 *
 * @param {unknown} arr - Keyword list as received (may be undefined/null).
 * @returns {string[]} Normalised keywords in their original order. Duplicates
 *   are NOT removed here; memoryIngest de-duplicates when it merges lists.
 */
function normaliseKeywords(arr) {
    if (!Array.isArray(arr))
        return [];
    return arr
        .filter((k) => typeof k === "string")
        .map((k) => k.toLowerCase().trim())
        .filter(Boolean);
}
|
|
41
|
+
/**
 * Split a section body into consecutive pieces of at most `maxSize` UTF-16
 * code units. Concatenating the returned pieces always reproduces `body`.
 *
 * A cut is never placed between the two halves of a surrogate pair: when the
 * natural boundary would fall there it is moved back by one code unit, so no
 * chunk starts or ends with a lone surrogate (which would corrupt the stored
 * text and the text sent for embedding). For text without astral characters
 * the result is identical to plain fixed-width slicing.
 *
 * @param {string} body - Text to split.
 * @param {number} [maxSize=MAX_CHUNK_SIZE] - Maximum chunk length; must be a
 *   positive integer.
 * @returns {string[]} `[body]` when it already fits, otherwise the chunks in
 *   order.
 * @throws {RangeError} If `maxSize` is not a positive integer (a non-positive
 *   step would never advance through the body).
 */
function chunkBody(body, maxSize = MAX_CHUNK_SIZE) {
    if (!Number.isInteger(maxSize) || maxSize < 1) {
        throw new RangeError(`maxSize must be a positive integer, got ${maxSize}`);
    }
    if (body.length <= maxSize)
        return [body];
    const isHighSurrogate = (code) => code >= 0xd800 && code <= 0xdbff;
    const isLowSurrogate = (code) => code >= 0xdc00 && code <= 0xdfff;
    const chunks = [];
    let start = 0;
    while (start < body.length) {
        let end = Math.min(start + maxSize, body.length);
        // Only back off when the chunk keeps at least one code unit; with
        // maxSize === 1 a pair cannot be kept together anyway.
        if (end < body.length &&
            end - start > 1 &&
            isHighSurrogate(body.charCodeAt(end - 1)) &&
            isLowSurrogate(body.charCodeAt(end))) {
            end -= 1;
        }
        chunks.push(body.slice(start, end));
        start = end;
    }
    return chunks;
}
|
|
50
|
+
/**
 * Identifying property for MERGE on a related node by kind.
 * Choose a stable, human-recognisable key per label so the same real-world
 * entity collapses to one node across documents.
 *
 * Keys per kind:
 *   - Organization / DefinedTerm / Credential: `name`
 *   - Person: `email`, falling back to `telephone`
 *
 * A candidate only counts when it is a string that is non-empty after
 * trimming; the trimmed value is what gets returned.
 *
 * @param {string} kind - Label of the related node.
 * @param {object|null|undefined} properties - Classifier-supplied properties;
 *   a missing object is treated as "no identifying property" rather than
 *   raising a TypeError.
 * @returns {{key: string, value: string}|null} The merge key, or null when the
 *   kind has no merge rule or no usable value is present (presumably the
 *   caller then CREATEs instead of MERGEing — confirm in writeRelatedNode).
 */
function mergeKeyFor(kind, properties) {
    // First candidate property holding a non-blank string wins.
    const firstNonBlank = (...candidates) => {
        for (const key of candidates) {
            const raw = properties?.[key];
            if (typeof raw === "string" && raw.trim()) {
                return { key, value: raw.trim() };
            }
        }
        return null;
    };
    switch (kind) {
        case "Organization":
        case "DefinedTerm":
        case "Credential":
            return firstNonBlank("name");
        case "Person":
            return firstNonBlank("email", "telephone");
        default:
            return null;
    }
}
|
|
81
|
+
// ---------------------------------------------------------------------------
|
|
82
|
+
// Main entry point
|
|
83
|
+
// ---------------------------------------------------------------------------
|
|
6
84
|
export async function memoryIngest(params) {
|
|
7
|
-
const { accountId, attachmentId, documentSummary, sections, scope,
|
|
85
|
+
const { accountId, attachmentId, documentSummary, anchorNodeId, anchorLabel, sections, scope, sourceUrl, sourceType, documentKeywords: rawDocKeywords, userKeywords: rawUserKeywords, sessionId, } = params;
|
|
8
86
|
if (!scope) {
|
|
9
87
|
throw new Error("scope is required — valid values: 'public', 'shared', 'admin', 'user:{identifier}'");
|
|
10
88
|
}
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
89
|
+
if (!anchorNodeId) {
|
|
90
|
+
throw new Error("anchorNodeId is required — the document subject's element ID (UserProfile/LocalBusiness/Person/Organization)");
|
|
91
|
+
}
|
|
92
|
+
if (!anchorLabel) {
|
|
93
|
+
throw new Error("anchorLabel is required — the anchor node's primary label");
|
|
94
|
+
}
|
|
95
|
+
const keywords = (() => {
|
|
96
|
+
const u = normaliseKeywords(rawUserKeywords);
|
|
97
|
+
const d = normaliseKeywords(rawDocKeywords);
|
|
98
|
+
if (u.length === 0 && d.length === 0)
|
|
99
|
+
return undefined;
|
|
100
|
+
return [...new Set([...u, ...d])];
|
|
101
|
+
})();
|
|
22
102
|
const t0 = Date.now();
|
|
23
103
|
const log = (stage, detail) => console.error(`[memory-ingest] [${attachmentId.slice(0, 8)}] ${stage}${detail ? ` — ${detail}` : ""} (${Date.now() - t0}ms)`);
|
|
24
|
-
log("start", `${sections.length} sections, scope=${scope}`);
|
|
25
|
-
// 1. Retrieve cached content from memory-ingest-extract
|
|
104
|
+
log("start", `${sections.length} sections, scope=${scope}, anchor=${anchorLabel}`);
|
|
26
105
|
const cached = extractCache.get(attachmentId);
|
|
27
106
|
if (!cached) {
|
|
28
107
|
throw new Error(`No cached extract found for attachment "${attachmentId}". ` +
|
|
29
|
-
`Call memory-ingest-extract first
|
|
108
|
+
`Call memory-ingest-extract first.`);
|
|
30
109
|
}
|
|
31
110
|
log("cache-hit", cached.filename);
|
|
32
|
-
// Validate section count matches
|
|
33
|
-
if (sections.length !== cached.sections.length) {
|
|
34
|
-
throw new Error(`Section count mismatch: model provided ${sections.length} sections ` +
|
|
35
|
-
`but the extracted document has ${cached.sections.length} sections. ` +
|
|
36
|
-
`Provide exactly one summary per section returned by memory-ingest-extract.`);
|
|
37
|
-
}
|
|
38
|
-
// Validate chunk summary counts per section
|
|
39
|
-
for (let i = 0; i < sections.length; i++) {
|
|
40
|
-
const expectedChunks = cached.sections[i].chunks.length;
|
|
41
|
-
const providedSummaries = sections[i].chunkSummaries.length;
|
|
42
|
-
if (providedSummaries !== expectedChunks) {
|
|
43
|
-
throw new Error(`Chunk count mismatch in section "${sections[i].title}": ` +
|
|
44
|
-
`model provided ${providedSummaries} chunk summaries ` +
|
|
45
|
-
`but the section has ${expectedChunks} chunks.`);
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
111
|
const { filename, mimeType } = cached;
|
|
49
112
|
const now = new Date().toISOString();
|
|
50
|
-
//
|
|
51
|
-
const textsToEmbed = [];
|
|
52
|
-
// Document-level summary
|
|
53
|
-
textsToEmbed.push(documentSummary);
|
|
54
|
-
const docEmbedIdx = 0;
|
|
55
|
-
// Section-level summaries
|
|
56
|
-
const sectionEmbedIndices = [];
|
|
57
|
-
for (const section of sections) {
|
|
58
|
-
sectionEmbedIndices.push(textsToEmbed.length);
|
|
59
|
-
textsToEmbed.push(section.summary);
|
|
60
|
-
}
|
|
61
|
-
// Chunk-level summaries
|
|
62
|
-
const chunkEmbedIndices = [];
|
|
63
|
-
for (let si = 0; si < sections.length; si++) {
|
|
64
|
-
for (let ci = 0; ci < sections[si].chunkSummaries.length; ci++) {
|
|
65
|
-
chunkEmbedIndices.push({
|
|
66
|
-
sectionIdx: si,
|
|
67
|
-
chunkIdx: ci,
|
|
68
|
-
embedIdx: textsToEmbed.length,
|
|
69
|
-
});
|
|
70
|
-
textsToEmbed.push(sections[si].chunkSummaries[ci]);
|
|
71
|
-
}
|
|
72
|
-
}
|
|
73
|
-
// 3. Batch embed all summaries in a single Ollama call.
|
|
113
|
+
// 1. Embed document summary + every section body in one batch.
|
|
114
|
+
const textsToEmbed = [documentSummary, ...sections.map((s) => s.body)];
|
|
74
115
|
log("embedding", `${textsToEmbed.length} texts`);
|
|
75
116
|
const embeddings = await embedBatch(textsToEmbed);
|
|
76
117
|
log("embedded", `${embeddings.length} vectors`);
|
|
77
|
-
|
|
78
|
-
log("neo4j-write", "starting");
|
|
118
|
+
const docEmbedding = embeddings[0];
|
|
79
119
|
const session = getSession();
|
|
80
120
|
let documentNodeId = "";
|
|
81
|
-
let
|
|
121
|
+
let typedCount = 0;
|
|
122
|
+
let unmappedCount = 0;
|
|
123
|
+
let chunkCount = 0;
|
|
82
124
|
let entityLinks = 0;
|
|
83
125
|
try {
|
|
84
|
-
//
|
|
85
|
-
//
|
|
86
|
-
|
|
87
|
-
|
|
126
|
+
// 2. Trash-revival (Task 576) — restore a soft-deleted KnowledgeDocument
|
|
127
|
+
// with this attachmentId so MERGE finds the existing node.
|
|
128
|
+
const trashedDoc = await session.run(`MATCH (d:KnowledgeDocument:Trashed)
|
|
129
|
+
WHERE d.accountId = $accountId
|
|
130
|
+
AND d._trashedKeys IS NOT NULL
|
|
131
|
+
AND d._trashedKeys CONTAINS $attachmentId
|
|
132
|
+
RETURN elementId(d) AS eid LIMIT 1`, { accountId, attachmentId });
|
|
133
|
+
if (trashedDoc.records.length > 0) {
|
|
134
|
+
const eid = trashedDoc.records[0].get("eid");
|
|
135
|
+
await restoreNode({ session, accountId, elementId: eid });
|
|
136
|
+
log("revived", `restored trashed KnowledgeDocument elementId=${eid}`);
|
|
137
|
+
}
|
|
138
|
+
// 3. MERGE the KnowledgeDocument parent. Optional fields (sourceUrl,
|
|
139
|
+
// sourceType, keywords) are SET only when present so re-ingest of a
|
|
140
|
+
// file upload doesn't null out a previously-set web-source URL.
|
|
88
141
|
const optionalSets = [];
|
|
89
142
|
const optionalParams = {};
|
|
90
143
|
if (sourceUrl !== undefined) {
|
|
@@ -99,35 +152,19 @@ export async function memoryIngest(params) {
|
|
|
99
152
|
optionalSets.push("d.keywords = $keywords");
|
|
100
153
|
optionalParams.keywords = keywords;
|
|
101
154
|
}
|
|
102
|
-
const optionalSetClause = optionalSets.length > 0
|
|
103
|
-
? ", " + optionalSets.join(", ")
|
|
104
|
-
: "";
|
|
105
|
-
// Trash recovery: a prior memory-delete on this attachmentId nulls the
|
|
106
|
-
// live attachmentId (Task 576 unique-key handling). MERGE on
|
|
107
|
-
// attachmentId would then create a NEW node, orphaning the trashed copy
|
|
108
|
-
// — and 30 days later memory-empty-trash would purge the disk dir the
|
|
109
|
-
// new node depends on. Restore first so MERGE finds the existing node.
|
|
110
|
-
const trashedDoc = await session.run(`MATCH (d:KnowledgeDocument:Trashed)
|
|
111
|
-
WHERE d.accountId = $accountId
|
|
112
|
-
AND d._trashedKeys IS NOT NULL
|
|
113
|
-
AND d._trashedKeys CONTAINS $attachmentId
|
|
114
|
-
RETURN elementId(d) AS eid LIMIT 1`, { accountId, attachmentId });
|
|
115
|
-
if (trashedDoc.records.length > 0) {
|
|
116
|
-
const eid = trashedDoc.records[0].get("eid");
|
|
117
|
-
await restoreNode({ session, accountId, elementId: eid });
|
|
118
|
-
log("revived", `restored trashed KnowledgeDocument elementId=${eid}`);
|
|
119
|
-
}
|
|
155
|
+
const optionalSetClause = optionalSets.length > 0 ? ", " + optionalSets.join(", ") : "";
|
|
120
156
|
const docResult = await session.run(`MERGE (d:KnowledgeDocument { attachmentId: $attachmentId })
|
|
121
157
|
SET d.accountId = $accountId,
|
|
122
158
|
d.name = $filename,
|
|
123
|
-
d.encodingFormat
|
|
124
|
-
d.summary
|
|
125
|
-
d.sectionCount
|
|
126
|
-
d.scope
|
|
127
|
-
d.embedding
|
|
128
|
-
d.createdAt
|
|
129
|
-
d.updatedAt
|
|
159
|
+
d.encodingFormat = $mimeType,
|
|
160
|
+
d.summary = $documentSummary,
|
|
161
|
+
d.sectionCount = $sectionCount,
|
|
162
|
+
d.scope = $scope,
|
|
163
|
+
d.embedding = $embedding,
|
|
164
|
+
d.createdAt = coalesce(d.createdAt, $createdAt),
|
|
165
|
+
d.updatedAt = $updatedAt
|
|
130
166
|
${optionalSetClause}
|
|
167
|
+
REMOVE d.deletedAt
|
|
131
168
|
RETURN elementId(d) AS nodeId`, {
|
|
132
169
|
attachmentId,
|
|
133
170
|
accountId,
|
|
@@ -136,114 +173,323 @@ export async function memoryIngest(params) {
|
|
|
136
173
|
documentSummary,
|
|
137
174
|
sectionCount: sections.length,
|
|
138
175
|
scope,
|
|
139
|
-
embedding:
|
|
176
|
+
embedding: docEmbedding,
|
|
140
177
|
createdAt: now,
|
|
141
178
|
updatedAt: now,
|
|
142
179
|
...optionalParams,
|
|
143
180
|
});
|
|
144
181
|
documentNodeId = docResult.records[0].get("nodeId");
|
|
145
|
-
//
|
|
146
|
-
//
|
|
147
|
-
//
|
|
182
|
+
// 4. Cleanup prior children (idempotent re-ingest). Removes generic
|
|
183
|
+
// :Section/Chunk fallbacks and typed nodes that originated from this
|
|
184
|
+
// document. MERGEd related nodes (Organizations, Persons) are spared.
|
|
148
185
|
const cleanup = await deleteDocumentChildren(attachmentId, session);
|
|
149
|
-
if (cleanup.sections > 0 ||
|
|
150
|
-
|
|
186
|
+
if (cleanup.sections > 0 ||
|
|
187
|
+
cleanup.chunks > 0 ||
|
|
188
|
+
cleanup.typed > 0 ||
|
|
189
|
+
cleanup.references > 0) {
|
|
190
|
+
log("cleanup", `deleted ${cleanup.sections} sections, ${cleanup.chunks} chunks, ${cleanup.typed} typed, ${cleanup.references} references`);
|
|
151
191
|
}
|
|
152
|
-
//
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
const sectionNodeIds = [];
|
|
158
|
-
for (let si = 0; si < sections.length; si++) {
|
|
159
|
-
const section = sections[si];
|
|
160
|
-
const sectionResult = await session.run(`CREATE (s:Section {
|
|
161
|
-
accountId: $accountId,
|
|
162
|
-
title: $title,
|
|
163
|
-
summary: $summary,
|
|
164
|
-
position: $position,
|
|
165
|
-
scope: $scope,
|
|
166
|
-
embedding: $embedding,
|
|
167
|
-
createdAt: $createdAt,
|
|
168
|
-
updatedAt: $updatedAt
|
|
169
|
-
})
|
|
170
|
-
RETURN elementId(s) AS nodeId`, {
|
|
192
|
+
// 5. Per-section writes.
|
|
193
|
+
for (let i = 0; i < sections.length; i++) {
|
|
194
|
+
const section = sections[i];
|
|
195
|
+
const bodyEmbedding = embeddings[i + 1]; // +1 because index 0 is docEmbedding
|
|
196
|
+
const sectionParams = {
|
|
171
197
|
accountId,
|
|
172
198
|
title: section.title,
|
|
173
|
-
|
|
174
|
-
|
|
199
|
+
body: section.body,
|
|
200
|
+
bodyPreview: section.body.slice(0, PREVIEW_LENGTH),
|
|
201
|
+
position: i,
|
|
175
202
|
scope,
|
|
176
|
-
embedding:
|
|
203
|
+
embedding: bodyEmbedding,
|
|
177
204
|
createdAt: now,
|
|
178
205
|
updatedAt: now,
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
206
|
+
createdByAgent: PROVENANCE_AGENT,
|
|
207
|
+
createdBySource: PROVENANCE_AGENT,
|
|
208
|
+
createdBySession: sessionId ?? "",
|
|
209
|
+
source: PROVENANCE_SOURCE,
|
|
210
|
+
sourceDocumentId: attachmentId,
|
|
211
|
+
};
|
|
212
|
+
if (section.kind === UNMAPPED) {
|
|
213
|
+
// 5a. UNMAPPED: generic :Section + (KnowledgeDocument)-[:HAS_SECTION]
|
|
214
|
+
const sectionResult = await session.run(`CREATE (s:Section {
|
|
215
|
+
accountId: $accountId,
|
|
216
|
+
title: $title,
|
|
217
|
+
body: $body,
|
|
218
|
+
summary: $bodyPreview,
|
|
219
|
+
position: $position,
|
|
220
|
+
scope: $scope,
|
|
221
|
+
embedding: $embedding,
|
|
222
|
+
createdAt: $createdAt,
|
|
223
|
+
updatedAt: $updatedAt,
|
|
224
|
+
createdByAgent: $createdByAgent,
|
|
225
|
+
createdBySource: $createdBySource,
|
|
226
|
+
createdBySession: $createdBySession,
|
|
227
|
+
source: $source,
|
|
228
|
+
sourceDocumentId: $sourceDocumentId
|
|
229
|
+
})
|
|
230
|
+
WITH s
|
|
231
|
+
MATCH (d:KnowledgeDocument) WHERE elementId(d) = $docId
|
|
232
|
+
CREATE (d)-[:HAS_SECTION]->(s)
|
|
233
|
+
RETURN elementId(s) AS nodeId`, { ...sectionParams, docId: documentNodeId });
|
|
234
|
+
const sectionId = sectionResult.records[0].get("nodeId");
|
|
235
|
+
// Body overflow → :Chunk children
|
|
236
|
+
if (section.body.length > MAX_CHUNK_SIZE) {
|
|
237
|
+
const overflowChunks = chunkBody(section.body);
|
|
238
|
+
const overflowEmbeddings = await embedBatch(overflowChunks);
|
|
239
|
+
for (let ci = 0; ci < overflowChunks.length; ci++) {
|
|
240
|
+
await session.run(`MATCH (s:Section) WHERE elementId(s) = $sectionId
|
|
241
|
+
CREATE (s)-[:HAS_CHUNK]->(c:Chunk {
|
|
242
|
+
accountId: $accountId,
|
|
243
|
+
content: $content,
|
|
244
|
+
summary: $summary,
|
|
245
|
+
position: $position,
|
|
246
|
+
scope: $scope,
|
|
247
|
+
embedding: $embedding,
|
|
248
|
+
createdAt: $createdAt,
|
|
249
|
+
updatedAt: $updatedAt,
|
|
250
|
+
createdByAgent: $createdByAgent,
|
|
251
|
+
createdBySource: $createdBySource,
|
|
252
|
+
createdBySession: $createdBySession,
|
|
253
|
+
source: $source,
|
|
254
|
+
sourceDocumentId: $sourceDocumentId
|
|
255
|
+
})`, {
|
|
256
|
+
sectionId,
|
|
257
|
+
accountId,
|
|
258
|
+
content: overflowChunks[ci],
|
|
259
|
+
summary: overflowChunks[ci].slice(0, PREVIEW_LENGTH),
|
|
260
|
+
position: ci,
|
|
261
|
+
scope,
|
|
262
|
+
embedding: overflowEmbeddings[ci],
|
|
263
|
+
createdAt: now,
|
|
264
|
+
updatedAt: now,
|
|
265
|
+
createdByAgent: PROVENANCE_AGENT,
|
|
266
|
+
createdBySource: PROVENANCE_AGENT,
|
|
267
|
+
createdBySession: sessionId ?? "",
|
|
268
|
+
source: PROVENANCE_SOURCE,
|
|
269
|
+
sourceDocumentId: attachmentId,
|
|
270
|
+
});
|
|
271
|
+
chunkCount += 1;
|
|
272
|
+
}
|
|
273
|
+
}
|
|
274
|
+
unmappedCount += 1;
|
|
275
|
+
console.error(`[document-ingest] unmapped-section title="${section.title}" chars=${section.body.length}`);
|
|
276
|
+
continue;
|
|
277
|
+
}
|
|
278
|
+
// 5b. Typed kind — write the typed node, anchor edge, related nodes,
|
|
279
|
+
// and the document REFERENCES link.
|
|
280
|
+
// Build the typed-node properties: classifier-supplied properties
|
|
281
|
+
// overlaid on top of the system fields, with system fields winning.
|
|
282
|
+
const typedProps = {
|
|
283
|
+
...section.properties,
|
|
202
284
|
accountId,
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
position: chunkIdx,
|
|
285
|
+
title: section.title,
|
|
286
|
+
body: section.body,
|
|
206
287
|
scope,
|
|
207
|
-
embedding:
|
|
288
|
+
embedding: bodyEmbedding,
|
|
208
289
|
createdAt: now,
|
|
209
290
|
updatedAt: now,
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
291
|
+
createdByAgent: PROVENANCE_AGENT,
|
|
292
|
+
createdBySource: PROVENANCE_AGENT,
|
|
293
|
+
createdBySession: sessionId ?? "",
|
|
294
|
+
source: PROVENANCE_SOURCE,
|
|
295
|
+
sourceDocumentId: attachmentId,
|
|
296
|
+
};
|
|
297
|
+
// CREATE the typed node. Label is interpolated (validated against the
|
|
298
|
+
// ontology label set by the classifier) — Cypher does not allow
|
|
299
|
+
// parameterising labels.
|
|
300
|
+
const typedResult = await session.run(`CREATE (t:\`${section.kind}\`)
|
|
301
|
+
SET t = $props
|
|
302
|
+
RETURN elementId(t) AS nodeId`, { props: typedProps });
|
|
303
|
+
const typedNodeId = typedResult.records[0].get("nodeId");
|
|
304
|
+
// Body overflow → :Chunk children attached directly to the typed node
|
|
305
|
+
// (mirroring :Section overflow). Most typed nodes won't trip this.
|
|
306
|
+
if (section.body.length > MAX_CHUNK_SIZE) {
|
|
307
|
+
const overflowChunks = chunkBody(section.body);
|
|
308
|
+
const overflowEmbeddings = await embedBatch(overflowChunks);
|
|
309
|
+
for (let ci = 0; ci < overflowChunks.length; ci++) {
|
|
310
|
+
await session.run(`MATCH (t) WHERE elementId(t) = $typedId
|
|
311
|
+
CREATE (t)-[:HAS_CHUNK]->(c:Chunk {
|
|
312
|
+
accountId: $accountId,
|
|
313
|
+
content: $content,
|
|
314
|
+
summary: $summary,
|
|
315
|
+
position: $position,
|
|
316
|
+
scope: $scope,
|
|
317
|
+
embedding: $embedding,
|
|
318
|
+
createdAt: $createdAt,
|
|
319
|
+
updatedAt: $updatedAt,
|
|
320
|
+
createdByAgent: $createdByAgent,
|
|
321
|
+
createdBySource: $createdBySource,
|
|
322
|
+
createdBySession: $createdBySession,
|
|
323
|
+
source: $source,
|
|
324
|
+
sourceDocumentId: $sourceDocumentId
|
|
325
|
+
})`, {
|
|
326
|
+
typedId: typedNodeId,
|
|
327
|
+
accountId,
|
|
328
|
+
content: overflowChunks[ci],
|
|
329
|
+
summary: overflowChunks[ci].slice(0, PREVIEW_LENGTH),
|
|
330
|
+
position: ci,
|
|
331
|
+
scope,
|
|
332
|
+
embedding: overflowEmbeddings[ci],
|
|
333
|
+
createdAt: now,
|
|
334
|
+
updatedAt: now,
|
|
335
|
+
createdByAgent: PROVENANCE_AGENT,
|
|
336
|
+
createdBySource: PROVENANCE_AGENT,
|
|
337
|
+
createdBySession: sessionId ?? "",
|
|
338
|
+
source: PROVENANCE_SOURCE,
|
|
339
|
+
sourceDocumentId: attachmentId,
|
|
340
|
+
});
|
|
341
|
+
chunkCount += 1;
|
|
226
342
|
}
|
|
227
|
-
|
|
228
|
-
|
|
343
|
+
}
|
|
344
|
+
// Related nodes (e.g. Position's employer Organization). MERGE when
|
|
345
|
+
// the related kind has a stable identifying property; CREATE when not.
|
|
346
|
+
// Provenance is stamped only on creation (ON CREATE SET) so shared
|
|
347
|
+
// entities don't get rewritten by subsequent ingestions.
|
|
348
|
+
if (section.related && section.related.length > 0) {
|
|
349
|
+
for (const related of section.related) {
|
|
350
|
+
const relatedNodeId = await writeRelatedNode({
|
|
351
|
+
session,
|
|
352
|
+
accountId,
|
|
353
|
+
related,
|
|
354
|
+
now,
|
|
355
|
+
sessionId,
|
|
356
|
+
});
|
|
357
|
+
// Edge from typed node to related node
|
|
358
|
+
await session.run(`MATCH (t) WHERE elementId(t) = $typedId
|
|
359
|
+
MATCH (r) WHERE elementId(r) = $relatedId
|
|
360
|
+
CREATE (t)-[edge:\`${related.edge.type}\`]->(r)
|
|
361
|
+
SET edge += $edgeProps`, {
|
|
362
|
+
typedId: related.edge.direction === "outgoing" ? typedNodeId : relatedNodeId,
|
|
363
|
+
relatedId: related.edge.direction === "outgoing" ? relatedNodeId : typedNodeId,
|
|
364
|
+
edgeProps: {
|
|
365
|
+
...(related.edge.properties ?? {}),
|
|
366
|
+
createdByAgent: PROVENANCE_AGENT,
|
|
367
|
+
createdBySession: sessionId ?? "",
|
|
368
|
+
source: PROVENANCE_SOURCE,
|
|
369
|
+
sourceDocumentId: attachmentId,
|
|
370
|
+
createdAt: now,
|
|
371
|
+
},
|
|
372
|
+
});
|
|
373
|
+
}
|
|
374
|
+
}
|
|
375
|
+
// Anchor edge (anchor → typed or typed → anchor)
|
|
376
|
+
if (section.anchorEdge) {
|
|
377
|
+
const direction = section.anchorEdge.direction;
|
|
378
|
+
const edgeProps = {
|
|
379
|
+
...(section.anchorEdge.properties ?? {}),
|
|
380
|
+
createdByAgent: PROVENANCE_AGENT,
|
|
381
|
+
createdBySession: sessionId ?? "",
|
|
382
|
+
source: PROVENANCE_SOURCE,
|
|
383
|
+
sourceDocumentId: attachmentId,
|
|
384
|
+
createdAt: now,
|
|
385
|
+
};
|
|
386
|
+
if (direction === "from-anchor") {
|
|
387
|
+
await session.run(`MATCH (a:\`${anchorLabel}\`) WHERE elementId(a) = $anchorId AND a.accountId = $accountId
|
|
388
|
+
MATCH (t) WHERE elementId(t) = $typedId
|
|
389
|
+
CREATE (a)-[edge:\`${section.anchorEdge.type}\`]->(t)
|
|
390
|
+
SET edge += $edgeProps`, { anchorId: anchorNodeId, typedId: typedNodeId, accountId, edgeProps });
|
|
391
|
+
}
|
|
392
|
+
else {
|
|
393
|
+
await session.run(`MATCH (a:\`${anchorLabel}\`) WHERE elementId(a) = $anchorId AND a.accountId = $accountId
|
|
394
|
+
MATCH (t) WHERE elementId(t) = $typedId
|
|
395
|
+
CREATE (t)-[edge:\`${section.anchorEdge.type}\`]->(a)
|
|
396
|
+
SET edge += $edgeProps`, { anchorId: anchorNodeId, typedId: typedNodeId, accountId, edgeProps });
|
|
229
397
|
}
|
|
230
398
|
}
|
|
399
|
+
// (KnowledgeDocument)-[:REFERENCES]->(typed) for retrieval.
|
|
400
|
+
await session.run(`MATCH (d:KnowledgeDocument) WHERE elementId(d) = $docId
|
|
401
|
+
MATCH (t) WHERE elementId(t) = $typedId
|
|
402
|
+
CREATE (d)-[r:REFERENCES]->(t)
|
|
403
|
+
SET r.createdByAgent = $createdByAgent,
|
|
404
|
+
r.createdBySession = $createdBySession,
|
|
405
|
+
r.source = $source,
|
|
406
|
+
r.sourceDocumentId = $sourceDocumentId,
|
|
407
|
+
r.createdAt = $createdAt`, {
|
|
408
|
+
docId: documentNodeId,
|
|
409
|
+
typedId: typedNodeId,
|
|
410
|
+
createdByAgent: PROVENANCE_AGENT,
|
|
411
|
+
createdBySession: sessionId ?? "",
|
|
412
|
+
source: PROVENANCE_SOURCE,
|
|
413
|
+
sourceDocumentId: attachmentId,
|
|
414
|
+
createdAt: now,
|
|
415
|
+
});
|
|
416
|
+
entityLinks += 1;
|
|
417
|
+
typedCount += 1;
|
|
418
|
+
console.error(`[document-ingest] section kind=${section.kind} title="${section.title}" chars=${section.body.length}`);
|
|
231
419
|
}
|
|
232
|
-
log("neo4j-done", `doc=${documentNodeId.slice(0, 12)} sections=${sections.length} chunks=${
|
|
420
|
+
log("neo4j-done", `doc=${documentNodeId.slice(0, 12)} sections=${sections.length} typed=${typedCount} unmapped=${unmappedCount} chunks=${chunkCount} entityLinks=${entityLinks}`);
|
|
421
|
+
console.error(`[document-ingest] done sections=${sections.length} typed=${typedCount} unmapped=${unmappedCount} ms=${Date.now() - t0}`);
|
|
233
422
|
return {
|
|
234
423
|
documentNodeId,
|
|
235
424
|
sectionCount: sections.length,
|
|
236
|
-
|
|
425
|
+
typedCount,
|
|
426
|
+
unmappedCount,
|
|
427
|
+
chunkCount,
|
|
237
428
|
entityLinks,
|
|
238
429
|
documentSummary,
|
|
239
430
|
keywords,
|
|
240
431
|
};
|
|
241
432
|
}
|
|
242
433
|
finally {
|
|
243
|
-
// Evict cache entry — content is now persisted in Neo4j
|
|
244
434
|
extractCache.delete(attachmentId);
|
|
245
435
|
await session.close();
|
|
246
436
|
log("complete");
|
|
247
437
|
}
|
|
248
438
|
}
|
|
439
|
+
/**
 * Writes one node that is related to a typed section node and returns its
 * Neo4j element id.
 *
 * Two write strategies are used:
 *  - MERGE when `related.merge` is not `false` and `mergeKeyFor` yields an
 *    identifying `{ key, value }` pair. The node is matched on
 *    `accountId` + that key. Provenance is stamped only on creation, so a
 *    shared entity seen again keeps its original provenance (first write
 *    wins); a match only refreshes `updatedAt`.
 *  - CREATE otherwise. The node is treated as one-of-a-kind for the current
 *    document and is stamped with `sourceDocumentId` when one is supplied.
 *
 * @param {object} opts
 * @param {object} opts.session - Object exposing `run(query, params)` that
 *   resolves to `{ records }` (presumably a Neo4j driver session — confirm).
 * @param {string} opts.accountId - Written to the node; also part of the MERGE key.
 * @param {object} opts.related - `{ kind, properties, merge? }`. `kind` is
 *   used as the node label.
 * @param {*} opts.now - Timestamp stored as `createdAt` / `updatedAt`.
 * @param {string} [opts.sessionId] - Stored as `createdBySession`; `""` when nullish.
 * @param {string} [opts.sourceDocumentId] - Id of the document being
 *   ingested. Only written on the CREATE path. Optional for backward
 *   compatibility: when omitted the property is not written at all.
 *   NOTE(review): callers must pass this for document-scoped nodes to carry
 *   the id — confirm the ingest call site supplies it.
 * @returns {Promise<*>} The `nodeId` value of the first returned record.
 */
async function writeRelatedNode(opts) {
    const { session, accountId, related, now, sessionId, sourceDocumentId } = opts;
    // Labels and property keys cannot be supplied as Cypher parameters, so
    // they are interpolated inside backticks. An embedded backtick is doubled
    // (Cypher's escape for quoted identifiers) so it cannot close the quote
    // early and change the shape of the query.
    const quoteIdentifier = (name) => `\`${String(name).replace(/`/g, "``")}\``;
    const label = quoteIdentifier(related.kind);
    // Embed a representative string for the node: the `name` property when it
    // is a string, otherwise the kind followed by the JSON of all properties.
    const embedSource = typeof related.properties.name === "string"
        ? related.properties.name
        : `${related.kind} ${JSON.stringify(related.properties)}`;
    const relatedEmbedding = await embed(embedSource);
    const mergeKey = related.merge !== false ? mergeKeyFor(related.kind, related.properties) : null;
    if (mergeKey) {
        // MERGE on the identifying property + accountId. ON CREATE stamps
        // provenance; ON MATCH leaves it intact and only bumps updatedAt.
        // sourceDocumentId is deliberately not written here: a merged node
        // may be shared by several documents.
        const result = await session.run(`MERGE (r:${label} { accountId: $accountId, ${quoteIdentifier(mergeKey.key)}: $mergeValue })
         ON CREATE SET r += $createProps,
                       r.embedding = $embedding,
                       r.createdAt = $createdAt,
                       r.updatedAt = $createdAt,
                       r.createdByAgent = $createdByAgent,
                       r.createdBySource = $createdByAgent,
                       r.createdBySession = $createdBySession,
                       r.source = $source
         ON MATCH SET r.updatedAt = $createdAt
         RETURN elementId(r) AS nodeId`, {
            accountId,
            mergeValue: mergeKey.value,
            createProps: { ...related.properties, accountId, scope: "shared" },
            embedding: relatedEmbedding,
            createdAt: now,
            createdByAgent: PROVENANCE_AGENT,
            createdBySession: sessionId ?? "",
            source: PROVENANCE_SOURCE,
        });
        return result.records[0].get("nodeId");
    }
    // CREATE — no stable identifying property, so the node is one-of-a-kind
    // for this document. Stamp sourceDocumentId (when the caller provides it)
    // so the node can be traced back to the document that produced it.
    const result = await session.run(`CREATE (r:${label})
         SET r = $props
         RETURN elementId(r) AS nodeId`, {
        props: {
            ...related.properties,
            accountId,
            scope: "shared",
            embedding: relatedEmbedding,
            createdAt: now,
            updatedAt: now,
            createdByAgent: PROVENANCE_AGENT,
            createdBySource: PROVENANCE_AGENT,
            createdBySession: sessionId ?? "",
            source: PROVENANCE_SOURCE,
            // Only add the key when a value exists, so callers that do not
            // pass it get exactly the property set written before.
            ...(sourceDocumentId != null ? { sourceDocumentId } : {}),
        },
    });
    return result.records[0].get("nodeId");
}
|
|
249
495
|
//# sourceMappingURL=memory-ingest.js.map
|