@rubytech/create-realagent 1.0.706 → 1.0.709
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/payload/platform/lib/oauth-llm/dist/index.d.ts +101 -0
- package/payload/platform/lib/oauth-llm/dist/index.d.ts.map +1 -0
- package/payload/platform/lib/oauth-llm/dist/index.js +353 -0
- package/payload/platform/lib/oauth-llm/dist/index.js.map +1 -0
- package/payload/platform/lib/oauth-llm/src/index.ts +526 -0
- package/payload/platform/lib/oauth-llm/tsconfig.json +8 -0
- package/payload/platform/neo4j/schema.cypher +60 -11
- package/payload/platform/package.json +2 -2
- package/payload/platform/plugins/admin/mcp/dist/index.js +9 -9
- package/payload/platform/plugins/admin/mcp/dist/index.js.map +1 -1
- package/payload/platform/plugins/admin/skills/business-profile/SKILL.md +1 -1
- package/payload/platform/plugins/admin/skills/onboarding/SKILL.md +6 -11
- package/payload/platform/plugins/docs/references/adherence.md +1 -1
- package/payload/platform/plugins/email/mcp/dist/lib/screening.d.ts +3 -3
- package/payload/platform/plugins/email/mcp/dist/lib/screening.d.ts.map +1 -1
- package/payload/platform/plugins/email/mcp/dist/lib/screening.js +12 -12
- package/payload/platform/plugins/email/mcp/dist/lib/screening.js.map +1 -1
- package/payload/platform/plugins/email/mcp/dist/scripts/email-auto-respond.js +14 -28
- package/payload/platform/plugins/email/mcp/dist/scripts/email-auto-respond.js.map +1 -1
- package/payload/platform/plugins/email/mcp/dist/scripts/email-fetch.js +9 -19
- package/payload/platform/plugins/email/mcp/dist/scripts/email-fetch.js.map +1 -1
- package/payload/platform/plugins/memory/PLUGIN.md +22 -15
- package/payload/platform/plugins/memory/mcp/dist/index.js +130 -44
- package/payload/platform/plugins/memory/mcp/dist/index.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.d.ts +1 -7
- package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.js +32 -15
- package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/graph-write-gate.js +4 -4
- package/payload/platform/plugins/memory/mcp/dist/lib/graph-write-gate.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts +200 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js +343 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.js +12 -46
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.d.ts +34 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.js +58 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-edit-attachment.d.ts +1 -2
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-edit-attachment.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-edit-attachment.js +8 -9
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-edit-attachment.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-extract.d.ts +5 -17
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-extract.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-extract.js +26 -49
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-extract.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-web.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-web.js +4 -25
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-web.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts +41 -16
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js +457 -173
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js.map +1 -1
- package/payload/platform/plugins/memory/references/schema-base.md +82 -1
- package/payload/platform/plugins/memory/skills/document-ingest/SKILL.md +145 -0
- package/payload/platform/templates/agents/admin/IDENTITY.md +1 -2
- package/payload/platform/templates/specialists/agents/content-producer.md +10 -77
- package/payload/platform/templates/specialists/agents/database-operator.md +39 -13
- package/payload/server/chunk-Y57ACANQ.js +12292 -0
- package/payload/server/maxy-edge.js +1 -1
- package/payload/server/public/assets/{graph-D-Rqh0Md.js → graph-BRD96pKD.js} +8 -8
- package/payload/server/public/graph.html +1 -1
- package/payload/server/server.js +30 -53
|
@@ -1,90 +1,158 @@
|
|
|
1
1
|
import { getSession } from "../lib/neo4j.js";
|
|
2
|
-
import { embedBatch } from "../lib/embeddings.js";
|
|
2
|
+
import { embed, embedBatch } from "../lib/embeddings.js";
|
|
3
3
|
import { extractCache } from "./memory-ingest-extract.js";
|
|
4
4
|
import { deleteDocumentChildren } from "../lib/document-hierarchy.js";
|
|
5
5
|
import { restoreNode } from "../../../../../lib/graph-trash/dist/index.js";
|
|
6
|
+
import { IDENTITY_SECTION_KINDS, STRUCTURAL_SECTION_KINDS, CONTRACT_SECTION_KINDS, STANDALONE_NODE_KINDS, SECTION_KIND_OTHER, } from "../lib/llm-classifier.js";
|
|
7
|
+
// ---------------------------------------------------------------------------
|
|
8
|
+
// Single-Section document ingestion (Task 740, replacing Task 737's typed-vs-
|
|
9
|
+
// UNMAPPED fork).
|
|
10
|
+
//
|
|
11
|
+
// CACHE LOOKUP --> MERGE/REVIVE DOC --> CLEANUP CHILDREN --> EMBED --> WRITE
|
|
12
|
+
// (by attachmentId) KnowledgeDocument delete prior batch Section nodes
|
|
13
|
+
// (idempotent) Section/Chunk/typed Ollama + secondary labels
|
|
14
|
+
// + NEXT chain
|
|
15
|
+
// + anchor edges
|
|
16
|
+
// + related entities
|
|
17
|
+
// + KD-level edges
|
|
18
|
+
//
|
|
19
|
+
// Every classified section produces ONE `:Section` node. When the classifier
|
|
20
|
+
// recognises the kind (Position/Education/Chapter/Parties/etc.), the same
|
|
21
|
+
// node carries a secondary label (`:Section:Position`) and any structured
|
|
22
|
+
// properties; identity-kind anchor edges go to the multi-labeled node
|
|
23
|
+
// directly, killing the parallel Section-vs-typed-node concept.
|
|
24
|
+
//
|
|
25
|
+
// Special-case writers fire for two contract-clause kinds:
|
|
26
|
+
// * Parties: `(:KnowledgeDocument)-[:PARTY]->(:Person|:Organization)` from documentEdges.
|
|
27
|
+
// * Definitions: `(:Section:Definitions)-[:DEFINES]->(:DefinedTerm)` from related entries.
|
|
28
|
+
//
|
|
29
|
+
// `:Chunk` is gone. Sections carry their body directly. If a body legitimately
|
|
30
|
+
// exceeds Neo4j's property limit, that is a classifier-split-the-section
|
|
31
|
+
// concern, not a writer problem.
|
|
32
|
+
//
|
|
33
|
+
// Provenance properties (createdByAgent, createdBySession, source,
|
|
34
|
+
// sourceDocumentId) stamp every node and edge so re-ingest cleanup finds
|
|
35
|
+
// them deterministically.
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
const PREVIEW_LENGTH = 150;
|
|
38
|
+
const PROVENANCE_AGENT = "document-ingest";
|
|
39
|
+
const PROVENANCE_SOURCE = "document";
|
|
40
|
+
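/** Identity section kinds: the only kinds for which a classifier-proposed anchor edge is written between the anchor node and the `:Section:Kind` node (e.g. UserProfile → Section:Kind). */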
|
|
41
|
+
const IDENTITY_KINDS_SET = new Set(IDENTITY_SECTION_KINDS);
|
|
42
|
+
/** Standalone (non-Section) node kinds (currently just Project). */
|
|
43
|
+
const STANDALONE_KINDS_SET = new Set(STANDALONE_NODE_KINDS);
|
|
44
|
+
/** All section-shaped kinds — anything that becomes a `:Section` node. */
|
|
45
|
+
const SECTION_LABEL_KINDS = new Set([
|
|
46
|
+
...IDENTITY_SECTION_KINDS,
|
|
47
|
+
...STRUCTURAL_SECTION_KINDS,
|
|
48
|
+
...CONTRACT_SECTION_KINDS,
|
|
49
|
+
SECTION_KIND_OTHER,
|
|
50
|
+
]);
|
|
51
|
+
// ---------------------------------------------------------------------------
|
|
52
|
+
// Helpers
|
|
53
|
+
// ---------------------------------------------------------------------------
|
|
54
|
+
/**
 * Normalise a keyword list: lower-case and trim every entry, dropping
 * entries that are empty after trimming.
 *
 * Keywords are optional metadata, so malformed input degrades to "no
 * keyword" instead of aborting the ingest: a missing or non-array value
 * yields an empty list, and non-string entries are skipped.
 *
 * @param {unknown} arr - Raw keyword list (may be undefined/null).
 * @returns {string[]} Normalised keywords in their original order.
 */
function normaliseKeywords(arr) {
    if (!Array.isArray(arr)) {
        return [];
    }
    return arr
        // Guard before calling string methods; a stray null/number would throw.
        .filter((k) => typeof k === "string")
        .map((k) => k.toLowerCase().trim())
        .filter(Boolean);
}
|
|
59
|
+
/**
 * Resolve the identifying property used to MERGE a related node of `kind`.
 *
 * Each supported label has a priority-ordered list of candidate properties;
 * the first candidate holding a non-blank string wins, and its trimmed value
 * is returned so the same real-world entity collapses to one node across
 * documents.
 *
 * @param {string} kind - Label of the related node.
 * @param {object} properties - Properties supplied for the related node.
 * @returns {{ key: string, value: string } | null} The merge key, or null
 *   when the kind is unsupported or no candidate property is usable.
 */
function mergeKeyFor(kind, properties) {
    let candidates;
    if (kind === "Organization" || kind === "DefinedTerm" || kind === "Credential") {
        candidates = ["name"];
    }
    else if (kind === "Person") {
        // Email is preferred; telephone is the fallback identifier.
        candidates = ["email", "telephone"];
    }
    else {
        return null;
    }
    for (const key of candidates) {
        const raw = properties[key];
        if (typeof raw !== "string") {
            continue;
        }
        const value = raw.trim();
        if (value) {
            return { key, value };
        }
    }
    return null;
}
|
|
90
|
+
/**
 * Increment the tally for `kind` in a breakdown object, starting from zero
 * when the kind has not been counted yet. Mutates `breakdown` in place.
 *
 * @param {Record<string, number>} breakdown - Per-kind counters.
 * @param {string} kind - Kind whose counter is incremented.
 */
function bumpKind(breakdown, kind) {
    const tally = breakdown[kind] ?? 0;
    breakdown[kind] = tally + 1;
}
|
|
93
|
+
/**
 * Increment the tally for `edgeType` in a breakdown object, starting from
 * zero when the edge type has not been counted yet. Mutates `breakdown`.
 *
 * @param {Record<string, number>} breakdown - Per-edge-type counters.
 * @param {string} edgeType - Relationship type whose counter is incremented.
 */
function bumpEdge(breakdown, edgeType) {
    const tally = breakdown[edgeType] ?? 0;
    breakdown[edgeType] = tally + 1;
}
|
|
96
|
+
// ---------------------------------------------------------------------------
|
|
97
|
+
// Main entry point
|
|
98
|
+
// ---------------------------------------------------------------------------
|
|
6
99
|
export async function memoryIngest(params) {
|
|
7
|
-
const { accountId, attachmentId, documentSummary, sections,
|
|
100
|
+
const { accountId, attachmentId, documentSummary, anchorNodeId, anchorLabel, sections, documentEdges = [], orphanCandidates = [], scope, sourceUrl, sourceType, documentKeywords: rawDocKeywords, userKeywords: rawUserKeywords, sessionId, } = params;
|
|
8
101
|
if (!scope) {
|
|
9
102
|
throw new Error("scope is required — valid values: 'public', 'shared', 'admin', 'user:{identifier}'");
|
|
10
103
|
}
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
104
|
+
if (!anchorNodeId) {
|
|
105
|
+
throw new Error("anchorNodeId is required — the document subject's element ID (UserProfile/LocalBusiness/Person/Organization)");
|
|
106
|
+
}
|
|
107
|
+
if (!anchorLabel) {
|
|
108
|
+
throw new Error("anchorLabel is required — the anchor node's primary label");
|
|
109
|
+
}
|
|
110
|
+
const keywords = (() => {
|
|
111
|
+
const u = normaliseKeywords(rawUserKeywords);
|
|
112
|
+
const d = normaliseKeywords(rawDocKeywords);
|
|
113
|
+
if (u.length === 0 && d.length === 0)
|
|
114
|
+
return undefined;
|
|
115
|
+
return [...new Set([...u, ...d])];
|
|
116
|
+
})();
|
|
22
117
|
const t0 = Date.now();
|
|
23
118
|
const log = (stage, detail) => console.error(`[memory-ingest] [${attachmentId.slice(0, 8)}] ${stage}${detail ? ` — ${detail}` : ""} (${Date.now() - t0}ms)`);
|
|
24
|
-
log("start", `${sections.length} sections, scope=${scope}`);
|
|
25
|
-
// 1. Retrieve cached content from memory-ingest-extract
|
|
119
|
+
log("start", `${sections.length} sections, scope=${scope}, anchor=${anchorLabel}`);
|
|
26
120
|
const cached = extractCache.get(attachmentId);
|
|
27
121
|
if (!cached) {
|
|
28
122
|
throw new Error(`No cached extract found for attachment "${attachmentId}". ` +
|
|
29
|
-
`Call memory-ingest-extract first
|
|
123
|
+
`Call memory-ingest-extract first.`);
|
|
30
124
|
}
|
|
31
125
|
log("cache-hit", cached.filename);
|
|
32
|
-
// Validate section count matches
|
|
33
|
-
if (sections.length !== cached.sections.length) {
|
|
34
|
-
throw new Error(`Section count mismatch: model provided ${sections.length} sections ` +
|
|
35
|
-
`but the extracted document has ${cached.sections.length} sections. ` +
|
|
36
|
-
`Provide exactly one summary per section returned by memory-ingest-extract.`);
|
|
37
|
-
}
|
|
38
|
-
// Validate chunk summary counts per section
|
|
39
|
-
for (let i = 0; i < sections.length; i++) {
|
|
40
|
-
const expectedChunks = cached.sections[i].chunks.length;
|
|
41
|
-
const providedSummaries = sections[i].chunkSummaries.length;
|
|
42
|
-
if (providedSummaries !== expectedChunks) {
|
|
43
|
-
throw new Error(`Chunk count mismatch in section "${sections[i].title}": ` +
|
|
44
|
-
`model provided ${providedSummaries} chunk summaries ` +
|
|
45
|
-
`but the section has ${expectedChunks} chunks.`);
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
126
|
const { filename, mimeType } = cached;
|
|
49
127
|
const now = new Date().toISOString();
|
|
50
|
-
//
|
|
51
|
-
const textsToEmbed = [];
|
|
52
|
-
// Document-level summary
|
|
53
|
-
textsToEmbed.push(documentSummary);
|
|
54
|
-
const docEmbedIdx = 0;
|
|
55
|
-
// Section-level summaries
|
|
56
|
-
const sectionEmbedIndices = [];
|
|
57
|
-
for (const section of sections) {
|
|
58
|
-
sectionEmbedIndices.push(textsToEmbed.length);
|
|
59
|
-
textsToEmbed.push(section.summary);
|
|
60
|
-
}
|
|
61
|
-
// Chunk-level summaries
|
|
62
|
-
const chunkEmbedIndices = [];
|
|
63
|
-
for (let si = 0; si < sections.length; si++) {
|
|
64
|
-
for (let ci = 0; ci < sections[si].chunkSummaries.length; ci++) {
|
|
65
|
-
chunkEmbedIndices.push({
|
|
66
|
-
sectionIdx: si,
|
|
67
|
-
chunkIdx: ci,
|
|
68
|
-
embedIdx: textsToEmbed.length,
|
|
69
|
-
});
|
|
70
|
-
textsToEmbed.push(sections[si].chunkSummaries[ci]);
|
|
71
|
-
}
|
|
72
|
-
}
|
|
73
|
-
// 3. Batch embed all summaries in a single Ollama call.
|
|
128
|
+
// 1. Embed document summary + every section body in one batch.
|
|
129
|
+
const textsToEmbed = [documentSummary, ...sections.map((s) => s.body)];
|
|
74
130
|
log("embedding", `${textsToEmbed.length} texts`);
|
|
75
131
|
const embeddings = await embedBatch(textsToEmbed);
|
|
76
132
|
log("embedded", `${embeddings.length} vectors`);
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
const session = getSession();
|
|
133
|
+
const docEmbedding = embeddings[0];
|
|
134
|
+
const dbSession = getSession();
|
|
80
135
|
let documentNodeId = "";
|
|
81
|
-
|
|
82
|
-
|
|
136
|
+
const kindBreakdown = {};
|
|
137
|
+
const edgeBreakdown = {};
|
|
138
|
+
let relatedCount = 0;
|
|
139
|
+
let standaloneCount = 0;
|
|
83
140
|
try {
|
|
84
|
-
//
|
|
85
|
-
//
|
|
86
|
-
|
|
87
|
-
|
|
141
|
+
// 2. Trash-revival (Task 576) — restore a soft-deleted KnowledgeDocument
|
|
142
|
+
// with this attachmentId so MERGE finds the existing node.
|
|
143
|
+
const trashedDoc = await dbSession.run(`MATCH (d:KnowledgeDocument:Trashed)
|
|
144
|
+
WHERE d.accountId = $accountId
|
|
145
|
+
AND d._trashedKeys IS NOT NULL
|
|
146
|
+
AND d._trashedKeys CONTAINS $attachmentId
|
|
147
|
+
RETURN elementId(d) AS eid LIMIT 1`, { accountId, attachmentId });
|
|
148
|
+
if (trashedDoc.records.length > 0) {
|
|
149
|
+
const eid = trashedDoc.records[0].get("eid");
|
|
150
|
+
await restoreNode({ session: dbSession, accountId, elementId: eid });
|
|
151
|
+
log("revived", `restored trashed KnowledgeDocument elementId=${eid}`);
|
|
152
|
+
}
|
|
153
|
+
// 3. MERGE the KnowledgeDocument parent. Optional fields (sourceUrl,
|
|
154
|
+
// sourceType, keywords) are SET only when present so re-ingest of a
|
|
155
|
+
// file upload doesn't null out a previously-set web-source URL.
|
|
88
156
|
const optionalSets = [];
|
|
89
157
|
const optionalParams = {};
|
|
90
158
|
if (sourceUrl !== undefined) {
|
|
@@ -99,35 +167,19 @@ export async function memoryIngest(params) {
|
|
|
99
167
|
optionalSets.push("d.keywords = $keywords");
|
|
100
168
|
optionalParams.keywords = keywords;
|
|
101
169
|
}
|
|
102
|
-
const optionalSetClause = optionalSets.length > 0
|
|
103
|
-
|
|
104
|
-
: "";
|
|
105
|
-
// Trash recovery: a prior memory-delete on this attachmentId nulls the
|
|
106
|
-
// live attachmentId (Task 576 unique-key handling). MERGE on
|
|
107
|
-
// attachmentId would then create a NEW node, orphaning the trashed copy
|
|
108
|
-
// — and 30 days later memory-empty-trash would purge the disk dir the
|
|
109
|
-
// new node depends on. Restore first so MERGE finds the existing node.
|
|
110
|
-
const trashedDoc = await session.run(`MATCH (d:KnowledgeDocument:Trashed)
|
|
111
|
-
WHERE d.accountId = $accountId
|
|
112
|
-
AND d._trashedKeys IS NOT NULL
|
|
113
|
-
AND d._trashedKeys CONTAINS $attachmentId
|
|
114
|
-
RETURN elementId(d) AS eid LIMIT 1`, { accountId, attachmentId });
|
|
115
|
-
if (trashedDoc.records.length > 0) {
|
|
116
|
-
const eid = trashedDoc.records[0].get("eid");
|
|
117
|
-
await restoreNode({ session, accountId, elementId: eid });
|
|
118
|
-
log("revived", `restored trashed KnowledgeDocument elementId=${eid}`);
|
|
119
|
-
}
|
|
120
|
-
const docResult = await session.run(`MERGE (d:KnowledgeDocument { attachmentId: $attachmentId })
|
|
170
|
+
const optionalSetClause = optionalSets.length > 0 ? ", " + optionalSets.join(", ") : "";
|
|
171
|
+
const docResult = await dbSession.run(`MERGE (d:KnowledgeDocument { attachmentId: $attachmentId })
|
|
121
172
|
SET d.accountId = $accountId,
|
|
122
173
|
d.name = $filename,
|
|
123
|
-
d.encodingFormat
|
|
124
|
-
d.summary
|
|
125
|
-
d.sectionCount
|
|
126
|
-
d.scope
|
|
127
|
-
d.embedding
|
|
128
|
-
d.createdAt
|
|
129
|
-
d.updatedAt
|
|
174
|
+
d.encodingFormat = $mimeType,
|
|
175
|
+
d.summary = $documentSummary,
|
|
176
|
+
d.sectionCount = $sectionCount,
|
|
177
|
+
d.scope = $scope,
|
|
178
|
+
d.embedding = $embedding,
|
|
179
|
+
d.createdAt = coalesce(d.createdAt, $createdAt),
|
|
180
|
+
d.updatedAt = $updatedAt
|
|
130
181
|
${optionalSetClause}
|
|
182
|
+
REMOVE d.deletedAt
|
|
131
183
|
RETURN elementId(d) AS nodeId`, {
|
|
132
184
|
attachmentId,
|
|
133
185
|
accountId,
|
|
@@ -136,114 +188,346 @@ export async function memoryIngest(params) {
|
|
|
136
188
|
documentSummary,
|
|
137
189
|
sectionCount: sections.length,
|
|
138
190
|
scope,
|
|
139
|
-
embedding:
|
|
191
|
+
embedding: docEmbedding,
|
|
140
192
|
createdAt: now,
|
|
141
193
|
updatedAt: now,
|
|
142
194
|
...optionalParams,
|
|
143
195
|
});
|
|
144
196
|
documentNodeId = docResult.records[0].get("nodeId");
|
|
145
|
-
|
|
146
|
-
//
|
|
147
|
-
//
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
197
|
+
console.error(`[memory-ingest] KnowledgeDocument.name=${JSON.stringify(filename)} attachmentId=${attachmentId.slice(0, 8)} docId=${documentNodeId.slice(0, 12)}`);
|
|
198
|
+
// 4. Cleanup prior children (idempotent re-ingest). Removes Section nodes
|
|
199
|
+
// (any secondary label) and any standalone nodes stamped with this
|
|
200
|
+
// attachmentId. MERGEd related entities (Organizations, Persons) are spared.
|
|
201
|
+
const cleanup = await deleteDocumentChildren(attachmentId, dbSession);
|
|
202
|
+
if (cleanup.sections > 0 || cleanup.chunks > 0 || cleanup.typed > 0 || cleanup.references > 0) {
|
|
203
|
+
log("cleanup", `deleted ${cleanup.sections} sections, ${cleanup.chunks} chunks, ${cleanup.typed} typed, ${cleanup.references} references`);
|
|
151
204
|
}
|
|
152
|
-
//
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
const section = sections[si];
|
|
160
|
-
const sectionResult = await session.run(`CREATE (s:Section {
|
|
161
|
-
accountId: $accountId,
|
|
162
|
-
title: $title,
|
|
163
|
-
summary: $summary,
|
|
164
|
-
position: $position,
|
|
165
|
-
scope: $scope,
|
|
166
|
-
embedding: $embedding,
|
|
167
|
-
createdAt: $createdAt,
|
|
168
|
-
updatedAt: $updatedAt
|
|
169
|
-
})
|
|
170
|
-
RETURN elementId(s) AS nodeId`, {
|
|
205
|
+
// 5. Per-section writes. Track the previous section's elementId so we can
|
|
206
|
+
// chain (:Section)-[:NEXT]->(:Section) in reading order.
|
|
207
|
+
let previousSectionId = null;
|
|
208
|
+
for (let i = 0; i < sections.length; i++) {
|
|
209
|
+
const section = sections[i];
|
|
210
|
+
const bodyEmbedding = embeddings[i + 1]; // +1 because index 0 is docEmbedding
|
|
211
|
+
const baseProps = {
|
|
171
212
|
accountId,
|
|
172
213
|
title: section.title,
|
|
173
|
-
|
|
174
|
-
|
|
214
|
+
body: section.body,
|
|
215
|
+
bodyPreview: section.body.slice(0, PREVIEW_LENGTH),
|
|
216
|
+
position: i,
|
|
175
217
|
scope,
|
|
176
|
-
embedding:
|
|
218
|
+
embedding: bodyEmbedding,
|
|
177
219
|
createdAt: now,
|
|
178
220
|
updatedAt: now,
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
221
|
+
createdByAgent: PROVENANCE_AGENT,
|
|
222
|
+
createdBySource: PROVENANCE_AGENT,
|
|
223
|
+
createdBySession: sessionId ?? "",
|
|
224
|
+
source: PROVENANCE_SOURCE,
|
|
225
|
+
sourceDocumentId: attachmentId,
|
|
226
|
+
};
|
|
227
|
+
// 5a. Standalone node kind (currently just Project) — no `:Section` label.
|
|
228
|
+
if (STANDALONE_KINDS_SET.has(section.kind)) {
|
|
229
|
+
const standaloneId = await writeStandaloneNode(dbSession, section, baseProps, anchorNodeId, anchorLabel, attachmentId, sessionId ?? "", now);
|
|
230
|
+
standaloneCount += 1;
|
|
231
|
+
bumpKind(kindBreakdown, section.kind);
|
|
232
|
+
if (section.anchorEdge)
|
|
233
|
+
bumpEdge(edgeBreakdown, section.anchorEdge.type);
|
|
234
|
+
previousSectionId = null; // standalone breaks the section chain
|
|
235
|
+
// related entities for standalone (e.g. Project UNDER Organization)
|
|
236
|
+
if (section.related && section.related.length > 0) {
|
|
237
|
+
for (const related of section.related) {
|
|
238
|
+
await writeRelatedAndEdge(dbSession, standaloneId, related, accountId, now, sessionId ?? "", attachmentId);
|
|
239
|
+
relatedCount += 1;
|
|
240
|
+
bumpEdge(edgeBreakdown, related.edge.type);
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
continue;
|
|
244
|
+
}
|
|
245
|
+
// 5b. Section-shaped kind (everything else, including Other). One `:Section`
|
|
246
|
+
// node with optional secondary label.
|
|
247
|
+
const isKnownSectionKind = SECTION_LABEL_KINDS.has(section.kind);
|
|
248
|
+
const sectionLabels = isKnownSectionKind && section.kind !== SECTION_KIND_OTHER
|
|
249
|
+
? [`Section`, section.kind]
|
|
250
|
+
: section.kind === SECTION_KIND_OTHER
|
|
251
|
+
? [`Section`, `Other`]
|
|
252
|
+
: [`Section`]; // unrecognised kind — should never happen post-classifier
|
|
253
|
+
// Compose properties: classifier-supplied properties overlaid on system
|
|
254
|
+
// fields, system fields winning. For Section:Other, also stamp
|
|
255
|
+
// classifierReason so the ontology-growth query can surface it.
|
|
256
|
+
const sectionProps = {
|
|
257
|
+
...section.properties,
|
|
258
|
+
...baseProps,
|
|
259
|
+
...(section.kind === SECTION_KIND_OTHER && section.classifierReason
|
|
260
|
+
? { classifierReason: section.classifierReason }
|
|
261
|
+
: {}),
|
|
262
|
+
};
|
|
263
|
+
const labelClause = sectionLabels.map((l) => `\`${l}\``).join(":");
|
|
264
|
+
const sectionResult = await dbSession.run(`CREATE (s:${labelClause})
|
|
265
|
+
SET s = $props
|
|
266
|
+
WITH s
|
|
267
|
+
MATCH (d:KnowledgeDocument) WHERE elementId(d) = $docId
|
|
268
|
+
CREATE (d)-[hs:HAS_SECTION]->(s)
|
|
269
|
+
SET hs.createdByAgent = $createdByAgent,
|
|
270
|
+
hs.createdBySession = $createdBySession,
|
|
271
|
+
hs.source = $source,
|
|
272
|
+
hs.sourceDocumentId = $sourceDocumentId,
|
|
273
|
+
hs.createdAt = $createdAt
|
|
274
|
+
RETURN elementId(s) AS nodeId`, {
|
|
275
|
+
props: sectionProps,
|
|
276
|
+
docId: documentNodeId,
|
|
277
|
+
createdByAgent: PROVENANCE_AGENT,
|
|
278
|
+
createdBySession: sessionId ?? "",
|
|
279
|
+
source: PROVENANCE_SOURCE,
|
|
280
|
+
sourceDocumentId: attachmentId,
|
|
208
281
|
createdAt: now,
|
|
209
|
-
updatedAt: now,
|
|
210
282
|
});
|
|
211
|
-
const
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
283
|
+
const sectionId = sectionResult.records[0].get("nodeId");
|
|
284
|
+
bumpKind(kindBreakdown, section.kind);
|
|
285
|
+
bumpEdge(edgeBreakdown, "HAS_SECTION");
|
|
286
|
+
// 5c. NEXT chain in reading order.
|
|
287
|
+
if (previousSectionId) {
|
|
288
|
+
await dbSession.run(`MATCH (a:Section), (b:Section)
|
|
289
|
+
WHERE elementId(a) = $prev AND elementId(b) = $cur
|
|
290
|
+
CREATE (a)-[n:NEXT]->(b)
|
|
291
|
+
SET n.createdByAgent = $createdByAgent,
|
|
292
|
+
n.createdBySession = $createdBySession,
|
|
293
|
+
n.source = $source,
|
|
294
|
+
n.sourceDocumentId = $sourceDocumentId,
|
|
295
|
+
n.createdAt = $createdAt`, {
|
|
296
|
+
prev: previousSectionId,
|
|
297
|
+
cur: sectionId,
|
|
298
|
+
createdByAgent: PROVENANCE_AGENT,
|
|
299
|
+
createdBySession: sessionId ?? "",
|
|
300
|
+
source: PROVENANCE_SOURCE,
|
|
301
|
+
sourceDocumentId: attachmentId,
|
|
302
|
+
createdAt: now,
|
|
303
|
+
});
|
|
304
|
+
bumpEdge(edgeBreakdown, "NEXT");
|
|
305
|
+
}
|
|
306
|
+
previousSectionId = sectionId;
|
|
307
|
+
// 5d. Anchor edge (identity kinds only). Writer applies the edge the
|
|
308
|
+
// classifier proposed; never invents.
|
|
309
|
+
if (section.anchorEdge && IDENTITY_KINDS_SET.has(section.kind)) {
|
|
310
|
+
const direction = section.anchorEdge.direction;
|
|
311
|
+
const edgeType = section.anchorEdge.type;
|
|
312
|
+
const edgeProps = {
|
|
313
|
+
...(section.anchorEdge.properties ?? {}),
|
|
314
|
+
createdByAgent: PROVENANCE_AGENT,
|
|
315
|
+
createdBySession: sessionId ?? "",
|
|
316
|
+
source: PROVENANCE_SOURCE,
|
|
317
|
+
sourceDocumentId: attachmentId,
|
|
318
|
+
createdAt: now,
|
|
319
|
+
};
|
|
320
|
+
const cypher = direction === "from-anchor"
|
|
321
|
+
? `MATCH (a:\`${anchorLabel}\`) WHERE elementId(a) = $anchorId AND a.accountId = $accountId
|
|
322
|
+
MATCH (s:Section) WHERE elementId(s) = $sectionId
|
|
323
|
+
CREATE (a)-[edge:\`${edgeType}\`]->(s)
|
|
324
|
+
SET edge += $edgeProps`
|
|
325
|
+
: `MATCH (a:\`${anchorLabel}\`) WHERE elementId(a) = $anchorId AND a.accountId = $accountId
|
|
326
|
+
MATCH (s:Section) WHERE elementId(s) = $sectionId
|
|
327
|
+
CREATE (s)-[edge:\`${edgeType}\`]->(a)
|
|
328
|
+
SET edge += $edgeProps`;
|
|
329
|
+
await dbSession.run(cypher, { anchorId: anchorNodeId, sectionId, accountId, edgeProps });
|
|
330
|
+
bumpEdge(edgeBreakdown, edgeType);
|
|
331
|
+
}
|
|
332
|
+
// 5e. Related entities (Organization for AT, DefinedTerm for DEFINES on
|
|
333
|
+
// :Section:Definitions, etc.). Apply each as the classifier emitted.
|
|
334
|
+
if (section.related && section.related.length > 0) {
|
|
335
|
+
for (const related of section.related) {
|
|
336
|
+
await writeRelatedAndEdge(dbSession, sectionId, related, accountId, now, sessionId ?? "", attachmentId);
|
|
337
|
+
relatedCount += 1;
|
|
338
|
+
bumpEdge(edgeBreakdown, related.edge.type);
|
|
229
339
|
}
|
|
230
340
|
}
|
|
231
341
|
}
|
|
232
|
-
|
|
342
|
+
// 6. Document-level edges (PARTY for contracts, etc.). Applied off the
|
|
343
|
+
// KnowledgeDocument; classifier proposes, writer applies.
|
|
344
|
+
if (documentEdges && documentEdges.length > 0) {
|
|
345
|
+
for (const docEdge of documentEdges) {
|
|
346
|
+
await writeDocumentEdge(dbSession, documentNodeId, docEdge, accountId, now, sessionId ?? "", attachmentId);
|
|
347
|
+
relatedCount += 1;
|
|
348
|
+
bumpEdge(edgeBreakdown, docEdge.type);
|
|
349
|
+
}
|
|
350
|
+
}
|
|
351
|
+
log("neo4j-done", `doc=${documentNodeId.slice(0, 12)} sections=${sections.length} kinds=${JSON.stringify(kindBreakdown)} edges=${JSON.stringify(edgeBreakdown)} related=${relatedCount} orphans=${orphanCandidates.length}`);
|
|
352
|
+
console.error(`[memory-ingest] sections=${sections.length} chain=${Math.max(0, sections.length - 1)} typed=${formatBreakdown(kindBreakdown)} edges=${formatBreakdown(edgeBreakdown)} orphans=${orphanCandidates.length} docId=${documentNodeId}`);
|
|
353
|
+
for (const orphan of orphanCandidates) {
|
|
354
|
+
console.error(`[document-ingest] orphan-candidate node=${orphan.kind} label=${JSON.stringify(orphan.label)} reason=${JSON.stringify(orphan.reason)}`);
|
|
355
|
+
}
|
|
233
356
|
return {
|
|
234
357
|
documentNodeId,
|
|
235
358
|
sectionCount: sections.length,
|
|
236
|
-
|
|
237
|
-
|
|
359
|
+
kindBreakdown,
|
|
360
|
+
edgeBreakdown,
|
|
361
|
+
relatedCount,
|
|
362
|
+
standaloneCount,
|
|
363
|
+
orphanCandidates,
|
|
238
364
|
documentSummary,
|
|
239
365
|
keywords,
|
|
240
366
|
};
|
|
241
367
|
}
|
|
242
368
|
finally {
|
|
243
|
-
// Evict cache entry — content is now persisted in Neo4j
|
|
244
369
|
extractCache.delete(attachmentId);
|
|
245
|
-
await
|
|
370
|
+
await dbSession.close();
|
|
246
371
|
log("complete");
|
|
247
372
|
}
|
|
248
373
|
}
|
|
374
|
+
// ---------------------------------------------------------------------------
|
|
375
|
+
// Helpers — standalone-node writer (Project), related-edge writer, document-
|
|
376
|
+
// edge writer.
|
|
377
|
+
// ---------------------------------------------------------------------------
|
|
378
|
+
/**
 * Create a standalone (non-`:Section`) node for a classified section and,
 * when the section carries an `anchorEdge`, connect it to the anchor node.
 *
 * The node is labelled with `section.kind` and receives `section.properties`
 * overlaid by `baseProps` (system fields win). The anchor edge is stamped
 * with provenance properties, which override any same-named classifier
 * properties.
 *
 * @param {object} dbSession - Database session exposing `run(cypher, params)`.
 * @param {object} section - Classified section; reads `kind`, `properties`
 *   and the optional `anchorEdge` ({ type, direction, properties }).
 * @param {object} baseProps - System properties; `baseProps.accountId` also
 *   scopes the anchor lookup.
 * @param {string} anchorNodeId - elementId of the anchor node.
 * @param {string} anchorLabel - Label the anchor node is matched on.
 * @param {string} attachmentId - Stamped on the edge as `sourceDocumentId`.
 * @param {string} sessionId - Stamped on the edge as `createdBySession`.
 * @param {string} now - Timestamp stamped on the edge as `createdAt`.
 * @returns {Promise<string>} elementId of the created node.
 */
async function writeStandaloneNode(dbSession, section, baseProps, anchorNodeId, anchorLabel, attachmentId, sessionId, now) {
    // Labels and relationship types cannot be bound as Cypher parameters, so
    // they are interpolated. Doubling embedded backticks keeps a hostile or
    // malformed name inside its quoted identifier instead of letting it
    // terminate the quote and inject Cypher.
    const quoteIdentifier = (identifier) => "`" + String(identifier).replace(/`/g, "``") + "`";
    const props = { ...section.properties, ...baseProps };
    const created = await dbSession.run(`CREATE (n:${quoteIdentifier(section.kind)})
     SET n = $props
     RETURN elementId(n) AS nodeId`, { props });
    const nodeId = created.records[0].get("nodeId");
    const { anchorEdge } = section;
    if (anchorEdge) {
        const edgeProps = {
            ...(anchorEdge.properties ?? {}),
            createdByAgent: PROVENANCE_AGENT,
            createdBySession: sessionId,
            source: PROVENANCE_SOURCE,
            sourceDocumentId: attachmentId,
            createdAt: now,
        };
        const edgeType = quoteIdentifier(anchorEdge.type);
        // "from-anchor" points anchor -> node; any other direction points
        // node -> anchor.
        const edgePattern = anchorEdge.direction === "from-anchor"
            ? `(a)-[edge:${edgeType}]->(n)`
            : `(n)-[edge:${edgeType}]->(a)`;
        await dbSession.run(`MATCH (a:${quoteIdentifier(anchorLabel)}) WHERE elementId(a) = $anchorId AND a.accountId = $accountId
       MATCH (n) WHERE elementId(n) = $nodeId
       CREATE ${edgePattern}
       SET edge += $edgeProps`, {
            anchorId: anchorNodeId,
            nodeId,
            accountId: baseProps.accountId,
            edgeProps,
        });
    }
    return nodeId;
}
|
|
413
|
+
/**
 * Writes a related node via `writeRelatedNode`, then creates an edge of type
 * `related.edge.type` between it and `fromNodeId`.
 *
 * @param dbSession     Object exposing an async `run(cypher, params)` method.
 * @param fromNodeId    elementId of the node the relation hangs off.
 * @param related       Related-node descriptor; `related.edge` supplies
 *                      `type`, `direction` and optional `properties`.
 * @param accountId     Forwarded to `writeRelatedNode`.
 * @param now           Timestamp stored as the edge's `createdAt`.
 * @param sessionId     Stored as the edge's `createdBySession`.
 * @param attachmentId  Stored as the edge's `sourceDocumentId`.
 * @returns Promise resolving to undefined once the edge write has completed.
 */
async function writeRelatedAndEdge(dbSession, fromNodeId, related, accountId, now, sessionId, attachmentId) {
    const relatedNodeId = await writeRelatedNode({
        session: dbSession,
        accountId,
        related,
        now,
        sessionId,
    });

    // "outgoing" means fromNode -> related; every other value reverses it.
    const [startId, endId] = related.edge.direction === "outgoing"
        ? [fromNodeId, relatedNodeId]
        : [relatedNodeId, fromNodeId];

    // Provenance keys are listed last so they override same-named edge properties.
    const edgeProps = {
        ...(related.edge.properties ?? {}),
        createdByAgent: PROVENANCE_AGENT,
        createdBySession: sessionId,
        source: PROVENANCE_SOURCE,
        sourceDocumentId: attachmentId,
        createdAt: now,
    };

    const edgeCypher = [
        "MATCH (a) WHERE elementId(a) = $a",
        "MATCH (b) WHERE elementId(b) = $b",
        `CREATE (a)-[edge:\`${related.edge.type}\`]->(b)`,
        "SET edge += $edgeProps",
    ].join("\n");

    await dbSession.run(edgeCypher, { a: startId, b: endId, edgeProps });
}
|
|
437
|
+
/**
 * Writes the target node of a document-level edge and links the
 * KnowledgeDocument node to it.
 *
 * The target is written through `writeRelatedNode` by wrapping `docEdge` in a
 * related-node-shaped object, so the same MERGE-vs-CREATE choice applies; the
 * edge hangs off the KnowledgeDocument rather than off a Section.
 *
 * @param dbSession       Object exposing an async `run(cypher, params)` method.
 * @param documentNodeId  elementId of the KnowledgeDocument node.
 * @param docEdge         Supplies `type`, `direction`, `targetKind`,
 *                        `targetProperties` and an optional `merge` flag.
 * @param accountId       Forwarded to `writeRelatedNode`.
 * @param now             Timestamp stored as the edge's `createdAt`.
 * @param sessionId       Stored as the edge's `createdBySession`.
 * @param attachmentId    Stored as the edge's `sourceDocumentId`.
 * @returns Promise resolving to undefined once the edge write has completed.
 *
 * NOTE(review): `docEdge.direction` is copied into the wrapper object, but the
 * edge below is always created document -> target — confirm that is intended.
 */
async function writeDocumentEdge(dbSession, documentNodeId, docEdge, accountId, now, sessionId, attachmentId) {
    const { type, direction, targetKind, targetProperties } = docEdge;

    // Merging is opt-out: only an explicit `merge: false` disables it.
    const targetDescriptor = {
        kind: targetKind,
        properties: targetProperties,
        edge: { type, direction },
        merge: docEdge.merge !== false,
    };
    const targetNodeId = await writeRelatedNode({
        session: dbSession,
        accountId,
        related: targetDescriptor,
        now,
        sessionId,
    });

    const edgeCypher = [
        "MATCH (d:KnowledgeDocument) WHERE elementId(d) = $docId",
        "MATCH (n) WHERE elementId(n) = $targetId",
        `CREATE (d)-[edge:\`${type}\`]->(n)`,
        "SET edge.createdByAgent = $createdByAgent,",
        "edge.createdBySession = $createdBySession,",
        "edge.source = $source,",
        "edge.sourceDocumentId = $sourceDocumentId,",
        "edge.createdAt = $createdAt",
    ].join("\n");

    const edgeParams = {
        docId: documentNodeId,
        targetId: targetNodeId,
        createdByAgent: PROVENANCE_AGENT,
        createdBySession: sessionId,
        source: PROVENANCE_SOURCE,
        sourceDocumentId: attachmentId,
        createdAt: now,
    };
    await dbSession.run(edgeCypher, edgeParams);
}
|
|
471
|
+
/**
 * Renders a breakdown map as a compact `key:value,key:value` string.
 *
 * @param {Record<string, unknown> | null | undefined} b  Map of names to
 *        values; null or undefined is treated as an empty map.
 * @returns {string} `"{}"` when there are no entries, otherwise the entries
 *          joined with commas in `Object.entries` order.
 */
function formatBreakdown(b) {
    // Object.entries throws a TypeError on null/undefined; a missing breakdown
    // should format the same as an empty one instead of throwing.
    const entries = Object.entries(b ?? {});
    if (entries.length === 0) {
        return "{}";
    }
    return entries.map(([k, v]) => `${k}:${v}`).join(",");
}
|
|
477
|
+
/**
 * Writes a related node and returns its elementId.
 *
 * When `related.merge` is not `false` and `mergeKeyFor` yields a key, the node
 * is MERGEd on `accountId` plus that key: creation stamps properties,
 * embedding and provenance, while a match only refreshes `updatedAt`, so the
 * first writer's provenance survives re-ingest of a shared entity. Otherwise a
 * fresh node is CREATEd with the full property set.
 *
 * @param opts  `{ session, accountId, related, now, sessionId }`, where
 *              `session` exposes an async `run(cypher, params)` method and
 *              `related` supplies `kind`, `properties` and an optional `merge`.
 * @returns elementId of the merged or created node.
 *
 * NOTE(review): `createdBySource` is populated with the same value as
 * `createdByAgent` on both paths — confirm that is intended.
 * NOTE(review): no document id reaches this function, so nodes written here
 * carry no `sourceDocumentId` property — confirm re-ingest cleanup does not
 * rely on one.
 */
async function writeRelatedNode(opts) {
    const { session, accountId, related, now, sessionId } = opts;
    const { kind, properties } = related;

    // Embed the `name` when it is a string so the node is searchable; otherwise
    // fall back to the label plus the JSON-serialised properties.
    let embedText;
    if (typeof properties.name === "string") {
        embedText = properties.name;
    }
    else {
        embedText = `${kind} ${JSON.stringify(properties)}`;
    }
    const embedding = await embed(embedText);

    const mergeKey = related.merge !== false ? mergeKeyFor(kind, properties) : null;
    const createdBySession = sessionId ?? "";

    if (!mergeKey) {
        // No stable identifying property: always create a new node.
        const createCypher = [
            `CREATE (r:\`${kind}\`)`,
            "SET r = $props",
            "RETURN elementId(r) AS nodeId",
        ].join("\n");
        const created = await session.run(createCypher, {
            props: {
                ...properties,
                accountId,
                scope: "shared",
                embedding,
                createdAt: now,
                updatedAt: now,
                createdByAgent: PROVENANCE_AGENT,
                createdBySource: PROVENANCE_AGENT,
                createdBySession,
                source: PROVENANCE_SOURCE,
            },
        });
        return created.records[0].get("nodeId");
    }

    const mergeCypher = [
        `MERGE (r:\`${kind}\` { accountId: $accountId, \`${mergeKey.key}\`: $mergeValue })`,
        "ON CREATE SET r += $createProps,",
        "r.embedding = $embedding,",
        "r.createdAt = $createdAt,",
        "r.updatedAt = $createdAt,",
        "r.createdByAgent = $createdByAgent,",
        "r.createdBySource = $createdByAgent,",
        "r.createdBySession = $createdBySession,",
        "r.source = $source",
        "ON MATCH SET r.updatedAt = $createdAt",
        "RETURN elementId(r) AS nodeId",
    ].join("\n");
    const merged = await session.run(mergeCypher, {
        accountId,
        mergeValue: mergeKey.value,
        createProps: { ...properties, accountId, scope: "shared" },
        embedding,
        createdAt: now,
        createdByAgent: PROVENANCE_AGENT,
        createdBySession,
        source: PROVENANCE_SOURCE,
    });
    return merged.records[0].get("nodeId");
}
|
|
249
533
|
//# sourceMappingURL=memory-ingest.js.map
|