@rubytech/create-realagent 1.0.706 → 1.0.709

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/package.json +1 -1
  2. package/payload/platform/lib/oauth-llm/dist/index.d.ts +101 -0
  3. package/payload/platform/lib/oauth-llm/dist/index.d.ts.map +1 -0
  4. package/payload/platform/lib/oauth-llm/dist/index.js +353 -0
  5. package/payload/platform/lib/oauth-llm/dist/index.js.map +1 -0
  6. package/payload/platform/lib/oauth-llm/src/index.ts +526 -0
  7. package/payload/platform/lib/oauth-llm/tsconfig.json +8 -0
  8. package/payload/platform/neo4j/schema.cypher +60 -11
  9. package/payload/platform/package.json +2 -2
  10. package/payload/platform/plugins/admin/mcp/dist/index.js +9 -9
  11. package/payload/platform/plugins/admin/mcp/dist/index.js.map +1 -1
  12. package/payload/platform/plugins/admin/skills/business-profile/SKILL.md +1 -1
  13. package/payload/platform/plugins/admin/skills/onboarding/SKILL.md +6 -11
  14. package/payload/platform/plugins/docs/references/adherence.md +1 -1
  15. package/payload/platform/plugins/email/mcp/dist/lib/screening.d.ts +3 -3
  16. package/payload/platform/plugins/email/mcp/dist/lib/screening.d.ts.map +1 -1
  17. package/payload/platform/plugins/email/mcp/dist/lib/screening.js +12 -12
  18. package/payload/platform/plugins/email/mcp/dist/lib/screening.js.map +1 -1
  19. package/payload/platform/plugins/email/mcp/dist/scripts/email-auto-respond.js +14 -28
  20. package/payload/platform/plugins/email/mcp/dist/scripts/email-auto-respond.js.map +1 -1
  21. package/payload/platform/plugins/email/mcp/dist/scripts/email-fetch.js +9 -19
  22. package/payload/platform/plugins/email/mcp/dist/scripts/email-fetch.js.map +1 -1
  23. package/payload/platform/plugins/memory/PLUGIN.md +22 -15
  24. package/payload/platform/plugins/memory/mcp/dist/index.js +130 -44
  25. package/payload/platform/plugins/memory/mcp/dist/index.js.map +1 -1
  26. package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.d.ts +1 -7
  27. package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.d.ts.map +1 -1
  28. package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.js +32 -15
  29. package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.js.map +1 -1
  30. package/payload/platform/plugins/memory/mcp/dist/lib/graph-write-gate.js +4 -4
  31. package/payload/platform/plugins/memory/mcp/dist/lib/graph-write-gate.js.map +1 -1
  32. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts +200 -0
  33. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts.map +1 -0
  34. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js +343 -0
  35. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js.map +1 -0
  36. package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.d.ts.map +1 -1
  37. package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.js +12 -46
  38. package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.js.map +1 -1
  39. package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.d.ts +34 -0
  40. package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.d.ts.map +1 -0
  41. package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.js +58 -0
  42. package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.js.map +1 -0
  43. package/payload/platform/plugins/memory/mcp/dist/tools/memory-edit-attachment.d.ts +1 -2
  44. package/payload/platform/plugins/memory/mcp/dist/tools/memory-edit-attachment.d.ts.map +1 -1
  45. package/payload/platform/plugins/memory/mcp/dist/tools/memory-edit-attachment.js +8 -9
  46. package/payload/platform/plugins/memory/mcp/dist/tools/memory-edit-attachment.js.map +1 -1
  47. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-extract.d.ts +5 -17
  48. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-extract.d.ts.map +1 -1
  49. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-extract.js +26 -49
  50. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-extract.js.map +1 -1
  51. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-web.d.ts.map +1 -1
  52. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-web.js +4 -25
  53. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-web.js.map +1 -1
  54. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts +41 -16
  55. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts.map +1 -1
  56. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js +457 -173
  57. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js.map +1 -1
  58. package/payload/platform/plugins/memory/references/schema-base.md +82 -1
  59. package/payload/platform/plugins/memory/skills/document-ingest/SKILL.md +145 -0
  60. package/payload/platform/templates/agents/admin/IDENTITY.md +1 -2
  61. package/payload/platform/templates/specialists/agents/content-producer.md +10 -77
  62. package/payload/platform/templates/specialists/agents/database-operator.md +39 -13
  63. package/payload/server/chunk-Y57ACANQ.js +12292 -0
  64. package/payload/server/maxy-edge.js +1 -1
  65. package/payload/server/public/assets/{graph-D-Rqh0Md.js → graph-BRD96pKD.js} +8 -8
  66. package/payload/server/public/graph.html +1 -1
  67. package/payload/server/server.js +30 -53
@@ -1,90 +1,158 @@
1
1
  import { getSession } from "../lib/neo4j.js";
2
- import { embedBatch } from "../lib/embeddings.js";
2
+ import { embed, embedBatch } from "../lib/embeddings.js";
3
3
  import { extractCache } from "./memory-ingest-extract.js";
4
4
  import { deleteDocumentChildren } from "../lib/document-hierarchy.js";
5
5
  import { restoreNode } from "../../../../../lib/graph-trash/dist/index.js";
6
+ import { IDENTITY_SECTION_KINDS, STRUCTURAL_SECTION_KINDS, CONTRACT_SECTION_KINDS, STANDALONE_NODE_KINDS, SECTION_KIND_OTHER, } from "../lib/llm-classifier.js";
7
+ // ---------------------------------------------------------------------------
8
+ // Single-Section document ingestion (Task 740, replacing Task 737's typed-vs-
9
+ // UNMAPPED fork).
10
+ //
11
+ // CACHE LOOKUP      --> MERGE/REVIVE DOC  --> CLEANUP CHILDREN    --> EMBED  --> WRITE
12
+ // (by attachmentId)     KnowledgeDocument     delete prior            batch      Section nodes
13
+ //                       (idempotent)          Section/Chunk/typed     Ollama     + secondary labels
14
+ //                                                                                + NEXT chain
15
+ //                                                                                + anchor edges
16
+ //                                                                                + related entities
17
+ //                                                                                + KD-level edges
18
+ //
19
+ // Every classified section produces ONE `:Section` node. When the classifier
20
+ // recognises the kind (Position/Education/Chapter/Parties/etc.), the same
21
+ // node carries a secondary label (`:Section:Position`) and any structured
22
+ // properties; identity-kind anchor edges go to the multi-labeled node
23
+ // directly, killing the parallel Section-vs-typed-node concept.
24
+ //
25
+ // Special-case writers fire for two contract-clause kinds:
26
+ // * Parties: `(:KnowledgeDocument)-[:PARTY]->(:Person|:Organization)` from documentEdges.
27
+ // * Definitions: `(:Section:Definitions)-[:DEFINES]->(:DefinedTerm)` from related entries.
28
+ //
29
+ // `:Chunk` is gone. Sections carry their body directly. If a body legitimately
30
+ // exceeds Neo4j's property limit, that is a classifier-split-the-section
31
+ // concern, not a writer problem.
32
+ //
33
+ // Provenance properties (createdByAgent, createdBySession, source,
34
+ // sourceDocumentId) stamp every node and edge so re-ingest cleanup finds
35
+ // them deterministically.
36
+ // ---------------------------------------------------------------------------
37
+ const PREVIEW_LENGTH = 150;
38
+ const PROVENANCE_AGENT = "document-ingest";
39
+ const PROVENANCE_SOURCE = "document";
40
+ /** Identity-kind anchor edge writer set (UserProfile → Section:Kind). */
41
+ const IDENTITY_KINDS_SET = new Set(IDENTITY_SECTION_KINDS);
42
+ /** Standalone (non-Section) node kinds (currently just Project). */
43
+ const STANDALONE_KINDS_SET = new Set(STANDALONE_NODE_KINDS);
44
+ /** All section-shaped kinds — anything that becomes a `:Section` node. */
45
+ const SECTION_LABEL_KINDS = new Set([
46
+ ...IDENTITY_SECTION_KINDS,
47
+ ...STRUCTURAL_SECTION_KINDS,
48
+ ...CONTRACT_SECTION_KINDS,
49
+ SECTION_KIND_OTHER,
50
+ ]);
51
+ // ---------------------------------------------------------------------------
52
+ // Helpers
53
+ // ---------------------------------------------------------------------------
54
/**
 * Normalise a raw keyword list: lower-case each entry, trim surrounding
 * whitespace and drop entries that end up empty.
 *
 * Input that is missing, empty, or not an array yields an empty list, and
 * non-string entries are skipped rather than raising a TypeError — keyword
 * lists arrive from callers and model output, so malformed items are tolerated.
 * Duplicates are NOT removed here.
 *
 * @param {unknown} arr - Candidate keyword array (may be undefined/null).
 * @returns {string[]} Normalised keywords in their original order.
 */
function normaliseKeywords(arr) {
    if (!Array.isArray(arr) || arr.length === 0) {
        return [];
    }
    return arr
        .filter((entry) => typeof entry === "string")
        .map((entry) => entry.toLowerCase().trim())
        .filter(Boolean);
}
59
/**
 * Pick the identifying property used to MERGE a related node of a given kind,
 * so the same real-world entity collapses to one node across documents.
 *
 * Candidate keys are tried in order and the first one holding a non-blank
 * string wins:
 *   - Organization, DefinedTerm, Credential: `name`
 *   - Person: `email`, then `telephone`
 *
 * @param {string} kind - Label of the related node.
 * @param {object|null|undefined} properties - Classifier-supplied properties.
 * @returns {{ key: string, value: string } | null} The merge key with its
 *   trimmed value, or null when the kind is not mergeable or no usable
 *   identifying value is present (including a missing `properties` object).
 */
function mergeKeyFor(kind, properties) {
    let candidateKeys;
    switch (kind) {
        case "Organization":
        case "DefinedTerm":
        case "Credential":
            candidateKeys = ["name"];
            break;
        case "Person":
            candidateKeys = ["email", "telephone"];
            break;
        default:
            return null;
    }
    // A related entry without a properties bag has no identifying key; report
    // "not mergeable" instead of throwing on the property access.
    if (properties === null || typeof properties !== "object") {
        return null;
    }
    for (const key of candidateKeys) {
        const raw = properties[key];
        if (typeof raw !== "string") {
            continue;
        }
        const value = raw.trim();
        if (value) {
            return { key, value };
        }
    }
    return null;
}
90
/**
 * Increment the per-kind counter in `breakdown` (mutated in place).
 *
 * Only an own property counts as an existing tally. `breakdown` is a plain
 * object, so a kind named like an inherited member ("constructor",
 * "toString", ...) would otherwise read the prototype's value and turn the
 * counter into a garbage string instead of starting at 1.
 *
 * @param {Record<string, number>} breakdown - Counter map keyed by kind.
 * @param {string} kind - Kind to count.
 */
function bumpKind(breakdown, kind) {
    const current = Object.prototype.hasOwnProperty.call(breakdown, kind)
        ? breakdown[kind]
        : 0;
    breakdown[kind] = (current ?? 0) + 1;
}
93
/**
 * Increment the per-edge-type counter in `breakdown` (mutated in place).
 *
 * Only an own property counts as an existing tally. `breakdown` is a plain
 * object, so an edge type named like an inherited member ("constructor",
 * "valueOf", ...) would otherwise read the prototype's value and turn the
 * counter into a garbage string instead of starting at 1.
 *
 * @param {Record<string, number>} breakdown - Counter map keyed by edge type.
 * @param {string} edgeType - Relationship type to count.
 */
function bumpEdge(breakdown, edgeType) {
    const current = Object.prototype.hasOwnProperty.call(breakdown, edgeType)
        ? breakdown[edgeType]
        : 0;
    breakdown[edgeType] = (current ?? 0) + 1;
}
96
+ // ---------------------------------------------------------------------------
97
+ // Main entry point
98
+ // ---------------------------------------------------------------------------
6
99
  export async function memoryIngest(params) {
7
- const { accountId, attachmentId, documentSummary, sections, scope, entities, sourceUrl, sourceType, keywords: rawKeywords, userKeywords: rawUserKeywords, } = params;
100
+ const { accountId, attachmentId, documentSummary, anchorNodeId, anchorLabel, sections, documentEdges = [], orphanCandidates = [], scope, sourceUrl, sourceType, documentKeywords: rawDocKeywords, userKeywords: rawUserKeywords, sessionId, } = params;
8
101
  if (!scope) {
9
102
  throw new Error("scope is required — valid values: 'public', 'shared', 'admin', 'user:{identifier}'");
10
103
  }
11
- // Normalize and merge keywords: user-supplied ∪ LLM-extracted, deduplicated.
12
- // User keywords appear first (cosmetic — both are equal after merge).
13
- const normalizeArray = (arr) => arr.map((k) => k.toLowerCase().trim()).filter(Boolean);
14
- const hasUserKeywords = rawUserKeywords && rawUserKeywords.length > 0;
15
- const hasKeywords = rawKeywords && rawKeywords.length > 0;
16
- const keywords = hasUserKeywords || hasKeywords
17
- ? [...new Set([
18
- ...normalizeArray(rawUserKeywords ?? []),
19
- ...normalizeArray(rawKeywords ?? []),
20
- ])]
21
- : undefined;
104
+ if (!anchorNodeId) {
105
+ throw new Error("anchorNodeId is required — the document subject's element ID (UserProfile/LocalBusiness/Person/Organization)");
106
+ }
107
+ if (!anchorLabel) {
108
+ throw new Error("anchorLabel is required — the anchor node's primary label");
109
+ }
110
+ const keywords = (() => {
111
+ const u = normaliseKeywords(rawUserKeywords);
112
+ const d = normaliseKeywords(rawDocKeywords);
113
+ if (u.length === 0 && d.length === 0)
114
+ return undefined;
115
+ return [...new Set([...u, ...d])];
116
+ })();
22
117
  const t0 = Date.now();
23
118
  const log = (stage, detail) => console.error(`[memory-ingest] [${attachmentId.slice(0, 8)}] ${stage}${detail ? ` — ${detail}` : ""} (${Date.now() - t0}ms)`);
24
- log("start", `${sections.length} sections, scope=${scope}`);
25
- // 1. Retrieve cached content from memory-ingest-extract
119
+ log("start", `${sections.length} sections, scope=${scope}, anchor=${anchorLabel}`);
26
120
  const cached = extractCache.get(attachmentId);
27
121
  if (!cached) {
28
122
  throw new Error(`No cached extract found for attachment "${attachmentId}". ` +
29
- `Call memory-ingest-extract first to extract and chunk the document.`);
123
+ `Call memory-ingest-extract first.`);
30
124
  }
31
125
  log("cache-hit", cached.filename);
32
- // Validate section count matches
33
- if (sections.length !== cached.sections.length) {
34
- throw new Error(`Section count mismatch: model provided ${sections.length} sections ` +
35
- `but the extracted document has ${cached.sections.length} sections. ` +
36
- `Provide exactly one summary per section returned by memory-ingest-extract.`);
37
- }
38
- // Validate chunk summary counts per section
39
- for (let i = 0; i < sections.length; i++) {
40
- const expectedChunks = cached.sections[i].chunks.length;
41
- const providedSummaries = sections[i].chunkSummaries.length;
42
- if (providedSummaries !== expectedChunks) {
43
- throw new Error(`Chunk count mismatch in section "${sections[i].title}": ` +
44
- `model provided ${providedSummaries} chunk summaries ` +
45
- `but the section has ${expectedChunks} chunks.`);
46
- }
47
- }
48
126
  const { filename, mimeType } = cached;
49
127
  const now = new Date().toISOString();
50
- // 2. Collect all texts that need embedding in a flat array.
51
- const textsToEmbed = [];
52
- // Document-level summary
53
- textsToEmbed.push(documentSummary);
54
- const docEmbedIdx = 0;
55
- // Section-level summaries
56
- const sectionEmbedIndices = [];
57
- for (const section of sections) {
58
- sectionEmbedIndices.push(textsToEmbed.length);
59
- textsToEmbed.push(section.summary);
60
- }
61
- // Chunk-level summaries
62
- const chunkEmbedIndices = [];
63
- for (let si = 0; si < sections.length; si++) {
64
- for (let ci = 0; ci < sections[si].chunkSummaries.length; ci++) {
65
- chunkEmbedIndices.push({
66
- sectionIdx: si,
67
- chunkIdx: ci,
68
- embedIdx: textsToEmbed.length,
69
- });
70
- textsToEmbed.push(sections[si].chunkSummaries[ci]);
71
- }
72
- }
73
- // 3. Batch embed all summaries in a single Ollama call.
128
+ // 1. Embed document summary + every section body in one batch.
129
+ const textsToEmbed = [documentSummary, ...sections.map((s) => s.body)];
74
130
  log("embedding", `${textsToEmbed.length} texts`);
75
131
  const embeddings = await embedBatch(textsToEmbed);
76
132
  log("embedded", `${embeddings.length} vectors`);
77
- // 4. Write nodes to Neo4j.
78
- log("neo4j-write", "starting");
79
- const session = getSession();
133
+ const docEmbedding = embeddings[0];
134
+ const dbSession = getSession();
80
135
  let documentNodeId = "";
81
- let totalChunks = 0;
82
- let entityLinks = 0;
136
+ const kindBreakdown = {};
137
+ const edgeBreakdown = {};
138
+ let relatedCount = 0;
139
+ let standaloneCount = 0;
83
140
  try {
84
- // 4a. Create KnowledgeDocument node
85
- // Build optional SET clauses for web-sourced properties.
86
- // When sourceUrl/sourceType/keywords are undefined (file uploads),
87
- // the corresponding SET lines are omitted — existing values preserved on re-ingest.
141
+ // 2. Trash-revival (Task 576) — restore a soft-deleted KnowledgeDocument
142
+ // with this attachmentId so MERGE finds the existing node.
143
+ const trashedDoc = await dbSession.run(`MATCH (d:KnowledgeDocument:Trashed)
144
+ WHERE d.accountId = $accountId
145
+ AND d._trashedKeys IS NOT NULL
146
+ AND d._trashedKeys CONTAINS $attachmentId
147
+ RETURN elementId(d) AS eid LIMIT 1`, { accountId, attachmentId });
148
+ if (trashedDoc.records.length > 0) {
149
+ const eid = trashedDoc.records[0].get("eid");
150
+ await restoreNode({ session: dbSession, accountId, elementId: eid });
151
+ log("revived", `restored trashed KnowledgeDocument elementId=${eid}`);
152
+ }
153
+ // 3. MERGE the KnowledgeDocument parent. Optional fields (sourceUrl,
154
+ // sourceType, keywords) are SET only when present so re-ingest of a
155
+ // file upload doesn't null out a previously-set web-source URL.
88
156
  const optionalSets = [];
89
157
  const optionalParams = {};
90
158
  if (sourceUrl !== undefined) {
@@ -99,35 +167,19 @@ export async function memoryIngest(params) {
99
167
  optionalSets.push("d.keywords = $keywords");
100
168
  optionalParams.keywords = keywords;
101
169
  }
102
- const optionalSetClause = optionalSets.length > 0
103
- ? ", " + optionalSets.join(", ")
104
- : "";
105
- // Trash recovery: a prior memory-delete on this attachmentId nulls the
106
- // live attachmentId (Task 576 unique-key handling). MERGE on
107
- // attachmentId would then create a NEW node, orphaning the trashed copy
108
- // — and 30 days later memory-empty-trash would purge the disk dir the
109
- // new node depends on. Restore first so MERGE finds the existing node.
110
- const trashedDoc = await session.run(`MATCH (d:KnowledgeDocument:Trashed)
111
- WHERE d.accountId = $accountId
112
- AND d._trashedKeys IS NOT NULL
113
- AND d._trashedKeys CONTAINS $attachmentId
114
- RETURN elementId(d) AS eid LIMIT 1`, { accountId, attachmentId });
115
- if (trashedDoc.records.length > 0) {
116
- const eid = trashedDoc.records[0].get("eid");
117
- await restoreNode({ session, accountId, elementId: eid });
118
- log("revived", `restored trashed KnowledgeDocument elementId=${eid}`);
119
- }
120
- const docResult = await session.run(`MERGE (d:KnowledgeDocument { attachmentId: $attachmentId })
170
+ const optionalSetClause = optionalSets.length > 0 ? ", " + optionalSets.join(", ") : "";
171
+ const docResult = await dbSession.run(`MERGE (d:KnowledgeDocument { attachmentId: $attachmentId })
121
172
  SET d.accountId = $accountId,
122
173
  d.name = $filename,
123
- d.encodingFormat = $mimeType,
124
- d.summary = $documentSummary,
125
- d.sectionCount = $sectionCount,
126
- d.scope = $scope,
127
- d.embedding = $embedding,
128
- d.createdAt = $createdAt,
129
- d.updatedAt = $updatedAt
174
+ d.encodingFormat = $mimeType,
175
+ d.summary = $documentSummary,
176
+ d.sectionCount = $sectionCount,
177
+ d.scope = $scope,
178
+ d.embedding = $embedding,
179
+ d.createdAt = coalesce(d.createdAt, $createdAt),
180
+ d.updatedAt = $updatedAt
130
181
  ${optionalSetClause}
182
+ REMOVE d.deletedAt
131
183
  RETURN elementId(d) AS nodeId`, {
132
184
  attachmentId,
133
185
  accountId,
@@ -136,114 +188,346 @@ export async function memoryIngest(params) {
136
188
  documentSummary,
137
189
  sectionCount: sections.length,
138
190
  scope,
139
- embedding: embeddings[docEmbedIdx],
191
+ embedding: docEmbedding,
140
192
  createdAt: now,
141
193
  updatedAt: now,
142
194
  ...optionalParams,
143
195
  });
144
196
  documentNodeId = docResult.records[0].get("nodeId");
145
- // 4a-cleanup. Remove stale children before re-creating.
146
- // On first ingestion the queries return nothing and the DELETEs are no-ops.
147
- // Also clear any soft-delete marker on the document itself (re-ingestion revives).
148
- const cleanup = await deleteDocumentChildren(attachmentId, session);
149
- if (cleanup.sections > 0 || cleanup.chunks > 0 || cleanup.references > 0) {
150
- log("cleanup", `deleted ${cleanup.sections} sections, ${cleanup.chunks} chunks, ${cleanup.references} references`);
197
+ console.error(`[memory-ingest] KnowledgeDocument.name=${JSON.stringify(filename)} attachmentId=${attachmentId.slice(0, 8)} docId=${documentNodeId.slice(0, 12)}`);
198
+ // 4. Cleanup prior children (idempotent re-ingest). Removes Section nodes
199
+ // (any secondary label) and any standalone nodes stamped with this
200
+ // attachmentId. MERGEd related entities (Organizations, Persons) are spared.
201
+ const cleanup = await deleteDocumentChildren(attachmentId, dbSession);
202
+ if (cleanup.sections > 0 || cleanup.chunks > 0 || cleanup.typed > 0 || cleanup.references > 0) {
203
+ log("cleanup", `deleted ${cleanup.sections} sections, ${cleanup.chunks} chunks, ${cleanup.typed} typed, ${cleanup.references} references`);
151
204
  }
152
- // Clear deletedAt if the document was previously soft-deleted
153
- await session.run(`MATCH (d:KnowledgeDocument { attachmentId: $attachmentId })
154
- WHERE d.deletedAt IS NOT NULL
155
- REMOVE d.deletedAt`, { attachmentId });
156
- // 4b. Create Section nodes and link HAS_SECTION
157
- const sectionNodeIds = [];
158
- for (let si = 0; si < sections.length; si++) {
159
- const section = sections[si];
160
- const sectionResult = await session.run(`CREATE (s:Section {
161
- accountId: $accountId,
162
- title: $title,
163
- summary: $summary,
164
- position: $position,
165
- scope: $scope,
166
- embedding: $embedding,
167
- createdAt: $createdAt,
168
- updatedAt: $updatedAt
169
- })
170
- RETURN elementId(s) AS nodeId`, {
205
+ // 5. Per-section writes. Track the previous section's elementId so we can
206
+ // chain (:Section)-[:NEXT]->(:Section) in reading order.
207
+ let previousSectionId = null;
208
+ for (let i = 0; i < sections.length; i++) {
209
+ const section = sections[i];
210
+ const bodyEmbedding = embeddings[i + 1]; // +1 because index 0 is docEmbedding
211
+ const baseProps = {
171
212
  accountId,
172
213
  title: section.title,
173
- summary: section.summary,
174
- position: si,
214
+ body: section.body,
215
+ bodyPreview: section.body.slice(0, PREVIEW_LENGTH),
216
+ position: i,
175
217
  scope,
176
- embedding: embeddings[sectionEmbedIndices[si]],
218
+ embedding: bodyEmbedding,
177
219
  createdAt: now,
178
220
  updatedAt: now,
179
- });
180
- const sectionNodeId = sectionResult.records[0].get("nodeId");
181
- sectionNodeIds.push(sectionNodeId);
182
- // Link KnowledgeDocument -[HAS_SECTION]-> Section
183
- await session.run(`MATCH (d), (s)
184
- WHERE elementId(d) = $docId AND elementId(s) = $sectionId
185
- CREATE (d)-[:HAS_SECTION]->(s)`, { docId: documentNodeId, sectionId: sectionNodeId });
186
- }
187
- // 4c. Create Chunk nodes and link HAS_CHUNK — content from cache
188
- for (const { sectionIdx, chunkIdx, embedIdx } of chunkEmbedIndices) {
189
- const cachedChunk = cached.sections[sectionIdx].chunks[chunkIdx];
190
- const chunkSummary = sections[sectionIdx].chunkSummaries[chunkIdx];
191
- const chunkResult = await session.run(`CREATE (c:Chunk {
192
- accountId: $accountId,
193
- summary: $summary,
194
- content: $content,
195
- position: $position,
196
- scope: $scope,
197
- embedding: $embedding,
198
- createdAt: $createdAt,
199
- updatedAt: $updatedAt
200
- })
201
- RETURN elementId(c) AS nodeId`, {
202
- accountId,
203
- summary: chunkSummary,
204
- content: cachedChunk.content,
205
- position: chunkIdx,
206
- scope,
207
- embedding: embeddings[embedIdx],
221
+ createdByAgent: PROVENANCE_AGENT,
222
+ createdBySource: PROVENANCE_AGENT,
223
+ createdBySession: sessionId ?? "",
224
+ source: PROVENANCE_SOURCE,
225
+ sourceDocumentId: attachmentId,
226
+ };
227
+ // 5a. Standalone node kind (currently just Project) — no `:Section` label.
228
+ if (STANDALONE_KINDS_SET.has(section.kind)) {
229
+ const standaloneId = await writeStandaloneNode(dbSession, section, baseProps, anchorNodeId, anchorLabel, attachmentId, sessionId ?? "", now);
230
+ standaloneCount += 1;
231
+ bumpKind(kindBreakdown, section.kind);
232
+ if (section.anchorEdge)
233
+ bumpEdge(edgeBreakdown, section.anchorEdge.type);
234
+ previousSectionId = null; // standalone breaks the section chain
235
+ // related entities for standalone (e.g. Project UNDER Organization)
236
+ if (section.related && section.related.length > 0) {
237
+ for (const related of section.related) {
238
+ await writeRelatedAndEdge(dbSession, standaloneId, related, accountId, now, sessionId ?? "", attachmentId);
239
+ relatedCount += 1;
240
+ bumpEdge(edgeBreakdown, related.edge.type);
241
+ }
242
+ }
243
+ continue;
244
+ }
245
+ // 5b. Section-shaped kind (everything else, including Other). One `:Section`
246
+ // node with optional secondary label.
247
+ const isKnownSectionKind = SECTION_LABEL_KINDS.has(section.kind);
248
+ const sectionLabels = isKnownSectionKind && section.kind !== SECTION_KIND_OTHER
249
+ ? [`Section`, section.kind]
250
+ : section.kind === SECTION_KIND_OTHER
251
+ ? [`Section`, `Other`]
252
+ : [`Section`]; // unrecognised kind — should never happen post-classifier
253
+ // Compose properties: classifier-supplied properties overlaid on system
254
+ // fields, system fields winning. For Section:Other, also stamp
255
+ // classifierReason so the ontology-growth query can surface it.
256
+ const sectionProps = {
257
+ ...section.properties,
258
+ ...baseProps,
259
+ ...(section.kind === SECTION_KIND_OTHER && section.classifierReason
260
+ ? { classifierReason: section.classifierReason }
261
+ : {}),
262
+ };
263
+ const labelClause = sectionLabels.map((l) => `\`${l}\``).join(":");
264
+ const sectionResult = await dbSession.run(`CREATE (s:${labelClause})
265
+ SET s = $props
266
+ WITH s
267
+ MATCH (d:KnowledgeDocument) WHERE elementId(d) = $docId
268
+ CREATE (d)-[hs:HAS_SECTION]->(s)
269
+ SET hs.createdByAgent = $createdByAgent,
270
+ hs.createdBySession = $createdBySession,
271
+ hs.source = $source,
272
+ hs.sourceDocumentId = $sourceDocumentId,
273
+ hs.createdAt = $createdAt
274
+ RETURN elementId(s) AS nodeId`, {
275
+ props: sectionProps,
276
+ docId: documentNodeId,
277
+ createdByAgent: PROVENANCE_AGENT,
278
+ createdBySession: sessionId ?? "",
279
+ source: PROVENANCE_SOURCE,
280
+ sourceDocumentId: attachmentId,
208
281
  createdAt: now,
209
- updatedAt: now,
210
282
  });
211
- const chunkNodeId = chunkResult.records[0].get("nodeId");
212
- totalChunks++;
213
- // Link Section -[HAS_CHUNK]-> Chunk
214
- await session.run(`MATCH (s), (c)
215
- WHERE elementId(s) = $sectionId AND elementId(c) = $chunkId
216
- CREATE (s)-[:HAS_CHUNK]->(c)`, { sectionId: sectionNodeIds[sectionIdx], chunkId: chunkNodeId });
217
- }
218
- // 4d. Create REFERENCES links to entities
219
- if (entities && entities.length > 0) {
220
- for (const entity of entities) {
221
- try {
222
- await session.run(`MATCH (d), (e)
223
- WHERE elementId(d) = $docId AND elementId(e) = $entityId
224
- MERGE (d)-[:REFERENCES]->(e)`, { docId: documentNodeId, entityId: entity.nodeId });
225
- entityLinks++;
226
- }
227
- catch {
228
- // Entity node may have been deleted — skip silently, log via caller
283
+ const sectionId = sectionResult.records[0].get("nodeId");
284
+ bumpKind(kindBreakdown, section.kind);
285
+ bumpEdge(edgeBreakdown, "HAS_SECTION");
286
+ // 5c. NEXT chain in reading order.
287
+ if (previousSectionId) {
288
+ await dbSession.run(`MATCH (a:Section), (b:Section)
289
+ WHERE elementId(a) = $prev AND elementId(b) = $cur
290
+ CREATE (a)-[n:NEXT]->(b)
291
+ SET n.createdByAgent = $createdByAgent,
292
+ n.createdBySession = $createdBySession,
293
+ n.source = $source,
294
+ n.sourceDocumentId = $sourceDocumentId,
295
+ n.createdAt = $createdAt`, {
296
+ prev: previousSectionId,
297
+ cur: sectionId,
298
+ createdByAgent: PROVENANCE_AGENT,
299
+ createdBySession: sessionId ?? "",
300
+ source: PROVENANCE_SOURCE,
301
+ sourceDocumentId: attachmentId,
302
+ createdAt: now,
303
+ });
304
+ bumpEdge(edgeBreakdown, "NEXT");
305
+ }
306
+ previousSectionId = sectionId;
307
+ // 5d. Anchor edge (identity kinds only). Writer applies the edge the
308
+ // classifier proposed; never invents.
309
+ if (section.anchorEdge && IDENTITY_KINDS_SET.has(section.kind)) {
310
+ const direction = section.anchorEdge.direction;
311
+ const edgeType = section.anchorEdge.type;
312
+ const edgeProps = {
313
+ ...(section.anchorEdge.properties ?? {}),
314
+ createdByAgent: PROVENANCE_AGENT,
315
+ createdBySession: sessionId ?? "",
316
+ source: PROVENANCE_SOURCE,
317
+ sourceDocumentId: attachmentId,
318
+ createdAt: now,
319
+ };
320
+ const cypher = direction === "from-anchor"
321
+ ? `MATCH (a:\`${anchorLabel}\`) WHERE elementId(a) = $anchorId AND a.accountId = $accountId
322
+ MATCH (s:Section) WHERE elementId(s) = $sectionId
323
+ CREATE (a)-[edge:\`${edgeType}\`]->(s)
324
+ SET edge += $edgeProps`
325
+ : `MATCH (a:\`${anchorLabel}\`) WHERE elementId(a) = $anchorId AND a.accountId = $accountId
326
+ MATCH (s:Section) WHERE elementId(s) = $sectionId
327
+ CREATE (s)-[edge:\`${edgeType}\`]->(a)
328
+ SET edge += $edgeProps`;
329
+ await dbSession.run(cypher, { anchorId: anchorNodeId, sectionId, accountId, edgeProps });
330
+ bumpEdge(edgeBreakdown, edgeType);
331
+ }
332
+ // 5e. Related entities (Organization for AT, DefinedTerm for DEFINES on
333
+ // :Section:Definitions, etc.). Apply each as the classifier emitted.
334
+ if (section.related && section.related.length > 0) {
335
+ for (const related of section.related) {
336
+ await writeRelatedAndEdge(dbSession, sectionId, related, accountId, now, sessionId ?? "", attachmentId);
337
+ relatedCount += 1;
338
+ bumpEdge(edgeBreakdown, related.edge.type);
229
339
  }
230
340
  }
231
341
  }
232
- log("neo4j-done", `doc=${documentNodeId.slice(0, 12)} sections=${sections.length} chunks=${totalChunks} entityLinks=${entityLinks}`);
342
+ // 6. Document-level edges (PARTY for contracts, etc.). Applied off the
343
+ // KnowledgeDocument; classifier proposes, writer applies.
344
+ if (documentEdges && documentEdges.length > 0) {
345
+ for (const docEdge of documentEdges) {
346
+ await writeDocumentEdge(dbSession, documentNodeId, docEdge, accountId, now, sessionId ?? "", attachmentId);
347
+ relatedCount += 1;
348
+ bumpEdge(edgeBreakdown, docEdge.type);
349
+ }
350
+ }
351
+ log("neo4j-done", `doc=${documentNodeId.slice(0, 12)} sections=${sections.length} kinds=${JSON.stringify(kindBreakdown)} edges=${JSON.stringify(edgeBreakdown)} related=${relatedCount} orphans=${orphanCandidates.length}`);
352
+ console.error(`[memory-ingest] sections=${sections.length} chain=${Math.max(0, sections.length - 1)} typed=${formatBreakdown(kindBreakdown)} edges=${formatBreakdown(edgeBreakdown)} orphans=${orphanCandidates.length} docId=${documentNodeId}`);
353
+ for (const orphan of orphanCandidates) {
354
+ console.error(`[document-ingest] orphan-candidate node=${orphan.kind} label=${JSON.stringify(orphan.label)} reason=${JSON.stringify(orphan.reason)}`);
355
+ }
233
356
  return {
234
357
  documentNodeId,
235
358
  sectionCount: sections.length,
236
- chunkCount: totalChunks,
237
- entityLinks,
359
+ kindBreakdown,
360
+ edgeBreakdown,
361
+ relatedCount,
362
+ standaloneCount,
363
+ orphanCandidates,
238
364
  documentSummary,
239
365
  keywords,
240
366
  };
241
367
  }
242
368
  finally {
243
- // Evict cache entry — content is now persisted in Neo4j
244
369
  extractCache.delete(attachmentId);
245
- await session.close();
370
+ await dbSession.close();
246
371
  log("complete");
247
372
  }
248
373
  }
374
+ // ---------------------------------------------------------------------------
375
+ // Helpers — standalone-node writer (Project), related-edge writer, document-
376
+ // edge writer.
377
+ // ---------------------------------------------------------------------------
378
/**
 * Create a standalone (non-`:Section`) node for a classified section and, when
 * the section carries an `anchorEdge`, link it to the anchor node.
 *
 * Labels and relationship types cannot be passed as Cypher parameters, so they
 * are interpolated into the query text. Every interpolated identifier is
 * backtick-quoted with embedded backticks doubled (and the `\u0060` escape
 * form folded to a backtick first), so a label or edge type containing a
 * backtick cannot terminate the quoted identifier and inject Cypher.
 *
 * @param {object} dbSession - Session exposing `run(cypher, params)`.
 * @param {object} section - Classified section; reads `kind`, `properties`
 *   and the optional `anchorEdge` ({ type, direction, properties }).
 * @param {object} baseProps - System properties; they override same-named
 *   classifier properties. `baseProps.accountId` also scopes the anchor match.
 * @param {string} anchorNodeId - elementId of the anchor node.
 * @param {string} anchorLabel - Label the anchor node must carry.
 * @param {string} attachmentId - Stamped on the edge as `sourceDocumentId`.
 * @param {string} sessionId - Stamped on the edge as `createdBySession`.
 * @param {string} now - Timestamp stamped on the edge as `createdAt`.
 * @returns {Promise<string>} elementId of the created node.
 */
async function writeStandaloneNode(dbSession, section, baseProps, anchorNodeId, anchorLabel, attachmentId, sessionId, now) {
    // Quote an identifier for safe interpolation into Cypher text.
    const quoteIdentifier = (identifier) => {
        const text = String(identifier).replace(/\\u+0060/g, "`");
        return "`" + text.replace(/`/g, "``") + "`";
    };
    // System fields win over anything the classifier supplied.
    const props = { ...section.properties, ...baseProps };
    const created = await dbSession.run(`CREATE (n:${quoteIdentifier(section.kind)})
        SET n = $props
        RETURN elementId(n) AS nodeId`, { props });
    const nodeId = created.records[0].get("nodeId");
    const anchorEdge = section.anchorEdge;
    if (!anchorEdge) {
        return nodeId;
    }
    // Provenance fields are spread last so classifier-supplied edge
    // properties cannot overwrite them.
    const edgeProps = {
        ...(anchorEdge.properties ?? {}),
        createdByAgent: PROVENANCE_AGENT,
        createdBySession: sessionId,
        source: PROVENANCE_SOURCE,
        sourceDocumentId: attachmentId,
        createdAt: now,
    };
    const quotedEdgeType = quoteIdentifier(anchorEdge.type);
    // "from-anchor" points anchor -> node; any other direction points node -> anchor.
    const edgePattern = anchorEdge.direction === "from-anchor"
        ? `(a)-[edge:${quotedEdgeType}]->(n)`
        : `(n)-[edge:${quotedEdgeType}]->(a)`;
    const linked = await dbSession.run(`MATCH (a:${quoteIdentifier(anchorLabel)}) WHERE elementId(a) = $anchorId AND a.accountId = $accountId
        MATCH (n) WHERE elementId(n) = $nodeId
        CREATE ${edgePattern}
        SET edge += $edgeProps
        RETURN elementId(edge) AS edgeId`, {
        anchorId: anchorNodeId,
        nodeId,
        accountId: baseProps.accountId,
        edgeProps,
    });
    // If the anchor MATCH finds nothing the CREATE never runs; surface that
    // instead of letting the missing edge go unnoticed.
    if (!linked?.records?.length) {
        console.error(`[memory-ingest] anchor edge ${JSON.stringify(anchorEdge.type)} not written: no ${JSON.stringify(anchorLabel)} node matched elementId=${anchorNodeId} for this account`);
    }
    return nodeId;
}
413
/**
 * Write a related node via `writeRelatedNode`, then create the relationship
 * between it and `fromNodeId`.
 *
 * @param {object} dbSession - Object exposing `run(cypher, params)`.
 * @param {*} fromNodeId - elementId of the node the relation hangs off.
 * @param {object} related - Reads `edge.type`, `edge.direction` and
 *   `edge.properties`; the whole object is forwarded to `writeRelatedNode`.
 * @param {*} accountId - Forwarded to `writeRelatedNode`.
 * @param {*} now - Stamped on the edge as `createdAt`.
 * @param {*} sessionId - Stamped on the edge as `createdBySession`.
 * @param {*} attachmentId - Stamped on the edge as `sourceDocumentId`.
 * @returns {Promise<void>}
 */
async function writeRelatedAndEdge(dbSession, fromNodeId, related, accountId, now, sessionId, attachmentId) {
    // The relationship type is spliced into the query text, so escape it: a
    // backtick in the name (literal, or written as the \u0060 escape) must not
    // be able to close the quoted identifier and inject Cypher.
    const quoteIdent = (name) => {
        const escaped = String(name).replace(/\\u0060/g, "`").replace(/`/g, "``");
        return "`" + escaped + "`";
    };
    const relatedNodeId = await writeRelatedNode({
        session: dbSession,
        accountId,
        related,
        now,
        sessionId,
    });
    // "outgoing" means fromNode -> related; any other value reverses the edge.
    const isOutgoing = related.edge.direction === "outgoing";
    const [startId, endId] = isOutgoing
        ? [fromNodeId, relatedNodeId]
        : [relatedNodeId, fromNodeId];
    await dbSession.run(`MATCH (a) WHERE elementId(a) = $a
MATCH (b) WHERE elementId(b) = $b
CREATE (a)-[edge:${quoteIdent(related.edge.type)}]->(b)
SET edge += $edgeProps`, {
        a: startId,
        b: endId,
        // Provenance keys come last so they override classifier-supplied keys.
        edgeProps: {
            ...(related.edge.properties ?? {}),
            createdByAgent: PROVENANCE_AGENT,
            createdBySession: sessionId,
            source: PROVENANCE_SOURCE,
            sourceDocumentId: attachmentId,
            createdAt: now,
        },
    });
}
437
/**
 * Write the target node of a document-level edge via `writeRelatedNode`, then
 * create the relationship from the KnowledgeDocument node to that target.
 *
 * @param {object} dbSession - Object exposing `run(cypher, params)`.
 * @param {*} documentNodeId - elementId of the KnowledgeDocument node.
 * @param {object} docEdge - Reads `type`, `direction`, `targetKind`,
 *   `targetProperties` and `merge`.
 * @param {*} accountId - Forwarded to `writeRelatedNode`.
 * @param {*} now - Stamped on the edge as `createdAt`.
 * @param {*} sessionId - Stamped on the edge as `createdBySession`.
 * @param {*} attachmentId - Stamped on the edge as `sourceDocumentId`.
 * @returns {Promise<void>}
 */
async function writeDocumentEdge(dbSession, documentNodeId, docEdge, accountId, now, sessionId, attachmentId) {
    // The relationship type is spliced into the query text, so escape it: a
    // backtick in the name (literal, or written as the \u0060 escape) must not
    // be able to close the quoted identifier and inject Cypher.
    const quoteIdent = (name) => {
        const escaped = String(name).replace(/\\u0060/g, "`").replace(/`/g, "``");
        return "`" + escaped + "`";
    };
    // Build a synthetic ClassifiedRelated so writeRelatedNode can be reused for
    // the target. The MERGE-vs-CREATE decision is the same; the edge hangs off
    // the KnowledgeDocument rather than off a Section. `merge` defaults to true
    // unless the caller passes an explicit `false`.
    const synthetic = {
        kind: docEdge.targetKind,
        properties: docEdge.targetProperties,
        edge: { type: docEdge.type, direction: docEdge.direction },
        merge: docEdge.merge !== false,
    };
    const targetNodeId = await writeRelatedNode({
        session: dbSession,
        accountId,
        related: synthetic,
        now,
        sessionId,
    });
    // NOTE(review): the relationship is always written document -> target;
    // docEdge.direction is only forwarded in the synthetic object above and is
    // not consulted here. Confirm that is intended.
    const provenance = {
        createdByAgent: PROVENANCE_AGENT,
        createdBySession: sessionId,
        source: PROVENANCE_SOURCE,
        sourceDocumentId: attachmentId,
        createdAt: now,
    };
    await dbSession.run(`MATCH (d:KnowledgeDocument) WHERE elementId(d) = $docId
MATCH (n) WHERE elementId(n) = $targetId
CREATE (d)-[edge:${quoteIdent(docEdge.type)}]->(n)
SET edge.createdByAgent = $createdByAgent,
edge.createdBySession = $createdBySession,
edge.source = $source,
edge.sourceDocumentId = $sourceDocumentId,
edge.createdAt = $createdAt`, {
        docId: documentNodeId,
        targetId: targetNodeId,
        ...provenance,
    });
}
471
/**
 * Render a breakdown object as a compact `key:value,key:value` string for log
 * lines. An object with no own enumerable entries renders as "{}".
 *
 * @param {object} b - Breakdown object to render.
 * @returns {string} Comma-joined `key:value` pairs, or "{}" when empty.
 */
function formatBreakdown(b) {
    const pairs = [];
    for (const [label, count] of Object.entries(b)) {
        pairs.push(`${label}:${count}`);
    }
    return pairs.length > 0 ? pairs.join(",") : "{}";
}
477
/**
 * Write a related node and return its elementId.
 *
 * When `related.merge` is not `false` and `mergeKeyFor` yields a key, the node
 * is MERGEd on `{ accountId, <key> }`; otherwise a fresh node is CREATEd.
 *
 * @param {object} opts
 * @param {object} opts.session - Object exposing `run(cypher, params)`.
 * @param {*} opts.accountId - Written to the node; part of the MERGE key.
 * @param {object} opts.related - Reads `kind` (node label), `properties` and
 *   `merge`.
 * @param {*} opts.now - Written as `createdAt` / `updatedAt`.
 * @param {*} [opts.sessionId] - Written as `createdBySession`; nullish becomes "".
 * @returns {Promise<*>} elementId of the merged or created node.
 */
async function writeRelatedNode(opts) {
    const { session, accountId, related, now, sessionId } = opts;
    // Labels and property keys are spliced into the query text, so escape them:
    // a backtick in the name (literal, or written as the \u0060 escape) must
    // not be able to close the quoted identifier and inject Cypher.
    const quoteIdent = (name) => {
        const escaped = String(name).replace(/\\u0060/g, "`").replace(/`/g, "``");
        return "`" + escaped + "`";
    };
    const kindLabel = quoteIdent(related.kind);
    // Compute the embedding from a representative property string so the
    // related node is searchable. Falls back to label + JSON of the properties
    // when there is no string `name` field.
    const embedSource = typeof related.properties.name === "string"
        ? related.properties.name
        : `${related.kind} ${JSON.stringify(related.properties)}`;
    const relatedEmbedding = await embed(embedSource);
    const mergeKey = related.merge !== false ? mergeKeyFor(related.kind, related.properties) : null;
    if (mergeKey) {
        // MERGE on the identifying property + accountId. ON CREATE stamps
        // provenance; ON MATCH only bumps updatedAt, so re-ingesting the same
        // entity from another document keeps the original provenance
        // (first write wins for shared entities).
        // NOTE(review): createdBySource is populated from the agent constant,
        // the same value as createdByAgent — confirm that is intended.
        const merged = await session.run(`MERGE (r:${kindLabel} { accountId: $accountId, ${quoteIdent(mergeKey.key)}: $mergeValue })
ON CREATE SET r += $createProps,
r.embedding = $embedding,
r.createdAt = $createdAt,
r.updatedAt = $createdAt,
r.createdByAgent = $createdByAgent,
r.createdBySource = $createdByAgent,
r.createdBySession = $createdBySession,
r.source = $source
ON MATCH SET r.updatedAt = $createdAt
RETURN elementId(r) AS nodeId`, {
            accountId,
            mergeValue: mergeKey.value,
            createProps: { ...related.properties, accountId, scope: "shared" },
            embedding: relatedEmbedding,
            createdAt: now,
            createdByAgent: PROVENANCE_AGENT,
            createdBySession: sessionId ?? "",
            source: PROVENANCE_SOURCE,
        });
        return merged.records[0].get("nodeId");
    }
    // CREATE — no stable identifying property, so the node is treated as
    // one-of-a-kind for this document.
    // NOTE(review): nothing here writes a sourceDocumentId onto the node (opts
    // carries no document id), so any re-ingest cleanup keyed on that property
    // would not find these nodes — confirm how they are meant to be cleaned up.
    const created = await session.run(`CREATE (r:${kindLabel})
SET r = $props
RETURN elementId(r) AS nodeId`, {
        props: {
            ...related.properties,
            accountId,
            scope: "shared",
            embedding: relatedEmbedding,
            createdAt: now,
            updatedAt: now,
            createdByAgent: PROVENANCE_AGENT,
            createdBySource: PROVENANCE_AGENT,
            createdBySession: sessionId ?? "",
            source: PROVENANCE_SOURCE,
        },
    });
    return created.records[0].get("nodeId");
}
249
533
  //# sourceMappingURL=memory-ingest.js.map