@rubytech/create-maxy 1.0.708 → 1.0.710

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. package/package.json +1 -1
  2. package/payload/platform/lib/mcp-spawn-tee/dist/index.d.ts +53 -0
  3. package/payload/platform/lib/mcp-spawn-tee/dist/index.d.ts.map +1 -0
  4. package/payload/platform/lib/mcp-spawn-tee/dist/index.js +132 -0
  5. package/payload/platform/lib/mcp-spawn-tee/dist/index.js.map +1 -0
  6. package/payload/platform/lib/mcp-spawn-tee/src/index.ts +134 -0
  7. package/payload/platform/lib/mcp-spawn-tee/tsconfig.json +8 -0
  8. package/payload/platform/lib/oauth-llm/dist/index.d.ts +101 -0
  9. package/payload/platform/lib/oauth-llm/dist/index.d.ts.map +1 -0
  10. package/payload/platform/lib/oauth-llm/dist/index.js +353 -0
  11. package/payload/platform/lib/oauth-llm/dist/index.js.map +1 -0
  12. package/payload/platform/lib/oauth-llm/src/index.ts +526 -0
  13. package/payload/platform/lib/oauth-llm/tsconfig.json +8 -0
  14. package/payload/platform/neo4j/schema.cypher +37 -11
  15. package/payload/platform/package.json +2 -2
  16. package/payload/platform/plugins/docs/references/plugins-guide.md +12 -4
  17. package/payload/platform/plugins/email/mcp/dist/lib/screening.d.ts +3 -3
  18. package/payload/platform/plugins/email/mcp/dist/lib/screening.d.ts.map +1 -1
  19. package/payload/platform/plugins/email/mcp/dist/lib/screening.js +12 -12
  20. package/payload/platform/plugins/email/mcp/dist/lib/screening.js.map +1 -1
  21. package/payload/platform/plugins/email/mcp/dist/scripts/email-auto-respond.js +14 -28
  22. package/payload/platform/plugins/email/mcp/dist/scripts/email-auto-respond.js.map +1 -1
  23. package/payload/platform/plugins/email/mcp/dist/scripts/email-fetch.js +9 -19
  24. package/payload/platform/plugins/email/mcp/dist/scripts/email-fetch.js.map +1 -1
  25. package/payload/platform/plugins/memory/mcp/dist/index.js +46 -18
  26. package/payload/platform/plugins/memory/mcp/dist/index.js.map +1 -1
  27. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-loader.test.js +34 -1
  28. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-loader.test.js.map +1 -1
  29. package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.d.ts.map +1 -1
  30. package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.js +22 -18
  31. package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.js.map +1 -1
  32. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts +98 -24
  33. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts.map +1 -1
  34. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js +176 -86
  35. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js.map +1 -1
  36. package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.d.ts.map +1 -1
  37. package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.js +12 -46
  38. package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.js.map +1 -1
  39. package/payload/platform/plugins/memory/mcp/dist/lib/schema-loader.d.ts +10 -0
  40. package/payload/platform/plugins/memory/mcp/dist/lib/schema-loader.d.ts.map +1 -1
  41. package/payload/platform/plugins/memory/mcp/dist/lib/schema-loader.js +22 -3
  42. package/payload/platform/plugins/memory/mcp/dist/lib/schema-loader.js.map +1 -1
  43. package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.d.ts.map +1 -1
  44. package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.js +24 -12
  45. package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.js.map +1 -1
  46. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts +27 -11
  47. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts.map +1 -1
  48. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js +276 -238
  49. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js.map +1 -1
  50. package/payload/platform/plugins/memory/mcp/package.json +3 -1
  51. package/payload/platform/plugins/memory/mcp/scripts/boot-smoke.sh +69 -0
  52. package/payload/platform/plugins/memory/references/graph-primitives.md +22 -0
  53. package/payload/platform/plugins/memory/references/schema-base.md +66 -14
  54. package/payload/platform/plugins/memory/skills/document-ingest/SKILL.md +53 -20
  55. package/payload/platform/templates/specialists/agents/database-operator.md +18 -0
  56. package/payload/server/chunk-A5K3CFMI.js +12297 -0
  57. package/payload/server/chunk-Y57ACANQ.js +12292 -0
  58. package/payload/server/maxy-edge.js +1 -1
  59. package/payload/server/server.js +25 -44
@@ -3,33 +3,51 @@ import { embed, embedBatch } from "../lib/embeddings.js";
3
3
  import { extractCache } from "./memory-ingest-extract.js";
4
4
  import { deleteDocumentChildren } from "../lib/document-hierarchy.js";
5
5
  import { restoreNode } from "../../../../../lib/graph-trash/dist/index.js";
6
+ import { IDENTITY_SECTION_KINDS, STRUCTURAL_SECTION_KINDS, CONTRACT_SECTION_KINDS, STANDALONE_NODE_KINDS, SECTION_KIND_OTHER, } from "../lib/llm-classifier.js";
6
7
  // ---------------------------------------------------------------------------
7
- // Typed-node document ingestion (Task 737).
8
+ // Single-Section document ingestion (Task 740, replacing Task 737's typed-vs-
9
+ // UNMAPPED fork).
8
10
  //
9
11
  // CACHE LOOKUP --> MERGE/REVIVE DOC --> CLEANUP CHILDREN --> EMBED --> WRITE
10
- // (by attachmentId) KnowledgeDocument delete prior batch typed nodes
11
- // (idempotent) Section/Chunk/typed Ollama + anchor edges
12
- // + REFERENCES
12
+ // (by attachmentId) KnowledgeDocument delete prior batch Section nodes
13
+ // (idempotent) Section/Chunk/typed Ollama + secondary labels
14
+ // + NEXT chain
15
+ // + anchor edges
16
+ // + related entities
17
+ // + KD-level edges
13
18
  //
14
- // Each classified section becomes either:
15
- // - a typed graph node (Position, Service, Credential, etc.) anchored to
16
- // UserProfile / LocalBusiness / Person / Organization via the natural
17
- // ontology edge, plus a (KnowledgeDocument)-[:REFERENCES]->(typed) link
18
- // for retrieval, plus optional related-entity nodes (e.g. Position's
19
- // employer Organization, MERGEd by name).
20
- // - a generic :Section node (UNMAPPED fallback) hanging off the document
21
- // via the legacy (KnowledgeDocument)-[:HAS_SECTION] edge, with optional
22
- // :Chunk overflow when the body exceeds MAX_CHUNK_SIZE.
19
+ // Every classified section produces ONE `:Section` node. When the classifier
20
+ // recognises the kind (Position/Education/Chapter/Parties/etc.), the same
21
+ // node carries a secondary label (`:Section:Position`) and any structured
22
+ // properties; identity-kind anchor edges go to the multi-labeled node
23
+ // directly, killing the parallel Section-vs-typed-node concept.
24
+ //
25
+ // Special-case writers fire for two contract-clause kinds:
26
+ // * Parties: `(:KnowledgeDocument)-[:PARTY]->(:Person|:Organization)` from documentEdges.
27
+ // * Definitions: `(:Section:Definitions)-[:DEFINES]->(:DefinedTerm)` from related entries.
28
+ //
29
+ // `:Chunk` is gone. Sections carry their body directly. If a body legitimately
30
+ // exceeds Neo4j's property limit, that is a classifier-split-the-section
31
+ // concern, not a writer problem.
23
32
  //
24
33
  // Provenance properties (createdByAgent, createdBySession, source,
25
- // sourceDocumentId) stamp every node and edge the skill creates so the
26
- // re-ingest cleanup can find and replace them deterministically.
34
+ // sourceDocumentId) stamp every node and edge so re-ingest cleanup finds
35
+ // them deterministically.
27
36
  // ---------------------------------------------------------------------------
28
- const MAX_CHUNK_SIZE = 2000;
29
37
  const PREVIEW_LENGTH = 150;
30
- const UNMAPPED = "UNMAPPED";
31
38
  const PROVENANCE_AGENT = "document-ingest";
32
39
  const PROVENANCE_SOURCE = "document";
40
+ /** Identity-kind anchor edge writer set (UserProfile → Section:Kind). */
41
+ const IDENTITY_KINDS_SET = new Set(IDENTITY_SECTION_KINDS);
42
+ /** Standalone (non-Section) node kinds (currently just Project). */
43
+ const STANDALONE_KINDS_SET = new Set(STANDALONE_NODE_KINDS);
44
+ /** All section-shaped kinds — anything that becomes a `:Section` node. */
45
+ const SECTION_LABEL_KINDS = new Set([
46
+ ...IDENTITY_SECTION_KINDS,
47
+ ...STRUCTURAL_SECTION_KINDS,
48
+ ...CONTRACT_SECTION_KINDS,
49
+ SECTION_KIND_OTHER,
50
+ ]);
33
51
  // ---------------------------------------------------------------------------
34
52
  // Helpers
35
53
  // ---------------------------------------------------------------------------
@@ -38,15 +56,6 @@ function normaliseKeywords(arr) {
38
56
  return [];
39
57
  return arr.map((k) => k.toLowerCase().trim()).filter(Boolean);
40
58
  }
41
- function chunkBody(body) {
42
- if (body.length <= MAX_CHUNK_SIZE)
43
- return [body];
44
- const chunks = [];
45
- for (let i = 0; i < body.length; i += MAX_CHUNK_SIZE) {
46
- chunks.push(body.slice(i, i + MAX_CHUNK_SIZE));
47
- }
48
- return chunks;
49
- }
50
59
  /**
51
60
  * Identifying property for MERGE on a related node by kind.
52
61
  * Choose a stable, human-recognisable key per label so the same real-world
@@ -78,11 +87,17 @@ function mergeKeyFor(kind, properties) {
78
87
  return null;
79
88
  }
80
89
  }
90
+ function bumpKind(breakdown, kind) {
91
+ breakdown[kind] = (breakdown[kind] ?? 0) + 1;
92
+ }
93
+ function bumpEdge(breakdown, edgeType) {
94
+ breakdown[edgeType] = (breakdown[edgeType] ?? 0) + 1;
95
+ }
81
96
  // ---------------------------------------------------------------------------
82
97
  // Main entry point
83
98
  // ---------------------------------------------------------------------------
84
99
  export async function memoryIngest(params) {
85
- const { accountId, attachmentId, documentSummary, anchorNodeId, anchorLabel, sections, scope, sourceUrl, sourceType, documentKeywords: rawDocKeywords, userKeywords: rawUserKeywords, sessionId, } = params;
100
+ const { accountId, attachmentId, documentSummary, anchorNodeId, anchorLabel, sections, documentEdges = [], orphanCandidates = [], scope, sourceUrl, sourceType, documentKeywords: rawDocKeywords, userKeywords: rawUserKeywords, sessionId, } = params;
86
101
  if (!scope) {
87
102
  throw new Error("scope is required — valid values: 'public', 'shared', 'admin', 'user:{identifier}'");
88
103
  }
@@ -116,23 +131,23 @@ export async function memoryIngest(params) {
116
131
  const embeddings = await embedBatch(textsToEmbed);
117
132
  log("embedded", `${embeddings.length} vectors`);
118
133
  const docEmbedding = embeddings[0];
119
- const session = getSession();
134
+ const dbSession = getSession();
120
135
  let documentNodeId = "";
121
- let typedCount = 0;
122
- let unmappedCount = 0;
123
- let chunkCount = 0;
124
- let entityLinks = 0;
136
+ const kindBreakdown = {};
137
+ const edgeBreakdown = {};
138
+ let relatedCount = 0;
139
+ let standaloneCount = 0;
125
140
  try {
126
141
  // 2. Trash-revival (Task 576) — restore a soft-deleted KnowledgeDocument
127
142
  // with this attachmentId so MERGE finds the existing node.
128
- const trashedDoc = await session.run(`MATCH (d:KnowledgeDocument:Trashed)
143
+ const trashedDoc = await dbSession.run(`MATCH (d:KnowledgeDocument:Trashed)
129
144
  WHERE d.accountId = $accountId
130
145
  AND d._trashedKeys IS NOT NULL
131
146
  AND d._trashedKeys CONTAINS $attachmentId
132
147
  RETURN elementId(d) AS eid LIMIT 1`, { accountId, attachmentId });
133
148
  if (trashedDoc.records.length > 0) {
134
149
  const eid = trashedDoc.records[0].get("eid");
135
- await restoreNode({ session, accountId, elementId: eid });
150
+ await restoreNode({ session: dbSession, accountId, elementId: eid });
136
151
  log("revived", `restored trashed KnowledgeDocument elementId=${eid}`);
137
152
  }
138
153
  // 3. MERGE the KnowledgeDocument parent. Optional fields (sourceUrl,
@@ -153,7 +168,7 @@ export async function memoryIngest(params) {
153
168
  optionalParams.keywords = keywords;
154
169
  }
155
170
  const optionalSetClause = optionalSets.length > 0 ? ", " + optionalSets.join(", ") : "";
156
- const docResult = await session.run(`MERGE (d:KnowledgeDocument { attachmentId: $attachmentId })
171
+ const docResult = await dbSession.run(`MERGE (d:KnowledgeDocument { attachmentId: $attachmentId })
157
172
  SET d.accountId = $accountId,
158
173
  d.name = $filename,
159
174
  d.encodingFormat = $mimeType,
@@ -179,21 +194,21 @@ export async function memoryIngest(params) {
179
194
  ...optionalParams,
180
195
  });
181
196
  documentNodeId = docResult.records[0].get("nodeId");
182
- // 4. Cleanup prior children (idempotent re-ingest). Removes generic
183
- // :Section/Chunk fallbacks and typed nodes that originated from this
184
- // document. MERGEd related nodes (Organizations, Persons) are spared.
185
- const cleanup = await deleteDocumentChildren(attachmentId, session);
186
- if (cleanup.sections > 0 ||
187
- cleanup.chunks > 0 ||
188
- cleanup.typed > 0 ||
189
- cleanup.references > 0) {
197
+ console.error(`[memory-ingest] KnowledgeDocument.name=${JSON.stringify(filename)} attachmentId=${attachmentId.slice(0, 8)} docId=${documentNodeId.slice(0, 12)}`);
198
+ // 4. Cleanup prior children (idempotent re-ingest). Removes Section nodes
199
+ // (any secondary label) and any standalone nodes stamped with this
200
+ // attachmentId. MERGEd related entities (Organizations, Persons) are spared.
201
+ const cleanup = await deleteDocumentChildren(attachmentId, dbSession);
202
+ if (cleanup.sections > 0 || cleanup.chunks > 0 || cleanup.typed > 0 || cleanup.references > 0) {
190
203
  log("cleanup", `deleted ${cleanup.sections} sections, ${cleanup.chunks} chunks, ${cleanup.typed} typed, ${cleanup.references} references`);
191
204
  }
192
- // 5. Per-section writes.
205
+ // 5. Per-section writes. Track the previous section's elementId so we can
206
+ // chain (:Section)-[:NEXT]->(:Section) in reading order.
207
+ let previousSectionId = null;
193
208
  for (let i = 0; i < sections.length; i++) {
194
209
  const section = sections[i];
195
210
  const bodyEmbedding = embeddings[i + 1]; // +1 because index 0 is docEmbedding
196
- const sectionParams = {
211
+ const baseProps = {
197
212
  accountId,
198
213
  title: section.title,
199
214
  body: section.body,
@@ -209,172 +224,91 @@ export async function memoryIngest(params) {
209
224
  source: PROVENANCE_SOURCE,
210
225
  sourceDocumentId: attachmentId,
211
226
  };
212
- if (section.kind === UNMAPPED) {
213
- // 5a. UNMAPPED: generic :Section + (KnowledgeDocument)-[:HAS_SECTION]
214
- const sectionResult = await session.run(`CREATE (s:Section {
215
- accountId: $accountId,
216
- title: $title,
217
- body: $body,
218
- summary: $bodyPreview,
219
- position: $position,
220
- scope: $scope,
221
- embedding: $embedding,
222
- createdAt: $createdAt,
223
- updatedAt: $updatedAt,
224
- createdByAgent: $createdByAgent,
225
- createdBySource: $createdBySource,
226
- createdBySession: $createdBySession,
227
- source: $source,
228
- sourceDocumentId: $sourceDocumentId
229
- })
230
- WITH s
231
- MATCH (d:KnowledgeDocument) WHERE elementId(d) = $docId
232
- CREATE (d)-[:HAS_SECTION]->(s)
233
- RETURN elementId(s) AS nodeId`, { ...sectionParams, docId: documentNodeId });
234
- const sectionId = sectionResult.records[0].get("nodeId");
235
- // Body overflow → :Chunk children
236
- if (section.body.length > MAX_CHUNK_SIZE) {
237
- const overflowChunks = chunkBody(section.body);
238
- const overflowEmbeddings = await embedBatch(overflowChunks);
239
- for (let ci = 0; ci < overflowChunks.length; ci++) {
240
- await session.run(`MATCH (s:Section) WHERE elementId(s) = $sectionId
241
- CREATE (s)-[:HAS_CHUNK]->(c:Chunk {
242
- accountId: $accountId,
243
- content: $content,
244
- summary: $summary,
245
- position: $position,
246
- scope: $scope,
247
- embedding: $embedding,
248
- createdAt: $createdAt,
249
- updatedAt: $updatedAt,
250
- createdByAgent: $createdByAgent,
251
- createdBySource: $createdBySource,
252
- createdBySession: $createdBySession,
253
- source: $source,
254
- sourceDocumentId: $sourceDocumentId
255
- })`, {
256
- sectionId,
257
- accountId,
258
- content: overflowChunks[ci],
259
- summary: overflowChunks[ci].slice(0, PREVIEW_LENGTH),
260
- position: ci,
261
- scope,
262
- embedding: overflowEmbeddings[ci],
263
- createdAt: now,
264
- updatedAt: now,
265
- createdByAgent: PROVENANCE_AGENT,
266
- createdBySource: PROVENANCE_AGENT,
267
- createdBySession: sessionId ?? "",
268
- source: PROVENANCE_SOURCE,
269
- sourceDocumentId: attachmentId,
270
- });
271
- chunkCount += 1;
227
+ // 5a. Standalone node kind (currently just Project) — no `:Section` label.
228
+ if (STANDALONE_KINDS_SET.has(section.kind)) {
229
+ const standaloneId = await writeStandaloneNode(dbSession, section, baseProps, anchorNodeId, anchorLabel, attachmentId, sessionId ?? "", now);
230
+ standaloneCount += 1;
231
+ bumpKind(kindBreakdown, section.kind);
232
+ if (section.anchorEdge)
233
+ bumpEdge(edgeBreakdown, section.anchorEdge.type);
234
+ previousSectionId = null; // standalone breaks the section chain
235
+ // related entities for standalone (e.g. Project UNDER Organization)
236
+ if (section.related && section.related.length > 0) {
237
+ for (const related of section.related) {
238
+ await writeRelatedAndEdge(dbSession, standaloneId, related, accountId, now, sessionId ?? "", attachmentId);
239
+ relatedCount += 1;
240
+ bumpEdge(edgeBreakdown, related.edge.type);
272
241
  }
273
242
  }
274
- unmappedCount += 1;
275
- console.error(`[document-ingest] unmapped-section title="${section.title}" chars=${section.body.length}`);
276
243
  continue;
277
244
  }
278
- // 5b. Typed kind write the typed node, anchor edge, related nodes,
279
- // and the document REFERENCES link.
280
- // Build the typed-node properties: classifier-supplied properties
281
- // overlaid on top of the system fields, with system fields winning.
282
- const typedProps = {
245
+ // 5b. Section-shaped kind (everything else, including Other). One `:Section`
246
+ // node with optional secondary label.
247
+ const isKnownSectionKind = SECTION_LABEL_KINDS.has(section.kind);
248
+ const sectionLabels = isKnownSectionKind && section.kind !== SECTION_KIND_OTHER
249
+ ? [`Section`, section.kind]
250
+ : section.kind === SECTION_KIND_OTHER
251
+ ? [`Section`, `Other`]
252
+ : [`Section`]; // unrecognised kind — should never happen post-classifier
253
+ // Compose properties: classifier-supplied properties overlaid on system
254
+ // fields, system fields winning. For Section:Other, also stamp
255
+ // classifierReason so the ontology-growth query can surface it.
256
+ const sectionProps = {
283
257
  ...section.properties,
284
- accountId,
285
- title: section.title,
286
- body: section.body,
287
- scope,
288
- embedding: bodyEmbedding,
289
- createdAt: now,
290
- updatedAt: now,
258
+ ...baseProps,
259
+ ...(section.kind === SECTION_KIND_OTHER && section.classifierReason
260
+ ? { classifierReason: section.classifierReason }
261
+ : {}),
262
+ };
263
+ const labelClause = sectionLabels.map((l) => `\`${l}\``).join(":");
264
+ const sectionResult = await dbSession.run(`CREATE (s:${labelClause})
265
+ SET s = $props
266
+ WITH s
267
+ MATCH (d:KnowledgeDocument) WHERE elementId(d) = $docId
268
+ CREATE (d)-[hs:HAS_SECTION]->(s)
269
+ SET hs.createdByAgent = $createdByAgent,
270
+ hs.createdBySession = $createdBySession,
271
+ hs.source = $source,
272
+ hs.sourceDocumentId = $sourceDocumentId,
273
+ hs.createdAt = $createdAt
274
+ RETURN elementId(s) AS nodeId`, {
275
+ props: sectionProps,
276
+ docId: documentNodeId,
291
277
  createdByAgent: PROVENANCE_AGENT,
292
- createdBySource: PROVENANCE_AGENT,
293
278
  createdBySession: sessionId ?? "",
294
279
  source: PROVENANCE_SOURCE,
295
280
  sourceDocumentId: attachmentId,
296
- };
297
- // CREATE the typed node. Label is interpolated (validated against the
298
- // ontology label set by the classifier) — Cypher does not allow
299
- // parameterising labels.
300
- const typedResult = await session.run(`CREATE (t:\`${section.kind}\`)
301
- SET t = $props
302
- RETURN elementId(t) AS nodeId`, { props: typedProps });
303
- const typedNodeId = typedResult.records[0].get("nodeId");
304
- // Body overflow :Chunk children attached directly to the typed node
305
- // (mirroring :Section overflow). Most typed nodes won't trip this.
306
- if (section.body.length > MAX_CHUNK_SIZE) {
307
- const overflowChunks = chunkBody(section.body);
308
- const overflowEmbeddings = await embedBatch(overflowChunks);
309
- for (let ci = 0; ci < overflowChunks.length; ci++) {
310
- await session.run(`MATCH (t) WHERE elementId(t) = $typedId
311
- CREATE (t)-[:HAS_CHUNK]->(c:Chunk {
312
- accountId: $accountId,
313
- content: $content,
314
- summary: $summary,
315
- position: $position,
316
- scope: $scope,
317
- embedding: $embedding,
318
- createdAt: $createdAt,
319
- updatedAt: $updatedAt,
320
- createdByAgent: $createdByAgent,
321
- createdBySource: $createdBySource,
322
- createdBySession: $createdBySession,
323
- source: $source,
324
- sourceDocumentId: $sourceDocumentId
325
- })`, {
326
- typedId: typedNodeId,
327
- accountId,
328
- content: overflowChunks[ci],
329
- summary: overflowChunks[ci].slice(0, PREVIEW_LENGTH),
330
- position: ci,
331
- scope,
332
- embedding: overflowEmbeddings[ci],
333
- createdAt: now,
334
- updatedAt: now,
335
- createdByAgent: PROVENANCE_AGENT,
336
- createdBySource: PROVENANCE_AGENT,
337
- createdBySession: sessionId ?? "",
338
- source: PROVENANCE_SOURCE,
339
- sourceDocumentId: attachmentId,
340
- });
341
- chunkCount += 1;
342
- }
343
- }
344
- // Related nodes (e.g. Position's employer Organization). MERGE when
345
- // the related kind has a stable identifying property; CREATE when not.
346
- // Provenance is stamped only on creation (ON CREATE SET) so shared
347
- // entities don't get rewritten by subsequent ingestions.
348
- if (section.related && section.related.length > 0) {
349
- for (const related of section.related) {
350
- const relatedNodeId = await writeRelatedNode({
351
- session,
352
- accountId,
353
- related,
354
- now,
355
- sessionId,
356
- });
357
- // Edge from typed node to related node
358
- await session.run(`MATCH (t) WHERE elementId(t) = $typedId
359
- MATCH (r) WHERE elementId(r) = $relatedId
360
- CREATE (t)-[edge:\`${related.edge.type}\`]->(r)
361
- SET edge += $edgeProps`, {
362
- typedId: related.edge.direction === "outgoing" ? typedNodeId : relatedNodeId,
363
- relatedId: related.edge.direction === "outgoing" ? relatedNodeId : typedNodeId,
364
- edgeProps: {
365
- ...(related.edge.properties ?? {}),
366
- createdByAgent: PROVENANCE_AGENT,
367
- createdBySession: sessionId ?? "",
368
- source: PROVENANCE_SOURCE,
369
- sourceDocumentId: attachmentId,
370
- createdAt: now,
371
- },
372
- });
373
- }
281
+ createdAt: now,
282
+ });
283
+ const sectionId = sectionResult.records[0].get("nodeId");
284
+ bumpKind(kindBreakdown, section.kind);
285
+ bumpEdge(edgeBreakdown, "HAS_SECTION");
286
+ // 5c. NEXT chain in reading order.
287
+ if (previousSectionId) {
288
+ await dbSession.run(`MATCH (a:Section), (b:Section)
289
+ WHERE elementId(a) = $prev AND elementId(b) = $cur
290
+ CREATE (a)-[n:NEXT]->(b)
291
+ SET n.createdByAgent = $createdByAgent,
292
+ n.createdBySession = $createdBySession,
293
+ n.source = $source,
294
+ n.sourceDocumentId = $sourceDocumentId,
295
+ n.createdAt = $createdAt`, {
296
+ prev: previousSectionId,
297
+ cur: sectionId,
298
+ createdByAgent: PROVENANCE_AGENT,
299
+ createdBySession: sessionId ?? "",
300
+ source: PROVENANCE_SOURCE,
301
+ sourceDocumentId: attachmentId,
302
+ createdAt: now,
303
+ });
304
+ bumpEdge(edgeBreakdown, "NEXT");
374
305
  }
375
- // Anchor edge (anchor → typed or typed → anchor)
376
- if (section.anchorEdge) {
306
+ previousSectionId = sectionId;
307
+ // 5d. Anchor edge (identity kinds only). Writer applies the edge the
308
+ // classifier proposed; never invents.
309
+ if (section.anchorEdge && IDENTITY_KINDS_SET.has(section.kind)) {
377
310
  const direction = section.anchorEdge.direction;
311
+ const edgeType = section.anchorEdge.type;
378
312
  const edgeProps = {
379
313
  ...(section.anchorEdge.properties ?? {}),
380
314
  createdByAgent: PROVENANCE_AGENT,
@@ -383,59 +317,163 @@ export async function memoryIngest(params) {
383
317
  sourceDocumentId: attachmentId,
384
318
  createdAt: now,
385
319
  };
386
- if (direction === "from-anchor") {
387
- await session.run(`MATCH (a:\`${anchorLabel}\`) WHERE elementId(a) = $anchorId AND a.accountId = $accountId
388
- MATCH (t) WHERE elementId(t) = $typedId
389
- CREATE (a)-[edge:\`${section.anchorEdge.type}\`]->(t)
390
- SET edge += $edgeProps`, { anchorId: anchorNodeId, typedId: typedNodeId, accountId, edgeProps });
391
- }
392
- else {
393
- await session.run(`MATCH (a:\`${anchorLabel}\`) WHERE elementId(a) = $anchorId AND a.accountId = $accountId
394
- MATCH (t) WHERE elementId(t) = $typedId
395
- CREATE (t)-[edge:\`${section.anchorEdge.type}\`]->(a)
396
- SET edge += $edgeProps`, { anchorId: anchorNodeId, typedId: typedNodeId, accountId, edgeProps });
320
+ const cypher = direction === "from-anchor"
321
+ ? `MATCH (a:\`${anchorLabel}\`) WHERE elementId(a) = $anchorId AND a.accountId = $accountId
322
+ MATCH (s:Section) WHERE elementId(s) = $sectionId
323
+ CREATE (a)-[edge:\`${edgeType}\`]->(s)
324
+ SET edge += $edgeProps`
325
+ : `MATCH (a:\`${anchorLabel}\`) WHERE elementId(a) = $anchorId AND a.accountId = $accountId
326
+ MATCH (s:Section) WHERE elementId(s) = $sectionId
327
+ CREATE (s)-[edge:\`${edgeType}\`]->(a)
328
+ SET edge += $edgeProps`;
329
+ await dbSession.run(cypher, { anchorId: anchorNodeId, sectionId, accountId, edgeProps });
330
+ bumpEdge(edgeBreakdown, edgeType);
331
+ }
332
+ // 5e. Related entities (Organization for AT, DefinedTerm for DEFINES on
333
+ // :Section:Definitions, etc.). Apply each as the classifier emitted.
334
+ if (section.related && section.related.length > 0) {
335
+ for (const related of section.related) {
336
+ await writeRelatedAndEdge(dbSession, sectionId, related, accountId, now, sessionId ?? "", attachmentId);
337
+ relatedCount += 1;
338
+ bumpEdge(edgeBreakdown, related.edge.type);
397
339
  }
398
340
  }
399
- // (KnowledgeDocument)-[:REFERENCES]->(typed) for retrieval.
400
- await session.run(`MATCH (d:KnowledgeDocument) WHERE elementId(d) = $docId
401
- MATCH (t) WHERE elementId(t) = $typedId
402
- CREATE (d)-[r:REFERENCES]->(t)
403
- SET r.createdByAgent = $createdByAgent,
404
- r.createdBySession = $createdBySession,
405
- r.source = $source,
406
- r.sourceDocumentId = $sourceDocumentId,
407
- r.createdAt = $createdAt`, {
408
- docId: documentNodeId,
409
- typedId: typedNodeId,
410
- createdByAgent: PROVENANCE_AGENT,
411
- createdBySession: sessionId ?? "",
412
- source: PROVENANCE_SOURCE,
413
- sourceDocumentId: attachmentId,
414
- createdAt: now,
415
- });
416
- entityLinks += 1;
417
- typedCount += 1;
418
- console.error(`[document-ingest] section kind=${section.kind} title="${section.title}" chars=${section.body.length}`);
419
341
  }
420
- log("neo4j-done", `doc=${documentNodeId.slice(0, 12)} sections=${sections.length} typed=${typedCount} unmapped=${unmappedCount} chunks=${chunkCount} entityLinks=${entityLinks}`);
421
- console.error(`[document-ingest] done sections=${sections.length} typed=${typedCount} unmapped=${unmappedCount} ms=${Date.now() - t0}`);
342
+ // 6. Document-level edges (PARTY for contracts, etc.). Applied off the
343
+ // KnowledgeDocument; classifier proposes, writer applies.
344
+ if (documentEdges && documentEdges.length > 0) {
345
+ for (const docEdge of documentEdges) {
346
+ await writeDocumentEdge(dbSession, documentNodeId, docEdge, accountId, now, sessionId ?? "", attachmentId);
347
+ relatedCount += 1;
348
+ bumpEdge(edgeBreakdown, docEdge.type);
349
+ }
350
+ }
351
+ log("neo4j-done", `doc=${documentNodeId.slice(0, 12)} sections=${sections.length} kinds=${JSON.stringify(kindBreakdown)} edges=${JSON.stringify(edgeBreakdown)} related=${relatedCount} orphans=${orphanCandidates.length}`);
352
+ console.error(`[memory-ingest] sections=${sections.length} chain=${Math.max(0, sections.length - 1)} typed=${formatBreakdown(kindBreakdown)} edges=${formatBreakdown(edgeBreakdown)} orphans=${orphanCandidates.length} docId=${documentNodeId}`);
353
+ for (const orphan of orphanCandidates) {
354
+ console.error(`[document-ingest] orphan-candidate node=${orphan.kind} label=${JSON.stringify(orphan.label)} reason=${JSON.stringify(orphan.reason)}`);
355
+ }
422
356
  return {
423
357
  documentNodeId,
424
358
  sectionCount: sections.length,
425
- typedCount,
426
- unmappedCount,
427
- chunkCount,
428
- entityLinks,
359
+ kindBreakdown,
360
+ edgeBreakdown,
361
+ relatedCount,
362
+ standaloneCount,
363
+ orphanCandidates,
429
364
  documentSummary,
430
365
  keywords,
431
366
  };
432
367
  }
433
368
  finally {
434
369
  extractCache.delete(attachmentId);
435
- await session.close();
370
+ await dbSession.close();
436
371
  log("complete");
437
372
  }
438
373
  }
374
+ // ---------------------------------------------------------------------------
375
+ // Helpers — standalone-node writer (Project), related-edge writer, document-
376
+ // edge writer.
377
+ // ---------------------------------------------------------------------------
378
+ async function writeStandaloneNode(dbSession, section, baseProps, anchorNodeId, anchorLabel, attachmentId, sessionId, now) {
379
+ const props = { ...section.properties, ...baseProps };
380
+ const r = await dbSession.run(`CREATE (n:\`${section.kind}\`)
381
+ SET n = $props
382
+ RETURN elementId(n) AS nodeId`, { props });
383
+ const nodeId = r.records[0].get("nodeId");
384
+ if (section.anchorEdge) {
385
+ const direction = section.anchorEdge.direction;
386
+ const edgeType = section.anchorEdge.type;
387
+ const edgeProps = {
388
+ ...(section.anchorEdge.properties ?? {}),
389
+ createdByAgent: PROVENANCE_AGENT,
390
+ createdBySession: sessionId,
391
+ source: PROVENANCE_SOURCE,
392
+ sourceDocumentId: attachmentId,
393
+ createdAt: now,
394
+ };
395
+ const cypher = direction === "from-anchor"
396
+ ? `MATCH (a:\`${anchorLabel}\`) WHERE elementId(a) = $anchorId AND a.accountId = $accountId
397
+ MATCH (n) WHERE elementId(n) = $nodeId
398
+ CREATE (a)-[edge:\`${edgeType}\`]->(n)
399
+ SET edge += $edgeProps`
400
+ : `MATCH (a:\`${anchorLabel}\`) WHERE elementId(a) = $anchorId AND a.accountId = $accountId
401
+ MATCH (n) WHERE elementId(n) = $nodeId
402
+ CREATE (n)-[edge:\`${edgeType}\`]->(a)
403
+ SET edge += $edgeProps`;
404
+ await dbSession.run(cypher, {
405
+ anchorId: anchorNodeId,
406
+ nodeId,
407
+ accountId: baseProps.accountId,
408
+ edgeProps,
409
+ });
410
+ }
411
+ return nodeId;
412
+ }
413
+ async function writeRelatedAndEdge(dbSession, fromNodeId, related, accountId, now, sessionId, attachmentId) {
414
+ const relatedNodeId = await writeRelatedNode({
415
+ session: dbSession,
416
+ accountId,
417
+ related,
418
+ now,
419
+ sessionId,
420
+ });
421
+ await dbSession.run(`MATCH (a) WHERE elementId(a) = $a
422
+ MATCH (b) WHERE elementId(b) = $b
423
+ CREATE (a)-[edge:\`${related.edge.type}\`]->(b)
424
+ SET edge += $edgeProps`, {
425
+ a: related.edge.direction === "outgoing" ? fromNodeId : relatedNodeId,
426
+ b: related.edge.direction === "outgoing" ? relatedNodeId : fromNodeId,
427
+ edgeProps: {
428
+ ...(related.edge.properties ?? {}),
429
+ createdByAgent: PROVENANCE_AGENT,
430
+ createdBySession: sessionId,
431
+ source: PROVENANCE_SOURCE,
432
+ sourceDocumentId: attachmentId,
433
+ createdAt: now,
434
+ },
435
+ });
436
+ }
437
+ async function writeDocumentEdge(dbSession, documentNodeId, docEdge, accountId, now, sessionId, attachmentId) {
438
+ // Build a synthetic ClassifiedRelated so we can reuse writeRelatedNode for
439
+ // the target. The MERGE-vs-CREATE decision is the same; the edge is off
440
+ // the KnowledgeDocument rather than off a Section.
441
+ const synthetic = {
442
+ kind: docEdge.targetKind,
443
+ properties: docEdge.targetProperties,
444
+ edge: { type: docEdge.type, direction: docEdge.direction },
445
+ merge: docEdge.merge !== false,
446
+ };
447
+ const targetNodeId = await writeRelatedNode({
448
+ session: dbSession,
449
+ accountId,
450
+ related: synthetic,
451
+ now,
452
+ sessionId,
453
+ });
454
+ await dbSession.run(`MATCH (d:KnowledgeDocument) WHERE elementId(d) = $docId
455
+ MATCH (n) WHERE elementId(n) = $targetId
456
+ CREATE (d)-[edge:\`${docEdge.type}\`]->(n)
457
+ SET edge.createdByAgent = $createdByAgent,
458
+ edge.createdBySession = $createdBySession,
459
+ edge.source = $source,
460
+ edge.sourceDocumentId = $sourceDocumentId,
461
+ edge.createdAt = $createdAt`, {
462
+ docId: documentNodeId,
463
+ targetId: targetNodeId,
464
+ createdByAgent: PROVENANCE_AGENT,
465
+ createdBySession: sessionId,
466
+ source: PROVENANCE_SOURCE,
467
+ sourceDocumentId: attachmentId,
468
+ createdAt: now,
469
+ });
470
+ }
471
+ function formatBreakdown(b) {
472
+ const entries = Object.entries(b);
473
+ if (entries.length === 0)
474
+ return "{}";
475
+ return entries.map(([k, v]) => `${k}:${v}`).join(",");
476
+ }
439
477
  async function writeRelatedNode(opts) {
440
478
  const { session, accountId, related, now, sessionId } = opts;
441
479
  // Compute embedding from a representative property string so the related