@rubytech/create-maxy 1.0.705 → 1.0.707

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. package/dist/__tests__/apt-resolve.test.js +179 -0
  2. package/dist/apt-resolve.js +73 -0
  3. package/dist/index.js +48 -46
  4. package/package.json +3 -3
  5. package/payload/platform/lib/graph-mcp/dist/__tests__/schema-cypher-parser.test.d.ts +2 -0
  6. package/payload/platform/lib/graph-mcp/dist/__tests__/schema-cypher-parser.test.d.ts.map +1 -0
  7. package/payload/platform/lib/graph-mcp/dist/__tests__/schema-cypher-parser.test.js +89 -0
  8. package/payload/platform/lib/graph-mcp/dist/__tests__/schema-cypher-parser.test.js.map +1 -0
  9. package/payload/platform/lib/graph-mcp/dist/schema-cypher-parser.d.ts +42 -0
  10. package/payload/platform/lib/graph-mcp/dist/schema-cypher-parser.d.ts.map +1 -0
  11. package/payload/platform/lib/graph-mcp/dist/schema-cypher-parser.js +87 -0
  12. package/payload/platform/lib/graph-mcp/dist/schema-cypher-parser.js.map +1 -0
  13. package/payload/platform/lib/graph-mcp/src/__tests__/schema-cypher-parser.test.ts +99 -0
  14. package/payload/platform/lib/graph-mcp/src/schema-cypher-parser.ts +84 -0
  15. package/payload/platform/neo4j/schema.cypher +23 -0
  16. package/payload/platform/plugins/admin/PLUGIN.md +1 -0
  17. package/payload/platform/plugins/admin/mcp/dist/index.js +30 -0
  18. package/payload/platform/plugins/admin/mcp/dist/index.js.map +1 -1
  19. package/payload/platform/plugins/admin/skills/business-profile/SKILL.md +2 -2
  20. package/payload/platform/plugins/admin/skills/onboarding/SKILL.md +47 -6
  21. package/payload/platform/plugins/docs/references/adherence.md +1 -1
  22. package/payload/platform/plugins/memory/PLUGIN.md +25 -16
  23. package/payload/platform/plugins/memory/mcp/dist/index.js +146 -38
  24. package/payload/platform/plugins/memory/mcp/dist/index.js.map +1 -1
  25. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/live-schema-source.test.d.ts +2 -0
  26. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/live-schema-source.test.d.ts.map +1 -0
  27. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/live-schema-source.test.js +92 -0
  28. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/live-schema-source.test.js.map +1 -0
  29. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-loader.test.d.ts +2 -0
  30. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-loader.test.d.ts.map +1 -0
  31. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-loader.test.js +51 -0
  32. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-loader.test.js.map +1 -0
  33. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-validator.test.d.ts +2 -0
  34. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-validator.test.d.ts.map +1 -0
  35. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-validator.test.js +222 -0
  36. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-validator.test.js.map +1 -0
  37. package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.d.ts +1 -7
  38. package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.d.ts.map +1 -1
  39. package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.js +27 -14
  40. package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.js.map +1 -1
  41. package/payload/platform/plugins/memory/mcp/dist/lib/graph-write-gate.d.ts +16 -0
  42. package/payload/platform/plugins/memory/mcp/dist/lib/graph-write-gate.d.ts.map +1 -1
  43. package/payload/platform/plugins/memory/mcp/dist/lib/graph-write-gate.js +38 -11
  44. package/payload/platform/plugins/memory/mcp/dist/lib/graph-write-gate.js.map +1 -1
  45. package/payload/platform/plugins/memory/mcp/dist/lib/live-schema-source.d.ts +136 -0
  46. package/payload/platform/plugins/memory/mcp/dist/lib/live-schema-source.d.ts.map +1 -0
  47. package/payload/platform/plugins/memory/mcp/dist/lib/live-schema-source.js +180 -0
  48. package/payload/platform/plugins/memory/mcp/dist/lib/live-schema-source.js.map +1 -0
  49. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts +126 -0
  50. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts.map +1 -0
  51. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js +253 -0
  52. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js.map +1 -0
  53. package/payload/platform/plugins/memory/mcp/dist/lib/schema-loader.d.ts +11 -2
  54. package/payload/platform/plugins/memory/mcp/dist/lib/schema-loader.d.ts.map +1 -1
  55. package/payload/platform/plugins/memory/mcp/dist/lib/schema-loader.js +6 -3
  56. package/payload/platform/plugins/memory/mcp/dist/lib/schema-loader.js.map +1 -1
  57. package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.d.ts +44 -22
  58. package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.d.ts.map +1 -1
  59. package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.js +94 -57
  60. package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.js.map +1 -1
  61. package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.d.ts +34 -0
  62. package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.d.ts.map +1 -0
  63. package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.js +46 -0
  64. package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.js.map +1 -0
  65. package/payload/platform/plugins/memory/mcp/dist/tools/memory-edit-attachment.d.ts +1 -2
  66. package/payload/platform/plugins/memory/mcp/dist/tools/memory-edit-attachment.d.ts.map +1 -1
  67. package/payload/platform/plugins/memory/mcp/dist/tools/memory-edit-attachment.js +8 -9
  68. package/payload/platform/plugins/memory/mcp/dist/tools/memory-edit-attachment.js.map +1 -1
  69. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-extract.d.ts +5 -17
  70. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-extract.d.ts.map +1 -1
  71. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-extract.js +26 -49
  72. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-extract.js.map +1 -1
  73. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-web.d.ts.map +1 -1
  74. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-web.js +4 -25
  75. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-web.js.map +1 -1
  76. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts +23 -14
  77. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts.map +1 -1
  78. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js +410 -164
  79. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js.map +1 -1
  80. package/payload/platform/plugins/memory/mcp/dist/tools/memory-write.d.ts +7 -5
  81. package/payload/platform/plugins/memory/mcp/dist/tools/memory-write.d.ts.map +1 -1
  82. package/payload/platform/plugins/memory/mcp/dist/tools/memory-write.js +2 -2
  83. package/payload/platform/plugins/memory/mcp/dist/tools/memory-write.js.map +1 -1
  84. package/payload/platform/plugins/memory/references/schema-base.md +33 -0
  85. package/payload/platform/plugins/memory/skills/document-ingest/SKILL.md +112 -0
  86. package/payload/platform/templates/agents/admin/IDENTITY.md +1 -2
  87. package/payload/platform/templates/specialists/agents/content-producer.md +10 -77
  88. package/payload/platform/templates/specialists/agents/database-operator.md +21 -13
  89. package/payload/server/chunk-PE76FPYP.js +12040 -0
  90. package/payload/server/maxy-edge.js +1 -1
  91. package/payload/server/public/assets/{Checkbox-B2Lk8F4X.js → Checkbox-CjbS9JcG.js} +1 -1
  92. package/payload/server/public/assets/{admin-agtgi48Q.js → admin-Ce9DbUuu.js} +1 -1
  93. package/payload/server/public/assets/{data-B7nsyBTV.js → data-C-SxjLC9.js} +1 -1
  94. package/payload/server/public/assets/{file-DHWTu8LP.js → file-D4cbAAuo.js} +1 -1
  95. package/payload/server/public/assets/{graph-ChDwqqhJ.js → graph-BRD96pKD.js} +8 -8
  96. package/payload/server/public/assets/{house-CfjnRPO6.js → house-CYsVygEQ.js} +1 -1
  97. package/payload/server/public/assets/{jsx-runtime-81wg0w0Q.css → jsx-runtime-DPXE45W9.css} +1 -1
  98. package/payload/server/public/assets/{public-CE1kyVnz.js → public-BTOF98iO.js} +1 -1
  99. package/payload/server/public/assets/{share-2-CAd1beVT.js → share-2-B-sbkB36.js} +1 -1
  100. package/payload/server/public/assets/{useVoiceRecorder-LSAU68Eo.js → useVoiceRecorder-DLVFx3ms.js} +1 -1
  101. package/payload/server/public/assets/{x-B0xK3Aoq.js → x-BNidzSAn.js} +1 -1
  102. package/payload/server/public/data.html +6 -6
  103. package/payload/server/public/graph.html +7 -7
  104. package/payload/server/public/index.html +8 -8
  105. package/payload/server/public/public.html +5 -5
  106. package/payload/server/server.js +6 -10
  107. /package/payload/server/public/assets/{jsx-runtime-DhzH26q8.js → jsx-runtime-BUs3sHtV.js} +0 -0
@@ -1,90 +1,143 @@
1
1
  import { getSession } from "../lib/neo4j.js";
2
- import { embedBatch } from "../lib/embeddings.js";
2
+ import { embed, embedBatch } from "../lib/embeddings.js";
3
3
  import { extractCache } from "./memory-ingest-extract.js";
4
4
  import { deleteDocumentChildren } from "../lib/document-hierarchy.js";
5
5
  import { restoreNode } from "../../../../../lib/graph-trash/dist/index.js";
6
+ // ---------------------------------------------------------------------------
7
+ // Typed-node document ingestion (Task 737).
8
+ //
9
+ // CACHE LOOKUP --> MERGE/REVIVE DOC --> CLEANUP CHILDREN --> EMBED --> WRITE
10
+ // (by attachmentId) KnowledgeDocument delete prior batch typed nodes
11
+ // (idempotent) Section/Chunk/typed Ollama + anchor edges
12
+ // + REFERENCES
13
+ //
14
+ // Each classified section becomes either:
15
+ // - a typed graph node (Position, Service, Credential, etc.) anchored to
16
+ // UserProfile / LocalBusiness / Person / Organization via the natural
17
+ // ontology edge, plus a (KnowledgeDocument)-[:REFERENCES]->(typed) link
18
+ // for retrieval, plus optional related-entity nodes (e.g. Position's
19
+ // employer Organization, MERGEd by name).
20
+ // - a generic :Section node (UNMAPPED fallback) hanging off the document
21
+ // via the legacy (KnowledgeDocument)-[:HAS_SECTION] edge, with optional
22
+ // :Chunk overflow when the body exceeds MAX_CHUNK_SIZE.
23
+ //
24
+ // Provenance properties (createdByAgent, createdBySession, source,
25
+ // sourceDocumentId) stamp every node and edge the skill creates so the
26
+ // re-ingest cleanup can find and replace them deterministically.
27
+ // ---------------------------------------------------------------------------
28
+ const MAX_CHUNK_SIZE = 2000;
29
+ const PREVIEW_LENGTH = 150;
30
+ const UNMAPPED = "UNMAPPED";
31
+ const PROVENANCE_AGENT = "document-ingest";
32
+ const PROVENANCE_SOURCE = "document";
33
+ // ---------------------------------------------------------------------------
34
+ // Helpers
35
+ // ---------------------------------------------------------------------------
36
+ function normaliseKeywords(arr) {
37
+ if (!arr || arr.length === 0)
38
+ return [];
39
+ return arr.map((k) => k.toLowerCase().trim()).filter(Boolean);
40
+ }
41
+ function chunkBody(body) {
42
+ if (body.length <= MAX_CHUNK_SIZE)
43
+ return [body];
44
+ const chunks = [];
45
+ for (let i = 0; i < body.length; i += MAX_CHUNK_SIZE) {
46
+ chunks.push(body.slice(i, i + MAX_CHUNK_SIZE));
47
+ }
48
+ return chunks;
49
+ }
50
+ /**
51
+ * Identifying property for MERGE on a related node by kind.
52
+ * Choose a stable, human-recognisable key per label so the same real-world
53
+ * entity collapses to one node across documents.
54
+ */
55
+ function mergeKeyFor(kind, properties) {
56
+ switch (kind) {
57
+ case "Organization":
58
+ return typeof properties.name === "string" && properties.name.trim()
59
+ ? { key: "name", value: properties.name.trim() }
60
+ : null;
61
+ case "Person":
62
+ if (typeof properties.email === "string" && properties.email.trim()) {
63
+ return { key: "email", value: properties.email.trim() };
64
+ }
65
+ if (typeof properties.telephone === "string" && properties.telephone.trim()) {
66
+ return { key: "telephone", value: properties.telephone.trim() };
67
+ }
68
+ return null;
69
+ case "DefinedTerm":
70
+ return typeof properties.name === "string" && properties.name.trim()
71
+ ? { key: "name", value: properties.name.trim() }
72
+ : null;
73
+ case "Credential":
74
+ return typeof properties.name === "string" && properties.name.trim()
75
+ ? { key: "name", value: properties.name.trim() }
76
+ : null;
77
+ default:
78
+ return null;
79
+ }
80
+ }
81
+ // ---------------------------------------------------------------------------
82
+ // Main entry point
83
+ // ---------------------------------------------------------------------------
6
84
  export async function memoryIngest(params) {
7
- const { accountId, attachmentId, documentSummary, sections, scope, entities, sourceUrl, sourceType, keywords: rawKeywords, userKeywords: rawUserKeywords, } = params;
85
+ const { accountId, attachmentId, documentSummary, anchorNodeId, anchorLabel, sections, scope, sourceUrl, sourceType, documentKeywords: rawDocKeywords, userKeywords: rawUserKeywords, sessionId, } = params;
8
86
  if (!scope) {
9
87
  throw new Error("scope is required — valid values: 'public', 'shared', 'admin', 'user:{identifier}'");
10
88
  }
11
- // Normalize and merge keywords: user-supplied ∪ LLM-extracted, deduplicated.
12
- // User keywords appear first (cosmetic — both are equal after merge).
13
- const normalizeArray = (arr) => arr.map((k) => k.toLowerCase().trim()).filter(Boolean);
14
- const hasUserKeywords = rawUserKeywords && rawUserKeywords.length > 0;
15
- const hasKeywords = rawKeywords && rawKeywords.length > 0;
16
- const keywords = hasUserKeywords || hasKeywords
17
- ? [...new Set([
18
- ...normalizeArray(rawUserKeywords ?? []),
19
- ...normalizeArray(rawKeywords ?? []),
20
- ])]
21
- : undefined;
89
+ if (!anchorNodeId) {
90
+ throw new Error("anchorNodeId is required — the document subject's element ID (UserProfile/LocalBusiness/Person/Organization)");
91
+ }
92
+ if (!anchorLabel) {
93
+ throw new Error("anchorLabel is required — the anchor node's primary label");
94
+ }
95
+ const keywords = (() => {
96
+ const u = normaliseKeywords(rawUserKeywords);
97
+ const d = normaliseKeywords(rawDocKeywords);
98
+ if (u.length === 0 && d.length === 0)
99
+ return undefined;
100
+ return [...new Set([...u, ...d])];
101
+ })();
22
102
  const t0 = Date.now();
23
103
  const log = (stage, detail) => console.error(`[memory-ingest] [${attachmentId.slice(0, 8)}] ${stage}${detail ? ` — ${detail}` : ""} (${Date.now() - t0}ms)`);
24
- log("start", `${sections.length} sections, scope=${scope}`);
25
- // 1. Retrieve cached content from memory-ingest-extract
104
+ log("start", `${sections.length} sections, scope=${scope}, anchor=${anchorLabel}`);
26
105
  const cached = extractCache.get(attachmentId);
27
106
  if (!cached) {
28
107
  throw new Error(`No cached extract found for attachment "${attachmentId}". ` +
29
- `Call memory-ingest-extract first to extract and chunk the document.`);
108
+ `Call memory-ingest-extract first.`);
30
109
  }
31
110
  log("cache-hit", cached.filename);
32
- // Validate section count matches
33
- if (sections.length !== cached.sections.length) {
34
- throw new Error(`Section count mismatch: model provided ${sections.length} sections ` +
35
- `but the extracted document has ${cached.sections.length} sections. ` +
36
- `Provide exactly one summary per section returned by memory-ingest-extract.`);
37
- }
38
- // Validate chunk summary counts per section
39
- for (let i = 0; i < sections.length; i++) {
40
- const expectedChunks = cached.sections[i].chunks.length;
41
- const providedSummaries = sections[i].chunkSummaries.length;
42
- if (providedSummaries !== expectedChunks) {
43
- throw new Error(`Chunk count mismatch in section "${sections[i].title}": ` +
44
- `model provided ${providedSummaries} chunk summaries ` +
45
- `but the section has ${expectedChunks} chunks.`);
46
- }
47
- }
48
111
  const { filename, mimeType } = cached;
49
112
  const now = new Date().toISOString();
50
- // 2. Collect all texts that need embedding in a flat array.
51
- const textsToEmbed = [];
52
- // Document-level summary
53
- textsToEmbed.push(documentSummary);
54
- const docEmbedIdx = 0;
55
- // Section-level summaries
56
- const sectionEmbedIndices = [];
57
- for (const section of sections) {
58
- sectionEmbedIndices.push(textsToEmbed.length);
59
- textsToEmbed.push(section.summary);
60
- }
61
- // Chunk-level summaries
62
- const chunkEmbedIndices = [];
63
- for (let si = 0; si < sections.length; si++) {
64
- for (let ci = 0; ci < sections[si].chunkSummaries.length; ci++) {
65
- chunkEmbedIndices.push({
66
- sectionIdx: si,
67
- chunkIdx: ci,
68
- embedIdx: textsToEmbed.length,
69
- });
70
- textsToEmbed.push(sections[si].chunkSummaries[ci]);
71
- }
72
- }
73
- // 3. Batch embed all summaries in a single Ollama call.
113
+ // 1. Embed document summary + every section body in one batch.
114
+ const textsToEmbed = [documentSummary, ...sections.map((s) => s.body)];
74
115
  log("embedding", `${textsToEmbed.length} texts`);
75
116
  const embeddings = await embedBatch(textsToEmbed);
76
117
  log("embedded", `${embeddings.length} vectors`);
77
- // 4. Write nodes to Neo4j.
78
- log("neo4j-write", "starting");
118
+ const docEmbedding = embeddings[0];
79
119
  const session = getSession();
80
120
  let documentNodeId = "";
81
- let totalChunks = 0;
121
+ let typedCount = 0;
122
+ let unmappedCount = 0;
123
+ let chunkCount = 0;
82
124
  let entityLinks = 0;
83
125
  try {
84
- // 4a. Create KnowledgeDocument node
85
- // Build optional SET clauses for web-sourced properties.
86
- // When sourceUrl/sourceType/keywords are undefined (file uploads),
87
- // the corresponding SET lines are omitted — existing values preserved on re-ingest.
126
+ // 2. Trash-revival (Task 576) — restore a soft-deleted KnowledgeDocument
127
+ // with this attachmentId so MERGE finds the existing node.
128
+ const trashedDoc = await session.run(`MATCH (d:KnowledgeDocument:Trashed)
129
+ WHERE d.accountId = $accountId
130
+ AND d._trashedKeys IS NOT NULL
131
+ AND d._trashedKeys CONTAINS $attachmentId
132
+ RETURN elementId(d) AS eid LIMIT 1`, { accountId, attachmentId });
133
+ if (trashedDoc.records.length > 0) {
134
+ const eid = trashedDoc.records[0].get("eid");
135
+ await restoreNode({ session, accountId, elementId: eid });
136
+ log("revived", `restored trashed KnowledgeDocument elementId=${eid}`);
137
+ }
138
+ // 3. MERGE the KnowledgeDocument parent. Optional fields (sourceUrl,
139
+ // sourceType, keywords) are SET only when present so re-ingest of a
140
+ // file upload doesn't null out a previously-set web-source URL.
88
141
  const optionalSets = [];
89
142
  const optionalParams = {};
90
143
  if (sourceUrl !== undefined) {
@@ -99,35 +152,19 @@ export async function memoryIngest(params) {
99
152
  optionalSets.push("d.keywords = $keywords");
100
153
  optionalParams.keywords = keywords;
101
154
  }
102
- const optionalSetClause = optionalSets.length > 0
103
- ? ", " + optionalSets.join(", ")
104
- : "";
105
- // Trash recovery: a prior memory-delete on this attachmentId nulls the
106
- // live attachmentId (Task 576 unique-key handling). MERGE on
107
- // attachmentId would then create a NEW node, orphaning the trashed copy
108
- // — and 30 days later memory-empty-trash would purge the disk dir the
109
- // new node depends on. Restore first so MERGE finds the existing node.
110
- const trashedDoc = await session.run(`MATCH (d:KnowledgeDocument:Trashed)
111
- WHERE d.accountId = $accountId
112
- AND d._trashedKeys IS NOT NULL
113
- AND d._trashedKeys CONTAINS $attachmentId
114
- RETURN elementId(d) AS eid LIMIT 1`, { accountId, attachmentId });
115
- if (trashedDoc.records.length > 0) {
116
- const eid = trashedDoc.records[0].get("eid");
117
- await restoreNode({ session, accountId, elementId: eid });
118
- log("revived", `restored trashed KnowledgeDocument elementId=${eid}`);
119
- }
155
+ const optionalSetClause = optionalSets.length > 0 ? ", " + optionalSets.join(", ") : "";
120
156
  const docResult = await session.run(`MERGE (d:KnowledgeDocument { attachmentId: $attachmentId })
121
157
  SET d.accountId = $accountId,
122
158
  d.name = $filename,
123
- d.encodingFormat = $mimeType,
124
- d.summary = $documentSummary,
125
- d.sectionCount = $sectionCount,
126
- d.scope = $scope,
127
- d.embedding = $embedding,
128
- d.createdAt = $createdAt,
129
- d.updatedAt = $updatedAt
159
+ d.encodingFormat = $mimeType,
160
+ d.summary = $documentSummary,
161
+ d.sectionCount = $sectionCount,
162
+ d.scope = $scope,
163
+ d.embedding = $embedding,
164
+ d.createdAt = coalesce(d.createdAt, $createdAt),
165
+ d.updatedAt = $updatedAt
130
166
  ${optionalSetClause}
167
+ REMOVE d.deletedAt
131
168
  RETURN elementId(d) AS nodeId`, {
132
169
  attachmentId,
133
170
  accountId,
@@ -136,114 +173,323 @@ export async function memoryIngest(params) {
136
173
  documentSummary,
137
174
  sectionCount: sections.length,
138
175
  scope,
139
- embedding: embeddings[docEmbedIdx],
176
+ embedding: docEmbedding,
140
177
  createdAt: now,
141
178
  updatedAt: now,
142
179
  ...optionalParams,
143
180
  });
144
181
  documentNodeId = docResult.records[0].get("nodeId");
145
- // 4a-cleanup. Remove stale children before re-creating.
146
- // On first ingestion the queries return nothing and the DELETEs are no-ops.
147
- // Also clear any soft-delete marker on the document itself (re-ingestion revives).
182
+ // 4. Cleanup prior children (idempotent re-ingest). Removes generic
183
+ // :Section/Chunk fallbacks and typed nodes that originated from this
184
+ // document. MERGEd related nodes (Organizations, Persons) are spared.
148
185
  const cleanup = await deleteDocumentChildren(attachmentId, session);
149
- if (cleanup.sections > 0 || cleanup.chunks > 0 || cleanup.references > 0) {
150
- log("cleanup", `deleted ${cleanup.sections} sections, ${cleanup.chunks} chunks, ${cleanup.references} references`);
186
+ if (cleanup.sections > 0 ||
187
+ cleanup.chunks > 0 ||
188
+ cleanup.typed > 0 ||
189
+ cleanup.references > 0) {
190
+ log("cleanup", `deleted ${cleanup.sections} sections, ${cleanup.chunks} chunks, ${cleanup.typed} typed, ${cleanup.references} references`);
151
191
  }
152
- // Clear deletedAt if the document was previously soft-deleted
153
- await session.run(`MATCH (d:KnowledgeDocument { attachmentId: $attachmentId })
154
- WHERE d.deletedAt IS NOT NULL
155
- REMOVE d.deletedAt`, { attachmentId });
156
- // 4b. Create Section nodes and link HAS_SECTION
157
- const sectionNodeIds = [];
158
- for (let si = 0; si < sections.length; si++) {
159
- const section = sections[si];
160
- const sectionResult = await session.run(`CREATE (s:Section {
161
- accountId: $accountId,
162
- title: $title,
163
- summary: $summary,
164
- position: $position,
165
- scope: $scope,
166
- embedding: $embedding,
167
- createdAt: $createdAt,
168
- updatedAt: $updatedAt
169
- })
170
- RETURN elementId(s) AS nodeId`, {
192
+ // 5. Per-section writes.
193
+ for (let i = 0; i < sections.length; i++) {
194
+ const section = sections[i];
195
+ const bodyEmbedding = embeddings[i + 1]; // +1 because index 0 is docEmbedding
196
+ const sectionParams = {
171
197
  accountId,
172
198
  title: section.title,
173
- summary: section.summary,
174
- position: si,
199
+ body: section.body,
200
+ bodyPreview: section.body.slice(0, PREVIEW_LENGTH),
201
+ position: i,
175
202
  scope,
176
- embedding: embeddings[sectionEmbedIndices[si]],
203
+ embedding: bodyEmbedding,
177
204
  createdAt: now,
178
205
  updatedAt: now,
179
- });
180
- const sectionNodeId = sectionResult.records[0].get("nodeId");
181
- sectionNodeIds.push(sectionNodeId);
182
- // Link KnowledgeDocument -[HAS_SECTION]-> Section
183
- await session.run(`MATCH (d), (s)
184
- WHERE elementId(d) = $docId AND elementId(s) = $sectionId
185
- CREATE (d)-[:HAS_SECTION]->(s)`, { docId: documentNodeId, sectionId: sectionNodeId });
186
- }
187
- // 4c. Create Chunk nodes and link HAS_CHUNK — content from cache
188
- for (const { sectionIdx, chunkIdx, embedIdx } of chunkEmbedIndices) {
189
- const cachedChunk = cached.sections[sectionIdx].chunks[chunkIdx];
190
- const chunkSummary = sections[sectionIdx].chunkSummaries[chunkIdx];
191
- const chunkResult = await session.run(`CREATE (c:Chunk {
192
- accountId: $accountId,
193
- summary: $summary,
194
- content: $content,
195
- position: $position,
196
- scope: $scope,
197
- embedding: $embedding,
198
- createdAt: $createdAt,
199
- updatedAt: $updatedAt
200
- })
201
- RETURN elementId(c) AS nodeId`, {
206
+ createdByAgent: PROVENANCE_AGENT,
207
+ createdBySource: PROVENANCE_AGENT,
208
+ createdBySession: sessionId ?? "",
209
+ source: PROVENANCE_SOURCE,
210
+ sourceDocumentId: attachmentId,
211
+ };
212
+ if (section.kind === UNMAPPED) {
213
+ // 5a. UNMAPPED: generic :Section + (KnowledgeDocument)-[:HAS_SECTION]
214
+ const sectionResult = await session.run(`CREATE (s:Section {
215
+ accountId: $accountId,
216
+ title: $title,
217
+ body: $body,
218
+ summary: $bodyPreview,
219
+ position: $position,
220
+ scope: $scope,
221
+ embedding: $embedding,
222
+ createdAt: $createdAt,
223
+ updatedAt: $updatedAt,
224
+ createdByAgent: $createdByAgent,
225
+ createdBySource: $createdBySource,
226
+ createdBySession: $createdBySession,
227
+ source: $source,
228
+ sourceDocumentId: $sourceDocumentId
229
+ })
230
+ WITH s
231
+ MATCH (d:KnowledgeDocument) WHERE elementId(d) = $docId
232
+ CREATE (d)-[:HAS_SECTION]->(s)
233
+ RETURN elementId(s) AS nodeId`, { ...sectionParams, docId: documentNodeId });
234
+ const sectionId = sectionResult.records[0].get("nodeId");
235
+ // Body overflow → :Chunk children
236
+ if (section.body.length > MAX_CHUNK_SIZE) {
237
+ const overflowChunks = chunkBody(section.body);
238
+ const overflowEmbeddings = await embedBatch(overflowChunks);
239
+ for (let ci = 0; ci < overflowChunks.length; ci++) {
240
+ await session.run(`MATCH (s:Section) WHERE elementId(s) = $sectionId
241
+ CREATE (s)-[:HAS_CHUNK]->(c:Chunk {
242
+ accountId: $accountId,
243
+ content: $content,
244
+ summary: $summary,
245
+ position: $position,
246
+ scope: $scope,
247
+ embedding: $embedding,
248
+ createdAt: $createdAt,
249
+ updatedAt: $updatedAt,
250
+ createdByAgent: $createdByAgent,
251
+ createdBySource: $createdBySource,
252
+ createdBySession: $createdBySession,
253
+ source: $source,
254
+ sourceDocumentId: $sourceDocumentId
255
+ })`, {
256
+ sectionId,
257
+ accountId,
258
+ content: overflowChunks[ci],
259
+ summary: overflowChunks[ci].slice(0, PREVIEW_LENGTH),
260
+ position: ci,
261
+ scope,
262
+ embedding: overflowEmbeddings[ci],
263
+ createdAt: now,
264
+ updatedAt: now,
265
+ createdByAgent: PROVENANCE_AGENT,
266
+ createdBySource: PROVENANCE_AGENT,
267
+ createdBySession: sessionId ?? "",
268
+ source: PROVENANCE_SOURCE,
269
+ sourceDocumentId: attachmentId,
270
+ });
271
+ chunkCount += 1;
272
+ }
273
+ }
274
+ unmappedCount += 1;
275
+ console.error(`[document-ingest] unmapped-section title="${section.title}" chars=${section.body.length}`);
276
+ continue;
277
+ }
278
+ // 5b. Typed kind — write the typed node, anchor edge, related nodes,
279
+ // and the document REFERENCES link.
280
+ // Build the typed-node properties: classifier-supplied properties
281
+ // overlaid on top of the system fields, with system fields winning.
282
+ const typedProps = {
283
+ ...section.properties,
202
284
  accountId,
203
- summary: chunkSummary,
204
- content: cachedChunk.content,
205
- position: chunkIdx,
285
+ title: section.title,
286
+ body: section.body,
206
287
  scope,
207
- embedding: embeddings[embedIdx],
288
+ embedding: bodyEmbedding,
208
289
  createdAt: now,
209
290
  updatedAt: now,
210
- });
211
- const chunkNodeId = chunkResult.records[0].get("nodeId");
212
- totalChunks++;
213
- // Link Section -[HAS_CHUNK]-> Chunk
214
- await session.run(`MATCH (s), (c)
215
- WHERE elementId(s) = $sectionId AND elementId(c) = $chunkId
216
- CREATE (s)-[:HAS_CHUNK]->(c)`, { sectionId: sectionNodeIds[sectionIdx], chunkId: chunkNodeId });
217
- }
218
- // 4d. Create REFERENCES links to entities
219
- if (entities && entities.length > 0) {
220
- for (const entity of entities) {
221
- try {
222
- await session.run(`MATCH (d), (e)
223
- WHERE elementId(d) = $docId AND elementId(e) = $entityId
224
- MERGE (d)-[:REFERENCES]->(e)`, { docId: documentNodeId, entityId: entity.nodeId });
225
- entityLinks++;
291
+ createdByAgent: PROVENANCE_AGENT,
292
+ createdBySource: PROVENANCE_AGENT,
293
+ createdBySession: sessionId ?? "",
294
+ source: PROVENANCE_SOURCE,
295
+ sourceDocumentId: attachmentId,
296
+ };
297
+ // CREATE the typed node. Label is interpolated (validated against the
298
+ // ontology label set by the classifier) — Cypher does not allow
299
+ // parameterising labels.
300
+ const typedResult = await session.run(`CREATE (t:\`${section.kind}\`)
301
+ SET t = $props
302
+ RETURN elementId(t) AS nodeId`, { props: typedProps });
303
+ const typedNodeId = typedResult.records[0].get("nodeId");
304
+ // Body overflow → :Chunk children attached directly to the typed node
305
+ // (mirroring :Section overflow). Most typed nodes won't trip this.
306
+ if (section.body.length > MAX_CHUNK_SIZE) {
307
+ const overflowChunks = chunkBody(section.body);
308
+ const overflowEmbeddings = await embedBatch(overflowChunks);
309
+ for (let ci = 0; ci < overflowChunks.length; ci++) {
310
+ await session.run(`MATCH (t) WHERE elementId(t) = $typedId
311
+ CREATE (t)-[:HAS_CHUNK]->(c:Chunk {
312
+ accountId: $accountId,
313
+ content: $content,
314
+ summary: $summary,
315
+ position: $position,
316
+ scope: $scope,
317
+ embedding: $embedding,
318
+ createdAt: $createdAt,
319
+ updatedAt: $updatedAt,
320
+ createdByAgent: $createdByAgent,
321
+ createdBySource: $createdBySource,
322
+ createdBySession: $createdBySession,
323
+ source: $source,
324
+ sourceDocumentId: $sourceDocumentId
325
+ })`, {
326
+ typedId: typedNodeId,
327
+ accountId,
328
+ content: overflowChunks[ci],
329
+ summary: overflowChunks[ci].slice(0, PREVIEW_LENGTH),
330
+ position: ci,
331
+ scope,
332
+ embedding: overflowEmbeddings[ci],
333
+ createdAt: now,
334
+ updatedAt: now,
335
+ createdByAgent: PROVENANCE_AGENT,
336
+ createdBySource: PROVENANCE_AGENT,
337
+ createdBySession: sessionId ?? "",
338
+ source: PROVENANCE_SOURCE,
339
+ sourceDocumentId: attachmentId,
340
+ });
341
+ chunkCount += 1;
226
342
  }
227
- catch {
228
- // Entity node may have been deleted — skip silently, log via caller
343
+ }
344
+ // Related nodes (e.g. Position's employer Organization). MERGE when
345
+ // the related kind has a stable identifying property; CREATE when not.
346
+ // Provenance is stamped only on creation (ON CREATE SET) so shared
347
+ // entities don't get rewritten by subsequent ingestions.
348
+ if (section.related && section.related.length > 0) {
349
+ for (const related of section.related) {
350
+ const relatedNodeId = await writeRelatedNode({
351
+ session,
352
+ accountId,
353
+ related,
354
+ now,
355
+ sessionId,
356
+ });
357
+ // Edge from typed node to related node
358
+ await session.run(`MATCH (t) WHERE elementId(t) = $typedId
359
+ MATCH (r) WHERE elementId(r) = $relatedId
360
+ CREATE (t)-[edge:\`${related.edge.type}\`]->(r)
361
+ SET edge += $edgeProps`, {
362
+ typedId: related.edge.direction === "outgoing" ? typedNodeId : relatedNodeId,
363
+ relatedId: related.edge.direction === "outgoing" ? relatedNodeId : typedNodeId,
364
+ edgeProps: {
365
+ ...(related.edge.properties ?? {}),
366
+ createdByAgent: PROVENANCE_AGENT,
367
+ createdBySession: sessionId ?? "",
368
+ source: PROVENANCE_SOURCE,
369
+ sourceDocumentId: attachmentId,
370
+ createdAt: now,
371
+ },
372
+ });
373
+ }
374
+ }
375
+ // Anchor edge (anchor → typed or typed → anchor)
376
+ if (section.anchorEdge) {
377
+ const direction = section.anchorEdge.direction;
378
+ const edgeProps = {
379
+ ...(section.anchorEdge.properties ?? {}),
380
+ createdByAgent: PROVENANCE_AGENT,
381
+ createdBySession: sessionId ?? "",
382
+ source: PROVENANCE_SOURCE,
383
+ sourceDocumentId: attachmentId,
384
+ createdAt: now,
385
+ };
386
+ if (direction === "from-anchor") {
387
+ await session.run(`MATCH (a:\`${anchorLabel}\`) WHERE elementId(a) = $anchorId AND a.accountId = $accountId
388
+ MATCH (t) WHERE elementId(t) = $typedId
389
+ CREATE (a)-[edge:\`${section.anchorEdge.type}\`]->(t)
390
+ SET edge += $edgeProps`, { anchorId: anchorNodeId, typedId: typedNodeId, accountId, edgeProps });
391
+ }
392
+ else {
393
+ await session.run(`MATCH (a:\`${anchorLabel}\`) WHERE elementId(a) = $anchorId AND a.accountId = $accountId
394
+ MATCH (t) WHERE elementId(t) = $typedId
395
+ CREATE (t)-[edge:\`${section.anchorEdge.type}\`]->(a)
396
+ SET edge += $edgeProps`, { anchorId: anchorNodeId, typedId: typedNodeId, accountId, edgeProps });
229
397
  }
230
398
  }
399
+ // (KnowledgeDocument)-[:REFERENCES]->(typed) for retrieval.
400
+ await session.run(`MATCH (d:KnowledgeDocument) WHERE elementId(d) = $docId
401
+ MATCH (t) WHERE elementId(t) = $typedId
402
+ CREATE (d)-[r:REFERENCES]->(t)
403
+ SET r.createdByAgent = $createdByAgent,
404
+ r.createdBySession = $createdBySession,
405
+ r.source = $source,
406
+ r.sourceDocumentId = $sourceDocumentId,
407
+ r.createdAt = $createdAt`, {
408
+ docId: documentNodeId,
409
+ typedId: typedNodeId,
410
+ createdByAgent: PROVENANCE_AGENT,
411
+ createdBySession: sessionId ?? "",
412
+ source: PROVENANCE_SOURCE,
413
+ sourceDocumentId: attachmentId,
414
+ createdAt: now,
415
+ });
416
+ entityLinks += 1;
417
+ typedCount += 1;
418
+ console.error(`[document-ingest] section kind=${section.kind} title="${section.title}" chars=${section.body.length}`);
231
419
  }
232
- log("neo4j-done", `doc=${documentNodeId.slice(0, 12)} sections=${sections.length} chunks=${totalChunks} entityLinks=${entityLinks}`);
420
+ log("neo4j-done", `doc=${documentNodeId.slice(0, 12)} sections=${sections.length} typed=${typedCount} unmapped=${unmappedCount} chunks=${chunkCount} entityLinks=${entityLinks}`);
421
+ console.error(`[document-ingest] done sections=${sections.length} typed=${typedCount} unmapped=${unmappedCount} ms=${Date.now() - t0}`);
233
422
  return {
234
423
  documentNodeId,
235
424
  sectionCount: sections.length,
236
- chunkCount: totalChunks,
425
+ typedCount,
426
+ unmappedCount,
427
+ chunkCount,
237
428
  entityLinks,
238
429
  documentSummary,
239
430
  keywords,
240
431
  };
241
432
  }
242
433
  finally {
243
- // Evict cache entry — content is now persisted in Neo4j
244
434
  extractCache.delete(attachmentId);
245
435
  await session.close();
246
436
  log("complete");
247
437
  }
248
438
  }
439
+ async function writeRelatedNode(opts) {
440
+ const { session, accountId, related, now, sessionId } = opts;
441
+ // Compute embedding from a representative property string so the related
442
+ // node is searchable. Falls back to label+JSON if no `name` field exists.
443
+ const embedSource = typeof related.properties.name === "string"
444
+ ? related.properties.name
445
+ : `${related.kind} ${JSON.stringify(related.properties)}`;
446
+ const relatedEmbedding = await embed(embedSource);
447
+ const mergeKey = related.merge !== false ? mergeKeyFor(related.kind, related.properties) : null;
448
+ if (mergeKey) {
449
+ // MERGE on the identifying property + accountId. ON CREATE stamps
450
+ // provenance; ON MATCH leaves provenance intact (so re-ingest of the
451
+ // same Organization across docs doesn't overwrite the original
452
+ // provenance — first-write wins for shared entities).
453
+ const result = await session.run(`MERGE (r:\`${related.kind}\` { accountId: $accountId, \`${mergeKey.key}\`: $mergeValue })
454
+ ON CREATE SET r += $createProps,
455
+ r.embedding = $embedding,
456
+ r.createdAt = $createdAt,
457
+ r.updatedAt = $createdAt,
458
+ r.createdByAgent = $createdByAgent,
459
+ r.createdBySource = $createdByAgent,
460
+ r.createdBySession = $createdBySession,
461
+ r.source = $source
462
+ ON MATCH SET r.updatedAt = $createdAt
463
+ RETURN elementId(r) AS nodeId`, {
464
+ accountId,
465
+ mergeValue: mergeKey.value,
466
+ createProps: { ...related.properties, accountId, scope: "shared" },
467
+ embedding: relatedEmbedding,
468
+ createdAt: now,
469
+ createdByAgent: PROVENANCE_AGENT,
470
+ createdBySession: sessionId ?? "",
471
+ source: PROVENANCE_SOURCE,
472
+ });
473
+ return result.records[0].get("nodeId");
474
+ }
475
+ // CREATE — no stable identifying property, treat as one-of-a-kind for
476
+ // this document. Track sourceDocumentId so re-ingest cleanup catches it.
477
+ const result = await session.run(`CREATE (r:\`${related.kind}\`)
478
+ SET r = $props
479
+ RETURN elementId(r) AS nodeId`, {
480
+ props: {
481
+ ...related.properties,
482
+ accountId,
483
+ scope: "shared",
484
+ embedding: relatedEmbedding,
485
+ createdAt: now,
486
+ updatedAt: now,
487
+ createdByAgent: PROVENANCE_AGENT,
488
+ createdBySource: PROVENANCE_AGENT,
489
+ createdBySession: sessionId ?? "",
490
+ source: PROVENANCE_SOURCE,
491
+ },
492
+ });
493
+ return result.records[0].get("nodeId");
494
+ }
249
495
  //# sourceMappingURL=memory-ingest.js.map