npm - @rubytech/create-realagent - Versions diffs - 1.0.706 → 1.0.709 - Mend

@rubytech/create-realagent 1.0.706 → 1.0.709

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67) hide show

package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js CHANGED Viewed

@@ -1,90 +1,158 @@
 import { getSession } from "../lib/neo4j.js";
-import { embedBatch } from "../lib/embeddings.js";
+import { embed, embedBatch } from "../lib/embeddings.js";
 import { extractCache } from "./memory-ingest-extract.js";
 import { deleteDocumentChildren } from "../lib/document-hierarchy.js";
 import { restoreNode } from "../../../../../lib/graph-trash/dist/index.js";
+import { IDENTITY_SECTION_KINDS, STRUCTURAL_SECTION_KINDS, CONTRACT_SECTION_KINDS, STANDALONE_NODE_KINDS, SECTION_KIND_OTHER, } from "../lib/llm-classifier.js";
+// ---------------------------------------------------------------------------
+// Single-Section document ingestion (Task 740, replacing Task 737's typed-vs-
+// UNMAPPED fork).
+//
+// CACHE LOOKUP --> EMBED --> REVIVE/MERGE DOC --> CLEANUP CHILDREN --> WRITE
+//   (by attachmentId)  batch    KnowledgeDocument    delete prior         Section nodes
+//                      Ollama   (idempotent)         Section/Chunk/typed  + secondary labels
+//                                                                         + NEXT chain
+//                                                                         + anchor edges
+//                                                                         + related entities
+//                                                                         + KD-level edges
+//
+// Every classified section produces ONE `:Section` node. When the classifier
+// recognises the kind (Position/Education/Chapter/Parties/etc.), the same
+// node carries a secondary label (`:Section:Position`) and any structured
+// properties; identity-kind anchor edges go to the multi-labeled node
+// directly, killing the parallel Section-vs-typed-node concept.
+//
+// Special-case writers fire for two contract-clause kinds:
+//   * Parties:     `(:KnowledgeDocument)-[:PARTY]->(:Person|:Organization)` from documentEdges.
+//   * Definitions: `(:Section:Definitions)-[:DEFINES]->(:DefinedTerm)` from related entries.
+//
+// `:Chunk` is gone. Sections carry their body directly. If a body legitimately
+// exceeds Neo4j's property limit, that is a classifier-split-the-section
+// concern, not a writer problem.
+//
+// Provenance properties (createdByAgent, createdBySession, source,
+// sourceDocumentId) stamp every node and edge so re-ingest cleanup finds
+// them deterministically.
+// ---------------------------------------------------------------------------
+const PREVIEW_LENGTH = 150;
+const PROVENANCE_AGENT = "document-ingest";
+const PROVENANCE_SOURCE = "document";
+/** Identity section kinds — the only `:Section` kinds for which an anchor edge is written (e.g. UserProfile → Section:Kind). */
+const IDENTITY_KINDS_SET = new Set(IDENTITY_SECTION_KINDS);
+/** Standalone (non-Section) node kinds (currently just Project). */
+const STANDALONE_KINDS_SET = new Set(STANDALONE_NODE_KINDS);
+/** All section-shaped kinds — anything that becomes a `:Section` node. */
+const SECTION_LABEL_KINDS = new Set([
+    ...IDENTITY_SECTION_KINDS,
+    ...STRUCTURAL_SECTION_KINDS,
+    ...CONTRACT_SECTION_KINDS,
+    SECTION_KIND_OTHER,
+]);
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+function normaliseKeywords(arr) {
+    if (!arr || arr.length === 0)
+        return [];
+    return arr.map((k) => k.toLowerCase().trim()).filter(Boolean);
+}
+/**
+ * Identifying property for MERGE on a related node by kind.
+ * Choose a stable, human-recognisable key per label so the same real-world
+ * entity collapses to one node across documents.
+ */
+function mergeKeyFor(kind, properties) {
+    switch (kind) {
+        case "Organization":
+            return typeof properties.name === "string" && properties.name.trim()
+                ? { key: "name", value: properties.name.trim() }
+                : null;
+        case "Person":
+            if (typeof properties.email === "string" && properties.email.trim()) {
+                return { key: "email", value: properties.email.trim() };
+            }
+            if (typeof properties.telephone === "string" && properties.telephone.trim()) {
+                return { key: "telephone", value: properties.telephone.trim() };
+            }
+            return null;
+        case "DefinedTerm":
+            return typeof properties.name === "string" && properties.name.trim()
+                ? { key: "name", value: properties.name.trim() }
+                : null;
+        case "Credential":
+            return typeof properties.name === "string" && properties.name.trim()
+                ? { key: "name", value: properties.name.trim() }
+                : null;
+        default:
+            return null;
+    }
+}
+function bumpKind(breakdown, kind) {
+    breakdown[kind] = (breakdown[kind] ?? 0) + 1;
+}
+function bumpEdge(breakdown, edgeType) {
+    breakdown[edgeType] = (breakdown[edgeType] ?? 0) + 1;
+}
+// ---------------------------------------------------------------------------
+// Main entry point
+// ---------------------------------------------------------------------------
 export async function memoryIngest(params) {
-    const { accountId, attachmentId, documentSummary, sections, scope, entities, sourceUrl, sourceType, keywords: rawKeywords, userKeywords: rawUserKeywords, } = params;
+    const { accountId, attachmentId, documentSummary, anchorNodeId, anchorLabel, sections, documentEdges = [], orphanCandidates = [], scope, sourceUrl, sourceType, documentKeywords: rawDocKeywords, userKeywords: rawUserKeywords, sessionId, } = params;
     if (!scope) {
         throw new Error("scope is required — valid values: 'public', 'shared', 'admin', 'user:{identifier}'");
     }
-    // Normalize and merge keywords: user-supplied ∪ LLM-extracted, deduplicated.
-    // User keywords appear first (cosmetic — both are equal after merge).
-    const normalizeArray = (arr) => arr.map((k) => k.toLowerCase().trim()).filter(Boolean);
-    const hasUserKeywords = rawUserKeywords && rawUserKeywords.length > 0;
-    const hasKeywords = rawKeywords && rawKeywords.length > 0;
-    const keywords = hasUserKeywords || hasKeywords
-        ? [...new Set([
-                ...normalizeArray(rawUserKeywords ?? []),
-                ...normalizeArray(rawKeywords ?? []),
-            ])]
-        : undefined;
+    if (!anchorNodeId) {
+        throw new Error("anchorNodeId is required — the document subject's element ID (UserProfile/LocalBusiness/Person/Organization)");
+    }
+    if (!anchorLabel) {
+        throw new Error("anchorLabel is required — the anchor node's primary label");
+    }
+    const keywords = (() => {
+        const u = normaliseKeywords(rawUserKeywords);
+        const d = normaliseKeywords(rawDocKeywords);
+        if (u.length === 0 && d.length === 0)
+            return undefined;
+        return [...new Set([...u, ...d])];
+    })();
     const t0 = Date.now();
     const log = (stage, detail) => console.error(`[memory-ingest] [${attachmentId.slice(0, 8)}] ${stage}${detail ? ` — ${detail}` : ""} (${Date.now() - t0}ms)`);
-    log("start", `${sections.length} sections, scope=${scope}`);
-    // 1. Retrieve cached content from memory-ingest-extract
+    log("start", `${sections.length} sections, scope=${scope}, anchor=${anchorLabel}`);
     const cached = extractCache.get(attachmentId);
     if (!cached) {
         throw new Error(`No cached extract found for attachment "${attachmentId}". ` +
-            `Call memory-ingest-extract first to extract and chunk the document.`);
+            `Call memory-ingest-extract first.`);
     }
     log("cache-hit", cached.filename);
-    // Validate section count matches
-    if (sections.length !== cached.sections.length) {
-        throw new Error(`Section count mismatch: model provided ${sections.length} sections ` +
-            `but the extracted document has ${cached.sections.length} sections. ` +
-            `Provide exactly one summary per section returned by memory-ingest-extract.`);
-    }
-    // Validate chunk summary counts per section
-    for (let i = 0; i < sections.length; i++) {
-        const expectedChunks = cached.sections[i].chunks.length;
-        const providedSummaries = sections[i].chunkSummaries.length;
-        if (providedSummaries !== expectedChunks) {
-            throw new Error(`Chunk count mismatch in section "${sections[i].title}": ` +
-                `model provided ${providedSummaries} chunk summaries ` +
-                `but the section has ${expectedChunks} chunks.`);
-        }
-    }
     const { filename, mimeType } = cached;
     const now = new Date().toISOString();
-    // 2. Collect all texts that need embedding in a flat array.
-    const textsToEmbed = [];
-    // Document-level summary
-    textsToEmbed.push(documentSummary);
-    const docEmbedIdx = 0;
-    // Section-level summaries
-    const sectionEmbedIndices = [];
-    for (const section of sections) {
-        sectionEmbedIndices.push(textsToEmbed.length);
-        textsToEmbed.push(section.summary);
-    }
-    // Chunk-level summaries
-    const chunkEmbedIndices = [];
-    for (let si = 0; si < sections.length; si++) {
-        for (let ci = 0; ci < sections[si].chunkSummaries.length; ci++) {
-            chunkEmbedIndices.push({
-                sectionIdx: si,
-                chunkIdx: ci,
-                embedIdx: textsToEmbed.length,
-            });
-            textsToEmbed.push(sections[si].chunkSummaries[ci]);
-        }
-    }
-    // 3. Batch embed all summaries in a single Ollama call.
+    // 1. Embed document summary + every section body in one batch.
+    const textsToEmbed = [documentSummary, ...sections.map((s) => s.body)];
     log("embedding", `${textsToEmbed.length} texts`);
     const embeddings = await embedBatch(textsToEmbed);
     log("embedded", `${embeddings.length} vectors`);
-    // 4. Write nodes to Neo4j.
-    log("neo4j-write", "starting");
-    const session = getSession();
+    const docEmbedding = embeddings[0];
+    const dbSession = getSession();
     let documentNodeId = "";
-    let totalChunks = 0;
-    let entityLinks = 0;
+    const kindBreakdown = {};
+    const edgeBreakdown = {};
+    let relatedCount = 0;
+    let standaloneCount = 0;
     try {
-        // 4a. Create KnowledgeDocument node
-        // Build optional SET clauses for web-sourced properties.
-        // When sourceUrl/sourceType/keywords are undefined (file uploads),
-        // the corresponding SET lines are omitted — existing values preserved on re-ingest.
+        // 2. Trash-revival (Task 576) — restore a soft-deleted KnowledgeDocument
+        //    with this attachmentId so MERGE finds the existing node.
+        const trashedDoc = await dbSession.run(`MATCH (d:KnowledgeDocument:Trashed)
+       WHERE d.accountId = $accountId
+         AND d._trashedKeys IS NOT NULL
+         AND d._trashedKeys CONTAINS $attachmentId
+       RETURN elementId(d) AS eid LIMIT 1`, { accountId, attachmentId });
+        if (trashedDoc.records.length > 0) {
+            const eid = trashedDoc.records[0].get("eid");
+            await restoreNode({ session: dbSession, accountId, elementId: eid });
+            log("revived", `restored trashed KnowledgeDocument elementId=${eid}`);
+        }
+        // 3. MERGE the KnowledgeDocument parent. Optional fields (sourceUrl,
+        //    sourceType, keywords) are SET only when present so re-ingest of a
+        //    file upload doesn't null out a previously-set web-source URL.
         const optionalSets = [];
         const optionalParams = {};
         if (sourceUrl !== undefined) {
@@ -99,35 +167,19 @@ export async function memoryIngest(params) {
             optionalSets.push("d.keywords = $keywords");
             optionalParams.keywords = keywords;
         }
-        const optionalSetClause = optionalSets.length > 0
-            ? ", " + optionalSets.join(", ")
-            : "";
-        // Trash recovery: a prior memory-delete on this attachmentId nulls the
-        // live attachmentId (Task 576 unique-key handling). MERGE on
-        // attachmentId would then create a NEW node, orphaning the trashed copy
-        // — and 30 days later memory-empty-trash would purge the disk dir the
-        // new node depends on. Restore first so MERGE finds the existing node.
-        const trashedDoc = await session.run(`MATCH (d:KnowledgeDocument:Trashed)
-       WHERE d.accountId = $accountId
-         AND d._trashedKeys IS NOT NULL
-         AND d._trashedKeys CONTAINS $attachmentId
-       RETURN elementId(d) AS eid LIMIT 1`, { accountId, attachmentId });
-        if (trashedDoc.records.length > 0) {
-            const eid = trashedDoc.records[0].get("eid");
-            await restoreNode({ session, accountId, elementId: eid });
-            log("revived", `restored trashed KnowledgeDocument elementId=${eid}`);
-        }
-        const docResult = await session.run(`MERGE (d:KnowledgeDocument { attachmentId: $attachmentId })
+        const optionalSetClause = optionalSets.length > 0 ? ", " + optionalSets.join(", ") : "";
+        const docResult = await dbSession.run(`MERGE (d:KnowledgeDocument { attachmentId: $attachmentId })
        SET d.accountId      = $accountId,
            d.name           = $filename,
-           d.encodingFormat  = $mimeType,
-           d.summary         = $documentSummary,
-           d.sectionCount    = $sectionCount,
-           d.scope           = $scope,
-           d.embedding       = $embedding,
-           d.createdAt       = $createdAt,
-           d.updatedAt       = $updatedAt
+           d.encodingFormat = $mimeType,
+           d.summary        = $documentSummary,
+           d.sectionCount   = $sectionCount,
+           d.scope          = $scope,
+           d.embedding      = $embedding,
+           d.createdAt      = coalesce(d.createdAt, $createdAt),
+           d.updatedAt      = $updatedAt
            ${optionalSetClause}
+       REMOVE d.deletedAt
        RETURN elementId(d) AS nodeId`, {
             attachmentId,
             accountId,
@@ -136,114 +188,346 @@ export async function memoryIngest(params) {
             documentSummary,
             sectionCount: sections.length,
             scope,
-            embedding: embeddings[docEmbedIdx],
+            embedding: docEmbedding,
             createdAt: now,
             updatedAt: now,
             ...optionalParams,
         });
         documentNodeId = docResult.records[0].get("nodeId");
-        // 4a-cleanup. Remove stale children before re-creating.
-        // On first ingestion the queries return nothing and the DELETEs are no-ops.
-        // Also clear any soft-delete marker on the document itself (re-ingestion revives).
-        const cleanup = await deleteDocumentChildren(attachmentId, session);
-        if (cleanup.sections > 0 || cleanup.chunks > 0 || cleanup.references > 0) {
-            log("cleanup", `deleted ${cleanup.sections} sections, ${cleanup.chunks} chunks, ${cleanup.references} references`);
+        console.error(`[memory-ingest] KnowledgeDocument.name=${JSON.stringify(filename)} attachmentId=${attachmentId.slice(0, 8)} docId=${documentNodeId.slice(0, 12)}`);
+        // 4. Cleanup prior children (idempotent re-ingest). Removes Section nodes
+        //    (any secondary label) and any standalone nodes stamped with this
+        //    attachmentId. MERGEd related entities (Organizations, Persons) are spared.
+        const cleanup = await deleteDocumentChildren(attachmentId, dbSession);
+        if (cleanup.sections > 0 || cleanup.chunks > 0 || cleanup.typed > 0 || cleanup.references > 0) {
+            log("cleanup", `deleted ${cleanup.sections} sections, ${cleanup.chunks} chunks, ${cleanup.typed} typed, ${cleanup.references} references`);
         }
-        // Clear deletedAt if the document was previously soft-deleted
-        await session.run(`MATCH (d:KnowledgeDocument { attachmentId: $attachmentId })
-       WHERE d.deletedAt IS NOT NULL
-       REMOVE d.deletedAt`, { attachmentId });
-        // 4b. Create Section nodes and link HAS_SECTION
-        const sectionNodeIds = [];
-        for (let si = 0; si < sections.length; si++) {
-            const section = sections[si];
-            const sectionResult = await session.run(`CREATE (s:Section {
-           accountId:  $accountId,
-           title:      $title,
-           summary:    $summary,
-           position:   $position,
-           scope:      $scope,
-           embedding:  $embedding,
-           createdAt:  $createdAt,
-           updatedAt:  $updatedAt
-         })
-         RETURN elementId(s) AS nodeId`, {
+        // 5. Per-section writes. Track the previous section's elementId so we can
+        //    chain (:Section)-[:NEXT]->(:Section) in reading order.
+        let previousSectionId = null;
+        for (let i = 0; i < sections.length; i++) {
+            const section = sections[i];
+            const bodyEmbedding = embeddings[i + 1]; // +1 because index 0 is docEmbedding
+            const baseProps = {
                 accountId,
                 title: section.title,
-                summary: section.summary,
-                position: si,
+                body: section.body,
+                bodyPreview: section.body.slice(0, PREVIEW_LENGTH),
+                position: i,
                 scope,
-                embedding: embeddings[sectionEmbedIndices[si]],
+                embedding: bodyEmbedding,
                 createdAt: now,
                 updatedAt: now,
-            });
-            const sectionNodeId = sectionResult.records[0].get("nodeId");
-            sectionNodeIds.push(sectionNodeId);
-            // Link KnowledgeDocument -[HAS_SECTION]-> Section
-            await session.run(`MATCH (d), (s)
-         WHERE elementId(d) = $docId AND elementId(s) = $sectionId
-         CREATE (d)-[:HAS_SECTION]->(s)`, { docId: documentNodeId, sectionId: sectionNodeId });
-        }
-        // 4c. Create Chunk nodes and link HAS_CHUNK — content from cache
-        for (const { sectionIdx, chunkIdx, embedIdx } of chunkEmbedIndices) {
-            const cachedChunk = cached.sections[sectionIdx].chunks[chunkIdx];
-            const chunkSummary = sections[sectionIdx].chunkSummaries[chunkIdx];
-            const chunkResult = await session.run(`CREATE (c:Chunk {
-           accountId:  $accountId,
-           summary:    $summary,
-           content:    $content,
-           position:   $position,
-           scope:      $scope,
-           embedding:  $embedding,
-           createdAt:  $createdAt,
-           updatedAt:  $updatedAt
-         })
-         RETURN elementId(c) AS nodeId`, {
-                accountId,
-                summary: chunkSummary,
-                content: cachedChunk.content,
-                position: chunkIdx,
-                scope,
-                embedding: embeddings[embedIdx],
+                createdByAgent: PROVENANCE_AGENT,
+                createdBySource: PROVENANCE_AGENT,
+                createdBySession: sessionId ?? "",
+                source: PROVENANCE_SOURCE,
+                sourceDocumentId: attachmentId,
+            };
+            // 5a. Standalone node kind (currently just Project) — no `:Section` label.
+            if (STANDALONE_KINDS_SET.has(section.kind)) {
+                const standaloneId = await writeStandaloneNode(dbSession, section, baseProps, anchorNodeId, anchorLabel, attachmentId, sessionId ?? "", now);
+                standaloneCount += 1;
+                bumpKind(kindBreakdown, section.kind);
+                if (section.anchorEdge)
+                    bumpEdge(edgeBreakdown, section.anchorEdge.type);
+                previousSectionId = null; // standalone breaks the section chain
+                // related entities for standalone (e.g. Project UNDER Organization)
+                if (section.related && section.related.length > 0) {
+                    for (const related of section.related) {
+                        await writeRelatedAndEdge(dbSession, standaloneId, related, accountId, now, sessionId ?? "", attachmentId);
+                        relatedCount += 1;
+                        bumpEdge(edgeBreakdown, related.edge.type);
+                    }
+                }
+                continue;
+            }
+            // 5b. Section-shaped kind (everything else, including Other). One `:Section`
+            //     node with optional secondary label.
+            const isKnownSectionKind = SECTION_LABEL_KINDS.has(section.kind);
+            const sectionLabels = isKnownSectionKind && section.kind !== SECTION_KIND_OTHER
+                ? [`Section`, section.kind]
+                : section.kind === SECTION_KIND_OTHER
+                    ? [`Section`, `Other`]
+                    : [`Section`]; // unrecognised kind — should never happen post-classifier
+            // Compose properties: system fields overlaid on classifier-supplied
+            // properties, system fields winning. For Section:Other, also stamp
+            // classifierReason so the ontology-growth query can surface it.
+            const sectionProps = {
+                ...section.properties,
+                ...baseProps,
+                ...(section.kind === SECTION_KIND_OTHER && section.classifierReason
+                    ? { classifierReason: section.classifierReason }
+                    : {}),
+            };
+            const labelClause = sectionLabels.map((l) => `\`${l}\``).join(":");
+            const sectionResult = await dbSession.run(`CREATE (s:${labelClause})
+         SET s = $props
+         WITH s
+         MATCH (d:KnowledgeDocument) WHERE elementId(d) = $docId
+         CREATE (d)-[hs:HAS_SECTION]->(s)
+         SET hs.createdByAgent   = $createdByAgent,
+             hs.createdBySession = $createdBySession,
+             hs.source           = $source,
+             hs.sourceDocumentId = $sourceDocumentId,
+             hs.createdAt        = $createdAt
+         RETURN elementId(s) AS nodeId`, {
+                props: sectionProps,
+                docId: documentNodeId,
+                createdByAgent: PROVENANCE_AGENT,
+                createdBySession: sessionId ?? "",
+                source: PROVENANCE_SOURCE,
+                sourceDocumentId: attachmentId,
                 createdAt: now,
-                updatedAt: now,
             });
-            const chunkNodeId = chunkResult.records[0].get("nodeId");
-            totalChunks++;
-            // Link Section -[HAS_CHUNK]-> Chunk
-            await session.run(`MATCH (s), (c)
-         WHERE elementId(s) = $sectionId AND elementId(c) = $chunkId
-         CREATE (s)-[:HAS_CHUNK]->(c)`, { sectionId: sectionNodeIds[sectionIdx], chunkId: chunkNodeId });
-        }
-        // 4d. Create REFERENCES links to entities
-        if (entities && entities.length > 0) {
-            for (const entity of entities) {
-                try {
-                    await session.run(`MATCH (d), (e)
-             WHERE elementId(d) = $docId AND elementId(e) = $entityId
-             MERGE (d)-[:REFERENCES]->(e)`, { docId: documentNodeId, entityId: entity.nodeId });
-                    entityLinks++;
-                }
-                catch {
-                    // Entity node may have been deleted — skip silently, log via caller
+            const sectionId = sectionResult.records[0].get("nodeId");
+            bumpKind(kindBreakdown, section.kind);
+            bumpEdge(edgeBreakdown, "HAS_SECTION");
+            // 5c. NEXT chain in reading order.
+            if (previousSectionId) {
+                await dbSession.run(`MATCH (a:Section), (b:Section)
+           WHERE elementId(a) = $prev AND elementId(b) = $cur
+           CREATE (a)-[n:NEXT]->(b)
+           SET n.createdByAgent   = $createdByAgent,
+               n.createdBySession = $createdBySession,
+               n.source           = $source,
+               n.sourceDocumentId = $sourceDocumentId,
+               n.createdAt        = $createdAt`, {
+                    prev: previousSectionId,
+                    cur: sectionId,
+                    createdByAgent: PROVENANCE_AGENT,
+                    createdBySession: sessionId ?? "",
+                    source: PROVENANCE_SOURCE,
+                    sourceDocumentId: attachmentId,
+                    createdAt: now,
+                });
+                bumpEdge(edgeBreakdown, "NEXT");
+            }
+            previousSectionId = sectionId;
+            // 5d. Anchor edge (identity kinds only). Writer applies the edge the
+            //     classifier proposed; never invents.
+            if (section.anchorEdge && IDENTITY_KINDS_SET.has(section.kind)) {
+                const direction = section.anchorEdge.direction;
+                const edgeType = section.anchorEdge.type;
+                const edgeProps = {
+                    ...(section.anchorEdge.properties ?? {}),
+                    createdByAgent: PROVENANCE_AGENT,
+                    createdBySession: sessionId ?? "",
+                    source: PROVENANCE_SOURCE,
+                    sourceDocumentId: attachmentId,
+                    createdAt: now,
+                };
+                const cypher = direction === "from-anchor"
+                    ? `MATCH (a:\`${anchorLabel}\`) WHERE elementId(a) = $anchorId AND a.accountId = $accountId
+             MATCH (s:Section) WHERE elementId(s) = $sectionId
+             CREATE (a)-[edge:\`${edgeType}\`]->(s)
+             SET edge += $edgeProps`
+                    : `MATCH (a:\`${anchorLabel}\`) WHERE elementId(a) = $anchorId AND a.accountId = $accountId
+             MATCH (s:Section) WHERE elementId(s) = $sectionId
+             CREATE (s)-[edge:\`${edgeType}\`]->(a)
+             SET edge += $edgeProps`;
+                await dbSession.run(cypher, { anchorId: anchorNodeId, sectionId, accountId, edgeProps });
+                bumpEdge(edgeBreakdown, edgeType);
+            }
+            // 5e. Related entities (Organization for AT, DefinedTerm for DEFINES on
+            //     :Section:Definitions, etc.). Apply each as the classifier emitted.
+            if (section.related && section.related.length > 0) {
+                for (const related of section.related) {
+                    await writeRelatedAndEdge(dbSession, sectionId, related, accountId, now, sessionId ?? "", attachmentId);
+                    relatedCount += 1;
+                    bumpEdge(edgeBreakdown, related.edge.type);
                 }
             }
         }
-        log("neo4j-done", `doc=${documentNodeId.slice(0, 12)} sections=${sections.length} chunks=${totalChunks} entityLinks=${entityLinks}`);
+        // 6. Document-level edges (PARTY for contracts, etc.). Applied off the
+        //    KnowledgeDocument; classifier proposes, writer applies.
+        if (documentEdges && documentEdges.length > 0) {
+            for (const docEdge of documentEdges) {
+                await writeDocumentEdge(dbSession, documentNodeId, docEdge, accountId, now, sessionId ?? "", attachmentId);
+                relatedCount += 1;
+                bumpEdge(edgeBreakdown, docEdge.type);
+            }
+        }
+        log("neo4j-done", `doc=${documentNodeId.slice(0, 12)} sections=${sections.length} kinds=${JSON.stringify(kindBreakdown)} edges=${JSON.stringify(edgeBreakdown)} related=${relatedCount} orphans=${orphanCandidates.length}`);
+        console.error(`[memory-ingest] sections=${sections.length} chain=${Math.max(0, sections.length - 1)} typed=${formatBreakdown(kindBreakdown)} edges=${formatBreakdown(edgeBreakdown)} orphans=${orphanCandidates.length} docId=${documentNodeId}`);
+        for (const orphan of orphanCandidates) {
+            console.error(`[document-ingest] orphan-candidate node=${orphan.kind} label=${JSON.stringify(orphan.label)} reason=${JSON.stringify(orphan.reason)}`);
+        }
         return {
             documentNodeId,
             sectionCount: sections.length,
-            chunkCount: totalChunks,
-            entityLinks,
+            kindBreakdown,
+            edgeBreakdown,
+            relatedCount,
+            standaloneCount,
+            orphanCandidates,
             documentSummary,
             keywords,
         };
     }
     finally {
-        // Evict cache entry — content is now persisted in Neo4j
         extractCache.delete(attachmentId);
-        await session.close();
+        await dbSession.close();
         log("complete");
     }
 }
+// ---------------------------------------------------------------------------
+// Helpers — standalone-node writer (Project), related-edge writer, document-
+// edge writer.
+// ---------------------------------------------------------------------------
+async function writeStandaloneNode(dbSession, section, baseProps, anchorNodeId, anchorLabel, attachmentId, sessionId, now) {
+    const props = { ...section.properties, ...baseProps };
+    const r = await dbSession.run(`CREATE (n:\`${section.kind}\`)
+     SET n = $props
+     RETURN elementId(n) AS nodeId`, { props });
+    const nodeId = r.records[0].get("nodeId");
+    if (section.anchorEdge) {
+        const direction = section.anchorEdge.direction;
+        const edgeType = section.anchorEdge.type;
+        const edgeProps = {
+            ...(section.anchorEdge.properties ?? {}),
+            createdByAgent: PROVENANCE_AGENT,
+            createdBySession: sessionId,
+            source: PROVENANCE_SOURCE,
+            sourceDocumentId: attachmentId,
+            createdAt: now,
+        };
+        const cypher = direction === "from-anchor"
+            ? `MATCH (a:\`${anchorLabel}\`) WHERE elementId(a) = $anchorId AND a.accountId = $accountId
+         MATCH (n) WHERE elementId(n) = $nodeId
+         CREATE (a)-[edge:\`${edgeType}\`]->(n)
+         SET edge += $edgeProps`
+            : `MATCH (a:\`${anchorLabel}\`) WHERE elementId(a) = $anchorId AND a.accountId = $accountId
+         MATCH (n) WHERE elementId(n) = $nodeId
+         CREATE (n)-[edge:\`${edgeType}\`]->(a)
+         SET edge += $edgeProps`;
+        await dbSession.run(cypher, {
+            anchorId: anchorNodeId,
+            nodeId,
+            accountId: baseProps.accountId,
+            edgeProps,
+        });
+    }
+    return nodeId;
+}
+async function writeRelatedAndEdge(dbSession, fromNodeId, related, accountId, now, sessionId, attachmentId) {
+    const relatedNodeId = await writeRelatedNode({
+        session: dbSession,
+        accountId,
+        related,
+        now,
+        sessionId,
+    });
+    await dbSession.run(`MATCH (a) WHERE elementId(a) = $a
+     MATCH (b) WHERE elementId(b) = $b
+     CREATE (a)-[edge:\`${related.edge.type}\`]->(b)
+     SET edge += $edgeProps`, {
+        a: related.edge.direction === "outgoing" ? fromNodeId : relatedNodeId,
+        b: related.edge.direction === "outgoing" ? relatedNodeId : fromNodeId,
+        edgeProps: {
+            ...(related.edge.properties ?? {}),
+            createdByAgent: PROVENANCE_AGENT,
+            createdBySession: sessionId,
+            source: PROVENANCE_SOURCE,
+            sourceDocumentId: attachmentId,
+            createdAt: now,
+        },
+    });
+}
+/**
+ * Write a document-level edge: resolve (or create) the target node, then
+ * CREATE a provenance-stamped relationship from the KnowledgeDocument to it.
+ *
+ * @param dbSession      Session object exposing `run(cypher, params)`.
+ * @param documentNodeId elementId of the `:KnowledgeDocument` node.
+ * @param docEdge        Edge descriptor; `type`, `direction`, `targetKind`,
+ *                       `targetProperties` and `merge` are read here.
+ * @param accountId      Account id forwarded to `writeRelatedNode`.
+ * @param now            Value stored as `createdAt` on the edge.
+ * @param sessionId      Stored as `createdBySession` ("" when null/undefined).
+ * @param attachmentId   Stored as `sourceDocumentId` on the edge.
+ * @returns A promise that resolves once the edge has been written.
+ */
+async function writeDocumentEdge(dbSession, documentNodeId, docEdge, accountId, now, sessionId, attachmentId) {
+    // Build a synthetic ClassifiedRelated so we can reuse writeRelatedNode for
+    // the target. The MERGE-vs-CREATE decision is the same; the edge is off
+    // the KnowledgeDocument rather than off a Section.
+    const synthetic = {
+        kind: docEdge.targetKind,
+        properties: docEdge.targetProperties,
+        edge: { type: docEdge.type, direction: docEdge.direction },
+        // Merging is the default; only an explicit `merge: false` opts out.
+        merge: docEdge.merge !== false,
+    };
+    const targetNodeId = await writeRelatedNode({
+        session: dbSession,
+        accountId,
+        related: synthetic,
+        now,
+        sessionId,
+    });
+    // The relationship type is spliced into the query text inside a
+    // backtick-quoted identifier. Doubling any embedded backtick keeps a
+    // malformed or hostile type from terminating the identifier early.
+    const edgeType = String(docEdge.type).replace(/`/g, "``");
+    // NOTE(review): docEdge.direction is copied onto the synthetic descriptor
+    // above but is not consulted here; the edge always runs document -> target.
+    // Confirm that is intended for "incoming" document edges.
+    await dbSession.run(`MATCH (d:KnowledgeDocument) WHERE elementId(d) = $docId
+     MATCH (n) WHERE elementId(n) = $targetId
+     CREATE (d)-[edge:\`${edgeType}\`]->(n)
+     SET edge.createdByAgent   = $createdByAgent,
+         edge.createdBySession = $createdBySession,
+         edge.source           = $source,
+         edge.sourceDocumentId = $sourceDocumentId,
+         edge.createdAt        = $createdAt`, {
+        docId: documentNodeId,
+        targetId: targetNodeId,
+        createdByAgent: PROVENANCE_AGENT,
+        // Same fallback writeRelatedNode uses, so $createdBySession is always
+        // a defined query parameter.
+        createdBySession: sessionId ?? "",
+        source: PROVENANCE_SOURCE,
+        sourceDocumentId: attachmentId,
+        createdAt: now,
+    });
+}
+/**
+ * Render an object's own enumerable entries as a compact `key:value` list.
+ *
+ * @param b Object whose entries are rendered; values are converted to
+ *          strings by template-literal interpolation.
+ * @returns The entries joined with "," (no spaces), or the literal "{}"
+ *          when the object has no entries.
+ */
+function formatBreakdown(b) {
+    const parts = [];
+    for (const [label, value] of Object.entries(b)) {
+        parts.push(`${label}:${value}`);
+    }
+    // An empty object is shown as "{}" rather than as an empty string.
+    return parts.length > 0 ? parts.join(",") : "{}";
+}
+/**
+ * Write a related-entity node and return its elementId.
+ *
+ * When `related.merge` is not `false` and `mergeKeyFor` yields a key for the
+ * node's kind and properties, the node is MERGEd on that key plus `accountId`;
+ * otherwise a fresh node is CREATEd. Both paths store an embedding and the
+ * provenance fields.
+ *
+ * @param opts Options bag: `session` (exposes `run(cypher, params)`),
+ *             `accountId`, `related` (`kind`, `properties`, optional `merge`),
+ *             `now` (stored as createdAt/updatedAt) and `sessionId` (stored
+ *             as createdBySession, "" when null/undefined).
+ * @returns Promise resolving to the elementId of the merged or created node.
+ */
+async function writeRelatedNode(opts) {
+    const { session, accountId, related, now, sessionId } = opts;
+    // Compute embedding from a representative property string so the related
+    // node is searchable. Falls back to label+JSON if no `name` field exists.
+    const embedSource = typeof related.properties.name === "string"
+        ? related.properties.name
+        : `${related.kind} ${JSON.stringify(related.properties)}`;
+    const relatedEmbedding = await embed(embedSource);
+    // The label (and the merge key below) are spliced into the query text
+    // inside backtick-quoted identifiers. Doubling any embedded backtick keeps
+    // a malformed or hostile value from terminating the identifier early.
+    const kindLabel = String(related.kind).replace(/`/g, "``");
+    const mergeKey = related.merge !== false ? mergeKeyFor(related.kind, related.properties) : null;
+    if (mergeKey) {
+        const mergeProp = String(mergeKey.key).replace(/`/g, "``");
+        // MERGE on the identifying property + accountId. ON CREATE stamps
+        // provenance; ON MATCH leaves provenance intact (so re-ingest of the
+        // same Organization across docs doesn't overwrite the original
+        // provenance — first-write wins for shared entities).
+        const result = await session.run(`MERGE (r:\`${kindLabel}\` { accountId: $accountId, \`${mergeProp}\`: $mergeValue })
+       ON CREATE SET r += $createProps,
+                     r.embedding        = $embedding,
+                     r.createdAt        = $createdAt,
+                     r.updatedAt        = $createdAt,
+                     r.createdByAgent   = $createdByAgent,
+                     r.createdBySource  = $createdByAgent,
+                     r.createdBySession = $createdBySession,
+                     r.source           = $source
+       ON MATCH  SET r.updatedAt = $createdAt
+       RETURN elementId(r) AS nodeId`, {
+            accountId,
+            mergeValue: mergeKey.value,
+            createProps: { ...related.properties, accountId, scope: "shared" },
+            embedding: relatedEmbedding,
+            createdAt: now,
+            createdByAgent: PROVENANCE_AGENT,
+            createdBySession: sessionId ?? "",
+            source: PROVENANCE_SOURCE,
+        });
+        return result.records[0].get("nodeId");
+    }
+    // CREATE — no stable identifying property, treat as one-of-a-kind for
+    // this document.
+    // NOTE(review): no sourceDocumentId is written on the node here (this
+    // function does not receive one); the callers in this file stamp
+    // sourceDocumentId on the connecting edge instead. Confirm that re-ingest
+    // cleanup can find these nodes that way.
+    const result = await session.run(`CREATE (r:\`${kindLabel}\`)
+     SET r = $props
+     RETURN elementId(r) AS nodeId`, {
+        props: {
+            // Classifier-supplied properties go first so the system fields
+            // below always win on key collisions.
+            ...related.properties,
+            accountId,
+            scope: "shared",
+            embedding: relatedEmbedding,
+            createdAt: now,
+            updatedAt: now,
+            createdByAgent: PROVENANCE_AGENT,
+            createdBySource: PROVENANCE_AGENT,
+            createdBySession: sessionId ?? "",
+            source: PROVENANCE_SOURCE,
+        },
+    });
+    return result.records[0].get("nodeId");
+}
 //# sourceMappingURL=memory-ingest.js.map