@rubytech/create-realagent 1.0.705 → 1.0.707

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. package/dist/__tests__/apt-resolve.test.js +179 -0
  2. package/dist/apt-resolve.js +73 -0
  3. package/dist/index.js +48 -46
  4. package/package.json +3 -3
  5. package/payload/platform/lib/graph-mcp/dist/__tests__/schema-cypher-parser.test.d.ts +2 -0
  6. package/payload/platform/lib/graph-mcp/dist/__tests__/schema-cypher-parser.test.d.ts.map +1 -0
  7. package/payload/platform/lib/graph-mcp/dist/__tests__/schema-cypher-parser.test.js +89 -0
  8. package/payload/platform/lib/graph-mcp/dist/__tests__/schema-cypher-parser.test.js.map +1 -0
  9. package/payload/platform/lib/graph-mcp/dist/schema-cypher-parser.d.ts +42 -0
  10. package/payload/platform/lib/graph-mcp/dist/schema-cypher-parser.d.ts.map +1 -0
  11. package/payload/platform/lib/graph-mcp/dist/schema-cypher-parser.js +87 -0
  12. package/payload/platform/lib/graph-mcp/dist/schema-cypher-parser.js.map +1 -0
  13. package/payload/platform/lib/graph-mcp/src/__tests__/schema-cypher-parser.test.ts +99 -0
  14. package/payload/platform/lib/graph-mcp/src/schema-cypher-parser.ts +84 -0
  15. package/payload/platform/neo4j/schema.cypher +23 -0
  16. package/payload/platform/plugins/admin/PLUGIN.md +1 -0
  17. package/payload/platform/plugins/admin/mcp/dist/index.js +30 -0
  18. package/payload/platform/plugins/admin/mcp/dist/index.js.map +1 -1
  19. package/payload/platform/plugins/admin/skills/business-profile/SKILL.md +2 -2
  20. package/payload/platform/plugins/admin/skills/onboarding/SKILL.md +47 -6
  21. package/payload/platform/plugins/docs/references/adherence.md +1 -1
  22. package/payload/platform/plugins/memory/PLUGIN.md +25 -16
  23. package/payload/platform/plugins/memory/mcp/dist/index.js +146 -38
  24. package/payload/platform/plugins/memory/mcp/dist/index.js.map +1 -1
  25. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/live-schema-source.test.d.ts +2 -0
  26. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/live-schema-source.test.d.ts.map +1 -0
  27. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/live-schema-source.test.js +92 -0
  28. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/live-schema-source.test.js.map +1 -0
  29. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-loader.test.d.ts +2 -0
  30. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-loader.test.d.ts.map +1 -0
  31. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-loader.test.js +51 -0
  32. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-loader.test.js.map +1 -0
  33. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-validator.test.d.ts +2 -0
  34. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-validator.test.d.ts.map +1 -0
  35. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-validator.test.js +222 -0
  36. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-validator.test.js.map +1 -0
  37. package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.d.ts +1 -7
  38. package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.d.ts.map +1 -1
  39. package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.js +27 -14
  40. package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.js.map +1 -1
  41. package/payload/platform/plugins/memory/mcp/dist/lib/graph-write-gate.d.ts +16 -0
  42. package/payload/platform/plugins/memory/mcp/dist/lib/graph-write-gate.d.ts.map +1 -1
  43. package/payload/platform/plugins/memory/mcp/dist/lib/graph-write-gate.js +38 -11
  44. package/payload/platform/plugins/memory/mcp/dist/lib/graph-write-gate.js.map +1 -1
  45. package/payload/platform/plugins/memory/mcp/dist/lib/live-schema-source.d.ts +136 -0
  46. package/payload/platform/plugins/memory/mcp/dist/lib/live-schema-source.d.ts.map +1 -0
  47. package/payload/platform/plugins/memory/mcp/dist/lib/live-schema-source.js +180 -0
  48. package/payload/platform/plugins/memory/mcp/dist/lib/live-schema-source.js.map +1 -0
  49. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts +126 -0
  50. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts.map +1 -0
  51. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js +253 -0
  52. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js.map +1 -0
  53. package/payload/platform/plugins/memory/mcp/dist/lib/schema-loader.d.ts +11 -2
  54. package/payload/platform/plugins/memory/mcp/dist/lib/schema-loader.d.ts.map +1 -1
  55. package/payload/platform/plugins/memory/mcp/dist/lib/schema-loader.js +6 -3
  56. package/payload/platform/plugins/memory/mcp/dist/lib/schema-loader.js.map +1 -1
  57. package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.d.ts +44 -22
  58. package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.d.ts.map +1 -1
  59. package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.js +94 -57
  60. package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.js.map +1 -1
  61. package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.d.ts +34 -0
  62. package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.d.ts.map +1 -0
  63. package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.js +46 -0
  64. package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.js.map +1 -0
  65. package/payload/platform/plugins/memory/mcp/dist/tools/memory-edit-attachment.d.ts +1 -2
  66. package/payload/platform/plugins/memory/mcp/dist/tools/memory-edit-attachment.d.ts.map +1 -1
  67. package/payload/platform/plugins/memory/mcp/dist/tools/memory-edit-attachment.js +8 -9
  68. package/payload/platform/plugins/memory/mcp/dist/tools/memory-edit-attachment.js.map +1 -1
  69. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-extract.d.ts +5 -17
  70. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-extract.d.ts.map +1 -1
  71. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-extract.js +26 -49
  72. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-extract.js.map +1 -1
  73. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-web.d.ts.map +1 -1
  74. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-web.js +4 -25
  75. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-web.js.map +1 -1
  76. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts +23 -14
  77. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts.map +1 -1
  78. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js +410 -164
  79. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js.map +1 -1
  80. package/payload/platform/plugins/memory/mcp/dist/tools/memory-write.d.ts +7 -5
  81. package/payload/platform/plugins/memory/mcp/dist/tools/memory-write.d.ts.map +1 -1
  82. package/payload/platform/plugins/memory/mcp/dist/tools/memory-write.js +2 -2
  83. package/payload/platform/plugins/memory/mcp/dist/tools/memory-write.js.map +1 -1
  84. package/payload/platform/plugins/memory/references/schema-base.md +33 -0
  85. package/payload/platform/plugins/memory/skills/document-ingest/SKILL.md +112 -0
  86. package/payload/platform/templates/agents/admin/IDENTITY.md +1 -2
  87. package/payload/platform/templates/specialists/agents/content-producer.md +10 -77
  88. package/payload/platform/templates/specialists/agents/database-operator.md +21 -13
  89. package/payload/server/chunk-PE76FPYP.js +12040 -0
  90. package/payload/server/maxy-edge.js +1 -1
  91. package/payload/server/public/assets/{Checkbox-B2Lk8F4X.js → Checkbox-CjbS9JcG.js} +1 -1
  92. package/payload/server/public/assets/{admin-agtgi48Q.js → admin-Ce9DbUuu.js} +1 -1
  93. package/payload/server/public/assets/{data-B7nsyBTV.js → data-C-SxjLC9.js} +1 -1
  94. package/payload/server/public/assets/{file-DHWTu8LP.js → file-D4cbAAuo.js} +1 -1
  95. package/payload/server/public/assets/{graph-ChDwqqhJ.js → graph-BRD96pKD.js} +8 -8
  96. package/payload/server/public/assets/{house-CfjnRPO6.js → house-CYsVygEQ.js} +1 -1
  97. package/payload/server/public/assets/{jsx-runtime-81wg0w0Q.css → jsx-runtime-DPXE45W9.css} +1 -1
  98. package/payload/server/public/assets/{public-CE1kyVnz.js → public-BTOF98iO.js} +1 -1
  99. package/payload/server/public/assets/{share-2-CAd1beVT.js → share-2-B-sbkB36.js} +1 -1
  100. package/payload/server/public/assets/{useVoiceRecorder-LSAU68Eo.js → useVoiceRecorder-DLVFx3ms.js} +1 -1
  101. package/payload/server/public/assets/{x-B0xK3Aoq.js → x-BNidzSAn.js} +1 -1
  102. package/payload/server/public/data.html +6 -6
  103. package/payload/server/public/graph.html +7 -7
  104. package/payload/server/public/index.html +8 -8
  105. package/payload/server/public/public.html +5 -5
  106. package/payload/server/server.js +6 -10
  107. /package/payload/server/public/assets/{jsx-runtime-DhzH26q8.js → jsx-runtime-BUs3sHtV.js} +0 -0
@@ -110,7 +110,7 @@ Present the admin SOUL via `render-component` with `name: "document-editor"` and
110
110
 
111
111
  After the admin SOUL is written and approved, call `onboarding-complete-step` with step 6.
112
112
 
113
- **Document ingestion.** If the user uploaded any documents during Step 6 (or earlier in the session), dispatch the `content-producer` subagent to ingest them AFTER calling `onboarding-complete-step` — not before. Use the Agent tool with `run_in_background: true`. The critical path (SOUL file, step completion) must not depend on document ingestion succeeding. If no documents were uploaded, skip this step.
113
+ **Document ingestion.** If the user uploaded any documents during Step 6 (or earlier in the session), dispatch the `database-operator` subagent (via the universal `document-ingest` skill) to ingest them AFTER calling `onboarding-complete-step` — not before. Use the Agent tool with `run_in_background: true`. The critical path (SOUL file, step completion) must not depend on document ingestion succeeding. Include the document path, the document subject (typically the account owner's UserProfile or the LocalBusiness depending on the doc), and the scope in the brief. If no documents were uploaded, skip this step.
114
114
 
115
115
  **Next steps.** After completing onboarding, let the user know that everything configured during onboarding — plugins, WiFi, output style, thinking view, timezone, and personality — can be changed at any time through conversation. Then suggest three things the user can do next — all optional and available whenever they are ready:
116
116
 
@@ -167,14 +167,55 @@ All retry loops re-evaluate using the `action` returned in the most recent respo
167
167
 
168
168
  Do not read any skill files. Do not call any other Anthropic tools except `anthropic-setup`. Do not dispatch specialists. The `anthropic-setup` tool handles the entire flow.
169
169
 
170
- ## Step 9 — Business profile
170
+ ## Step 9 — Operator persona and profile bootstrap
171
171
 
172
172
  *(skip if `currentStep` >= 9)*
173
173
 
174
- Populate the business's operational identity in the graph — the admin user node, the `LocalBusiness` node, and its core properties (name, description, address, hours, services). Without this, the graph-write gate refuses any user-domain write, so capturing it now avoids the agent being interrupted mid-task later.
174
+ Pin the operator's persona and bootstrap the graph nodes that satisfy the graph-write gate. The persona choice is the trust-shaping moment for this step — an employee who answers "what's your business?" with their employer's name silently registers a `LocalBusiness` owned by the wrong party, so we must surface the question explicitly before any write.
175
175
 
176
- Invoke the `business-profile` skill. Follow its first-run path: create the `AdminUser` node (bound to the `userId` from users.json), create the `LocalBusiness` node, collect identity + address + whichever additional domains (hours, services, FAQs, brand assets) the user provides. The skill knows how to adapt — accept partial input and allow skipping sections.
176
+ **Render the persona select first.** Call `render-component` with `name: "single-select"` and data:
177
177
 
178
- When `business-profile` reports that the `AdminUser` and `LocalBusiness` nodes exist in the graph, call `onboarding-complete-step` with step 9. Do not mark step 9 complete before both nodes exist — the gate's precondition must be real, not just recorded.
178
+ ```
179
+ {
180
+ submitMessage: "Persona: {{value}}",
181
+ options: [
182
+ {
183
+ value: "personal",
184
+ label: "Just for me",
185
+ description: "I am not setting this up for a business — Maxy is my personal operations agent."
186
+ },
187
+ {
188
+ value: "business-owner",
189
+ label: "For my business",
190
+ description: "I am the owner / operator and Maxy is the operations agent for my company."
191
+ },
192
+ {
193
+ value: "employee",
194
+ label: "I work somewhere, but not as the owner",
195
+ description: "I have an employer, but this device is for my personal use — my employer is NOT being registered here."
196
+ }
197
+ ]
198
+ }
199
+ ```
200
+
201
+ **Wait for the user's submission.** If the user picks "Other" or types free text instead of selecting, ask them which of the three personas best describes them and re-render the select. Do not proceed without one of the three documented modes — the agent must not improvise a fourth path. If the user pivots off-topic mid-flow, answer their question briefly and re-render the select; step 9 stays incomplete until they pick a mode.
202
+
203
+ **Call `onboarding-step9-mode` with the chosen mode before any graph write or skill invocation.** The tool emits the diagnostic log line and returns the deterministic next-action prose. Branch on the mode:
204
+
205
+ ### `business-owner`
206
+
207
+ Invoke the `business-profile` skill. Follow its first-run path: create the `AdminUser` node, create the `LocalBusiness` node, collect identity + address + whichever additional domains (hours, services, FAQs, brand assets) the user provides. When `business-profile` reports that both nodes exist in the graph, call `onboarding-complete-step` with step 9. Do not mark step 9 complete before both nodes exist — the gate's precondition must be real, not just recorded.
208
+
209
+ ### `personal` or `employee`
210
+
211
+ Personal/employee mode does not register a `LocalBusiness`. Instead, bootstrap the operator's profile so the graph-write gate is satisfied without a business node:
212
+
213
+ 1. **Ask the user for their email** in one short conversational message — Maxy needs an email or phone number on the personal-profile node, and email is the more useful signal for downstream features.
214
+ 2. **Read the admin's `userId` and `name` from `admin-identity` in your system prompt.** Split `name` into `givenName` (first token) and `familyName` (remainder, or empty if a single token).
215
+ 3. **Write the `AdminUser` node.** Call `memory-write` with `labels: ["AdminUser"]`, `properties: { userId, name }`, `scope: "admin"`, `relationships: [{ type: "HAS_ACCOUNT_SCOPE", direction: "outgoing", targetNodeId: "<account-anchor>" }]` — or whatever adjacency convention the current schema requires (grep `AdminUser` in the codebase for a live example if unsure).
216
+ 4. **Write the personal-profile `Person` node.** Call `memory-write` with `labels: ["Person"]`, `properties: { givenName, familyName, email, role: "admin-personal" }`, `scope: "admin"`, `relationships: [{ type: "OWNS", direction: "incoming", targetNodeId: "<AdminUser-elementId>" }]`. The `role: "admin-personal"` property is what the graph-write gate looks for in lieu of a `LocalBusiness`.
217
+ 5. **Mark step 9 complete.** Call `onboarding-complete-step` with step 9.
218
+
219
+ After step 9 completes in personal/employee mode, tell the user that Maxy is configured for personal use — their employer (if any) is not registered here. If they later become the operator for a business of their own, they can ask Maxy to set up a business profile, which invokes the `business-profile` skill directly.
179
220
 
180
- If the user declines to complete business-profile now, leave step 9 incomplete. The next session will resume here, and any attempt to write user-domain data will surface `Write blocked (no-admin-user)` or `Write blocked (no-local-business)` via the gate, pulling the agent back into this step.
221
+ If the user declines to bootstrap during step 9 in any mode, leave step 9 incomplete. The next session will resume here, and any attempt to write user-domain data will surface `Write blocked (no-admin-user)` or `Write blocked (no-local-business)` via the gate, pulling the agent back into this step.
@@ -93,6 +93,6 @@ The constraint is computed once per turn at the top of `invokeAgent` and frozen
93
93
 
94
94
  ## Limits and deferrals
95
95
 
96
- v1 covers the admin agent only. Specialist subagents (`personal-assistant`, `project-manager`, `research-assistant`, `content-producer`) do not receive their own ledger injection yet — their `.md` templates load via `--plugin-dir` and have no TS-side assembly site. Follow-up task filed.
96
+ v1 covers the admin agent only. Specialist subagents (`personal-assistant`, `project-manager`, `research-assistant`, `content-producer`, `database-operator`) do not receive their own ledger injection yet — their `.md` templates load via `--plugin-dir` and have no TS-side assembly site. Follow-up task filed.
97
97
 
98
98
  No cross-agent rule inheritance, no user-visible correction-ack signal, no blocking-critic retry loop in v1 — each is a separate follow-up task. See [`.docs/agents.md`](../../../../.docs/agents.md) § Adherence Fidelity for the full deferral list with task numbers.
@@ -10,6 +10,7 @@ tools:
10
10
  - memory-ingest
11
11
  - memory-ingest-extract
12
12
  - memory-ingest-web
13
+ - memory-classify
13
14
  - memory-find-candidates
14
15
  - memory-delete
15
16
  - memory-restore
@@ -32,6 +33,7 @@ hidden:
32
33
  - session-compact-status
33
34
  skills:
34
35
  - skills/conversational-memory/SKILL.md
36
+ - skills/document-ingest/SKILL.md
35
37
  always: true
36
38
  embed: false
37
39
  ---
@@ -89,32 +91,39 @@ Restricted fields (`accountId`, `embedding`, `profileVersion`) cannot be set via
89
91
 
90
92
  ## Schema References
91
93
 
92
- Before any structured write, load `references/schema-base.md` via `plugin-read`. This defines universal node types, property naming rules, and relationship patterns. If the `LocalBusiness` node has a `businessType` property, also load the matching vertical schema (`references/schema-{businessType}.md`) — it extends the base with vertical-specific types. Confirm which schemas were consulted before writing.
94
+ Before any structured write, load `references/schema-base.md` via `plugin-read`. This defines property naming rules, required-property groups for documented types, and relationship patterns. If the `LocalBusiness` node has a `businessType` property, also load the matching vertical schema (`references/schema-{businessType}.md`) — it extends the base with vertical-specific types. Confirm which schemas were consulted before writing.
95
+
96
+ **Validation surface (Task 736).** `memory-write` validates labels against `db.labels()` ∪ `schema.cypher` declarations — not against this markdown. A label is recognised if it appears in either set. The markdown defines property *shape* (required-property groups, naming rules) for documented labels only; recognised-but-undocumented labels (e.g. `LocalBusiness`, `AdminUser`, `KnowledgeDocument`) accept any property shape and emit `[schema-validator] markdown-undocumented label=<X>` so the doc gap is visible to operators. If `memory-write` rejects a label as unknown, the rejection lists both source sets — the agent can call `maxy-graph-get_neo4j_schema` to refresh its view.
93
97
 
94
98
  ## Document Ingestion
95
99
 
96
- ### Scope
100
+ Document ingestion of any kind — PDFs, text, transcripts, web pages, single files — routes to the `database-operator` specialist, which loads the universal `document-ingest` skill at `skills/document-ingest/SKILL.md`. The admin agent never calls `memory-ingest` directly; it dispatches with the document path, the document subject (the anchor node), and the visibility scope.
97
101
 
98
- Before calling `memory-ingest`, ask the user what visibility scope the document should have. Do not assume a default. Present the options:
102
+ ### Pipeline (Task 737)
99
103
 
100
- - **public** — visible to public agents and the admin. Use for business knowledge that customers or visitors should be able to ask about (product info, services, FAQs, policies).
101
- - **shared** — visible to all agents on the account but not surfaced to unauthenticated public visitors. Use for internal operational knowledge that multiple agents need.
102
- - **admin** — visible only to the admin agent. Use for sensitive or internal-only content (contracts, credentials, internal processes).
104
+ The skill drives a three-tool pipeline:
103
105
 
104
- If the user's intent is unambiguous from context — e.g., "save this for the sales agent" implies public, "this is just for my reference" implies admin — confirm the inferred scope rather than asking. When delegating to the content-producer, include the confirmed scope in the task description.
106
+ 1. **`memory-ingest-extract`** — pulls text from PDF/markdown/plain-text and caches it under the `attachmentId`. No chunking — the chunker has moved upstream into LLM-driven section classification.
107
+ 2. **`memory-classify`** — calls Claude Haiku with the loaded ontology and the cached text; returns typed sections (`{kind, title, body, properties, anchorEdge, related}`). Every returned `kind` is verified against the live ontology label set; invalid labels become `UNMAPPED` so a hallucination never reaches the writer.
108
+ 3. **`memory-ingest`** — writes typed graph nodes (Position, Service, Credential, etc.) anchored to `UserProfile` / `LocalBusiness` / `Person` / `Organization` via natural ontology edges, plus `(KnowledgeDocument)-[:REFERENCES]->(typed)` links. UNMAPPED sections become generic `:Section` nodes hanging off the document via `HAS_SECTION` (legacy fallback) so free-form prose retrieval still works.
105
109
 
106
- ### Keywords
110
+ ### Scope
111
+
112
+ `memory-ingest` requires a `scope` value on every call. The admin agent confirms the scope with the operator before dispatching:
107
113
 
108
- After a successful `memory-ingest` call, present the ingestion results to the user before moving on:
114
+ - **public** — visible to public agents and the admin. Use for business knowledge that customers or visitors should be able to ask about.
115
+ - **shared** — visible to all agents on the account but not surfaced to unauthenticated public visitors.
116
+ - **admin** — visible only to the admin agent. Use for sensitive content (contracts, credentials, internal processes).
109
117
 
110
- 1. **Display the document summary** from the `documentSummary` field in the tool response.
111
- 2. **Display the extracted keywords** from the `keywords` array in the tool response (omit if absent).
112
- 3. **Prompt for user keywords** — e.g., "Would you like to add any of your own keywords or tags to file this under?"
118
+ If the user's intent is unambiguous from context — e.g., "save this for the sales agent" implies public, "this is just for my reference" implies admin — confirm the inferred scope rather than asking.
113
119
 
114
- If the user provides keywords, normalize them (lowercase, trim), merge with the existing `keywords` array (deduplicate), and call `memory-update` on the `documentNodeId` with the merged array as the `keywords` property. User-supplied keywords appear first in the merged array.
120
+ ### Keywords
115
121
 
116
- If the user declines or says nothing further, the flow is complete.
122
+ The classifier extracts topic keywords as `documentKeywords`; the user can supply their own as `userKeywords`. Both are merged additively (lowercase, trimmed, deduplicated) and stored on the `KnowledgeDocument.keywords` array. User-supplied keywords appear first.
117
123
 
118
- Always generate LLM-extracted keywords as `keywords` on the `memory-ingest` call. The two sources are complementary — LLM keywords capture topic signals; user keywords define the user's intended classification.
124
+ After a successful `memory-ingest` call, the dispatching admin agent should:
125
+ 1. Display the document summary and the extracted keywords to the user.
126
+ 2. Prompt for user keywords if none were supplied with the brief.
127
+ 3. If the user provides additional keywords post-ingest, call `memory-update` on the `documentNodeId` with the merged array.
119
128
 
120
- Keywords support user-defined collections via naming convention (e.g., `["reports", "reports/quarterly", "reports/quarterly/q1-2026"]`). When the user describes a hierarchical filing intent, build the full keyword path.
129
+ Keywords support user-defined collections via naming convention (e.g., `["reports", "reports/quarterly", "reports/quarterly/q1-2026"]`).
@@ -8,10 +8,12 @@ import { memorySearch } from "./tools/memory-search.js";
8
8
  import { memoryRank } from "./tools/memory-rank.js";
9
9
  import { memoryWrite } from "./tools/memory-write.js";
10
10
  import { loadSchema } from "./lib/schema-loader.js";
11
+ import { buildLiveSchemaSource, defaultSchemaCypherPath, } from "./lib/live-schema-source.js";
11
12
  import { memoryReindex } from "./tools/memory-reindex.js";
12
13
  import { memoryIngestExtract } from "./tools/memory-ingest-extract.js";
13
14
  import { memoryIngest } from "./tools/memory-ingest.js";
14
15
  import { memoryIngestWeb } from "./tools/memory-ingest-web.js";
16
+ import { memoryClassify } from "./tools/memory-classify.js";
15
17
  import { memoryUpdate } from "./tools/memory-update.js";
16
18
  import { memoryDelete } from "./tools/memory-delete.js";
17
19
  import { memoryFindCandidates } from "./tools/memory-find-candidates.js";
@@ -40,11 +42,59 @@ const accountId = process.env.ACCOUNT_ID;
40
42
  if (!accountId) {
41
43
  throw new Error("ACCOUNT_ID environment variable is required");
42
44
  }
43
- // Load the schema contract from platform/plugins/memory/references/schema-*.md
44
- // once at startup. Every memory-write call validates against this. If loading
45
- // fails we throw — a memory server running with a broken schema is worse than
46
- // one that fails to start loudly.
45
+ // Load the markdown schema sidecar once at startup. Every memory-write call
46
+ // reads its required-property and synonym maps. If loading fails we throw —
47
+ // a memory server running with a broken schema is worse than one that fails
48
+ // to start loudly.
47
49
  const schema = loadSchema();
50
+ // Live label source (Task 736). The memory MCP runs as a separate stdio
51
+ // process from graph-mcp, so it owns its own SchemaCache instance backed by
52
+ // the existing Neo4j driver. The cache refreshes every 60s and tap-fires
53
+ // drift detection from its emit hook. Boot does NOT block on the first
54
+ // refresh — `defaultDeclaredOnly` is sufficient until live joins, since
55
+ // declared labels cover the bootstrap labels (LocalBusiness etc.).
56
+ const schemaCypherPath = defaultSchemaCypherPath();
57
+ const liveSchemaRuntime = buildLiveSchemaSource({
58
+ schemaCypherPath,
59
+ markdownLabels: schema.markdownLabels.keys(),
60
+ fetcher: {
61
+ async labels() {
62
+ const session = getSession();
63
+ try {
64
+ const result = await session.run("CALL db.labels() YIELD label RETURN label");
65
+ return result.records
66
+ .map((r) => r.get("label"))
67
+ .filter((v) => typeof v === "string");
68
+ }
69
+ finally {
70
+ await session.close();
71
+ }
72
+ },
73
+ async relationshipTypes() {
74
+ const session = getSession();
75
+ try {
76
+ const result = await session.run("CALL db.relationshipTypes() YIELD relationshipType RETURN relationshipType");
77
+ return result.records
78
+ .map((r) => r.get("relationshipType"))
79
+ .filter((v) => typeof v === "string");
80
+ }
81
+ finally {
82
+ await session.close();
83
+ }
84
+ },
85
+ },
86
+ });
87
+ // Boot drift fires once the first refresh resolves. We do not await — the
88
+ // validator handles boot races by accepting declared labels (which include
89
+ // the bootstrap set) until live joins. Drift logs land in server.log as
90
+ // soon as the snapshot lands, with no impact on serving requests.
91
+ liveSchemaRuntime.ready.then(() => {
92
+ process.stderr.write(`[schema-validator] using-cache cacheAgeMs=0 ready=${liveSchemaRuntime.source.liveReady()}\n`);
93
+ });
94
+ const validator = {
95
+ schema,
96
+ liveSource: liveSchemaRuntime.source,
97
+ };
48
98
  const userId = process.env.USER_ID; // Optional — present for admin sessions with users.json auth
49
99
  // Scope filtering: comma-separated list of allowed scopes (e.g. "public,shared").
50
100
  // When set, memory-search only returns nodes whose scope is in this list.
@@ -382,7 +432,7 @@ if (!readOnly) {
382
432
  session: sessionId,
383
433
  tool: "memory-write",
384
434
  },
385
- schema,
435
+ validator,
386
436
  });
387
437
  return {
388
438
  content: [
@@ -508,7 +558,7 @@ if (!readOnly) {
508
558
  session: sessionId,
509
559
  tool: "session-compact",
510
560
  },
511
- schema,
561
+ validator,
512
562
  });
513
563
  return {
514
564
  content: [{ type: "text", text: `Session summary saved (ID: ${result.nodeId})` }],
@@ -546,10 +596,11 @@ if (!readOnly) {
546
596
  };
547
597
  }
548
598
  });
549
- server.tool("memory-ingest-extract", "Extract text from an uploaded file and chunk it server-side. Supports PDF (via pdftotext), plain text, and markdown. " +
599
+ server.tool("memory-ingest-extract", "Extract text from an uploaded file and cache it server-side for memory-classify and memory-ingest (Task 737). " +
600
+ "Supports PDF (via pdftotext), plain text, and markdown. " +
550
601
  "Rejects CSV (structured data — use memory-write instead) and images (metadata-only). " +
551
- "Returns section titles and chunk previews. Full chunk content is cached server-side " +
552
- "the calling agent generates summaries from the previews, then calls memory-ingest with only summaries (no raw content needed).", {
602
+ "Returns file metadata + a short preview; the full text lives in the in-process cache keyed by attachmentId. " +
603
+ "Chunking has moved upstream into memory-classify (LLM-driven section classification) this tool no longer chunks.", {
553
604
  storagePath: z.string().describe("Absolute path to the stored file on disk"),
554
605
  filename: z.string().describe("Original filename as uploaded"),
555
606
  mimeType: z.string().describe("MIME type of the file"),
@@ -574,42 +625,96 @@ if (!readOnly) {
574
625
  };
575
626
  }
576
627
  });
577
- server.tool("memory-ingest", "Ingest a knowledge document into the graph as a three-level hierarchy: KnowledgeDocument → Section → Chunk. " +
578
- "Requires a prior call to memory-ingest-extract (which caches the raw chunk content server-side). " +
579
- "The calling agent provides only summaries — no raw content needed. The tool retrieves cached content, " +
580
- "pairs it with summaries, embeds everything, and writes to the graph. " +
581
- "Re-ingesting with the same attachmentId replaces the existing hierarchy — safe to call on already-ingested documents.", {
628
+ server.tool("memory-classify", "Classify an unstructured document into typed sections via Claude Haiku (Task 737). " +
629
+ "Reads the cached text written by memory-ingest-extract (same attachmentId), runs the classifier against " +
630
+ "the live ontology label set, and returns a JSON structure ready for memory-ingest. Every returned `kind` is " +
631
+ "verified server-side against the ontology — invalid labels become `UNMAPPED` so the writer falls back to " +
632
+ "generic Section nodes with a logged ontology gap. When Haiku is unavailable, returns `{kind: \"fallback\"}` " +
633
+ "with a reason — the skill should treat the whole document as one UNMAPPED Section.", {
634
+ attachmentId: z.string().describe("UUID of the file attachment — must match a prior memory-ingest-extract call"),
635
+ anchorDescription: z.string().describe("Short human sentence describing the document subject the classifier should anchor sections to. " +
636
+ "Examples: 'subject = UserProfile (the account owner)'; 'subject = LocalBusiness (the operator's business)'; " +
637
+ "'subject = Person {name: \"Jane Smith\"} (a third party)'."),
638
+ }, async ({ attachmentId, anchorDescription }) => {
639
+ try {
640
+ const result = await memoryClassify({
641
+ accountId,
642
+ attachmentId,
643
+ anchorDescription,
644
+ liveSchemaSource: liveSchemaRuntime.source,
645
+ });
646
+ return {
647
+ content: [{
648
+ type: "text",
649
+ text: JSON.stringify(result),
650
+ }],
651
+ };
652
+ }
653
+ catch (err) {
654
+ return {
655
+ content: [{
656
+ type: "text",
657
+ text: `memory-classify failed: ${err instanceof Error ? err.message : String(err)}`,
658
+ }],
659
+ isError: true,
660
+ };
661
+ }
662
+ });
663
+ server.tool("memory-ingest", "Write a classified document into the graph as typed nodes anchored to the document subject (Task 737). " +
664
+ "Requires prior calls to memory-ingest-extract (text cache) and memory-classify (typed structure). " +
665
+ "Each section becomes either a typed graph node (Position, Service, Credential, etc.) anchored to the named " +
666
+ "subject via the natural ontology edge plus a (KnowledgeDocument)-[:REFERENCES]->(typed) link, or — for " +
667
+ "UNMAPPED sections — a generic :Section node hanging off the document via HAS_SECTION (legacy fallback). " +
668
+ "Re-ingesting with the same attachmentId replaces the document's typed and untyped children — safe to call " +
669
+ "on already-ingested documents. Shared related entities (Organizations, Persons referenced by typed nodes) " +
670
+ "are MERGEd by identifying property and never deleted on re-ingest.", {
582
671
  attachmentId: z.string().describe("UUID of the file attachment — must match a prior memory-ingest-extract call"),
583
- documentSummary: z.string().describe("LLM-generated summary of the entire document (1-3 sentences)"),
672
+ documentSummary: z.string().describe("Classifier-produced summary of the entire document (1-3 sentences)"),
673
+ anchorNodeId: z.string().describe("Element ID of the anchor node (UserProfile/LocalBusiness/Person/Organization). The document subject " +
674
+ "the operator confirmed during the document-ingest skill's first step."),
675
+ anchorLabel: z.string().describe("Primary label of the anchor node (e.g. 'UserProfile', 'LocalBusiness'). Used in the per-section MATCH " +
676
+ "for anchor-edge creation."),
584
677
  sections: z.array(z.object({
585
- title: z.string().describe("Section title (must match the title from memory-ingest-extract)"),
586
- summary: z.string().describe("LLM-generated summary of this section (1 sentence)"),
587
- chunkSummaries: z.array(z.string().describe("LLM-generated summary of one chunk (1 sentence)"))
588
- .describe("One summary per chunk in this section, in order"),
589
- })).describe("One entry per section from memory-ingest-extract, in order"),
678
+ kind: z.string().describe("Ontology label or the literal 'UNMAPPED'"),
679
+ title: z.string(),
680
+ body: z.string(),
681
+ properties: z.record(z.string(), z.unknown()),
682
+ anchorEdge: z.object({
683
+ type: z.string(),
684
+ direction: z.enum(["from-anchor", "to-anchor"]),
685
+ properties: z.record(z.string(), z.unknown()).optional(),
686
+ }).nullable(),
687
+ related: z.array(z.object({
688
+ kind: z.string(),
689
+ properties: z.record(z.string(), z.unknown()),
690
+ edge: z.object({
691
+ type: z.string(),
692
+ direction: z.enum(["outgoing", "incoming"]),
693
+ properties: z.record(z.string(), z.unknown()).optional(),
694
+ }),
695
+ merge: z.boolean().optional(),
696
+ })).optional(),
697
+ })).describe("Typed sections as returned by memory-classify"),
590
698
  scope: z.string().describe("Visibility scope for all created nodes — required. Valid values: 'public', 'shared', 'admin', or 'user:{identifier}'."),
591
- entities: z.array(z.object({
592
- name: z.string().describe("Entity name (for logging)"),
593
- nodeId: z.string().describe("Neo4j element ID of the entity node"),
594
- })).optional().describe("Entities referenced in the document to link via REFERENCES relationships"),
595
699
  sourceUrl: z.string().optional().describe("Original URL for web-sourced documents. Omit for file uploads."),
596
- sourceType: z.string().optional().describe("Provenance: 'upload' or 'web'. Omit for file uploads (defaults to no sourceType)."),
597
- keywords: z.array(z.string()).optional().describe("LLM-extracted topic keywords for the document. Normalized (lowercase, trimmed, deduplicated) before storage."),
598
- userKeywords: z.array(z.string()).optional().describe("Keywords explicitly provided by the user. Always included verbatim (after normalization). " +
599
- "Use for user-defined collections and classification (e.g. 'file this under quarterly reports')."),
600
- }, async ({ attachmentId, documentSummary, sections, scope, entities, sourceUrl, sourceType, keywords, userKeywords }) => {
700
+ sourceType: z.string().optional().describe("Provenance: 'upload' or 'web'."),
701
+ documentKeywords: z.array(z.string()).optional().describe("Classifier-extracted topic keywords."),
702
+ userKeywords: z.array(z.string()).optional().describe("Keywords explicitly provided by the user. Merged additively with documentKeywords."),
703
+ }, async ({ attachmentId, documentSummary, anchorNodeId, anchorLabel, sections, scope, sourceUrl, sourceType, documentKeywords, userKeywords }) => {
601
704
  try {
602
705
  const result = await memoryIngest({
603
706
  accountId,
604
707
  attachmentId,
605
708
  documentSummary,
709
+ anchorNodeId,
710
+ anchorLabel,
606
711
  sections,
607
712
  scope,
608
- entities,
609
713
  sourceUrl,
610
714
  sourceType,
611
- keywords,
715
+ documentKeywords,
612
716
  userKeywords,
717
+ sessionId,
613
718
  });
614
719
  return {
615
720
  content: [{
@@ -617,6 +722,8 @@ if (!readOnly) {
617
722
  text: JSON.stringify({
618
723
  documentNodeId: result.documentNodeId,
619
724
  sectionCount: result.sectionCount,
725
+ typedCount: result.typedCount,
726
+ unmappedCount: result.unmappedCount,
620
727
  chunkCount: result.chunkCount,
621
728
  entityLinks: result.entityLinks,
622
729
  documentSummary: result.documentSummary,
@@ -635,10 +742,10 @@ if (!readOnly) {
635
742
  };
636
743
  }
637
744
  });
638
- server.tool("memory-ingest-web", "Ingest web content into the knowledge graph. Accepts a URL and its pre-fetched readable content " +
639
- "(the agent calls WebFetch first, then passes the text here). " +
640
- "Extracts a title, writes content to a temp file, and delegates to the existing extraction + chunking pipeline. " +
641
- "Returns section/chunk previews for the agent to generate summaries, then call memory-ingest with sourceUrl and sourceType. " +
745
+ server.tool("memory-ingest-web", "Adapter for web-content ingestion (Task 737). Accepts a URL and its pre-fetched readable content " +
746
+ "(the agent calls WebFetch first, then passes the text here), writes content to a temp file, and delegates " +
747
+ "to memory-ingest-extract — caching the text under a freshly-generated attachmentId. The skill then drives " +
748
+ "memory-classify and memory-ingest using that attachmentId, exactly as for a file upload. " +
642
749
  "If the URL was previously ingested, returns the existing document info so the agent can inform the user.", {
643
750
  url: z.string().describe("The web page URL being ingested"),
644
751
  content: z.string().describe("The readable text content of the web page (pre-fetched by the agent via WebFetch)"),
@@ -879,12 +986,12 @@ if (!readOnly) {
879
986
  const result = await memoryEditAttachment({ accountId, attachmentId, content });
880
987
  let text = `Edited: ${result.filename} (${result.oldSizeBytes} → ${result.newSizeBytes} bytes)`;
881
988
  if (result.cachePopulated) {
882
- text += `\nExtract cache populated: ${result.extractSections} sections, ${result.extractChunks} chunks.`;
883
- text += `\nCall memory-ingest with updated summaries to sync the knowledge graph.`;
989
+ text += `\nExtract cache populated: ${result.extractTextLength} chars.`;
990
+ text += `\nCall memory-classify and memory-ingest to re-sync the knowledge graph.`;
884
991
  }
885
992
  else {
886
993
  text += `\nWARNING: File updated on disk but extract cache could not be populated.`;
887
- text += `\nCall memory-ingest-extract manually, then memory-ingest.`;
994
+ text += `\nCall memory-ingest-extract manually, then memory-classify, then memory-ingest.`;
888
995
  }
889
996
  return { content: [{ type: "text", text }] };
890
997
  }
@@ -1466,6 +1573,7 @@ server.tool("conversation-search", "Search conversation history using semantic v
1466
1573
  });
1467
1574
  // Cleanup on exit
1468
1575
  process.on("SIGINT", async () => {
1576
+ liveSchemaRuntime.cache.stop();
1469
1577
  await closeDriver();
1470
1578
  process.exit(0);
1471
1579
  });