@rubytech/create-maxy 1.0.711 → 1.0.712

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22) hide show
  1. package/dist/index.js +38 -3
  2. package/package.json +2 -2
  3. package/payload/platform/plugins/linkedin-import/PLUGIN.md +1 -0
  4. package/payload/platform/plugins/linkedin-import/skills/linkedin-import/SKILL.md +26 -5
  5. package/payload/platform/plugins/linkedin-import/skills/linkedin-import/references/connections.md +53 -82
  6. package/payload/platform/plugins/linkedin-import/skills/linkedin-import/references/profile.md +42 -49
  7. package/payload/platform/plugins/memory/PLUGIN.md +1 -0
  8. package/payload/platform/plugins/memory/mcp/dist/index.js +48 -0
  9. package/payload/platform/plugins/memory/mcp/dist/index.js.map +1 -1
  10. package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.d.ts +33 -0
  11. package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.d.ts.map +1 -0
  12. package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.js +229 -0
  13. package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.js.map +1 -0
  14. package/payload/platform/scripts/redact-install-logs.sh +85 -0
  15. package/payload/platform/scripts/setup.sh +20 -3
  16. package/payload/platform/scripts/verify-skill-tool-surface.sh +255 -0
  17. package/payload/platform/templates/specialists/agents/database-operator.md +6 -2
  18. package/payload/server/chunk-U5JPRUYZ.js +12298 -0
  19. package/payload/server/maxy-edge.js +1 -1
  20. package/payload/server/public/assets/{graph-BNx6E7BH.js → graph-DJ7IfYHV.js} +12 -12
  21. package/payload/server/public/graph.html +1 -1
  22. package/payload/server/server.js +16 -9
package/dist/index.js CHANGED
@@ -125,7 +125,15 @@ function shell(command, args, options) {
125
125
  const cmd = options?.sudo ? "sudo" : command;
126
126
  const cmdArgs = options?.sudo ? [command, ...args] : args;
127
127
  const start = Date.now();
128
- logFile(`> ${cmd} ${cmdArgs.join(" ")}${options?.cwd ? ` [cwd: ${options.cwd}]` : ""}`);
128
+ // Redaction (Task 744): callers handling secrets pass redact: true so the
129
+ // wrapper records the command name only, not the secret-bearing args. The
130
+ // child process still receives the real args via spawnSync below; only the
131
+ // install log line is sanitised. The grep-able audit shape stays:
132
+ // > sudo neo4j-admin dbms set-initial-password -- [REDACTED]
133
+ const loggedArgs = options?.redact
134
+ ? `${cmdArgs.slice(0, options?.sudo ? 4 : 3).join(" ")} [REDACTED]`
135
+ : cmdArgs.join(" ");
136
+ logFile(`> ${cmd} ${loggedArgs}${options?.cwd ? ` [cwd: ${options.cwd}]` : ""}`);
129
137
  const result = spawnSync(cmd, cmdArgs, {
130
138
  stdio: "inherit",
131
139
  timeout: options?.timeout ?? 300_000,
@@ -690,7 +698,7 @@ function resetNeo4jAuth(port = DEFAULT_NEO4J_PORT, dataDir = "/var/lib/neo4j") {
690
698
  }
691
699
  else {
692
700
  console.log(" [privileged] neo4j-admin dbms");
693
- shell("neo4j-admin", ["dbms", "set-initial-password", "--", password], { sudo: true });
701
+ shell("neo4j-admin", ["dbms", "set-initial-password", "--", password], { sudo: true, redact: true });
694
702
  }
695
703
  console.log(" [privileged] systemctl start");
696
704
  shell("systemctl", ["start", serviceName], { sudo: true });
@@ -707,6 +715,29 @@ function resetNeo4jAuth(port = DEFAULT_NEO4J_PORT, dataDir = "/var/lib/neo4j") {
707
715
  }
708
716
  return password;
709
717
  }
718
+ /**
719
+ * Task 744 — scrub plaintext neo4j passwords from pre-fix install-*.log files.
720
+ * Calls platform/scripts/redact-install-logs.sh against the installer's LOG_DIR.
721
+ * The script is idempotent; re-running on clean logs is a no-op. Failures here
722
+ * are non-fatal — credential redaction is best-effort cleanup, not a blocker
723
+ * for installation.
724
+ */
725
+ function redactInstallLogs() {
726
+ const script = resolve(INSTALL_DIR, "platform/scripts/redact-install-logs.sh");
727
+ if (!existsSync(script)) {
728
+ logFile("[redact-install-logs] script not found at " + script + " — skipping");
729
+ return;
730
+ }
731
+ const r = spawnSync("bash", [script, "--dir", LOG_DIR], {
732
+ stdio: "pipe",
733
+ encoding: "utf-8",
734
+ timeout: 30_000,
735
+ });
736
+ if (r.stdout)
737
+ logFile(r.stdout.trim());
738
+ if (r.status !== 0 && r.stderr)
739
+ logFile("[redact-install-logs] WARN " + r.stderr.trim());
740
+ }
710
741
  /** Check Neo4j has a working password. Called AFTER deploy so config is in place. */
711
742
  function ensureNeo4jPassword() {
712
743
  const passwordFile = join(INSTALL_DIR, "platform/config/.neo4j-password");
@@ -794,7 +825,7 @@ function installNeo4j() {
794
825
  mkdirSync(configDir, { recursive: true });
795
826
  writeFileSync(join(configDir, ".neo4j-password"), password, { mode: 0o600 });
796
827
  console.log(" [privileged] neo4j-admin dbms");
797
- shell("neo4j-admin", ["dbms", "set-initial-password", "--", password], { sudo: true });
828
+ shell("neo4j-admin", ["dbms", "set-initial-password", "--", password], { sudo: true, redact: true });
798
829
  console.log(" [privileged] systemctl enable");
799
830
  shell("systemctl", ["enable", "neo4j"], { sudo: true });
800
831
  console.log(" [privileged] systemctl start");
@@ -2148,6 +2179,10 @@ try {
2148
2179
  installCloudflared();
2149
2180
  installWhisperCpp();
2150
2181
  deployPayload(); // Must happen before ensureNeo4jPassword — restores config backup
2182
+ // Task 744: scrub plaintext neo4j passwords from any pre-fix install-*.log.
2183
+ // Idempotent — re-running on already-redacted logs is a no-op. Runs after
2184
+ // payload deploy so the bundled redact-install-logs.sh is on disk.
2185
+ redactInstallLogs();
2151
2186
  ensureNeo4jPassword(); // Now config/.neo4j-password is available if it existed before
2152
2187
  provisionRemoteSessionSecret(); // Task 653: shared HMAC key readable by maxy-edge + maxy-ui
2153
2188
  buildPlatform();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@rubytech/create-maxy",
3
- "version": "1.0.711",
3
+ "version": "1.0.712",
4
4
  "description": "Install Maxy — AI for Productive People",
5
5
  "bin": {
6
6
  "create-maxy": "./dist/index.js"
@@ -10,7 +10,7 @@
10
10
  "build": "tsc",
11
11
  "bundle": "node scripts/bundle.js",
12
12
  "test": "npm run build && node --test 'dist/__tests__/*.test.js'",
13
- "prepublishOnly": "node ../../platform/ui/scripts/check-route-wiring.mjs && node ../../platform/ui/scripts/check-edge-admin-routes.mjs && npm run build && node --test 'dist/__tests__/*.test.js' && chmod +x dist/index.js && npm run bundle && node ../../platform/ui/scripts/check-bundle-node-imports.mjs --dir=./payload/server/public/assets"
13
+ "prepublishOnly": "bash ../../platform/scripts/verify-skill-tool-surface.sh && node ../../platform/ui/scripts/check-route-wiring.mjs && node ../../platform/ui/scripts/check-edge-admin-routes.mjs && npm run build && node --test 'dist/__tests__/*.test.js' && chmod +x dist/index.js && npm run bundle && node ../../platform/ui/scripts/check-bundle-node-imports.mjs --dir=./payload/server/public/assets"
14
14
  },
15
15
  "files": [
16
16
  "dist",
@@ -4,6 +4,7 @@ description: "Import a LinkedIn Basic Data Export into the Maxy Neo4j graph. Ski
4
4
  tools: []
5
5
  always: false
6
6
  embed: false
7
+ specialist: database-operator
7
8
  metadata: {"platform":{"optional":true,"pluginKey":"linkedin-import"}}
8
9
  ---
9
10
 
@@ -42,7 +42,7 @@ When the owner is an external Person (non-operator archive), the anchor is the c
42
42
 
43
43
  ## Invariants
44
44
 
45
- 1. **Schema first.** The LinkedIn additions (`person_linkedin_url` index, `:Credential` constraint) live in [`platform/neo4j/schema.cypher`](../../../../neo4j/schema.cypher) and are applied by `platform/scripts/seed-neo4j.sh` on every install / upgrade. If running against a Neo4j that hasn't been reseeded since shipping, pipe `schema.cypher` into `cypher-shell` once before starting — every statement is `IF NOT EXISTS`.
45
+ 1. **Schema first.** The LinkedIn additions (`person_linkedin_url` index, `:Credential` constraint) live in [`platform/neo4j/schema.cypher`](../../../../neo4j/schema.cypher) and are applied by `platform/scripts/seed-neo4j.sh` on every install / upgrade. The skill assumes the schema has been seeded; it does not bootstrap schema itself. If a constraint or index is missing, the operator re-runs `seed-neo4j.sh` from the installer — schema-bootstrap is installer-side, never agent-side.
46
46
  2. **Owner confirmed first.** No reference runs until `$ownerUserId` (or `$ownerPersonId`) is persisted and echo-confirmed. The reference set is parameterised — no hard-coded owner.
47
47
  3. **Natural edges only.** Every edge written is one the CSV actually expresses. `Connections.csv` encodes "I am connected on LinkedIn to this person" — that becomes `CONNECTED_ON_LINKEDIN`. No synthetic attach-to-owner pattern bolted onto rows that don't describe a relationship to the owner.
48
48
  4. **Reuse Maxy labels.** Schema-extension is last resort. The LinkedIn set maps onto existing labels wherever semantics align:
@@ -60,10 +60,31 @@ When the owner is an external Person (non-operator archive), the anchor is the c
60
60
 
61
61
  ## Execution model
62
62
 
63
- 1. Confirm `schema.cypher` is applied (one-liner: `cypher-shell ... < platform/neo4j/schema.cypher`; safe to re-run).
64
- 2. Run the owner-confirmation flow, persist `$ownerUserId` / `$ownerPersonId`.
65
- 3. For each file the operator approves, load its reference, parse the CSV, batch rows (default 500 per tx), execute the reference's Cypher with `$rows` + owner parameter.
66
- 4. After each file emit `[linkedin-import] file=<name> rows=<n> created=<n> matched=<n> ms=<elapsed>`.
63
+ 1. Run the owner-confirmation flow, persist `$ownerUserId` / `$ownerPersonId`. The owner identity resolves to a single `ownerNodeId` (elementId of the AdminUser or external Person) used in every write call.
64
+ 2. For each file the operator approves, load its reference, parse the CSV into typed `rows[]` matching the reference's row schema.
65
+ 3. **Selective-ingest gate.** Before invoking any write tool, check the parsed row count against the reference's `selectiveIngestThreshold`. If the count exceeds the threshold, pause and ask the operator to filter the import along the natural axes named in the reference (for `Connections.csv`: Company, Position, Connected On). Apply the filter to `rows[]` before continuing. Compress on write, never after — a 5,000-row blanket import is a landfill, a 200-row filtered import is signal. See [§Selective-ingest](#selective-ingest-threshold-bulk-archives).
66
+ 4. Invoke the deterministic write tool the reference names. For all archive references this is `mcp__memory__memory-archive-write` with `{archiveType, ownerNodeId, rows}` — the Cypher body is fixed server-side per `archiveType`, so the agent supplies parsed rows, never Cypher. The tool batches rows at 500 per transaction internally.
67
+ 5. After each file emit `[linkedin-import] file=<name> rows=<n> created=<n> matched=<n> ms=<elapsed>` using the counters returned by the write tool.
68
+
69
+ **Doctrine:** raw Cypher and `cypher-shell` invocations are forbidden in this skill and its references. Writes route through `mcp__memory__memory-archive-write` (bulk archives) or `mcp__memory__memory-write` / `mcp__memory__memory-update` (single-node enrichments like `profile.md`). If a CSV needs a write shape no current MCP tool supports, file a task to extend `memory-archive-write` with a new `archiveType` handler — never improvise via Bash. See [database-operator's LOUD-FAIL prerogative](../../../../templates/specialists/agents/database-operator.md#prerogatives).
70
+
71
+ ## Selective-ingest threshold (bulk archives)
72
+
73
+ A LinkedIn export typically contains 3,000–10,000 connections. Writing all of them in one shot defeats compression-on-write — most rows will never be queried, and the noise compounds with every subsequent ingest. The skill compresses by interrogating the operator before bulk writes.
74
+
75
+ **Threshold:** when a parsed reference's `rows[]` exceeds **100 rows**, pause and ask the operator to filter along the reference's natural axes before invoking the write tool.
76
+
77
+ For `Connections.csv` the natural filter axes are:
78
+
79
+ - **Company** — "only people at LargeCorp", "only Female Founders Fund alumni"
80
+ - **Position** — "only Partners", "only Engineering Managers"
81
+ - **Connected On** (date range) — "only my last two years", "since 2024-01-01"
82
+
83
+ The operator picks one axis or a combination. The agent applies the filter to `rows[]` and writes only the filtered subset.
84
+
85
+ **Re-importing is idempotent.** Coming back later with a wider filter (`"add anyone at LargeCorp"`, `"include 2022 too"`) hits the same `linkedinUrl` natural key — existing `:Person` nodes are matched and updated; only the new-only delta is created. The operator can grow the slice over time without dedup work.
86
+
87
+ **Why the threshold lives in the skill, not the server.** Different archive types have different "interesting" thresholds — 100 LinkedIn connections is a lot; 100 LinkedIn skills is small. The MCP tool accepts whatever rows are passed; the conversational gate is the skill's responsibility.
67
88
 
68
89
  ## File roster
69
90
 
@@ -31,7 +31,7 @@ The real column header is **line 4**. Either skip the first three lines before p
31
31
  | Position | `[:WORKS_FOR].title` |
32
32
  | Connected On | `[:CONNECTED_ON_LINKEDIN].connectedOn` (ISO 8601) |
33
33
 
34
- LinkedIn only emits email for connections who opted in, so most rows have a blank email. Write `email` only when non-empty — avoids colliding with `person_email_unique` on empty strings.
34
+ LinkedIn only emits email for connections who opted in, so most rows have a blank email. The MCP tool writes `email` only when non-empty — avoids colliding with `person_email_unique` on empty strings.
35
35
 
36
36
  ## Natural keys
37
37
 
@@ -42,81 +42,52 @@ LinkedIn only emits email for connections who opted in, so most rows have a blan
42
42
 
43
43
  ## Anchor
44
44
 
45
- ```cypher
46
- MATCH (owner:AdminUser {userId: $ownerUserId})
47
- ```
45
+ Resolved at skill start via the owner-confirmation flow. The owner is either an `:AdminUser` (the operator's own archive — the common case) or a confirmed `:Person` (an external archive ingested for reference). Both flow through the same write tool — `memory-archive-write` matches by `elementId(owner)` and accepts either label set.
48
46
 
49
- Resolved at skill start via the owner-confirmation flow. The owner could instead be a `:Person` if the operator confirmed an external-Person anchor; in that case swap the MATCH to `MATCH (owner:Person) WHERE elementId(owner) = $ownerPersonId` and keep the rest identical — the edges are the same regardless.
47
+ ## Selective-ingest threshold
50
48
 
51
- ## Cypher
49
+ **100 rows.** When the parsed `rows[]` count exceeds this, the skill pauses before the write call and asks the operator to filter by Company, Position, or Connected On range. See [SKILL.md § Selective-ingest threshold](../SKILL.md#selective-ingest-threshold-bulk-archives) for the doctrine. The MCP tool accepts whatever rows are passed; this gate is conversational.
52
50
 
53
- ```cypher
54
- // Parameters:
55
- // $ownerUserId — AdminUser.userId of the confirmed archive owner
56
- // $accountId — Organization accountId scope for this import
57
- // $sessionId — UUID generated once per skill run
58
- // $rows — array of objects:
59
- // {
60
- // givenName: "Dee",
61
- // familyName: "Odus",
62
- // linkedinUrl: "https://www.linkedin.com/in/deeodus",
63
- // email: null | "someone@example.com",
64
- // company: null | "Female Founders Fund",
65
- // title: null | "Partner",
66
- // connectedOn: "2026-04-23" // ISO 8601, parsed from "23 Apr 2026"
67
- // }
68
-
69
- MATCH (owner:AdminUser {userId: $ownerUserId})
70
- UNWIND $rows AS row
71
-
72
- // 1. Upsert the connection Person. linkedinUrl is the natural key.
73
- MERGE (p:Person {linkedinUrl: row.linkedinUrl})
74
- ON CREATE SET
75
- p.accountId = $accountId,
76
- p.source = 'linkedin',
77
- p.createdByAgent = 'linkedin-import',
78
- p.createdBySource = 'linkedin-import',
79
- p.createdBySession= $sessionId,
80
- p.createdAt = datetime()
81
- SET
82
- p.givenName = row.givenName,
83
- p.familyName= row.familyName,
84
- p.name = trim(coalesce(row.givenName,'') + ' ' + coalesce(row.familyName,''))
85
-
86
- // 1a. Email only when non-empty (avoids person_email_unique collisions on empty strings)
87
- FOREACH (_ IN CASE WHEN row.email IS NOT NULL AND row.email <> '' THEN [1] ELSE [] END |
88
- SET p.email = row.email
89
- )
90
-
91
- // 2. The CONNECTED_ON_LINKEDIN edge is what this CSV means.
92
- MERGE (owner)-[c:CONNECTED_ON_LINKEDIN]->(p)
93
- ON CREATE SET
94
- c.connectedOn = date(row.connectedOn),
95
- c.source = 'linkedin',
96
- c.createdAt = datetime()
97
-
98
- // 3. If the row names a current employer, create the Organization and WORKS_FOR edge.
99
- // If no company is named, this block no-ops — we do not synthesise one.
100
- WITH p, row
101
- WHERE row.company IS NOT NULL AND row.company <> ''
102
- MERGE (o:Organization {accountId: $accountId, name: trim(row.company)})
103
- ON CREATE SET
104
- o.source = 'linkedin',
105
- o.createdByAgent = 'linkedin-import',
106
- o.createdBySource = 'linkedin-import',
107
- o.createdBySession= $sessionId,
108
- o.createdAt = datetime()
109
-
110
- MERGE (p)-[w:WORKS_FOR]->(o)
111
- ON CREATE SET
112
- w.title = row.title,
113
- w.source = 'linkedin',
114
- w.current = true,
115
- w.createdAt = datetime()
116
- ON MATCH SET
117
- w.title = coalesce(row.title, w.title)
51
+ ## Write surface
52
+
53
+ This reference invokes a single MCP tool: `mcp__memory__memory-archive-write` with `archiveType: "linkedin-connections"`. The Cypher body — Person upsert by `linkedinUrl`, `CONNECTED_ON_LINKEDIN` edge from owner, optional `:Organization` + `WORKS_FOR` when company is non-empty — lives server-side in [`platform/plugins/memory/mcp/src/tools/memory-archive-write.ts`](../../../../memory/mcp/src/tools/memory-archive-write.ts). The agent does not author or pipe Cypher; it parses CSV rows into the tool's row schema and dispatches one (or more, for filtered re-imports) tool call.
54
+
55
+ ### Tool input shape
56
+
57
+ ```json
58
+ {
59
+ "archiveType": "linkedin-connections",
60
+ "ownerNodeId": "<elementId of :AdminUser or :Person — from owner-confirmation flow>",
61
+ "rows": [
62
+ {
63
+ "givenName": "Dee",
64
+ "familyName": "Odus",
65
+ "linkedinUrl": "https://www.linkedin.com/in/deeodus",
66
+ "email": null,
67
+ "company": "Female Founders Fund",
68
+ "title": "Partner",
69
+ "connectedOn": "2026-04-23"
70
+ }
71
+ ],
72
+ "sessionId": "<UUID generated once per skill run>"
73
+ }
118
74
  ```
119
75
 
76
+ The parser converts:
77
+ - Blank cells → `null` (especially `email`, `company`, `title`).
78
+ - `Connected On` from `"23 Apr 2026"` → ISO 8601 `"2026-04-23"`. The tool rejects rows with non-ISO dates loudly — never let the agent paper over a parser bug.
79
+
80
+ ### What the server does (informational, not the agent's responsibility)
81
+
82
+ Per 500-row batch the handler runs one transaction with two phases:
83
+
84
+ 1. Upsert each row's `:Person` (natural key `linkedinUrl`), stamp provenance + email when present, then upsert the owner→Person `CONNECTED_ON_LINKEDIN` edge with `connectedOn` on the edge.
85
+ 2. For rows whose `company` is non-empty, upsert the `:Organization` (natural key `accountId + name`), then upsert the Person→Organization `WORKS_FOR` edge with `title` on the edge.
86
+
87
+ Provenance stamped on every node: `source='linkedin'`, `createdByAgent='linkedin-import'`, `createdBySource='linkedin-import'`, `createdBySession=<sessionId>`, `createdAt=<now>`.
88
+
89
+ Counters come from the Neo4j driver's per-statement summary (`nodesCreated`, `relationshipsCreated`). The tool returns `{processedRows, createdPersons, mergedPersons, createdOrganizations, createdEdges, errors[]}`.
90
+
120
91
  ## Edge semantics — why these and no others
121
92
 
122
93
  - **`(owner)-[:CONNECTED_ON_LINKEDIN]->(:Person)`** — each row of Connections.csv is a declaration that the archive owner and this person are LinkedIn connections. That's the edge.
@@ -128,14 +99,16 @@ Rows missing a position but present with a company produce a `WORKS_FOR` edge wi
128
99
 
129
100
  ## Date parsing
130
101
 
131
- `Connected On` arrives as `"23 Apr 2026"`. Convert to ISO 8601 (`2026-04-23`) in the parser before passing to Cypher — `date("2026-04-23")` is Neo4j-native.
102
+ `Connected On` arrives as `"23 Apr 2026"`. Convert to ISO 8601 (`2026-04-23`) in the parser before passing to the tool. The server converts to Neo4j `date()` internally — the agent never invokes Cypher functions.
132
103
 
133
104
  ## Expected shape
134
105
 
135
- - ~3,000–10,000 rows typical for a long-running account.
136
- - 500 rows per transaction. Single UNWIND handles this; `apoc.periodic.iterate` not required.
106
+ - ~3,000–10,000 rows typical for a long-running account. The selective-ingest gate (above) keeps a typical write at well under 1,000 rows.
107
+ - 500 rows per transaction. The MCP tool handles batching internally; the agent passes the full filtered `rows[]` in one call.
137
108
 
138
- ## Post-import verification
109
+ ## Post-import verification (operator-side, not agent-side)
110
+
111
+ After ingest, the operator can verify counts via the `database-operator` specialist's read tools — `mcp__memory__memory-search` with `labels: ["Person"]` plus a filter, or a direct read query through `mcp__graph__maxy-graph-read_neo4j_cypher`:
139
112
 
140
113
  ```cypher
141
114
  // Owner → connections count
@@ -145,18 +118,16 @@ RETURN count(p) AS connections;
145
118
  // LinkedIn-origin organizations count
146
119
  MATCH (o:Organization {accountId: $accountId, source: 'linkedin'})
147
120
  RETURN count(o) AS organizations;
148
-
149
- // Spot-check: who works at Female Founders Fund?
150
- MATCH (o:Organization {accountId: $accountId, name: 'Female Founders Fund'})
151
- <-[:WORKS_FOR]-(p:Person)
152
- RETURN p.name, p.linkedinUrl;
153
121
  ```
154
122
 
123
+ These are **read queries**, not writes. Cypher writes from the agent are forbidden.
124
+
155
125
  ## Failure modes
156
126
 
157
127
  | Symptom | Cause | Fix |
158
128
  |---------|-------|-----|
159
129
  | Every row parsed as "Notes:,NaN,…" | Header preamble not skipped | Skip first 3 lines before the CSV parser |
160
- | Constraint violation on `person_email_unique` | Empty email cells treated as `""` instead of `null` | Ensure the parser converts blanks to `null` |
161
- | `MATCH (owner …)` returns zero rows | `$ownerUserId` invalid — owner-confirmation not run, or operator typed the wrong id | Re-run owner confirmation |
130
+ | Tool error "row connectedOn is not ISO 8601" | Parser left `Connected On` in `"23 Apr 2026"` form | Convert to `YYYY-MM-DD` before passing to the tool |
131
+ | Tool error "ownerNodeId not found" | Owner-confirmation flow not run, or operator typed the wrong id | Re-run owner confirmation; pass the resulting `elementId` as `ownerNodeId` |
162
132
  | `WORKS_FOR` count « connection count | Many rows have blank company | Expected — LinkedIn doesn't force connections to list a current employer |
133
+ | Tool not present in `init` frame | `database-operator` spawned without the `mcp__memory__memory-archive-write` token | Loud-fail per database-operator's prerogatives. Do not improvise via Bash. Operator must remediate (re-seed specialist templates) |
@@ -1,8 +1,8 @@
1
1
  # Reference: Profile.csv
2
2
 
3
- Enriches the confirmed archive owner's `:UserProfile` with the LinkedIn profile fields. No new nodes, no new edges — `:AdminUser` and `:UserProfile` already exist for any Maxy operator and are linked by `[:HAS_PROFILE]` at session start.
3
+ Enriches the confirmed archive owner's `:UserProfile` with the LinkedIn profile fields. No new nodes, no new edges — `:AdminUser` and `:UserProfile` already exist for any Maxy operator and are linked by `[:HAS_PROFILE]` at session start (neo4j-store handles that on session boot, not this skill).
4
4
 
5
- Runs before every other reference because later files display LinkedIn profile fields (headline, summary) on the owner node they MATCH.
5
+ Runs before every other reference because later files display LinkedIn profile fields (headline, summary) on the owner node they search.
6
6
 
7
7
  ## Source
8
8
 
@@ -30,66 +30,59 @@ Schema.org camelCase per `platform/plugins/memory/references/schema-base.md`.
30
30
 
31
31
  ## Anchor
32
32
 
33
+ The owner-confirmation flow at the start of the skill resolves the operator's `:UserProfile` elementId — not just the `:AdminUser` userId. That elementId (`$ownerProfileElementId`) is the input to this reference. The `[:HAS_PROFILE]` edge between `:AdminUser` and `:UserProfile` is created by `platform/ui/app/lib/neo4j-store.ts` on every session start; it pre-exists by the time any skill runs.
34
+
35
+ ## Write surface
36
+
37
+ This reference invokes `mcp__memory__memory-update` once with the parsed row's properties:
38
+
39
+ ```json
40
+ {
41
+ "nodeId": "<elementId of :UserProfile from owner-confirmation>",
42
+ "properties": {
43
+ "givenName": "Joel",
44
+ "familyName": "Smalley",
45
+ "additionalName": null,
46
+ "address": null,
47
+ "birthDate": null,
48
+ "headline": "Founder, Rubytech",
49
+ "description": "Building Maxy …",
50
+ "industry": "Software",
51
+ "postalCode": null,
52
+ "addressLocality": "London, UK",
53
+ "twitterHandles": ["@joelsmalley"],
54
+ "websites": ["https://getmaxy.com"],
55
+ "instantMessengers": [],
56
+ "linkedinProfileUpdatedAt": "<ISO 8601 timestamp>",
57
+ "source": "linkedin"
58
+ }
59
+ }
33
60
  ```
34
- (:AdminUser {userId: $ownerUserId}) -[:HAS_PROFILE]-> (:UserProfile {accountId, userId})
35
- ```
36
-
37
- The skill run has already persisted `$ownerUserId` (and its resolved `$accountId`) from the owner-confirmation flow. This reference trusts those parameters.
38
-
39
- ## Cypher
40
61
 
41
- ```cypher
42
- // Parameters:
43
- // $ownerUserId — AdminUser.userId of the confirmed archive owner
44
- // $accountId — the UserProfile accountId resolved alongside $ownerUserId
45
- // $sessionId — UUID generated once per skill run
46
- // $row — parsed object with the columns above
47
-
48
- MATCH (au:AdminUser {userId: $ownerUserId})
49
- MERGE (au)-[:HAS_PROFILE]->(up:UserProfile {accountId: $accountId, userId: $ownerUserId})
50
- ON CREATE SET
51
- up.createdAt = datetime(),
52
- up.createdByAgent = 'linkedin-import',
53
- up.createdBySource = 'linkedin-import',
54
- up.createdBySession= $sessionId
55
- SET
56
- up.givenName = $row.givenName,
57
- up.familyName = $row.familyName,
58
- up.additionalName = $row.additionalName,
59
- up.address = $row.address,
60
- up.birthDate = $row.birthDate,
61
- up.headline = $row.headline,
62
- up.description = $row.description,
63
- up.industry = $row.industry,
64
- up.postalCode = $row.postalCode,
65
- up.addressLocality = $row.addressLocality,
66
- up.twitterHandles = $row.twitterHandles,
67
- up.websites = $row.websites,
68
- up.instantMessengers = $row.instantMessengers,
69
- up.linkedinProfileUpdatedAt = datetime(),
70
- up.source = coalesce(up.source, 'linkedin')
71
-
72
- RETURN elementId(up) AS ownerProfileElementId
73
- ```
62
+ The `memory-update` tool ignores restricted keys (`embedding`, `accountId`, `createdAt`) and recomputes the embedding from the new property set. No raw Cypher, no `MERGE`, no Bash.
74
63
 
75
- The `MERGE (au)-[:HAS_PROFILE]->(up)` is idempotent: for any operator whose session has already run, `(au)-[:HAS_PROFILE]->(up)` already exists — this statement simply matches it and SETs properties. If the operator has never opened a Maxy session for this account (rare; the UserProfile normally exists before any skill runs), it is created here.
64
+ The parser converts:
65
+ - Blank cells → `null`.
66
+ - `Twitter Handles`, `Websites`, `Instant Messengers` from comma-delimited strings → arrays.
67
+ - `Birth Date` to ISO 8601 if present.
76
68
 
77
69
  ## Expected outcome
78
70
 
79
- - Zero new nodes (typical case).
80
- - Zero new edges (typical case).
71
+ - Zero new nodes.
72
+ - Zero new edges.
81
73
  - One existing `:UserProfile` enriched with 10–13 new properties.
82
- - `ownerProfileElementId` returned for downstream references that want to cache the anchor.
83
74
 
84
75
  ## Failure modes
85
76
 
86
77
  | Symptom | Cause | Fix |
87
78
  |---------|-------|-----|
88
- | Zero rows returned from `MATCH (au:AdminUser {userId: $ownerUserId})` | `$ownerUserId` doesn't resolve — operator typo in confirmation, or AdminUser missing | Re-run the owner-confirmation flow; verify `platform/config/users.json` contains the expected userId |
79
+ | Tool error "node not found" | `$ownerProfileElementId` invalid — owner-confirmation flow did not return a UserProfile | Re-run the owner-confirmation flow; verify `:AdminUser`-`HAS_PROFILE`->`:UserProfile` exists for the confirmed userId |
89
80
  | `up.websites` written as a single string not an array | Parser didn't split on `,` | Fix parser — LinkedIn comma-delimits these fields |
90
- | Constraint violation on `user_profile_account_user_unique` | Shouldn't happen — MERGE uses the composite key | Indicates a pre-existing duplicate; investigate with `MATCH (up:UserProfile {accountId: $accountId, userId: $ownerUserId}) RETURN count(up)` |
81
+ | Restricted property silently ignored | `memory-update` rejects `embedding` / `accountId` / `createdAt` overrides | Expected — those fields are managed by the server |
82
+
83
+ ## Post-import verification (operator-side, read-only)
91
84
 
92
- ## Post-import verification
85
+ A direct read query through `mcp__memory__memory-search` (`labels: ["UserProfile"]`) or `mcp__graph__maxy-graph-read_neo4j_cypher`:
93
86
 
94
87
  ```cypher
95
88
  MATCH (au:AdminUser {userId: $ownerUserId})-[:HAS_PROFILE]->(up:UserProfile)
@@ -99,4 +92,4 @@ RETURN
99
92
  up.websites, up.linkedinProfileUpdatedAt
100
93
  ```
101
94
 
102
- Exactly one row. If zero, either the AdminUser doesn't exist or the HAS_PROFILE edge wasn't MERGEd — investigate before running any subsequent reference.
95
+ Exactly one row. If zero, the owner-confirmation flow returned the wrong elementId — investigate before running any subsequent reference.
@@ -19,6 +19,7 @@ tools:
19
19
  - memory-read-attachment
20
20
  - memory-edit-attachment
21
21
  - memory-rename-attachment
22
+ - memory-archive-write
22
23
  - conversation-list
23
24
  - conversation-search
24
25
  - profile-read
@@ -12,6 +12,7 @@ import { buildLiveSchemaSource, defaultSchemaCypherPath, } from "./lib/live-sche
12
12
  import { memoryReindex } from "./tools/memory-reindex.js";
13
13
  import { memoryIngestExtract } from "./tools/memory-ingest-extract.js";
14
14
  import { memoryIngest } from "./tools/memory-ingest.js";
15
+ import { memoryArchiveWrite } from "./tools/memory-archive-write.js";
15
16
  import { memoryIngestWeb } from "./tools/memory-ingest-web.js";
16
17
  import { memoryClassify } from "./tools/memory-classify.js";
17
18
  import { memoryUpdate } from "./tools/memory-update.js";
@@ -769,6 +770,53 @@ if (!readOnly) {
769
770
  };
770
771
  }
771
772
  });
773
+ server.tool("memory-archive-write", "Bulk-archive write surface (Task 744). Writes a flat dataset (typed entities + natural edges) into the graph " +
774
+ "in 500-row UNWIND batches. The Cypher body is fixed server-side per `archiveType`; the agent supplies parsed " +
775
+ "rows + the discriminant, never raw Cypher. Use ONLY for first-class entity exports (LinkedIn Connections, " +
776
+ "future CRM-type seed exports). Use memory-ingest for narrative documents (KnowledgeDocument + Section + NEXT) " +
777
+ "and memory-write for single-node operator-driven writes. Currently supported archiveType values: " +
778
+ "`linkedin-connections`.", {
779
+ archiveType: z
780
+ .enum(["linkedin-connections"])
781
+ .describe("Discriminant naming the per-source schema and Cypher body the server runs. Add a new value here only when the corresponding handler is added in memory-archive-write.ts."),
782
+ ownerNodeId: z
783
+ .string()
784
+ .min(1)
785
+ .describe("elementId of the archive owner — :AdminUser for an operator's own archive, or :Person for an external-archive owner. Confirmed during the skill's owner-confirmation flow before this tool is invoked."),
786
+ rows: z
787
+ .array(z.record(z.string(), z.unknown()))
788
+ .min(1)
789
+ .describe("Parsed rows. The skill's selective-ingest gate runs BEFORE this tool — large blanket archives get filtered (Company / Position / Connected On range for linkedin-connections) before the write call."),
790
+ sessionId: z
791
+ .string()
792
+ .optional()
793
+ .describe("Skill-run UUID for provenance stamping. Falls back to SESSION_ID env var when absent."),
794
+ }, async ({ archiveType, ownerNodeId, rows, sessionId: sessionIdOverride }) => {
795
+ try {
796
+ const result = await memoryArchiveWrite({
797
+ archiveType,
798
+ ownerNodeId,
799
+ accountId,
800
+ rows: rows,
801
+ sessionId: resolveSessionId(sessionIdOverride),
802
+ });
803
+ return {
804
+ content: [{
805
+ type: "text",
806
+ text: JSON.stringify(result),
807
+ }],
808
+ };
809
+ }
810
+ catch (err) {
811
+ return {
812
+ content: [{
813
+ type: "text",
814
+ text: `memory-archive-write failed: ${err instanceof Error ? err.message : String(err)}`,
815
+ }],
816
+ isError: true,
817
+ };
818
+ }
819
+ });
772
820
  server.tool("memory-ingest-web", "Adapter for web-content ingestion (Task 737). Accepts a URL and its pre-fetched readable content " +
773
821
  "(the agent calls WebFetch first, then passes the text here), writes content to a temp file, and delegates " +
774
822
  "to memory-ingest-extract — caching the text under a freshly-generated attachmentId. The skill then drives " +