@rubytech/create-maxy 1.0.711 → 1.0.712
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +38 -3
- package/package.json +2 -2
- package/payload/platform/plugins/linkedin-import/PLUGIN.md +1 -0
- package/payload/platform/plugins/linkedin-import/skills/linkedin-import/SKILL.md +26 -5
- package/payload/platform/plugins/linkedin-import/skills/linkedin-import/references/connections.md +53 -82
- package/payload/platform/plugins/linkedin-import/skills/linkedin-import/references/profile.md +42 -49
- package/payload/platform/plugins/memory/PLUGIN.md +1 -0
- package/payload/platform/plugins/memory/mcp/dist/index.js +48 -0
- package/payload/platform/plugins/memory/mcp/dist/index.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.d.ts +33 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.js +229 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.js.map +1 -0
- package/payload/platform/scripts/redact-install-logs.sh +85 -0
- package/payload/platform/scripts/setup.sh +20 -3
- package/payload/platform/scripts/verify-skill-tool-surface.sh +255 -0
- package/payload/platform/templates/specialists/agents/database-operator.md +6 -2
- package/payload/server/chunk-U5JPRUYZ.js +12298 -0
- package/payload/server/maxy-edge.js +1 -1
- package/payload/server/public/assets/{graph-BNx6E7BH.js → graph-DJ7IfYHV.js} +12 -12
- package/payload/server/public/graph.html +1 -1
- package/payload/server/server.js +16 -9
package/dist/index.js
CHANGED
|
@@ -125,7 +125,15 @@ function shell(command, args, options) {
|
|
|
125
125
|
const cmd = options?.sudo ? "sudo" : command;
|
|
126
126
|
const cmdArgs = options?.sudo ? [command, ...args] : args;
|
|
127
127
|
const start = Date.now();
|
|
128
|
-
|
|
128
|
+
// Redaction (Task 744): callers handling secrets pass redact: true so the
|
|
129
|
+
// wrapper records the command name only, not the secret-bearing args. The
|
|
130
|
+
// child process still receives the real args via spawnSync below; only the
|
|
131
|
+
// install log line is sanitised. The grep-able audit shape stays:
|
|
132
|
+
// > sudo neo4j-admin dbms set-initial-password -- [REDACTED]
|
|
133
|
+
const loggedArgs = options?.redact
|
|
134
|
+
? `${cmdArgs.slice(0, options?.sudo ? 4 : 3).join(" ")} [REDACTED]`
|
|
135
|
+
: cmdArgs.join(" ");
|
|
136
|
+
logFile(`> ${cmd} ${loggedArgs}${options?.cwd ? ` [cwd: ${options.cwd}]` : ""}`);
|
|
129
137
|
const result = spawnSync(cmd, cmdArgs, {
|
|
130
138
|
stdio: "inherit",
|
|
131
139
|
timeout: options?.timeout ?? 300_000,
|
|
@@ -690,7 +698,7 @@ function resetNeo4jAuth(port = DEFAULT_NEO4J_PORT, dataDir = "/var/lib/neo4j") {
|
|
|
690
698
|
}
|
|
691
699
|
else {
|
|
692
700
|
console.log(" [privileged] neo4j-admin dbms");
|
|
693
|
-
shell("neo4j-admin", ["dbms", "set-initial-password", "--", password], { sudo: true });
|
|
701
|
+
shell("neo4j-admin", ["dbms", "set-initial-password", "--", password], { sudo: true, redact: true });
|
|
694
702
|
}
|
|
695
703
|
console.log(" [privileged] systemctl start");
|
|
696
704
|
shell("systemctl", ["start", serviceName], { sudo: true });
|
|
@@ -707,6 +715,29 @@ function resetNeo4jAuth(port = DEFAULT_NEO4J_PORT, dataDir = "/var/lib/neo4j") {
|
|
|
707
715
|
}
|
|
708
716
|
return password;
|
|
709
717
|
}
|
|
718
|
+
/**
 * Task 744 — scrub plaintext neo4j passwords from pre-fix install-*.log files.
 * Calls platform/scripts/redact-install-logs.sh against the installer's LOG_DIR.
 * The script is idempotent; re-running on clean logs is a no-op. Failures here
 * are non-fatal — credential redaction is best-effort cleanup, not a blocker
 * for installation. Every failure mode is still recorded in the install log
 * (missing script, spawn error / timeout, non-zero exit) so a skipped scrub
 * is never silent.
 *
 * @returns {void}
 */
function redactInstallLogs() {
    const script = resolve(INSTALL_DIR, "platform/scripts/redact-install-logs.sh");
    if (!existsSync(script)) {
        logFile("[redact-install-logs] script not found at " + script + " — skipping");
        return;
    }
    const r = spawnSync("bash", [script, "--dir", LOG_DIR], {
        stdio: "pipe",
        encoding: "utf-8",
        timeout: 30_000,
    });
    // stdout/stderr are null when the child could not be spawned at all.
    const out = r.stdout?.trim();
    if (out) {
        logFile(out);
    }
    // spawnSync reports launch failures and timeouts via `error`; in that case
    // `status` is null and stderr is usually empty, so check it explicitly.
    if (r.error) {
        logFile("[redact-install-logs] WARN failed to run: " + r.error.message);
        return;
    }
    if (r.status !== 0) {
        // Prefer the script's own stderr; fall back to signal / exit status so
        // a failing run with no stderr output still leaves a trace.
        const detail = r.stderr?.trim()
            || (r.signal ? `killed by ${r.signal}` : `exit status ${r.status}`);
        logFile("[redact-install-logs] WARN " + detail);
    }
}
|
|
710
741
|
/** Check Neo4j has a working password. Called AFTER deploy so config is in place. */
|
|
711
742
|
function ensureNeo4jPassword() {
|
|
712
743
|
const passwordFile = join(INSTALL_DIR, "platform/config/.neo4j-password");
|
|
@@ -794,7 +825,7 @@ function installNeo4j() {
|
|
|
794
825
|
mkdirSync(configDir, { recursive: true });
|
|
795
826
|
writeFileSync(join(configDir, ".neo4j-password"), password, { mode: 0o600 });
|
|
796
827
|
console.log(" [privileged] neo4j-admin dbms");
|
|
797
|
-
shell("neo4j-admin", ["dbms", "set-initial-password", "--", password], { sudo: true });
|
|
828
|
+
shell("neo4j-admin", ["dbms", "set-initial-password", "--", password], { sudo: true, redact: true });
|
|
798
829
|
console.log(" [privileged] systemctl enable");
|
|
799
830
|
shell("systemctl", ["enable", "neo4j"], { sudo: true });
|
|
800
831
|
console.log(" [privileged] systemctl start");
|
|
@@ -2148,6 +2179,10 @@ try {
|
|
|
2148
2179
|
installCloudflared();
|
|
2149
2180
|
installWhisperCpp();
|
|
2150
2181
|
deployPayload(); // Must happen before ensureNeo4jPassword — restores config backup
|
|
2182
|
+
// Task 744: scrub plaintext neo4j passwords from any pre-fix install-*.log.
|
|
2183
|
+
// Idempotent — re-running on already-redacted logs is a no-op. Runs after
|
|
2184
|
+
// payload deploy so the bundled redact-install-logs.sh is on disk.
|
|
2185
|
+
redactInstallLogs();
|
|
2151
2186
|
ensureNeo4jPassword(); // Now config/.neo4j-password is available if it existed before
|
|
2152
2187
|
provisionRemoteSessionSecret(); // Task 653: shared HMAC key readable by maxy-edge + maxy-ui
|
|
2153
2188
|
buildPlatform();
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@rubytech/create-maxy",
|
|
3
|
-
"version": "1.0.711",
|
|
3
|
+
"version": "1.0.712",
|
|
4
4
|
"description": "Install Maxy — AI for Productive People",
|
|
5
5
|
"bin": {
|
|
6
6
|
"create-maxy": "./dist/index.js"
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
"build": "tsc",
|
|
11
11
|
"bundle": "node scripts/bundle.js",
|
|
12
12
|
"test": "npm run build && node --test 'dist/__tests__/*.test.js'",
|
|
13
|
-
"prepublishOnly": "node ../../platform/ui/scripts/check-route-wiring.mjs && node ../../platform/ui/scripts/check-edge-admin-routes.mjs && npm run build && node --test 'dist/__tests__/*.test.js' && chmod +x dist/index.js && npm run bundle && node ../../platform/ui/scripts/check-bundle-node-imports.mjs --dir=./payload/server/public/assets"
|
|
13
|
+
"prepublishOnly": "bash ../../platform/scripts/verify-skill-tool-surface.sh && node ../../platform/ui/scripts/check-route-wiring.mjs && node ../../platform/ui/scripts/check-edge-admin-routes.mjs && npm run build && node --test 'dist/__tests__/*.test.js' && chmod +x dist/index.js && npm run bundle && node ../../platform/ui/scripts/check-bundle-node-imports.mjs --dir=./payload/server/public/assets"
|
|
14
14
|
},
|
|
15
15
|
"files": [
|
|
16
16
|
"dist",
|
|
@@ -42,7 +42,7 @@ When the owner is an external Person (non-operator archive), the anchor is the c
|
|
|
42
42
|
|
|
43
43
|
## Invariants
|
|
44
44
|
|
|
45
|
-
1. **Schema first.** The LinkedIn additions (`person_linkedin_url` index, `:Credential` constraint) live in [`platform/neo4j/schema.cypher`](../../../../neo4j/schema.cypher) and are applied by `platform/scripts/seed-neo4j.sh` on every install / upgrade.
|
|
45
|
+
1. **Schema first.** The LinkedIn additions (`person_linkedin_url` index, `:Credential` constraint) live in [`platform/neo4j/schema.cypher`](../../../../neo4j/schema.cypher) and are applied by `platform/scripts/seed-neo4j.sh` on every install / upgrade. The skill assumes the schema has been seeded; it does not bootstrap schema itself. If a constraint or index is missing, the operator re-runs `seed-neo4j.sh` from the installer — schema-bootstrap is installer-side, never agent-side.
|
|
46
46
|
2. **Owner confirmed first.** No reference runs until `$ownerUserId` (or `$ownerPersonId`) is persisted and echo-confirmed. The reference set is parameterised — no hard-coded owner.
|
|
47
47
|
3. **Natural edges only.** Every edge written is one the CSV actually expresses. `Connections.csv` encodes "I am connected on LinkedIn to this person" — that becomes `CONNECTED_ON_LINKEDIN`. No synthetic attach-to-owner pattern bolted onto rows that don't describe a relationship to the owner.
|
|
48
48
|
4. **Reuse Maxy labels.** Schema-extension is last resort. The LinkedIn set maps onto existing labels wherever semantics align:
|
|
@@ -60,10 +60,31 @@ When the owner is an external Person (non-operator archive), the anchor is the c
|
|
|
60
60
|
|
|
61
61
|
## Execution model
|
|
62
62
|
|
|
63
|
-
1.
|
|
64
|
-
2.
|
|
65
|
-
3.
|
|
66
|
-
4.
|
|
63
|
+
1. Run the owner-confirmation flow, persist `$ownerUserId` / `$ownerPersonId`. The owner identity resolves to a single `ownerNodeId` (elementId of the AdminUser or external Person) used in every write call.
|
|
64
|
+
2. For each file the operator approves, load its reference, parse the CSV into typed `rows[]` matching the reference's row schema.
|
|
65
|
+
3. **Selective-ingest gate.** Before invoking any write tool, check the parsed row count against the reference's `selectiveIngestThreshold`. If the count exceeds the threshold, pause and ask the operator to filter the import along the natural axes named in the reference (for `Connections.csv`: Company, Position, Connected On). Apply the filter to `rows[]` before continuing. Compress on write, never after — a 5,000-row blanket import is a landfill, a 200-row filtered import is signal. See [§Selective-ingest](#selective-ingest-threshold-bulk-archives).
|
|
66
|
+
4. Invoke the deterministic write tool the reference names. For all archive references this is `mcp__memory__memory-archive-write` with `{archiveType, ownerNodeId, rows}` — the Cypher body is fixed server-side per `archiveType`, so the agent supplies parsed rows, never Cypher. The tool batches rows at 500 per transaction internally.
|
|
67
|
+
5. After each file emit `[linkedin-import] file=<name> rows=<n> created=<n> matched=<n> ms=<elapsed>` using the counters returned by the write tool.
|
|
68
|
+
|
|
69
|
+
**Doctrine:** raw Cypher and `cypher-shell` invocations are forbidden in this skill and its references. Writes route through `mcp__memory__memory-archive-write` (bulk archives) or `mcp__memory__memory-write` / `mcp__memory__memory-update` (single-node enrichments like `profile.md`). If a CSV needs a write shape no current MCP tool supports, file a task to extend `memory-archive-write` with a new `archiveType` handler — never improvise via Bash. See [database-operator's LOUD-FAIL prerogative](../../../../templates/specialists/agents/database-operator.md#prerogatives).
|
|
70
|
+
|
|
71
|
+
## Selective-ingest threshold (bulk archives)
|
|
72
|
+
|
|
73
|
+
A LinkedIn export typically contains 3,000–10,000 connections. Writing all of them in one shot defeats compression-on-write — most rows will never be queried, and the noise compounds with every subsequent ingest. The skill compresses by interrogating the operator before bulk writes.
|
|
74
|
+
|
|
75
|
+
**Threshold:** when a parsed reference's `rows[]` exceeds **100 rows**, pause and ask the operator to filter along the reference's natural axes before invoking the write tool.
|
|
76
|
+
|
|
77
|
+
For `Connections.csv` the natural filter axes are:
|
|
78
|
+
|
|
79
|
+
- **Company** — "only people at LargeCorp", "only Female Founders Fund alumni"
|
|
80
|
+
- **Position** — "only Partners", "only Engineering Managers"
|
|
81
|
+
- **Connected On** (date range) — "only my last two years", "since 2024-01-01"
|
|
82
|
+
|
|
83
|
+
The operator picks one axis or a combination. The agent applies the filter to `rows[]` and writes only the filtered subset.
|
|
84
|
+
|
|
85
|
+
**Re-importing is idempotent.** Coming back later with a wider filter (`"add anyone at LargeCorp"`, `"include 2022 too"`) hits the same `linkedinUrl` natural key — existing `:Person` nodes are matched and updated; only the new-only delta is created. The operator can grow the slice over time without dedup work.
|
|
86
|
+
|
|
87
|
+
**Why the threshold lives in the skill, not the server.** Different archive types have different "interesting" thresholds — 100 LinkedIn connections is a lot; 100 LinkedIn skills is small. The MCP tool accepts whatever rows are passed; the conversational gate is the skill's responsibility.
|
|
67
88
|
|
|
68
89
|
## File roster
|
|
69
90
|
|
package/payload/platform/plugins/linkedin-import/skills/linkedin-import/references/connections.md
CHANGED
|
@@ -31,7 +31,7 @@ The real column header is **line 4**. Either skip the first three lines before p
|
|
|
31
31
|
| Position | `[:WORKS_FOR].title` |
|
|
32
32
|
| Connected On | `[:CONNECTED_ON_LINKEDIN].connectedOn` (ISO 8601) |
|
|
33
33
|
|
|
34
|
-
LinkedIn only emits email for connections who opted in, so most rows have a blank email.
|
|
34
|
+
LinkedIn only emits email for connections who opted in, so most rows have a blank email. The MCP tool writes `email` only when non-empty — avoids colliding with `person_email_unique` on empty strings.
|
|
35
35
|
|
|
36
36
|
## Natural keys
|
|
37
37
|
|
|
@@ -42,81 +42,52 @@ LinkedIn only emits email for connections who opted in, so most rows have a blan
|
|
|
42
42
|
|
|
43
43
|
## Anchor
|
|
44
44
|
|
|
45
|
-
|
|
46
|
-
MATCH (owner:AdminUser {userId: $ownerUserId})
|
|
47
|
-
```
|
|
45
|
+
Resolved at skill start via the owner-confirmation flow. The owner is either an `:AdminUser` (the operator's own archive — the common case) or a confirmed `:Person` (an external archive ingested for reference). Both flow through the same write tool — `memory-archive-write` matches by `elementId(owner)` and accepts either label set.
|
|
48
46
|
|
|
49
|
-
|
|
47
|
+
## Selective-ingest threshold
|
|
50
48
|
|
|
51
|
-
|
|
49
|
+
**100 rows.** When the parsed `rows[]` count exceeds this, the skill pauses before the write call and asks the operator to filter by Company, Position, or Connected On range. See [SKILL.md § Selective-ingest threshold](../SKILL.md#selective-ingest-threshold-bulk-archives) for the doctrine. The MCP tool accepts whatever rows are passed; this gate is conversational.
|
|
52
50
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
p.source = 'linkedin',
|
|
77
|
-
p.createdByAgent = 'linkedin-import',
|
|
78
|
-
p.createdBySource = 'linkedin-import',
|
|
79
|
-
p.createdBySession= $sessionId,
|
|
80
|
-
p.createdAt = datetime()
|
|
81
|
-
SET
|
|
82
|
-
p.givenName = row.givenName,
|
|
83
|
-
p.familyName= row.familyName,
|
|
84
|
-
p.name = trim(coalesce(row.givenName,'') + ' ' + coalesce(row.familyName,''))
|
|
85
|
-
|
|
86
|
-
// 1a. Email only when non-empty (avoids person_email_unique collisions on empty strings)
|
|
87
|
-
FOREACH (_ IN CASE WHEN row.email IS NOT NULL AND row.email <> '' THEN [1] ELSE [] END |
|
|
88
|
-
SET p.email = row.email
|
|
89
|
-
)
|
|
90
|
-
|
|
91
|
-
// 2. The CONNECTED_ON_LINKEDIN edge is what this CSV means.
|
|
92
|
-
MERGE (owner)-[c:CONNECTED_ON_LINKEDIN]->(p)
|
|
93
|
-
ON CREATE SET
|
|
94
|
-
c.connectedOn = date(row.connectedOn),
|
|
95
|
-
c.source = 'linkedin',
|
|
96
|
-
c.createdAt = datetime()
|
|
97
|
-
|
|
98
|
-
// 3. If the row names a current employer, create the Organization and WORKS_FOR edge.
|
|
99
|
-
// If no company is named, this block no-ops — we do not synthesise one.
|
|
100
|
-
WITH p, row
|
|
101
|
-
WHERE row.company IS NOT NULL AND row.company <> ''
|
|
102
|
-
MERGE (o:Organization {accountId: $accountId, name: trim(row.company)})
|
|
103
|
-
ON CREATE SET
|
|
104
|
-
o.source = 'linkedin',
|
|
105
|
-
o.createdByAgent = 'linkedin-import',
|
|
106
|
-
o.createdBySource = 'linkedin-import',
|
|
107
|
-
o.createdBySession= $sessionId,
|
|
108
|
-
o.createdAt = datetime()
|
|
109
|
-
|
|
110
|
-
MERGE (p)-[w:WORKS_FOR]->(o)
|
|
111
|
-
ON CREATE SET
|
|
112
|
-
w.title = row.title,
|
|
113
|
-
w.source = 'linkedin',
|
|
114
|
-
w.current = true,
|
|
115
|
-
w.createdAt = datetime()
|
|
116
|
-
ON MATCH SET
|
|
117
|
-
w.title = coalesce(row.title, w.title)
|
|
51
|
+
## Write surface
|
|
52
|
+
|
|
53
|
+
This reference invokes a single MCP tool: `mcp__memory__memory-archive-write` with `archiveType: "linkedin-connections"`. The Cypher body — Person upsert by `linkedinUrl`, `CONNECTED_ON_LINKEDIN` edge from owner, optional `:Organization` + `WORKS_FOR` when company is non-empty — lives server-side in [`platform/plugins/memory/mcp/src/tools/memory-archive-write.ts`](../../../../memory/mcp/src/tools/memory-archive-write.ts). The agent does not author or pipe Cypher; it parses CSV rows into the tool's row schema and dispatches one (or more, for filtered re-imports) tool call.
|
|
54
|
+
|
|
55
|
+
### Tool input shape
|
|
56
|
+
|
|
57
|
+
```json
|
|
58
|
+
{
|
|
59
|
+
"archiveType": "linkedin-connections",
|
|
60
|
+
"ownerNodeId": "<elementId of :AdminUser or :Person — from owner-confirmation flow>",
|
|
61
|
+
"rows": [
|
|
62
|
+
{
|
|
63
|
+
"givenName": "Dee",
|
|
64
|
+
"familyName": "Odus",
|
|
65
|
+
"linkedinUrl": "https://www.linkedin.com/in/deeodus",
|
|
66
|
+
"email": null,
|
|
67
|
+
"company": "Female Founders Fund",
|
|
68
|
+
"title": "Partner",
|
|
69
|
+
"connectedOn": "2026-04-23"
|
|
70
|
+
}
|
|
71
|
+
],
|
|
72
|
+
"sessionId": "<UUID generated once per skill run>"
|
|
73
|
+
}
|
|
118
74
|
```
|
|
119
75
|
|
|
76
|
+
The parser converts:
|
|
77
|
+
- Blank cells → `null` (especially `email`, `company`, `title`).
|
|
78
|
+
- `Connected On` from `"23 Apr 2026"` → ISO 8601 `"2026-04-23"`. The tool rejects rows with non-ISO dates loudly — never let the agent paper over a parser bug.
|
|
79
|
+
|
|
80
|
+
### What the server does (informational, not the agent's responsibility)
|
|
81
|
+
|
|
82
|
+
Per 500-row batch the handler runs one transaction with two phases:
|
|
83
|
+
|
|
84
|
+
1. Upsert each row's `:Person` (natural key `linkedinUrl`), stamp provenance + email when present, then upsert the owner→Person `CONNECTED_ON_LINKEDIN` edge with `connectedOn` on the edge.
|
|
85
|
+
2. For rows whose `company` is non-empty, upsert the `:Organization` (natural key `accountId + name`), then upsert the Person→Organization `WORKS_FOR` edge with `title` on the edge.
|
|
86
|
+
|
|
87
|
+
Provenance stamped on every node: `source='linkedin'`, `createdByAgent='linkedin-import'`, `createdBySource='linkedin-import'`, `createdBySession=<sessionId>`, `createdAt=<now>`.
|
|
88
|
+
|
|
89
|
+
Counters come from the Neo4j driver's per-statement summary (`nodesCreated`, `relationshipsCreated`). The tool returns `{processedRows, createdPersons, mergedPersons, createdOrganizations, createdEdges, errors[]}`.
|
|
90
|
+
|
|
120
91
|
## Edge semantics — why these and no others
|
|
121
92
|
|
|
122
93
|
- **`(owner)-[:CONNECTED_ON_LINKEDIN]->(:Person)`** — each row of Connections.csv is a declaration that the archive owner and this person are LinkedIn connections. That's the edge.
|
|
@@ -128,14 +99,16 @@ Rows missing a position but present with a company produce a `WORKS_FOR` edge wi
|
|
|
128
99
|
|
|
129
100
|
## Date parsing
|
|
130
101
|
|
|
131
|
-
`Connected On` arrives as `"23 Apr 2026"`. Convert to ISO 8601 (`2026-04-23`) in the parser before passing to
|
|
102
|
+
`Connected On` arrives as `"23 Apr 2026"`. Convert to ISO 8601 (`2026-04-23`) in the parser before passing to the tool. The server converts to Neo4j `date()` internally — the agent never invokes Cypher functions.
|
|
132
103
|
|
|
133
104
|
## Expected shape
|
|
134
105
|
|
|
135
|
-
- ~3,000–10,000 rows typical for a long-running account.
|
|
136
|
-
- 500 rows per transaction.
|
|
106
|
+
- ~3,000–10,000 rows typical for a long-running account. The selective-ingest gate (above) keeps a typical write at well under 1,000 rows.
|
|
107
|
+
- 500 rows per transaction. The MCP tool handles batching internally; the agent passes the full filtered `rows[]` in one call.
|
|
137
108
|
|
|
138
|
-
## Post-import verification
|
|
109
|
+
## Post-import verification (operator-side, not agent-side)
|
|
110
|
+
|
|
111
|
+
After ingest, the operator can verify counts via the `database-operator` specialist's read tools — `mcp__memory__memory-search` with `labels: ["Person"]` plus a filter, or a direct read query through `mcp__graph__maxy-graph-read_neo4j_cypher`:
|
|
139
112
|
|
|
140
113
|
```cypher
|
|
141
114
|
// Owner → connections count
|
|
@@ -145,18 +118,16 @@ RETURN count(p) AS connections;
|
|
|
145
118
|
// LinkedIn-origin organizations count
|
|
146
119
|
MATCH (o:Organization {accountId: $accountId, source: 'linkedin'})
|
|
147
120
|
RETURN count(o) AS organizations;
|
|
148
|
-
|
|
149
|
-
// Spot-check: who works at Female Founders Fund?
|
|
150
|
-
MATCH (o:Organization {accountId: $accountId, name: 'Female Founders Fund'})
|
|
151
|
-
<-[:WORKS_FOR]-(p:Person)
|
|
152
|
-
RETURN p.name, p.linkedinUrl;
|
|
153
121
|
```
|
|
154
122
|
|
|
123
|
+
These are **read queries**, not writes. Cypher writes from the agent are forbidden.
|
|
124
|
+
|
|
155
125
|
## Failure modes
|
|
156
126
|
|
|
157
127
|
| Symptom | Cause | Fix |
|
|
158
128
|
|---------|-------|-----|
|
|
159
129
|
| Every row parsed as "Notes:,NaN,…" | Header preamble not skipped | Skip first 3 lines before the CSV parser |
|
|
160
|
-
|
|
|
161
|
-
|
|
|
130
|
+
| Tool error "row connectedOn is not ISO 8601" | Parser left `Connected On` in `"23 Apr 2026"` form | Convert to `YYYY-MM-DD` before passing to the tool |
|
|
131
|
+
| Tool error "ownerNodeId not found" | Owner-confirmation flow not run, or operator typed the wrong id | Re-run owner confirmation; pass the resulting `elementId` as `ownerNodeId` |
|
|
162
132
|
| `WORKS_FOR` count « connection count | Many rows have blank company | Expected — LinkedIn doesn't force connections to list a current employer |
|
|
133
|
+
| Tool not present in `init` frame | `database-operator` spawned without the `mcp__memory__memory-archive-write` token | Loud-fail per database-operator's prerogatives. Do not improvise via Bash. Operator must remediate (re-seed specialist templates) |
|
package/payload/platform/plugins/linkedin-import/skills/linkedin-import/references/profile.md
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
# Reference: Profile.csv
|
|
2
2
|
|
|
3
|
-
Enriches the confirmed archive owner's `:UserProfile` with the LinkedIn profile fields. No new nodes, no new edges — `:AdminUser` and `:UserProfile` already exist for any Maxy operator and are linked by `[:HAS_PROFILE]` at session start.
|
|
3
|
+
Enriches the confirmed archive owner's `:UserProfile` with the LinkedIn profile fields. No new nodes, no new edges — `:AdminUser` and `:UserProfile` already exist for any Maxy operator and are linked by `[:HAS_PROFILE]` at session start (neo4j-store handles that on session boot, not this skill).
|
|
4
4
|
|
|
5
|
-
Runs before every other reference because later files display LinkedIn profile fields (headline, summary) on the owner node they
|
|
5
|
+
Runs before every other reference because later files display LinkedIn profile fields (headline, summary) on the owner node they search.
|
|
6
6
|
|
|
7
7
|
## Source
|
|
8
8
|
|
|
@@ -30,66 +30,59 @@ Schema.org camelCase per `platform/plugins/memory/references/schema-base.md`.
|
|
|
30
30
|
|
|
31
31
|
## Anchor
|
|
32
32
|
|
|
33
|
+
The owner-confirmation flow at the start of the skill resolves the operator's `:UserProfile` elementId — not just the `:AdminUser` userId. That elementId (`$ownerProfileElementId`) is the input to this reference. The `[:HAS_PROFILE]` edge between `:AdminUser` and `:UserProfile` is created by `platform/ui/app/lib/neo4j-store.ts` on every session start; it pre-exists by the time any skill runs.
|
|
34
|
+
|
|
35
|
+
## Write surface
|
|
36
|
+
|
|
37
|
+
This reference invokes `mcp__memory__memory-update` once with the parsed row's properties:
|
|
38
|
+
|
|
39
|
+
```json
|
|
40
|
+
{
|
|
41
|
+
"nodeId": "<elementId of :UserProfile from owner-confirmation>",
|
|
42
|
+
"properties": {
|
|
43
|
+
"givenName": "Joel",
|
|
44
|
+
"familyName": "Smalley",
|
|
45
|
+
"additionalName": null,
|
|
46
|
+
"address": null,
|
|
47
|
+
"birthDate": null,
|
|
48
|
+
"headline": "Founder, Rubytech",
|
|
49
|
+
"description": "Building Maxy …",
|
|
50
|
+
"industry": "Software",
|
|
51
|
+
"postalCode": null,
|
|
52
|
+
"addressLocality": "London, UK",
|
|
53
|
+
"twitterHandles": ["@joelsmalley"],
|
|
54
|
+
"websites": ["https://getmaxy.com"],
|
|
55
|
+
"instantMessengers": [],
|
|
56
|
+
"linkedinProfileUpdatedAt": "<ISO 8601 timestamp>",
|
|
57
|
+
"source": "linkedin"
|
|
58
|
+
}
|
|
59
|
+
}
|
|
33
60
|
```
|
|
34
|
-
(:AdminUser {userId: $ownerUserId}) -[:HAS_PROFILE]-> (:UserProfile {accountId, userId})
|
|
35
|
-
```
|
|
36
|
-
|
|
37
|
-
The skill run has already persisted `$ownerUserId` (and its resolved `$accountId`) from the owner-confirmation flow. This reference trusts those parameters.
|
|
38
|
-
|
|
39
|
-
## Cypher
|
|
40
61
|
|
|
41
|
-
|
|
42
|
-
// Parameters:
|
|
43
|
-
// $ownerUserId — AdminUser.userId of the confirmed archive owner
|
|
44
|
-
// $accountId — the UserProfile accountId resolved alongside $ownerUserId
|
|
45
|
-
// $sessionId — UUID generated once per skill run
|
|
46
|
-
// $row — parsed object with the columns above
|
|
47
|
-
|
|
48
|
-
MATCH (au:AdminUser {userId: $ownerUserId})
|
|
49
|
-
MERGE (au)-[:HAS_PROFILE]->(up:UserProfile {accountId: $accountId, userId: $ownerUserId})
|
|
50
|
-
ON CREATE SET
|
|
51
|
-
up.createdAt = datetime(),
|
|
52
|
-
up.createdByAgent = 'linkedin-import',
|
|
53
|
-
up.createdBySource = 'linkedin-import',
|
|
54
|
-
up.createdBySession= $sessionId
|
|
55
|
-
SET
|
|
56
|
-
up.givenName = $row.givenName,
|
|
57
|
-
up.familyName = $row.familyName,
|
|
58
|
-
up.additionalName = $row.additionalName,
|
|
59
|
-
up.address = $row.address,
|
|
60
|
-
up.birthDate = $row.birthDate,
|
|
61
|
-
up.headline = $row.headline,
|
|
62
|
-
up.description = $row.description,
|
|
63
|
-
up.industry = $row.industry,
|
|
64
|
-
up.postalCode = $row.postalCode,
|
|
65
|
-
up.addressLocality = $row.addressLocality,
|
|
66
|
-
up.twitterHandles = $row.twitterHandles,
|
|
67
|
-
up.websites = $row.websites,
|
|
68
|
-
up.instantMessengers = $row.instantMessengers,
|
|
69
|
-
up.linkedinProfileUpdatedAt = datetime(),
|
|
70
|
-
up.source = coalesce(up.source, 'linkedin')
|
|
71
|
-
|
|
72
|
-
RETURN elementId(up) AS ownerProfileElementId
|
|
73
|
-
```
|
|
62
|
+
The `memory-update` tool ignores restricted keys (`embedding`, `accountId`, `createdAt`) and recomputes the embedding from the new property set. No raw Cypher, no `MERGE`, no Bash.
|
|
74
63
|
|
|
75
|
-
The
|
|
64
|
+
The parser converts:
|
|
65
|
+
- Blank cells → `null`.
|
|
66
|
+
- `Twitter Handles`, `Websites`, `Instant Messengers` from comma-delimited strings → arrays.
|
|
67
|
+
- `Birth Date` to ISO 8601 if present.
|
|
76
68
|
|
|
77
69
|
## Expected outcome
|
|
78
70
|
|
|
79
|
-
- Zero new nodes
|
|
80
|
-
- Zero new edges
|
|
71
|
+
- Zero new nodes.
|
|
72
|
+
- Zero new edges.
|
|
81
73
|
- One existing `:UserProfile` enriched with 10–13 new properties.
|
|
82
|
-
- `ownerProfileElementId` returned for downstream references that want to cache the anchor.
|
|
83
74
|
|
|
84
75
|
## Failure modes
|
|
85
76
|
|
|
86
77
|
| Symptom | Cause | Fix |
|
|
87
78
|
|---------|-------|-----|
|
|
88
|
-
|
|
|
79
|
+
| Tool error "node not found" | `$ownerProfileElementId` invalid — owner-confirmation flow did not return a UserProfile | Re-run the owner-confirmation flow; verify `(:AdminUser)-[:HAS_PROFILE]->(:UserProfile)` exists for the confirmed userId |
|
|
89
80
|
| `up.websites` written as a single string not an array | Parser didn't split on `,` | Fix parser — LinkedIn comma-delimits these fields |
|
|
90
|
-
|
|
|
81
|
+
| Restricted property silently ignored | `memory-update` ignores `embedding` / `accountId` / `createdAt` overrides | Expected — those fields are managed by the server |
|
|
82
|
+
|
|
83
|
+
## Post-import verification (operator-side, read-only)
|
|
91
84
|
|
|
92
|
-
|
|
85
|
+
Verify with a read query through `mcp__memory__memory-search` (`labels: ["UserProfile"]`) or `mcp__graph__maxy-graph-read_neo4j_cypher`:
|
|
93
86
|
|
|
94
87
|
```cypher
|
|
95
88
|
MATCH (au:AdminUser {userId: $ownerUserId})-[:HAS_PROFILE]->(up:UserProfile)
|
|
@@ -99,4 +92,4 @@ RETURN
|
|
|
99
92
|
up.websites, up.linkedinProfileUpdatedAt
|
|
100
93
|
```
|
|
101
94
|
|
|
102
|
-
Exactly one row. If zero,
|
|
95
|
+
Exactly one row. If zero, the owner-confirmation flow returned the wrong elementId — investigate before running any subsequent reference.
|
|
@@ -12,6 +12,7 @@ import { buildLiveSchemaSource, defaultSchemaCypherPath, } from "./lib/live-sche
|
|
|
12
12
|
import { memoryReindex } from "./tools/memory-reindex.js";
|
|
13
13
|
import { memoryIngestExtract } from "./tools/memory-ingest-extract.js";
|
|
14
14
|
import { memoryIngest } from "./tools/memory-ingest.js";
|
|
15
|
+
import { memoryArchiveWrite } from "./tools/memory-archive-write.js";
|
|
15
16
|
import { memoryIngestWeb } from "./tools/memory-ingest-web.js";
|
|
16
17
|
import { memoryClassify } from "./tools/memory-classify.js";
|
|
17
18
|
import { memoryUpdate } from "./tools/memory-update.js";
|
|
@@ -769,6 +770,53 @@ if (!readOnly) {
|
|
|
769
770
|
};
|
|
770
771
|
}
|
|
771
772
|
});
|
|
773
|
+
server.tool("memory-archive-write", "Bulk-archive write surface (Task 744). Writes a flat dataset (typed entities + natural edges) into the graph " +
|
|
774
|
+
"in 500-row UNWIND batches. The Cypher body is fixed server-side per `archiveType`; the agent supplies parsed " +
|
|
775
|
+
"rows + the discriminant, never raw Cypher. Use ONLY for first-class entity exports (LinkedIn Connections, " +
|
|
776
|
+
"future CRM-type seed exports). Use memory-ingest for narrative documents (KnowledgeDocument + Section + NEXT) " +
|
|
777
|
+
"and memory-write for single-node operator-driven writes. Currently supported archiveType values: " +
|
|
778
|
+
"`linkedin-connections`.", {
|
|
779
|
+
archiveType: z
|
|
780
|
+
.enum(["linkedin-connections"])
|
|
781
|
+
.describe("Discriminant naming the per-source schema and Cypher body the server runs. Add a new value here only when the corresponding handler is added in memory-archive-write.ts."),
|
|
782
|
+
ownerNodeId: z
|
|
783
|
+
.string()
|
|
784
|
+
.min(1)
|
|
785
|
+
.describe("elementId of the archive owner — :AdminUser for an operator's own archive, or :Person for an external-archive owner. Confirmed during the skill's owner-confirmation flow before this tool is invoked."),
|
|
786
|
+
rows: z
|
|
787
|
+
.array(z.record(z.string(), z.unknown()))
|
|
788
|
+
.min(1)
|
|
789
|
+
.describe("Parsed rows. The skill's selective-ingest gate runs BEFORE this tool — large blanket archives get filtered (Company / Position / Connected On range for linkedin-connections) before the write call."),
|
|
790
|
+
sessionId: z
|
|
791
|
+
.string()
|
|
792
|
+
.optional()
|
|
793
|
+
.describe("Skill-run UUID for provenance stamping. Falls back to SESSION_ID env var when absent."),
|
|
794
|
+
}, async ({ archiveType, ownerNodeId, rows, sessionId: sessionIdOverride }) => {
|
|
795
|
+
try {
|
|
796
|
+
const result = await memoryArchiveWrite({
|
|
797
|
+
archiveType,
|
|
798
|
+
ownerNodeId,
|
|
799
|
+
accountId,
|
|
800
|
+
rows: rows,
|
|
801
|
+
sessionId: resolveSessionId(sessionIdOverride),
|
|
802
|
+
});
|
|
803
|
+
return {
|
|
804
|
+
content: [{
|
|
805
|
+
type: "text",
|
|
806
|
+
text: JSON.stringify(result),
|
|
807
|
+
}],
|
|
808
|
+
};
|
|
809
|
+
}
|
|
810
|
+
catch (err) {
|
|
811
|
+
return {
|
|
812
|
+
content: [{
|
|
813
|
+
type: "text",
|
|
814
|
+
text: `memory-archive-write failed: ${err instanceof Error ? err.message : String(err)}`,
|
|
815
|
+
}],
|
|
816
|
+
isError: true,
|
|
817
|
+
};
|
|
818
|
+
}
|
|
819
|
+
});
|
|
772
820
|
server.tool("memory-ingest-web", "Adapter for web-content ingestion (Task 737). Accepts a URL and its pre-fetched readable content " +
|
|
773
821
|
"(the agent calls WebFetch first, then passes the text here), writes content to a temp file, and delegates " +
|
|
774
822
|
"to memory-ingest-extract — caching the text under a freshly-generated attachmentId. The skill then drives " +
|