npm - @rubytech/create-realagent - Versions diffs - 1.0.710 → 1.0.713 - Mend

@rubytech/create-realagent 1.0.710 → 1.0.713

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

package/payload/platform/lib/graph-search/src/__tests__/expand-batch.test.ts ADDED Viewed

@@ -0,0 +1,206 @@
+import { describe, it, expect, beforeEach } from "vitest";
+import { hybrid, clearIndexCache } from "../index.js";
+/**
+ * Task 747 — graph expand is one Cypher round-trip per `hybrid()` call,
+ * not one per result. Pre-fix the lib looped at index.ts:421-462, issuing
+ * `MATCH (n)-[r]-(related)` once per merged node — at slider=2000 that was
+ * 2000 round-trips through the driver per search. Post-fix, a single
+ * `UNWIND $nodeIds AS nid MATCH (n)-[r]-(related) WHERE elementId(n) = nid …`
+ * with `WITH nid, collect({...})[0..20]` returns all expansions in one query.
+ *
+ * Invariants pinned here:
+ *   1. exactly one expand round-trip per hybrid() call (regardless of N)
+ *   2. per-result related cap of 20 preserved (slice notation in the lib)
+ *   3. each `related` entry carries the neighbour's `elementId` so the
+ *      canvas can render edges (post-Task-747 lib shape change)
+ *   4. `expandHops: 0` short-circuits the expand round-trip entirely
+ */
+interface ScriptedRun { // One canned response for the stub session built by makeStubSession().
+  match: (query: string) => boolean; // Tested against the Cypher text of each run() call; the first entry returning true is used.
+  records: Array<Record<string, unknown>>; // Plain field maps returned for a matching query, each wrapped by record().
+}
+function record(fields: Record<string, unknown>) { // Wraps a plain field map in an object exposing only get(key), the accessor the lib uses to read result rows.
+  return { get: (k: string) => fields[k] }; // A key that was not scripted yields undefined rather than throwing.
+}
+function makeStubSession(scripted: ScriptedRun[]) { // Builds a fake Session plus a log of every run() call made on it.
+  const calls: Array<{ query: string; params: Record<string, unknown> }> = []; // Appended to in call order; tests inspect it after hybrid() returns.
+  const session = {
+    run(query: string, params: Record<string, unknown>) {
+      calls.push({ query, params });
+      const hit = scripted.find((s) => s.match(query)); // First matching script wins, so script order matters if predicates overlap.
+      if (!hit) return Promise.resolve({ records: [] }); // Unscripted queries resolve with an empty result instead of rejecting.
+      return Promise.resolve({ records: hit.records.map(record) });
+    },
+  } as unknown as import("neo4j-driver").Session; // Only run() is implemented; the double cast hides the missing Session members.
+  return { session, calls };
+}
+beforeEach(() => { // Runs before every test in this file.
+  clearIndexCache(); // NOTE(review): presumably drops cached index discovery so each test's scripted SHOW INDEXES rows are re-read — confirm in ../index.js.
+});
+describe("hybrid — batched expand (Task 747)", () => {
+  it("issues exactly one expand round-trip for N merged results", async () => {
+    const merged = [ // Three scripted vector hits, listed in descending score order.
+      { nodeId: "n1", nodeLabels: ["Person"], node: { properties: { name: "A" } }, score: 0.9 },
+      { nodeId: "n2", nodeLabels: ["Person"], node: { properties: { name: "B" } }, score: 0.8 },
+      { nodeId: "n3", nodeLabels: ["Person"], node: { properties: { name: "C" } }, score: 0.7 },
+    ];
+    const { session, calls } = makeStubSession([
+      {
+        match: (q) => q.includes("SHOW INDEXES"),
+        records: [{ name: "vec_person", labelsOrTypes: ["Person"] }],
+      },
+      {
+        match: (q) => q.includes("db.index.vector.queryNodes"),
+        records: merged,
+      },
+      {
+        match: (q) => q.includes("UNWIND $nodeIds"), // Scripted expand response: rows for n1 and n2 only; n3 gets no neighbour row.
+        records: [
+          {
+            nid: "n1",
+            items: [{ relType: "KNOWS", direction: "outgoing", relatedNodeId: "r1", relatedLabels: ["Person"], related: { properties: { name: "X" } } }],
+          },
+          {
+            nid: "n2",
+            items: [{ relType: "KNOWS", direction: "incoming", relatedNodeId: "r2", relatedLabels: ["Person"], related: { properties: { name: "Y" } } }],
+          },
+        ],
+      },
+    ]);
+    const embed = async () => [0.1, 0.2]; // Stub embedder returning a fixed vector.
+    await hybrid(session, embed, {
+      query: "test",
+      accountId: "acc-1",
+      limit: 10,
+      labels: ["Person"],
+    });
+    const expandCalls = calls.filter((c) => c.params && "nodeIds" in (c.params as Record<string, unknown>)); // The expand call is identified by its nodeIds parameter.
+    expect(expandCalls).toHaveLength(1);
+    expect((expandCalls[0].params as { nodeIds: string[] }).nodeIds).toEqual(["n1", "n2", "n3"]); // All three ids travel in the single call, including n3.
+  });
+  it("preserves per-result related cap of 20 via slice notation", async () => {
+    const { session, calls } = makeStubSession([
+      {
+        match: (q) => q.includes("SHOW INDEXES"),
+        records: [{ name: "vec_person", labelsOrTypes: ["Person"] }],
+      },
+      {
+        match: (q) => q.includes("db.index.vector.queryNodes"),
+        records: [
+          { nodeId: "n1", nodeLabels: ["Person"], node: { properties: {} }, score: 0.9 },
+        ],
+      },
+    ]); // No UNWIND script: the stub answers the expand query with zero records.
+    const embed = async () => [0.1];
+    await hybrid(session, embed, {
+      query: "test",
+      accountId: "acc-1",
+      limit: 10,
+      labels: ["Person"],
+    });
+    const expandCall = calls.find((c) => c.query.includes("UNWIND $nodeIds"));
+    expect(expandCall).toBeDefined();
+    expect(expandCall!.query).toContain("[0..20]"); // Checks the query text only — the stub never executes Cypher, so the cap itself is not exercised.
+  });
+  it("returns related entries carrying neighbour elementId for canvas edge rendering", async () => {
+    const { session } = makeStubSession([
+      {
+        match: (q) => q.includes("SHOW INDEXES"),
+        records: [{ name: "vec_person", labelsOrTypes: ["Person"] }],
+      },
+      {
+        match: (q) => q.includes("db.index.vector.queryNodes"),
+        records: [
+          { nodeId: "n1", nodeLabels: ["Person"], node: { properties: { name: "A" } }, score: 0.9 },
+        ],
+      },
+      {
+        match: (q) => q.includes("UNWIND $nodeIds"),
+        records: [
+          {
+            nid: "n1",
+            items: [
+              {
+                relType: "KNOWS",
+                direction: "outgoing",
+                relatedNodeId: "r1",
+                relatedLabels: ["Person"],
+                related: { properties: { name: "B" } },
+              },
+            ],
+          },
+        ],
+      },
+    ]);
+    const embed = async () => [0.1];
+    const res = await hybrid(session, embed, {
+      query: "test",
+      accountId: "acc-1",
+      limit: 10,
+      labels: ["Person"],
+    });
+    expect(res.results[0]?.related[0]?.nodeId).toBe("r1"); // related[].nodeId is the lib's mapping of the scripted relatedNodeId field.
+  });
+  it("skips the expand round-trip entirely when expandHops is 0", async () => {
+    const { session, calls } = makeStubSession([
+      {
+        match: (q) => q.includes("SHOW INDEXES"),
+        records: [{ name: "vec_person", labelsOrTypes: ["Person"] }],
+      },
+      {
+        match: (q) => q.includes("db.index.vector.queryNodes"),
+        records: [
+          { nodeId: "n1", nodeLabels: ["Person"], node: { properties: {} }, score: 0.9 },
+        ],
+      },
+    ]);
+    const embed = async () => [0.1];
+    await hybrid(session, embed, {
+      query: "test",
+      accountId: "acc-1",
+      limit: 10,
+      labels: ["Person"],
+      expandHops: 0,
+    });
+    const expandCalls = calls.filter((c) => c.query.includes("UNWIND $nodeIds"));
+    expect(expandCalls).toHaveLength(0); // No query containing the UNWIND clause may reach the session.
+  });
+  it("preserves trashed/scope/agent gates on the related neighbour", async () => { // Not among the four invariants listed in the file header; inspects query text only.
+    const { session, calls } = makeStubSession([
+      {
+        match: (q) => q.includes("SHOW INDEXES"),
+        records: [{ name: "vec_person", labelsOrTypes: ["Person"] }],
+      },
+      {
+        match: (q) => q.includes("db.index.vector.queryNodes"),
+        records: [
+          { nodeId: "n1", nodeLabels: ["Person"], node: { properties: {} }, score: 0.9 },
+        ],
+      },
+    ]);
+    const embed = async () => [0.1];
+    await hybrid(session, embed, {
+      query: "test",
+      accountId: "acc-1",
+      limit: 10,
+      labels: ["Person"],
+      allowedScopes: ["public"],
+      agentSlug: "support",
+    });
+    const expandCall = calls.find((c) => c.query.includes("UNWIND $nodeIds"));
+    expect(expandCall).toBeDefined();
+    expect(expandCall!.query).toContain("related"); // NOTE(review): "related" already occurs in the expand query's MATCH pattern, so this passes even if the notTrashed predicate were dropped — assert on the predicate text instead.
+    expect(expandCall!.query).toContain("$allowedScopes");
+    expect(expandCall!.query).toContain("$agentSlug");
+  });
+});

package/payload/platform/lib/graph-search/src/index.ts CHANGED Viewed

@@ -46,6 +46,12 @@ export interface SearchHit {
 export interface SearchResult extends SearchHit {
   related: Array<{
+    /**
+     * Task 747 — neighbour `elementId`. Required by the /graph canvas to
+     * render edges in pipeline-collapse mode (search response IS the canvas
+     * data). The MCP memory-search tool ignores it; the field is additive.
+     */
+    nodeId: string;
     relationship: string;
     direction: string;
     labels: string[];
@@ -71,10 +77,17 @@ export interface Bm25OnlyParams {
   agentSlug?: string;
   keywords?: string[];
   keywordMatch?: "any" | "all";
+  /**
+   * Task 747 — gate BM25 hits to nodes carrying at least one of these labels.
+   * Mirrors hybrid()'s vector-half label filter so the Ollama-down fallback
+   * honours the same gate the operator applied via /graph chips. Empty array
+   * is treated as "no gate" (matches hybrid's `labels && labels.length > 0`
+   * guard); admin route enforces non-empty `labels` as a precondition.
+   */
+  labels?: string[];
 }
 export interface HybridParams extends Bm25OnlyParams {
-  labels?: string[];
   expandHops?: number;
   keywordSubscriptions?: string[];
   /**
@@ -91,6 +104,14 @@ export interface HybridResponse {
   results: SearchResult[];
   /** Populated when degradeOnEmbedFailure fired. Caller logs it. */
   embedError?: string;
+  /**
+   * Task 747 — milliseconds spent on the batched expand round-trip alone
+   * (separate from embed + vector + BM25). The /graph admin route emits
+   * this as `expand-ms=N` so a regression on the post-Task-747 batching
+   * surfaces in server.log without needing a profiler. Zero when
+   * `expandHops === 0` or no merged results.
+   */
+  expandMs: number;
 }
 export type EmbedFn = (text: string) => Promise<number[]>;
@@ -170,13 +191,16 @@ export async function bm25Only(
   session: Session,
   params: Bm25OnlyParams,
 ): Promise<SearchHit[]> {
-  const { query, accountId, limit, allowedScopes, agentSlug, keywords, keywordMatch } = params;
+  const { query, accountId, limit, allowedScopes, agentSlug, keywords, keywordMatch, labels } = params;
   const scopeClause = allowedScopes
     ? "AND (node.scope IS NULL OR node.scope IN $allowedScopes)"
     : "";
   const agentClause = agentSlug
     ? "AND node.agents IS NOT NULL AND $agentSlug IN node.agents"
     : "";
+  const labelClause = labels && labels.length > 0
+    ? "AND any(l IN labels(node) WHERE l IN $labels)"
+    : "";
   const keywordFilter = buildKeywordFilter(keywords, keywordMatch);
   const kwClause = keywordFilter?.clause ?? "";
   const escaped = escapeLucene(query);
@@ -188,6 +212,7 @@ export async function bm25Only(
        WHERE node.accountId = $accountId
        ${scopeClause}
        ${agentClause}
+       ${labelClause}
        AND ${notTrashed("node")}
        ${kwClause}
        RETURN node, score, labels(node) AS nodeLabels, elementId(node) AS nodeId
@@ -200,6 +225,7 @@ export async function bm25Only(
         limit: int(limit),
         ...(allowedScopes ? { allowedScopes } : {}),
         ...(agentSlug ? { agentSlug } : {}),
+        ...(labels && labels.length > 0 ? { labels } : {}),
         ...(keywordFilter?.params ?? {}),
       },
     );
@@ -270,7 +296,7 @@ export async function hybrid(
     const msg = err instanceof Error ? err.message : String(err);
     const bm25Hits = await bm25Only(session, params);
     const results: SearchResult[] = bm25Hits.map((h) => ({ ...h, related: [] }));
-    return { mode: "bm25", results, embedError: msg };
+    return { mode: "bm25", results, embedError: msg, expandMs: 0 };
   }
   const labelToIndex = await discoverIndexes(session);
@@ -288,7 +314,7 @@ export async function hybrid(
       .map((l) => labelToIndex.get(l))
       .filter((idx): idx is string => idx !== undefined);
     if (indexesToQuery.length === 0) {
-      return { mode: "hybrid", results: [] };
+      return { mode: "hybrid", results: [], expandMs: 0 };
     }
   } else {
     indexesToQuery = [...new Set(labelToIndex.values())];
@@ -418,50 +444,81 @@ export async function hybrid(
     .sort((a, b) => b.combinedScore - a.combinedScore)
     .slice(0, limit);
-  // --- Graph expand ---
-  const results: SearchResult[] = [];
-  for (const node of merged) {
-    const result: SearchResult = {
-      nodeId: node.nodeId,
-      labels: node.labels,
-      properties: node.properties,
-      score: node.combinedScore,
-      related: [],
-    };
-    if (expandHops > 0) {
-      const expandScopeClause = allowedScopes
-        ? "AND (related.scope IS NULL OR related.scope IN $allowedScopes)"
-        : "";
-      const expandAgentClause = agentSlug
-        ? "AND (related.agents IS NULL OR $agentSlug IN related.agents)"
-        : "";
-      const expandResult = await session.run(
-        `MATCH (n)-[r]-(related)
-         WHERE elementId(n) = $nodeId
-         AND ${notTrashed("related")}
-         ${expandScopeClause}
-         ${expandAgentClause}
-         RETURN type(r) AS relType,
-                CASE WHEN startNode(r) = n THEN 'outgoing' ELSE 'incoming' END AS direction,
-                labels(related) AS relatedLabels,
-                related
-         LIMIT 20`,
-        { nodeId: node.nodeId, ...scopeParams, ...agentParams },
-      );
-      for (const rec of expandResult.records) {
-        const related = rec.get("related") as { properties: Record<string, unknown> };
-        result.related.push({
-          relationship: rec.get("relType") as string,
-          direction: rec.get("direction") as string,
-          labels: rec.get("relatedLabels") as string[],
-          properties: plainProperties(related.properties),
+  // --- Graph expand (Task 747 — single batched round-trip) ---
+  //
+  // Pre-Task-747: one Cypher per merged node, in a JS for-loop. At the admin
+  // route's slider=2000 this produced 2000 driver round-trips per search.
+  //
+  // Post-Task-747: one UNWIND-driven query for all merged nodeIds. The
+  // `WITH nid, collect({...})[0..20]` clause preserves the per-result cap of
+  // 20 neighbours that the per-node query enforced via `LIMIT 20`. Slice
+  // notation is order-preserving over the rows it consumes (no upstream
+  // ORDER BY changes that), so canvas-edge density per hit is unchanged.
+  //
+  // Each `related` entry now carries `relatedNodeId` (the neighbour's
+  // elementId) so the /graph canvas can render edges in pipeline-collapse
+  // mode (search response IS the canvas data when `q` is set).
+  const results: SearchResult[] = merged.map((node) => ({
+    nodeId: node.nodeId,
+    labels: node.labels,
+    properties: node.properties,
+    score: node.combinedScore,
+    related: [],
+  }));
+  let expandMs = 0;
+  if (expandHops > 0 && results.length > 0) {
+    const expandScopeClause = allowedScopes
+      ? "AND (related.scope IS NULL OR related.scope IN $allowedScopes)"
+      : "";
+    const expandAgentClause = agentSlug
+      ? "AND (related.agents IS NULL OR $agentSlug IN related.agents)"
+      : "";
+    const expandStart = Date.now();
+    const expandResult = await session.run(
+      `UNWIND $nodeIds AS nid
+       MATCH (n)-[r]-(related)
+       WHERE elementId(n) = nid
+       AND ${notTrashed("related")}
+       ${expandScopeClause}
+       ${expandAgentClause}
+       WITH nid, n, r, related
+       WITH nid, collect({
+         relType: type(r),
+         direction: CASE WHEN startNode(r) = n THEN 'outgoing' ELSE 'incoming' END,
+         relatedNodeId: elementId(related),
+         relatedLabels: labels(related),
+         related: related
+       })[0..20] AS items
+       RETURN nid, items`,
+      { nodeIds: results.map((r) => r.nodeId), ...scopeParams, ...agentParams },
+    );
+    expandMs = Date.now() - expandStart;
+    const byNodeId = new Map<string, SearchResult>(results.map((r) => [r.nodeId, r]));
+    for (const rec of expandResult.records) {
+      const nid = rec.get("nid") as string;
+      const target = byNodeId.get(nid);
+      if (!target) continue;
+      const items = rec.get("items") as Array<{
+        relType: string;
+        direction: string;
+        relatedNodeId: string;
+        relatedLabels: string[];
+        related: { properties: Record<string, unknown> };
+      }>;
+      for (const item of items) {
+        target.related.push({
+          nodeId: item.relatedNodeId,
+          relationship: item.relType,
+          direction: item.direction,
+          labels: item.relatedLabels,
+          properties: plainProperties(item.related.properties),
         });
       }
     }
-    results.push(result);
   }
-  return { mode: "hybrid", results };
+  return { mode: "hybrid", results, expandMs };
 }
 function mergeBm25Hit(

package/payload/platform/plugins/docs/references/platform.md CHANGED Viewed

@@ -17,7 +17,7 @@ The Pi runs the web interface, the AI agent, and all the plugin servers. When yo
 Maxy runs two agents simultaneously:
-**Admin agent (you)** — full access to all tools and plugins. This is the agent you interact with at your local or remote URL. It can read and write contacts, send Telegram messages, manage your account, and perform any task you have plugins for. Protected by your PIN.
+**Admin agent (you)** — full access to all tools and plugins. This is the agent you interact with at your local or remote URL. It can read and write contacts, send Telegram messages, manage your account, and perform any task you have plugins for. Protected by your PIN. Your admin agent runs through your own Claude Code OAuth session — it never incurs Anthropic API charges. Authentication and SDK details are documented in the admin-agent section of the developer doc `.docs/platform.md`.
 **Public agent (visitors)** — read-only access. Handles enquiries from people who reach your public URL. It can answer questions about your business and collect waitlist signups, but it cannot access your private data or take actions.
@@ -51,6 +51,8 @@ Maxy maintains a graph database (Neo4j) of everything you've told it. People, co
 The memory graph is stored on your Pi. It never leaves your network.
+The graph view (at `/graph`) lets you explore the memory directly. Pick a category from the filter, then type to search inside it — typing narrows what the canvas shows; it never widens it. Drag the slider to control how many matches you see (1 to 2000). If the search shows a yellow banner saying "Vector ranking unavailable," the local AI ranking model is offline; results are still returned using keyword matching, but the ordering is less semantic until the ranker recovers.
 ## The Web Interface
 The web app runs on your Pi on port 19200. A small always-on front door (`maxy-edge`) owns that port and the remote terminal transport — so when the Software Update command restarts the app server, the browser-side terminal keeps streaming bytes exactly like an SSH session would. The edge also hosts the update flow's own routes (the sudo prompt, the action launcher, the SSE progress stream, the installed-version poll), so the Software Update modal's log panel does not go blank during the app-server restart window — it keeps receiving lines, heartbeats, and the final exit event unbroken. Login cookies are HMAC-signed with a shared key on disk, so both processes recognise the same session without any coordination and you do not have to log in again after an update. Every request is also classified as LAN or external based on the network shape it arrived on — LAN browsers reach admin directly; the remote password screen only appears on the tunnel-exposed admin domain. It provides:

package/payload/platform/plugins/linkedin-import/PLUGIN.md CHANGED Viewed

@@ -4,6 +4,7 @@ description: "Import a LinkedIn Basic Data Export into the Maxy Neo4j graph. Ski
 tools: []
 always: false
 embed: false
+specialist: database-operator
 metadata: {"platform":{"optional":true,"pluginKey":"linkedin-import"}}
 ---

package/payload/platform/plugins/linkedin-import/skills/linkedin-import/SKILL.md CHANGED Viewed

@@ -42,7 +42,7 @@ When the owner is an external Person (non-operator archive), the anchor is the c
 ## Invariants
-1. **Schema first.** The LinkedIn additions (`person_linkedin_url` index, `:Credential` constraint) live in [`platform/neo4j/schema.cypher`](../../../../neo4j/schema.cypher) and are applied by `platform/scripts/seed-neo4j.sh` on every install / upgrade. If running against a Neo4j that hasn't been reseeded since shipping, pipe `schema.cypher` into `cypher-shell` once before starting — every statement is `IF NOT EXISTS`.
+1. **Schema first.** The LinkedIn additions (`person_linkedin_url` index, `:Credential` constraint) live in [`platform/neo4j/schema.cypher`](../../../../neo4j/schema.cypher) and are applied by `platform/scripts/seed-neo4j.sh` on every install / upgrade. The skill assumes the schema has been seeded; it does not bootstrap schema itself. If a constraint or index is missing, the operator re-runs `seed-neo4j.sh` from the installer — schema-bootstrap is installer-side, never agent-side.
 2. **Owner confirmed first.** No reference runs until `$ownerUserId` (or `$ownerPersonId`) is persisted and echo-confirmed. The reference set is parameterised — no hard-coded owner.
 3. **Natural edges only.** Every edge written is one the CSV actually expresses. `Connections.csv` encodes "I am connected on LinkedIn to this person" — that becomes `CONNECTED_ON_LINKEDIN`. No synthetic attach-to-owner pattern bolted onto rows that don't describe a relationship to the owner.
 4. **Reuse Maxy labels.** Schema-extension is last resort. The LinkedIn set maps onto existing labels wherever semantics align:
@@ -60,10 +60,31 @@ When the owner is an external Person (non-operator archive), the anchor is the c
 ## Execution model
-1. Confirm `schema.cypher` is applied (one-liner: `cypher-shell ... < platform/neo4j/schema.cypher`; safe to re-run).
-2. Run the owner-confirmation flow, persist `$ownerUserId` / `$ownerPersonId`.
-3. For each file the operator approves, load its reference, parse the CSV, batch rows (default 500 per tx), execute the reference's Cypher with `$rows` + owner parameter.
-4. After each file emit `[linkedin-import] file=<name> rows=<n> created=<n> matched=<n> ms=<elapsed>`.
+1. Run the owner-confirmation flow, persist `$ownerUserId` / `$ownerPersonId`. The owner identity resolves to a single `ownerNodeId` (elementId of the AdminUser or external Person) used in every write call.
+2. For each file the operator approves, load its reference, parse the CSV into typed `rows[]` matching the reference's row schema.
+3. **Selective-ingest gate.** Before invoking any write tool, check the parsed row count against the reference's `selectiveIngestThreshold`. If the count exceeds the threshold, pause and ask the operator to filter the import along the natural axes named in the reference (for `Connections.csv`: Company, Position, Connected On). Apply the filter to `rows[]` before continuing. Compress on write, never after — a 5,000-row blanket import is a landfill, a 200-row filtered import is signal. See [§Selective-ingest](#selective-ingest-threshold-bulk-archives).
+4. Invoke the deterministic write tool the reference names. For all archive references this is `mcp__memory__memory-archive-write` with `{archiveType, ownerNodeId, rows}` — the Cypher body is fixed server-side per `archiveType`, so the agent supplies parsed rows, never Cypher. The tool batches rows at 500 per transaction internally.
+5. After each file emit `[linkedin-import] file=<name> rows=<n> created=<n> matched=<n> ms=<elapsed>` using the counters returned by the write tool.
+**Doctrine:** raw Cypher and `cypher-shell` invocations are forbidden in this skill and its references. Writes route through `mcp__memory__memory-archive-write` (bulk archives) or `mcp__memory__memory-write` / `mcp__memory__memory-update` (single-node enrichments like `profile.md`). If a CSV needs a write shape no current MCP tool supports, file a task to extend `memory-archive-write` with a new `archiveType` handler — never improvise via Bash. See [database-operator's LOUD-FAIL prerogative](../../../../templates/specialists/agents/database-operator.md#prerogatives).
+## Selective-ingest threshold (bulk archives)
+A LinkedIn export typically contains 3,000–10,000 connections. Writing all of them in one shot defeats compression-on-write — most rows will never be queried, and the noise compounds with every subsequent ingest. The skill compresses by interrogating the operator before bulk writes.
+**Threshold:** when a parsed reference's `rows[]` exceeds **100 rows**, pause and ask the operator to filter along the reference's natural axes before invoking the write tool.
+For `Connections.csv` the natural filter axes are:
+- **Company** — "only people at LargeCorp", "only Female Founders Fund alumni"
+- **Position** — "only Partners", "only Engineering Managers"
+- **Connected On** (date range) — "only my last two years", "since 2024-01-01"
+The operator picks one axis or a combination. The agent applies the filter to `rows[]` and writes only the filtered subset.
+**Re-importing is idempotent.** Coming back later with a wider filter (`"add anyone at LargeCorp"`, `"include 2022 too"`) hits the same `linkedinUrl` natural key — existing `:Person` nodes are matched and updated; only the new-only delta is created. The operator can grow the slice over time without dedup work.
+**Why the threshold lives in the skill, not the server.** Different archive types have different "interesting" thresholds — 100 LinkedIn connections is a lot; 100 LinkedIn skills is small. The MCP tool accepts whatever rows are passed; the conversational gate is the skill's responsibility.
 ## File roster