npm - @chainlesschain/personal-data-hub - Versions diffs - 0.1.0 → 0.2.1 - Mend

@chainlesschain/personal-data-hub 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (154)

package/__tests__/adapters/ai-chat-cookie-capture-spec.test.js +211 -0
package/__tests__/adapters/ai-chat-health-checker.test.js +262 -0
package/__tests__/adapters/ai-chat-history.test.js +396 -0
package/__tests__/adapters/ai-chat-http-client.test.js +242 -0
package/__tests__/adapters/ai-chat-vendors.test.js +874 -0
package/__tests__/adapters/alipay-bill-adapter.test.js +538 -0
package/__tests__/adapters/email-adapter.test.js +138 -1
package/__tests__/adapters/email-classifier.test.js +347 -0
package/__tests__/adapters/email-pdf-extractor.test.js +529 -0
package/__tests__/adapters/email-retry-progress.test.js +294 -0
package/__tests__/adapters/email-templates.test.js +699 -0
package/__tests__/adapters/social-toutiao-kuaishou-scaffold.test.js +269 -0
package/__tests__/adapters/system-data-adapter.test.js +440 -0
package/__tests__/adapters/system-data-android-ingest.test.js +144 -0
package/__tests__/adapters/system-data-android.test.js +387 -0
package/__tests__/adapters/system-data-disclosure.test.js +153 -0
package/__tests__/adapters/wechat-bootstrap.test.js +240 -0
package/__tests__/adapters/wechat-env-probe.test.js +162 -0
package/__tests__/adapters/wechat-frida-agent.test.js +191 -0
package/__tests__/adapters/wechat-frida-integration.test.js +149 -0
package/__tests__/adapters/wechat-frida-key-provider.test.js +188 -0
package/__tests__/adapters/wechat-md5-key-provider.test.js +101 -0
package/__tests__/analysis-skills.test.js +556 -0
package/__tests__/analysis.test.js +329 -1
package/__tests__/e2e/ai-chat-cross-source-journey.test.js +213 -0
package/__tests__/e2e/full-user-journey.test.js +188 -0
package/__tests__/entity-resolver-ingest-hook.test.js +177 -0
package/__tests__/entity-resolver-stages.test.js +411 -0
package/__tests__/entity-resolver-vault.test.js +246 -0
package/__tests__/entity-resolver.test.js +526 -0
package/__tests__/fixtures/entity-resolver-200-mock.json +96 -0
package/__tests__/integration/ai-chat-history-registry.test.js +228 -0
package/__tests__/integration/aichat-wizard-end-to-end.test.js +282 -0
package/__tests__/integration/cross-adapter-pipelines.test.js +396 -0
package/__tests__/integration/wechat-bootstrap-end-to-end.test.js +390 -0
package/__tests__/longtail-adapters.test.js +217 -0
package/__tests__/mobile-extractor.test.js +288 -0
package/__tests__/registry.test.js +4 -2
package/__tests__/shopping-adapters.test.js +296 -0
package/__tests__/sidecar-contacts-cross-validate.test.js +163 -0
package/__tests__/sidecar-supervisor.test.js +120 -0
package/__tests__/social-adapters.test.js +206 -0
package/__tests__/travel-adapters.test.js +325 -0
package/__tests__/vault.test.js +3 -3
package/__tests__/wechat-adapter.test.js +476 -0
package/__tests__/whatsapp-adapter.test.js +135 -0
package/lib/adapter-spec.js +12 -0
package/lib/adapters/_python-sidecar-base.js +207 -0
package/lib/adapters/ai-chat-history/ai-chat-adapter.js +374 -0
package/lib/adapters/ai-chat-history/cookie-auth.js +109 -0
package/lib/adapters/ai-chat-history/cookie-capture-spec.js +331 -0
package/lib/adapters/ai-chat-history/health-checker.js +210 -0
package/lib/adapters/ai-chat-history/http-client.js +211 -0
package/lib/adapters/ai-chat-history/index.js +28 -0
package/lib/adapters/ai-chat-history/schema-map.js +258 -0
package/lib/adapters/ai-chat-history/vendor-spec.js +86 -0
package/lib/adapters/ai-chat-history/vendors/coze.js +179 -0
package/lib/adapters/ai-chat-history/vendors/deepseek.js +199 -0
package/lib/adapters/ai-chat-history/vendors/doubao.js +255 -0
package/lib/adapters/ai-chat-history/vendors/dreamina.js +174 -0
package/lib/adapters/ai-chat-history/vendors/hunyuan.js +176 -0
package/lib/adapters/ai-chat-history/vendors/kimi.js +182 -0
package/lib/adapters/ai-chat-history/vendors/qianfan.js +160 -0
package/lib/adapters/ai-chat-history/vendors/tongyi.js +193 -0
package/lib/adapters/ai-chat-history/vendors/zhipu.js +202 -0
package/lib/adapters/ai-chat-history/wizard-controller.js +473 -0
package/lib/adapters/alipay-bill/alipay-bill-adapter.js +311 -0
package/lib/adapters/alipay-bill/counterparty.js +129 -0
package/lib/adapters/alipay-bill/csv-parser.js +217 -0
package/lib/adapters/alipay-bill/index.js +41 -0
package/lib/adapters/alipay-bill/zip-decryptor.js +111 -0
package/lib/adapters/email-imap/classifier.js +495 -0
package/lib/adapters/email-imap/email-adapter.js +419 -8
package/lib/adapters/email-imap/index.js +42 -0
package/lib/adapters/email-imap/pdf-extractor.js +192 -0
package/lib/adapters/email-imap/templates/bill.js +232 -0
package/lib/adapters/email-imap/templates/government.js +120 -0
package/lib/adapters/email-imap/templates/index.js +78 -0
package/lib/adapters/email-imap/templates/order.js +186 -0
package/lib/adapters/email-imap/templates/other.js +114 -0
package/lib/adapters/email-imap/templates/register.js +113 -0
package/lib/adapters/email-imap/templates/travel.js +157 -0
package/lib/adapters/email-imap/templates/utils.js +275 -0
package/lib/adapters/email-imap/transactions.js +234 -0
package/lib/adapters/messaging-qq/index.js +158 -0
package/lib/adapters/messaging-telegram/index.js +142 -0
package/lib/adapters/messaging-whatsapp/index.js +189 -0
package/lib/adapters/shopping-base/index.js +208 -0
package/lib/adapters/shopping-jd/index.js +150 -0
package/lib/adapters/shopping-meituan/index.js +154 -0
package/lib/adapters/shopping-taobao/index.js +176 -0
package/lib/adapters/social-bilibili/index.js +171 -0
package/lib/adapters/social-douyin/index.js +116 -0
package/lib/adapters/social-kuaishou/index.js +237 -0
package/lib/adapters/social-toutiao/index.js +236 -0
package/lib/adapters/social-weibo/index.js +164 -0
package/lib/adapters/social-xiaohongshu/index.js +96 -0
package/lib/adapters/system-data/disclosure.js +166 -0
package/lib/adapters/system-data/index.js +34 -0
package/lib/adapters/system-data/system-data-adapter.js +344 -0
package/lib/adapters/system-data-android/adapter.js +348 -0
package/lib/adapters/system-data-android/index.js +76 -0
package/lib/adapters/travel-12306/index.js +151 -0
package/lib/adapters/travel-amap/index.js +164 -0
package/lib/adapters/travel-baidu-map/index.js +162 -0
package/lib/adapters/travel-base/index.js +240 -0
package/lib/adapters/travel-ctrip/index.js +151 -0
package/lib/adapters/wechat/bootstrap.js +146 -0
package/lib/adapters/wechat/content-parser.js +326 -0
package/lib/adapters/wechat/db-reader.js +209 -0
package/lib/adapters/wechat/env-probe.js +218 -0
package/lib/adapters/wechat/frida-agent/loader.js +67 -0
package/lib/adapters/wechat/frida-agent/wechat-key-hook.js +126 -0
package/lib/adapters/wechat/index.js +37 -0
package/lib/adapters/wechat/key-extractor.js +158 -0
package/lib/adapters/wechat/key-providers/frida-key-provider.js +244 -0
package/lib/adapters/wechat/key-providers/index.js +22 -0
package/lib/adapters/wechat/key-providers/key-provider-base.js +44 -0
package/lib/adapters/wechat/key-providers/md5-key-provider.js +81 -0
package/lib/adapters/wechat/normalize.js +220 -0
package/lib/adapters/wechat/wechat-adapter.js +205 -0
package/lib/analysis-skills/base.js +113 -0
package/lib/analysis-skills/footprint.js +167 -0
package/lib/analysis-skills/index.js +58 -0
package/lib/analysis-skills/interests.js +161 -0
package/lib/analysis-skills/relations.js +226 -0
package/lib/analysis-skills/spending.js +219 -0
package/lib/analysis-skills/timeline.js +167 -0
package/lib/analysis.js +191 -2
package/lib/entity-resolver/embedding-stage.js +198 -0
package/lib/entity-resolver/entity-resolver.js +384 -0
package/lib/entity-resolver/index.js +42 -0
package/lib/entity-resolver/llm-stage.js +191 -0
package/lib/entity-resolver/rule-stage.js +208 -0
package/lib/entity-resolver/worker.js +149 -0
package/lib/index.js +131 -0
package/lib/migrations.js +73 -0
package/lib/mobile-extractor/android.js +193 -0
package/lib/mobile-extractor/index.js +9 -0
package/lib/mobile-extractor/ios.js +223 -0
package/lib/prompt-builder.js +11 -1
package/lib/query-parser.js +7 -1
package/lib/registry.js +42 -0
package/lib/sidecar/index.js +15 -0
package/lib/sidecar/supervisor.js +359 -0
package/lib/vault.js +343 -0
package/package.json +36 -3
package/scripts/_make-fixture-all.js +126 -0
package/scripts/_make-fixture-contacts.js +84 -0
package/scripts/evaluate-entity-resolver.js +213 -0
package/scripts/smoke-phase-5-5.js +196 -0
package/scripts/smoke-phase-5-7.js +181 -0
package/scripts/smoke-system-data-contacts.js +309 -0
package/scripts/smoke-system-data.js +312 -0

package/lib/analysis-skills/timeline.js ADDED Viewed

@@ -0,0 +1,167 @@
+/**
+ * Phase 11 — analysis.timeline skill.
+ *
+ * Cross-source narrative timeline. Given a time window + optional topic
+ * keyword, weaves Events from all adapters into a chronological story
+ * with adapter-aware glyphs (so "邮件" / "支付" / "出行" are
+ * visually distinguishable in the UI).
+ *
+ * LLM (optional) produces a 1-paragraph synthesis: "你这周买过 X 也去过
+ * Y, 给妈妈转账过 Z" rather than just a list.
+ *
+ * Inputs:
+ *   - timeWindow:   optional; defaults to the last 7 days when neither
+ *                   `sinceDays` nor `since` is supplied
+ *   - topicFilter:  optional substring match against title / counterparty
+ *   - personId:     optional — scope to events involving this person
+ *                   (merge-group expanded)
+ *   - limit:        default 100 events
+ *
+ * Output:
+ *   {
+ *     entries: [{ id, occurredAt, title, kind, amount?, adapter, snippet }],
+ *     summary: { totalEvents, byAdapter, byDay, period },
+ *     citations,
+ *     llm_narrative: "..." | null,
+ *   }
+ */
+"use strict";
+const { AnalysisSkill } = require("./base");
+class TimelineSkill extends AnalysisSkill {
+  constructor(opts) {
+    super({ ...opts, name: "analysis.timeline" });
+  }
+  async run(options = {}) {
+    const window = this.resolveTimeWindow({
+      sinceDays: options.sinceDays ?? (options.since ? null : 7), // default 7d
+      ...options,
+    });
+    const limit = Number.isFinite(options.limit) && options.limit > 0
+      ? Math.min(options.limit, 1000)
+      : 100;
+    let events = this._fetchEvents(window, limit);
+    events = this._applyFilters(events, options);
+    const entries = events.map((e) => this._toEntry(e));
+    const summary = this._summarize(entries, window);
+    let narrative = null;
+    if (options.narrative !== false && this.llm && entries.length > 0) {
+      narrative = await this._llmNarrative(entries, summary, options);
+    }
+    return {
+      skill: "analysis.timeline",
+      entries,
+      summary,
+      citations: entries.slice(0, 50).map((e) => e.id),
+      llm_narrative: narrative,
+    };
+  }
+  _fetchEvents({ since, until }, limit) {
+    const q = { limit };
+    if (since != null) q.since = since;
+    if (until != null) q.until = until;
+    const events = this.vault.queryEvents(q) || [];
+    // queryEvents orders DESC; reverse for narrative (oldest first)
+    return events.slice().sort((a, b) => (a.occurredAt || 0) - (b.occurredAt || 0));
+  }
+  _applyFilters(events, options) {
+    let out = events;
+    if (typeof options.topicFilter === "string" && options.topicFilter.length > 0) {
+      const needle = options.topicFilter.toLowerCase();
+      out = out.filter((e) => {
+        const title = (e.content && e.content.title) || "";
+        const counterparty = (e.extra && e.extra.counterparty) || "";
+        const text = (e.content && e.content.text) || "";
+        return title.toLowerCase().includes(needle)
+          || counterparty.toLowerCase().includes(needle)
+          || text.toLowerCase().includes(needle);
+      });
+    }
+    if (typeof options.personId === "string" && options.personId.length > 0) {
+      const memberSet = new Set(this.expandToMergeGroup(options.personId));
+      out = out.filter((e) => {
+        if (memberSet.has(e.actor)) return true;
+        if (Array.isArray(e.participants) && e.participants.some((p) => memberSet.has(p))) return true;
+        return false;
+      });
+    }
+    return out;
+  }
+  _toEntry(event) {
+    const adapter = (event.source && event.source.adapter) || "unknown";
+    return {
+      id: event.id,
+      occurredAt: event.occurredAt,
+      title: (event.content && event.content.title) || "(无标题)",
+      kind: event.subtype || "event",
+      amount: event.content?.amount || null,
+      adapter,
+      snippet: this._buildSnippet(event),
+    };
+  }
+  _buildSnippet(event) {
+    const parts = [];
+    const text = (event.content && event.content.text) || "";
+    if (text) parts.push(text.slice(0, 100));
+    if (event.extra) {
+      if (event.extra.counterparty) parts.push(`@${event.extra.counterparty}`);
+      if (event.extra.from && event.extra.to) parts.push(`${event.extra.from} → ${event.extra.to}`);
+    }
+    return parts.join(" · ").slice(0, 200);
+  }
+  _summarize(entries, window) {
+    const byAdapter = {};
+    const byDay = {};
+    for (const e of entries) {
+      byAdapter[e.adapter] = (byAdapter[e.adapter] || 0) + 1;
+      const d = new Date(e.occurredAt);
+      if (Number.isFinite(d.getTime())) {
+        const day = `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}-${String(d.getDate()).padStart(2, "0")}`;
+        byDay[day] = (byDay[day] || 0) + 1;
+      }
+    }
+    return {
+      totalEvents: entries.length,
+      byAdapter,
+      byDay,
+      period: {
+        since: window.since || null,
+        until: window.until || null,
+      },
+    };
+  }
+  async _llmNarrative(entries, summary, options) {
+    if (entries.length === 0) return null;
+    // Cap to 30 entries for prompt size
+    const sampled = entries.slice(-30);
+    const lines = sampled.map((e) => {
+      const d = new Date(e.occurredAt).toISOString().slice(0, 10);
+      const amt = e.amount ? ` ¥${e.amount.value}(${e.amount.direction})` : "";
+      return `- ${d} [${e.adapter}/${e.kind}] ${e.title}${amt}`;
+    }).join("\n");
+    const userMsg = `用户的事件时间线（共 ${summary.totalEvents} 条, 显示最近 ${sampled.length}）:
+${lines}
+请用 3-5 句话讲清楚这段时间发生了什么、出现的人物 / 地点、有没有明显的主题或事件。中文回答，平实叙述，不评价。`;
+    return await this.callLlmCommentary([
+      { role: "system", content: "你是一个克制的时间线叙述助手。基于事实串联事件，不引申、不评价。" },
+      { role: "user", content: userMsg },
+    ], { acceptNonLocal: options.acceptNonLocal });
+  }
+}
+module.exports = { TimelineSkill };

package/lib/analysis.js CHANGED Viewed

@@ -136,14 +136,26 @@ class AnalysisEngine {
       intent: parsed.intent,
       timeWindow: parsed.timeWindow,
       maxFacts: this.maxFacts,
+      vaultTotals: this._gatherVaultTotals(),
     });
-    // Call LLM.
+    // Call LLM. **skipCache: true** is critical: PDH answers depend on
+    // current vault state (new contacts / events / items ingested between
+    // asks). The desktop LLMManager has a 7-day ResponseCache keyed on
+    // sha256(messages); if a stale entry from before the latest sync hits,
+    // the user sees yesterday's hallucinated count after fixing _gatherFacts
+    // and never finds out (real-device verify 2026-05-21 Xiaomi 24115RA8EC:
+    // "几个联系人" served from cache, returned the pre-Path-C-fix wrong
+    // answer of "32" even though vault now had real contact data). PDH's
+    // freshness-over-latency tradeoff makes the cache strictly counter-
+    // productive at this layer. The cache for OTHER LLM uses (chat /
+    // skill orchestration / autonomous-agent) is unaffected.
     let llmResp;
     try {
       llmResp = await this.llm.chat(messages, {
         temperature: 0.2,
         purpose: "personal-data-hub.analysis.ask",
+        skipCache: true,
       });
     } catch (err) {
       const e = toError(err, "llm.chat");
@@ -195,6 +207,109 @@ class AnalysisEngine {
     };
   }
+  /**
+   * Retrieve the prompt context for a question WITHOUT calling the LLM.
+   *
+   * Mirrors the front half of `ask()` (parseQuery → gatherFacts → ragRetriever
+   * → buildPrompt) and returns the assembled messages + facts. The caller is
+   * responsible for invoking its own LLM with the returned messages and then
+   * (optionally) running citation validation on the answer.
+   *
+   * Why: lets a mobile / browser front-end host the LLM call locally (e.g.
+   * Android-side Volcengine Doubao adapter via API key) while keeping the
+   * vault + retrieval on the desktop. The privacy gate does NOT apply here
+   * because no LLM is contacted — the caller's gate is the gate.
+   *
+   * @param {string} question
+   * @param {object} [options]
+   * @param {number} [options.now]
+   * @param {boolean} [options.skipAudit=false]
+   * @returns {Promise<RetrieveContextResult>}
+   *
+   * @typedef {object} RetrieveContextResult
+   * @property {string} question
+   * @property {object} parsed
+   * @property {Array<object>} facts
+   * @property {string[]} factIds
+   * @property {number} factCount
+   * @property {boolean} truncated
+   * @property {string[]} ragContextIds
+   * @property {Array<{role: string, content: string}>} messages   prompt-builder output, LLM-ready
+   * @property {string} systemPrompt
+   * @property {number} retrievedAt                                Date.now() at start
+   * @property {number} durationMs
+   */
+  async retrieveContext(question, options = {}) {
+    if (typeof question !== "string" || question.length === 0) {
+      throw new Error("AnalysisEngine.retrieveContext: question must be a non-empty string");
+    }
+    const startedAt = Date.now();
+    const parsed = parseQuery(question, { now: options.now });
+    const facts = this._gatherFacts(parsed);
+    const ragContextIds = [];
+    if (this.ragRetriever) {
+      try {
+        const docs = await this.ragRetriever(question, parsed);
+        if (Array.isArray(docs)) {
+          for (const doc of docs) {
+            if (!doc || !doc.id) continue;
+            const e = this.vault.getEvent(doc.id);
+            if (e && !facts.find((f) => f.id === e.id)) {
+              facts.push(e);
+              ragContextIds.push(doc.id);
+            }
+          }
+        }
+      } catch (err) {
+        const e = toError(err, "ragRetriever");
+        try {
+          this.vault.audit("analysis.rag_failed", question, { error: e.message });
+        } catch (_e) { /* audit failures are non-fatal */ }
+      }
+    }
+    const { messages, factIds, factCount, truncated } = buildPrompt({
+      question,
+      facts,
+      systemPrompt: this.systemPrompt,
+      intent: parsed.intent,
+      timeWindow: parsed.timeWindow,
+      maxFacts: this.maxFacts,
+      vaultTotals: this._gatherVaultTotals(),
+    });
+    const durationMs = Date.now() - startedAt;
+    if (!options.skipAudit) {
+      try {
+        this.vault.audit("analysis.retrieve_context", question, {
+          factCount,
+          truncated,
+          ragContextIds: ragContextIds.length,
+          durationMs,
+        });
+      } catch (_e) { /* audit failures are non-fatal */ }
+    }
+    return {
+      question,
+      parsed,
+      facts,
+      // buildPrompt returns factIds as a Set; flatten to Array so the result
+      // round-trips through IPC / WS JSON serialization without becoming `{}`.
+      factIds: Array.from(factIds),
+      factCount,
+      truncated,
+      ragContextIds,
+      messages,
+      systemPrompt: this.systemPrompt,
+      retrievedAt: startedAt,
+      durationMs,
+    };
+  }
   // ─── Internals ─────────────────────────────────────────────────────
   _gatherFacts(parsed) {
@@ -215,7 +330,81 @@ class AnalysisEngine {
       if (Number.isFinite(parsed.timeWindow.since)) q.since = parsed.timeWindow.since;
       if (Number.isFinite(parsed.timeWindow.until)) q.until = parsed.timeWindow.until;
     }
-    return this.vault.queryEvents(q);
+    const events = this.vault.queryEvents(q);
+    // Path C follow-up — events alone miss whole categories of facts:
+    //  - contacts (system-data-android) land in `persons`, not `events`
+    //  - installed apps land in `items`, not `events`
+    //  - places (visited locations) live in `places`
+    // Without these the LLM gets 0 facts for "我有几个联系人" style questions
+    // and hallucinates a count. We pull a bounded slice of each entity type
+    // and append; prompt-builder.summarizeFact already handles `person` /
+    // `place` / fallback `item` shapes, so this is additive with no schema
+    // change to the LLM-facing prompt.
+    //
+    // Sizing: keep events as the majority (existing behavior is unchanged for
+    // event-heavy queries like 消费 / 通话); split the remaining 1/2 budget
+    // between persons + items. Time window + adapter filters don't apply to
+    // these tables (persons aren't time-stamped events) — they're current-
+    // state snapshots that should always be visible. Adapter filter is also
+    // skipped because users asking "我有几个联系人" don't say "from
+    // system-data-android".
+    const remaining = Math.max(0, this.maxFacts - events.length);
+    const sideBudget = Math.floor(remaining / 2);
+    const personBudget = sideBudget > 0 ? sideBudget : 0;
+    const itemBudget = remaining - personBudget;
+    let persons = [];
+    if (personBudget > 0) {
+      try {
+        persons = this.vault.queryPersons({ limit: personBudget });
+      } catch (_e) {
+        // Older vaults / forks without queryPersons — fall back gracefully.
+      }
+    }
+    let items = [];
+    if (itemBudget > 0) {
+      try {
+        items = this.vault.queryItems({ limit: itemBudget });
+      } catch (_e) {
+        /* same fallback */
+      }
+    }
+    return [...events, ...persons, ...items];
+  }
+  /**
+   * Pull authoritative entity counts from the vault. These go into the
+   * prompt's TOTALS block so the LLM can answer "how many X" questions
+   * correctly even when the FACTS sample is truncated (maxFacts cap).
+   *
+   * 2026-05-21 bug: LLM said "32 contacts" when vault actually had ~500.
+   * Root cause was a mix of (a) FACTS not including persons (fixed in
+   * _gatherFacts), and (b) LLM still counting FACTS array length even after
+   * persons were included — capped at the 80-fact ceiling. TOTALS bypasses
+   * both: it gives the LLM the real number to quote directly.
+   *
+   * Wrapped in try because legacy vault forks / mock vaults in tests may
+   * not expose `stats()`; falling back to undefined makes prompt-builder
+   * skip the block entirely.
+   */
+  _gatherVaultTotals() {
+    if (typeof this.vault.stats !== "function") return undefined;
+    try {
+      const s = this.vault.stats();
+      // Trim to the fields useful for question answering — schemaVersion /
+      // mergeGroups / audit log size are noise here.
+      return {
+        events: s.events,
+        persons: s.persons,
+        places: s.places,
+        items: s.items,
+        topics: s.topics,
+      };
+    } catch (_e) {
+      return undefined;
+    }
   }
 }

package/lib/entity-resolver/embedding-stage.js ADDED Viewed

@@ -0,0 +1,198 @@
+/**
+ * Phase 8.3 — embedding stage.
+ *
+ * Pluggable function that takes two Person rows + returns
+ * `{ sim, profileA, profileB }` where `sim` is a cosine similarity in
+ * [0, 1]. Caller (EntityResolver.drain) uses thresholds to decide
+ * auto-same / auto-different / "send to LLM stage".
+ *
+ * Profile encoding (per design doc §4.2):
+ *   "{type}: {primary_name} | aliases: {a1, a2} | identifiers: {phone,
+ *    email} | recent: {top-3 event titles}"
+ *
+ * Embedding backend: Ollama HTTP API by default (compatible with
+ * `nomic-embed-text` / `bge-m3` / `bge-large-zh`). Caller can inject any
+ * `embedFn: async (text) => Float32Array | number[]` via opts.
+ *
+ * Privacy: same gate as AnalysisEngine — local Ollama default, accept-
+ * NonLocal flag required for hosted. Phase 8.4 LLM stage carries the
+ * same invariant; this module is dumb to that and trusts caller.
+ */
+"use strict";
+// Base URL used for the Ollama backend when the caller passes no
+// non-empty `opts.ollamaUrl`.
+const DEFAULT_OLLAMA_URL = "http://localhost:11434";
+// Embedding model name used when the caller passes no non-empty `opts.model`.
+const DEFAULT_MODEL = "nomic-embed-text";
+class EmbeddingStage {
+  constructor(opts = {}) {
+    if (!opts || typeof opts !== "object") {
+      throw new Error("EmbeddingStage: opts required");
+    }
+    this._embedFn = typeof opts.embedFn === "function" ? opts.embedFn : null;
+    this._ollamaUrl = typeof opts.ollamaUrl === "string" && opts.ollamaUrl.length > 0
+      ? opts.ollamaUrl
+      : DEFAULT_OLLAMA_URL;
+    this._model = typeof opts.model === "string" && opts.model.length > 0
+      ? opts.model
+      : DEFAULT_MODEL;
+    // Caller-supplied vault lets us pull recent events for richer profiles
+    // (per design doc §4.2 — "recent: top-3 event titles"). Optional.
+    this._vault = opts.vault || null;
+    // LRU-ish in-memory cache: personId → embedding. Cheap perf win for
+    // re-using the same person across many pair comparisons in one drain.
+    this._cache = new Map();
+    this._cacheMaxSize = Number.isFinite(opts.cacheMaxSize) ? opts.cacheMaxSize : 1000;
+  }
+  /**
+   * The function EntityResolver.drain expects.
+   * Signature: `async (a, b) => { sim, profileA, profileB }`
+   */
+  async compare(a, b) {
+    const [vecA, profileA] = await this._embedPerson(a);
+    const [vecB, profileB] = await this._embedPerson(b);
+    const sim = cosineSimilarity(vecA, vecB);
+    return { sim, profileA, profileB };
+  }
+  /**
+   * Returns a stage function bound to this instance, suitable for passing
+   * as `opts.embeddingStage` to EntityResolver.
+   */
+  asStageFn() {
+    return (a, b) => this.compare(a, b);
+  }
+  async _embedPerson(person) {
+    if (!person || !person.id) throw new Error("EmbeddingStage: person required");
+    const profile = this.buildProfile(person);
+    if (this._cache.has(person.id)) {
+      return [this._cache.get(person.id), profile];
+    }
+    const vec = await this._embed(profile);
+    if (!Array.isArray(vec) && !(vec instanceof Float32Array)) {
+      throw new Error("EmbeddingStage: embedFn must return Array<number> or Float32Array");
+    }
+    if (this._cache.size >= this._cacheMaxSize) {
+      // FIFO eviction
+      const first = this._cache.keys().next().value;
+      if (first !== undefined) this._cache.delete(first);
+    }
+    this._cache.set(person.id, vec);
+    return [vec, profile];
+  }
+  /**
+   * Build the textual profile that gets embedded. Public for tests +
+   * for callers that want to feed the same string to LLM stage.
+   */
+  buildProfile(person) {
+    const parts = [];
+    parts.push(`${person.type || "person"}: ${(person.names && person.names[0]) || "(unknown)"}`);
+    if (person.names && person.names.length > 1) {
+      parts.push(`aliases: ${person.names.slice(1).join(", ")}`);
+    }
+    const ids = person.identifiers || {};
+    const idStrs = [];
+    for (const key of Object.keys(ids)) {
+      const v = ids[key];
+      if (Array.isArray(v)) {
+        for (const x of v) idStrs.push(`${key}:${x}`);
+      } else if (typeof v === "string") {
+        idStrs.push(`${key}:${v}`);
+      }
+    }
+    if (idStrs.length > 0) {
+      parts.push(`identifiers: ${idStrs.join(", ")}`);
+    }
+    if (this._vault) {
+      try {
+        const recent = this._recentEvents(person.id, 3);
+        if (recent.length > 0) {
+          parts.push(`recent: ${recent.map((e) => e.content?.title || "(no title)").join("; ")}`);
+        }
+      } catch (_e) {
+        // Vault read failure is non-fatal — embedding still works without events
+      }
+    }
+    return parts.join(" | ");
+  }
+  _recentEvents(personId, limit) {
+    if (!this._vault || typeof this._vault.queryEvents !== "function") return [];
+    // Pull events where this person is actor or participant
+    const events = this._vault.queryEvents({ actor: personId, limit });
+    return Array.isArray(events) ? events : [];
+  }
+  async _embed(text) {
+    if (this._embedFn) return this._embedFn(text);
+    // Default backend: Ollama HTTP API
+    return await ollamaEmbed(this._ollamaUrl, this._model, text);
+  }
+  /** Clear the embedding cache (e.g. after batch). */
+  clearCache() {
+    this._cache.clear();
+  }
+}
+// ─── helpers ────────────────────────────────────────────────────────────
+/**
+ * Cosine similarity ∈ [-1, 1], clamped to [0, 1] for embeddings (they
+ * tend to live in non-negative space but we don't trust that).
+ */
+function cosineSimilarity(a, b) {
+  if (!a || !b) return 0;
+  const len = Math.min(a.length, b.length);
+  if (len === 0) return 0;
+  let dot = 0;
+  let normA = 0;
+  let normB = 0;
+  for (let i = 0; i < len; i += 1) {
+    const x = Number(a[i]) || 0;
+    const y = Number(b[i]) || 0;
+    dot += x * y;
+    normA += x * x;
+    normB += y * y;
+  }
+  if (normA === 0 || normB === 0) return 0;
+  const sim = dot / (Math.sqrt(normA) * Math.sqrt(normB));
+  // Clamp to [0, 1] for the threshold-comparison call site
+  return Math.max(0, Math.min(1, sim));
+}
+/**
+ * Call Ollama's /api/embeddings endpoint. Throws on failure; caller
+ * (EntityResolver.drain → errorResolve) handles retry-vs-fatal.
+ */
+async function ollamaEmbed(baseUrl, model, text) {
+  const url = `${baseUrl.replace(/\/$/, "")}/api/embeddings`;
+  let resp;
+  try {
+    resp = await fetch(url, {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ model, prompt: text }),
+    });
+  } catch (err) {
+    throw new Error(`Ollama embed call failed (${url}): ${err && err.message ? err.message : err}`);
+  }
+  if (!resp.ok) {
+    const body = await resp.text().catch(() => "");
+    throw new Error(`Ollama embed returned ${resp.status}: ${body.slice(0, 200)}`);
+  }
+  const data = await resp.json();
+  if (!data || !Array.isArray(data.embedding)) {
+    throw new Error(`Ollama embed response missing 'embedding' array`);
+  }
+  return data.embedding;
+}
+module.exports = {
+  EmbeddingStage,
+  cosineSimilarity,
+  ollamaEmbed,
+};