@chainlesschain/personal-data-hub 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. package/__tests__/adapters/ai-chat-history.test.js +395 -0
  2. package/__tests__/adapters/ai-chat-http-client.test.js +242 -0
  3. package/__tests__/adapters/ai-chat-vendors.test.js +733 -0
  4. package/__tests__/adapters/alipay-bill-adapter.test.js +538 -0
  5. package/__tests__/adapters/email-adapter.test.js +138 -1
  6. package/__tests__/adapters/email-classifier.test.js +347 -0
  7. package/__tests__/adapters/email-pdf-extractor.test.js +529 -0
  8. package/__tests__/adapters/email-retry-progress.test.js +294 -0
  9. package/__tests__/adapters/email-templates.test.js +699 -0
  10. package/__tests__/adapters/system-data-adapter.test.js +440 -0
  11. package/__tests__/adapters/system-data-disclosure.test.js +153 -0
  12. package/__tests__/analysis-skills.test.js +409 -0
  13. package/__tests__/entity-resolver-ingest-hook.test.js +177 -0
  14. package/__tests__/entity-resolver-stages.test.js +411 -0
  15. package/__tests__/entity-resolver-vault.test.js +246 -0
  16. package/__tests__/entity-resolver.test.js +526 -0
  17. package/__tests__/fixtures/entity-resolver-200-mock.json +96 -0
  18. package/__tests__/longtail-adapters.test.js +217 -0
  19. package/__tests__/mobile-extractor.test.js +288 -0
  20. package/__tests__/shopping-adapters.test.js +296 -0
  21. package/__tests__/sidecar-contacts-cross-validate.test.js +163 -0
  22. package/__tests__/sidecar-supervisor.test.js +120 -0
  23. package/__tests__/social-adapters.test.js +206 -0
  24. package/__tests__/travel-adapters.test.js +325 -0
  25. package/__tests__/vault.test.js +3 -3
  26. package/__tests__/wechat-adapter.test.js +476 -0
  27. package/__tests__/whatsapp-adapter.test.js +135 -0
  28. package/lib/adapter-spec.js +12 -0
  29. package/lib/adapters/_python-sidecar-base.js +207 -0
  30. package/lib/adapters/ai-chat-history/ai-chat-adapter.js +335 -0
  31. package/lib/adapters/ai-chat-history/cookie-auth.js +109 -0
  32. package/lib/adapters/ai-chat-history/http-client.js +211 -0
  33. package/lib/adapters/ai-chat-history/index.js +28 -0
  34. package/lib/adapters/ai-chat-history/schema-map.js +221 -0
  35. package/lib/adapters/ai-chat-history/vendor-spec.js +85 -0
  36. package/lib/adapters/ai-chat-history/vendors/coze.js +179 -0
  37. package/lib/adapters/ai-chat-history/vendors/deepseek.js +199 -0
  38. package/lib/adapters/ai-chat-history/vendors/dreamina.js +174 -0
  39. package/lib/adapters/ai-chat-history/vendors/hunyuan.js +176 -0
  40. package/lib/adapters/ai-chat-history/vendors/kimi.js +182 -0
  41. package/lib/adapters/ai-chat-history/vendors/qianfan.js +160 -0
  42. package/lib/adapters/ai-chat-history/vendors/tongyi.js +193 -0
  43. package/lib/adapters/ai-chat-history/vendors/zhipu.js +202 -0
  44. package/lib/adapters/alipay-bill/alipay-bill-adapter.js +307 -0
  45. package/lib/adapters/alipay-bill/counterparty.js +129 -0
  46. package/lib/adapters/alipay-bill/csv-parser.js +217 -0
  47. package/lib/adapters/alipay-bill/index.js +41 -0
  48. package/lib/adapters/alipay-bill/zip-decryptor.js +111 -0
  49. package/lib/adapters/email-imap/classifier.js +495 -0
  50. package/lib/adapters/email-imap/email-adapter.js +419 -8
  51. package/lib/adapters/email-imap/index.js +42 -0
  52. package/lib/adapters/email-imap/pdf-extractor.js +192 -0
  53. package/lib/adapters/email-imap/templates/bill.js +232 -0
  54. package/lib/adapters/email-imap/templates/government.js +120 -0
  55. package/lib/adapters/email-imap/templates/index.js +78 -0
  56. package/lib/adapters/email-imap/templates/order.js +186 -0
  57. package/lib/adapters/email-imap/templates/other.js +114 -0
  58. package/lib/adapters/email-imap/templates/register.js +113 -0
  59. package/lib/adapters/email-imap/templates/travel.js +157 -0
  60. package/lib/adapters/email-imap/templates/utils.js +275 -0
  61. package/lib/adapters/email-imap/transactions.js +234 -0
  62. package/lib/adapters/messaging-qq/index.js +158 -0
  63. package/lib/adapters/messaging-telegram/index.js +142 -0
  64. package/lib/adapters/messaging-whatsapp/index.js +189 -0
  65. package/lib/adapters/shopping-base/index.js +208 -0
  66. package/lib/adapters/shopping-jd/index.js +150 -0
  67. package/lib/adapters/shopping-meituan/index.js +154 -0
  68. package/lib/adapters/shopping-taobao/index.js +176 -0
  69. package/lib/adapters/social-bilibili/index.js +171 -0
  70. package/lib/adapters/social-douyin/index.js +116 -0
  71. package/lib/adapters/social-weibo/index.js +164 -0
  72. package/lib/adapters/social-xiaohongshu/index.js +96 -0
  73. package/lib/adapters/system-data/disclosure.js +166 -0
  74. package/lib/adapters/system-data/index.js +34 -0
  75. package/lib/adapters/system-data/system-data-adapter.js +344 -0
  76. package/lib/adapters/travel-12306/index.js +151 -0
  77. package/lib/adapters/travel-amap/index.js +164 -0
  78. package/lib/adapters/travel-baidu-map/index.js +162 -0
  79. package/lib/adapters/travel-base/index.js +240 -0
  80. package/lib/adapters/travel-ctrip/index.js +151 -0
  81. package/lib/adapters/wechat/content-parser.js +326 -0
  82. package/lib/adapters/wechat/db-reader.js +209 -0
  83. package/lib/adapters/wechat/index.js +28 -0
  84. package/lib/adapters/wechat/key-extractor.js +158 -0
  85. package/lib/adapters/wechat/normalize.js +220 -0
  86. package/lib/adapters/wechat/wechat-adapter.js +205 -0
  87. package/lib/analysis-skills/base.js +113 -0
  88. package/lib/analysis-skills/footprint.js +167 -0
  89. package/lib/analysis-skills/index.js +58 -0
  90. package/lib/analysis-skills/interests.js +161 -0
  91. package/lib/analysis-skills/relations.js +226 -0
  92. package/lib/analysis-skills/spending.js +216 -0
  93. package/lib/analysis-skills/timeline.js +167 -0
  94. package/lib/entity-resolver/embedding-stage.js +198 -0
  95. package/lib/entity-resolver/entity-resolver.js +384 -0
  96. package/lib/entity-resolver/index.js +42 -0
  97. package/lib/entity-resolver/llm-stage.js +191 -0
  98. package/lib/entity-resolver/rule-stage.js +208 -0
  99. package/lib/entity-resolver/worker.js +149 -0
  100. package/lib/index.js +115 -0
  101. package/lib/migrations.js +73 -0
  102. package/lib/mobile-extractor/android.js +193 -0
  103. package/lib/mobile-extractor/index.js +9 -0
  104. package/lib/mobile-extractor/ios.js +223 -0
  105. package/lib/registry.js +42 -0
  106. package/lib/sidecar/index.js +15 -0
  107. package/lib/sidecar/supervisor.js +359 -0
  108. package/lib/vault.js +266 -0
  109. package/package.json +29 -3
  110. package/scripts/_make-fixture-all.js +126 -0
  111. package/scripts/_make-fixture-contacts.js +84 -0
  112. package/scripts/evaluate-entity-resolver.js +213 -0
  113. package/scripts/smoke-phase-5-5.js +196 -0
  114. package/scripts/smoke-phase-5-7.js +181 -0
  115. package/scripts/smoke-system-data-contacts.js +309 -0
  116. package/scripts/smoke-system-data.js +312 -0
@@ -0,0 +1,216 @@
1
+ /**
2
+ * Phase 11 — analysis.spending skill.
3
+ *
4
+ * Inputs:
5
+ * - timeWindow: { since, until } | { sinceDays: N } | { sinceMonths: N }
6
+ * - dimension: "merchant" | "category" | "counterparty" | "month"
7
+ * Default "merchant".
8
+ * - merchantFilter: optional substring (e.g. "美团" to scope to one
9
+ * merchant family)
10
+ * - personId: optional — scope to spending TO this person (uses
11
+ * merge-group expansion)
12
+ * - topN: default 10
13
+ *
14
+ * Output:
15
+ * {
16
+ * summary: {
17
+ * totalSpend, totalIncome, netFlow, currency,
18
+ * eventCount, uniqueCounterparties, period,
19
+ * },
20
+ * breakdown: [{ key, totalSpend, eventCount, percentOfTotal }, ...],
21
+ * trend: [{ monthKey, totalSpend, eventCount }, ...],
22
+ * citations: [eventId, ...],
23
+ * llm_commentary: "..." | null,
24
+ * }
25
+ */
26
+
27
+ "use strict";
28
+
29
+ const { AnalysisSkill } = require("./base");
30
+
31
/** Dimensions accepted by `options.dimension`; any other value falls back to "merchant". */
const SUPPORTED_DIMENSIONS = new Set(["merchant", "category", "counterparty", "month"]);

/**
 * analysis.spending skill: aggregates amount-bearing events from the vault
 * into a summary, a per-dimension breakdown, a monthly trend and an optional
 * LLM commentary.
 */
class SpendingSkill extends AnalysisSkill {
  constructor(opts) {
    super({ ...opts, name: "analysis.spending" });
  }

  /**
   * Run the analysis.
   *
   * @param {object} [options]
   * @param {string} [options.dimension] One of SUPPORTED_DIMENSIONS (default "merchant").
   * @param {number} [options.topN] Max breakdown rows (default 10).
   * @param {string} [options.merchantFilter] Case-insensitive substring matched
   *   against the event title and `extra.counterparty`.
   * @param {string} [options.personId] Keep only events whose actor or
   *   participants belong to this person's merge group.
   * @param {"in"|"out"} [options.direction] Keep only one flow direction.
   * @param {boolean} [options.commentary] Pass `false` to skip the LLM call.
   * @param {boolean} [options.acceptNonLocal] Forwarded to callLlmCommentary.
   * @returns {Promise<object>} `{ skill, summary, breakdown, trend, citations, llm_commentary }`
   */
  async run(options = {}) {
    const { since, until } = this.resolveTimeWindow(options);
    const dimension = SUPPORTED_DIMENSIONS.has(options.dimension)
      ? options.dimension
      : "merchant";
    const topN = Number.isFinite(options.topN) && options.topN > 0 ? options.topN : 10;

    // Pull events with subtype = payment / transfer / refund / utility /
    // redenvelope / investment / income. These are the ones with content.amount.
    const events = this._fetchPaymentEvents({ since, until });
    const filtered = this._applyFilters(events, options);

    const summary = this._summarize(filtered, since, until);
    const breakdown = this._breakdown(filtered, dimension, topN);
    const trend = this._monthlyTrend(filtered);
    // Citations are capped at the first 50 matching events.
    const citations = filtered.slice(0, 50).map((e) => e.id);

    let llmCommentary = null;
    if (options.commentary !== false && this.llm) {
      llmCommentary = await this._llmCommentary(summary, breakdown, dimension, options);
    }

    return {
      skill: "analysis.spending",
      summary,
      breakdown,
      trend,
      citations,
      llm_commentary: llmCommentary,
    };
  }

  /**
   * Query the vault once per payment-like subtype (up to 5000 rows each) and
   * keep only events that carry a finite `content.amount.value`.
   */
  _fetchPaymentEvents({ since, until }) {
    const events = [];
    const subtypes = ["payment", "transfer", "refund", "utility", "redenvelope", "investment", "income"];
    for (const subtype of subtypes) {
      const q = { subtype, limit: 5000 };
      if (since != null) q.since = since;
      if (until != null) q.until = until;
      const batch = this.vault.queryEvents(q) || [];
      for (const e of batch) {
        if (e && e.content && e.content.amount && Number.isFinite(e.content.amount.value)) {
          events.push(e);
        }
      }
    }
    return events;
  }

  /**
   * Apply the optional merchant / person / direction filters, in that order.
   * Returns the input array itself when no filter is active.
   */
  _applyFilters(events, options) {
    // Titles / counterparties are not guaranteed to be strings; coerce
    // instead of crashing on `.toLowerCase()`.
    const lower = (value) => (value == null ? "" : String(value).toLowerCase());

    let out = events;
    if (typeof options.merchantFilter === "string" && options.merchantFilter.length > 0) {
      const needle = options.merchantFilter.toLowerCase();
      out = out.filter((e) => {
        const title = lower((e.content && e.content.title) || "");
        const counterparty = lower((e.extra && e.extra.counterparty) || "");
        return title.includes(needle) || counterparty.includes(needle);
      });
    }
    if (typeof options.personId === "string" && options.personId.length > 0) {
      const memberSet = new Set(this.expandToMergeGroup(options.personId));
      out = out.filter((e) => {
        if (memberSet.has(e.actor)) return true;
        return Array.isArray(e.participants) && e.participants.some((p) => memberSet.has(p));
      });
    }
    if (options.direction === "out" || options.direction === "in") {
      out = out.filter((e) => e.content.amount.direction === options.direction);
    }
    return out;
  }

  /**
   * Totals (rounded to 2 decimals), distinct counterparties and the period.
   * The currency is taken from the first event; amounts are summed as-is.
   */
  _summarize(events, since, until) {
    let totalSpend = 0;
    let totalIncome = 0;
    const counterparties = new Set();
    for (const e of events) {
      const v = e.content.amount.value;
      if (e.content.amount.direction === "in") totalIncome += v;
      else if (e.content.amount.direction === "out") totalSpend += v;
      // Identify counterparty for distinctness
      const cp = (e.extra && e.extra.counterparty) || e.actor;
      if (cp && cp !== "person-self") counterparties.add(cp);
    }
    return {
      totalSpend: Math.round(totalSpend * 100) / 100,
      totalIncome: Math.round(totalIncome * 100) / 100,
      netFlow: Math.round((totalIncome - totalSpend) * 100) / 100,
      currency: events[0]?.content?.amount?.currency || "CNY",
      eventCount: events.length,
      uniqueCounterparties: counterparties.size,
      period: { since: since || null, until: until || null },
    };
  }

  /**
   * Group outgoing events by `dimension` and return the top `topN` buckets,
   * largest first. `percentOfTotal` is relative to all outgoing spend (one
   * decimal), not just the returned rows.
   */
  _breakdown(events, dimension, topN) {
    const buckets = new Map();
    for (const e of events) {
      // Only count "out" for spending breakdown — income tracked separately
      if (e.content.amount.direction !== "out") continue;
      const key = this._keyFor(e, dimension);
      if (!key) continue;
      const cur = buckets.get(key) || { key, totalSpend: 0, eventCount: 0 };
      cur.totalSpend += e.content.amount.value;
      cur.eventCount += 1;
      buckets.set(key, cur);
    }
    const all = Array.from(buckets.values());
    const totalOut = all.reduce((s, b) => s + b.totalSpend, 0);
    return all
      .map((b) => ({
        ...b,
        totalSpend: Math.round(b.totalSpend * 100) / 100,
        percentOfTotal: totalOut > 0 ? Math.round((b.totalSpend / totalOut) * 1000) / 10 : 0,
      }))
      .sort((a, b) => b.totalSpend - a.totalSpend)
      .slice(0, topN);
  }

  /**
   * Outgoing spend per calendar month, oldest month first. Events whose
   * timestamp cannot be parsed are skipped.
   */
  _monthlyTrend(events) {
    const buckets = new Map();
    for (const e of events) {
      if (e.content.amount.direction !== "out") continue;
      const monthKey = this._monthKey(e.occurredAt);
      if (!monthKey) continue;
      const cur = buckets.get(monthKey) || { monthKey, totalSpend: 0, eventCount: 0 };
      cur.totalSpend += e.content.amount.value;
      cur.eventCount += 1;
      buckets.set(monthKey, cur);
    }
    return Array.from(buckets.values())
      .map((b) => ({ ...b, totalSpend: Math.round(b.totalSpend * 100) / 100 }))
      .sort((a, b) => a.monthKey.localeCompare(b.monthKey));
  }

  /**
   * "YYYY-MM" in local time for a timestamp, or null when it does not parse.
   * Shared by the monthly trend and the "month" breakdown dimension.
   */
  _monthKey(occurredAt) {
    const d = new Date(occurredAt);
    if (!Number.isFinite(d.getTime())) return null;
    return `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}`;
  }

  /** Bucket key of an event for the given dimension; null for an unknown dimension. */
  _keyFor(event, dimension) {
    if (dimension === "merchant" || dimension === "counterparty") {
      return (event.extra && event.extra.counterparty)
        || (event.content && event.content.title)
        || "(unknown)";
    }
    if (dimension === "category") {
      return (event.extra && event.extra.category)
        || event.subtype
        || "(uncategorized)";
    }
    if (dimension === "month") {
      return this._monthKey(event.occurredAt) || "(unknown date)";
    }
    return null;
  }

  /**
   * Human-readable period for the LLM prompt. Each bound is formatted on its
   * own so an open-ended window is never rendered as 1970-01-01 and an
   * unparseable bound cannot throw from `toISOString()`.
   */
  _formatPeriod(period) {
    const fmt = (ts) => {
      if (!ts) return null;
      const d = new Date(ts);
      return Number.isFinite(d.getTime()) ? d.toISOString().slice(0, 10) : null;
    };
    const from = fmt(period && period.since);
    const to = fmt(period && period.until);
    if (from && to) return `${from} 至 ${to}`;
    if (from) return `${from} 至今`;
    if (to) return `截至 ${to}`;
    return "全部时间";
  }

  /**
   * Ask the LLM for a 2-3 sentence commentary on the summary and the top 5
   * breakdown rows. Returns a fixed message without calling the LLM when
   * there are no events.
   */
  async _llmCommentary(summary, breakdown, dimension, options) {
    if (summary.eventCount === 0) return "No spending events found in this period.";
    const topItems = breakdown.slice(0, 5).map((b) => `${b.key} ¥${b.totalSpend} (${b.percentOfTotal}%)`).join(", ");
    const periodStr = this._formatPeriod(summary.period);
    const userMsg = `用户的消费数据:
- 期间:${periodStr}
- 总支出 ¥${summary.totalSpend} (${summary.currency}), 总收入 ¥${summary.totalIncome}, 净流 ¥${summary.netFlow}
- 共 ${summary.eventCount} 笔交易, ${summary.uniqueCounterparties} 个独特对方
- 按 ${dimension} 排名 top 5:${topItems}

请用 2-3 句话点评消费习惯,指出最大支出方向和异常(如有)。中文回答。`;
    return await this.callLlmCommentary([
      { role: "system", content: "你是一个理性、克制的财务分析助手。基于事实给出简短结论,不夸张、不臆断。" },
      { role: "user", content: userMsg },
    ], { acceptNonLocal: options.acceptNonLocal });
  }
}
215
+
216
+ module.exports = { SpendingSkill, SUPPORTED_DIMENSIONS };
@@ -0,0 +1,167 @@
1
+ /**
2
+ * Phase 11 — analysis.timeline skill.
3
+ *
4
+ * Cross-source narrative timeline. Given a time window + optional topic
5
+ * keyword, weaves Events from all adapters into a chronological story
6
+ * with adapter-aware glyphs (so "邮件" / "支付" / "出行" are
7
+ * visually distinguishable in the UI).
8
+ *
9
+ * LLM (optional) produces a 1-paragraph synthesis: "你这周买过 X 也去过
10
+ * Y, 给妈妈转账过 Z" rather than just a list.
11
+ *
12
+ * Inputs:
13
+ * - timeWindow: optional (defaults to the last 7 days)
14
+ * - topicFilter: optional substring match against title / counterparty
15
+ * - personId: optional — scope to events involving this person
16
+ * (merge-group expanded)
17
+ * - limit: default 100 events
18
+ *
19
+ * Output:
20
+ * {
21
+ * entries: [{ id, occurredAt, title, kind, amount?, adapter, snippet }],
22
+ * summary: { totalEvents, byAdapter, byDay, period },
23
+ * citations,
24
+ * llm_narrative: "..." | null,
25
+ * }
26
+ */
27
+
28
+ "use strict";
29
+
30
+ const { AnalysisSkill } = require("./base");
31
+
32
/**
 * analysis.timeline skill: turns vault events inside a time window into a
 * chronological list of entries plus per-adapter / per-day counts, and
 * optionally asks the LLM for a short narrative.
 */
class TimelineSkill extends AnalysisSkill {
  constructor(opts) {
    super({ ...opts, name: "analysis.timeline" });
  }

  /**
   * Run the analysis.
   *
   * @param {object} [options]
   * @param {number} [options.limit] Max events fetched (default 100, capped at 1000).
   * @param {string} [options.topicFilter] Case-insensitive substring matched
   *   against title, `extra.counterparty` and `content.text`.
   * @param {string} [options.personId] Keep only events whose actor or
   *   participants belong to this person's merge group.
   * @param {boolean} [options.narrative] Pass `false` to skip the LLM call.
   * @param {boolean} [options.acceptNonLocal] Forwarded to callLlmCommentary.
   * @returns {Promise<object>} `{ skill, entries, summary, citations, llm_narrative }`
   */
  async run(options = {}) {
    // Default to the last 7 days only when the caller gave no lower bound at
    // all. The default is written AFTER the spread so that a key that is
    // present but undefined (`{ sinceDays: undefined }`) cannot erase it, and
    // it is not injected next to an explicit `sinceMonths`.
    // NOTE(review): assumes resolveTimeWindow treats `sinceDays: null` as
    // "not set", as the original code already relied on — confirm in base.js.
    const hasExplicitWindow = options.since != null
      || options.sinceDays != null
      || options.sinceMonths != null;
    const window = this.resolveTimeWindow({
      ...options,
      sinceDays: options.sinceDays ?? (hasExplicitWindow ? null : 7),
    });
    const limit = Number.isFinite(options.limit) && options.limit > 0
      ? Math.min(options.limit, 1000)
      : 100;

    let events = this._fetchEvents(window, limit);
    // Filters run after the fetch, so they narrow the fetched page only.
    events = this._applyFilters(events, options);
    const entries = events.map((e) => this._toEntry(e));
    const summary = this._summarize(entries, window);

    let narrative = null;
    if (options.narrative !== false && this.llm && entries.length > 0) {
      narrative = await this._llmNarrative(entries, summary, options);
    }

    return {
      skill: "analysis.timeline",
      entries,
      summary,
      citations: entries.slice(0, 50).map((e) => e.id),
      llm_narrative: narrative,
    };
  }

  /**
   * Parse a timestamp (epoch number, ISO string or Date) into a Date.
   * Returns null when the value is missing or does not parse.
   */
  _toDate(value) {
    if (value == null) return null;
    const d = new Date(value);
    return Number.isFinite(d.getTime()) ? d : null;
  }

  /** Fetch up to `limit` events in the window and return them oldest first. */
  _fetchEvents({ since, until }, limit) {
    const q = { limit };
    if (since != null) q.since = since;
    if (until != null) q.until = until;
    const events = this.vault.queryEvents(q) || [];
    // queryEvents orders DESC; re-sort for narrative (oldest first). Timestamps
    // are normalised to milliseconds first: subtracting ISO strings yields NaN,
    // which makes the comparator a no-op. Missing timestamps sort as 0.
    const millis = (e) => {
      const d = this._toDate(e && e.occurredAt);
      return d ? d.getTime() : 0;
    };
    return events.slice().sort((a, b) => millis(a) - millis(b));
  }

  /**
   * Apply the optional topic and person filters. Returns the input array
   * itself when no filter is active.
   */
  _applyFilters(events, options) {
    // Fields are not guaranteed to be strings; coerce instead of crashing.
    const lower = (value) => (value == null ? "" : String(value).toLowerCase());

    let out = events;
    if (typeof options.topicFilter === "string" && options.topicFilter.length > 0) {
      const needle = options.topicFilter.toLowerCase();
      out = out.filter((e) => {
        const title = lower((e.content && e.content.title) || "");
        const counterparty = lower((e.extra && e.extra.counterparty) || "");
        const text = lower((e.content && e.content.text) || "");
        return title.includes(needle)
          || counterparty.includes(needle)
          || text.includes(needle);
      });
    }
    if (typeof options.personId === "string" && options.personId.length > 0) {
      const memberSet = new Set(this.expandToMergeGroup(options.personId));
      out = out.filter((e) => {
        if (memberSet.has(e.actor)) return true;
        return Array.isArray(e.participants) && e.participants.some((p) => memberSet.has(p));
      });
    }
    return out;
  }

  /** Project a vault event onto the flat entry shape returned to callers. */
  _toEntry(event) {
    const adapter = (event.source && event.source.adapter) || "unknown";
    return {
      id: event.id,
      occurredAt: event.occurredAt,
      title: (event.content && event.content.title) || "(无标题)",
      kind: event.subtype || "event",
      amount: event.content?.amount || null,
      adapter,
      snippet: this._buildSnippet(event),
    };
  }

  /**
   * Short preview: first 100 chars of the text, the counterparty and a
   * from → to pair when present, joined by " · " and capped at 200 chars.
   */
  _buildSnippet(event) {
    const parts = [];
    const text = (event.content && event.content.text) || "";
    if (text) parts.push(String(text).slice(0, 100));
    if (event.extra) {
      if (event.extra.counterparty) parts.push(`@${event.extra.counterparty}`);
      if (event.extra.from && event.extra.to) parts.push(`${event.extra.from} → ${event.extra.to}`);
    }
    return parts.join(" · ").slice(0, 200);
  }

  /**
   * Count entries per adapter and per local calendar day. Entries without a
   * usable timestamp are counted in `totalEvents` / `byAdapter` only.
   */
  _summarize(entries, window) {
    const byAdapter = {};
    const byDay = {};
    for (const e of entries) {
      byAdapter[e.adapter] = (byAdapter[e.adapter] || 0) + 1;
      const d = this._toDate(e.occurredAt);
      if (d) {
        const day = `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}-${String(d.getDate()).padStart(2, "0")}`;
        byDay[day] = (byDay[day] || 0) + 1;
      }
    }
    return {
      totalEvents: entries.length,
      byAdapter,
      byDay,
      period: {
        since: window.since || null,
        until: window.until || null,
      },
    };
  }

  /**
   * Ask the LLM for a 3-5 sentence narrative over the 30 most recent
   * entries. Returns null when there are no entries.
   */
  async _llmNarrative(entries, summary, options) {
    if (entries.length === 0) return null;
    // Cap to 30 entries for prompt size
    const sampled = entries.slice(-30);
    const lines = sampled.map((e) => {
      // toISOString() throws on an invalid Date, so undated entries get a
      // placeholder instead of aborting the whole narrative.
      const when = this._toDate(e.occurredAt);
      const d = when ? when.toISOString().slice(0, 10) : "日期未知";
      const amt = e.amount ? ` ¥${e.amount.value}(${e.amount.direction})` : "";
      return `- ${d} [${e.adapter}/${e.kind}] ${e.title}${amt}`;
    }).join("\n");

    const userMsg = `用户的事件时间线(共 ${summary.totalEvents} 条, 显示最近 ${sampled.length}):

${lines}

请用 3-5 句话讲清楚这段时间发生了什么、出现的人物 / 地点、有没有明显的主题或事件。中文回答,平实叙述,不评价。`;

    return await this.callLlmCommentary([
      { role: "system", content: "你是一个克制的时间线叙述助手。基于事实串联事件,不引申、不评价。" },
      { role: "user", content: userMsg },
    ], { acceptNonLocal: options.acceptNonLocal });
  }
}
166
+
167
+ module.exports = { TimelineSkill };
@@ -0,0 +1,198 @@
1
+ /**
2
+ * Phase 8.3 — embedding stage.
3
+ *
4
+ * Pluggable function that takes two Person rows + returns
5
+ * `{ sim, profileA, profileB }` where `sim` is a cosine similarity in
6
+ * [0, 1]. Caller (EntityResolver.drain) uses thresholds to decide
7
+ * auto-same / auto-different / "send to LLM stage".
8
+ *
9
+ * Profile encoding (per design doc §4.2):
10
+ * "{type}: {primary_name} | aliases: {a1, a2} | identifiers: {phone,
11
+ * email} | recent: {top-3 event titles}"
12
+ *
13
+ * Embedding backend: Ollama HTTP API by default (compatible with
14
+ * `nomic-embed-text` / `bge-m3` / `bge-large-zh`). Caller can inject any
15
+ * `embedFn: async (text) => Float32Array | number[]` via opts.
16
+ *
17
+ * Privacy: same gate as AnalysisEngine — local Ollama default, accept-
18
+ * NonLocal flag required for hosted. Phase 8.4 LLM stage carries the
19
+ * same invariant; this module is dumb to that and trusts caller.
20
+ */
21
+
22
+ "use strict";
23
+
24
/** Base URL of the Ollama HTTP API used when `opts.ollamaUrl` is not given. */
const DEFAULT_OLLAMA_URL = "http://localhost:11434";
/** Embedding model used when `opts.model` is not given. */
const DEFAULT_MODEL = "nomic-embed-text";

/**
 * Embedding stage: builds a textual profile for each of two Person rows,
 * embeds both and returns their similarity.
 */
class EmbeddingStage {
  /**
   * @param {object} [opts]
   * @param {(text: string) => Promise<number[]|Float32Array>} [opts.embedFn]
   *   Custom embedding backend; when absent the Ollama HTTP API is used.
   * @param {string} [opts.ollamaUrl] Ollama base URL.
   * @param {string} [opts.model] Ollama embedding model name.
   * @param {object} [opts.vault] Optional vault; when it exposes `queryEvents`
   *   the titles of up to 3 events are appended to each profile.
   * @param {number} [opts.cacheMaxSize] Max cached embeddings (default 1000;
   *   0 or less disables caching).
   * @throws {Error} when `opts` is not an object.
   */
  constructor(opts = {}) {
    if (!opts || typeof opts !== "object") {
      throw new Error("EmbeddingStage: opts required");
    }
    this._embedFn = typeof opts.embedFn === "function" ? opts.embedFn : null;
    this._ollamaUrl = typeof opts.ollamaUrl === "string" && opts.ollamaUrl.length > 0
      ? opts.ollamaUrl
      : DEFAULT_OLLAMA_URL;
    this._model = typeof opts.model === "string" && opts.model.length > 0
      ? opts.model
      : DEFAULT_MODEL;
    // Caller-supplied vault lets us pull recent events for richer profiles
    // (per design doc §4.2 — "recent: top-3 event titles"). Optional.
    this._vault = opts.vault || null;
    // In-memory LRU cache: personId → embedding. Cheap perf win for re-using
    // the same person across many pair comparisons in one drain.
    this._cache = new Map();
    // personId → profile text the cached embedding was computed from, so an
    // embedding is never reused after the person's profile has changed.
    this._cachedProfiles = new Map();
    this._cacheMaxSize = Number.isFinite(opts.cacheMaxSize)
      ? Math.max(0, Math.floor(opts.cacheMaxSize))
      : 1000;
  }

  /**
   * The function EntityResolver.drain expects.
   * Signature: `async (a, b) => { sim, profileA, profileB }`
   */
  async compare(a, b) {
    // Sequential on purpose: when both sides are the same person the second
    // lookup is served from the cache instead of embedding twice.
    const [vecA, profileA] = await this._embedPerson(a);
    const [vecB, profileB] = await this._embedPerson(b);
    const sim = cosineSimilarity(vecA, vecB);
    return { sim, profileA, profileB };
  }

  /**
   * Returns a stage function bound to this instance, suitable for passing
   * as `opts.embeddingStage` to EntityResolver.
   */
  asStageFn() {
    return (a, b) => this.compare(a, b);
  }

  /**
   * Embed one person, using the cache when the profile text is unchanged.
   *
   * @returns {Promise<[number[]|Float32Array, string]>} `[vector, profile]`
   * @throws {Error} when `person` has no id or the backend returns a
   *   non-array value.
   */
  async _embedPerson(person) {
    if (!person || !person.id) throw new Error("EmbeddingStage: person required");
    const id = person.id;
    const profile = this.buildProfile(person);
    if (this._cache.has(id) && this._cachedProfiles.get(id) === profile) {
      const hit = this._cache.get(id);
      // Re-insert so Map iteration order reflects recency of use.
      this._cache.delete(id);
      this._cache.set(id, hit);
      return [hit, profile];
    }
    const vec = await this._embed(profile);
    if (!Array.isArray(vec) && !(vec instanceof Float32Array)) {
      throw new Error("EmbeddingStage: embedFn must return Array<number> or Float32Array");
    }
    this._remember(id, profile, vec);
    return [vec, profile];
  }

  /** Store an embedding, evicting least-recently-used entries when full. */
  _remember(id, profile, vec) {
    if (this._cacheMaxSize <= 0) return;
    // Drop any stale entry for this id so it does not count toward the limit.
    this._cache.delete(id);
    while (this._cache.size >= this._cacheMaxSize) {
      const oldest = this._cache.keys().next().value;
      if (oldest === undefined) break;
      this._cache.delete(oldest);
      this._cachedProfiles.delete(oldest);
    }
    this._cache.set(id, vec);
    this._cachedProfiles.set(id, profile);
  }

  /**
   * Build the textual profile that gets embedded. Public for tests +
   * for callers that want to feed the same string to LLM stage.
   *
   * Format: `type: name | aliases: … | identifiers: key:value, … | recent: …`
   * (sections without data are omitted).
   */
  buildProfile(person) {
    const parts = [];
    parts.push(`${person.type || "person"}: ${(person.names && person.names[0]) || "(unknown)"}`);
    if (person.names && person.names.length > 1) {
      parts.push(`aliases: ${person.names.slice(1).join(", ")}`);
    }
    const ids = person.identifiers || {};
    const idStrs = [];
    // Accept non-empty strings and finite numbers; skip null / empty values
    // so they do not end up in the profile as "phone:null".
    const addId = (key, value) => {
      const usable = typeof value === "string" ? value.length > 0 : Number.isFinite(value);
      if (usable) idStrs.push(`${key}:${value}`);
    };
    for (const key of Object.keys(ids)) {
      const v = ids[key];
      if (Array.isArray(v)) {
        for (const x of v) addId(key, x);
      } else {
        addId(key, v);
      }
    }
    if (idStrs.length > 0) {
      parts.push(`identifiers: ${idStrs.join(", ")}`);
    }
    if (this._vault) {
      try {
        const recent = this._recentEvents(person.id, 3);
        if (recent.length > 0) {
          parts.push(`recent: ${recent.map((e) => e.content?.title || "(no title)").join("; ")}`);
        }
      } catch (_e) {
        // Vault read failure is non-fatal — embedding still works without events
      }
    }
    return parts.join(" | ");
  }

  /**
   * Up to `limit` events for a person, or [] when no usable vault is set.
   * Only events where the person is the actor are queried; events where
   * they are merely a participant are not included.
   */
  _recentEvents(personId, limit) {
    if (!this._vault || typeof this._vault.queryEvents !== "function") return [];
    const events = this._vault.queryEvents({ actor: personId, limit });
    return Array.isArray(events) ? events : [];
  }

  /** Embed text with the injected backend, or Ollama when none was given. */
  async _embed(text) {
    if (this._embedFn) return this._embedFn(text);
    // Default backend: Ollama HTTP API
    return await ollamaEmbed(this._ollamaUrl, this._model, text);
  }

  /** Clear the embedding cache (e.g. after batch). */
  clearCache() {
    this._cache.clear();
    this._cachedProfiles.clear();
  }
}
140
+
141
+ // ─── helpers ────────────────────────────────────────────────────────────
142
+
143
/**
 * Cosine similarity of two numeric vectors, clamped from [-1, 1] to [0, 1]
 * (embeddings tend to live in non-negative space but we don't trust that).
 *
 * Only the overlapping prefix is compared when the lengths differ.
 * Components that are not finite numbers count as 0. The result is always a
 * finite number: 0 is returned for missing / empty / all-zero input and when
 * the computation overflows, so threshold comparisons never see NaN.
 *
 * @param {ArrayLike<number>} a
 * @param {ArrayLike<number>} b
 * @returns {number} similarity in [0, 1]
 */
function cosineSimilarity(a, b) {
  if (!a || !b) return 0;
  const len = Math.min(a.length, b.length);
  if (!(len > 0)) return 0;
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < len; i += 1) {
    const rawX = Number(a[i]);
    const rawY = Number(b[i]);
    // NaN and ±Infinity both become 0; an infinite component would otherwise
    // turn the quotient below into NaN.
    const x = Number.isFinite(rawX) ? rawX : 0;
    const y = Number.isFinite(rawY) ? rawY : 0;
    dot += x * y;
    normA += x * x;
    normB += y * y;
  }
  if (normA === 0 || normB === 0) return 0;
  const sim = dot / (Math.sqrt(normA) * Math.sqrt(normB));
  // Squares of very large components can still overflow to Infinity.
  if (!Number.isFinite(sim)) return 0;
  // Clamp to [0, 1] for the threshold-comparison call site
  return Math.max(0, Math.min(1, sim));
}
166
+
167
/**
 * Call Ollama's /api/embeddings endpoint. Throws on failure; caller
 * (EntityResolver.drain → errorResolve) handles retry-vs-fatal.
 *
 * @param {string} baseUrl Ollama base URL; trailing slashes are ignored.
 * @param {string} model Embedding model name.
 * @param {string} text Text to embed (sent as `prompt`).
 * @returns {Promise<number[]>} The `embedding` array from the response.
 * @throws {TypeError} when `baseUrl` is not a string.
 * @throws {Error} when the request fails, the status is not OK, the body is
 *   not valid JSON, or the response has no `embedding` array. Transport and
 *   parse failures keep the underlying error in `cause`.
 */
async function ollamaEmbed(baseUrl, model, text) {
  if (typeof baseUrl !== "string") {
    throw new TypeError("Ollama embed: baseUrl must be a string");
  }
  // Strip every trailing slash so "http://host//" does not yield "//api/…".
  const url = `${baseUrl.replace(/\/+$/, "")}/api/embeddings`;
  let resp;
  try {
    resp = await fetch(url, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ model, prompt: text }),
    });
  } catch (err) {
    throw new Error(
      `Ollama embed call failed (${url}): ${err && err.message ? err.message : err}`,
      { cause: err },
    );
  }
  if (!resp.ok) {
    // Best-effort body read; an unreadable body must not mask the status.
    const body = await resp.text().catch(() => "");
    throw new Error(`Ollama embed returned ${resp.status}: ${body.slice(0, 200)}`);
  }
  let data;
  try {
    data = await resp.json();
  } catch (err) {
    throw new Error(
      `Ollama embed returned invalid JSON (${url}): ${err && err.message ? err.message : err}`,
      { cause: err },
    );
  }
  if (!data || !Array.isArray(data.embedding)) {
    throw new Error(`Ollama embed response missing 'embedding' array`);
  }
  return data.embedding;
}
193
+
194
+ module.exports = {
195
+ EmbeddingStage,
196
+ cosineSimilarity,
197
+ ollamaEmbed,
198
+ };