@chainlesschain/personal-data-hub 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154) hide show
  1. package/__tests__/adapters/ai-chat-cookie-capture-spec.test.js +211 -0
  2. package/__tests__/adapters/ai-chat-health-checker.test.js +262 -0
  3. package/__tests__/adapters/ai-chat-history.test.js +396 -0
  4. package/__tests__/adapters/ai-chat-http-client.test.js +242 -0
  5. package/__tests__/adapters/ai-chat-vendors.test.js +874 -0
  6. package/__tests__/adapters/alipay-bill-adapter.test.js +538 -0
  7. package/__tests__/adapters/email-adapter.test.js +138 -1
  8. package/__tests__/adapters/email-classifier.test.js +347 -0
  9. package/__tests__/adapters/email-pdf-extractor.test.js +529 -0
  10. package/__tests__/adapters/email-retry-progress.test.js +294 -0
  11. package/__tests__/adapters/email-templates.test.js +699 -0
  12. package/__tests__/adapters/social-toutiao-kuaishou-scaffold.test.js +269 -0
  13. package/__tests__/adapters/system-data-adapter.test.js +440 -0
  14. package/__tests__/adapters/system-data-android-ingest.test.js +144 -0
  15. package/__tests__/adapters/system-data-android.test.js +387 -0
  16. package/__tests__/adapters/system-data-disclosure.test.js +153 -0
  17. package/__tests__/adapters/wechat-bootstrap.test.js +240 -0
  18. package/__tests__/adapters/wechat-env-probe.test.js +162 -0
  19. package/__tests__/adapters/wechat-frida-agent.test.js +191 -0
  20. package/__tests__/adapters/wechat-frida-integration.test.js +149 -0
  21. package/__tests__/adapters/wechat-frida-key-provider.test.js +188 -0
  22. package/__tests__/adapters/wechat-md5-key-provider.test.js +101 -0
  23. package/__tests__/analysis-skills.test.js +556 -0
  24. package/__tests__/analysis.test.js +329 -1
  25. package/__tests__/e2e/ai-chat-cross-source-journey.test.js +213 -0
  26. package/__tests__/e2e/full-user-journey.test.js +188 -0
  27. package/__tests__/entity-resolver-ingest-hook.test.js +177 -0
  28. package/__tests__/entity-resolver-stages.test.js +411 -0
  29. package/__tests__/entity-resolver-vault.test.js +246 -0
  30. package/__tests__/entity-resolver.test.js +526 -0
  31. package/__tests__/fixtures/entity-resolver-200-mock.json +96 -0
  32. package/__tests__/integration/ai-chat-history-registry.test.js +228 -0
  33. package/__tests__/integration/aichat-wizard-end-to-end.test.js +282 -0
  34. package/__tests__/integration/cross-adapter-pipelines.test.js +396 -0
  35. package/__tests__/integration/wechat-bootstrap-end-to-end.test.js +390 -0
  36. package/__tests__/longtail-adapters.test.js +217 -0
  37. package/__tests__/mobile-extractor.test.js +288 -0
  38. package/__tests__/registry.test.js +4 -2
  39. package/__tests__/shopping-adapters.test.js +296 -0
  40. package/__tests__/sidecar-contacts-cross-validate.test.js +163 -0
  41. package/__tests__/sidecar-supervisor.test.js +120 -0
  42. package/__tests__/social-adapters.test.js +206 -0
  43. package/__tests__/travel-adapters.test.js +325 -0
  44. package/__tests__/vault.test.js +3 -3
  45. package/__tests__/wechat-adapter.test.js +476 -0
  46. package/__tests__/whatsapp-adapter.test.js +135 -0
  47. package/lib/adapter-spec.js +12 -0
  48. package/lib/adapters/_python-sidecar-base.js +207 -0
  49. package/lib/adapters/ai-chat-history/ai-chat-adapter.js +374 -0
  50. package/lib/adapters/ai-chat-history/cookie-auth.js +109 -0
  51. package/lib/adapters/ai-chat-history/cookie-capture-spec.js +331 -0
  52. package/lib/adapters/ai-chat-history/health-checker.js +210 -0
  53. package/lib/adapters/ai-chat-history/http-client.js +211 -0
  54. package/lib/adapters/ai-chat-history/index.js +28 -0
  55. package/lib/adapters/ai-chat-history/schema-map.js +258 -0
  56. package/lib/adapters/ai-chat-history/vendor-spec.js +86 -0
  57. package/lib/adapters/ai-chat-history/vendors/coze.js +179 -0
  58. package/lib/adapters/ai-chat-history/vendors/deepseek.js +199 -0
  59. package/lib/adapters/ai-chat-history/vendors/doubao.js +255 -0
  60. package/lib/adapters/ai-chat-history/vendors/dreamina.js +174 -0
  61. package/lib/adapters/ai-chat-history/vendors/hunyuan.js +176 -0
  62. package/lib/adapters/ai-chat-history/vendors/kimi.js +182 -0
  63. package/lib/adapters/ai-chat-history/vendors/qianfan.js +160 -0
  64. package/lib/adapters/ai-chat-history/vendors/tongyi.js +193 -0
  65. package/lib/adapters/ai-chat-history/vendors/zhipu.js +202 -0
  66. package/lib/adapters/ai-chat-history/wizard-controller.js +473 -0
  67. package/lib/adapters/alipay-bill/alipay-bill-adapter.js +311 -0
  68. package/lib/adapters/alipay-bill/counterparty.js +129 -0
  69. package/lib/adapters/alipay-bill/csv-parser.js +217 -0
  70. package/lib/adapters/alipay-bill/index.js +41 -0
  71. package/lib/adapters/alipay-bill/zip-decryptor.js +111 -0
  72. package/lib/adapters/email-imap/classifier.js +495 -0
  73. package/lib/adapters/email-imap/email-adapter.js +419 -8
  74. package/lib/adapters/email-imap/index.js +42 -0
  75. package/lib/adapters/email-imap/pdf-extractor.js +192 -0
  76. package/lib/adapters/email-imap/templates/bill.js +232 -0
  77. package/lib/adapters/email-imap/templates/government.js +120 -0
  78. package/lib/adapters/email-imap/templates/index.js +78 -0
  79. package/lib/adapters/email-imap/templates/order.js +186 -0
  80. package/lib/adapters/email-imap/templates/other.js +114 -0
  81. package/lib/adapters/email-imap/templates/register.js +113 -0
  82. package/lib/adapters/email-imap/templates/travel.js +157 -0
  83. package/lib/adapters/email-imap/templates/utils.js +275 -0
  84. package/lib/adapters/email-imap/transactions.js +234 -0
  85. package/lib/adapters/messaging-qq/index.js +158 -0
  86. package/lib/adapters/messaging-telegram/index.js +142 -0
  87. package/lib/adapters/messaging-whatsapp/index.js +189 -0
  88. package/lib/adapters/shopping-base/index.js +208 -0
  89. package/lib/adapters/shopping-jd/index.js +150 -0
  90. package/lib/adapters/shopping-meituan/index.js +154 -0
  91. package/lib/adapters/shopping-taobao/index.js +176 -0
  92. package/lib/adapters/social-bilibili/index.js +171 -0
  93. package/lib/adapters/social-douyin/index.js +116 -0
  94. package/lib/adapters/social-kuaishou/index.js +237 -0
  95. package/lib/adapters/social-toutiao/index.js +236 -0
  96. package/lib/adapters/social-weibo/index.js +164 -0
  97. package/lib/adapters/social-xiaohongshu/index.js +96 -0
  98. package/lib/adapters/system-data/disclosure.js +166 -0
  99. package/lib/adapters/system-data/index.js +34 -0
  100. package/lib/adapters/system-data/system-data-adapter.js +344 -0
  101. package/lib/adapters/system-data-android/adapter.js +348 -0
  102. package/lib/adapters/system-data-android/index.js +76 -0
  103. package/lib/adapters/travel-12306/index.js +151 -0
  104. package/lib/adapters/travel-amap/index.js +164 -0
  105. package/lib/adapters/travel-baidu-map/index.js +162 -0
  106. package/lib/adapters/travel-base/index.js +240 -0
  107. package/lib/adapters/travel-ctrip/index.js +151 -0
  108. package/lib/adapters/wechat/bootstrap.js +146 -0
  109. package/lib/adapters/wechat/content-parser.js +326 -0
  110. package/lib/adapters/wechat/db-reader.js +209 -0
  111. package/lib/adapters/wechat/env-probe.js +218 -0
  112. package/lib/adapters/wechat/frida-agent/loader.js +67 -0
  113. package/lib/adapters/wechat/frida-agent/wechat-key-hook.js +126 -0
  114. package/lib/adapters/wechat/index.js +37 -0
  115. package/lib/adapters/wechat/key-extractor.js +158 -0
  116. package/lib/adapters/wechat/key-providers/frida-key-provider.js +244 -0
  117. package/lib/adapters/wechat/key-providers/index.js +22 -0
  118. package/lib/adapters/wechat/key-providers/key-provider-base.js +44 -0
  119. package/lib/adapters/wechat/key-providers/md5-key-provider.js +81 -0
  120. package/lib/adapters/wechat/normalize.js +220 -0
  121. package/lib/adapters/wechat/wechat-adapter.js +205 -0
  122. package/lib/analysis-skills/base.js +113 -0
  123. package/lib/analysis-skills/footprint.js +167 -0
  124. package/lib/analysis-skills/index.js +58 -0
  125. package/lib/analysis-skills/interests.js +161 -0
  126. package/lib/analysis-skills/relations.js +226 -0
  127. package/lib/analysis-skills/spending.js +219 -0
  128. package/lib/analysis-skills/timeline.js +167 -0
  129. package/lib/analysis.js +191 -2
  130. package/lib/entity-resolver/embedding-stage.js +198 -0
  131. package/lib/entity-resolver/entity-resolver.js +384 -0
  132. package/lib/entity-resolver/index.js +42 -0
  133. package/lib/entity-resolver/llm-stage.js +191 -0
  134. package/lib/entity-resolver/rule-stage.js +208 -0
  135. package/lib/entity-resolver/worker.js +149 -0
  136. package/lib/index.js +131 -0
  137. package/lib/migrations.js +73 -0
  138. package/lib/mobile-extractor/android.js +193 -0
  139. package/lib/mobile-extractor/index.js +9 -0
  140. package/lib/mobile-extractor/ios.js +223 -0
  141. package/lib/prompt-builder.js +11 -1
  142. package/lib/query-parser.js +7 -1
  143. package/lib/registry.js +42 -0
  144. package/lib/sidecar/index.js +15 -0
  145. package/lib/sidecar/supervisor.js +359 -0
  146. package/lib/vault.js +343 -0
  147. package/package.json +36 -3
  148. package/scripts/_make-fixture-all.js +126 -0
  149. package/scripts/_make-fixture-contacts.js +84 -0
  150. package/scripts/evaluate-entity-resolver.js +213 -0
  151. package/scripts/smoke-phase-5-5.js +196 -0
  152. package/scripts/smoke-phase-5-7.js +181 -0
  153. package/scripts/smoke-system-data-contacts.js +309 -0
  154. package/scripts/smoke-system-data.js +312 -0
@@ -0,0 +1,207 @@
1
+ /**
2
+ * PythonSidecarAdapter — shared infrastructure for adapters whose extraction
3
+ * + parsing happens in the forensics-bridge Python sidecar.
4
+ *
5
+ * Design rationale (see Personal_Data_Hub_Python_Sidecar.md §6.2):
6
+ *
7
+ * - Subclasses define WHICH sidecar methods to invoke and HOW to orchestrate
8
+ * them (per-data-source pull + parse). The shared base only handles
9
+ * raw-event yielding + raw archival shape + supervisor wiring.
10
+ * - `sync(opts)` is an async generator that yields {entityType, originalId,
11
+ * capturedAt, payload} envelopes. `payload` is the already-normalized
12
+ * UnifiedSchema entity emitted by the sidecar — `normalize()` then routes
13
+ * it into the right NormalizedBatch bucket (events / persons / places / etc).
14
+ * - Cancellation works through SidecarSupervisor's per-invoke timeout. The
15
+ * registry's batchSize-driven iteration means partial progress is preserved
16
+ * even if a later parse_* method times out mid-stream.
17
+ *
18
+ * Subclass contract:
19
+ *
20
+ * class MyAdapter extends PythonSidecarAdapter {
21
+ * name = "my-adapter";
22
+ * version = "0.1.0";
23
+ * capabilities = ["sync:sidecar"];
24
+ * dataDisclosure = { ... };
25
+ * async _runSidecar(opts, emit) { ... use this.supervisor.invoke(...) ... }
26
+ * }
27
+ *
28
+ * The base does not wrap invocation itself: subclasses call
29
+ * `this.supervisor.invoke(...)` directly and forward each chunk to `emit`
30
+ * from the `onChunk` callback, issuing one or more calls inside `_runSidecar`.
31
+ */
32
+
33
+ "use strict";
34
+
35
/**
 * Base class for adapters whose extraction and parsing are performed by a
 * sidecar process reached through a supervisor object.
 *
 * Subclasses implement `_runSidecar(opts, emit)`; the base turns the pushed
 * raw events into the async-generator stream returned by `sync()` and buckets
 * each raw into a NormalizedBatch in `normalize()`.
 */
class PythonSidecarAdapter {
  /**
   * @param {object} opts
   * @param {object} opts.supervisor Object exposing `invoke(method, params, options)`.
   * @param {string} [opts.name] Overrides the adapter name.
   *   NOTE(review): a subclass that declares `name` as a class field will
   *   overwrite this value, because subclass fields are initialised after the
   *   base constructor returns.
   * @param {object} [opts.logger] Optional logger; falls back to a no-op.
   * @throws {Error} When `opts` or `opts.supervisor` is missing.
   */
  constructor(opts) {
    if (!opts || !opts.supervisor) {
      throw new Error("PythonSidecarAdapter: opts.supervisor required");
    }
    this.supervisor = opts.supervisor;
    if (opts.name) this.name = opts.name;
    this._logger = opts.logger || { info: () => {}, warn: () => {}, error: () => {} };
  }

  // -------------------------------------------------------------------------
  // PersonalDataAdapter contract — required surface
  // -------------------------------------------------------------------------

  /**
   * Default authentication: always succeeds. Override in subclasses that need
   * a real availability check.
   *
   * @param {object} [_ctx] Unused by the base implementation.
   * @returns {Promise<{ok: boolean}>}
   */
  async authenticate(_ctx) {
    return { ok: true };
  }

  /**
   * Pings the sidecar with a 3 second timeout.
   *
   * @returns {Promise<{ok: true, version: *}|{ok: false, reason: string}>}
   *   Never rejects: any failure (including a ping that resolves to a
   *   non-object) is reported as `{ ok: false, reason }`.
   */
  async healthCheck() {
    try {
      const pong = await this.supervisor.invoke("sidecar.ping", {}, { timeoutMs: 3000 });
      return { ok: true, version: pong.version };
    } catch (err) {
      return { ok: false, reason: `sidecar.ping failed: ${err.code || err.message}` };
    }
  }

  /**
   * Places `raw.payload` into the NormalizedBatch bucket named by
   * `raw.entityType`.
   *
   * @param {object} raw Envelope with `entityType` and `payload`.
   * @returns {{events: object[], persons: object[], places: object[], items: object[], topics: object[]}}
   *   A batch holding at most one entity; all buckets are empty when `raw`
   *   is malformed, has no payload, or names an unknown entity type.
   */
  normalize(raw) {
    const empty = { events: [], persons: [], places: [], items: [], topics: [] };
    if (!raw || typeof raw !== "object" || !raw.payload) return empty;
    const t = raw.entityType;
    const p = raw.payload;
    if (t === "person") return { ...empty, persons: [p] };
    if (t === "event") return { ...empty, events: [p] };
    if (t === "place") return { ...empty, places: [p] };
    if (t === "item") return { ...empty, items: [p] };
    if (t === "topic") return { ...empty, topics: [p] };
    // Unknown entity type: drop the entity rather than guess a bucket.
    return empty;
  }

  /**
   * Runs `_runSidecar(opts, emit)` and yields every raw it emits, in order.
   *
   * The producer pushes into an in-memory queue; the generator drains it and
   * parks on a promise while the queue is empty. An error thrown by
   * `_runSidecar` is rethrown only after all already-queued raws were yielded.
   *
   * `emit(raw)` returns `true` when the raw was queued and `false` once the
   * consumer has stopped iterating, so a producer can stop early instead of
   * buffering raws nobody will read.
   *
   * @param {object} [opts] Passed through to `_runSidecar`.
   * @returns {AsyncGenerator<object>}
   */
  async *sync(opts = {}) {
    const queue = [];
    let done = false;
    let runErr = null;
    let resumeWaiter = null;
    let consumerGone = false;

    // Wake the generator if it is parked waiting for data or completion.
    const wake = () => {
      if (resumeWaiter) {
        const r = resumeWaiter;
        resumeWaiter = null;
        r();
      }
    };

    const emit = (raw) => {
      // Nobody will drain the queue any more: drop instead of accumulating.
      if (consumerGone) return false;
      queue.push(raw);
      wake();
      return true;
    };

    // The producer never rejects: its error is parked in runErr so it can be
    // surfaced after the queue has been drained.
    const runPromise = (async () => {
      try {
        await this._runSidecar(opts, emit);
      } catch (err) {
        runErr = err;
      } finally {
        done = true;
        wake();
      }
    })();

    try {
      while (true) {
        if (queue.length > 0) {
          yield queue.shift();
          continue;
        }
        if (done) break;
        await new Promise((res) => {
          resumeWaiter = res;
        });
      }
      // Drain any items added after `done` flipped but before we noticed.
      while (queue.length > 0) yield queue.shift();
      if (runErr) throw runErr;
    } finally {
      // Reached on normal completion, on error, and when the consumer aborts.
      consumerGone = true;
      queue.length = 0;
      // Make sure the producer task is awaited even if the consumer aborts.
      try {
        await runPromise;
      } catch (_e) {
        /* unreachable: the producer captures its own error in runErr */
      }
    }
  }

  // -------------------------------------------------------------------------
  // Subclass surface
  // -------------------------------------------------------------------------

  /**
   * Override me. Call `emit(rawEvent)` for every raw event that should reach
   * the consumer of `sync()`; a raw has the shape
   * `{ entityType, originalId, capturedAt, payload }`.
   *
   * @param {object} _opts Options passed to `sync()`.
   * @param {(raw: object) => boolean} _emit Queues a raw; `false` means the
   *   consumer has stopped iterating.
   * @returns {Promise<object>} Subclass-defined run summary (ignored by the base).
   * @throws {Error} Always, in the base implementation.
   */
  async _runSidecar(_opts, _emit) {
    throw new Error(
      `PythonSidecarAdapter[${this.name}]: subclass must implement _runSidecar(opts, emit)`,
    );
  }

  /**
   * Walks the five NormalizedBatch buckets of `batch` and emits one raw per
   * entity. Non-array buckets and non-object entities are skipped.
   *
   * `originalId` comes from `entity.source.originalId`, then `entity.id`,
   * then `null`; `capturedAt` comes from `entity.source.capturedAt`, falling
   * back to the current time.
   *
   * @param {object} batch Object with optional persons/events/places/items/topics arrays.
   * @param {(raw: object) => *} emit Receiver for each raw.
   */
  _emitChunkAsRaws(batch, emit) {
    if (!batch || typeof batch !== "object") return;
    const flush = (arr, entityType) => {
      if (!Array.isArray(arr)) return;
      for (const entity of arr) {
        if (!entity || typeof entity !== "object") continue;
        emit({
          entityType,
          originalId:
            (entity.source && entity.source.originalId) || entity.id || null,
          capturedAt:
            (entity.source && entity.source.capturedAt) || Date.now(),
          payload: entity,
        });
      }
    };
    flush(batch.persons, "person");
    flush(batch.events, "event");
    flush(batch.places, "place");
    flush(batch.items, "item");
    flush(batch.topics, "topic");
  }
}
206
+
207
+ module.exports = { PythonSidecarAdapter };
@@ -0,0 +1,374 @@
1
+ /**
2
+ * AIChatHistoryAdapter — Phase 10 旗舰差异化 adapter.
3
+ *
4
+ * Fans out to 9 vendor sub-adapters (DeepSeek / Kimi / 通义 / 智谱 / 混元 /
5
+ * 千帆 / 扣子 / Dreamina / Doubao) and re-emits their conversation + message stream as
6
+ * RawConversation / RawMessage envelopes. The AdapterRegistry will then call
7
+ * `normalize()` per envelope to fold them into the LocalVault.
8
+ *
9
+ * Phase 10.1 (this file): skeleton — adapter contract satisfied, schema-map
10
+ * fully implemented, vendor specs registered but their listConversations /
11
+ * listMessages throw VENDOR_NOT_WIRED. This is enough to:
12
+ * 1. register the adapter with AdapterRegistry without it failing assertAdapter
13
+ * 2. exercise normalize() with synthetic Raw* inputs in tests
14
+ * 3. exercise the per-vendor enable/disable flow in the hub UI
15
+ * 4. surface a precise "this vendor isn't wired yet" error to the user
16
+ *
17
+ * Phase 10.2+ replaces each vendor's listConversations / listMessages /
18
+ * validateCookie with real HTTP wiring against the documented endpoints in
19
+ * docs/design/Adapter_AIChat_History.md §6.
20
+ */
21
+
22
+ "use strict";
23
+
24
+ const { EVENT_SUBTYPES } = require("../../constants");
25
+
26
+ const { SUPPORTED_VENDORS, assertVendorSpec, NotImplementedYetError } =
27
+ require("./vendor-spec");
28
+ const { CookieAuthSession } = require("./cookie-auth");
29
+ const { HttpClient, CookieExpiredError, RateLimitedError } = require("./http-client");
30
+ const {
31
+ ADAPTER_NAME,
32
+ ADAPTER_VERSION,
33
+ conversationToBatch,
34
+ buildMessageEvent,
35
+ buildVendorPerson,
36
+ buildConversationTopic,
37
+ buildGeneratedImageItems,
38
+ } = require("./schema-map");
39
+
40
+ const { SPEC: deepseekSpec } = require("./vendors/deepseek");
41
+ const { SPEC: kimiSpec } = require("./vendors/kimi");
42
+ const { SPEC: tongyiSpec } = require("./vendors/tongyi");
43
+ const { SPEC: zhipuSpec } = require("./vendors/zhipu");
44
+ const { SPEC: hunyuanSpec } = require("./vendors/hunyuan");
45
+ const { SPEC: qianfanSpec } = require("./vendors/qianfan");
46
+ const { SPEC: cozeSpec } = require("./vendors/coze");
47
+ const { SPEC: dreaminaSpec } = require("./vendors/dreamina");
48
+ const { SPEC: doubaoSpec } = require("./vendors/doubao");
49
+
50
// Built-in vendor registry: maps each vendor key to the SPEC object exported
// by the matching ./vendors/<key> module. Frozen so the shared default map
// cannot be mutated in place. Nine vendors are registered here.
const DEFAULT_VENDOR_SPECS = Object.freeze({
  deepseek: deepseekSpec,
  kimi: kimiSpec,
  tongyi: tongyiSpec,
  zhipu: zhipuSpec,
  hunyuan: hunyuanSpec,
  qianfan: qianfanSpec,
  coze: cozeSpec,
  dreamina: dreaminaSpec,
  doubao: doubaoSpec,
});
61
+
62
/**
 * Multi-vendor AI chat history adapter.
 *
 * Holds one vendor spec and (optionally) one cookie session per vendor key,
 * streams conversation/message envelopes from every vendor that has a
 * session, and converts each envelope into a NormalizedBatch.
 */
class AIChatHistoryAdapter {
  /**
   * @param {object} [opts]
   * @param {Record<string, CookieAuthSession>} [opts.sessions]
   *   Per-vendor cookie session, keyed by vendor key. Vendors without a
   *   session are skipped during sync.
   * @param {Record<string, object>} [opts.vendorSpecs]
   *   Specs merged over the built-in registry (same key replaces the default).
   * @param {function} [opts.fetch] Forwarded to each per-vendor HttpClient.
   * @param {function} [opts.sleep] Forwarded to each per-vendor HttpClient.
   * @param {function} [opts.now] Forwarded to each per-vendor HttpClient.
   * @param {object} [opts.logger] Logger with info/warn/error; defaults to a no-op.
   * @throws {Error} When any merged vendor spec fails `assertVendorSpec`.
   */
  constructor(opts = {}) {
    this.name = ADAPTER_NAME;
    this.version = ADAPTER_VERSION;
    this.capabilities = [
      "sync:cookie-multi-vendor",
      "parse:ai-conversations",
      "ingest:cross-vendor",
    ];
    this.extractMode = "web-api";
    this.rateLimits = { perMinute: 60 }; // aggregate across vendors; per-vendor caps in spec
    this.dataDisclosure = {
      fields: [
        "ai-chat:vendor,conversationId,messageId,role,text,modelName",
        "ai-chat:attachments(url,filename,mimeType,size)",
        "ai-chat:generatedImages(url,prompt,model,params)",
        "ai-chat:toolCalls",
      ],
      sensitivity: "high",
      legalGate: false,
      notice:
        "AI 对话史含您输入的所有问题与上传的附件。所有数据在本机加密存储;分析时本地 LLM 可读取;不向任何厂商回传。",
    };

    this._logger = opts.logger || { info: () => {}, warn: () => {}, error: () => {} };

    // Specs are validated and registered upfront so listVendors() and health
    // checks work before any cookie is configured.
    const specs = { ...DEFAULT_VENDOR_SPECS, ...(opts.vendorSpecs || {}) };
    for (const [vendor, spec] of Object.entries(specs)) {
      const check = assertVendorSpec(spec);
      if (!check.ok) {
        throw new Error(
          `AIChatHistoryAdapter: vendor "${vendor}" spec invalid: ${check.errors.join("; ")}`,
        );
      }
    }
    this._vendorSpecs = specs;
    this._sessions = { ...(opts.sessions || {}) };
    this._fetch = opts.fetch;
    this._sleep = opts.sleep;
    this._now = opts.now;
    this._httpClients = new Map(); // vendor → HttpClient, lazy
  }

  /**
   * Returns the cached HttpClient for `vendor`, creating it on first use with
   * the vendor spec's rate limits and the adapter's fetch/sleep/now/logger.
   *
   * @param {string} vendor Vendor key present in the spec registry.
   * @returns {HttpClient}
   */
  _getHttpClient(vendor) {
    let client = this._httpClients.get(vendor);
    if (!client) {
      const spec = this._vendorSpecs[vendor];
      client = new HttpClient({
        vendor,
        rateLimits: spec.rateLimits,
        fetch: this._fetch,
        sleep: this._sleep,
        now: this._now,
        logger: this._logger,
      });
      this._httpClients.set(vendor, client);
    }
    return client;
  }

  // -------------------------------------------------------------------------
  // PersonalDataAdapter contract
  // -------------------------------------------------------------------------

  /**
   * Surveys which vendors already have a session. Performs no network calls.
   *
   * Always reports `ok: true`, even with zero sessions; `sync()` simply
   * yields nothing in that case.
   *
   * @param {object} [_ctx] Unused.
   * @returns {Promise<{ok: true, vendorsReady: string[], vendorsNeedingLogin: string[]}>}
   */
  async authenticate(_ctx = {}) {
    const vendorsReady = [];
    const vendorsNeedingLogin = [];
    for (const vendor of Object.keys(this._vendorSpecs)) {
      if (this._sessions[vendor]) {
        vendorsReady.push(vendor);
      } else {
        vendorsNeedingLogin.push(vendor);
      }
    }
    return { ok: true, vendorsReady, vendorsNeedingLogin };
  }

  /**
   * Validates every configured session, all vendors in parallel.
   *
   * Vendors without a session report `{ ok: false, reason: "no-session" }`;
   * a throwing `validateCookie` is reported as `{ ok: false, reason }` using
   * the error's `code` or `message`. `perVendor` keys keep registry order.
   *
   * NOTE(review): the aggregate `ok` is always true; callers that need an
   * overall verdict must inspect `perVendor`.
   *
   * @returns {Promise<{ok: true, perVendor: Record<string, object>}>}
   */
  async healthCheck() {
    const entries = Object.entries(this._vendorSpecs);
    const results = await Promise.all(
      entries.map(async ([vendor, spec]) => {
        const sess = this._sessions[vendor];
        if (!sess) {
          return { ok: false, reason: "no-session" };
        }
        try {
          const httpClient = this._getHttpClient(vendor);
          return await spec.validateCookie({ session: sess, vendor, httpClient });
        } catch (err) {
          return { ok: false, reason: err.code || err.message };
        }
      }),
    );
    // Assemble afterwards so key order follows the registry, not completion order.
    const perVendor = {};
    entries.forEach(([vendor], index) => {
      perVendor[vendor] = results[index];
    });
    return { ok: true, perVendor };
  }

  /**
   * Streams conversation + message envelopes across all vendors that have a
   * session, one vendor after another.
   *
   * Each envelope is `{ originalId, capturedAt, payload }` where
   * `payload.kind` is one of:
   *   - "conversation"          → payload.conversation
   *   - "message"               → payload.message
   *   - "vendor-not-wired"      → trace for NotImplementedYetError
   *   - "vendor-cookie-expired" → trace for CookieExpiredError
   *   - "vendor-rate-limited"   → trace for RateLimitedError (adds retryAfterMs)
   *
   * The three known error types end that vendor's run and move on to the
   * next vendor; any other error is rethrown and ends the whole stream.
   *
   * @param {object} [opts]
   * @param {string[]} [opts.vendors] Restrict to these vendor keys (unknown keys are ignored).
   * @param {object} [opts.watermarks] Per-vendor value passed as `since` to listConversations.
   * @returns {AsyncGenerator<object>}
   */
  async *sync(opts = {}) {
    const targetVendors = opts.vendors
      ? opts.vendors.filter((v) => this._vendorSpecs[v])
      : Object.keys(this._vendorSpecs);

    for (const vendor of targetVendors) {
      const sess = this._sessions[vendor];
      if (!sess) {
        this._logger.info(`[ai-chat] skipping vendor=${vendor}: no session`);
        continue;
      }
      const spec = this._vendorSpecs[vendor];
      const httpClient = this._getHttpClient(vendor);
      const ctx = { session: sess, vendor, httpClient };
      const vendorWatermark = (opts.watermarks && opts.watermarks[vendor]) || null;

      try {
        for await (const conv of spec.listConversations(ctx, { since: vendorWatermark })) {
          yield {
            originalId: `${vendor}:conv:${conv.originalId}`,
            // Prefer updatedAt, then createdAt, then "now" when neither is numeric.
            capturedAt: Number(conv.updatedAt) || Number(conv.createdAt) || Date.now(),
            payload: { kind: "conversation", vendor, conversation: conv },
          };

          for await (const msg of spec.listMessages(ctx, conv.originalId, {})) {
            yield {
              originalId: `${vendor}:msg:${msg.originalId}`,
              capturedAt: Number(msg.createdAt) || Date.now(),
              payload: { kind: "message", vendor, message: msg },
            };
          }
        }
      } catch (err) {
        const traceCapturedAt = Date.now();
        if (err instanceof NotImplementedYetError) {
          this._logger.warn(
            `[ai-chat] vendor=${vendor} not wired (Phase 10.2+ work): ${err.message}`,
          );
          yield {
            originalId: `${vendor}:trace:not-wired:${traceCapturedAt}`,
            capturedAt: traceCapturedAt,
            payload: { kind: "vendor-not-wired", vendor, error: err.code },
          };
          continue;
        }
        if (err instanceof CookieExpiredError) {
          this._logger.warn(`[ai-chat] vendor=${vendor} cookie expired: ${err.message}`);
          yield {
            originalId: `${vendor}:trace:cookie-expired:${traceCapturedAt}`,
            capturedAt: traceCapturedAt,
            payload: { kind: "vendor-cookie-expired", vendor, error: err.code },
          };
          continue;
        }
        if (err instanceof RateLimitedError) {
          this._logger.warn(`[ai-chat] vendor=${vendor} rate limited: ${err.message}`);
          yield {
            originalId: `${vendor}:trace:rate-limited:${traceCapturedAt}`,
            capturedAt: traceCapturedAt,
            payload: {
              kind: "vendor-rate-limited",
              vendor,
              error: err.code,
              retryAfterMs: err.retryAfterMs,
            },
          };
          continue;
        }
        throw err;
      }
    }
  }

  /**
   * Converts one raw envelope into a NormalizedBatch.
   *
   * Accepts both the registry envelope (`raw.payload.kind`) and the bare
   * inner shape (`raw.kind`). "conversation" yields the vendor Person plus
   * the conversation Topic; "message" yields the message Event, the vendor
   * Person and any generated-image items. Trace kinds, unknown kinds and
   * envelopes whose conversation/message body is missing yield an empty
   * batch instead of throwing.
   *
   * @param {object} raw Envelope produced by `sync()` or its inner payload.
   * @returns {{events: object[], persons: object[], places: object[], items: object[], topics: object[]}}
   */
  normalize(raw) {
    const emptyBatch = () => ({ events: [], persons: [], places: [], items: [], topics: [] });
    if (!raw || typeof raw !== "object") {
      return emptyBatch();
    }
    const inner = raw.payload && typeof raw.payload === "object" ? raw.payload : raw;
    const kind = inner.kind;

    if (kind === "vendor-not-wired" || kind === "vendor-cookie-expired" || kind === "vendor-rate-limited") {
      // Nothing to write; the warning was already logged by sync().
      return emptyBatch();
    }

    if (kind === "conversation") {
      const conv = inner.conversation;
      // A conversation envelope without its body has nothing to map.
      if (!conv || typeof conv !== "object") return emptyBatch();
      const spec = this._vendorSpecs[conv.vendor];
      const displayName = spec ? spec.displayName : conv.vendor;
      return {
        events: [],
        persons: [buildVendorPerson(conv.vendor, displayName)],
        places: [],
        items: [],
        topics: [buildConversationTopic(conv)],
      };
    }

    if (kind === "message") {
      const msg = inner.message;
      // A message envelope without its body has nothing to map.
      if (!msg || typeof msg !== "object") return emptyBatch();
      const spec = this._vendorSpecs[msg.vendor];
      const displayName = spec ? spec.displayName : msg.vendor;
      return {
        events: [buildMessageEvent(msg)],
        persons: [buildVendorPerson(msg.vendor, displayName)],
        places: [],
        items: buildGeneratedImageItems(msg),
        topics: [], // the Topic is emitted with the conversation envelope
      };
    }

    return emptyBatch();
  }

  // -------------------------------------------------------------------------
  // Hub UI hooks (not part of PersonalDataAdapter contract)
  // -------------------------------------------------------------------------

  /**
   * Registers a cookie session for a vendor.
   *
   * @param {string} vendor Vendor key present in the spec registry.
   * @param {CookieAuthSession} session
   * @throws {Error} When the vendor is unknown or `session` is not a CookieAuthSession.
   */
  setSession(vendor, session) {
    if (!this._vendorSpecs[vendor]) {
      throw new Error(`AIChatHistoryAdapter: unknown vendor "${vendor}"`);
    }
    if (!(session instanceof CookieAuthSession)) {
      throw new Error("AIChatHistoryAdapter: session must be a CookieAuthSession");
    }
    this._sessions[vendor] = session;
  }

  /**
   * Removes the session stored for `vendor`, if any.
   *
   * @param {string} vendor
   */
  clearSession(vendor) {
    delete this._sessions[vendor];
  }

  /**
   * Lists every registered vendor with its display metadata.
   *
   * `hasSession` is looked up by the registry key (the same key `setSession`
   * stores under), so it stays correct even if a spec's `name` differs from
   * the key it is registered under.
   *
   * @returns {Array<{name: *, displayName: *, androidPackage: *, loginUrl: *, hasSession: boolean}>}
   */
  listVendors() {
    return Object.entries(this._vendorSpecs).map(([vendor, spec]) => ({
      name: spec.name,
      displayName: spec.displayName,
      androidPackage: spec.androidPackage,
      loginUrl: spec.loginUrl,
      hasSession: Boolean(this._sessions[vendor]),
    }));
  }
}
364
+
365
+ module.exports = {
366
+ AIChatHistoryAdapter,
367
+ SUPPORTED_VENDORS,
368
+ DEFAULT_VENDOR_SPECS,
369
+ // re-export for convenience
370
+ ADAPTER_NAME,
371
+ ADAPTER_VERSION,
372
+ EVENT_SUBTYPE_AI_MESSAGE: EVENT_SUBTYPES.AI_MESSAGE,
373
+ EVENT_SUBTYPE_AI_IMAGE_GENERATION: EVENT_SUBTYPES.AI_IMAGE_GENERATION,
374
+ };
@@ -0,0 +1,109 @@
1
+ /**
2
+ * CookieAuthSession — minimal cookie jar + WebView-ingested cookie blob.
3
+ *
4
+ * Phase 10.1 skeleton: this is the place where the parent hub will hand a
5
+ * vendor adapter the cookie blob captured from Electron's webContents.session
6
+ * cookie store. Vendor sub-adapters call `applyTo(headers)` to inject cookies
7
+ * into an outbound fetch.
8
+ *
9
+ * The blob is the raw output of `session.cookies.get({ url })`, i.e. an array
10
+ * of `{ name, value, domain, path, secure, httpOnly, expirationDate? }`.
11
+ * We don't try to be a full RFC 6265 jar — for these vendors the page that
12
+ * issued the cookie and the page we POST back to are always same-origin, so
13
+ * a flat `name=value;` header is enough.
14
+ *
15
+ * Phase 10.2+ will replace this with `tough-cookie` if multi-domain expiry
16
+ * tracking turns out to be required (Coze uses two domains).
17
+ */
18
+
19
+ "use strict";
20
+
21
/**
 * Minimal per-vendor cookie jar: holds an array of cookie records and turns
 * them into a flat `Cookie` header value.
 */
class CookieAuthSession {
  /**
   * @param {object} opts
   * @param {string} opts.vendor Non-empty vendor identifier.
   * @param {Array<{name:string,value:string,domain?:string,path?:string,expirationDate?:number}>} opts.cookies
   *   Cookie records; the array is shallow-copied.
   * @param {number} [opts.capturedAt] Unix ms when the cookies were captured;
   *   defaults to the current time.
   * @throws {Error} When `opts` is missing, `vendor` is not a non-empty
   *   string, or `cookies` is not an array.
   */
  constructor(opts) {
    if (!opts || typeof opts !== "object") {
      throw new Error("CookieAuthSession: opts required");
    }
    if (typeof opts.vendor !== "string" || opts.vendor.length === 0) {
      throw new Error("CookieAuthSession: opts.vendor required");
    }
    if (!Array.isArray(opts.cookies)) {
      throw new Error("CookieAuthSession: opts.cookies must be an array");
    }
    this.vendor = opts.vendor;
    this.cookies = opts.cookies.slice();
    this.capturedAt = typeof opts.capturedAt === "number" ? opts.capturedAt : Date.now();
  }

  /**
   * Reports whether the session is known to be unusable.
   *
   * @param {number} [nowMs] Reference time in unix ms; defaults to now.
   * @returns {boolean} `true` when the jar is empty or any cookie has a
   *   numeric `expirationDate` (seconds) at or before `nowMs`. Cookies
   *   without an expiry never count as expired here, so a `false` result is
   *   not proof that the session is still valid.
   */
  isExpired(nowMs = Date.now()) {
    if (this.cookies.length === 0) return true;
    return this.cookies.some(
      // expirationDate is in seconds; nowMs is in milliseconds.
      (c) => typeof c.expirationDate === "number" && c.expirationDate * 1000 <= nowMs,
    );
  }

  /**
   * Builds a Cookie header value: "name1=value1; name2=value2; ...".
   *
   * @param {string} [matchDomain] When given, keeps only cookies that have no
   *   `domain` or whose domain (leading dot ignored) equals `matchDomain` or
   *   is a dot-separated parent of it. The comparison is case-insensitive.
   * @returns {string} Header value; empty string when nothing matches.
   */
  toHeaderValue(matchDomain) {
    let selected = this.cookies;
    if (matchDomain) {
      const host = String(matchDomain).toLowerCase();
      selected = this.cookies.filter((c) => {
        if (!c.domain) return true;
        const bare = (c.domain.startsWith(".") ? c.domain.slice(1) : c.domain).toLowerCase();
        // Require an exact match or a "." boundary so "badexample.com" does
        // not match a cookie scoped to "example.com".
        return host === bare || host.endsWith("." + bare);
      });
    }
    return selected.map((c) => `${c.name}=${c.value}`).join("; ");
  }

  /**
   * Mutates `headers` in place: sets `Cookie` from `toHeaderValue(matchDomain)`.
   *
   * @param {object} headers Header bag to mutate.
   * @param {string} [matchDomain] Optional domain filter, see `toHeaderValue`.
   * @returns {object} The same `headers` object.
   */
  applyTo(headers, matchDomain) {
    headers.Cookie = this.toHeaderValue(matchDomain);
    return headers;
  }

  /**
   * Looks up a single cookie value by name.
   *
   * @param {string} name
   * @returns {string|undefined} Value of the first cookie with that name.
   */
  get(name) {
    const match = this.cookies.find((c) => c.name === name);
    return match ? match.value : undefined;
  }

  /**
   * Flattens the session for persistence. Values are returned as-is; any
   * encryption has to happen in the storage layer.
   *
   * @returns {{vendor: string, capturedAt: number, cookies: object[]}}
   */
  toJSON() {
    return {
      vendor: this.vendor,
      capturedAt: this.capturedAt,
      cookies: this.cookies,
    };
  }

  /**
   * Rebuilds a session from the shape produced by `toJSON()`.
   *
   * @param {object} json
   * @returns {CookieAuthSession}
   * @throws {Error} Same validation errors as the constructor.
   */
  static fromJSON(json) {
    return new CookieAuthSession(json);
  }
}
108
+
109
+ module.exports = { CookieAuthSession };