@chainlesschain/personal-data-hub 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/ai-chat-cookie-capture-spec.test.js +211 -0
- package/__tests__/adapters/ai-chat-health-checker.test.js +262 -0
- package/__tests__/adapters/ai-chat-history.test.js +396 -0
- package/__tests__/adapters/ai-chat-http-client.test.js +242 -0
- package/__tests__/adapters/ai-chat-vendors.test.js +874 -0
- package/__tests__/adapters/alipay-bill-adapter.test.js +538 -0
- package/__tests__/adapters/email-adapter.test.js +138 -1
- package/__tests__/adapters/email-classifier.test.js +347 -0
- package/__tests__/adapters/email-pdf-extractor.test.js +529 -0
- package/__tests__/adapters/email-retry-progress.test.js +294 -0
- package/__tests__/adapters/email-templates.test.js +699 -0
- package/__tests__/adapters/social-toutiao-kuaishou-scaffold.test.js +269 -0
- package/__tests__/adapters/system-data-adapter.test.js +440 -0
- package/__tests__/adapters/system-data-android-ingest.test.js +144 -0
- package/__tests__/adapters/system-data-android.test.js +387 -0
- package/__tests__/adapters/system-data-disclosure.test.js +153 -0
- package/__tests__/adapters/wechat-bootstrap.test.js +240 -0
- package/__tests__/adapters/wechat-env-probe.test.js +162 -0
- package/__tests__/adapters/wechat-frida-agent.test.js +191 -0
- package/__tests__/adapters/wechat-frida-integration.test.js +149 -0
- package/__tests__/adapters/wechat-frida-key-provider.test.js +188 -0
- package/__tests__/adapters/wechat-md5-key-provider.test.js +101 -0
- package/__tests__/analysis-skills.test.js +556 -0
- package/__tests__/analysis.test.js +329 -1
- package/__tests__/e2e/ai-chat-cross-source-journey.test.js +213 -0
- package/__tests__/e2e/full-user-journey.test.js +188 -0
- package/__tests__/entity-resolver-ingest-hook.test.js +177 -0
- package/__tests__/entity-resolver-stages.test.js +411 -0
- package/__tests__/entity-resolver-vault.test.js +246 -0
- package/__tests__/entity-resolver.test.js +526 -0
- package/__tests__/fixtures/entity-resolver-200-mock.json +96 -0
- package/__tests__/integration/ai-chat-history-registry.test.js +228 -0
- package/__tests__/integration/aichat-wizard-end-to-end.test.js +282 -0
- package/__tests__/integration/cross-adapter-pipelines.test.js +396 -0
- package/__tests__/integration/wechat-bootstrap-end-to-end.test.js +390 -0
- package/__tests__/longtail-adapters.test.js +217 -0
- package/__tests__/mobile-extractor.test.js +288 -0
- package/__tests__/registry.test.js +4 -2
- package/__tests__/shopping-adapters.test.js +296 -0
- package/__tests__/sidecar-contacts-cross-validate.test.js +163 -0
- package/__tests__/sidecar-supervisor.test.js +120 -0
- package/__tests__/social-adapters.test.js +206 -0
- package/__tests__/travel-adapters.test.js +325 -0
- package/__tests__/vault.test.js +3 -3
- package/__tests__/wechat-adapter.test.js +476 -0
- package/__tests__/whatsapp-adapter.test.js +135 -0
- package/lib/adapter-spec.js +12 -0
- package/lib/adapters/_python-sidecar-base.js +207 -0
- package/lib/adapters/ai-chat-history/ai-chat-adapter.js +374 -0
- package/lib/adapters/ai-chat-history/cookie-auth.js +109 -0
- package/lib/adapters/ai-chat-history/cookie-capture-spec.js +331 -0
- package/lib/adapters/ai-chat-history/health-checker.js +210 -0
- package/lib/adapters/ai-chat-history/http-client.js +211 -0
- package/lib/adapters/ai-chat-history/index.js +28 -0
- package/lib/adapters/ai-chat-history/schema-map.js +258 -0
- package/lib/adapters/ai-chat-history/vendor-spec.js +86 -0
- package/lib/adapters/ai-chat-history/vendors/coze.js +179 -0
- package/lib/adapters/ai-chat-history/vendors/deepseek.js +199 -0
- package/lib/adapters/ai-chat-history/vendors/doubao.js +255 -0
- package/lib/adapters/ai-chat-history/vendors/dreamina.js +174 -0
- package/lib/adapters/ai-chat-history/vendors/hunyuan.js +176 -0
- package/lib/adapters/ai-chat-history/vendors/kimi.js +182 -0
- package/lib/adapters/ai-chat-history/vendors/qianfan.js +160 -0
- package/lib/adapters/ai-chat-history/vendors/tongyi.js +193 -0
- package/lib/adapters/ai-chat-history/vendors/zhipu.js +202 -0
- package/lib/adapters/ai-chat-history/wizard-controller.js +473 -0
- package/lib/adapters/alipay-bill/alipay-bill-adapter.js +311 -0
- package/lib/adapters/alipay-bill/counterparty.js +129 -0
- package/lib/adapters/alipay-bill/csv-parser.js +217 -0
- package/lib/adapters/alipay-bill/index.js +41 -0
- package/lib/adapters/alipay-bill/zip-decryptor.js +111 -0
- package/lib/adapters/email-imap/classifier.js +495 -0
- package/lib/adapters/email-imap/email-adapter.js +419 -8
- package/lib/adapters/email-imap/index.js +42 -0
- package/lib/adapters/email-imap/pdf-extractor.js +192 -0
- package/lib/adapters/email-imap/templates/bill.js +232 -0
- package/lib/adapters/email-imap/templates/government.js +120 -0
- package/lib/adapters/email-imap/templates/index.js +78 -0
- package/lib/adapters/email-imap/templates/order.js +186 -0
- package/lib/adapters/email-imap/templates/other.js +114 -0
- package/lib/adapters/email-imap/templates/register.js +113 -0
- package/lib/adapters/email-imap/templates/travel.js +157 -0
- package/lib/adapters/email-imap/templates/utils.js +275 -0
- package/lib/adapters/email-imap/transactions.js +234 -0
- package/lib/adapters/messaging-qq/index.js +158 -0
- package/lib/adapters/messaging-telegram/index.js +142 -0
- package/lib/adapters/messaging-whatsapp/index.js +189 -0
- package/lib/adapters/shopping-base/index.js +208 -0
- package/lib/adapters/shopping-jd/index.js +150 -0
- package/lib/adapters/shopping-meituan/index.js +154 -0
- package/lib/adapters/shopping-taobao/index.js +176 -0
- package/lib/adapters/social-bilibili/index.js +171 -0
- package/lib/adapters/social-douyin/index.js +116 -0
- package/lib/adapters/social-kuaishou/index.js +237 -0
- package/lib/adapters/social-toutiao/index.js +236 -0
- package/lib/adapters/social-weibo/index.js +164 -0
- package/lib/adapters/social-xiaohongshu/index.js +96 -0
- package/lib/adapters/system-data/disclosure.js +166 -0
- package/lib/adapters/system-data/index.js +34 -0
- package/lib/adapters/system-data/system-data-adapter.js +344 -0
- package/lib/adapters/system-data-android/adapter.js +348 -0
- package/lib/adapters/system-data-android/index.js +76 -0
- package/lib/adapters/travel-12306/index.js +151 -0
- package/lib/adapters/travel-amap/index.js +164 -0
- package/lib/adapters/travel-baidu-map/index.js +162 -0
- package/lib/adapters/travel-base/index.js +240 -0
- package/lib/adapters/travel-ctrip/index.js +151 -0
- package/lib/adapters/wechat/bootstrap.js +146 -0
- package/lib/adapters/wechat/content-parser.js +326 -0
- package/lib/adapters/wechat/db-reader.js +209 -0
- package/lib/adapters/wechat/env-probe.js +218 -0
- package/lib/adapters/wechat/frida-agent/loader.js +67 -0
- package/lib/adapters/wechat/frida-agent/wechat-key-hook.js +126 -0
- package/lib/adapters/wechat/index.js +37 -0
- package/lib/adapters/wechat/key-extractor.js +158 -0
- package/lib/adapters/wechat/key-providers/frida-key-provider.js +244 -0
- package/lib/adapters/wechat/key-providers/index.js +22 -0
- package/lib/adapters/wechat/key-providers/key-provider-base.js +44 -0
- package/lib/adapters/wechat/key-providers/md5-key-provider.js +81 -0
- package/lib/adapters/wechat/normalize.js +220 -0
- package/lib/adapters/wechat/wechat-adapter.js +205 -0
- package/lib/analysis-skills/base.js +113 -0
- package/lib/analysis-skills/footprint.js +167 -0
- package/lib/analysis-skills/index.js +58 -0
- package/lib/analysis-skills/interests.js +161 -0
- package/lib/analysis-skills/relations.js +226 -0
- package/lib/analysis-skills/spending.js +219 -0
- package/lib/analysis-skills/timeline.js +167 -0
- package/lib/analysis.js +191 -2
- package/lib/entity-resolver/embedding-stage.js +198 -0
- package/lib/entity-resolver/entity-resolver.js +384 -0
- package/lib/entity-resolver/index.js +42 -0
- package/lib/entity-resolver/llm-stage.js +191 -0
- package/lib/entity-resolver/rule-stage.js +208 -0
- package/lib/entity-resolver/worker.js +149 -0
- package/lib/index.js +131 -0
- package/lib/migrations.js +73 -0
- package/lib/mobile-extractor/android.js +193 -0
- package/lib/mobile-extractor/index.js +9 -0
- package/lib/mobile-extractor/ios.js +223 -0
- package/lib/prompt-builder.js +11 -1
- package/lib/query-parser.js +7 -1
- package/lib/registry.js +42 -0
- package/lib/sidecar/index.js +15 -0
- package/lib/sidecar/supervisor.js +359 -0
- package/lib/vault.js +343 -0
- package/package.json +36 -3
- package/scripts/_make-fixture-all.js +126 -0
- package/scripts/_make-fixture-contacts.js +84 -0
- package/scripts/evaluate-entity-resolver.js +213 -0
- package/scripts/smoke-phase-5-5.js +196 -0
- package/scripts/smoke-phase-5-7.js +181 -0
- package/scripts/smoke-system-data-contacts.js +309 -0
- package/scripts/smoke-system-data.js +312 -0
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase 13.9(+) — Kuaishou 快手 adapter (v0.1 scaffold).
|
|
3
|
+
*
|
|
4
|
+
* Source: 快手 Android app stores user history in SQLite under
|
|
5
|
+
* /data/data/com.smile.gifmaker/databases/. Schema is reverse-engineered
|
|
6
|
+
* from sjqz parsers and pinned at scaffold quality only — Phase 13.10 will
|
|
7
|
+
* fixture-pin real field names after Xiaomi 24115RA8EC E2E.
|
|
8
|
+
*
|
|
9
|
+
* Conjectured tables (待 fixture pin):
|
|
10
|
+
* - photo_history watched short-videos (kuaishou calls them "photos")
|
|
11
|
+
* - user_collect collected (saved) videos
|
|
12
|
+
* - search_record user search queries
|
|
13
|
+
*
|
|
14
|
+
* Each row → Event with subtype "browse" (photo_history) /
|
|
15
|
+
* "like" (user_collect) / "post" (search_record reframed as a self-authored
|
|
16
|
+
* search event).
|
|
17
|
+
*
|
|
18
|
+
* Mirrors social-bilibili adapter contract; sensitivity stays "medium"
|
|
19
|
+
* (short-video watch history mainly reveals entertainment preference).
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
"use strict";
|
|
23
|
+
|
|
24
|
+
const fs = require("node:fs");
|
|
25
|
+
const { newId } = require("../../ids");
|
|
26
|
+
|
|
27
|
+
const NAME = "social-kuaishou";
|
|
28
|
+
const VERSION = "0.1.0";
|
|
29
|
+
|
|
30
|
+
/**
 * Kuaishou adapter (v0.1 scaffold).
 *
 * Reads rows from a pulled SQLite database and maps each row to exactly one
 * event. The table and column names used here are conjectured, so every query
 * goes through trySelect(): a query that fails simply contributes no rows.
 */
class KuaishouAdapter {
  /**
   * @param {object} opts
   * @param {object} opts.account - Must carry a truthy `uid`.
   * @param {string} [opts.dbPath] - Default database path used by sync().
   * @param {Function} [opts.dbDriverFactory] - Called with no arguments; its
   *   return value is used as the database constructor (`new Driver(path, opts)`).
   * @throws {Error} When `opts.account.uid` is missing.
   */
  constructor(opts = {}) {
    if (!opts.account || !opts.account.uid) {
      throw new Error("KuaishouAdapter: opts.account.uid required");
    }
    this.account = opts.account;
    this._dbPath = opts.dbPath || null;
    this._dbDriverFactory = opts.dbDriverFactory || null;

    this.name = NAME;
    this.version = VERSION;
    this.capabilities = [
      "sync:sqlite",
      "parse:kuaishou-photo-history",
      "parse:kuaishou-user-collect",
      "parse:kuaishou-search",
    ];
    this.extractMode = "device-pull";
    this.rateLimits = {};
    this.dataDisclosure = {
      fields: [
        "kuaishou:photo_history (photo_id / caption / view_time / duration / author_id)",
        "kuaishou:user_collect (photo_id / caption / collect_time)",
        "kuaishou:search_record (keyword / search_time)",
      ],
      sensitivity: "medium",
      legalGate: false,
    };
  }

  /**
   * No real authentication is performed; the configured uid is echoed back.
   * @returns {Promise<{ok: boolean, account: *}>}
   */
  async authenticate() {
    return { ok: true, account: this.account.uid };
  }

  /**
   * Always reports healthy, stamped with the current time.
   * @returns {Promise<{ok: boolean, lastChecked: number}>}
   */
  async healthCheck() {
    return { ok: true, lastChecked: Date.now() };
  }

  /**
   * Stream raw records from the database, one table after another.
   *
   * Yields nothing when no path is configured or the file does not exist.
   * Each query is capped at 5000 rows and is only executed once the previous
   * table has been fully consumed.
   *
   * @param {object} [opts]
   * @param {string} [opts.dbPath] - Overrides the path given to the constructor.
   * @yields {{adapter: string, originalId: string, capturedAt: (number|null),
   *           payload: {row: object, kind: string}}}
   */
  async *sync(opts = {}) {
    const dbPath = opts.dbPath || this._dbPath;
    if (!dbPath || !fs.existsSync(dbPath)) return;
    const Driver = this._dbDriverFactory
      ? this._dbDriverFactory()
      : require("better-sqlite3-multiple-ciphers");
    const db = new Driver(dbPath, { readonly: true });

    // One entry per table: the query, the kind tag consumed by normalize(),
    // and how to derive the record id / timestamp from a row.
    const tables = [
      {
        kind: "watch",
        sql: "SELECT * FROM photo_history ORDER BY view_time DESC LIMIT 5000",
        idOf: (row) => `photo-${row.id || row._id || row.photo_id}`,
        timeOf: (row) => row.view_time || row.time || row.create_time,
      },
      {
        kind: "collect",
        sql: "SELECT * FROM user_collect ORDER BY collect_time DESC LIMIT 5000",
        idOf: (row) => `collect-${row.id || row.photo_id}`,
        timeOf: (row) => row.collect_time || row.time,
      },
      {
        kind: "search",
        sql: "SELECT * FROM search_record ORDER BY search_time DESC LIMIT 5000",
        idOf: (row) => `search-${row.id || `${row.keyword}:${row.search_time}`}`,
        timeOf: (row) => row.search_time || row.time,
      },
    ];

    try {
      for (const { kind, sql, idOf, timeOf } of tables) {
        const rows = trySelect(db, sql) || [];
        for (const row of rows) {
          yield {
            adapter: NAME,
            originalId: idOf(row),
            capturedAt: parseTime(timeOf(row)),
            payload: { row, kind },
          };
        }
      }
    } finally {
      try {
        db.close();
      } catch (_e) {
        // Best-effort close: a failure here must not mask an error (or an
        // early consumer exit) from the loop above.
      }
    }
  }

  /**
   * Convert one raw record produced by sync() into the normalized shape.
   *
   * kind "collect" becomes subtype "like", kind "search" becomes subtype
   * "post", and any other kind is treated as a watch row (subtype "browse").
   *
   * @param {object} raw - Record carrying `payload.row` and `payload.kind`.
   * @returns {{events: object[], persons: Array, places: Array, items: Array,
   *            topics: Array}} Exactly one event; the other lists are empty.
   * @throws {Error} When `raw.payload.row` is missing.
   */
  normalize(raw) {
    if (!raw || !raw.payload || !raw.payload.row) {
      throw new Error("KuaishouAdapter.normalize: row missing");
    }
    const { kind, row } = raw.payload;
    const now = Date.now();
    // create_time is included because sync() accepts it for watch rows;
    // without it such rows would be stamped with the ingestion time instead.
    const occurredAt =
      parseTime(
        row.view_time ||
          row.collect_time ||
          row.search_time ||
          row.time ||
          row.create_time,
      ) || now;
    const source = {
      adapter: NAME,
      adapterVersion: VERSION,
      originalId: raw.originalId,
      capturedAt: occurredAt,
      capturedBy: "sqlite",
    };

    // Only these three fields differ between the event kinds.
    let subtype;
    let content;
    let extra;
    if (kind === "collect") {
      subtype = "like";
      content = { title: row.caption || row.title || "(no caption)" };
      extra = {
        photoId: row.photo_id || null,
        authorId: row.author_id || null,
        authorName: row.author_name || null,
      };
    } else if (kind === "search") {
      subtype = "post";
      content = { title: row.keyword || row.query || "(empty query)" };
      extra = { kind: "search", keyword: row.keyword || row.query || null };
    } else {
      // watch → browse event
      subtype = "browse";
      content = { title: row.caption || row.title || "(no caption)" };
      extra = {
        photoId: row.photo_id || null,
        duration: row.duration || row.play_duration || null,
        authorId: row.author_id || null,
        authorName: row.author_name || null,
      };
    }

    return {
      events: [
        {
          id: newId(),
          type: "event",
          subtype,
          occurredAt,
          actor: "person-self",
          content,
          ingestedAt: now,
          source,
          extra,
        },
      ],
      persons: [],
      places: [],
      items: [],
      topics: [],
    };
  }
}
|
|
215
|
+
|
|
216
|
+
/**
 * Run a query and return all of its rows, or null if anything throws.
 *
 * Failures are deliberately swallowed so that a query against a table or
 * column that is not present yields "no data" rather than an exception.
 *
 * @param {object} db - Handle exposing `prepare(sql)` whose result has `all()`.
 * @param {string} sql - Statement text to execute.
 * @returns {(Array|null)} Rows on success, null on any error.
 */
function trySelect(db, sql) {
  let rows = null;
  try {
    const statement = db.prepare(sql);
    rows = statement.all();
  } catch (_e) {
    rows = null;
  }
  return rows;
}
|
|
223
|
+
|
|
224
|
+
/**
 * Coerce a timestamp-like value into epoch milliseconds.
 *
 * Finite numbers and all-digit strings above 1e12 are taken as milliseconds;
 * anything at or below that is multiplied by 1000 (read as seconds). Other
 * strings go through Date.parse. Everything else yields null.
 *
 * @param {*} v - Raw column value.
 * @returns {(number|null)} Epoch milliseconds, or null when not interpretable.
 */
function parseTime(v) {
  const toMillis = (num) => (num > 1e12 ? num : num * 1000);

  if (Number.isFinite(v)) {
    return toMillis(v);
  }
  if (typeof v !== "string") {
    return null;
  }
  if (/^\d+$/.test(v)) {
    return toMillis(parseInt(v, 10));
  }
  const parsed = Date.parse(v);
  return Number.isFinite(parsed) ? parsed : null;
}
|
|
236
|
+
|
|
237
|
+
module.exports = { KuaishouAdapter, NAME, VERSION };
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase 13.8(+) — Toutiao 今日头条 adapter (v0.1 scaffold).
|
|
3
|
+
*
|
|
4
|
+
* Source: 今日头条 Android app stores user history in SQLite (encrypted in
|
|
5
|
+
* newer versions, plaintext in older 7.x builds). Schema is reverse-engineered
|
|
6
|
+
* from the open-source sjqz parsers project and is pinned at scaffold quality
|
|
7
|
+
* only — Phase 13.10 will fixture-pin real field names after Xiaomi 24115RA8EC
|
|
8
|
+
* real-device E2E.
|
|
9
|
+
*
|
|
10
|
+
* Conjectured tables (待 fixture pin in Phase 13.10):
|
|
11
|
+
* - read_history read articles
|
|
12
|
+
* - collection_article user-collected (saved) articles
|
|
13
|
+
* - search_history user search queries
|
|
14
|
+
*
|
|
15
|
+
* Each row → Event with subtype "browse" (read_history) / "like" (collection)
|
|
16
|
+
* / "post" (search_history reframed as a self-authored "search" event).
|
|
17
|
+
*
|
|
18
|
+
* Mirrors social-bilibili adapter contract; differs only in table list +
|
|
19
|
+
* default sensitivity (toutiao reading patterns may include political /
|
|
20
|
+
* health topics so sensitivity is bumped to "high").
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
"use strict";
|
|
24
|
+
|
|
25
|
+
const fs = require("node:fs");
|
|
26
|
+
const { newId } = require("../../ids");
|
|
27
|
+
|
|
28
|
+
const NAME = "social-toutiao";
|
|
29
|
+
const VERSION = "0.1.0";
|
|
30
|
+
|
|
31
|
+
/**
 * Toutiao adapter (v0.1 scaffold).
 *
 * Reads rows from a pulled SQLite database and maps each row to exactly one
 * event. The table and column names used here are conjectured, so every query
 * goes through trySelect(): a query that fails simply contributes no rows.
 */
class ToutiaoAdapter {
  /**
   * @param {object} opts
   * @param {object} opts.account - Must carry a truthy `uid`.
   * @param {string} [opts.dbPath] - Default database path used by sync().
   * @param {Function} [opts.dbDriverFactory] - Called with no arguments; its
   *   return value is used as the database constructor (`new Driver(path, opts)`).
   * @throws {Error} When `opts.account.uid` is missing.
   */
  constructor(opts = {}) {
    if (!opts.account || !opts.account.uid) {
      throw new Error("ToutiaoAdapter: opts.account.uid required");
    }
    this.account = opts.account;
    this._dbPath = opts.dbPath || null;
    this._dbDriverFactory = opts.dbDriverFactory || null;

    this.name = NAME;
    this.version = VERSION;
    this.capabilities = [
      "sync:sqlite",
      "parse:toutiao-read-history",
      "parse:toutiao-collection",
      "parse:toutiao-search",
    ];
    this.extractMode = "device-pull";
    this.rateLimits = {};
    this.dataDisclosure = {
      fields: [
        "toutiao:read_history (item_id / title / read_time / category)",
        "toutiao:collection_article (item_id / title / save_time)",
        "toutiao:search_history (keyword / search_time)",
      ],
      // Bumped vs bilibili: news reading reveals political / medical interest.
      sensitivity: "high",
      legalGate: false,
    };
  }

  /**
   * No real authentication is performed; the configured uid is echoed back.
   * @returns {Promise<{ok: boolean, account: *}>}
   */
  async authenticate() {
    return { ok: true, account: this.account.uid };
  }

  /**
   * Always reports healthy, stamped with the current time.
   * @returns {Promise<{ok: boolean, lastChecked: number}>}
   */
  async healthCheck() {
    return { ok: true, lastChecked: Date.now() };
  }

  /**
   * Stream raw records from the database, one table after another.
   *
   * Yields nothing when no path is configured or the file does not exist.
   * Each query is capped at 5000 rows and is only executed once the previous
   * table has been fully consumed.
   *
   * @param {object} [opts]
   * @param {string} [opts.dbPath] - Overrides the path given to the constructor.
   * @yields {{adapter: string, originalId: string, capturedAt: (number|null),
   *           payload: {row: object, kind: string}}}
   */
  async *sync(opts = {}) {
    const dbPath = opts.dbPath || this._dbPath;
    if (!dbPath || !fs.existsSync(dbPath)) return;
    const Driver = this._dbDriverFactory
      ? this._dbDriverFactory()
      : require("better-sqlite3-multiple-ciphers");
    const db = new Driver(dbPath, { readonly: true });

    // One entry per table: the query, the kind tag consumed by normalize(),
    // and how to derive the record id / timestamp from a row.
    const tables = [
      {
        kind: "read",
        sql: "SELECT * FROM read_history ORDER BY read_time DESC LIMIT 5000",
        idOf: (row) => `read-${row.id || row._id || row.item_id}`,
        timeOf: (row) => row.read_time || row.time || row.create_time,
      },
      {
        kind: "collection",
        sql: "SELECT * FROM collection_article ORDER BY save_time DESC LIMIT 5000",
        idOf: (row) => `collect-${row.id || row.item_id}`,
        timeOf: (row) => row.save_time || row.time,
      },
      {
        kind: "search",
        sql: "SELECT * FROM search_history ORDER BY search_time DESC LIMIT 5000",
        idOf: (row) => `search-${row.id || `${row.keyword}:${row.search_time}`}`,
        timeOf: (row) => row.search_time || row.time,
      },
    ];

    try {
      for (const { kind, sql, idOf, timeOf } of tables) {
        const rows = trySelect(db, sql) || [];
        for (const row of rows) {
          yield {
            adapter: NAME,
            originalId: idOf(row),
            capturedAt: parseTime(timeOf(row)),
            payload: { row, kind },
          };
        }
      }
    } finally {
      try {
        db.close();
      } catch (_e) {
        // Best-effort close: a failure here must not mask an error (or an
        // early consumer exit) from the loop above.
      }
    }
  }

  /**
   * Convert one raw record produced by sync() into the normalized shape.
   *
   * kind "collection" becomes subtype "like", kind "search" becomes subtype
   * "post", and any other kind is treated as a read row (subtype "browse").
   *
   * @param {object} raw - Record carrying `payload.row` and `payload.kind`.
   * @returns {{events: object[], persons: Array, places: Array, items: Array,
   *            topics: Array}} Exactly one event; the other lists are empty.
   * @throws {Error} When `raw.payload.row` is missing.
   */
  normalize(raw) {
    if (!raw || !raw.payload || !raw.payload.row) {
      throw new Error("ToutiaoAdapter.normalize: row missing");
    }
    const { kind, row } = raw.payload;
    const now = Date.now();
    // create_time is included because sync() accepts it for read rows;
    // without it such rows would be stamped with the ingestion time instead.
    const occurredAt =
      parseTime(
        row.read_time ||
          row.save_time ||
          row.search_time ||
          row.time ||
          row.create_time,
      ) || now;
    const source = {
      adapter: NAME,
      adapterVersion: VERSION,
      originalId: raw.originalId,
      capturedAt: occurredAt,
      capturedBy: "sqlite",
    };

    // Only these three fields differ between the event kinds.
    let subtype;
    let content;
    let extra;
    if (kind === "collection") {
      subtype = "like";
      content = { title: row.title || row.article_title || "(no title)" };
      extra = {
        itemId: row.item_id || null,
        category: row.category || null,
        author: row.author || null,
        source: row.source || null,
      };
    } else if (kind === "search") {
      subtype = "post";
      content = { title: row.keyword || row.query || "(empty query)" };
      extra = { kind: "search", keyword: row.keyword || row.query || null };
    } else {
      // read → browse event
      subtype = "browse";
      content = { title: row.title || row.article_title || "(no title)" };
      extra = {
        itemId: row.item_id || null,
        category: row.category || null,
        author: row.author || null,
        readDuration: row.read_duration || row.duration || null,
      };
    }

    return {
      events: [
        {
          id: newId(),
          type: "event",
          subtype,
          occurredAt,
          actor: "person-self",
          content,
          ingestedAt: now,
          source,
          extra,
        },
      ],
      persons: [],
      places: [],
      items: [],
      topics: [],
    };
  }
}
|
|
214
|
+
|
|
215
|
+
/**
 * Execute a query and hand back every row; any thrown error becomes null.
 *
 * The catch-all is intentional: a query that cannot be prepared or run is
 * reported to the caller as "nothing available" instead of propagating.
 *
 * @param {object} db - Handle exposing `prepare(sql)` whose result has `all()`.
 * @param {string} sql - Statement text to execute.
 * @returns {(Array|null)} Rows on success, null on any error.
 */
function trySelect(db, sql) {
  let result = null;
  try {
    const prepared = db.prepare(sql);
    result = prepared.all();
  } catch (_e) {
    result = null;
  }
  return result;
}
|
|
222
|
+
|
|
223
|
+
/**
 * Turn a timestamp-like value into epoch milliseconds.
 *
 * Finite numbers and all-digit strings above 1e12 are kept as milliseconds;
 * values at or below 1e12 are scaled by 1000 (read as seconds). Any other
 * string is handed to Date.parse. Unsupported input yields null.
 *
 * @param {*} v - Raw column value.
 * @returns {(number|null)} Epoch milliseconds, or null when not interpretable.
 */
function parseTime(v) {
  let numeric;
  if (Number.isFinite(v)) {
    numeric = v;
  } else if (typeof v === "string") {
    if (!/^\d+$/.test(v)) {
      // Not a bare integer: let the date parser have a go.
      const stamp = Date.parse(v);
      return Number.isFinite(stamp) ? stamp : null;
    }
    numeric = parseInt(v, 10);
  } else {
    return null;
  }
  return numeric > 1e12 ? numeric : numeric * 1000;
}
|
|
235
|
+
|
|
236
|
+
module.exports = { ToutiaoAdapter, NAME, VERSION };
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase 13.2 — Weibo (微博) adapter.
|
|
3
|
+
*
|
|
4
|
+
* Source: Weibo Android app SQLite DBs (per sjqz/parsers/social.py
|
|
5
|
+
* WeiboParser). Three tables of v0 interest:
|
|
6
|
+
* - post / status posts the user published
|
|
7
|
+
* - search_history queries
|
|
8
|
+
* - message / direct private messages
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
"use strict";
|
|
12
|
+
|
|
13
|
+
const fs = require("node:fs");
|
|
14
|
+
const { newId } = require("../../ids");
|
|
15
|
+
|
|
16
|
+
const NAME = "social-weibo";
|
|
17
|
+
const VERSION = "0.5.0";
|
|
18
|
+
|
|
19
|
+
/**
 * Weibo adapter.
 *
 * Reads posts and search history from a pulled SQLite database and maps each
 * row to exactly one event. Every query goes through trySelect(), so a query
 * that fails simply contributes no rows.
 */
class WeiboAdapter {
  /**
   * @param {object} opts
   * @param {object} opts.account - Must carry a truthy `uid`.
   * @param {string} [opts.dbPath] - Default database path used by sync().
   * @param {Function} [opts.dbDriverFactory] - Called with no arguments; its
   *   return value is used as the database constructor (`new Driver(path, opts)`).
   * @throws {Error} When `opts.account.uid` is missing.
   */
  constructor(opts = {}) {
    if (!opts.account || !opts.account.uid) {
      throw new Error("WeiboAdapter: opts.account.uid required");
    }
    this.account = opts.account;
    this._dbPath = opts.dbPath || null;
    this._dbDriverFactory = opts.dbDriverFactory || null;

    this.name = NAME;
    this.version = VERSION;
    this.capabilities = ["sync:sqlite", "parse:weibo-posts", "parse:weibo-search"];
    this.extractMode = "device-pull";
    this.rateLimits = {};
    this.dataDisclosure = {
      fields: [
        "weibo:posts (text / created_at / reposts_count / comments_count)",
        "weibo:search_history",
        "weibo:messages",
      ],
      sensitivity: "medium",
      legalGate: false,
    };
  }

  /**
   * No real authentication is performed; the configured uid is echoed back.
   * @returns {Promise<{ok: boolean, account: *}>}
   */
  async authenticate() {
    return { ok: true, account: this.account.uid };
  }

  /**
   * Always reports healthy, stamped with the current time.
   * @returns {Promise<{ok: boolean, lastChecked: number}>}
   */
  async healthCheck() {
    return { ok: true, lastChecked: Date.now() };
  }

  /**
   * Stream raw records: posts first, then search history.
   *
   * Yields nothing when no path is configured or the file does not exist.
   * Posts are read from `post`; `status` is tried only when the `post` query
   * itself fails. Each query is capped at 5000 rows.
   *
   * @param {object} [opts]
   * @param {string} [opts.dbPath] - Overrides the path given to the constructor.
   * @yields {{adapter: string, originalId: string, capturedAt: (number|null),
   *           payload: {row: object, kind: string}}}
   */
  async *sync(opts = {}) {
    const dbPath = opts.dbPath || this._dbPath;
    if (!dbPath || !fs.existsSync(dbPath)) return;
    const Driver = this._dbDriverFactory
      ? this._dbDriverFactory()
      : require("better-sqlite3-multiple-ciphers");
    const db = new Driver(dbPath, { readonly: true });

    try {
      const posts =
        trySelect(db, "SELECT * FROM post ORDER BY created_at DESC LIMIT 5000") ||
        trySelect(db, "SELECT * FROM status ORDER BY created_at DESC LIMIT 5000") ||
        [];
      for (const row of posts) {
        yield {
          adapter: NAME,
          originalId: `post-${row.id || row.mid || row.idstr}`,
          capturedAt: parseTime(row.created_at || row.time),
          payload: { row, kind: "post" },
        };
      }

      const searches =
        trySelect(db, "SELECT * FROM search_history ORDER BY time DESC LIMIT 5000") || [];
      for (const row of searches) {
        const searchTime = row.time || row.create_at || row.created_at;
        // Rows without a primary key would otherwise all share the id
        // "search-undefined"; fall back to keyword + time so they stay distinct.
        const searchKey =
          row.id || row._id || `${row.keyword || row.query}:${searchTime}`;
        yield {
          adapter: NAME,
          originalId: `search-${searchKey}`,
          capturedAt: parseTime(searchTime),
          payload: { row, kind: "search" },
        };
      }
    } finally {
      try {
        db.close();
      } catch (_e) {
        // Best-effort close: a failure here must not mask an error (or an
        // early consumer exit) from the loops above.
      }
    }
  }

  /**
   * Convert one raw record produced by sync() into the normalized shape.
   *
   * kind "search" becomes subtype "interaction"; any other kind is treated as
   * a post (subtype "post").
   *
   * @param {object} raw - Record carrying `payload.row` and `payload.kind`.
   * @returns {{events: object[], persons: Array, places: Array, items: Array,
   *            topics: Array}} Exactly one event; the other lists are empty.
   * @throws {Error} When `raw.payload.row` is missing.
   */
  normalize(raw) {
    if (!raw || !raw.payload || !raw.payload.row) {
      throw new Error("WeiboAdapter.normalize: row missing");
    }
    const { kind, row } = raw.payload;
    const now = Date.now();
    // create_at is included because sync() accepts it for search rows;
    // without it such rows would be stamped with the ingestion time instead.
    const occurredAt = parseTime(row.created_at || row.time || row.create_at) || now;
    const source = {
      adapter: NAME,
      adapterVersion: VERSION,
      originalId: raw.originalId,
      capturedAt: occurredAt,
      capturedBy: "sqlite",
    };

    // Only these three fields differ between the event kinds.
    let subtype;
    let content;
    let extra;
    if (kind === "search") {
      subtype = "interaction";
      content = {
        title: `搜索: ${row.keyword || row.query || ""}`,
        text: row.keyword || row.query || "",
      };
      extra = { query: row.keyword || row.query, fromAdapter: NAME };
    } else {
      // Post
      subtype = "post";
      content = {
        title: (row.text || "").slice(0, 80) || "(空)",
        text: row.text || "",
      };
      extra = {
        weiboMid: row.mid || row.id || row.idstr || null,
        repostsCount: row.reposts_count || row.repost || 0,
        commentsCount: row.comments_count || row.comments || 0,
        likesCount: row.attitudes_count || row.likes || 0,
        source: row.source || null, // client
        location: row.location || row.geo || null,
      };
    }

    return {
      events: [
        {
          id: newId(),
          type: "event",
          subtype,
          occurredAt,
          actor: "person-self",
          content,
          ingestedAt: now,
          source,
          extra,
        },
      ],
      persons: [],
      places: [],
      items: [],
      topics: [],
    };
  }
}
|
|
146
|
+
|
|
147
|
+
/**
 * Fetch all rows for a query, converting any thrown error into null.
 *
 * Errors are swallowed on purpose so callers can probe for a table and fall
 * back to another one when the first query cannot run.
 *
 * @param {object} db - Handle exposing `prepare(sql)` whose result has `all()`.
 * @param {string} sql - Statement text to execute.
 * @returns {(Array|null)} Rows on success, null on any error.
 */
function trySelect(db, sql) {
  let fetched = null;
  try {
    const stmt = db.prepare(sql);
    fetched = stmt.all();
  } catch (_e) {
    fetched = null;
  }
  return fetched;
}
|
|
150
|
+
|
|
151
|
+
/**
 * Convert a timestamp-like value to epoch milliseconds.
 *
 * Finite numbers and all-digit strings above 1e12 pass through as
 * milliseconds; values at or below 1e12 are multiplied by 1000 (read as
 * seconds). Other strings are parsed with Date.parse. Anything else is null.
 *
 * @param {*} v - Raw column value.
 * @returns {(number|null)} Epoch milliseconds, or null when not interpretable.
 */
function parseTime(v) {
  const scale = (amount) => (amount > 1e12 ? amount : amount * 1000);

  if (Number.isFinite(v)) return scale(v);
  if (typeof v !== "string") return null;

  const digitsOnly = /^\d+$/.test(v);
  if (digitsOnly) return scale(parseInt(v, 10));

  const fromDate = Date.parse(v);
  return Number.isFinite(fromDate) ? fromDate : null;
}
|
|
163
|
+
|
|
164
|
+
module.exports = { WeiboAdapter, NAME, VERSION };
|