@chainlesschain/personal-data-hub 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/social-toutiao-kuaishou-scaffold.test.js +58 -16
- package/__tests__/analysis.test.js +1 -1
- package/__tests__/longtail-adapters.test.js +67 -16
- package/__tests__/messaging-qq-snapshot.test.js +294 -0
- package/__tests__/shopping-pinduoduo-snapshot.test.js +302 -0
- package/__tests__/shopping-snapshot.test.js +438 -0
- package/__tests__/social-adapters.test.js +28 -3
- package/__tests__/social-douyin-snapshot.test.js +253 -0
- package/__tests__/social-kuaishou-snapshot.test.js +309 -0
- package/__tests__/social-toutiao-snapshot.test.js +314 -0
- package/__tests__/social-weibo-snapshot.test.js +234 -0
- package/__tests__/social-xiaohongshu-snapshot.test.js +232 -0
- package/__tests__/travel-maps-snapshot.test.js +426 -0
- package/__tests__/vault-driver-error.test.js +74 -0
- package/lib/adapters/messaging-qq/index.js +498 -92
- package/lib/adapters/shopping-jd/index.js +228 -25
- package/lib/adapters/shopping-meituan/index.js +222 -26
- package/lib/adapters/shopping-pinduoduo/index.js +275 -0
- package/lib/adapters/social-douyin/index.js +454 -63
- package/lib/adapters/social-kuaishou/index.js +379 -127
- package/lib/adapters/social-toutiao/index.js +400 -130
- package/lib/adapters/social-weibo/index.js +393 -95
- package/lib/adapters/social-xiaohongshu/index.js +389 -49
- package/lib/adapters/travel-baidu-map/index.js +286 -26
- package/lib/adapters/travel-tencent-map/index.js +414 -0
- package/lib/index.js +5 -1
- package/lib/vault.js +60 -8
- package/package.json +2 -1
|
@@ -1,48 +1,161 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* §A8 v0.2 — Weibo (微博) adapter, dual-mode (snapshot + sqlite).
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
* -
|
|
7
|
-
*
|
|
8
|
-
*
|
|
4
|
+
* Mirror of social-bilibili/adapter.js two-mode pattern:
|
|
5
|
+
*
|
|
6
|
+
* 1. snapshot mode (opts.inputPath): in-APK Android cc reads a snapshot
|
|
7
|
+
* JSON produced by the phone's WeiboLocalCollector (WebView + OkHttp).
|
|
8
|
+
* Desktop-independent path. Adapter is stateless when in snapshot mode
|
|
9
|
+
* — account.uid is OPTIONAL at construction (the snapshot file carries
|
|
10
|
+
* account in payload).
|
|
11
|
+
*
|
|
12
|
+
* 2. sqlite mode (opts.dbPath, legacy): Phase 13.2 device-pull path —
|
|
13
|
+
* reads Weibo Android app's SQLite (history / post / status / search_
|
|
14
|
+
* history). Preserved for backward compat with desktop sqlite-mode
|
|
15
|
+
* users; account.uid REQUIRED in this mode.
|
|
16
|
+
*
|
|
17
|
+
* Snapshot schema (mirrors WeiboLocalCollector.SNAPSHOT_SCHEMA_VERSION):
|
|
18
|
+
*
|
|
19
|
+
* {
|
|
20
|
+
* "schemaVersion": 1,
|
|
21
|
+
* "snapshottedAt": <epoch-ms>,
|
|
22
|
+
* "account": { "uid": "12345", "displayName": "alice" },
|
|
23
|
+
* "events": [
|
|
24
|
+
* { "kind": "post", "id": "post-<mid>", "capturedAt": <ms>,
|
|
25
|
+
* "text": "...", "mid": "...", "source": "...",
|
|
26
|
+
* "repostsCount": N, "commentsCount": N, "likesCount": N, "picCount": N },
|
|
27
|
+
* { "kind": "favourite", "id": "fav-<mid>", "capturedAt": <ms>,
|
|
28
|
+
* "text": "...", "mid": "...", "authorScreenName": "..." },
|
|
29
|
+
* { "kind": "follow", "id": "follow-<uid>", "capturedAt": <ms>,
|
|
30
|
+
* "uid": <num>, "screenName": "...", "description": "...", "avatarUrl": "..." }
|
|
31
|
+
* ]
|
|
32
|
+
* }
|
|
9
33
|
*/
|
|
10
34
|
|
|
11
35
|
"use strict";
|
|
12
36
|
|
|
13
37
|
const fs = require("node:fs");
|
|
14
38
|
const { newId } = require("../../ids");
|
|
39
|
+
const {
|
|
40
|
+
ENTITY_TYPES,
|
|
41
|
+
PERSON_SUBTYPES,
|
|
42
|
+
EVENT_SUBTYPES,
|
|
43
|
+
CAPTURED_BY,
|
|
44
|
+
} = require("../../constants");
|
|
15
45
|
|
|
16
46
|
const NAME = "social-weibo";
|
|
17
|
-
const VERSION = "0.
|
|
47
|
+
const VERSION = "0.6.0";
|
|
48
|
+
const SNAPSHOT_SCHEMA_VERSION = 1;
|
|
49
|
+
|
|
50
|
+
const KIND_POST = "post";
|
|
51
|
+
const KIND_FAVOURITE = "favourite";
|
|
52
|
+
const KIND_FOLLOW = "follow";
|
|
53
|
+
const KIND_SEARCH = "search"; // legacy sqlite-mode only
|
|
54
|
+
const VALID_SNAPSHOT_KINDS = Object.freeze([KIND_POST, KIND_FAVOURITE, KIND_FOLLOW]);
|
|
55
|
+
|
|
56
|
+
/**
 * Build the namespaced original id ("weibo:<kind>:<id>") for a snapshot event.
 *
 * @param {string} kind - Event kind, interpolated verbatim into the result.
 * @param {*} id - Upstream identifier. Only non-empty strings and finite
 *   numbers are used as-is.
 * @returns {string} The namespaced id. When `id` is unusable, a synthetic
 *   "unknown-<timestamp>-<random>" suffix is generated, so that case is
 *   NOT stable across calls.
 */
function stableOriginalId(kind, id) {
  let suffix = null;
  if (typeof id === "string") {
    if (id.length > 0) {
      suffix = id;
    }
  } else if (typeof id === "number" && Number.isFinite(id)) {
    suffix = String(id);
  }

  if (suffix === null) {
    // No usable id: synthesize a unique placeholder rather than emitting
    // "undefined"/"null" and colliding with other id-less events.
    const nonce = Math.random().toString(36).slice(2, 8);
    suffix = `unknown-${Date.now()}-${nonce}`;
  }
  return `weibo:${kind}:${suffix}`;
}
|
|
66
|
+
|
|
67
|
+
/**
 * Coerce a timestamp-like value to integer epoch milliseconds.
 *
 * Numeric inputs (numbers, or strings made only of digits) greater than 1e12
 * are taken to be milliseconds already; anything at or below that is treated
 * as seconds and multiplied by 1000. Other strings go through `Date.parse`.
 *
 * @param {*} v - Number, digit string, or date string.
 * @returns {number|null} Epoch milliseconds (rounded to an integer for
 *   numeric inputs), or null when the value cannot be interpreted.
 */
function parseTime(v) {
  // Shared seconds-vs-milliseconds heuristic; rounding keeps fractional
  // second inputs from leaking non-integer millisecond values downstream.
  const toMillis = (n) => Math.round(n > 1e12 ? n : n * 1000);

  if (typeof v === "number") {
    return Number.isFinite(v) ? toMillis(v) : null;
  }
  if (typeof v !== "string") {
    return null;
  }

  // Trim first so padded numeric strings (" 1700000000\n") take the numeric
  // path instead of falling through to Date.parse.
  const trimmed = v.trim();
  if (trimmed.length === 0) {
    return null;
  }
  if (/^\d+$/.test(trimmed)) {
    const n = Number.parseInt(trimmed, 10);
    // Absurdly long digit strings overflow to Infinity; report them as
    // unparseable rather than returning a non-finite timestamp.
    return Number.isFinite(n) ? toMillis(n) : null;
  }
  const parsed = Date.parse(trimmed);
  return Number.isFinite(parsed) ? parsed : null;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Run a SELECT and return all rows, or null if preparing/executing throws.
 *
 * Callers chain several candidate queries with `||`, so a failing query
 * (for example against a table that does not exist) must yield null, not throw.
 *
 * @param {{ prepare: Function }} db - Handle exposing `prepare(sql).all()`.
 * @param {string} sql - Query text.
 * @returns {Array|null} Result rows, or null on any error.
 */
function trySelect(db, sql) {
  let rows;
  try {
    const statement = db.prepare(sql);
    rows = statement.all();
  } catch (_err) {
    rows = null;
  }
  return rows;
}
|
|
18
83
|
|
|
19
84
|
class WeiboAdapter {
|
|
20
85
|
constructor(opts = {}) {
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
this.account = opts.account;
|
|
86
|
+
// §A8 v0.2: account.uid now OPTIONAL at construction — snapshot mode is
|
|
87
|
+
// stateless and pulls account from the snapshot file. Sqlite mode (legacy
|
|
88
|
+
// device-pull) still requires it; checked at sync time, not construction.
|
|
89
|
+
this.account = opts.account || null;
|
|
25
90
|
this._dbPath = opts.dbPath || null;
|
|
26
|
-
this._dbDriverFactory = opts.dbDriverFactory || null;
|
|
27
91
|
|
|
28
92
|
this.name = NAME;
|
|
29
93
|
this.version = VERSION;
|
|
30
|
-
this.capabilities = [
|
|
94
|
+
this.capabilities = [
|
|
95
|
+
"sync:snapshot",
|
|
96
|
+
"sync:sqlite",
|
|
97
|
+
"parse:weibo-posts",
|
|
98
|
+
"parse:weibo-favourite",
|
|
99
|
+
"parse:weibo-follow",
|
|
100
|
+
"parse:weibo-search",
|
|
101
|
+
];
|
|
102
|
+
// Existing desktop wiring may key off this — kept as device-pull (the
|
|
103
|
+
// sqlite mode is the desktop-side; snapshot mode is in-APK Android).
|
|
31
104
|
this.extractMode = "device-pull";
|
|
32
105
|
this.rateLimits = {};
|
|
33
106
|
this.dataDisclosure = {
|
|
34
107
|
fields: [
|
|
35
|
-
"weibo:posts (text / created_at / reposts_count / comments_count)",
|
|
36
|
-
"weibo:
|
|
37
|
-
"weibo:
|
|
108
|
+
"weibo:posts (text / created_at / reposts_count / comments_count / likes)",
|
|
109
|
+
"weibo:favourite (mid / text / author)",
|
|
110
|
+
"weibo:follow (uid / screen_name)",
|
|
111
|
+
"weibo:search_history (legacy sqlite mode)",
|
|
38
112
|
],
|
|
39
113
|
sensitivity: "medium",
|
|
40
114
|
legalGate: false,
|
|
115
|
+
defaultInclude: {
|
|
116
|
+
post: true,
|
|
117
|
+
favourite: true,
|
|
118
|
+
follow: true,
|
|
119
|
+
},
|
|
120
|
+
};
|
|
121
|
+
|
|
122
|
+
// _deps injection seam for tests (vi.mock fs/ doesn't intercept require in
|
|
123
|
+
// inlined CJS — see .claude/rules/testing.md).
|
|
124
|
+
this._deps = {
|
|
125
|
+
fs,
|
|
126
|
+
dbDriverFactory: opts.dbDriverFactory || null,
|
|
41
127
|
};
|
|
42
128
|
}
|
|
43
129
|
|
|
44
|
-
async authenticate() {
|
|
45
|
-
|
|
130
|
+
async authenticate(ctx = {}) {
|
|
131
|
+
if (ctx && typeof ctx.inputPath === "string" && ctx.inputPath.length > 0) {
|
|
132
|
+
try {
|
|
133
|
+
this._deps.fs.accessSync(ctx.inputPath, this._deps.fs.constants.R_OK);
|
|
134
|
+
} catch (err) {
|
|
135
|
+
return {
|
|
136
|
+
ok: false,
|
|
137
|
+
reason: "INPUT_PATH_UNREADABLE",
|
|
138
|
+
message: `snapshot not readable at ${ctx.inputPath}: ${err.message}`,
|
|
139
|
+
};
|
|
140
|
+
}
|
|
141
|
+
return { ok: true, mode: "snapshot-file" };
|
|
142
|
+
}
|
|
143
|
+
if (this._dbPath || (ctx && typeof ctx.dbPath === "string")) {
|
|
144
|
+
if (!this.account || !this.account.uid) {
|
|
145
|
+
return {
|
|
146
|
+
ok: false,
|
|
147
|
+
reason: "NO_ACCOUNT_UID",
|
|
148
|
+
message: "social-weibo.authenticate: sqlite mode requires account.uid",
|
|
149
|
+
};
|
|
150
|
+
}
|
|
151
|
+
return { ok: true, account: this.account.uid, mode: "sqlite" };
|
|
152
|
+
}
|
|
153
|
+
return {
|
|
154
|
+
ok: false,
|
|
155
|
+
reason: "NO_INPUT",
|
|
156
|
+
message:
|
|
157
|
+
"social-weibo.authenticate: needs opts.inputPath (snapshot mode) OR opts.dbPath (sqlite mode)",
|
|
158
|
+
};
|
|
46
159
|
}
|
|
47
160
|
|
|
48
161
|
async healthCheck() {
|
|
@@ -50,115 +163,300 @@ class WeiboAdapter {
|
|
|
50
163
|
}
|
|
51
164
|
|
|
52
165
|
async *sync(opts = {}) {
|
|
166
|
+
if (typeof opts.inputPath === "string" && opts.inputPath.length > 0) {
|
|
167
|
+
yield* this._syncViaSnapshot(opts);
|
|
168
|
+
return;
|
|
169
|
+
}
|
|
53
170
|
const dbPath = opts.dbPath || this._dbPath;
|
|
54
|
-
if (
|
|
55
|
-
|
|
56
|
-
|
|
171
|
+
if (dbPath) {
|
|
172
|
+
yield* this._syncViaSqlite({ ...opts, dbPath });
|
|
173
|
+
return;
|
|
174
|
+
}
|
|
175
|
+
throw new Error(
|
|
176
|
+
"social-weibo.sync: needs opts.inputPath (snapshot mode, Android in-APK cc) OR opts.dbPath (sqlite mode, legacy device-pull)",
|
|
177
|
+
);
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
async *_syncViaSnapshot(opts) {
|
|
181
|
+
const raw = this._deps.fs.readFileSync(opts.inputPath, "utf-8");
|
|
182
|
+
const snapshot = JSON.parse(raw);
|
|
183
|
+
if (
|
|
184
|
+
!snapshot ||
|
|
185
|
+
typeof snapshot !== "object" ||
|
|
186
|
+
snapshot.schemaVersion !== SNAPSHOT_SCHEMA_VERSION
|
|
187
|
+
) {
|
|
188
|
+
throw new Error(
|
|
189
|
+
`social-weibo.sync: snapshot schemaVersion mismatch (got ${snapshot && snapshot.schemaVersion}, expected ${SNAPSHOT_SCHEMA_VERSION})`,
|
|
190
|
+
);
|
|
191
|
+
}
|
|
192
|
+
const fallbackCapturedAt =
|
|
193
|
+
Number.isFinite(snapshot.snapshottedAt) && snapshot.snapshottedAt > 0
|
|
194
|
+
? Math.floor(snapshot.snapshottedAt)
|
|
195
|
+
: Date.now();
|
|
196
|
+
|
|
197
|
+
const account =
|
|
198
|
+
snapshot.account && typeof snapshot.account === "object"
|
|
199
|
+
? snapshot.account
|
|
200
|
+
: null;
|
|
201
|
+
const include = opts.include || {};
|
|
202
|
+
const limit =
|
|
203
|
+
Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
|
|
204
|
+
|
|
205
|
+
const events = Array.isArray(snapshot.events) ? snapshot.events : [];
|
|
206
|
+
let emitted = 0;
|
|
207
|
+
for (const ev of events) {
|
|
208
|
+
if (emitted >= limit) return;
|
|
209
|
+
if (!ev || typeof ev !== "object") continue;
|
|
210
|
+
const kind = ev.kind;
|
|
211
|
+
if (!VALID_SNAPSHOT_KINDS.includes(kind)) continue;
|
|
212
|
+
if (include[kind] === false) continue;
|
|
213
|
+
|
|
214
|
+
const capturedAt =
|
|
215
|
+
parseTime(ev.capturedAt) ||
|
|
216
|
+
parseTime(ev.time) ||
|
|
217
|
+
fallbackCapturedAt;
|
|
218
|
+
const id =
|
|
219
|
+
(typeof ev.id === "string" && ev.id.length > 0 && ev.id) ||
|
|
220
|
+
ev.mid ||
|
|
221
|
+
ev.uid ||
|
|
222
|
+
null;
|
|
223
|
+
|
|
224
|
+
yield {
|
|
225
|
+
adapter: NAME,
|
|
226
|
+
kind,
|
|
227
|
+
originalId: stableOriginalId(kind, id),
|
|
228
|
+
capturedAt,
|
|
229
|
+
payload: { ...ev, account },
|
|
230
|
+
};
|
|
231
|
+
emitted += 1;
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
async *_syncViaSqlite(opts) {
|
|
236
|
+
// Legacy Phase 13.2 path — requires account.uid in constructor and a DB
|
|
237
|
+
// pulled via the desktop AndroidExtractor.
|
|
238
|
+
if (!this.account || !this.account.uid) {
|
|
239
|
+
throw new Error(
|
|
240
|
+
"social-weibo._syncViaSqlite: account.uid required (set via new WeiboAdapter({ account: { uid } }) in cli wiring)",
|
|
241
|
+
);
|
|
242
|
+
}
|
|
243
|
+
const dbPath = opts.dbPath;
|
|
244
|
+
if (!dbPath || !this._deps.fs.existsSync(dbPath)) return;
|
|
245
|
+
const Driver = this._deps.dbDriverFactory
|
|
246
|
+
? this._deps.dbDriverFactory()
|
|
57
247
|
: require("better-sqlite3-multiple-ciphers");
|
|
58
248
|
const db = new Driver(dbPath, { readonly: true });
|
|
59
249
|
|
|
60
250
|
try {
|
|
61
|
-
const posts =
|
|
62
|
-
|
|
251
|
+
const posts =
|
|
252
|
+
trySelect(db, "SELECT * FROM post ORDER BY created_at DESC LIMIT 5000")
|
|
253
|
+
|| trySelect(db, "SELECT * FROM status ORDER BY created_at DESC LIMIT 5000")
|
|
254
|
+
|| [];
|
|
63
255
|
for (const row of posts) {
|
|
64
256
|
yield {
|
|
65
257
|
adapter: NAME,
|
|
66
258
|
originalId: `post-${row.id || row.mid || row.idstr}`,
|
|
67
259
|
capturedAt: parseTime(row.created_at || row.time),
|
|
68
|
-
payload: { row, kind:
|
|
260
|
+
payload: { row, kind: KIND_POST },
|
|
69
261
|
};
|
|
70
262
|
}
|
|
71
263
|
|
|
72
|
-
const searches =
|
|
264
|
+
const searches =
|
|
265
|
+
trySelect(db, "SELECT * FROM search_history ORDER BY time DESC LIMIT 5000")
|
|
266
|
+
|| [];
|
|
73
267
|
for (const row of searches) {
|
|
74
268
|
yield {
|
|
75
269
|
adapter: NAME,
|
|
76
270
|
originalId: `search-${row.id || row._id}`,
|
|
77
271
|
capturedAt: parseTime(row.time || row.create_at),
|
|
78
|
-
payload: { row, kind:
|
|
272
|
+
payload: { row, kind: KIND_SEARCH },
|
|
79
273
|
};
|
|
80
274
|
}
|
|
81
275
|
} finally {
|
|
82
|
-
try { db.close(); } catch (_e) {}
|
|
276
|
+
try { db.close(); } catch (_e) { /* ignore */ }
|
|
83
277
|
}
|
|
84
278
|
}
|
|
85
279
|
|
|
86
280
|
normalize(raw) {
|
|
87
|
-
if (!raw || !raw.payload
|
|
88
|
-
throw new Error("WeiboAdapter.normalize:
|
|
281
|
+
if (!raw || !raw.payload) {
|
|
282
|
+
throw new Error("WeiboAdapter.normalize: payload missing");
|
|
89
283
|
}
|
|
90
|
-
const
|
|
91
|
-
const
|
|
92
|
-
const
|
|
93
|
-
const source = {
|
|
94
|
-
adapter: NAME, adapterVersion: VERSION,
|
|
95
|
-
originalId: raw.originalId, capturedAt: occurredAt,
|
|
96
|
-
capturedBy: "sqlite",
|
|
97
|
-
};
|
|
284
|
+
const ingestedAt = Date.now();
|
|
285
|
+
const kind = raw.kind || raw.payload.kind;
|
|
286
|
+
const p = raw.payload;
|
|
98
287
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
type: "event",
|
|
104
|
-
subtype: "interaction",
|
|
105
|
-
occurredAt,
|
|
106
|
-
actor: "person-self",
|
|
107
|
-
content: {
|
|
108
|
-
title: `搜索: ${row.keyword || row.query || ""}`,
|
|
109
|
-
text: row.keyword || row.query || "",
|
|
110
|
-
},
|
|
111
|
-
ingestedAt: now,
|
|
112
|
-
source,
|
|
113
|
-
extra: { query: row.keyword || row.query, fromAdapter: NAME },
|
|
114
|
-
}],
|
|
115
|
-
persons: [], places: [], items: [], topics: [],
|
|
116
|
-
};
|
|
288
|
+
// Sqlite-mode payload carries `row`; snapshot-mode payload carries fields
|
|
289
|
+
// directly. The normalizers below handle both shapes.
|
|
290
|
+
if (kind === KIND_SEARCH) {
|
|
291
|
+
return normalizeSearch(p, raw, ingestedAt);
|
|
117
292
|
}
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
title: (row.text || "").slice(0, 80) || "(空)",
|
|
129
|
-
text: row.text || "",
|
|
130
|
-
},
|
|
131
|
-
ingestedAt: now,
|
|
132
|
-
source,
|
|
133
|
-
extra: {
|
|
134
|
-
weiboMid: row.mid || row.id || row.idstr || null,
|
|
135
|
-
repostsCount: row.reposts_count || row.repost || 0,
|
|
136
|
-
commentsCount: row.comments_count || row.comments || 0,
|
|
137
|
-
likesCount: row.attitudes_count || row.likes || 0,
|
|
138
|
-
source: row.source || null, // 客户端
|
|
139
|
-
location: row.location || row.geo || null,
|
|
140
|
-
},
|
|
141
|
-
}],
|
|
142
|
-
persons: [], places: [], items: [], topics: [],
|
|
143
|
-
};
|
|
293
|
+
if (kind === KIND_POST) {
|
|
294
|
+
return normalizePost(p, raw, ingestedAt);
|
|
295
|
+
}
|
|
296
|
+
if (kind === KIND_FAVOURITE) {
|
|
297
|
+
return normalizeFavourite(p, raw, ingestedAt);
|
|
298
|
+
}
|
|
299
|
+
if (kind === KIND_FOLLOW) {
|
|
300
|
+
return normalizeFollow(p, raw, ingestedAt);
|
|
301
|
+
}
|
|
302
|
+
throw new Error(`WeiboAdapter.normalize: unknown kind ${kind}`);
|
|
144
303
|
}
|
|
145
304
|
}
|
|
146
305
|
|
|
147
|
-
/**
 * Build the `source` provenance object attached to normalized entities.
 *
 * @param {object} raw - Raw record; supplies `originalId` and `capturedAt`.
 * @param {number} occurredAt - Used as `capturedAt` when the raw record has
 *   no truthy `capturedAt`.
 * @param {string} [capturedBy] - Capture channel; defaults to
 *   CAPTURED_BY.SQLITE when falsy.
 * @returns {object} `{ adapter, adapterVersion, originalId, capturedAt, capturedBy }`.
 */
function buildSource(raw, occurredAt, capturedBy) {
  const source = {
    adapter: NAME,
    adapterVersion: VERSION,
    originalId: raw.originalId,
  };
  source.capturedAt = raw.capturedAt || occurredAt;
  source.capturedBy = capturedBy || CAPTURED_BY.SQLITE;
  return source;
}
|
|
150
315
|
|
|
151
|
-
/**
 * Normalize a sqlite-mode search-history record into one interaction event.
 *
 * @param {object} p - Payload; the search row is read from `p.row`.
 * @param {object} raw - Raw record, used for provenance.
 * @param {number} ingestedAt - Ingestion time in epoch ms; also the fallback
 *   for `occurredAt` when the row has no parseable time.
 * @returns {object} `{ events: [event], persons, places, items, topics }`.
 */
function normalizeSearch(p, raw, ingestedAt) {
  const row = p.row || {};
  // The row may expose the query text as `keyword` or as `query`.
  const query = row.keyword || row.query;
  const occurredAt = parseTime(row.time || row.create_at) || ingestedAt;
  const source = buildSource(raw, occurredAt, CAPTURED_BY.SQLITE);

  const event = {
    id: newId(),
    type: ENTITY_TYPES.EVENT,
    subtype: EVENT_SUBTYPES.INTERACTION,
    occurredAt,
    actor: "person-self",
    content: {
      title: `搜索: ${query || ""}`,
      text: query || "",
    },
    ingestedAt,
    source,
    extra: { query, fromAdapter: NAME },
  };

  return {
    events: [event],
    persons: [],
    places: [],
    items: [],
    topics: [],
  };
}
|
|
339
|
+
|
|
340
|
+
/**
 * Normalize a post record (either mode) into one post event.
 *
 * Snapshot-mode payloads carry the post fields directly; sqlite-mode
 * payloads nest them under `p.row`. The presence of `p.row` also decides
 * the `capturedBy` value recorded in the source.
 *
 * @param {object} p - Payload from the raw record.
 * @param {object} raw - Raw record; `raw.capturedAt` is the last timestamp
 *   candidate before falling back to `ingestedAt`.
 * @param {number} ingestedAt - Ingestion time in epoch ms.
 * @returns {object} `{ events: [event], persons, places, items, topics }`.
 */
function normalizePost(p, raw, ingestedAt) {
  const row = p.row || p;
  const isSnapshot = !p.row;
  const text = row.text || "";
  const mid = row.mid || row.id || row.idstr || null;

  // Try each candidate in turn. Parsing only the first truthy one would let
  // an unparseable created_at (e.g. a relative-time string) discard the
  // valid raw.capturedAt and fall back to ingestion time.
  const timeCandidates = [
    row.created_at,
    row.createdAt,
    row.time,
    raw.capturedAt,
  ];
  let occurredAt = ingestedAt;
  for (const candidate of timeCandidates) {
    if (!candidate) continue;
    const parsed = parseTime(candidate);
    if (parsed) {
      occurredAt = parsed;
      break;
    }
  }

  const source = buildSource(
    raw,
    occurredAt,
    isSnapshot ? CAPTURED_BY.API : CAPTURED_BY.SQLITE,
  );

  return {
    events: [{
      id: newId(),
      type: ENTITY_TYPES.EVENT,
      subtype: EVENT_SUBTYPES.POST,
      occurredAt,
      actor: "person-self",
      content: {
        title: (text || "").slice(0, 80) || "(空)",
        text,
      },
      ingestedAt,
      source,
      extra: {
        weiboMid: mid,
        // camelCase counters are used whenever present (including 0);
        // the snake_case / short names are the fallbacks.
        repostsCount:
          row.repostsCount != null ? row.repostsCount
            : row.reposts_count || row.repost || 0,
        commentsCount:
          row.commentsCount != null ? row.commentsCount
            : row.comments_count || row.comments || 0,
        likesCount:
          row.likesCount != null ? row.likesCount
            : row.attitudes_count || row.likes || 0,
        picCount: row.picCount || row.pic_num || 0,
        source: row.source || null,
        location: row.location || row.geo || null,
        platform: "weibo",
      },
    }],
    persons: [], places: [], items: [], topics: [],
  };
}
|
|
388
|
+
|
|
389
|
+
/**
 * Normalize a snapshot-mode favourite record into one "like" event.
 *
 * @param {object} p - Payload; reads `text`, `mid`, `capturedAt` and
 *   `authorScreenName`.
 * @param {object} raw - Raw record; `raw.capturedAt` is the second choice
 *   for the event time.
 * @param {number} ingestedAt - Ingestion time in epoch ms (last-resort time).
 * @returns {object} `{ events: [event], persons, places, items, topics }`.
 */
function normalizeFavourite(p, raw, ingestedAt) {
  const body = p.text || "";
  const occurredAt = parseTime(p.capturedAt) || raw.capturedAt || ingestedAt;
  const source = buildSource(raw, occurredAt, CAPTURED_BY.API);

  const extra = {
    platform: "weibo",
    weiboMid: p.mid || null,
    authorScreenName: p.authorScreenName || null,
  };
  const event = {
    id: newId(),
    type: ENTITY_TYPES.EVENT,
    subtype: EVENT_SUBTYPES.LIKE,
    occurredAt,
    actor: "person-self",
    content: {
      // Title is the first 80 characters, with a placeholder for empty text.
      title: (body || "").slice(0, 80) || "(空)",
      text: body,
    },
    ingestedAt,
    source,
    extra,
  };

  return {
    events: [event],
    persons: [],
    places: [],
    items: [],
    topics: [],
  };
}
|
|
419
|
+
|
|
420
|
+
/**
 * Normalize a snapshot-mode follow record into one person entity (no event).
 *
 * @param {object} p - Payload; reads `uid`, `screenName`, `description`,
 *   `avatarUrl` and `capturedAt`.
 * @param {object} raw - Raw record; `raw.capturedAt` is the second choice
 *   for the follow time.
 * @param {number} ingestedAt - Ingestion time in epoch ms (last-resort time).
 * @returns {object} `{ events: [], persons: [person], places, items, topics }`.
 */
function normalizeFollow(p, raw, ingestedAt) {
  // Accept a truthy numeric uid or a non-empty string uid; anything else
  // gets a generated placeholder so the person id is still unique.
  let followUid;
  if (typeof p.uid === "number" && p.uid) {
    followUid = p.uid;
  } else if (typeof p.uid === "string" && p.uid.length > 0) {
    followUid = p.uid;
  } else {
    followUid = `unknown-${newId()}`;
  }

  const displayName = p.screenName || "(unnamed)";
  const occurredAt = parseTime(p.capturedAt) || raw.capturedAt || ingestedAt;
  const source = buildSource(raw, occurredAt, CAPTURED_BY.API);

  const extra = {
    platform: "weibo",
    description: p.description || null,
    avatarUrl: p.avatarUrl || null,
    followedAt: occurredAt,
  };
  const person = {
    id: `person-weibo-${followUid}`,
    type: ENTITY_TYPES.PERSON,
    subtype: PERSON_SUBTYPES.CONTACT,
    names: [displayName],
    ingestedAt,
    source,
    identifiers: {
      "weibo-uid": [String(followUid)],
    },
    extra,
  };

  return {
    events: [],
    persons: [person],
    places: [],
    items: [],
    topics: [],
  };
}
|
|
163
455
|
|
|
164
|
-
module.exports = {
|
|
456
|
+
module.exports = {
|
|
457
|
+
WeiboAdapter,
|
|
458
|
+
NAME,
|
|
459
|
+
VERSION,
|
|
460
|
+
SNAPSHOT_SCHEMA_VERSION,
|
|
461
|
+
VALID_SNAPSHOT_KINDS,
|
|
462
|
+
};
|