@chainlesschain/personal-data-hub 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/social-toutiao-kuaishou-scaffold.test.js +58 -16
- package/__tests__/longtail-adapters.test.js +60 -14
- package/__tests__/messaging-qq-snapshot.test.js +294 -0
- package/__tests__/shopping-pinduoduo-snapshot.test.js +302 -0
- package/__tests__/shopping-snapshot.test.js +438 -0
- package/__tests__/social-adapters.test.js +28 -3
- package/__tests__/social-douyin-snapshot.test.js +253 -0
- package/__tests__/social-kuaishou-snapshot.test.js +309 -0
- package/__tests__/social-toutiao-snapshot.test.js +314 -0
- package/__tests__/social-weibo-snapshot.test.js +234 -0
- package/__tests__/social-xiaohongshu-snapshot.test.js +232 -0
- package/__tests__/travel-maps-snapshot.test.js +426 -0
- package/__tests__/vault-driver-error.test.js +74 -0
- package/lib/adapters/messaging-qq/index.js +498 -92
- package/lib/adapters/shopping-jd/index.js +228 -25
- package/lib/adapters/shopping-meituan/index.js +222 -26
- package/lib/adapters/shopping-pinduoduo/index.js +275 -0
- package/lib/adapters/social-douyin/index.js +454 -63
- package/lib/adapters/social-kuaishou/index.js +379 -127
- package/lib/adapters/social-toutiao/index.js +400 -130
- package/lib/adapters/social-weibo/index.js +393 -95
- package/lib/adapters/social-xiaohongshu/index.js +389 -49
- package/lib/adapters/travel-baidu-map/index.js +286 -26
- package/lib/adapters/travel-tencent-map/index.js +414 -0
- package/lib/index.js +5 -1
- package/lib/vault.js +60 -8
- package/package.json +2 -1
|
@@ -1,48 +1,179 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* §A8 v0.2 — Douyin (抖音) adapter, dual-mode (snapshot + sqlite).
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
* -
|
|
4
|
+
* Mirror of social-weibo / social-bilibili two-mode pattern, **but with a
|
|
5
|
+
* smaller v0.2 surface because Douyin's web APIs gate behind X-Bogus + msToken
|
|
6
|
+
* signatures**:
|
|
7
|
+
*
|
|
8
|
+
* 1. snapshot mode (opts.inputPath): in-APK Android cc reads a snapshot
|
|
9
|
+
* JSON produced by the phone's DouyinLocalCollector (WebView cookie
|
|
10
|
+
* capture + 1 endpoint `passport/account/info/v2/` that works without
|
|
11
|
+
* X-Bogus). Desktop-independent. account.secUid OPTIONAL at construction
|
|
12
|
+
* — payload carries it.
|
|
13
|
+
*
|
|
14
|
+
* 2. sqlite mode (opts.dbPath, legacy): Phase 13.3 device-pull path —
|
|
15
|
+
* reads Douyin Android app's SQLite (video_history / user_favorite /
|
|
16
|
+
* search_history). Preserved for backward compat; account.uid REQUIRED.
|
|
17
|
+
*
|
|
18
|
+
* v0.2 KIND_PROFILE only. v0.3 KIND_HISTORY/KIND_FAVOURITE/KIND_LIKE will
|
|
19
|
+
* land once the X-Bogus signature path is wired (likely via WebView JS
|
|
20
|
+
* injection — Douyin signs every read endpoint and there is no pure-Kotlin
|
|
21
|
+
* implementation that survives signature rotation).
|
|
22
|
+
*
|
|
23
|
+
* Snapshot schema (mirrors DouyinLocalCollector.SNAPSHOT_SCHEMA_VERSION):
|
|
24
|
+
*
|
|
25
|
+
* {
|
|
26
|
+
* "schemaVersion": 1,
|
|
27
|
+
* "snapshottedAt": <epoch-ms>,
|
|
28
|
+
* "account": { "secUid": "MS4wLjABA…", "shortId": "12345678",
|
|
29
|
+
* "displayName": "alice" },
|
|
30
|
+
* "events": [
|
|
31
|
+
* { "kind": "profile", "id": "profile-<secUid>", "capturedAt": <ms>,
|
|
32
|
+
* "secUid": "MS4wLjABA…", "shortId": "12345678", "nickname": "…",
|
|
33
|
+
* "signature": "…", // bio
|
|
34
|
+
* "followingCount": N, "followerCount": N,
|
|
35
|
+
* "awemeCount": N, "favoritingCount": N, "totalFavorited": N }
|
|
36
|
+
*
|
|
37
|
+
* // v0.3 will add (X-Bogus path):
|
|
38
|
+
* // { "kind": "history", "id": "history-<aweme>", ... }
|
|
39
|
+
* // { "kind": "favourite", "id": "fav-<aweme>", ... }
|
|
40
|
+
* // { "kind": "like", "id": "like-<aweme>", ... }
|
|
41
|
+
* ]
|
|
42
|
+
* }
|
|
9
43
|
*/
|
|
10
44
|
|
|
11
45
|
"use strict";
|
|
12
46
|
|
|
13
47
|
const fs = require("node:fs");
|
|
14
48
|
const { newId } = require("../../ids");
|
|
49
|
+
const {
|
|
50
|
+
ENTITY_TYPES,
|
|
51
|
+
PERSON_SUBTYPES,
|
|
52
|
+
EVENT_SUBTYPES,
|
|
53
|
+
CAPTURED_BY,
|
|
54
|
+
} = require("../../constants");
|
|
15
55
|
|
|
16
56
|
// Adapter identity; both values are stamped into every normalized record's
// `source` (see buildSource).
const NAME = "social-douyin";
const VERSION = "0.6.0";
// Only snapshots whose `schemaVersion` equals this value are accepted.
const SNAPSHOT_SCHEMA_VERSION = 1;

// Event kinds understood by normalize().
const KIND_PROFILE = "profile";
const KIND_HISTORY = "history"; // v0.3 (X-Bogus required)
const KIND_FAVOURITE = "favourite"; // v0.3 (X-Bogus required)
const KIND_LIKE = "like"; // v0.3 (X-Bogus required)
const KIND_SEARCH = "search"; // legacy sqlite-mode only

// Forward-compat: list every kind v0.3+ may emit so cc adapter accepts
// snapshots from a newer Android even if this JS hasn't been bumped yet.
// KIND_SEARCH is intentionally absent: it is only produced by sqlite mode.
const VALID_SNAPSHOT_KINDS = Object.freeze([
  KIND_PROFILE,
  KIND_HISTORY,
  KIND_FAVOURITE,
  KIND_LIKE,
]);
|
|
74
|
+
|
|
75
|
+
/**
 * Builds the namespaced original id `douyin:<kind>:<id>` for a snapshot event.
 *
 * @param {string} kind - Event kind, interpolated as-is.
 * @param {string|number|null|undefined} id - Upstream identifier. Non-empty
 *   strings and finite numbers are used verbatim (numbers are stringified).
 * @returns {string} The namespaced id. When `id` is unusable the suffix is
 *   `unknown-<Date.now()>-<random base36>`, which is NOT reproducible across
 *   calls, so such records cannot be deduplicated by id.
 */
function stableOriginalId(kind, id) {
  let suffix;
  if (typeof id === "string" && id.length > 0) {
    suffix = id;
  } else if (typeof id === "number" && Number.isFinite(id)) {
    suffix = String(id);
  } else {
    // No usable upstream id: fall back to a time + random token.
    suffix = `unknown-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
  }
  return `douyin:${kind}:${suffix}`;
}
|
|
85
|
+
|
|
86
|
+
/**
 * Converts a timestamp-like value to epoch milliseconds.
 *
 * Numeric inputs (finite numbers or all-digit strings) greater than 1e12 are
 * taken as already being milliseconds; anything at or below 1e12 is treated
 * as seconds and multiplied by 1000. Other strings go through `Date.parse`.
 *
 * @param {*} v - Number, string, or anything else.
 * @returns {number|null} Epoch milliseconds, or null when `v` is neither a
 *   finite number nor a parseable string.
 */
function parseTime(v) {
  const toMillis = (n) => (n > 1e12 ? n : n * 1000);

  if (typeof v === "number") {
    return Number.isFinite(v) ? toMillis(v) : null;
  }
  if (typeof v !== "string") {
    return null;
  }
  if (/^\d+$/.test(v)) {
    return toMillis(parseInt(v, 10));
  }
  const parsed = Date.parse(v);
  return Number.isFinite(parsed) ? parsed : null;
}
|
|
98
|
+
|
|
99
|
+
/**
 * Runs a SELECT and returns all rows, treating any failure as "no result".
 *
 * Deliberately best-effort: callers chain several candidate queries and rely
 * on `null` to mean "this query could not run, try the next one".
 *
 * @param {{ prepare: Function }} db - Object exposing `prepare(sql).all()`.
 * @param {string} sql - Statement text passed to `db.prepare`.
 * @returns {*} Whatever `.all()` returns, or null if prepare/all threw.
 */
function trySelect(db, sql) {
  let rows = null;
  try {
    const statement = db.prepare(sql);
    rows = statement.all();
  } catch (_err) {
    rows = null;
  }
  return rows;
}
|
|
18
102
|
|
|
19
103
|
class DouyinAdapter {
|
|
20
104
|
/**
 * Creates the adapter and its static descriptor fields.
 *
 * @param {object} [opts]
 * @param {object} [opts.account] - Account descriptor; stored as-is, or null
 *   when absent. Sqlite mode later requires `account.uid`.
 * @param {string} [opts.dbPath] - Default SQLite path used by sqlite mode.
 * @param {Function} [opts.dbDriverFactory] - Optional factory returning the
 *   SQLite driver constructor; when absent the driver is `require`d lazily.
 */
constructor(opts = {}) {
  // §A8 v0.2: account.uid no longer required at construction — snapshot
  // mode pulls account from the snapshot file. Sqlite mode still requires
  // it at sync time.
  this.account = opts.account || null;
  this._dbPath = opts.dbPath || null;

  this.name = NAME;
  this.version = VERSION;
  this.capabilities = [
    "sync:snapshot",
    "sync:sqlite",
    "parse:douyin-profile",
    "parse:douyin-history", // v0.3
    "parse:douyin-favourite", // v0.3
    "parse:douyin-like", // v0.3
    "parse:douyin-search", // sqlite-only
  ];
  this.extractMode = "device-pull";
  this.rateLimits = {};
  this.dataDisclosure = {
    fields: [
      "douyin:profile (sec_user_id / nickname / signature / counts)",
      "douyin:history (aweme_id / title / author / view_time)", // v0.3
      "douyin:favourite", // v0.3
      "douyin:like", // v0.3
      "douyin:search_history (sqlite-mode only)",
    ],
    sensitivity: "medium",
    legalGate: false,
    defaultInclude: {
      profile: true,
      history: true,
      favourite: true,
      like: true,
    },
  };

  // Injectable dependencies: all filesystem access and driver lookup go
  // through this object.
  this._deps = {
    fs,
    dbDriverFactory: opts.dbDriverFactory || null,
  };
}
|
|
43
147
|
|
|
44
|
-
async authenticate() {
|
|
45
|
-
|
|
148
|
+
async authenticate(ctx = {}) {
|
|
149
|
+
if (ctx && typeof ctx.inputPath === "string" && ctx.inputPath.length > 0) {
|
|
150
|
+
try {
|
|
151
|
+
this._deps.fs.accessSync(ctx.inputPath, this._deps.fs.constants.R_OK);
|
|
152
|
+
} catch (err) {
|
|
153
|
+
return {
|
|
154
|
+
ok: false,
|
|
155
|
+
reason: "INPUT_PATH_UNREADABLE",
|
|
156
|
+
message: `snapshot not readable at ${ctx.inputPath}: ${err.message}`,
|
|
157
|
+
};
|
|
158
|
+
}
|
|
159
|
+
return { ok: true, mode: "snapshot-file" };
|
|
160
|
+
}
|
|
161
|
+
if (this._dbPath || (ctx && typeof ctx.dbPath === "string")) {
|
|
162
|
+
if (!this.account || !this.account.uid) {
|
|
163
|
+
return {
|
|
164
|
+
ok: false,
|
|
165
|
+
reason: "NO_ACCOUNT_UID",
|
|
166
|
+
message: "social-douyin.authenticate: sqlite mode requires account.uid",
|
|
167
|
+
};
|
|
168
|
+
}
|
|
169
|
+
return { ok: true, account: this.account.uid, mode: "sqlite" };
|
|
170
|
+
}
|
|
171
|
+
return {
|
|
172
|
+
ok: false,
|
|
173
|
+
reason: "NO_INPUT",
|
|
174
|
+
message:
|
|
175
|
+
"social-douyin.authenticate: needs opts.inputPath (snapshot mode) OR opts.dbPath (sqlite mode)",
|
|
176
|
+
};
|
|
46
177
|
}
|
|
47
178
|
|
|
48
179
|
async healthCheck() {
|
|
@@ -50,67 +181,327 @@ class DouyinAdapter {
|
|
|
50
181
|
}
|
|
51
182
|
|
|
52
183
|
async *sync(opts = {}) {
|
|
184
|
+
if (typeof opts.inputPath === "string" && opts.inputPath.length > 0) {
|
|
185
|
+
yield* this._syncViaSnapshot(opts);
|
|
186
|
+
return;
|
|
187
|
+
}
|
|
53
188
|
const dbPath = opts.dbPath || this._dbPath;
|
|
54
|
-
if (
|
|
55
|
-
|
|
56
|
-
|
|
189
|
+
if (dbPath) {
|
|
190
|
+
yield* this._syncViaSqlite({ ...opts, dbPath });
|
|
191
|
+
return;
|
|
192
|
+
}
|
|
193
|
+
throw new Error(
|
|
194
|
+
"social-douyin.sync: needs opts.inputPath (snapshot mode, Android in-APK cc) OR opts.dbPath (sqlite mode, legacy device-pull)",
|
|
195
|
+
);
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
async *_syncViaSnapshot(opts) {
|
|
199
|
+
const raw = this._deps.fs.readFileSync(opts.inputPath, "utf-8");
|
|
200
|
+
const snapshot = JSON.parse(raw);
|
|
201
|
+
if (
|
|
202
|
+
!snapshot ||
|
|
203
|
+
typeof snapshot !== "object" ||
|
|
204
|
+
snapshot.schemaVersion !== SNAPSHOT_SCHEMA_VERSION
|
|
205
|
+
) {
|
|
206
|
+
throw new Error(
|
|
207
|
+
`social-douyin.sync: snapshot schemaVersion mismatch (got ${snapshot && snapshot.schemaVersion}, expected ${SNAPSHOT_SCHEMA_VERSION})`,
|
|
208
|
+
);
|
|
209
|
+
}
|
|
210
|
+
const fallbackCapturedAt =
|
|
211
|
+
Number.isFinite(snapshot.snapshottedAt) && snapshot.snapshottedAt > 0
|
|
212
|
+
? Math.floor(snapshot.snapshottedAt)
|
|
213
|
+
: Date.now();
|
|
214
|
+
|
|
215
|
+
const account =
|
|
216
|
+
snapshot.account && typeof snapshot.account === "object"
|
|
217
|
+
? snapshot.account
|
|
218
|
+
: null;
|
|
219
|
+
const include = opts.include || {};
|
|
220
|
+
const limit =
|
|
221
|
+
Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
|
|
222
|
+
|
|
223
|
+
const events = Array.isArray(snapshot.events) ? snapshot.events : [];
|
|
224
|
+
let emitted = 0;
|
|
225
|
+
for (const ev of events) {
|
|
226
|
+
if (emitted >= limit) return;
|
|
227
|
+
if (!ev || typeof ev !== "object") continue;
|
|
228
|
+
const kind = ev.kind;
|
|
229
|
+
if (!VALID_SNAPSHOT_KINDS.includes(kind)) continue;
|
|
230
|
+
if (include[kind] === false) continue;
|
|
231
|
+
|
|
232
|
+
const capturedAt =
|
|
233
|
+
parseTime(ev.capturedAt) ||
|
|
234
|
+
parseTime(ev.time) ||
|
|
235
|
+
fallbackCapturedAt;
|
|
236
|
+
const id =
|
|
237
|
+
(typeof ev.id === "string" && ev.id.length > 0 && ev.id) ||
|
|
238
|
+
ev.secUid ||
|
|
239
|
+
ev.awemeId ||
|
|
240
|
+
null;
|
|
241
|
+
|
|
242
|
+
yield {
|
|
243
|
+
adapter: NAME,
|
|
244
|
+
kind,
|
|
245
|
+
originalId: stableOriginalId(kind, id),
|
|
246
|
+
capturedAt,
|
|
247
|
+
payload: { ...ev, account },
|
|
248
|
+
};
|
|
249
|
+
emitted += 1;
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
async *_syncViaSqlite(opts) {
|
|
254
|
+
if (!this.account || !this.account.uid) {
|
|
255
|
+
throw new Error(
|
|
256
|
+
"social-douyin._syncViaSqlite: account.uid required (set via new DouyinAdapter({ account: { uid } }) in cli wiring)",
|
|
257
|
+
);
|
|
258
|
+
}
|
|
259
|
+
const dbPath = opts.dbPath;
|
|
260
|
+
if (!dbPath || !this._deps.fs.existsSync(dbPath)) return;
|
|
261
|
+
const Driver = this._deps.dbDriverFactory
|
|
262
|
+
? this._deps.dbDriverFactory()
|
|
57
263
|
: require("better-sqlite3-multiple-ciphers");
|
|
58
264
|
const db = new Driver(dbPath, { readonly: true });
|
|
265
|
+
|
|
59
266
|
try {
|
|
60
|
-
const histories =
|
|
61
|
-
|
|
267
|
+
const histories =
|
|
268
|
+
trySelect(db, "SELECT * FROM video_history ORDER BY view_time DESC LIMIT 5000")
|
|
269
|
+
|| trySelect(db, "SELECT * FROM history ORDER BY view_time DESC LIMIT 5000")
|
|
270
|
+
|| [];
|
|
62
271
|
for (const row of histories) {
|
|
63
|
-
yield {
|
|
272
|
+
yield {
|
|
273
|
+
adapter: NAME,
|
|
274
|
+
originalId: `history-${row.id || row.aweme_id}`,
|
|
275
|
+
capturedAt: parseTime(row.view_time),
|
|
276
|
+
payload: { row, kind: KIND_HISTORY },
|
|
277
|
+
};
|
|
64
278
|
}
|
|
65
|
-
const favs =
|
|
66
|
-
|
|
279
|
+
const favs =
|
|
280
|
+
trySelect(db, "SELECT * FROM user_favorite ORDER BY create_time DESC LIMIT 5000")
|
|
281
|
+
|| trySelect(db, "SELECT * FROM favourite ORDER BY time DESC LIMIT 5000")
|
|
282
|
+
|| [];
|
|
67
283
|
for (const row of favs) {
|
|
68
|
-
yield {
|
|
284
|
+
yield {
|
|
285
|
+
adapter: NAME,
|
|
286
|
+
originalId: `fav-${row.id || row.aweme_id}`,
|
|
287
|
+
capturedAt: parseTime(row.create_time || row.time),
|
|
288
|
+
payload: { row, kind: KIND_FAVOURITE },
|
|
289
|
+
};
|
|
69
290
|
}
|
|
70
|
-
const searches =
|
|
291
|
+
const searches =
|
|
292
|
+
trySelect(db, "SELECT * FROM search_history ORDER BY time DESC LIMIT 5000")
|
|
293
|
+
|| [];
|
|
71
294
|
for (const row of searches) {
|
|
72
|
-
yield {
|
|
295
|
+
yield {
|
|
296
|
+
adapter: NAME,
|
|
297
|
+
originalId: `search-${row.id || row._id}`,
|
|
298
|
+
capturedAt: parseTime(row.time),
|
|
299
|
+
payload: { row, kind: KIND_SEARCH },
|
|
300
|
+
};
|
|
73
301
|
}
|
|
74
302
|
} finally {
|
|
75
|
-
try { db.close(); } catch (_e) {}
|
|
303
|
+
try { db.close(); } catch (_e) { /* ignore */ }
|
|
76
304
|
}
|
|
77
305
|
}
|
|
78
306
|
|
|
79
307
|
normalize(raw) {
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
const
|
|
84
|
-
const
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
};
|
|
308
|
+
if (!raw || !raw.payload) {
|
|
309
|
+
throw new Error("DouyinAdapter.normalize: payload missing");
|
|
310
|
+
}
|
|
311
|
+
const ingestedAt = Date.now();
|
|
312
|
+
const kind = raw.kind || raw.payload.kind;
|
|
313
|
+
const p = raw.payload;
|
|
314
|
+
|
|
315
|
+
if (kind === KIND_PROFILE) {
|
|
316
|
+
return normalizeProfile(p, raw, ingestedAt);
|
|
317
|
+
}
|
|
318
|
+
if (kind === KIND_HISTORY) {
|
|
319
|
+
return normalizeHistory(p, raw, ingestedAt);
|
|
320
|
+
}
|
|
321
|
+
if (kind === KIND_FAVOURITE) {
|
|
322
|
+
return normalizeFavourite(p, raw, ingestedAt);
|
|
323
|
+
}
|
|
324
|
+
if (kind === KIND_LIKE) {
|
|
325
|
+
return normalizeLike(p, raw, ingestedAt);
|
|
326
|
+
}
|
|
327
|
+
if (kind === KIND_SEARCH) {
|
|
328
|
+
return normalizeSearch(p, raw, ingestedAt);
|
|
329
|
+
}
|
|
330
|
+
throw new Error(`DouyinAdapter.normalize: unknown kind ${kind}`);
|
|
104
331
|
}
|
|
105
332
|
}
|
|
106
333
|
|
|
107
|
-
function
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
334
|
+
/**
 * Builds the `source` provenance object attached to normalized records.
 *
 * @param {object} raw - Raw record; supplies `originalId` and `capturedAt`.
 * @param {number} occurredAt - Used as capturedAt when `raw.capturedAt` is
 *   falsy.
 * @param {string} [capturedBy] - Capture channel; defaults to
 *   CAPTURED_BY.SQLITE when falsy.
 * @returns {{adapter: string, adapterVersion: string, originalId: *,
 *   capturedAt: *, capturedBy: string}}
 */
function buildSource(raw, occurredAt, capturedBy) {
  const capturedAt = raw.capturedAt ? raw.capturedAt : occurredAt;
  const channel = capturedBy ? capturedBy : CAPTURED_BY.SQLITE;
  return {
    adapter: NAME,
    adapterVersion: VERSION,
    originalId: raw.originalId,
    capturedAt,
    capturedBy: channel,
  };
}
|
|
343
|
+
|
|
344
|
+
/**
 * Normalizes a snapshot "profile" event into a single self-person record.
 *
 * Event-level fields win over the snapshot-level `account` copy. When a
 * secUid is known the person id is derived from it; otherwise a fresh id is
 * generated on every call.
 *
 * @param {object} p - Event payload (event fields plus `account`).
 * @param {object} raw - Raw record, used for provenance and time fallback.
 * @param {number} ingestedAt - Ingestion time in epoch ms.
 * @returns {object} Bundle with one entry in `persons` and empty
 *   `events`/`places`/`items`/`topics`.
 */
function normalizeProfile(p, raw, ingestedAt) {
  const acct = p.account || {};
  const secUid = p.secUid || acct.secUid || null;
  const shortId = p.shortId || acct.shortId || null;
  const nickname = p.nickname || acct.displayName || "(unnamed)";

  const occurredAt = parseTime(p.capturedAt) || raw.capturedAt || ingestedAt;
  const source = buildSource(raw, occurredAt, CAPTURED_BY.API);

  // Only identifiers that are actually present are recorded.
  const identifiers = {};
  if (secUid) {
    identifiers["douyin-sec-uid"] = [String(secUid)];
  }
  if (shortId) {
    identifiers["douyin-short-id"] = [String(shortId)];
  }

  const personId = secUid
    ? `person-douyin-${secUid}`
    : `person-douyin-self-${newId()}`;

  const person = {
    id: personId,
    type: ENTITY_TYPES.PERSON,
    subtype: PERSON_SUBTYPES.SELF,
    names: [nickname],
    ingestedAt,
    source,
    identifiers,
    extra: {
      platform: "douyin",
      signature: p.signature || null,
      followingCount: p.followingCount || 0,
      followerCount: p.followerCount || 0,
      awemeCount: p.awemeCount || 0,
      favoritingCount: p.favoritingCount || 0,
      totalFavorited: p.totalFavorited || 0,
      snapshottedAt: occurredAt,
    },
  };

  return {
    events: [],
    persons: [person],
    places: [],
    items: [],
    topics: [],
  };
}
|
|
116
|
-
|
|
381
|
+
|
|
382
|
+
/**
 * Normalizes a "history" record (watched video) into one BROWSE event.
 *
 * Handles both payload shapes: snapshot events (fields directly on `p`) and
 * sqlite records (fields under `p.row`). The presence of `p.row` also
 * decides whether the source is tagged SQLITE or API.
 *
 * @param {object} p - Payload.
 * @param {object} raw - Raw record, used for provenance and time fallback.
 * @param {number} ingestedAt - Ingestion time in epoch ms.
 * @returns {object} Bundle with one entry in `events`; other lists empty.
 */
function normalizeHistory(p, raw, ingestedAt) {
  const sqliteRow = p.row;
  const fields = sqliteRow || p;

  let awemeId = p.awemeId || p.aweme_id;
  if (!awemeId && sqliteRow) {
    awemeId = sqliteRow.aweme_id || sqliteRow.id;
  }
  awemeId = awemeId || null;

  const title = fields.title || fields.desc || p.title || "(no title)";
  const author = fields.author || fields.nickname || p.author || null;
  const duration = fields.duration || p.duration || null;

  const timeHint = p.capturedAt || fields.view_time || fields.time;
  const occurredAt = parseTime(timeHint) || raw.capturedAt || ingestedAt;

  const capturedBy = sqliteRow ? CAPTURED_BY.SQLITE : CAPTURED_BY.API;
  const source = buildSource(raw, occurredAt, capturedBy);

  const event = {
    id: newId(),
    type: ENTITY_TYPES.EVENT,
    subtype: EVENT_SUBTYPES.BROWSE,
    occurredAt,
    actor: "person-self",
    content: { title },
    ingestedAt,
    source,
    extra: {
      platform: "douyin",
      awemeId,
      author,
      duration,
    },
  };

  return {
    events: [event],
    persons: [],
    places: [],
    items: [],
    topics: [],
  };
}
|
|
418
|
+
|
|
419
|
+
/**
 * Normalizes a "favourite" record (saved video) into one LIKE-subtype event.
 *
 * Accepts snapshot events (fields on `p`) and sqlite records (fields under
 * `p.row`); `p.row` being present marks the source as SQLITE, else API.
 *
 * @param {object} p - Payload.
 * @param {object} raw - Raw record, used for provenance and time fallback.
 * @param {number} ingestedAt - Ingestion time in epoch ms.
 * @returns {object} Bundle with one entry in `events`; other lists empty.
 */
function normalizeFavourite(p, raw, ingestedAt) {
  const fromSqlite = Boolean(p.row);
  const data = p.row || p;

  const rowAwemeId = p.row ? p.row.aweme_id || p.row.id : p.row;
  const awemeId = p.awemeId || p.aweme_id || rowAwemeId || null;

  const title = data.title || data.desc || p.title || "(no title)";
  const author = data.author || data.nickname || p.author || null;

  const when = parseTime(p.capturedAt || data.create_time || data.time);
  const occurredAt = when || raw.capturedAt || ingestedAt;

  const source = buildSource(
    raw,
    occurredAt,
    fromSqlite ? CAPTURED_BY.SQLITE : CAPTURED_BY.API,
  );

  const extra = { platform: "douyin", awemeId, author };
  const event = {
    id: newId(),
    type: ENTITY_TYPES.EVENT,
    subtype: EVENT_SUBTYPES.LIKE,
    occurredAt,
    actor: "person-self",
    content: { title },
    ingestedAt,
    source,
    extra,
  };

  return { events: [event], persons: [], places: [], items: [], topics: [] };
}
|
|
453
|
+
|
|
454
|
+
/**
 * Normalizes a "like" record (赞, as opposed to a 收藏 favourite) into one
 * LIKE-subtype event. The source is always tagged as API-captured.
 *
 * Payload-level fields win; `p.row` is consulted only as a fallback.
 *
 * @param {object} p - Payload.
 * @param {object} raw - Raw record, used for provenance and time fallback.
 * @param {number} ingestedAt - Ingestion time in epoch ms.
 * @returns {object} Bundle with one entry in `events`; other lists empty.
 */
function normalizeLike(p, raw, ingestedAt) {
  const row = p.row;

  let awemeId = p.awemeId;
  if (!awemeId) {
    awemeId = row && row.aweme_id;
  }

  let title = p.title;
  if (!title) {
    title = row && (row.title || row.desc);
  }

  let author = p.author;
  if (!author) {
    author = row && (row.author || row.nickname);
  }

  const occurredAt = parseTime(p.capturedAt) || raw.capturedAt || ingestedAt;
  const source = buildSource(raw, occurredAt, CAPTURED_BY.API);

  const event = {
    id: newId(),
    type: ENTITY_TYPES.EVENT,
    subtype: EVENT_SUBTYPES.LIKE,
    occurredAt,
    actor: "person-self",
    content: { title: title || "(no title)" },
    ingestedAt,
    source,
    extra: {
      platform: "douyin",
      awemeId: awemeId || null,
      author: author || null,
    },
  };

  return { events: [event], persons: [], places: [], items: [], topics: [] };
}
|
|
476
|
+
|
|
477
|
+
/**
 * Normalizes a sqlite-mode "search" record into one INTERACTION event.
 *
 * The search text comes from `row.keyword`, falling back to `row.query`.
 * `content` uses an empty string when neither is set, while `extra.query`
 * keeps the raw (possibly undefined) value.
 *
 * @param {object} p - Payload; the database row is expected under `p.row`.
 * @param {object} raw - Raw record, used for provenance.
 * @param {number} ingestedAt - Ingestion time in epoch ms; also the
 *   occurredAt fallback when the row has no parseable time.
 * @returns {object} Bundle with one entry in `events`; other lists empty.
 */
function normalizeSearch(p, raw, ingestedAt) {
  const row = p.row || {};
  const rawQuery = row.keyword || row.query;
  const queryText = rawQuery || "";

  const occurredAt = parseTime(row.time || row.create_time) || ingestedAt;
  const source = buildSource(raw, occurredAt, CAPTURED_BY.SQLITE);

  const event = {
    id: newId(),
    type: ENTITY_TYPES.EVENT,
    subtype: EVENT_SUBTYPES.INTERACTION,
    occurredAt,
    actor: "person-self",
    content: {
      title: `搜索: ${queryText}`,
      text: queryText,
    },
    ingestedAt,
    source,
    extra: { query: rawQuery, fromAdapter: NAME },
  };

  return { events: [event], persons: [], places: [], items: [], topics: [] };
}
|
|
500
|
+
|
|
501
|
+
// Public surface: the adapter class plus the constants describing its
// identity and the snapshot format it accepts.
module.exports = {
  DouyinAdapter,
  NAME,
  VERSION,
  SNAPSHOT_SCHEMA_VERSION,
  VALID_SNAPSHOT_KINDS,
};
|