@chainlesschain/personal-data-hub 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/social-toutiao-kuaishou-scaffold.test.js +58 -16
- package/__tests__/analysis.test.js +1 -1
- package/__tests__/longtail-adapters.test.js +67 -16
- package/__tests__/messaging-qq-snapshot.test.js +294 -0
- package/__tests__/shopping-pinduoduo-snapshot.test.js +302 -0
- package/__tests__/shopping-snapshot.test.js +438 -0
- package/__tests__/social-adapters.test.js +28 -3
- package/__tests__/social-douyin-snapshot.test.js +253 -0
- package/__tests__/social-kuaishou-snapshot.test.js +309 -0
- package/__tests__/social-toutiao-snapshot.test.js +314 -0
- package/__tests__/social-weibo-snapshot.test.js +234 -0
- package/__tests__/social-xiaohongshu-snapshot.test.js +232 -0
- package/__tests__/travel-maps-snapshot.test.js +426 -0
- package/__tests__/vault-driver-error.test.js +74 -0
- package/lib/adapters/messaging-qq/index.js +498 -92
- package/lib/adapters/shopping-jd/index.js +228 -25
- package/lib/adapters/shopping-meituan/index.js +222 -26
- package/lib/adapters/shopping-pinduoduo/index.js +275 -0
- package/lib/adapters/social-douyin/index.js +454 -63
- package/lib/adapters/social-kuaishou/index.js +379 -127
- package/lib/adapters/social-toutiao/index.js +400 -130
- package/lib/adapters/social-weibo/index.js +393 -95
- package/lib/adapters/social-xiaohongshu/index.js +389 -49
- package/lib/adapters/travel-baidu-map/index.js +286 -26
- package/lib/adapters/travel-tencent-map/index.js +414 -0
- package/lib/index.js +5 -1
- package/lib/vault.js +60 -8
- package/package.json +2 -1
|
@@ -1,44 +1,106 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* §A8 v0.2 — Kuaishou (快手) adapter, dual-mode (snapshot + sqlite).
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
* /data/data/com.smile.gifmaker/databases/. Schema is reverse-engineered
|
|
6
|
-
* from sjqz parsers and pinned at scaffold quality only — Phase 13.10 will
|
|
7
|
-
* fixture-pin real field names after Xiaomi 24115RA8EC E2E.
|
|
4
|
+
* Mirror of social-toutiao v0.2 two-mode pattern:
|
|
8
5
|
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
6
|
+
* 1. snapshot mode (opts.inputPath): in-APK Android cc reads a snapshot
|
|
7
|
+
* JSON produced by the phone's KuaishouLocalCollector (root-required
|
|
8
|
+
* SQLCipher decrypt of /data/data/com.smile.gifmaker/databases/ on
|
|
9
|
+
* Phase 13.10 real-device E2E). Desktop-independent; account.uid
|
|
10
|
+
* OPTIONAL at construction — payload carries it.
|
|
13
11
|
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
16
|
-
*
|
|
12
|
+
* 2. sqlite mode (opts.dbPath, legacy): Phase 13.9 device-pull path —
|
|
13
|
+
* desktop reads the pulled DB directly. account.uid REQUIRED in this
|
|
14
|
+
* mode (lazy-checked at sync time).
|
|
17
15
|
*
|
|
18
|
-
*
|
|
19
|
-
*
|
|
16
|
+
* Snapshot schema (mirrors KuaishouLocalCollector.SNAPSHOT_SCHEMA_VERSION):
|
|
17
|
+
*
|
|
18
|
+
* {
|
|
19
|
+
* "schemaVersion": 1,
|
|
20
|
+
* "snapshottedAt": <epoch-ms>,
|
|
21
|
+
* "account": { "uid": "12345", "displayName": "alice" },
|
|
22
|
+
* "events": [
|
|
23
|
+
* { "kind": "watch", "id": "photo-<photoId>", "capturedAt": <ms>,
|
|
24
|
+
* "photoId": "...", "caption": "...", "duration": N,
|
|
25
|
+
* "authorId": "...", "authorName": "..." },
|
|
26
|
+
* { "kind": "collect", "id": "collect-<photoId>", "capturedAt": <ms>,
|
|
27
|
+
* "photoId": "...", "caption": "...",
|
|
28
|
+
* "authorId": "...", "authorName": "..." },
|
|
29
|
+
* { "kind": "search", "id": "search-<kw>:<ts>", "capturedAt": <ms>,
|
|
30
|
+
* "keyword": "...", "searchAt": <ms> }
|
|
31
|
+
* ]
|
|
32
|
+
* }
|
|
33
|
+
*
|
|
34
|
+
* Sensitivity: "medium" — short-video watch history mainly reveals
|
|
35
|
+
* entertainment preference (vs Toutiao's news-reading which goes "high").
|
|
20
36
|
*/
|
|
21
37
|
|
|
22
38
|
"use strict";
|
|
23
39
|
|
|
24
40
|
const fs = require("node:fs");
|
|
25
41
|
const { newId } = require("../../ids");
|
|
42
|
+
const {
|
|
43
|
+
ENTITY_TYPES,
|
|
44
|
+
EVENT_SUBTYPES,
|
|
45
|
+
CAPTURED_BY,
|
|
46
|
+
} = require("../../constants");
|
|
26
47
|
|
|
27
48
|
const NAME = "social-kuaishou";
|
|
28
|
-
const VERSION = "0.
|
|
49
|
+
const VERSION = "0.2.0";
|
|
50
|
+
const SNAPSHOT_SCHEMA_VERSION = 1;
|
|
51
|
+
|
|
52
|
+
const KIND_WATCH = "watch";
|
|
53
|
+
const KIND_COLLECT = "collect";
|
|
54
|
+
const KIND_SEARCH = "search";
|
|
55
|
+
const VALID_SNAPSHOT_KINDS = Object.freeze([
|
|
56
|
+
KIND_WATCH,
|
|
57
|
+
KIND_COLLECT,
|
|
58
|
+
KIND_SEARCH,
|
|
59
|
+
]);
|
|
60
|
+
|
|
61
|
+
/**
 * Build the namespaced `originalId` for a Kuaishou raw event.
 *
 * @param {string} kind - Event kind, interpolated verbatim into the id.
 * @param {string|number|null|undefined} id - Upstream identifier. A non-empty
 *   string is used as-is; a finite number is stringified.
 * @returns {string} `kuaishou:<kind>:<id>`. When `id` is unusable (empty
 *   string, non-finite number, or any other type) the id part becomes
 *   `unknown-<Date.now()>-<random base36>` — i.e. NOT reproducible across
 *   calls.
 */
function stableOriginalId(kind, id) {
  let suffix;
  if (typeof id === "string" && id.length > 0) {
    suffix = id;
  } else if (typeof id === "number" && Number.isFinite(id)) {
    suffix = String(id);
  } else {
    // No usable upstream id: fall back to a timestamp + random tail.
    const stamp = Date.now();
    const randomTail = Math.random().toString(36).slice(2, 8);
    suffix = `unknown-${stamp}-${randomTail}`;
  }
  return `kuaishou:${kind}:${suffix}`;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Coerce a timestamp-like value to epoch milliseconds.
 *
 * Finite numbers and all-digit strings go through the same heuristic: a value
 * greater than 1e12 is returned unchanged (taken as milliseconds), anything
 * else is multiplied by 1000 (taken as seconds). Other strings are handed to
 * `Date.parse`.
 *
 * @param {*} v - Number, digit string, date string, or anything else.
 * @returns {number|null} Epoch milliseconds, or `null` when `v` is neither a
 *   finite number nor a parseable string.
 */
function parseTime(v) {
  const toMillis = (n) => (n > 1e12 ? n : n * 1000);

  if (Number.isFinite(v)) {
    return toMillis(v);
  }
  if (typeof v !== "string") {
    return null;
  }
  if (/^\d+$/.test(v)) {
    return toMillis(parseInt(v, 10));
  }
  const parsed = Date.parse(v);
  return Number.isFinite(parsed) ? parsed : null;
}
|
|
84
|
+
|
|
85
|
+
/**
 * Run a query and return all rows, or `null` if anything throws.
 *
 * Calls `db.prepare(sql).all()`; every exception from either call is
 * swallowed. Presumably this lets callers probe tables that may not exist in
 * a given DB — confirm against the call sites.
 *
 * @param {{ prepare: Function }} db - Driver handle exposing `prepare(sql)`
 *   whose result exposes `all()`.
 * @param {string} sql - Statement text passed straight to `prepare`.
 * @returns {*} Whatever `all()` returns, or `null` on error.
 */
function trySelect(db, sql) {
  let rows = null;
  try {
    const statement = db.prepare(sql);
    rows = statement.all();
  } catch (_e) {
    rows = null;
  }
  return rows;
}
|
|
29
92
|
|
|
30
93
|
class KuaishouAdapter {
|
|
31
94
|
constructor(opts = {}) {
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
this.account = opts.account;
|
|
95
|
+
// §A8 v0.2: account.uid OPTIONAL at construction — snapshot mode is
|
|
96
|
+
// stateless. Sqlite mode lazy-checks at sync time.
|
|
97
|
+
this.account = opts.account || null;
|
|
36
98
|
this._dbPath = opts.dbPath || null;
|
|
37
|
-
this._dbDriverFactory = opts.dbDriverFactory || null;
|
|
38
99
|
|
|
39
100
|
this.name = NAME;
|
|
40
101
|
this.version = VERSION;
|
|
41
102
|
this.capabilities = [
|
|
103
|
+
"sync:snapshot",
|
|
42
104
|
"sync:sqlite",
|
|
43
105
|
"parse:kuaishou-photo-history",
|
|
44
106
|
"parse:kuaishou-user-collect",
|
|
@@ -54,11 +116,49 @@ class KuaishouAdapter {
|
|
|
54
116
|
],
|
|
55
117
|
sensitivity: "medium",
|
|
56
118
|
legalGate: false,
|
|
119
|
+
defaultInclude: {
|
|
120
|
+
watch: true,
|
|
121
|
+
collect: true,
|
|
122
|
+
search: true,
|
|
123
|
+
},
|
|
124
|
+
};
|
|
125
|
+
|
|
126
|
+
this._deps = {
|
|
127
|
+
fs,
|
|
128
|
+
dbDriverFactory: opts.dbDriverFactory || null,
|
|
57
129
|
};
|
|
58
130
|
}
|
|
59
131
|
|
|
60
|
-
async authenticate() {
|
|
61
|
-
|
|
132
|
+
async authenticate(ctx = {}) {
|
|
133
|
+
if (ctx && typeof ctx.inputPath === "string" && ctx.inputPath.length > 0) {
|
|
134
|
+
try {
|
|
135
|
+
this._deps.fs.accessSync(ctx.inputPath, this._deps.fs.constants.R_OK);
|
|
136
|
+
} catch (err) {
|
|
137
|
+
return {
|
|
138
|
+
ok: false,
|
|
139
|
+
reason: "INPUT_PATH_UNREADABLE",
|
|
140
|
+
message: `snapshot not readable at ${ctx.inputPath}: ${err.message}`,
|
|
141
|
+
};
|
|
142
|
+
}
|
|
143
|
+
return { ok: true, mode: "snapshot-file" };
|
|
144
|
+
}
|
|
145
|
+
if (this._dbPath || (ctx && typeof ctx.dbPath === "string")) {
|
|
146
|
+
if (!this.account || !this.account.uid) {
|
|
147
|
+
return {
|
|
148
|
+
ok: false,
|
|
149
|
+
reason: "NO_ACCOUNT_UID",
|
|
150
|
+
message:
|
|
151
|
+
"social-kuaishou.authenticate: sqlite mode requires account.uid",
|
|
152
|
+
};
|
|
153
|
+
}
|
|
154
|
+
return { ok: true, account: this.account.uid, mode: "sqlite" };
|
|
155
|
+
}
|
|
156
|
+
return {
|
|
157
|
+
ok: false,
|
|
158
|
+
reason: "NO_INPUT",
|
|
159
|
+
message:
|
|
160
|
+
"social-kuaishou.authenticate: needs opts.inputPath (snapshot mode) OR opts.dbPath (sqlite mode)",
|
|
161
|
+
};
|
|
62
162
|
}
|
|
63
163
|
|
|
64
164
|
async healthCheck() {
|
|
@@ -66,10 +166,85 @@ class KuaishouAdapter {
|
|
|
66
166
|
}
|
|
67
167
|
|
|
68
168
|
async *sync(opts = {}) {
|
|
169
|
+
if (typeof opts.inputPath === "string" && opts.inputPath.length > 0) {
|
|
170
|
+
yield* this._syncViaSnapshot(opts);
|
|
171
|
+
return;
|
|
172
|
+
}
|
|
69
173
|
const dbPath = opts.dbPath || this._dbPath;
|
|
70
|
-
if (
|
|
71
|
-
|
|
72
|
-
|
|
174
|
+
if (dbPath) {
|
|
175
|
+
yield* this._syncViaSqlite({ ...opts, dbPath });
|
|
176
|
+
return;
|
|
177
|
+
}
|
|
178
|
+
throw new Error(
|
|
179
|
+
"social-kuaishou.sync: needs opts.inputPath (snapshot mode, Android in-APK cc) OR opts.dbPath (sqlite mode, legacy device-pull)",
|
|
180
|
+
);
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
async *_syncViaSnapshot(opts) {
|
|
184
|
+
const raw = this._deps.fs.readFileSync(opts.inputPath, "utf-8");
|
|
185
|
+
const snapshot = JSON.parse(raw);
|
|
186
|
+
if (
|
|
187
|
+
!snapshot ||
|
|
188
|
+
typeof snapshot !== "object" ||
|
|
189
|
+
snapshot.schemaVersion !== SNAPSHOT_SCHEMA_VERSION
|
|
190
|
+
) {
|
|
191
|
+
throw new Error(
|
|
192
|
+
`social-kuaishou.sync: snapshot schemaVersion mismatch (got ${snapshot && snapshot.schemaVersion}, expected ${SNAPSHOT_SCHEMA_VERSION})`,
|
|
193
|
+
);
|
|
194
|
+
}
|
|
195
|
+
const fallbackCapturedAt =
|
|
196
|
+
Number.isFinite(snapshot.snapshottedAt) && snapshot.snapshottedAt > 0
|
|
197
|
+
? Math.floor(snapshot.snapshottedAt)
|
|
198
|
+
: Date.now();
|
|
199
|
+
|
|
200
|
+
const account =
|
|
201
|
+
snapshot.account && typeof snapshot.account === "object"
|
|
202
|
+
? snapshot.account
|
|
203
|
+
: null;
|
|
204
|
+
const include = opts.include || {};
|
|
205
|
+
const limit =
|
|
206
|
+
Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
|
|
207
|
+
|
|
208
|
+
const events = Array.isArray(snapshot.events) ? snapshot.events : [];
|
|
209
|
+
let emitted = 0;
|
|
210
|
+
for (const ev of events) {
|
|
211
|
+
if (emitted >= limit) return;
|
|
212
|
+
if (!ev || typeof ev !== "object") continue;
|
|
213
|
+
const kind = ev.kind;
|
|
214
|
+
if (!VALID_SNAPSHOT_KINDS.includes(kind)) continue;
|
|
215
|
+
if (include[kind] === false) continue;
|
|
216
|
+
|
|
217
|
+
const capturedAt =
|
|
218
|
+
parseTime(ev.capturedAt) ||
|
|
219
|
+
parseTime(ev.time) ||
|
|
220
|
+
fallbackCapturedAt;
|
|
221
|
+
const id =
|
|
222
|
+
(typeof ev.id === "string" && ev.id.length > 0 && ev.id) ||
|
|
223
|
+
ev.photoId ||
|
|
224
|
+
ev.keyword ||
|
|
225
|
+
null;
|
|
226
|
+
|
|
227
|
+
yield {
|
|
228
|
+
adapter: NAME,
|
|
229
|
+
kind,
|
|
230
|
+
originalId: stableOriginalId(kind, id),
|
|
231
|
+
capturedAt,
|
|
232
|
+
payload: { ...ev, account },
|
|
233
|
+
};
|
|
234
|
+
emitted += 1;
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
async *_syncViaSqlite(opts) {
|
|
239
|
+
if (!this.account || !this.account.uid) {
|
|
240
|
+
throw new Error(
|
|
241
|
+
"social-kuaishou._syncViaSqlite: account.uid required (set via new KuaishouAdapter({ account: { uid } }) in cli wiring)",
|
|
242
|
+
);
|
|
243
|
+
}
|
|
244
|
+
const dbPath = opts.dbPath;
|
|
245
|
+
if (!dbPath || !this._deps.fs.existsSync(dbPath)) return;
|
|
246
|
+
const Driver = this._deps.dbDriverFactory
|
|
247
|
+
? this._deps.dbDriverFactory()
|
|
73
248
|
: require("better-sqlite3-multiple-ciphers");
|
|
74
249
|
const db = new Driver(dbPath, { readonly: true });
|
|
75
250
|
|
|
@@ -84,7 +259,7 @@ class KuaishouAdapter {
|
|
|
84
259
|
adapter: NAME,
|
|
85
260
|
originalId: `photo-${row.id || row._id || row.photo_id}`,
|
|
86
261
|
capturedAt: parseTime(row.view_time || row.time || row.create_time),
|
|
87
|
-
payload: { row, kind:
|
|
262
|
+
payload: { row, kind: KIND_WATCH },
|
|
88
263
|
};
|
|
89
264
|
}
|
|
90
265
|
|
|
@@ -98,7 +273,7 @@ class KuaishouAdapter {
|
|
|
98
273
|
adapter: NAME,
|
|
99
274
|
originalId: `collect-${row.id || row.photo_id}`,
|
|
100
275
|
capturedAt: parseTime(row.collect_time || row.time),
|
|
101
|
-
payload: { row, kind:
|
|
276
|
+
payload: { row, kind: KIND_COLLECT },
|
|
102
277
|
};
|
|
103
278
|
}
|
|
104
279
|
|
|
@@ -112,126 +287,203 @@ class KuaishouAdapter {
|
|
|
112
287
|
adapter: NAME,
|
|
113
288
|
originalId: `search-${row.id || row.keyword + ":" + row.search_time}`,
|
|
114
289
|
capturedAt: parseTime(row.search_time || row.time),
|
|
115
|
-
payload: { row, kind:
|
|
290
|
+
payload: { row, kind: KIND_SEARCH },
|
|
116
291
|
};
|
|
117
292
|
}
|
|
118
293
|
} finally {
|
|
119
294
|
try {
|
|
120
295
|
db.close();
|
|
121
|
-
} catch (_e) {
|
|
296
|
+
} catch (_e) {
|
|
297
|
+
/* ignore */
|
|
298
|
+
}
|
|
122
299
|
}
|
|
123
300
|
}
|
|
124
301
|
|
|
125
302
|
normalize(raw) {
|
|
126
|
-
if (!raw || !raw.payload
|
|
127
|
-
throw new Error("KuaishouAdapter.normalize:
|
|
303
|
+
if (!raw || !raw.payload) {
|
|
304
|
+
throw new Error("KuaishouAdapter.normalize: payload missing");
|
|
128
305
|
}
|
|
129
|
-
const
|
|
130
|
-
const
|
|
131
|
-
const
|
|
132
|
-
parseTime(row.view_time || row.collect_time || row.search_time || row.time) ||
|
|
133
|
-
now;
|
|
134
|
-
const source = {
|
|
135
|
-
adapter: NAME,
|
|
136
|
-
adapterVersion: VERSION,
|
|
137
|
-
originalId: raw.originalId,
|
|
138
|
-
capturedAt: occurredAt,
|
|
139
|
-
capturedBy: "sqlite",
|
|
140
|
-
};
|
|
306
|
+
const ingestedAt = Date.now();
|
|
307
|
+
const kind = raw.kind || raw.payload.kind;
|
|
308
|
+
const p = raw.payload;
|
|
141
309
|
|
|
142
|
-
if (kind ===
|
|
143
|
-
return
|
|
144
|
-
events: [
|
|
145
|
-
{
|
|
146
|
-
id: newId(),
|
|
147
|
-
type: "event",
|
|
148
|
-
subtype: "like",
|
|
149
|
-
occurredAt,
|
|
150
|
-
actor: "person-self",
|
|
151
|
-
content: { title: row.caption || row.title || "(no caption)" },
|
|
152
|
-
ingestedAt: now,
|
|
153
|
-
source,
|
|
154
|
-
extra: {
|
|
155
|
-
photoId: row.photo_id || null,
|
|
156
|
-
authorId: row.author_id || null,
|
|
157
|
-
authorName: row.author_name || null,
|
|
158
|
-
},
|
|
159
|
-
},
|
|
160
|
-
],
|
|
161
|
-
persons: [],
|
|
162
|
-
places: [],
|
|
163
|
-
items: [],
|
|
164
|
-
topics: [],
|
|
165
|
-
};
|
|
310
|
+
if (kind === KIND_COLLECT) {
|
|
311
|
+
return normalizeCollect(p, raw, ingestedAt);
|
|
166
312
|
}
|
|
167
|
-
if (kind ===
|
|
168
|
-
return
|
|
169
|
-
events: [
|
|
170
|
-
{
|
|
171
|
-
id: newId(),
|
|
172
|
-
type: "event",
|
|
173
|
-
subtype: "post",
|
|
174
|
-
occurredAt,
|
|
175
|
-
actor: "person-self",
|
|
176
|
-
content: { title: row.keyword || row.query || "(empty query)" },
|
|
177
|
-
ingestedAt: now,
|
|
178
|
-
source,
|
|
179
|
-
extra: { kind: "search", keyword: row.keyword || row.query || null },
|
|
180
|
-
},
|
|
181
|
-
],
|
|
182
|
-
persons: [],
|
|
183
|
-
places: [],
|
|
184
|
-
items: [],
|
|
185
|
-
topics: [],
|
|
186
|
-
};
|
|
313
|
+
if (kind === KIND_SEARCH) {
|
|
314
|
+
return normalizeSearch(p, raw, ingestedAt);
|
|
187
315
|
}
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
id: newId(),
|
|
193
|
-
type: "event",
|
|
194
|
-
subtype: "browse",
|
|
195
|
-
occurredAt,
|
|
196
|
-
actor: "person-self",
|
|
197
|
-
content: { title: row.caption || row.title || "(no caption)" },
|
|
198
|
-
ingestedAt: now,
|
|
199
|
-
source,
|
|
200
|
-
extra: {
|
|
201
|
-
photoId: row.photo_id || null,
|
|
202
|
-
duration: row.duration || row.play_duration || null,
|
|
203
|
-
authorId: row.author_id || null,
|
|
204
|
-
authorName: row.author_name || null,
|
|
205
|
-
},
|
|
206
|
-
},
|
|
207
|
-
],
|
|
208
|
-
persons: [],
|
|
209
|
-
places: [],
|
|
210
|
-
items: [],
|
|
211
|
-
topics: [],
|
|
212
|
-
};
|
|
316
|
+
if (kind === KIND_WATCH) {
|
|
317
|
+
return normalizeWatch(p, raw, ingestedAt);
|
|
318
|
+
}
|
|
319
|
+
throw new Error(`KuaishouAdapter.normalize: unknown kind ${kind}`);
|
|
213
320
|
}
|
|
214
321
|
}
|
|
215
322
|
|
|
216
|
-
function
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
323
|
+
/**
 * Assemble the `source` provenance record attached to a normalized event.
 *
 * @param {{ originalId: *, capturedAt: * }} raw - Raw event from `sync()`.
 * @param {number} occurredAt - Used as `capturedAt` when `raw.capturedAt` is
 *   falsy.
 * @param {*} capturedBy - Capture-channel marker, stored unchanged.
 * @returns {{adapter: string, adapterVersion: string, originalId: *, capturedAt: *, capturedBy: *}}
 */
function buildSource(raw, occurredAt, capturedBy) {
  const source = {
    adapter: NAME,
    adapterVersion: VERSION,
    originalId: raw.originalId,
  };
  source.capturedAt = raw.capturedAt || occurredAt;
  source.capturedBy = capturedBy;
  return source;
}
|
|
332
|
+
|
|
333
|
+
/**
 * Read a field from a payload, preferring the top-level key and falling back
 * to the nested `row` object.
 *
 * @param {object} p - Payload; may carry a `row` object.
 * @param {string} snapshotKey - Key looked up directly on `p`.
 * @param {...string} sqlKeys - Keys tried in order on `p.row`.
 * @returns {*} The first value that is neither `null` nor `undefined`
 *   (falsy values such as `0` or `""` are returned), or `null` if none.
 */
function pickField(p, snapshotKey, ...sqlKeys) {
  const direct = p[snapshotKey];
  if (direct !== null && direct !== undefined) {
    return direct;
  }
  const row = p.row || {};
  const hit = sqlKeys.find((key) => row[key] != null);
  return hit === undefined ? null : row[hit];
}
|
|
341
|
+
|
|
342
|
+
/**
 * Normalize a "watch" payload into a single BROWSE event.
 *
 * A payload carrying `row` is tagged CAPTURED_BY.SQLITE; one without `row`
 * is tagged CAPTURED_BY.API.
 *
 * @param {object} p - Payload (`raw.payload`).
 * @param {object} raw - Raw event; supplies `originalId` / `capturedAt`.
 * @param {number} ingestedAt - Ingestion time, also the last-resort
 *   `occurredAt`.
 * @returns {{events: object[], persons: [], places: [], items: [], topics: []}}
 * @throws {Error} When the payload has no `row`, `photoId`, or `caption`.
 */
function normalizeWatch(p, raw, ingestedAt) {
  const hasRow = Boolean(p.row);
  if (!hasRow && !p.photoId && !p.caption) {
    throw new Error("KuaishouAdapter.normalize: row missing");
  }
  const sqlRow = hasRow ? p.row : {};

  const title = pickField(p, "caption", "caption", "title") || "(no caption)";

  // Timestamp precedence: payload field, then row columns, then the raw
  // event's capturedAt, then ingestion time.
  const occurredAt =
    parseTime(p.capturedAt) ||
    parseTime(sqlRow.view_time || sqlRow.time || sqlRow.create_time) ||
    raw.capturedAt ||
    ingestedAt;

  const capturedBy = hasRow ? CAPTURED_BY.SQLITE : CAPTURED_BY.API;
  const source = buildSource(raw, occurredAt, capturedBy);

  const event = {
    id: newId(),
    type: ENTITY_TYPES.EVENT,
    subtype: EVENT_SUBTYPES.BROWSE,
    occurredAt,
    actor: "person-self",
    content: { title },
    ingestedAt,
    source,
    extra: {
      platform: "kuaishou",
      photoId: pickField(p, "photoId", "photo_id"),
      duration: pickField(p, "duration", "duration", "play_duration"),
      authorId: pickField(p, "authorId", "author_id"),
      authorName: pickField(p, "authorName", "author_name"),
    },
  };

  return {
    events: [event],
    persons: [],
    places: [],
    items: [],
    topics: [],
  };
}
|
|
223
388
|
|
|
224
|
-
function
|
|
225
|
-
if (
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
const n = parseInt(v, 10);
|
|
229
|
-
return n > 1e12 ? n : n * 1000;
|
|
389
|
+
/**
 * Normalize a "collect" payload into a single LIKE event.
 *
 * A payload carrying `row` is tagged CAPTURED_BY.SQLITE; one without `row`
 * is tagged CAPTURED_BY.API.
 *
 * @param {object} p - Payload (`raw.payload`).
 * @param {object} raw - Raw event; supplies `originalId` / `capturedAt`.
 * @param {number} ingestedAt - Ingestion time, also the last-resort
 *   `occurredAt`.
 * @returns {{events: object[], persons: [], places: [], items: [], topics: []}}
 * @throws {Error} When the payload has no `row`, `photoId`, or `caption`.
 */
function normalizeCollect(p, raw, ingestedAt) {
  const hasRow = Boolean(p.row);
  if (!hasRow && !p.photoId && !p.caption) {
    throw new Error("KuaishouAdapter.normalize: row missing");
  }
  const sqlRow = hasRow ? p.row : {};

  const title = pickField(p, "caption", "caption", "title") || "(no caption)";

  // Timestamp precedence: payload field, then row columns, then the raw
  // event's capturedAt, then ingestion time.
  const occurredAt =
    parseTime(p.capturedAt) ||
    parseTime(sqlRow.collect_time || sqlRow.time) ||
    raw.capturedAt ||
    ingestedAt;

  const capturedBy = hasRow ? CAPTURED_BY.SQLITE : CAPTURED_BY.API;
  const source = buildSource(raw, occurredAt, capturedBy);

  const event = {
    id: newId(),
    type: ENTITY_TYPES.EVENT,
    subtype: EVENT_SUBTYPES.LIKE,
    occurredAt,
    actor: "person-self",
    content: { title },
    ingestedAt,
    source,
    extra: {
      platform: "kuaishou",
      photoId: pickField(p, "photoId", "photo_id"),
      authorId: pickField(p, "authorId", "author_id"),
      authorName: pickField(p, "authorName", "author_name"),
    },
  };

  return {
    events: [event],
    persons: [],
    places: [],
    items: [],
    topics: [],
  };
}
|
|
434
|
+
|
|
435
|
+
/**
 * Normalize a "search" payload into a single POST event whose title is the
 * search keyword.
 *
 * A payload carrying `row` is tagged CAPTURED_BY.SQLITE; one without `row`
 * is tagged CAPTURED_BY.API.
 *
 * @param {object} p - Payload (`raw.payload`).
 * @param {object} raw - Raw event; supplies `originalId` / `capturedAt`.
 * @param {number} ingestedAt - Ingestion time, also the last-resort
 *   `occurredAt`.
 * @returns {{events: object[], persons: [], places: [], items: [], topics: []}}
 * @throws {Error} When the payload has no `row`, `keyword`, or `query`.
 */
function normalizeSearch(p, raw, ingestedAt) {
  const hasRow = Boolean(p.row);
  if (!hasRow && !p.keyword && !p.query) {
    throw new Error("KuaishouAdapter.normalize: row missing");
  }
  const sqlRow = hasRow ? p.row : {};

  // Keyword lookup: p.keyword, then row.keyword / row.query, then p.query,
  // then a placeholder.
  const keyword =
    pickField(p, "keyword", "keyword", "query") ||
    pickField(p, "query") ||
    "(empty query)";

  // Timestamp precedence: payload fields, then row columns, then the raw
  // event's capturedAt, then ingestion time.
  const occurredAt =
    parseTime(p.capturedAt) ||
    parseTime(p.searchAt) ||
    parseTime(sqlRow.search_time || sqlRow.time) ||
    raw.capturedAt ||
    ingestedAt;

  const capturedBy = hasRow ? CAPTURED_BY.SQLITE : CAPTURED_BY.API;
  const source = buildSource(raw, occurredAt, capturedBy);

  const event = {
    id: newId(),
    type: ENTITY_TYPES.EVENT,
    subtype: EVENT_SUBTYPES.POST,
    occurredAt,
    actor: "person-self",
    content: { title: keyword },
    ingestedAt,
    source,
    extra: {
      platform: "kuaishou",
      kind: "search",
      keyword,
    },
  };

  return {
    events: [event],
    persons: [],
    places: [],
    items: [],
    topics: [],
  };
}
|
|
236
482
|
|
|
237
|
-
module.exports = {
|
|
483
|
+
module.exports = {
|
|
484
|
+
KuaishouAdapter,
|
|
485
|
+
NAME,
|
|
486
|
+
VERSION,
|
|
487
|
+
SNAPSHOT_SCHEMA_VERSION,
|
|
488
|
+
VALID_SNAPSHOT_KINDS,
|
|
489
|
+
};
|