@chainlesschain/personal-data-hub 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/browser-history-chrome.test.js +377 -0
- package/__tests__/adapters/browser-history-edge.test.js +159 -0
- package/__tests__/adapters/git-activity.test.js +216 -0
- package/__tests__/adapters/local-files.test.js +264 -0
- package/__tests__/adapters/shell-history.test.js +180 -0
- package/__tests__/adapters/system-data-android.test.js +104 -3
- package/__tests__/adapters/vscode.test.js +299 -0
- package/__tests__/adapters/win-recent.test.js +192 -0
- package/__tests__/analysis.test.js +841 -2
- package/__tests__/categories.test.js +92 -0
- package/__tests__/e2e/local-data-adapters-cli.e2e.test.js +146 -0
- package/__tests__/entity-resolver-vault.test.js +5 -2
- package/__tests__/integration/local-data-adapters-pipeline.test.js +373 -0
- package/__tests__/longtail-adapters.test.js +7 -2
- package/__tests__/query-parser.test.js +66 -0
- package/__tests__/registry.test.js +114 -0
- package/__tests__/sidecar-contacts-cross-validate.test.js +24 -1
- package/__tests__/sidecar-supervisor.test.js +9 -1
- package/__tests__/social-kuaishou-snapshot.test.js +55 -2
- package/__tests__/social-toutiao-snapshot.test.js +54 -2
- package/__tests__/vault-search-helpers.test.js +104 -0
- package/__tests__/vault-search.test.js +423 -0
- package/__tests__/vault.test.js +77 -3
- package/lib/adapters/browser-history-chrome/adapter.js +247 -0
- package/lib/adapters/browser-history-chrome/bookmarks-reader.js +79 -0
- package/lib/adapters/browser-history-chrome/chrome-db-reader.js +223 -0
- package/lib/adapters/browser-history-chrome/index.js +23 -0
- package/lib/adapters/browser-history-edge/adapter.js +34 -0
- package/lib/adapters/browser-history-edge/index.js +13 -0
- package/lib/adapters/git-activity/adapter.js +155 -0
- package/lib/adapters/git-activity/git-reader.js +125 -0
- package/lib/adapters/git-activity/index.js +17 -0
- package/lib/adapters/local-files/adapter.js +149 -0
- package/lib/adapters/local-files/file-walker.js +125 -0
- package/lib/adapters/local-files/index.js +18 -0
- package/lib/adapters/shell-history/adapter.js +137 -0
- package/lib/adapters/shell-history/index.js +17 -0
- package/lib/adapters/shell-history/shell-reader.js +100 -0
- package/lib/adapters/social-kuaishou/index.js +57 -1
- package/lib/adapters/social-toutiao/index.js +59 -1
- package/lib/adapters/system-data-android/adapter.js +220 -3
- package/lib/adapters/vscode/adapter.js +285 -0
- package/lib/adapters/vscode/index.js +18 -0
- package/lib/adapters/vscode/vscode-reader.js +191 -0
- package/lib/adapters/win-recent/adapter.js +150 -0
- package/lib/adapters/win-recent/index.js +16 -0
- package/lib/adapters/win-recent/win-recent-reader.js +72 -0
- package/lib/analysis.js +227 -9
- package/lib/categories.js +101 -0
- package/lib/index.js +61 -0
- package/lib/migrations.js +146 -0
- package/lib/query-parser.js +74 -0
- package/lib/registry.js +162 -0
- package/lib/vault.js +363 -2
- package/package.json +2 -1
- package/scripts/run-native-tests-sandbox.sh +53 -0
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
// ShellHistoryAdapter — surfaces PowerShell + bash + zsh command history
|
|
4
|
+
// as a self-actor event stream. Same shape as VSCode's terminal-command
|
|
5
|
+
// kind, just rooted in shell history files instead of an editor's state DB.
|
|
6
|
+
|
|
7
|
+
const {
|
|
8
|
+
ENTITY_TYPES,
|
|
9
|
+
EVENT_SUBTYPES,
|
|
10
|
+
CAPTURED_BY,
|
|
11
|
+
} = require("../../constants");
|
|
12
|
+
|
|
13
|
+
const {
|
|
14
|
+
defaultHistorySources,
|
|
15
|
+
readAllHistory,
|
|
16
|
+
} = require("./shell-reader");
|
|
17
|
+
|
|
18
|
+
const NAME = "shell-history";
|
|
19
|
+
const VERSION = "0.1.0";
|
|
20
|
+
|
|
21
|
+
/**
 * ShellHistoryAdapter — turns shell history rows into raw "shell-command"
 * records (`sync`) and converts each raw record into a self-actor event
 * (`normalize`).
 */
class ShellHistoryAdapter {
  /**
   * @param {object} [opts]
   * @param {Array<{shell: string, file: string}>} [opts.sources] - history
   *   sources to use instead of the platform defaults.
   */
  constructor(opts = {}) {
    this.name = NAME;
    this.version = VERSION;
    this.capabilities = ["sync:shell-history-files"];
    this.extractMode = "file-import";
    this.rateLimits = { perDay: 96 };
    this.dataDisclosure = {
      fields: ["commands:shell,value,sourceIndex,snapshotTs"],
      sensitivity: "high",
      legalGate: false,
      defaultInclude: { commands: true },
    };
    // Collaborators are looked up through _deps at call time, so they can be
    // swapped out on an instance.
    this._deps = {
      defaultSources: defaultHistorySources,
      readHistory: readAllHistory,
    };
    this._sourcesOverride = Array.isArray(opts.sources) ? opts.sources : null;
  }

  /**
   * Picks the source list: a non-empty per-call `opts.sources` wins, then the
   * constructor override, then the platform defaults.
   *
   * @param {object} [opts]
   * @returns {Array<{shell: string, file: string}>}
   */
  _resolveSources(opts) {
    if (Array.isArray(opts?.sources) && opts.sources.length > 0) return opts.sources;
    if (this._sourcesOverride) return this._sourcesOverride;
    return this._deps.defaultSources();
  }

  /**
   * Reports whether any history source is configured. Does not touch the
   * files themselves.
   *
   * @param {object} [ctx]
   * @returns {Promise<object>} `{ ok: true, mode, sources }` or
   *   `{ ok: false, reason, message }`.
   */
  async authenticate(ctx = {}) {
    const sources = this._resolveSources(ctx);
    if (!sources || sources.length === 0) {
      return {
        ok: false,
        reason: "NO_HISTORY_SOURCES",
        message: "no default shell history files on this platform; pass opts.sources",
      };
    }
    return { ok: true, mode: "file-import", sources: sources.map((s) => s.shell) };
  }

  /** @returns {Promise<{ok: boolean, lastChecked: number}>} */
  async healthCheck() {
    return { ok: true, lastChecked: Date.now() };
  }

  /**
   * Streams raw "shell-command" records.
   *
   * @param {object} [opts]
   * @param {Array} [opts.sources] - per-call source override.
   * @param {number} [opts.limit] - positive integer cap on emitted records;
   *   anything else means "no cap".
   * @param {number} [opts.since] - forwarded to the history reader.
   */
  async *sync(opts = {}) {
    const sources = this._resolveSources(opts);
    const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
    const capturedAt = Date.now();
    let emitted = 0;
    for (const row of this._deps.readHistory(sources, { since: opts.since })) {
      yield {
        kind: "shell-command",
        // shell + sourceIndex + hash(value) keeps the same command at the
        // same position in the same history file dedupable across re-syncs.
        originalId: `shell-cmd:${row.shell}:${row.sourceIndex}:${hashCommand(row.value)}`,
        capturedAt,
        payload: row,
      };
      emitted += 1;
      // Stop right after the last permitted record so the reader is never
      // asked for a row that would be thrown away.
      if (emitted >= limit) return;
    }
  }

  /**
   * Converts one raw record produced by `sync` into normalized entities.
   *
   * @param {object} raw - `{ kind, originalId, capturedAt, payload }`.
   * @returns {{events: object[], persons: [], places: [], items: [], topics: []}}
   * @throws {Error} when `raw.kind` is not "shell-command".
   */
  normalize(raw) {
    const ingestedAt = Date.now();
    const source = (originalId) => ({
      adapter: NAME,
      adapterVersion: VERSION,
      capturedAt: raw.capturedAt,
      // NOTE(review): this adapter declares extractMode "file-import", yet
      // the record is tagged SQLITE — confirm whether a file-oriented
      // CAPTURED_BY value should be used.
      capturedBy: CAPTURED_BY.SQLITE,
      originalId,
    });

    if (raw.kind === "shell-command") {
      const MAX_TITLE_CHARS = 100;
      const p = raw.payload || {};
      const cmd = typeof p.value === "string" ? p.value : "";
      const titleText = `[${p.shell || "?"}] ${cmd}`;
      let title = titleText;
      // A string within the limit in UTF-16 units is also within it in code
      // points, so only longer strings need the code-point split.
      if (titleText.length > MAX_TITLE_CHARS) {
        // Truncate on code points so a surrogate pair (emoji etc.) is never
        // cut in half, which would leave a lone surrogate in the title.
        const chars = Array.from(titleText);
        if (chars.length > MAX_TITLE_CHARS) {
          title = `${chars.slice(0, MAX_TITLE_CHARS).join("")}…`;
        }
      }
      const event = {
        id: `event-shell-cmd-${p.shell || "?"}-${p.sourceIndex}-${shortHash(cmd)}`,
        type: ENTITY_TYPES.EVENT,
        subtype: EVENT_SUBTYPES.OTHER,
        // Prefer the reader-supplied snapshot timestamp; fall back to sync time.
        occurredAt: Number.isInteger(p.snapshotTs) ? p.snapshotTs : raw.capturedAt,
        ingestedAt,
        source: source(raw.originalId),
        actor: "self",
        content: {
          title,
          text: cmd,
        },
        extra: {
          kind: "shell-command",
          shell: p.shell || null,
          file: p.file || null,
          sourceIndex: Number.isInteger(p.sourceIndex) ? p.sourceIndex : null,
        },
      };
      return { events: [event], persons: [], places: [], items: [], topics: [] };
    }

    throw new Error(`shell-history.normalize: unknown raw.kind=${raw.kind}`);
  }
}
|
|
121
|
+
|
|
122
|
+
/**
 * djb2-style 32-bit hash of a command string, rendered in base 36.
 * Not cryptographic — it only needs to be stable across syncs.
 *
 * @param {*} s - command text; null/undefined hash like the empty string and
 *   other non-strings are stringified first, so a malformed row never throws.
 * @returns {string} base-36 digest (at most 7 characters for a 32-bit value).
 */
function hashCommand(s) {
  let text = "";
  if (typeof s === "string") {
    text = s;
  } else if (s != null) {
    text = String(s);
  }
  let h = 5381;
  for (let i = 0; i < text.length; i++) {
    // h * 33 + code unit, folded back into an unsigned 32-bit integer.
    h = ((h << 5) + h + text.charCodeAt(i)) >>> 0;
  }
  return h.toString(36);
}

/**
 * Digest used inside event ids: the first 8 characters of `hashCommand`.
 *
 * @param {*} s - command text (same coercion rules as `hashCommand`).
 * @returns {string}
 */
function shortHash(s) {
  return hashCommand(s).substring(0, 8);
}
|
|
132
|
+
|
|
133
|
+
module.exports = {
|
|
134
|
+
ShellHistoryAdapter,
|
|
135
|
+
SHELL_HISTORY_NAME: NAME,
|
|
136
|
+
SHELL_HISTORY_VERSION: VERSION,
|
|
137
|
+
};
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
const {
|
|
4
|
+
ShellHistoryAdapter,
|
|
5
|
+
SHELL_HISTORY_NAME,
|
|
6
|
+
SHELL_HISTORY_VERSION,
|
|
7
|
+
} = require("./adapter");
|
|
8
|
+
const reader = require("./shell-reader");
|
|
9
|
+
|
|
10
|
+
module.exports = {
|
|
11
|
+
ShellHistoryAdapter,
|
|
12
|
+
SHELL_HISTORY_NAME,
|
|
13
|
+
SHELL_HISTORY_VERSION,
|
|
14
|
+
defaultHistorySources: reader.defaultHistorySources,
|
|
15
|
+
readHistoryFile: reader.readHistoryFile,
|
|
16
|
+
readAllHistory: reader.readAllHistory,
|
|
17
|
+
};
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
// shell-reader — reads PowerShell, bash, and zsh command history files.
// All three are line-oriented: one command per line (zsh extended history
// prefixes each line with ": <ts>:<dur>;", which is stripped). Per-command
// timestamps are not used, so we anchor every command to the file mtime
// and use sourceIndex for stable ordering (same pattern as the VSCode
// terminal adapter).
|
|
8
|
+
|
|
9
|
+
const fs = require("node:fs");
|
|
10
|
+
const path = require("node:path");
|
|
11
|
+
const os = require("node:os");
|
|
12
|
+
|
|
13
|
+
/**
 * Lists the history files probed by default on the current platform.
 * Existence is not checked here.
 *
 * @returns {Array<{shell: string, file: string}>} pwsh (Windows with APPDATA
 *   set), then bash (always), then zsh (non-Windows).
 */
function defaultHistorySources() {
  const homeDir = os.homedir();
  const onWindows = process.platform === "win32";
  const found = [];

  if (onWindows && process.env.APPDATA) {
    const psReadLineHistory = path.join(
      process.env.APPDATA,
      "Microsoft",
      "Windows",
      "PowerShell",
      "PSReadLine",
      "ConsoleHost_history.txt",
    );
    found.push({ shell: "pwsh", file: psReadLineHistory });
  }

  // bash history exists on Windows (Git Bash / WSL) as well as Unix.
  found.push({ shell: "bash", file: path.join(homeDir, ".bash_history") });

  // zsh is only probed off Windows.
  if (!onWindows) {
    found.push({ shell: "zsh", file: path.join(homeDir, ".zsh_history") });
  }

  return found;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Reads one history file into rows.
 *
 * Blank / whitespace-only lines are skipped. `sourceIndex` is the zero-based
 * line number in the file (skipped lines still consume an index), so a row
 * keeps the same index across syncs as long as the lines before it do not
 * change.
 *
 * Shell-specific handling:
 *  - zsh: the extended-history prefix ": <ts>:<dur>;" is stripped.
 *  - bash: lines that are only "#<digits>" are skipped. With HISTTIMEFORMAT
 *    set, bash writes such a timestamp marker before each entry; they are
 *    not commands.
 *
 * @param {{shell: string, file: string}} source
 * @param {object} [opts]
 * @param {object} [opts.fs] - fs-like object providing existsSync, statSync
 *   and readFileSync; defaults to node:fs.
 * @returns {{shell: string, file: string, mtimeMs: number, rows: object[]} | null}
 *   null when the file is missing or cannot be stat'ed / read.
 */
function readHistoryFile(source, opts = {}) {
  const fsMod = opts.fs || fs;
  if (!fsMod.existsSync(source.file)) return null;

  let stat;
  let text;
  try {
    stat = fsMod.statSync(source.file);
    text = fsMod.readFileSync(source.file, "utf-8");
  } catch {
    // Best effort: an unreadable or vanished history file is treated as absent.
    return null;
  }

  const isZsh = source.shell === "zsh";
  const isBash = source.shell === "bash";
  const lines = text.split(/\r?\n/);
  const rows = [];
  for (let i = 0; i < lines.length; i++) {
    let value = lines[i];
    if (isZsh) {
      // zsh extended history stores ": <ts>:<dur>;<cmd>" — strip the prefix.
      const m = value.match(/^: \d+:\d+;(.*)$/);
      if (m) value = m[1];
    }
    value = value.trim();
    if (!value) continue;
    // bash HISTTIMEFORMAT marker ("#1700000000") — metadata, not a command.
    if (isBash && /^#\d+$/.test(value)) continue;
    rows.push({
      shell: source.shell,
      file: source.file,
      value,
      sourceIndex: i,
    });
  }

  return {
    shell: source.shell,
    file: source.file,
    mtimeMs: Math.floor(stat.mtimeMs),
    rows,
  };
}
|
|
80
|
+
|
|
81
|
+
/**
 * Streams rows from every source, one source after another in the order
 * given, each source's rows in file order. Every row is stamped with its
 * file's mtime as `snapshotTs`.
 *
 * @param {Array<{shell: string, file: string}>} sources
 * @param {object} [opts] - passed through to readHistoryFile.
 * @param {number} [opts.since] - positive integer (ms); a source whose file
 *   mtime is older than this is skipped entirely. The filter works on file
 *   mtime, not on per-row timestamps.
 */
function* readAllHistory(sources, opts = {}) {
  let cutoffMs = 0;
  if (Number.isInteger(opts.since) && opts.since > 0) {
    cutoffMs = opts.since;
  }

  for (const src of sources) {
    const file = readHistoryFile(src, opts);
    if (!file) continue;

    const isStale = cutoffMs > 0 && file.mtimeMs < cutoffMs;
    if (isStale) continue;

    for (const row of file.rows) {
      yield { ...row, snapshotTs: file.mtimeMs };
    }
  }
}
|
|
95
|
+
|
|
96
|
+
module.exports = {
|
|
97
|
+
defaultHistorySources,
|
|
98
|
+
readHistoryFile,
|
|
99
|
+
readAllHistory,
|
|
100
|
+
};
|
|
@@ -41,18 +41,23 @@ const fs = require("node:fs");
|
|
|
41
41
|
const { newId } = require("../../ids");
|
|
42
42
|
const {
|
|
43
43
|
ENTITY_TYPES,
|
|
44
|
+
PERSON_SUBTYPES,
|
|
44
45
|
EVENT_SUBTYPES,
|
|
45
46
|
CAPTURED_BY,
|
|
46
47
|
} = require("../../constants");
|
|
47
48
|
|
|
48
49
|
const NAME = "social-kuaishou";
|
|
49
|
-
const VERSION = "0.2.0";
|
|
50
|
+
const VERSION = "0.2.1";
|
|
50
51
|
const SNAPSHOT_SCHEMA_VERSION = 1;
|
|
51
52
|
|
|
53
|
+
const KIND_PROFILE = "profile";
|
|
52
54
|
const KIND_WATCH = "watch";
|
|
53
55
|
const KIND_COLLECT = "collect";
|
|
54
56
|
const KIND_SEARCH = "search";
|
|
57
|
+
// v0.2.1 — KIND_PROFILE added (mirrors Douyin/Toutiao); v0.3 will add watch/
|
|
58
|
+
// collect/search via NS_sig3. SNAPSHOT_SCHEMA_VERSION stays at 1 — additive.
|
|
55
59
|
const VALID_SNAPSHOT_KINDS = Object.freeze([
|
|
60
|
+
KIND_PROFILE,
|
|
56
61
|
KIND_WATCH,
|
|
57
62
|
KIND_COLLECT,
|
|
58
63
|
KIND_SEARCH,
|
|
@@ -102,6 +107,7 @@ class KuaishouAdapter {
|
|
|
102
107
|
this.capabilities = [
|
|
103
108
|
"sync:snapshot",
|
|
104
109
|
"sync:sqlite",
|
|
110
|
+
"parse:kuaishou-profile",
|
|
105
111
|
"parse:kuaishou-photo-history",
|
|
106
112
|
"parse:kuaishou-user-collect",
|
|
107
113
|
"parse:kuaishou-search",
|
|
@@ -110,6 +116,7 @@ class KuaishouAdapter {
|
|
|
110
116
|
this.rateLimits = {};
|
|
111
117
|
this.dataDisclosure = {
|
|
112
118
|
fields: [
|
|
119
|
+
"kuaishou:profile (user_id / user_name / kuaishou_id / headurl / sex / city)",
|
|
113
120
|
"kuaishou:photo_history (photo_id / caption / view_time / duration / author_id)",
|
|
114
121
|
"kuaishou:user_collect (photo_id / caption / collect_time)",
|
|
115
122
|
"kuaishou:search_record (keyword / search_time)",
|
|
@@ -117,6 +124,7 @@ class KuaishouAdapter {
|
|
|
117
124
|
sensitivity: "medium",
|
|
118
125
|
legalGate: false,
|
|
119
126
|
defaultInclude: {
|
|
127
|
+
profile: true,
|
|
120
128
|
watch: true,
|
|
121
129
|
collect: true,
|
|
122
130
|
search: true,
|
|
@@ -307,6 +315,9 @@ class KuaishouAdapter {
|
|
|
307
315
|
const kind = raw.kind || raw.payload.kind;
|
|
308
316
|
const p = raw.payload;
|
|
309
317
|
|
|
318
|
+
if (kind === KIND_PROFILE) {
|
|
319
|
+
return normalizeProfile(p, raw, ingestedAt);
|
|
320
|
+
}
|
|
310
321
|
if (kind === KIND_COLLECT) {
|
|
311
322
|
return normalizeCollect(p, raw, ingestedAt);
|
|
312
323
|
}
|
|
@@ -320,6 +331,47 @@ class KuaishouAdapter {
|
|
|
320
331
|
}
|
|
321
332
|
}
|
|
322
333
|
|
|
334
|
+
/**
 * Builds the person record for the logged-in Kuaishou user from a profile
 * snapshot payload.
 *
 * The uid and nickname fall back to `p.account.uid` / `p.account.displayName`.
 * When a uid is present the record id is derived from it, so repeated syncs
 * land on the same id. Without a uid the id embeds `newId()`.
 * NOTE(review): presumably `newId()` is unique per call; if so, uid-less
 * profiles will not dedupe across syncs — verify.
 *
 * @param {object} p - profile payload.
 * @param {object} raw - raw record wrapping the payload.
 * @param {number} ingestedAt - ingestion timestamp.
 * @returns {{events: [], persons: object[], places: [], items: [], topics: []}}
 */
function normalizeProfile(p, raw, ingestedAt) {
  const account = p.account;
  const uid = p.uid || (account && account.uid) || null;
  const nickname = p.nickname || (account && account.displayName) || "(unnamed)";

  // Snapshot time: payload timestamp, then the raw record's, then ingestion time.
  const occurredAt = parseTime(p.capturedAt) || raw.capturedAt || ingestedAt;
  const source = buildSource(raw, occurredAt, CAPTURED_BY.API);

  const identifiers = {};
  if (uid) {
    identifiers["kuaishou-uid"] = [String(uid)];
  }
  if (p.kuaishouId) {
    identifiers["kuaishou-id"] = [String(p.kuaishouId)];
  }

  const selfPerson = {
    id: uid ? `person-kuaishou-${uid}` : `person-kuaishou-self-${newId()}`,
    type: ENTITY_TYPES.PERSON,
    subtype: PERSON_SUBTYPES.SELF,
    names: [nickname],
    ingestedAt,
    source,
    identifiers,
    extra: {
      platform: "kuaishou",
      avatarUrl: p.avatarUrl || null,
      sex: p.sex || null,
      city: p.city || null,
      constellation: p.constellation || null,
      description: p.description || null,
      snapshottedAt: occurredAt,
    },
  };

  return { events: [], persons: [selfPerson], places: [], items: [], topics: [] };
}
|
|
374
|
+
|
|
323
375
|
function buildSource(raw, occurredAt, capturedBy) {
|
|
324
376
|
return {
|
|
325
377
|
adapter: NAME,
|
|
@@ -486,4 +538,8 @@ module.exports = {
|
|
|
486
538
|
VERSION,
|
|
487
539
|
SNAPSHOT_SCHEMA_VERSION,
|
|
488
540
|
VALID_SNAPSHOT_KINDS,
|
|
541
|
+
KIND_PROFILE,
|
|
542
|
+
KIND_WATCH,
|
|
543
|
+
KIND_COLLECT,
|
|
544
|
+
KIND_SEARCH,
|
|
489
545
|
};
|
|
@@ -44,18 +44,25 @@ const fs = require("node:fs");
|
|
|
44
44
|
const { newId } = require("../../ids");
|
|
45
45
|
const {
|
|
46
46
|
ENTITY_TYPES,
|
|
47
|
+
PERSON_SUBTYPES,
|
|
47
48
|
EVENT_SUBTYPES,
|
|
48
49
|
CAPTURED_BY,
|
|
49
50
|
} = require("../../constants");
|
|
50
51
|
|
|
51
52
|
const NAME = "social-toutiao";
|
|
52
|
-
const VERSION = "0.2.0";
|
|
53
|
+
const VERSION = "0.2.1";
|
|
53
54
|
const SNAPSHOT_SCHEMA_VERSION = 1;
|
|
54
55
|
|
|
56
|
+
const KIND_PROFILE = "profile";
|
|
55
57
|
const KIND_READ = "read";
|
|
56
58
|
const KIND_COLLECTION = "collection";
|
|
57
59
|
const KIND_SEARCH = "search";
|
|
60
|
+
// v0.2.1 — KIND_PROFILE added (mirrors Douyin); v0.3 will add read/collection
|
|
61
|
+
// /search once _signature path is wired. SNAPSHOT_SCHEMA_VERSION stays at 1:
|
|
62
|
+
// old (events-only) snapshots remain compatible; new profile events are an
|
|
63
|
+
// additive extension.
|
|
58
64
|
const VALID_SNAPSHOT_KINDS = Object.freeze([
|
|
65
|
+
KIND_PROFILE,
|
|
59
66
|
KIND_READ,
|
|
60
67
|
KIND_COLLECTION,
|
|
61
68
|
KIND_SEARCH,
|
|
@@ -106,6 +113,7 @@ class ToutiaoAdapter {
|
|
|
106
113
|
this.capabilities = [
|
|
107
114
|
"sync:snapshot",
|
|
108
115
|
"sync:sqlite",
|
|
116
|
+
"parse:toutiao-profile",
|
|
109
117
|
"parse:toutiao-read-history",
|
|
110
118
|
"parse:toutiao-collection",
|
|
111
119
|
"parse:toutiao-search",
|
|
@@ -116,6 +124,7 @@ class ToutiaoAdapter {
|
|
|
116
124
|
this.rateLimits = {};
|
|
117
125
|
this.dataDisclosure = {
|
|
118
126
|
fields: [
|
|
127
|
+
"toutiao:profile (user_id / screen_name / avatar / mobile / following / followers)",
|
|
119
128
|
"toutiao:read_history (item_id / title / read_time / category)",
|
|
120
129
|
"toutiao:collection_article (item_id / title / save_time)",
|
|
121
130
|
"toutiao:search_history (keyword / search_time)",
|
|
@@ -124,6 +133,7 @@ class ToutiaoAdapter {
|
|
|
124
133
|
sensitivity: "high",
|
|
125
134
|
legalGate: false,
|
|
126
135
|
defaultInclude: {
|
|
136
|
+
profile: true,
|
|
127
137
|
read: true,
|
|
128
138
|
collection: true,
|
|
129
139
|
search: true,
|
|
@@ -317,6 +327,9 @@ class ToutiaoAdapter {
|
|
|
317
327
|
const kind = raw.kind || raw.payload.kind;
|
|
318
328
|
const p = raw.payload;
|
|
319
329
|
|
|
330
|
+
if (kind === KIND_PROFILE) {
|
|
331
|
+
return normalizeProfile(p, raw, ingestedAt);
|
|
332
|
+
}
|
|
320
333
|
if (kind === KIND_COLLECTION) {
|
|
321
334
|
return normalizeCollection(p, raw, ingestedAt);
|
|
322
335
|
}
|
|
@@ -330,6 +343,47 @@ class ToutiaoAdapter {
|
|
|
330
343
|
}
|
|
331
344
|
}
|
|
332
345
|
|
|
346
|
+
/**
 * Builds the person record for the logged-in Toutiao user from a profile
 * snapshot payload.
 *
 * The uid and nickname fall back to `p.account.uid` / `p.account.displayName`.
 * When a uid is present the record id is derived from it, so repeated syncs
 * land on the same id. Without a uid the id embeds `newId()`.
 * NOTE(review): presumably `newId()` is unique per call; if so, uid-less
 * profiles will not dedupe across syncs — verify.
 *
 * @param {object} p - profile payload.
 * @param {object} raw - raw record wrapping the payload.
 * @param {number} ingestedAt - ingestion timestamp.
 * @returns {{events: [], persons: object[], places: [], items: [], topics: []}}
 */
function normalizeProfile(p, raw, ingestedAt) {
  const account = p.account;
  const uid = p.uid || (account && account.uid) || null;
  const nickname = p.nickname || (account && account.displayName) || "(unnamed)";

  // Snapshot time: payload timestamp, then the raw record's, then ingestion time.
  const occurredAt = parseTime(p.capturedAt) || raw.capturedAt || ingestedAt;
  const source = buildSource(raw, occurredAt, CAPTURED_BY.API);

  const identifiers = {};
  if (uid) {
    identifiers["toutiao-uid"] = [String(uid)];
  }
  if (p.mediaId) {
    identifiers["toutiao-media-id"] = [String(p.mediaId)];
  }

  const selfPerson = {
    id: uid ? `person-toutiao-${uid}` : `person-toutiao-self-${newId()}`,
    type: ENTITY_TYPES.PERSON,
    subtype: PERSON_SUBTYPES.SELF,
    names: [nickname],
    ingestedAt,
    source,
    identifiers,
    extra: {
      platform: "toutiao",
      avatarUrl: p.avatarUrl || null,
      description: p.description || null,
      mobile: p.mobile || null,
      // Missing or falsy counts are reported as 0.
      followingCount: p.followingCount || 0,
      followerCount: p.followerCount || 0,
      mediaId: p.mediaId || null,
      snapshottedAt: occurredAt,
    },
  };

  return { events: [], persons: [selfPerson], places: [], items: [], topics: [] };
}
|
|
386
|
+
|
|
333
387
|
function buildSource(raw, occurredAt, capturedBy) {
|
|
334
388
|
return {
|
|
335
389
|
adapter: NAME,
|
|
@@ -503,4 +557,8 @@ module.exports = {
|
|
|
503
557
|
VERSION,
|
|
504
558
|
SNAPSHOT_SCHEMA_VERSION,
|
|
505
559
|
VALID_SNAPSHOT_KINDS,
|
|
560
|
+
KIND_PROFILE,
|
|
561
|
+
KIND_READ,
|
|
562
|
+
KIND_COLLECTION,
|
|
563
|
+
KIND_SEARCH,
|
|
506
564
|
};
|