@chainlesschain/personal-data-hub 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/browser-history-chrome.test.js +377 -0
- package/__tests__/adapters/browser-history-edge.test.js +159 -0
- package/__tests__/adapters/git-activity.test.js +216 -0
- package/__tests__/adapters/local-files.test.js +264 -0
- package/__tests__/adapters/shell-history.test.js +180 -0
- package/__tests__/adapters/system-data-android.test.js +104 -3
- package/__tests__/adapters/vscode.test.js +299 -0
- package/__tests__/adapters/win-recent.test.js +192 -0
- package/__tests__/analysis.test.js +841 -2
- package/__tests__/categories.test.js +92 -0
- package/__tests__/e2e/local-data-adapters-cli.e2e.test.js +146 -0
- package/__tests__/entity-resolver-vault.test.js +5 -2
- package/__tests__/integration/local-data-adapters-pipeline.test.js +373 -0
- package/__tests__/longtail-adapters.test.js +7 -2
- package/__tests__/query-parser.test.js +66 -0
- package/__tests__/registry.test.js +114 -0
- package/__tests__/sidecar-contacts-cross-validate.test.js +24 -1
- package/__tests__/sidecar-supervisor.test.js +9 -1
- package/__tests__/social-kuaishou-snapshot.test.js +55 -2
- package/__tests__/social-toutiao-snapshot.test.js +54 -2
- package/__tests__/vault-search-helpers.test.js +104 -0
- package/__tests__/vault-search.test.js +423 -0
- package/__tests__/vault.test.js +77 -3
- package/lib/adapters/browser-history-chrome/adapter.js +247 -0
- package/lib/adapters/browser-history-chrome/bookmarks-reader.js +79 -0
- package/lib/adapters/browser-history-chrome/chrome-db-reader.js +223 -0
- package/lib/adapters/browser-history-chrome/index.js +23 -0
- package/lib/adapters/browser-history-edge/adapter.js +34 -0
- package/lib/adapters/browser-history-edge/index.js +13 -0
- package/lib/adapters/git-activity/adapter.js +155 -0
- package/lib/adapters/git-activity/git-reader.js +125 -0
- package/lib/adapters/git-activity/index.js +17 -0
- package/lib/adapters/local-files/adapter.js +149 -0
- package/lib/adapters/local-files/file-walker.js +125 -0
- package/lib/adapters/local-files/index.js +18 -0
- package/lib/adapters/shell-history/adapter.js +137 -0
- package/lib/adapters/shell-history/index.js +17 -0
- package/lib/adapters/shell-history/shell-reader.js +100 -0
- package/lib/adapters/social-kuaishou/index.js +57 -1
- package/lib/adapters/social-toutiao/index.js +59 -1
- package/lib/adapters/system-data-android/adapter.js +220 -3
- package/lib/adapters/vscode/adapter.js +285 -0
- package/lib/adapters/vscode/index.js +18 -0
- package/lib/adapters/vscode/vscode-reader.js +191 -0
- package/lib/adapters/win-recent/adapter.js +150 -0
- package/lib/adapters/win-recent/index.js +16 -0
- package/lib/adapters/win-recent/win-recent-reader.js +72 -0
- package/lib/analysis.js +227 -9
- package/lib/categories.js +101 -0
- package/lib/index.js +61 -0
- package/lib/migrations.js +146 -0
- package/lib/query-parser.js +74 -0
- package/lib/registry.js +162 -0
- package/lib/vault.js +363 -2
- package/package.json +2 -1
- package/scripts/run-native-tests-sandbox.sh +53 -0
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
// BrowserHistoryChromeAdapter — desktop Chrome (and Chromium-family) data
|
|
4
|
+
// adapter. Reads the `History` SQLite DB + Bookmarks JSON directly from the user's
|
|
5
|
+
// own profile directory; no extension, no network, no permission prompts.
|
|
6
|
+
//
|
|
7
|
+
// Supported profiles auto-detected on platform:
|
|
8
|
+
// Windows: %LOCALAPPDATA%\Google\Chrome\User Data\Default
|
|
9
|
+
// macOS: ~/Library/Application Support/Google/Chrome/Default
|
|
10
|
+
// Linux: ~/.config/google-chrome/Default
|
|
11
|
+
// `opts.profilePath` overrides — point at "Profile 1", an Edge profile,
|
|
12
|
+
// or a copy of one for testing.
|
|
13
|
+
//
|
|
14
|
+
// Chrome locks History while running. We snapshot via fs.copyFileSync to
|
|
15
|
+
// %TEMP% (carrying the WAL sidecar) and read from the copy. ~7 MB DB copies
|
|
16
|
+
// in <50ms; even 100k-visit profiles iterate in seconds.
|
|
17
|
+
|
|
18
|
+
const path = require("node:path");
|
|
19
|
+
const fs = require("node:fs");
|
|
20
|
+
|
|
21
|
+
const {
|
|
22
|
+
ENTITY_TYPES,
|
|
23
|
+
EVENT_SUBTYPES,
|
|
24
|
+
ITEM_SUBTYPES,
|
|
25
|
+
CAPTURED_BY,
|
|
26
|
+
} = require("../../constants");
|
|
27
|
+
|
|
28
|
+
const {
|
|
29
|
+
defaultChromeProfileDir,
|
|
30
|
+
copyHistorySnapshot,
|
|
31
|
+
cleanupHistorySnapshot,
|
|
32
|
+
readVisits,
|
|
33
|
+
} = require("./chrome-db-reader");
|
|
34
|
+
const { readBookmarks } = require("./bookmarks-reader");
|
|
35
|
+
|
|
36
|
+
const NAME = "browser-history-chrome";
|
|
37
|
+
const VERSION = "0.1.0";
|
|
38
|
+
|
|
39
|
+
// The adapter is browser-agnostic — Chromium-derived browsers (Chrome / Edge /
|
|
40
|
+
// Brave / Vivaldi / Arc) share the History SQLite + Bookmarks JSON schema.
|
|
41
|
+
// Subclasses override `_browserConfig()` to point at a different profile root.
|
|
42
|
+
/**
 * Data adapter for a desktop Chromium-family browser profile.
 *
 * Reads visits from a temp snapshot of the profile's `History` SQLite file
 * and bookmarks from the profile's `Bookmarks` JSON file, then maps the raw
 * records to vault entities via `normalize()`. Subclasses point the adapter
 * at another browser by overriding `_browserConfig()`.
 */
class BrowserHistoryChromeAdapter {
  /**
   * @param {object} [opts]
   * @param {string} [opts.profilePath] Profile directory to use instead of
   *   the platform default returned by the browser config.
   */
  constructor(opts = {}) {
    const cfg = this._browserConfig();
    this.name = cfg.name;
    this.version = cfg.version;
    this._browser = cfg.browser;
    this.capabilities = [
      `sync:${cfg.browser}-history-sqlite`,
      `sync:${cfg.browser}-bookmarks-json`,
    ];
    this.extractMode = "file-import";
    this.rateLimits = { perDay: 96 }; // ~once per 15 min ceiling
    this.dataDisclosure = {
      fields: [
        "visits:url,title,visitTimeMs,transition,visitDurationMs,hidden",
        "bookmarks:url,name,dateAddedMs,folderPath",
      ],
      sensitivity: "high",
      legalGate: false,
      defaultInclude: { history: true, bookmarks: true },
    };
    // Injectable dependencies. `fs` is the module-level node:fs import; it is
    // the same module object a second require("node:fs") would return.
    this._deps = {
      fs,
      defaultProfileDir: cfg.defaultProfileDir,
    };
    this._profileOverride = typeof opts.profilePath === "string" ? opts.profilePath : null;
  }

  /**
   * Browser-specific settings. Override in subclasses.
   *
   * @returns {{name: string, version: string, browser: string, defaultProfileDir: Function}}
   */
  _browserConfig() {
    return {
      name: NAME,
      version: VERSION,
      browser: "chrome",
      defaultProfileDir: defaultChromeProfileDir,
    };
  }

  /**
   * Pick the profile directory. Precedence: non-empty per-call
   * `opts.profilePath`, then the constructor override, then the platform
   * default (which may be null).
   *
   * @param {object} [opts]
   * @returns {string|null}
   */
  _resolveProfileDir(opts) {
    if (typeof opts?.profilePath === "string" && opts.profilePath.length > 0) {
      return opts.profilePath;
    }
    if (this._profileOverride) return this._profileOverride;
    return this._deps.defaultProfileDir();
  }

  /**
   * Check that a profile directory resolves and contains a `History` file.
   *
   * @param {object} [ctx] May carry a `profilePath` override.
   * @returns {Promise<object>} `{ ok: true, mode, profileDir }` on success,
   *   otherwise `{ ok: false, reason, message }` with reason
   *   `PROFILE_PATH_UNRESOLVED` or `PROFILE_NOT_FOUND`.
   */
  async authenticate(ctx = {}) {
    const dir = this._resolveProfileDir(ctx);
    if (!dir) {
      return {
        ok: false,
        reason: "PROFILE_PATH_UNRESOLVED",
        message: `no default ${this._browser} profile dir on this platform; pass opts.profilePath`,
      };
    }
    const histPath = path.join(dir, "History");
    if (!this._deps.fs.existsSync(histPath)) {
      return {
        ok: false,
        reason: "PROFILE_NOT_FOUND",
        message: `no ${this._browser} History at ${histPath} — install ${this._browser} / open it at least once, or point opts.profilePath at a different profile`,
      };
    }
    return { ok: true, mode: "file-import", profileDir: dir };
  }

  /**
   * Report whether the configured profile currently has a `History` file.
   *
   * @returns {Promise<{ok: boolean, lastChecked: number}>}
   */
  async healthCheck() {
    const dir = this._resolveProfileDir({});
    const ok = !!dir && this._deps.fs.existsSync(path.join(dir, "History"));
    return { ok, lastChecked: Date.now() };
  }

  /**
   * Stream raw records: visits first (from a temp snapshot of `History`),
   * then bookmarks. Each record is `{ kind, originalId, capturedAt, payload }`.
   *
   * @param {object} [opts]
   * @param {string} [opts.profilePath] Per-call profile directory override.
   * @param {{history?: boolean, bookmarks?: boolean}} [opts.include] Either
   *   source is skipped only when its flag is exactly `false`.
   * @param {number} [opts.limit] Positive integer cap on the total number of
   *   records (visits and bookmarks combined); anything else means no cap.
   * @param {number} [opts.since] Forwarded to `readVisits`.
   * @param {boolean} [opts.includeHidden] Forwarded to `readVisits`.
   * @throws {Error} When no `History` file exists for the resolved profile.
   */
  async *sync(opts = {}) {
    const profileDir = this._resolveProfileDir(opts);
    if (!profileDir || !this._deps.fs.existsSync(path.join(profileDir, "History"))) {
      throw new Error(
        `${this.name}.sync: no History at ${path.join(profileDir || "?", "History")} — set opts.profilePath`,
      );
    }

    const includeHistory = opts.include?.history !== false;
    const includeBookmarks = opts.include?.bookmarks !== false;
    const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
    const capturedAt = Date.now();
    let emitted = 0;

    // History (SQLite snapshot)
    if (includeHistory) {
      let tmp = null;
      try {
        tmp = copyHistorySnapshot(profileDir, { fs: this._deps.fs });
        for (const v of readVisits(tmp, {
          since: opts.since,
          limit: Number.isFinite(limit) ? limit : undefined,
          includeHidden: opts.includeHidden === true,
        })) {
          if (emitted >= limit) return;
          yield {
            kind: "visit",
            originalId: `${this._browser}-visit:${profileDir}:${v.visitId}`,
            capturedAt,
            payload: { ...v, profileDir },
          };
          emitted += 1;
        }
      } finally {
        // Runs on normal completion, on error, and when the consumer stops
        // iterating early, so the temp snapshot is removed in each case.
        if (tmp) cleanupHistorySnapshot(tmp, { fs: this._deps.fs });
      }
    }

    // Bookmarks (JSON)
    if (includeBookmarks) {
      for (const b of readBookmarks(profileDir, { fs: this._deps.fs })) {
        if (emitted >= limit) return;
        yield {
          kind: "bookmark",
          originalId: `${this._browser}-bookmark:${profileDir}:${b.guid || b.id || b.url}`,
          capturedAt,
          payload: { ...b, profileDir },
        };
        emitted += 1;
      }
    }
  }

  /**
   * Map one raw record from `sync()` to vault entities.
   *
   * @param {{kind: string, capturedAt: number, payload?: object}} raw
   * @returns {{events: object[], persons: object[], places: object[], items: object[], topics: object[]}}
   *   A visit becomes one event; a bookmark becomes one item.
   * @throws {Error} When `raw.kind` is neither "visit" nor "bookmark".
   */
  normalize(raw) {
    const ingestedAt = Date.now();
    const browser = this._browser;
    const source = (originalId) => ({
      adapter: this.name,
      adapterVersion: this.version,
      capturedAt: raw.capturedAt,
      capturedBy: CAPTURED_BY.SQLITE,
      originalId,
    });

    if (raw.kind === "visit") {
      const p = raw.payload || {};
      const url = typeof p.url === "string" ? p.url : "";
      const title = typeof p.title === "string" && p.title.length > 0
        ? p.title
        : (url || "(无标题)");
      const occurredAt = Number.isInteger(p.visitTimeMs) ? p.visitTimeMs : raw.capturedAt;

      // Cap the title at 200 UTF-16 units. If the cut would land between the
      // two halves of a surrogate pair, drop the dangling high surrogate so
      // the stored title never ends in a lone surrogate.
      let shownTitle = title;
      if (title.length > 200) {
        const lastKept = title.charCodeAt(199);
        const cut = lastKept >= 0xd800 && lastKept <= 0xdbff ? 199 : 200;
        shownTitle = title.substring(0, cut) + "…";
      }

      const event = {
        id: `event-${browser}-visit-${p.visitId}`,
        type: ENTITY_TYPES.EVENT,
        subtype: EVENT_SUBTYPES.BROWSE,
        occurredAt,
        ingestedAt,
        source: source(`${browser}-visit:${p.profileDir}:${p.visitId}`),
        actor: "self",
        content: {
          title: shownTitle,
          text: url,
        },
      };
      if (Number.isInteger(p.visitDurationMs) && p.visitDurationMs > 0) {
        event.durationMs = p.visitDurationMs;
      }
      event.extra = {
        url,
        transition: p.transition || null,
        rawTransition: Number.isInteger(p.rawTransition) ? p.rawTransition : null,
        visitCount: p.visitCount || 0,
        typedCount: p.typedCount || 0,
        hidden: p.hidden === true,
        fromVisit: p.fromVisit || 0,
        browser,
        profileDir: p.profileDir,
      };
      return { events: [event], persons: [], places: [], items: [], topics: [] };
    }

    if (raw.kind === "bookmark") {
      const p = raw.payload || {};
      const url = typeof p.url === "string" ? p.url : "";
      const name = typeof p.name === "string" && p.name.length > 0 ? p.name : url;
      // Same fallback chain as sync() uses for the bookmark originalId.
      const stableId = p.guid || p.id || url;
      const item = {
        id: `item-${browser}-bookmark-${stableId}`,
        type: ENTITY_TYPES.ITEM,
        subtype: ITEM_SUBTYPES.LINK,
        name,
        category: "bookmark",
        ingestedAt,
        source: source(`${browser}-bookmark:${p.profileDir}:${stableId}`),
        extra: {
          url,
          dateAddedMs: Number.isInteger(p.dateAddedMs) ? p.dateAddedMs : null,
          dateLastUsedMs: Number.isInteger(p.dateLastUsedMs) ? p.dateLastUsedMs : null,
          folderPath: typeof p.folderPath === "string" ? p.folderPath : null,
          browser,
          profileDir: p.profileDir,
        },
      };
      return { events: [], persons: [], places: [], items: [item], topics: [] };
    }

    throw new Error(`${this.name}.normalize: unknown raw.kind=${raw.kind}`);
  }
}
|
|
242
|
+
|
|
243
|
+
module.exports = {
|
|
244
|
+
BrowserHistoryChromeAdapter,
|
|
245
|
+
BROWSER_HISTORY_CHROME_NAME: NAME,
|
|
246
|
+
BROWSER_HISTORY_CHROME_VERSION: VERSION,
|
|
247
|
+
};
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
// bookmarks-reader — Chrome's Bookmarks file is plain JSON next to the
|
|
4
|
+
// History DB. No copy-first needed; Chrome writes atomically. We walk
|
|
5
|
+
// the tree depth-first so each url node carries its folder path.
|
|
6
|
+
|
|
7
|
+
const fs = require("node:fs");
|
|
8
|
+
const path = require("node:path");
|
|
9
|
+
|
|
10
|
+
const ROOT_LABELS = {
|
|
11
|
+
bookmark_bar: "书签栏",
|
|
12
|
+
other: "其他书签",
|
|
13
|
+
synced: "移动设备书签",
|
|
14
|
+
};
|
|
15
|
+
|
|
16
|
+
/**
 * Convert a Bookmarks-file timestamp (decimal string of WebKit microseconds,
 * i.e. µs since 1601-01-01 UTC) to Unix epoch milliseconds.
 *
 * @param {*} s Value of a `date_added` / `date_last_used` field.
 * @returns {number|null} Epoch milliseconds, or null when the value is not a
 *   non-empty string, does not parse as an integer, or is not positive.
 */
function webkitUsStrToEpochMs(s) {
  if (typeof s !== "string" || s.length === 0) return null;
  let us;
  try {
    // BigInt keeps full precision; current WebKit µs values exceed 2^53.
    us = BigInt(s);
  } catch (_e) {
    return null; // not an integer string
  }
  // Chrome writes "0" for a timestamp that was never set (typical for
  // date_last_used). Report that as "no value" rather than a 1601 date.
  if (us <= 0n) return null;
  return Number((us - 11_644_473_600_000_000n) / 1000n);
}
|
|
27
|
+
|
|
28
|
+
/**
 * Depth-first walk of one Bookmarks tree node.
 *
 * Yields one record per `url` node; descends into `folder` nodes, extending
 * the folder trail with the folder's name when it has a non-empty one. Nodes
 * of any other shape are ignored.
 *
 * @param {*} node Parsed JSON node.
 * @param {string[]} folderTrail Names of the enclosing folders, outermost first.
 */
function* walkNode(node, folderTrail) {
  if (node === null || typeof node !== "object") return;

  const { type, name, url, children } = node;

  if (type === "url" && typeof url === "string") {
    yield {
      id: node.id,
      guid: node.guid,
      name: typeof name === "string" ? name : url,
      url,
      dateAddedMs: webkitUsStrToEpochMs(node.date_added),
      dateLastUsedMs: webkitUsStrToEpochMs(node.date_last_used),
      folderPath: folderTrail.join(" / "),
    };
    return;
  }

  if (type !== "folder" || !Array.isArray(children)) return;

  // Unnamed folders are transparent: their children inherit the parent trail.
  const named = typeof name === "string" && name.length > 0;
  const trail = named ? [...folderTrail, name] : folderTrail;
  for (const child of children) {
    yield* walkNode(child, trail);
  }
}
|
|
50
|
+
|
|
51
|
+
// Read + iterate every bookmark url node across all three roots
|
|
52
|
+
// (bookmark_bar / other / synced). Returns a generator so a callsite that
|
|
53
|
+
// only wants a count can short-circuit.
|
|
54
|
+
/**
 * Lazily yield every bookmark url record found under the roots of the
 * profile's `Bookmarks` JSON file. Yields nothing when the file is absent.
 *
 * @param {string} profileDir Browser profile directory.
 * @param {object} [opts]
 * @param {object} [opts.fs] fs-compatible module (existsSync, readFileSync);
 *   defaults to node:fs.
 * @throws {SyntaxError} From JSON.parse when the file is not valid JSON.
 */
function* readBookmarks(profileDir, opts = {}) {
  const fsMod = opts.fs || fs;
  const bookmarksFile = path.join(profileDir, "Bookmarks");
  if (!fsMod.existsSync(bookmarksFile)) return;

  const parsed = JSON.parse(fsMod.readFileSync(bookmarksFile, "utf-8"));
  const roots = (parsed && parsed.roots) || {};

  for (const rootKey of Object.keys(roots)) {
    const rootNode = roots[rootKey];
    // Skip non-object entries and roots without a children array.
    if (!rootNode || typeof rootNode !== "object") continue;
    if (!Array.isArray(rootNode.children)) continue;

    // Start the trail at the mapped root label and walk the children directly,
    // so the root folder's own `name` is not appended as a second segment.
    const rootLabel = ROOT_LABELS[rootKey] || rootKey;
    for (const child of rootNode.children) {
      yield* walkNode(child, [rootLabel]);
    }
  }
}
|
|
75
|
+
|
|
76
|
+
module.exports = {
|
|
77
|
+
readBookmarks,
|
|
78
|
+
ROOT_LABELS,
|
|
79
|
+
};
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
// chrome-db-reader — opens a copy of Chrome's History SQLite and yields
|
|
4
|
+
// rows. We MUST copy first; Chrome holds an exclusive lock on the live
|
|
5
|
+
// file while running, and even when closed the WAL files (`-wal`, `-shm`)
|
|
6
|
+
// need to come along or we'd see a stale snapshot.
|
|
7
|
+
|
|
8
|
+
const fs = require("node:fs");
|
|
9
|
+
const path = require("node:path");
|
|
10
|
+
const os = require("node:os");
|
|
11
|
+
// Dual-load: bs3mc tracks Electron's ABI 140 (runtime path), plain
|
|
12
|
+
// better-sqlite3 tracks Node's ABI 127 (test path). Whichever loads
|
|
13
|
+
// without NODE_MODULE_VERSION mismatch wins. Both expose the same
|
|
14
|
+
// Database class for unencrypted DBs.
|
|
15
|
+
function loadDatabase() {
|
|
16
|
+
for (const mod of ["better-sqlite3-multiple-ciphers", "better-sqlite3"]) {
|
|
17
|
+
let cls;
|
|
18
|
+
try {
|
|
19
|
+
// eslint-disable-next-line global-require
|
|
20
|
+
cls = require(mod);
|
|
21
|
+
} catch (_e) {
|
|
22
|
+
continue; // require failed, try next
|
|
23
|
+
}
|
|
24
|
+
// require() returns the JS class even when the native binding is
|
|
25
|
+
// ABI-mismatched; instantiation is what actually loads the .node
|
|
26
|
+
// and throws. Smoke-test with an in-memory DB.
|
|
27
|
+
try {
|
|
28
|
+
const probe = new cls(":memory:");
|
|
29
|
+
probe.close();
|
|
30
|
+
return cls;
|
|
31
|
+
} catch (_e) {
|
|
32
|
+
// ABI mismatch — try next candidate
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
throw new Error(
|
|
36
|
+
"chrome-db-reader: neither better-sqlite3-multiple-ciphers nor better-sqlite3 loaded — both ABI-mismatched",
|
|
37
|
+
);
|
|
38
|
+
}
|
|
39
|
+
const Database = loadDatabase();
|
|
40
|
+
|
|
41
|
+
// WebKit timestamps are microseconds since 1601-01-01 UTC. Convert to
|
|
42
|
+
// epoch-ms by shifting the epoch (11644473600 seconds × 1e6 µs/s).
|
|
43
|
+
const WEBKIT_EPOCH_DELTA_US = 11_644_473_600_000_000n;
|
|
44
|
+
/**
 * Convert WebKit microseconds (µs since 1601-01-01 UTC) to Unix epoch
 * milliseconds.
 *
 * @param {number|bigint|string|null|undefined} wkUs
 * @returns {number|null} Epoch milliseconds (fraction truncated), or null
 *   when the input is null/undefined, a non-finite Number, or a value BigInt
 *   cannot parse.
 */
function webkitUsToEpochMs(wkUs) {
  if (wkUs == null) return null;

  let us;
  if (typeof wkUs === "bigint") {
    us = wkUs;
  } else if (typeof wkUs === "number") {
    // BigInt() throws RangeError on NaN/Infinity and on fractional Numbers;
    // reject the former and truncate the latter so a single odd value does
    // not abort the caller's whole iteration.
    if (!Number.isFinite(wkUs)) return null;
    us = BigInt(Math.trunc(wkUs));
  } else {
    try {
      us = BigInt(wkUs);
    } catch (_e) {
      return null; // unparseable string or unsupported type
    }
  }
  return Number((us - WEBKIT_EPOCH_DELTA_US) / 1000n);
}
|
|
50
|
+
/**
 * Convert Unix epoch milliseconds to WebKit microseconds
 * (µs since 1601-01-01 UTC).
 *
 * @param {number|bigint|string} ms Integer millisecond value; anything
 *   BigInt() rejects (e.g. a fractional Number) makes this throw.
 * @returns {bigint}
 */
function epochMsToWebkitUs(ms) {
  const microsSinceUnixEpoch = BigInt(ms) * 1000n;
  return microsSinceUnixEpoch + WEBKIT_EPOCH_DELTA_US;
}
|
|
53
|
+
|
|
54
|
+
// Chrome transition flags (lower 8 bits of `transition`). See
|
|
55
|
+
// chromium/src/ui/base/page_transition_types.h (PAGE_TRANSITION_CORE_MASK = 0xFF).
|
|
56
|
+
const CORE_TRANSITION_NAMES = {
|
|
57
|
+
0: "link",
|
|
58
|
+
1: "typed",
|
|
59
|
+
2: "auto_bookmark",
|
|
60
|
+
3: "auto_subframe",
|
|
61
|
+
4: "manual_subframe",
|
|
62
|
+
5: "generated",
|
|
63
|
+
6: "auto_toplevel",
|
|
64
|
+
7: "form_submit",
|
|
65
|
+
8: "reload",
|
|
66
|
+
9: "keyword",
|
|
67
|
+
10: "keyword_generated",
|
|
68
|
+
};
|
|
69
|
+
/**
 * Name the core transition type held in the low 8 bits of a visit's
 * `transition` value.
 *
 * @param {*} raw Raw `transition` column value.
 * @returns {string|null} The mapped name, `unknown(<n>)` for an unmapped core
 *   value, or null when `raw` is not a finite Number.
 */
function decodeTransition(raw) {
  if (!Number.isFinite(raw)) return null;
  const coreType = raw & 0xff; // mask off everything above the low byte
  const label = CORE_TRANSITION_NAMES[coreType];
  return label || `unknown(${coreType})`;
}
|
|
74
|
+
|
|
75
|
+
/**
 * Platform default location of Chrome's "Default" profile directory.
 *
 * @returns {string|null} The path, or null on Windows when LOCALAPPDATA is
 *   unset or empty.
 */
function defaultChromeProfileDir() {
  switch (process.platform) {
    case "win32": {
      const localAppData = process.env.LOCALAPPDATA;
      return localAppData
        ? path.join(localAppData, "Google", "Chrome", "User Data", "Default")
        : null;
    }
    case "darwin":
      return path.join(os.homedir(), "Library", "Application Support", "Google", "Chrome", "Default");
    default:
      // Every other platform uses the ~/.config layout.
      return path.join(os.homedir(), ".config", "google-chrome", "Default");
  }
}
|
|
86
|
+
|
|
87
|
+
// Edge is Chromium under the hood — identical History/Bookmarks schema,
|
|
88
|
+
// just a different on-disk profile root. Same reader code works.
|
|
89
|
+
/**
 * Platform default location of Microsoft Edge's "Default" profile directory.
 *
 * @returns {string|null} The path, or null on Windows when LOCALAPPDATA is
 *   unset or empty.
 */
function defaultEdgeProfileDir() {
  const { platform } = process;

  if (platform === "darwin") {
    return path.join(os.homedir(), "Library", "Application Support", "Microsoft Edge", "Default");
  }
  if (platform !== "win32") {
    // Every other platform uses the ~/.config layout.
    return path.join(os.homedir(), ".config", "microsoft-edge", "Default");
  }

  const localAppData = process.env.LOCALAPPDATA;
  if (!localAppData) return null;
  return path.join(localAppData, "Microsoft", "Edge", "User Data", "Default");
}
|
|
100
|
+
|
|
101
|
+
// Brave: another Chromium fork; same schema again. Not auto-registered unless
|
|
102
|
+
// the user has Brave installed, since the wiring layer calls authenticate()
|
|
103
|
+
// at the adapter level rather than the registry filtering ahead of time.
|
|
104
|
+
/**
 * Platform default location of Brave's "Default" profile directory.
 *
 * @returns {string|null} The path, or null on Windows when LOCALAPPDATA is
 *   unset or empty.
 */
function defaultBraveProfileDir() {
  // Vendor/product segments shared by all three platform layouts.
  const vendorPath = ["BraveSoftware", "Brave-Browser"];

  switch (process.platform) {
    case "win32": {
      const localAppData = process.env.LOCALAPPDATA;
      if (!localAppData) return null;
      return path.join(localAppData, ...vendorPath, "User Data", "Default");
    }
    case "darwin":
      return path.join(os.homedir(), "Library", "Application Support", ...vendorPath, "Default");
    default:
      return path.join(os.homedir(), ".config", ...vendorPath, "Default");
  }
}
|
|
122
|
+
|
|
123
|
+
// Copy the History file + any sidecar journal/WAL/SHM next to it. Returns
|
|
124
|
+
// the temp path that the caller is responsible for cleaning up.
|
|
125
|
+
/**
 * Copy a profile's `History` file, plus any `-journal` / `-wal` / `-shm`
 * sidecars that exist, to a uniquely named file in the OS temp directory.
 *
 * @param {string} profileDir Browser profile directory.
 * @param {object} [opts]
 * @param {object} [opts.fs] fs-compatible module (existsSync, copyFileSync);
 *   defaults to node:fs.
 * @returns {string} Path of the copied database; the caller must clean it up.
 * @throws {Error} With `code === "CHROME_HISTORY_NOT_FOUND"` when the profile
 *   has no `History` file. A failure copying the main file propagates as-is.
 */
function copyHistorySnapshot(profileDir, opts = {}) {
  const fsMod = opts.fs || fs;
  const historyFile = path.join(profileDir, "History");

  if (!fsMod.existsSync(historyFile)) {
    throw Object.assign(new Error(`Chrome History not found at ${historyFile}`), {
      code: "CHROME_HISTORY_NOT_FOUND",
    });
  }

  // Timestamp + short random suffix keeps concurrent snapshots apart.
  const stamp = Date.now();
  const nonce = Math.random().toString(36).slice(2, 8);
  const snapshot = path.join(os.tmpdir(), `pdh-chrome-${stamp}-${nonce}.db`);

  fsMod.copyFileSync(historyFile, snapshot);

  ["-journal", "-wal", "-shm"].forEach((suffix) => {
    const sidecar = historyFile + suffix;
    if (!fsMod.existsSync(sidecar)) return;
    try {
      fsMod.copyFileSync(sidecar, snapshot + suffix);
    } catch (_e) {
      // Deliberately ignored: a sidecar that cannot be copied is treated as
      // non-fatal and the snapshot is used without it.
    }
  });

  return snapshot;
}
|
|
151
|
+
|
|
152
|
+
/**
 * Best-effort removal of a snapshot database and its possible sidecars
 * (`-journal`, `-wal`, `-shm`). Never throws: each unlink failure is ignored.
 *
 * @param {string} tmpPath Path returned by `copyHistorySnapshot`.
 * @param {object} [opts]
 * @param {object} [opts.fs] fs-compatible module (unlinkSync); defaults to node:fs.
 */
function cleanupHistorySnapshot(tmpPath, opts = {}) {
  const fsMod = opts.fs || fs;
  const targets = ["", "-journal", "-wal", "-shm"].map((suffix) => tmpPath + suffix);

  for (const target of targets) {
    try {
      fsMod.unlinkSync(target);
    } catch (_e) {
      // best-effort: the file may never have existed
    }
  }
}
|
|
162
|
+
|
|
163
|
+
// Yields visit rows in occurredAt-ascending order so the registry's
|
|
164
|
+
// watermark (max occurredAt) advances monotonically across syncs.
|
|
165
|
+
/**
 * Lazily yield visit rows (visits joined with urls) from a History snapshot,
 * ordered by visit time ascending. The database is closed when iteration
 * finishes, fails, or is stopped early by the consumer.
 *
 * @param {string} tmpPath Path of the snapshot database to open read-only.
 * @param {object} [opts]
 * @param {number} [opts.since] Epoch ms; only visits strictly later are
 *   returned. Ignored unless a positive integer.
 * @param {number} [opts.limit] Row cap; defaults to 200000 unless a positive
 *   integer.
 * @param {boolean} [opts.includeHidden] When exactly true, rows whose url is
 *   flagged hidden are included.
 */
function* readVisits(tmpPath, opts = {}) {
  const { since, limit: requestedLimit } = opts;

  const hasSince = Number.isInteger(since) && since > 0;
  const sinceWk = hasSince ? epochMsToWebkitUs(since) : 0n;
  const rowCap =
    Number.isInteger(requestedLimit) && requestedLimit > 0 ? requestedLimit : 200_000;
  const includeHidden = opts.includeHidden === true;

  const db = new Database(tmpPath, { readonly: true });
  try {
    const stmt = db.prepare(
      `SELECT v.id AS visit_id, v.url AS url_id, v.visit_time AS visit_time,
              v.transition AS transition, v.visit_duration AS visit_duration,
              v.from_visit AS from_visit, u.url AS url, u.title AS title,
              u.visit_count AS visit_count, u.typed_count AS typed_count,
              u.hidden AS hidden
       FROM visits v
       JOIN urls u ON v.url = u.id
       WHERE v.visit_time > ?
         ${includeHidden ? "" : "AND u.hidden = 0"}
       ORDER BY v.visit_time ASC
       LIMIT ?`,
    );

    // The BigInt lower bound is bound as its decimal string rather than as a
    // BigInt or a (lossy) Number.
    for (const row of stmt.iterate(sinceWk.toString(), rowCap)) {
      const durationUs = row.visit_duration;
      yield {
        visitId: row.visit_id,
        urlId: row.url_id,
        url: row.url,
        title: row.title || "",
        visitTimeMs: webkitUsToEpochMs(row.visit_time),
        // visit_duration is divided by 1000 and floored; non-integers become 0.
        visitDurationMs: Number.isInteger(durationUs) ? Math.floor(durationUs / 1000) : 0,
        transition: decodeTransition(row.transition),
        rawTransition: row.transition,
        fromVisit: row.from_visit || 0,
        visitCount: row.visit_count || 0,
        typedCount: row.typed_count || 0,
        hidden: row.hidden === 1,
      };
    }
  } finally {
    db.close();
  }
}
|
|
212
|
+
|
|
213
|
+
module.exports = {
|
|
214
|
+
defaultChromeProfileDir,
|
|
215
|
+
defaultEdgeProfileDir,
|
|
216
|
+
defaultBraveProfileDir,
|
|
217
|
+
copyHistorySnapshot,
|
|
218
|
+
cleanupHistorySnapshot,
|
|
219
|
+
readVisits,
|
|
220
|
+
webkitUsToEpochMs,
|
|
221
|
+
epochMsToWebkitUs,
|
|
222
|
+
decodeTransition,
|
|
223
|
+
};
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
const {
|
|
4
|
+
BrowserHistoryChromeAdapter,
|
|
5
|
+
BROWSER_HISTORY_CHROME_NAME,
|
|
6
|
+
BROWSER_HISTORY_CHROME_VERSION,
|
|
7
|
+
} = require("./adapter");
|
|
8
|
+
const dbReader = require("./chrome-db-reader");
|
|
9
|
+
const bookmarksReader = require("./bookmarks-reader");
|
|
10
|
+
|
|
11
|
+
module.exports = {
|
|
12
|
+
BrowserHistoryChromeAdapter,
|
|
13
|
+
BROWSER_HISTORY_CHROME_NAME,
|
|
14
|
+
BROWSER_HISTORY_CHROME_VERSION,
|
|
15
|
+
defaultChromeProfileDir: dbReader.defaultChromeProfileDir,
|
|
16
|
+
copyHistorySnapshot: dbReader.copyHistorySnapshot,
|
|
17
|
+
cleanupHistorySnapshot: dbReader.cleanupHistorySnapshot,
|
|
18
|
+
readVisits: dbReader.readVisits,
|
|
19
|
+
readBookmarks: bookmarksReader.readBookmarks,
|
|
20
|
+
webkitUsToEpochMs: dbReader.webkitUsToEpochMs,
|
|
21
|
+
epochMsToWebkitUs: dbReader.epochMsToWebkitUs,
|
|
22
|
+
decodeTransition: dbReader.decodeTransition,
|
|
23
|
+
};
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
// BrowserHistoryEdgeAdapter — Microsoft Edge is Chromium under the hood,
|
|
4
|
+
// so the History SQLite schema + Bookmarks JSON layout are byte-identical
|
|
5
|
+
// to Chrome. Only the on-disk profile root differs. Subclass the Chrome
|
|
6
|
+
// adapter and override the browser config; everything else (copy-then-read
|
|
7
|
+
// snapshot, bookmark walker, normalize, etc.) is inherited.
|
|
8
|
+
|
|
9
|
+
const {
|
|
10
|
+
BrowserHistoryChromeAdapter,
|
|
11
|
+
} = require("../browser-history-chrome/adapter");
|
|
12
|
+
const {
|
|
13
|
+
defaultEdgeProfileDir,
|
|
14
|
+
} = require("../browser-history-chrome/chrome-db-reader");
|
|
15
|
+
|
|
16
|
+
const NAME = "browser-history-edge";
|
|
17
|
+
const VERSION = "0.1.0";
|
|
18
|
+
|
|
19
|
+
/**
 * Microsoft Edge variant of the Chrome history adapter. Only the browser
 * config differs; every other method is inherited unchanged.
 */
class BrowserHistoryEdgeAdapter extends BrowserHistoryChromeAdapter {
  /**
   * Edge-specific settings consumed by the inherited constructor.
   *
   * @returns {{name: string, version: string, browser: string, defaultProfileDir: Function}}
   */
  _browserConfig() {
    const edgeConfig = {
      name: NAME,
      version: VERSION,
      browser: "edge",
      defaultProfileDir: defaultEdgeProfileDir,
    };
    return edgeConfig;
  }
}
|
|
29
|
+
|
|
30
|
+
module.exports = {
|
|
31
|
+
BrowserHistoryEdgeAdapter,
|
|
32
|
+
BROWSER_HISTORY_EDGE_NAME: NAME,
|
|
33
|
+
BROWSER_HISTORY_EDGE_VERSION: VERSION,
|
|
34
|
+
};
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
const {
|
|
4
|
+
BrowserHistoryEdgeAdapter,
|
|
5
|
+
BROWSER_HISTORY_EDGE_NAME,
|
|
6
|
+
BROWSER_HISTORY_EDGE_VERSION,
|
|
7
|
+
} = require("./adapter");
|
|
8
|
+
|
|
9
|
+
module.exports = {
|
|
10
|
+
BrowserHistoryEdgeAdapter,
|
|
11
|
+
BROWSER_HISTORY_EDGE_NAME,
|
|
12
|
+
BROWSER_HISTORY_EDGE_VERSION,
|
|
13
|
+
};
|