@chainlesschain/personal-data-hub 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/browser-history-chrome.test.js +377 -0
- package/__tests__/adapters/browser-history-edge.test.js +159 -0
- package/__tests__/adapters/git-activity.test.js +216 -0
- package/__tests__/adapters/local-files.test.js +264 -0
- package/__tests__/adapters/shell-history.test.js +180 -0
- package/__tests__/adapters/system-data-android.test.js +104 -3
- package/__tests__/adapters/vscode.test.js +299 -0
- package/__tests__/adapters/win-recent.test.js +192 -0
- package/__tests__/analysis.test.js +841 -2
- package/__tests__/categories.test.js +92 -0
- package/__tests__/e2e/local-data-adapters-cli.e2e.test.js +146 -0
- package/__tests__/entity-resolver-vault.test.js +5 -2
- package/__tests__/integration/local-data-adapters-pipeline.test.js +373 -0
- package/__tests__/longtail-adapters.test.js +7 -2
- package/__tests__/query-parser.test.js +66 -0
- package/__tests__/registry.test.js +114 -0
- package/__tests__/sidecar-contacts-cross-validate.test.js +24 -1
- package/__tests__/sidecar-supervisor.test.js +9 -1
- package/__tests__/social-kuaishou-snapshot.test.js +55 -2
- package/__tests__/social-toutiao-snapshot.test.js +54 -2
- package/__tests__/vault-search-helpers.test.js +104 -0
- package/__tests__/vault-search.test.js +423 -0
- package/__tests__/vault.test.js +77 -3
- package/lib/adapters/browser-history-chrome/adapter.js +247 -0
- package/lib/adapters/browser-history-chrome/bookmarks-reader.js +79 -0
- package/lib/adapters/browser-history-chrome/chrome-db-reader.js +223 -0
- package/lib/adapters/browser-history-chrome/index.js +23 -0
- package/lib/adapters/browser-history-edge/adapter.js +34 -0
- package/lib/adapters/browser-history-edge/index.js +13 -0
- package/lib/adapters/git-activity/adapter.js +155 -0
- package/lib/adapters/git-activity/git-reader.js +125 -0
- package/lib/adapters/git-activity/index.js +17 -0
- package/lib/adapters/local-files/adapter.js +149 -0
- package/lib/adapters/local-files/file-walker.js +125 -0
- package/lib/adapters/local-files/index.js +18 -0
- package/lib/adapters/shell-history/adapter.js +137 -0
- package/lib/adapters/shell-history/index.js +17 -0
- package/lib/adapters/shell-history/shell-reader.js +100 -0
- package/lib/adapters/social-kuaishou/index.js +57 -1
- package/lib/adapters/social-toutiao/index.js +59 -1
- package/lib/adapters/system-data-android/adapter.js +220 -3
- package/lib/adapters/vscode/adapter.js +285 -0
- package/lib/adapters/vscode/index.js +18 -0
- package/lib/adapters/vscode/vscode-reader.js +191 -0
- package/lib/adapters/win-recent/adapter.js +150 -0
- package/lib/adapters/win-recent/index.js +16 -0
- package/lib/adapters/win-recent/win-recent-reader.js +72 -0
- package/lib/analysis.js +227 -9
- package/lib/categories.js +101 -0
- package/lib/index.js +61 -0
- package/lib/migrations.js +146 -0
- package/lib/query-parser.js +74 -0
- package/lib/registry.js +162 -0
- package/lib/vault.js +363 -2
- package/package.json +2 -1
- package/scripts/run-native-tests-sandbox.sh +53 -0
|
@@ -7,6 +7,7 @@ const {
|
|
|
7
7
|
parseTimeWindow,
|
|
8
8
|
parseFilters,
|
|
9
9
|
parseIntent,
|
|
10
|
+
extractEntityTerm,
|
|
10
11
|
} = require("../lib/query-parser");
|
|
11
12
|
|
|
12
13
|
// Pin "now" to 2026-05-19 12:00:00 UTC for deterministic windows
|
|
@@ -148,3 +149,68 @@ describe("parseQuery (integration)", () => {
|
|
|
148
149
|
expect(r.filters).toEqual({});
|
|
149
150
|
});
|
|
150
151
|
});
|
|
152
|
+
|
|
153
|
+
// ─── extractEntityTerm — FTS5 fulltext routing helper ───────────────────
|
|
154
|
+
//
|
|
155
|
+
// 2026-05-24 — Powers AnalysisEngine._gatherFacts intent=list augmentation:
|
|
156
|
+
// when the parser pulls a probable entity-name out of the question, the
|
|
157
|
+
// engine appends vault.searchEvents(q=term) hits to the FACTS pool. Wrong
|
|
158
|
+
// extractions are intentionally non-fatal — they waste a few rows of
|
|
159
|
+
// budget at worst, never lose existing events. Memory:
|
|
160
|
+
// pdh_analysis_engine_intent_routing.md.
|
|
161
|
+
|
|
162
|
+
describe("extractEntityTerm", () => {
|
|
163
|
+
it("extracts named entity from '提到 X 的消息' phrasing", () => {
|
|
164
|
+
expect(extractEntityTerm("提到王老板的消息")).toBe("王老板");
|
|
165
|
+
});
|
|
166
|
+
|
|
167
|
+
it("returns null when only stop-words remain (no entity hint)", () => {
|
|
168
|
+
expect(extractEntityTerm("上个月在淘宝总共花了多少?")).toBeNull();
|
|
169
|
+
expect(extractEntityTerm("在淘宝买了什么")).toBeNull();
|
|
170
|
+
});
|
|
171
|
+
|
|
172
|
+
it("picks the longest remaining chunk when several survive cleaning", () => {
|
|
173
|
+
// 苹果(2) vs 订单(stop) — only 苹果 left.
|
|
174
|
+
expect(extractEntityTerm("苹果的订单")).toBe("苹果");
|
|
175
|
+
});
|
|
176
|
+
|
|
177
|
+
it("strips list/search trigger words ('提到', '查找', '看一下')", () => {
|
|
178
|
+
expect(extractEntityTerm("查找王医生的订单")).toBe("王医生");
|
|
179
|
+
expect(extractEntityTerm("看一下王医生的最新消息")).toBe("王医生");
|
|
180
|
+
});
|
|
181
|
+
|
|
182
|
+
it("strips compound subtype keywords before shorter intent forms", () => {
|
|
183
|
+
// "多少钱" must clear before "多少" leaves stranded "钱". With clean
|
|
184
|
+
// stripping there is no leftover ≥2 char chunk → null.
|
|
185
|
+
expect(extractEntityTerm("我总共花了多少钱")).toBeNull();
|
|
186
|
+
});
|
|
187
|
+
|
|
188
|
+
it("ignores single-character residues (verbs leak through; 1-char names skipped first-pass)", () => {
|
|
189
|
+
// "我妈" → "我" stripped (pronoun), "妈" left as single char → filtered.
|
|
190
|
+
// Documented limitation; first-pass tradeoff for higher precision.
|
|
191
|
+
expect(extractEntityTerm("我妈最近发的微信")).toBeNull();
|
|
192
|
+
});
|
|
193
|
+
|
|
194
|
+
it("handles ASCII entity tokens (≥2 chars)", () => {
|
|
195
|
+
expect(extractEntityTerm("提到 GitHub 的消息")).toBe("GitHub");
|
|
196
|
+
});
|
|
197
|
+
|
|
198
|
+
it("returns null for non-string / empty input", () => {
|
|
199
|
+
expect(extractEntityTerm("")).toBeNull();
|
|
200
|
+
expect(extractEntityTerm(null)).toBeNull();
|
|
201
|
+
expect(extractEntityTerm(undefined)).toBeNull();
|
|
202
|
+
expect(extractEntityTerm(123)).toBeNull();
|
|
203
|
+
});
|
|
204
|
+
|
|
205
|
+
it("does not pick adapter keywords as the entity (handled by filters)", () => {
|
|
206
|
+
// "淘宝" 是 adapter,会被 parseFilters 抽走当 q.adapter;不该再被
|
|
207
|
+
// 当实体名重复 FTS 搜。
|
|
208
|
+
expect(extractEntityTerm("看下淘宝的订单")).toBeNull();
|
|
209
|
+
});
|
|
210
|
+
|
|
211
|
+
it("> 10 char tokens are dropped (probable concatenated noise)", () => {
|
|
212
|
+
// 拼出一个 12 char ASCII token,期望被 length 上限过滤掉
|
|
213
|
+
const r = extractEntityTerm("提到 abcdefghijkl 的消息");
|
|
214
|
+
expect(r).toBeNull();
|
|
215
|
+
});
|
|
216
|
+
});
|
|
@@ -304,3 +304,117 @@ describe("Phase 2 perf gate: 1k events ingest", () => {
|
|
|
304
304
|
expect(qDur).toBeLessThan(2000); // 1k-row read should be ms-scale
|
|
305
305
|
}, 60_000); // vitest test timeout — extra headroom for slow CI
|
|
306
306
|
});
|
|
307
|
+
|
|
308
|
+
// ─── rederive (recover orphan raw_events) ────────────────────────────────
|
|
309
|
+
|
|
310
|
+
describe("AdapterRegistry.rederive", () => {
|
|
311
|
+
it("promotes raw_events to canonical events for registered adapter", async () => {
|
|
312
|
+
freshVault();
|
|
313
|
+
const reg = new AdapterRegistry({ vault });
|
|
314
|
+
const adapter = new MockAdapter({ name: "mock-rederive" });
|
|
315
|
+
reg.register(adapter);
|
|
316
|
+
|
|
317
|
+
// Simulate the failure mode: putRawEvent succeeded (raw landed) but
|
|
318
|
+
// putBatch failed at sync time (e.g. partial-index drift trap #25),
|
|
319
|
+
// so events table is empty while raw_events has 3 rows.
|
|
320
|
+
const baseTs = 1_700_000_000_000;
|
|
321
|
+
for (let i = 0; i < 3; i++) {
|
|
322
|
+
vault.putRawEvent({
|
|
323
|
+
adapter: "mock-rederive",
|
|
324
|
+
originalId: `raw-${i}`,
|
|
325
|
+
capturedAt: baseTs + i * 1000,
|
|
326
|
+
payload: {
|
|
327
|
+
variant: 1,
|
|
328
|
+
senderName: `Alice${i}`,
|
|
329
|
+
text: `hello ${i}`,
|
|
330
|
+
index: i,
|
|
331
|
+
},
|
|
332
|
+
});
|
|
333
|
+
}
|
|
334
|
+
expect(vault.stats().rawEvents).toBe(3);
|
|
335
|
+
expect(vault.stats().events).toBe(0);
|
|
336
|
+
|
|
337
|
+
const report = await reg.rederive();
|
|
338
|
+
|
|
339
|
+
expect(report.rawSeen).toBe(3);
|
|
340
|
+
expect(report.adapterMissing).toBe(0);
|
|
341
|
+
expect(report.invalidCount).toBe(0);
|
|
342
|
+
expect(report.entityCounts.events).toBe(3);
|
|
343
|
+
expect(report.entityCounts.persons).toBe(3); // MockAdapter variant 1 yields 1 person/raw
|
|
344
|
+
expect(report.errors).toEqual([]);
|
|
345
|
+
expect(vault.stats().events).toBe(3);
|
|
346
|
+
expect(vault.stats().persons).toBe(3);
|
|
347
|
+
});
|
|
348
|
+
|
|
349
|
+
it("counts raws whose adapter is not registered", async () => {
|
|
350
|
+
freshVault();
|
|
351
|
+
const reg = new AdapterRegistry({ vault });
|
|
352
|
+
// No adapter registered — every raw is orphan
|
|
353
|
+
vault.putRawEvent({
|
|
354
|
+
adapter: "ghost-adapter",
|
|
355
|
+
originalId: "x",
|
|
356
|
+
capturedAt: Date.now(),
|
|
357
|
+
payload: {},
|
|
358
|
+
});
|
|
359
|
+
const report = await reg.rederive();
|
|
360
|
+
expect(report.rawSeen).toBe(1);
|
|
361
|
+
expect(report.adapterMissing).toBe(1);
|
|
362
|
+
expect(report.entityCounts.events).toBe(0);
|
|
363
|
+
expect(vault.stats().events).toBe(0);
|
|
364
|
+
});
|
|
365
|
+
|
|
366
|
+
it("isolates per-raw normalize failures (invalidCount)", async () => {
|
|
367
|
+
freshVault();
|
|
368
|
+
const reg = new AdapterRegistry({ vault });
|
|
369
|
+
// MockAdapter.normalize throws on call #3 (1-indexed). 3 raws → 3rd throws,
|
|
370
|
+
// 1st+2nd succeed → events=2, invalidCount=1.
|
|
371
|
+
const adapter = new MockAdapter({ name: "mock-throw" });
|
|
372
|
+
adapter.normalizeShouldThrowAt(2);
|
|
373
|
+
reg.register(adapter);
|
|
374
|
+
for (let i = 0; i < 3; i++) {
|
|
375
|
+
vault.putRawEvent({
|
|
376
|
+
adapter: "mock-throw",
|
|
377
|
+
originalId: `raw-${i}`,
|
|
378
|
+
capturedAt: 1_700_000_000_000 + i * 1000,
|
|
379
|
+
payload: { variant: 1, senderName: `Bob${i}`, text: `t${i}`, index: i },
|
|
380
|
+
});
|
|
381
|
+
}
|
|
382
|
+
const report = await reg.rederive();
|
|
383
|
+
expect(report.rawSeen).toBe(3);
|
|
384
|
+
expect(report.invalidCount).toBe(1);
|
|
385
|
+
expect(report.entityCounts.events).toBe(2);
|
|
386
|
+
expect(vault.stats().events).toBe(2);
|
|
387
|
+
});
|
|
388
|
+
|
|
389
|
+
it("filters by --adapter option", async () => {
|
|
390
|
+
freshVault();
|
|
391
|
+
const reg = new AdapterRegistry({ vault });
|
|
392
|
+
reg.register(new MockAdapter({ name: "adapter-a" }));
|
|
393
|
+
reg.register(new MockAdapter({ name: "adapter-b" }));
|
|
394
|
+
vault.putRawEvent({
|
|
395
|
+
adapter: "adapter-a", originalId: "a1", capturedAt: 1_700_000_000_001,
|
|
396
|
+
payload: { variant: 1, senderName: "S", text: "ta", index: 1 },
|
|
397
|
+
});
|
|
398
|
+
vault.putRawEvent({
|
|
399
|
+
adapter: "adapter-b", originalId: "b1", capturedAt: 1_700_000_000_002,
|
|
400
|
+
payload: { variant: 1, senderName: "S", text: "tb", index: 1 },
|
|
401
|
+
});
|
|
402
|
+
const report = await reg.rederive({ adapter: "adapter-a" });
|
|
403
|
+
expect(report.rawSeen).toBe(1); // only adapter-a was queried
|
|
404
|
+
expect(report.entityCounts.events).toBe(1);
|
|
405
|
+
});
|
|
406
|
+
|
|
407
|
+
it("is idempotent: rerunning produces same events table (UPSERT via partial index)", async () => {
|
|
408
|
+
freshVault();
|
|
409
|
+
const reg = new AdapterRegistry({ vault });
|
|
410
|
+
reg.register(new MockAdapter({ name: "idemp" }));
|
|
411
|
+
vault.putRawEvent({
|
|
412
|
+
adapter: "idemp", originalId: "x", capturedAt: 1_700_000_000_000,
|
|
413
|
+
payload: { variant: 1, senderName: "Alice", text: "hi", index: 0 },
|
|
414
|
+
});
|
|
415
|
+
await reg.rederive();
|
|
416
|
+
const eventsAfterFirst = vault.stats().events;
|
|
417
|
+
await reg.rederive();
|
|
418
|
+
expect(vault.stats().events).toBe(eventsAfterFirst);
|
|
419
|
+
});
|
|
420
|
+
});
|
|
@@ -38,7 +38,30 @@ try {
|
|
|
38
38
|
pythonAvailable = false;
|
|
39
39
|
}
|
|
40
40
|
|
|
41
|
-
|
|
41
|
+
// Probe bs3mc — fixture seeding opens an unencrypted SQLite file via
|
|
42
|
+
// better-sqlite3-multiple-ciphers, which fails on dev boxes where the root
|
|
43
|
+
// node_modules binding was compiled for Electron's NODE_MODULE_VERSION
|
|
44
|
+
// instead of the host Node ABI. Skip cleanly when the native binding
|
|
45
|
+
// can't load; CI Linux builds get a Node-ABI binary and run the full path.
|
|
46
|
+
let bs3mcAvailable = true;
|
|
47
|
+
try {
|
|
48
|
+
const probeDir = fs.mkdtempSync(path.join(os.tmpdir(), "bs3mc-contacts-probe-"));
|
|
49
|
+
const probeDb = new Database(path.join(probeDir, "p.db"));
|
|
50
|
+
probeDb.close();
|
|
51
|
+
fs.rmSync(probeDir, { recursive: true, force: true });
|
|
52
|
+
} catch (_err) {
|
|
53
|
+
bs3mcAvailable = false;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
// FTS5 sandbox runner: the relative path to personal-data-hub-bridge
|
|
57
|
+
// resolves outside the temp tree. Without this gate the spawn fails with
|
|
58
|
+
// ENOENT during beforeAll instead of skipping cleanly.
|
|
59
|
+
const sidecarRootAvailable = fs.existsSync(SIDECAR_ROOT);
|
|
60
|
+
|
|
61
|
+
const describePy =
|
|
62
|
+
pythonAvailable && bs3mcAvailable && sidecarRootAvailable
|
|
63
|
+
? describe
|
|
64
|
+
: describe.skip;
|
|
42
65
|
|
|
43
66
|
function seedFixtureContactsDb(dbPath) {
|
|
44
67
|
const db = new Database(dbPath);
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
|
4
4
|
import path from "node:path";
|
|
5
|
+
import fs from "node:fs";
|
|
5
6
|
|
|
6
7
|
const {
|
|
7
8
|
SidecarSupervisor,
|
|
@@ -46,7 +47,14 @@ try {
|
|
|
46
47
|
pythonAvailable = false;
|
|
47
48
|
}
|
|
48
49
|
|
|
49
|
-
|
|
50
|
+
// In the FTS5 sandbox runner the relative ../../personal-data-hub-bridge
|
|
51
|
+
// resolves outside the temp tree and the directory does not exist; without
|
|
52
|
+
// this gate, spawn() returns ENOENT and the test fails for environment
|
|
53
|
+
// reasons rather than code reasons. Keep both gates so the file is safe
|
|
54
|
+
// to run in either layout.
|
|
55
|
+
const sidecarRootAvailable = fs.existsSync(SIDECAR_ROOT);
|
|
56
|
+
|
|
57
|
+
const itPy = pythonAvailable && sidecarRootAvailable ? it : it.skip;
|
|
50
58
|
|
|
51
59
|
describe("SidecarSupervisor (forensics-bridge integration)", () => {
|
|
52
60
|
let supervisor;
|
|
@@ -29,9 +29,14 @@ describe("KuaishouAdapter snapshot mode", () => {
|
|
|
29
29
|
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "kuaishou-snap-"));
|
|
30
30
|
});
|
|
31
31
|
|
|
32
|
-
it("exports SNAPSHOT_SCHEMA_VERSION = 1 +
|
|
32
|
+
it("exports SNAPSHOT_SCHEMA_VERSION = 1 + 4 VALID_SNAPSHOT_KINDS (v0.2.1 adds profile)", () => {
|
|
33
33
|
expect(SNAPSHOT_SCHEMA_VERSION).toBe(1);
|
|
34
|
-
expect(VALID_SNAPSHOT_KINDS).toEqual([
|
|
34
|
+
expect(VALID_SNAPSHOT_KINDS).toEqual([
|
|
35
|
+
"profile",
|
|
36
|
+
"watch",
|
|
37
|
+
"collect",
|
|
38
|
+
"search",
|
|
39
|
+
]);
|
|
35
40
|
});
|
|
36
41
|
|
|
37
42
|
it("authenticate(inputPath) ok when readable", async () => {
|
|
@@ -100,6 +105,54 @@ describe("KuaishouAdapter snapshot mode", () => {
|
|
|
100
105
|
expect(raws.length).toBe(0);
|
|
101
106
|
});
|
|
102
107
|
|
|
108
|
+
it("v0.2 profile event normalizes to person-self with kuaishou-uid identifier", async () => {
|
|
109
|
+
const now = Date.now();
|
|
110
|
+
const p = writeSnapshot(tmpDir, {
|
|
111
|
+
schemaVersion: 1,
|
|
112
|
+
snapshottedAt: now,
|
|
113
|
+
account: { uid: "77777", displayName: "alice" },
|
|
114
|
+
events: [
|
|
115
|
+
{
|
|
116
|
+
kind: "profile",
|
|
117
|
+
id: "profile-77777",
|
|
118
|
+
capturedAt: now - 500,
|
|
119
|
+
uid: "77777",
|
|
120
|
+
nickname: "alice",
|
|
121
|
+
kuaishouId: "alice_KS",
|
|
122
|
+
avatarUrl: "https://p.kuaishou.com/u/alice.jpg",
|
|
123
|
+
sex: "F",
|
|
124
|
+
city: "Shanghai",
|
|
125
|
+
constellation: "Libra",
|
|
126
|
+
description: "hi there",
|
|
127
|
+
},
|
|
128
|
+
],
|
|
129
|
+
});
|
|
130
|
+
const a = new KuaishouAdapter();
|
|
131
|
+
const raws = [];
|
|
132
|
+
for await (const r of a.sync({ inputPath: p })) raws.push(r);
|
|
133
|
+
expect(raws.length).toBe(1);
|
|
134
|
+
expect(raws[0].kind).toBe("profile");
|
|
135
|
+
expect(raws[0].originalId).toMatch(/^kuaishou:profile:/);
|
|
136
|
+
|
|
137
|
+
const batch = a.normalize(raws[0]);
|
|
138
|
+
expect(validateBatch(batch).valid).toBe(true);
|
|
139
|
+
expect(batch.events.length).toBe(0);
|
|
140
|
+
expect(batch.persons.length).toBe(1);
|
|
141
|
+
const person = batch.persons[0];
|
|
142
|
+
expect(person.id).toBe("person-kuaishou-77777");
|
|
143
|
+
expect(person.subtype).toBe("self");
|
|
144
|
+
expect(person.names).toEqual(["alice"]);
|
|
145
|
+
expect(person.identifiers["kuaishou-uid"]).toEqual(["77777"]);
|
|
146
|
+
expect(person.identifiers["kuaishou-id"]).toEqual(["alice_KS"]);
|
|
147
|
+
expect(person.extra.platform).toBe("kuaishou");
|
|
148
|
+
expect(person.extra.avatarUrl).toBe("https://p.kuaishou.com/u/alice.jpg");
|
|
149
|
+
expect(person.extra.sex).toBe("F");
|
|
150
|
+
expect(person.extra.city).toBe("Shanghai");
|
|
151
|
+
expect(person.extra.constellation).toBe("Libra");
|
|
152
|
+
expect(person.extra.description).toBe("hi there");
|
|
153
|
+
expect(person.source.capturedBy).toBe("api");
|
|
154
|
+
});
|
|
155
|
+
|
|
103
156
|
it("watch event round-trips normalize cleanly (BROWSE subtype)", async () => {
|
|
104
157
|
const now = Date.now();
|
|
105
158
|
const p = writeSnapshot(tmpDir, {
|
|
@@ -31,9 +31,14 @@ describe("ToutiaoAdapter snapshot mode", () => {
|
|
|
31
31
|
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "toutiao-snap-"));
|
|
32
32
|
});
|
|
33
33
|
|
|
34
|
-
it("exports SNAPSHOT_SCHEMA_VERSION = 1 +
|
|
34
|
+
it("exports SNAPSHOT_SCHEMA_VERSION = 1 + 4 VALID_SNAPSHOT_KINDS (v0.2.1 adds profile)", () => {
|
|
35
35
|
expect(SNAPSHOT_SCHEMA_VERSION).toBe(1);
|
|
36
|
-
expect(VALID_SNAPSHOT_KINDS).toEqual([
|
|
36
|
+
expect(VALID_SNAPSHOT_KINDS).toEqual([
|
|
37
|
+
"profile",
|
|
38
|
+
"read",
|
|
39
|
+
"collection",
|
|
40
|
+
"search",
|
|
41
|
+
]);
|
|
37
42
|
});
|
|
38
43
|
|
|
39
44
|
it("authenticate(inputPath) ok when readable", async () => {
|
|
@@ -102,6 +107,53 @@ describe("ToutiaoAdapter snapshot mode", () => {
|
|
|
102
107
|
expect(raws.length).toBe(0);
|
|
103
108
|
});
|
|
104
109
|
|
|
110
|
+
it("v0.2 profile event normalizes to person-self with toutiao-uid identifier", async () => {
|
|
111
|
+
const now = Date.now();
|
|
112
|
+
const p = writeSnapshot(tmpDir, {
|
|
113
|
+
schemaVersion: 1,
|
|
114
|
+
snapshottedAt: now,
|
|
115
|
+
account: { uid: "99999", displayName: "alice" },
|
|
116
|
+
events: [
|
|
117
|
+
{
|
|
118
|
+
kind: "profile",
|
|
119
|
+
id: "profile-99999",
|
|
120
|
+
capturedAt: now - 500,
|
|
121
|
+
uid: "99999",
|
|
122
|
+
nickname: "alice",
|
|
123
|
+
avatarUrl: "https://p.toutiao.com/u/alice.jpg",
|
|
124
|
+
description: "hi there",
|
|
125
|
+
followingCount: 12,
|
|
126
|
+
followerCount: 34,
|
|
127
|
+
mediaId: "media-1",
|
|
128
|
+
},
|
|
129
|
+
],
|
|
130
|
+
});
|
|
131
|
+
const a = new ToutiaoAdapter();
|
|
132
|
+
const raws = [];
|
|
133
|
+
for await (const r of a.sync({ inputPath: p })) raws.push(r);
|
|
134
|
+
expect(raws.length).toBe(1);
|
|
135
|
+
expect(raws[0].kind).toBe("profile");
|
|
136
|
+
expect(raws[0].originalId).toMatch(/^toutiao:profile:/);
|
|
137
|
+
|
|
138
|
+
const batch = a.normalize(raws[0]);
|
|
139
|
+
expect(validateBatch(batch).valid).toBe(true);
|
|
140
|
+
// KIND_PROFILE produces a person record (not an event)
|
|
141
|
+
expect(batch.events.length).toBe(0);
|
|
142
|
+
expect(batch.persons.length).toBe(1);
|
|
143
|
+
const person = batch.persons[0];
|
|
144
|
+
expect(person.id).toBe("person-toutiao-99999");
|
|
145
|
+
expect(person.subtype).toBe("self");
|
|
146
|
+
expect(person.names).toEqual(["alice"]);
|
|
147
|
+
expect(person.identifiers["toutiao-uid"]).toEqual(["99999"]);
|
|
148
|
+
expect(person.identifiers["toutiao-media-id"]).toEqual(["media-1"]);
|
|
149
|
+
expect(person.extra.platform).toBe("toutiao");
|
|
150
|
+
expect(person.extra.avatarUrl).toBe("https://p.toutiao.com/u/alice.jpg");
|
|
151
|
+
expect(person.extra.description).toBe("hi there");
|
|
152
|
+
expect(person.extra.followingCount).toBe(12);
|
|
153
|
+
expect(person.extra.followerCount).toBe(34);
|
|
154
|
+
expect(person.source.capturedBy).toBe("api");
|
|
155
|
+
});
|
|
156
|
+
|
|
105
157
|
it("read event round-trips normalize cleanly (BROWSE subtype)", async () => {
|
|
106
158
|
const now = Date.now();
|
|
107
159
|
const p = writeSnapshot(tmpDir, {
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Pure-JS unit tests for the search SQL fragment builders in vault.js.
|
|
5
|
+
* Runs without the native bs3mc binding so it works on any Node ABI
|
|
6
|
+
* (the integration-level tests in vault-search.test.js need a working
|
|
7
|
+
* native binding and only run in CI).
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { describe, it, expect } from "vitest";
|
|
11
|
+
|
|
12
|
+
const { _searchHelpers } = require("../lib/vault");
|
|
13
|
+
const { _categoryToWhere, _quoteFtsQuery, FTS5_MIN_QUERY_LEN } = _searchHelpers;
|
|
14
|
+
|
|
15
|
+
describe("_quoteFtsQuery", () => {
|
|
16
|
+
it("wraps a plain string in FTS5 phrase quotes", () => {
|
|
17
|
+
expect(_quoteFtsQuery("hello")).toBe('"hello"');
|
|
18
|
+
});
|
|
19
|
+
|
|
20
|
+
it("doubles embedded double quotes (FTS5 escape rule)", () => {
|
|
21
|
+
expect(_quoteFtsQuery('he said "hi"')).toBe('"he said ""hi"""');
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
it("preserves CJK as-is", () => {
|
|
25
|
+
expect(_quoteFtsQuery("支付宝订单")).toBe('"支付宝订单"');
|
|
26
|
+
});
|
|
27
|
+
|
|
28
|
+
it("does not interpret FTS5 operators (OR/AND/NEAR — they're inside quotes)", () => {
|
|
29
|
+
// The whole input is wrapped in phrase quotes, so OR is literal text
|
|
30
|
+
expect(_quoteFtsQuery("a OR b")).toBe('"a OR b"');
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
it("FTS5_MIN_QUERY_LEN matches trigram tokenizer requirement", () => {
|
|
34
|
+
expect(FTS5_MIN_QUERY_LEN).toBe(3);
|
|
35
|
+
});
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
describe("_categoryToWhere", () => {
|
|
39
|
+
it("returns sql=null for empty/null category (caller skips filter)", () => {
|
|
40
|
+
expect(_categoryToWhere(null)).toEqual({ sql: null, params: {} });
|
|
41
|
+
expect(_categoryToWhere("")).toEqual({ sql: null, params: {} });
|
|
42
|
+
expect(_categoryToWhere(undefined)).toEqual({ sql: null, params: {} });
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
it("returns '0=1' for unknown category (no rules match)", () => {
|
|
46
|
+
const r = _categoryToWhere("not-a-real-category");
|
|
47
|
+
expect(r.sql).toBe("0=1");
|
|
48
|
+
expect(r.params).toEqual({});
|
|
49
|
+
});
|
|
50
|
+
|
|
51
|
+
it("translates 'chat' to wechat-exact + messaging-prefix OR", () => {
|
|
52
|
+
const r = _categoryToWhere("chat");
|
|
53
|
+
expect(r.sql).toMatch(/source_adapter = @cat\d/);
|
|
54
|
+
expect(r.sql).toMatch(/source_adapter LIKE @cat\d/);
|
|
55
|
+
expect(Object.values(r.params)).toEqual(
|
|
56
|
+
expect.arrayContaining(["wechat", "messaging-%"])
|
|
57
|
+
);
|
|
58
|
+
});
|
|
59
|
+
|
|
60
|
+
it("translates 'social' to a single LIKE 'social-%' condition", () => {
|
|
61
|
+
const r = _categoryToWhere("social");
|
|
62
|
+
expect(r.sql).toMatch(/source_adapter LIKE @cat0/);
|
|
63
|
+
expect(r.params).toEqual({ cat0: "social-%" });
|
|
64
|
+
});
|
|
65
|
+
|
|
66
|
+
it("translates 'shopping' to shopping-% OR alipay-%", () => {
|
|
67
|
+
const r = _categoryToWhere("shopping");
|
|
68
|
+
const vals = Object.values(r.params).sort();
|
|
69
|
+
expect(vals).toEqual(["alipay-%", "shopping-%"]);
|
|
70
|
+
expect(r.sql).toMatch(/OR/);
|
|
71
|
+
});
|
|
72
|
+
|
|
73
|
+
it("translates 'system' to system-data + browser-history + local-source adapters", () => {
|
|
74
|
+
const r = _categoryToWhere("system");
|
|
75
|
+
// 7 rules currently map to "system" (see lib/categories.js PREFIX_RULES):
|
|
76
|
+
// prefix wildcards: system-data*, browser-*
|
|
77
|
+
// exact names: vscode, win-recent, git-activity, shell-history, local-files
|
|
78
|
+
// If a future adapter joins this bucket, append it here.
|
|
79
|
+
const vals = Object.values(r.params).sort();
|
|
80
|
+
expect(vals).toEqual([
|
|
81
|
+
"browser-%",
|
|
82
|
+
"git-activity",
|
|
83
|
+
"local-files",
|
|
84
|
+
"shell-history",
|
|
85
|
+
"system-data%",
|
|
86
|
+
"vscode",
|
|
87
|
+
"win-recent",
|
|
88
|
+
]);
|
|
89
|
+
expect(r.sql).toContain("OR");
|
|
90
|
+
});
|
|
91
|
+
|
|
92
|
+
it("translates 'other' to NOT-IN-any-prefix (negation form)", () => {
|
|
93
|
+
const r = _categoryToWhere("other");
|
|
94
|
+
expect(r.sql).toMatch(/NOT LIKE @cat\d/);
|
|
95
|
+
expect(r.sql).toMatch(/AND/);
|
|
96
|
+
// Should reference every prefix rule
|
|
97
|
+
expect(Object.keys(r.params).length).toBeGreaterThan(5);
|
|
98
|
+
});
|
|
99
|
+
|
|
100
|
+
it("supports custom param prefix to avoid collisions in compound queries", () => {
|
|
101
|
+
const r = _categoryToWhere("social", "xx");
|
|
102
|
+
expect(Object.keys(r.params)[0]).toMatch(/^xx/);
|
|
103
|
+
});
|
|
104
|
+
});
|