@chainlesschain/personal-data-hub 0.4.7 → 0.4.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/__tests__/adapters/biz-tianyancha.test.js +159 -0
  2. package/__tests__/adapters/doc-baidu-netdisk.test.js +102 -0
  3. package/__tests__/adapters/doc-camscanner.test.js +147 -0
  4. package/__tests__/adapters/doc-platforms.test.js +177 -0
  5. package/__tests__/adapters/gov-ixiamen.test.js +150 -0
  6. package/__tests__/adapters/gov-tax.test.js +135 -0
  7. package/__tests__/adapters/health-meiyou.test.js +125 -0
  8. package/__tests__/adapters/music-kugou.test.js +187 -0
  9. package/__tests__/adapters/recruit-boss.test.js +180 -0
  10. package/__tests__/adapters/shopping-dianping.test.js +239 -0
  11. package/__tests__/adapters/social-csdn.test.js +175 -0
  12. package/__tests__/adapters/social-dongchedi.test.js +165 -0
  13. package/__tests__/adapters/social-zhihu.test.js +246 -0
  14. package/__tests__/adapters/travel-ctrip.test.js +175 -1
  15. package/__tests__/adapters/travel-didi.test.js +204 -0
  16. package/__tests__/adapters/travel-tongcheng.test.js +289 -0
  17. package/__tests__/adapters/video-platforms.test.js +152 -0
  18. package/__tests__/adapters/video-xigua.test.js +106 -0
  19. package/__tests__/adapters/wework-pc.test.js +124 -0
  20. package/lib/adapter-guide.js +25 -3
  21. package/lib/adapters/_document-base.js +370 -0
  22. package/lib/adapters/_video-base.js +331 -0
  23. package/lib/adapters/biz-tianyancha/index.js +348 -0
  24. package/lib/adapters/doc-baidu-netdisk/index.js +91 -0
  25. package/lib/adapters/doc-camscanner/index.js +102 -0
  26. package/lib/adapters/doc-tencent-docs/index.js +94 -0
  27. package/lib/adapters/doc-wps/index.js +77 -0
  28. package/lib/adapters/gov-ixiamen/index.js +380 -0
  29. package/lib/adapters/gov-tax/index.js +451 -0
  30. package/lib/adapters/health-meiyou/index.js +393 -0
  31. package/lib/adapters/music-kugou/index.js +418 -0
  32. package/lib/adapters/recruit-boss/index.js +442 -0
  33. package/lib/adapters/shopping-dianping/index.js +473 -0
  34. package/lib/adapters/social-csdn/index.js +444 -0
  35. package/lib/adapters/social-dongchedi/index.js +360 -0
  36. package/lib/adapters/social-zhihu/index.js +488 -0
  37. package/lib/adapters/travel-ctrip/index.js +255 -40
  38. package/lib/adapters/travel-didi/index.js +327 -0
  39. package/lib/adapters/travel-tongcheng/index.js +393 -0
  40. package/lib/adapters/video-iqiyi/index.js +75 -0
  41. package/lib/adapters/video-tencent/index.js +78 -0
  42. package/lib/adapters/video-xigua/index.js +68 -0
  43. package/lib/adapters/wework-pc/index.js +31 -0
  44. package/lib/index.js +40 -0
  45. package/package.json +1 -1
@@ -0,0 +1,451 @@
1
+ /**
2
+ * §12.1 Phase 13+ ⭐⭐⭐⭐ — 个人所得税 / 个税 APP (cn.gov.tax.its) adapter,
3
+ * "收入 + 雇主 + 申报".
4
+ *
5
+ * ⚠️ MAXIMALLY SENSITIVE (financial/tax, real-name + 可能人脸 auth).
6
+ * BEST-EFFORT SCAFFOLD (user-requested). The 个税 app is a government tax
7
+ * system behind real-name SSO with NO verifiable public API; the cookie-api
8
+ * endpoints below are FABRICATED placeholders (overridable via opts.*Url, NOT
9
+ * field-verified — FAMILY-23 playbook) and cannot authenticate without the
10
+ * gov real-name login. **snapshot mode is the reliable path** (the app / a
11
+ * manual 收入纳税明细 export produces a JSON); the cookie path is a seam only
12
+ * and surfaces `auth.unverified=true`. Gated sensitivity:"high" +
13
+ * legalGate:true — the registry REQUIRES explicit legal/consent confirmation
14
+ * before any collection runs.
15
+ *
16
+ * Personal footprint modelled (two kinds):
17
+ * - "income" 收入/扣缴明细: { period(YYYY-MM), incomeType(工资薪金/劳务报酬
18
+ * /稿酬/经营所得/...), amount, withheld(已扣缴税额),
19
+ * payer(扣缴义务人 name + id) } → EVENT(INCOME) + employer
20
+ * Person(MERCHANT).
21
+ * - "declaration" 申报/年度汇算: { year, declType(综合所得年度汇算/...),
22
+ * status(申报成功/待缴款/已退税/...), settleAmount(汇算结果:
23
+ * 退税为负 / 补税为正) } → EVENT(OTHER).
24
+ *
25
+ * Snapshot schema (schemaVersion 1):
26
+ * {
27
+ * "schemaVersion": 1, "snapshottedAt": <ms>,
28
+ * "account": { "userId": "...", "name": "..." },
29
+ * "events": [
30
+ * { "kind": "income", "id": "inc-<id>", "recordId": "...", "period": "2025-03",
31
+ * "incomeType": "工资薪金", "amount": 20000, "withheld": 1234.56,
32
+ * "payerName": "某某公司", "payerId": "9144..." },
33
+ * { "kind": "declaration", "id": "dec-<id>", "recordId": "...", "year": 2024,
34
+ * "declType": "综合所得年度汇算", "status": "已退税", "settleAmount": -800.0,
35
+ * "declaredAt": <s|ms> }
36
+ * ]
37
+ * }
38
+ */
39
+
40
+ "use strict";
41
+
42
+ const fs = require("node:fs");
43
+ const { newId } = require("../../ids");
44
+ const {
45
+ ENTITY_TYPES,
46
+ PERSON_SUBTYPES,
47
+ EVENT_SUBTYPES,
48
+ CAPTURED_BY,
49
+ } = require("../../constants");
50
+ const { CookieAuth } = require("../shopping-base");
51
+
52
+ const NAME = "gov-tax";
53
+ const VERSION = "0.1.0";
54
+ const SNAPSHOT_SCHEMA_VERSION = 1;
55
+
56
+ const KIND_INCOME = "income";
57
+ const KIND_DECLARATION = "declaration";
58
+ const VALID_SNAPSHOT_KINDS = Object.freeze([KIND_INCOME, KIND_DECLARATION]);
59
+
60
+ // FABRICATED best-effort endpoints — NOT field-verified. Overridable.
61
+ const INCOME_URL = "https://its.tax.gov.cn/api/v1/income/list";
62
+ const DECLARATION_URL = "https://its.tax.gov.cn/api/v1/declaration/list";
63
+ const PAGE_SIZE = 30;
64
+
65
/**
 * Scale a raw epoch number to milliseconds using a magnitude heuristic.
 *
 * Values >= 1e12 are taken to be milliseconds already, values in [1e9, 1e12)
 * are taken to be seconds and multiplied by 1000, and anything smaller is
 * returned unchanged.
 *
 * @param {number} n - Finite epoch-like number.
 * @returns {number} The value interpreted as milliseconds.
 */
function normalizeEpoch(n) {
  if (n >= 1e12) return n;
  return n >= 1e9 ? n * 1000 : n;
}

/**
 * Best-effort conversion of a timestamp-like value to epoch milliseconds.
 *
 * Accepts finite numbers, all-digit strings (both scaled by magnitude, see
 * normalizeEpoch) and any other string that `Date.parse` understands.
 *
 * @param {*} v - Candidate timestamp.
 * @returns {number|null} Epoch milliseconds, or null when `v` is not a
 *   finite number / parseable string.
 */
function parseTime(v) {
  if (Number.isFinite(v)) return normalizeEpoch(v);
  if (typeof v !== "string") return null;
  if (/^\d+$/.test(v)) return normalizeEpoch(Number.parseInt(v, 10));
  const t = Date.parse(v);
  return Number.isFinite(t) ? t : null;
}

/**
 * Convert a tax period to epoch milliseconds at the start of that month (UTC).
 *
 * Recognised period shapes: "2025-03", "2025/3", "202503", "2025年3月", and
 * longer strings that start with such a prefix (e.g. "2025-03-15").
 * Epoch-like inputs (numbers >= 1e9 or strings of 10+ digits) are delegated
 * to parseTime instead of being misread as a year/month prefix.
 *
 * @param {*} period - Period string/number or epoch value.
 * @returns {number|null} Epoch milliseconds, or null when nothing parseable
 *   was supplied (including an out-of-range month such as "2025-13").
 */
function periodToMs(period) {
  if (period == null) return null;

  // Epoch values must be detected first: "1711929600" would otherwise match
  // the period regex as year 1711 / month 92 and be rejected.
  if (Number.isFinite(period) && period >= 1e9) return parseTime(period);
  const s = String(period);
  if (/^\d{10,}$/.test(s)) return parseTime(s);

  const m = s.match(/^(\d{4})[-/年]?(\d{1,2})/);
  if (m) {
    const t = Date.parse(`${m[1]}-${m[2].padStart(2, "0")}-01T00:00:00Z`);
    return Number.isFinite(t) ? t : null;
  }
  return parseTime(period);
}
89
+
90
/**
 * Coerce a monetary value to a number.
 *
 * Finite numbers pass through unchanged. Strings have thousands separators
 * (ASCII "," and full-width "，"), yen/yuan signs ("¥" U+00A5 and full-width
 * "￥" U+FFE5) and whitespace stripped before being read with parseFloat, so
 * a trailing unit such as "元" is ignored.
 *
 * @param {*} v - Raw amount (number or formatted string).
 * @returns {number|null} Parsed amount, or null when `v` is neither a finite
 *   number nor a string that starts with a number after stripping.
 */
function toAmount(v) {
  if (Number.isFinite(v)) return v;
  if (typeof v !== "string") return null;
  // Escapes are used so the full-width characters survive re-encoding.
  const cleaned = v.replace(/[,\uFF0C\u00A5\uFFE5\s]/g, "");
  const n = Number.parseFloat(cleaned);
  return Number.isFinite(n) ? n : null;
}
98
+
99
/**
 * Build the `tax:<kind>:<id>` identifier used as a raw record's originalId.
 *
 * A non-empty string id is used verbatim and a finite number is stringified.
 * Any other id yields a time-plus-random "unknown-…" suffix, so the result is
 * only reproducible when a usable id is supplied.
 *
 * @param {string} kind - Record kind placed in the middle segment.
 * @param {*} id - Upstream record id.
 * @returns {string} Namespaced identifier.
 */
function stableOriginalId(kind, id) {
  let suffix;
  if (typeof id === "string" && id.length > 0) {
    suffix = id;
  } else if (typeof id === "number" && Number.isFinite(id)) {
    suffix = String(id);
  } else {
    const stamp = Date.now();
    const noise = Math.random().toString(36).slice(2, 8);
    suffix = `unknown-${stamp}-${noise}`;
  }
  return `tax:${kind}:${suffix}`;
}
106
+
107
/**
 * Map one raw income / withholding entry onto the adapter's income record.
 *
 * Several alternative field spellings are tried for every attribute; the
 * first truthy one wins.
 *
 * @param {*} raw - Raw entry from a snapshot or an API response.
 * @returns {{recordId: string, period: *, incomeType: *, amount: (number|null),
 *   withheld: (number|null), payerName: *, payerId: *}|null} The mapped
 *   record, or null when `raw` is not an object or carries no id.
 */
function mapIncome(raw) {
  if (!raw || typeof raw !== "object") return null;

  // Same result as chaining the values with `||`: first truthy value,
  // otherwise the last candidate.
  const firstTruthy = (...candidates) =>
    candidates.reduce((picked, next) => picked || next, undefined);

  const id = firstTruthy(raw.recordId, raw.record_id, raw.id, raw.detailId);
  if (id == null) return null;

  let grossSource = raw.income;
  if (raw.amount != null) grossSource = raw.amount;

  let withheldSource;
  if (raw.withheld != null) {
    withheldSource = raw.withheld;
  } else if (raw.tax != null) {
    withheldSource = raw.tax;
  } else {
    withheldSource = raw.withheldTax;
  }

  return {
    recordId: String(id),
    period: firstTruthy(raw.period, raw.taxPeriod, raw.tax_period, raw.month, null),
    incomeType: firstTruthy(raw.incomeType, raw.income_type, raw.type, raw.itemName, "其他所得"),
    amount: toAmount(grossSource),
    withheld: toAmount(withheldSource),
    payerName: firstTruthy(
      raw.payerName,
      raw.payer_name,
      raw.payer,
      raw.company,
      raw.employer,
      null,
    ),
    payerId: firstTruthy(raw.payerId, raw.payer_id, raw.payerTaxId, raw.companyId, null),
  };
}
121
+
122
/**
 * Map one raw declaration / annual-settlement entry onto the adapter's
 * declaration record.
 *
 * Several alternative field spellings are tried for every attribute; the
 * first truthy one wins.
 *
 * @param {*} raw - Raw entry from a snapshot or an API response.
 * @returns {{recordId: string, year: *, declType: *, status: *,
 *   settleAmount: (number|null), declaredMs: (number|null)}|null} The mapped
 *   record, or null when `raw` is not an object or carries no id.
 */
function mapDeclaration(raw) {
  if (!raw || typeof raw !== "object") return null;

  // Same result as chaining the values with `||`: first truthy value,
  // otherwise the last candidate.
  const firstTruthy = (...candidates) =>
    candidates.reduce((picked, next) => picked || next, undefined);

  const id = firstTruthy(raw.recordId, raw.record_id, raw.id, raw.declId);
  if (id == null) return null;

  let settleSource = raw.amount;
  if (raw.settleAmount != null) settleSource = raw.settleAmount;

  const declaredSource = firstTruthy(
    raw.declaredAt,
    raw.declared_at,
    raw.submitTime,
    raw.submit_time,
    raw.time,
  );

  return {
    recordId: String(id),
    year: firstTruthy(raw.year, raw.taxYear, raw.tax_year, null),
    declType: firstTruthy(raw.declType, raw.decl_type, raw.type, raw.itemName, "申报"),
    status: firstTruthy(raw.status, raw.statusName, raw.state, null),
    settleAmount: toAmount(settleSource),
    declaredMs: parseTime(declaredSource),
  };
}
135
+
136
/**
 * Pull the item array out of a list response, whatever envelope it uses.
 *
 * Lookup order (first array found wins):
 *   1. the response itself, when it is already an array
 *   2. `resp.list`
 *   3. `resp.data` (array)
 *   4. `resp.data.list`, `resp.data.records`, `resp.data.result`
 *   5. `resp.records`, `resp.result`
 *
 * @param {*} resp - Decoded response body.
 * @returns {Array} The item array, or an empty array when none is found.
 */
function extractList(resp) {
  if (Array.isArray(resp)) return resp;
  if (!resp || typeof resp !== "object") return [];
  if (Array.isArray(resp.list)) return resp.list;
  if (Array.isArray(resp.data)) return resp.data;

  const nested = resp.data;
  if (nested && typeof nested === "object") {
    for (const key of ["list", "records", "result"]) {
      if (Array.isArray(nested[key])) return nested[key];
    }
  }

  // Checked last so envelopes that were already recognised keep precedence.
  for (const key of ["records", "result"]) {
    if (Array.isArray(resp[key])) return resp[key];
  }
  return [];
}
148
+
149
/**
 * Adapter for individual income tax data ("gov-tax").
 *
 * Two collection modes are supported:
 *   - snapshot: read a JSON export from `inputPath` (schemaVersion must equal
 *     SNAPSHOT_SCHEMA_VERSION);
 *   - cookie-api: page through the income / declaration list URLs using an
 *     injected `fetchFn`. The default URLs are unverified placeholders, which
 *     is why this path reports `unverified: true`.
 *
 * `sync()` yields raw records `{ adapter, kind, originalId, capturedAt,
 * payload }`; `normalize()` turns one raw record into entity collections.
 */
class TaxAdapter {
  /**
   * @param {object} [opts]
   * @param {object} [opts.account] - Account descriptor; when it carries
   *   `cookies`, a CookieAuth is created and cookie-api mode becomes available.
   * @param {Function} [opts.fetchFn] - Transport called as
   *   `fetchFn({ url, cookies, query, sign })`; defaults to a function that
   *   throws.
   * @param {Function} [opts.signProvider] - Optional async signer called as
   *   `signProvider({ url, query, cookies })`.
   * @param {string} [opts.incomeUrl] - Override for the income list URL
   *   (`opts.listUrl` is accepted as an alias).
   * @param {string} [opts.declarationUrl] - Override for the declaration list URL.
   */
  constructor(opts = {}) {
    this.account = opts.account || null;
    this._cookieAuth =
      opts.account && opts.account.cookies
        ? new CookieAuth({ platform: "tax", cookies: opts.account.cookies })
        : null;
    this._fetchFn = typeof opts.fetchFn === "function" ? opts.fetchFn : defaultFetch;
    this._signProvider =
      typeof opts.signProvider === "function" ? opts.signProvider : null;
    this._urls = {
      income: opts.incomeUrl || opts.listUrl || INCOME_URL,
      declaration: opts.declarationUrl || DECLARATION_URL,
    };

    this.name = NAME;
    this.version = VERSION;
    this.capabilities = [
      "sync:snapshot",
      "sync:cookie-api",
      "parse:tax-income",
      "parse:tax-declaration",
    ];
    this.extractMode = "web-api";
    this.rateLimits = { perMinute: 6, perDay: 100 };
    this.dataDisclosure = {
      fields: [
        "tax:income (period / incomeType / amount / withheld / payer)",
        "tax:declaration (year / declType / status / settleAmount)",
      ],
      // Real-name financial / tax data — maximally sensitive.
      sensitivity: "high",
      legalGate: true,
      defaultInclude: { income: true, declaration: true },
    };

    // Held on the instance so the fs dependency can be swapped out.
    this._deps = { fs };
  }

  /**
   * Check that one of the two collection modes is usable.
   *
   * A non-empty `ctx.inputPath` takes precedence and is only checked for read
   * access; otherwise the configured cookies are validated.
   *
   * @param {object} [ctx]
   * @param {string} [ctx.inputPath] - Snapshot file path.
   * @returns {Promise<object>} `{ ok: true, mode, ... }` on success, otherwise
   *   `{ ok: false, reason, message|error }`.
   */
  async authenticate(ctx = {}) {
    if (ctx && typeof ctx.inputPath === "string" && ctx.inputPath.length > 0) {
      try {
        this._deps.fs.accessSync(ctx.inputPath, this._deps.fs.constants.R_OK);
      } catch (err) {
        return {
          ok: false,
          reason: "INPUT_PATH_UNREADABLE",
          message: `snapshot not readable at ${ctx.inputPath}: ${err.message}`,
        };
      }
      return { ok: true, mode: "snapshot-file" };
    }
    if (this._cookieAuth) {
      const ok = await this._cookieAuth.validate();
      if (!ok) return { ok: false, reason: "INVALID_COOKIE", error: "cookies missing" };
      return {
        ok: true,
        account: (this.account && this.account.userId) || null,
        mode: "cookie",
        // No request is made here, so the session itself is not verified.
        unverified: true,
      };
    }
    return {
      ok: false,
      reason: "NO_INPUT",
      message:
        "gov-tax.authenticate: needs opts.inputPath (snapshot mode) OR opts.account.cookies (cookie-api mode, best-effort/unverified)",
    };
  }

  /**
   * Report adapter health. With cookies configured this re-runs the cookie
   * branch of authenticate(); without them it always reports ok.
   *
   * @returns {Promise<object>} `{ ok, lastChecked, unverified? }` or
   *   `{ ok: false, reason, error }`.
   */
  async healthCheck() {
    if (this._cookieAuth) {
      const r = await this.authenticate();
      return r.ok
        ? { ok: true, lastChecked: Date.now(), unverified: true }
        : { ok: false, reason: r.reason, error: r.error };
    }
    return { ok: true, lastChecked: Date.now() };
  }

  /**
   * Yield raw records from a snapshot file (when `opts.inputPath` is set) or
   * from the cookie API (when cookies were configured).
   *
   * @param {object} [opts]
   * @param {string} [opts.inputPath] - Snapshot file path; selects snapshot mode.
   * @param {object} [opts.include] - Per-kind switches; `false` skips a kind.
   * @param {number} [opts.limit] - Positive integer cap on yielded records.
   * @param {number} [opts.maxPages] - Cookie mode only: page cap per kind
   *   (default 12).
   * @yields {object} Raw record.
   * @throws {Error} When neither mode is available, or when the snapshot is
   *   unreadable, not valid JSON, or has the wrong schemaVersion.
   */
  async *sync(opts = {}) {
    // Tolerate an explicit null the same way authenticate() does.
    const options = opts || {};
    if (typeof options.inputPath === "string" && options.inputPath.length > 0) {
      yield* this._syncViaSnapshot(options);
      return;
    }
    if (this._cookieAuth) {
      yield* this._syncViaCookie(options);
      return;
    }
    throw new Error(
      "gov-tax.sync: needs opts.inputPath (snapshot mode) OR opts.account.cookies (cookie-api mode)",
    );
  }

  /**
   * Snapshot mode: read and validate the JSON file, then yield one raw record
   * per accepted entry of `snapshot.events`.
   *
   * capturedAt preference: the event's own `capturedAt`, then the record's
   * period / declaration time, then `snapshottedAt`, then the current time.
   *
   * @param {object} opts - See sync().
   * @yields {object} Raw record with `payload.account` taken from the snapshot.
   */
  async *_syncViaSnapshot(opts) {
    const raw = this._deps.fs.readFileSync(opts.inputPath, "utf-8");
    const snapshot = JSON.parse(raw);
    if (
      !snapshot ||
      typeof snapshot !== "object" ||
      snapshot.schemaVersion !== SNAPSHOT_SCHEMA_VERSION
    ) {
      throw new Error(
        `gov-tax.sync: snapshot schemaVersion mismatch (got ${snapshot && snapshot.schemaVersion}, expected ${SNAPSHOT_SCHEMA_VERSION})`,
      );
    }
    const fallbackCapturedAt =
      Number.isFinite(snapshot.snapshottedAt) && snapshot.snapshottedAt > 0
        ? Math.floor(snapshot.snapshottedAt)
        : Date.now();
    const account =
      snapshot.account && typeof snapshot.account === "object" ? snapshot.account : null;
    const include = opts.include || {};
    const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;

    const events = Array.isArray(snapshot.events) ? snapshot.events : [];
    let emitted = 0;
    for (const ev of events) {
      if (emitted >= limit) return;
      if (!ev || typeof ev !== "object") continue;
      if (!VALID_SNAPSHOT_KINDS.includes(ev.kind)) continue;
      if (include[ev.kind] === false) continue;

      const rec = ev.kind === KIND_INCOME ? mapIncome(ev) : mapDeclaration(ev);
      if (!rec) continue;
      const recTime =
        ev.kind === KIND_INCOME
          ? periodToMs(rec.period)
          : rec.declaredMs || (rec.year ? periodToMs(`${rec.year}-01`) : null);
      const capturedAt = parseTime(ev.capturedAt) || recTime || fallbackCapturedAt;
      yield {
        adapter: NAME,
        kind: ev.kind,
        originalId: stableOriginalId(ev.kind, rec.recordId),
        capturedAt,
        payload: { record: rec, kind: ev.kind, account },
      };
      emitted += 1;
    }
  }

  /**
   * Cookie mode: page through the income list, then the declaration list,
   * yielding one raw record per mappable item.
   *
   * Paging for a kind stops on an empty page, on a page shorter than
   * PAGE_SIZE, or after `maxPages` pages. Yields nothing when cookie
   * validation fails.
   *
   * @param {object} [opts] - See sync().
   * @yields {object} Raw record with `payload.cookie === true`.
   */
  async *_syncViaCookie(opts = {}) {
    if (!(await this._cookieAuth.validate())) return;
    const cookies = this._cookieAuth.toHeader();
    const include = opts.include || {};
    const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
    const maxPages =
      Number.isInteger(opts.maxPages) && opts.maxPages > 0 ? opts.maxPages : 12;

    const plan = [
      { kind: KIND_INCOME, url: this._urls.income, map: mapIncome },
      { kind: KIND_DECLARATION, url: this._urls.declaration, map: mapDeclaration },
    ];

    let emitted = 0;
    for (const step of plan) {
      if (include[step.kind] === false) continue;
      for (let page = 1; page <= maxPages; page += 1) {
        // Stop before spending another rate-limited request once the cap is
        // reached; the records yielded are the same either way.
        if (emitted >= limit) return;

        const query = { page, size: PAGE_SIZE };
        let sign = null;
        if (this._signProvider) {
          sign = await this._signProvider({ url: step.url, query, cookies });
        }
        const resp = await this._fetchFn({ url: step.url, cookies, query, sign });
        const items = extractList(resp);
        if (!items.length) break;
        for (const it of items) {
          const rec = step.map(it);
          if (!rec) continue;
          if (emitted >= limit) return;
          const recTime =
            step.kind === KIND_INCOME
              ? periodToMs(rec.period)
              : rec.declaredMs || (rec.year ? periodToMs(`${rec.year}-01`) : null);
          yield {
            adapter: NAME,
            kind: step.kind,
            originalId: stableOriginalId(step.kind, rec.recordId),
            capturedAt: recTime || Date.now(),
            payload: { record: rec, kind: step.kind, cookie: true },
          };
          emitted += 1;
        }
        // A short page means there is nothing further to request.
        if (items.length < PAGE_SIZE) break;
      }
    }
  }

  /**
   * Convert one raw record from sync() into entity collections.
   *
   * Income records become an INCOME event plus, when a payer name is present,
   * a MERCHANT person for the withholding agent. Every other kind is treated
   * as a declaration and becomes an OTHER event.
   *
   * @param {object} raw - Raw record with `payload.record`.
   * @returns {{events: object[], persons: object[], places: Array,
   *   items: Array, topics: Array}}
   * @throws {Error} When `raw.payload.record` is missing.
   */
  normalize(raw) {
    if (!raw || !raw.payload || !raw.payload.record) {
      throw new Error("TaxAdapter.normalize: payload.record missing");
    }
    const kind = raw.kind || raw.payload.kind;
    const rec = raw.payload.record;
    const ingestedAt = Date.now();
    const source = {
      adapter: NAME,
      adapterVersion: VERSION,
      originalId: raw.originalId,
      capturedAt: raw.capturedAt || ingestedAt,
      // NOTE(review): API is recorded for snapshot-sourced records too —
      // confirm that is intended.
      capturedBy: CAPTURED_BY.API,
    };

    if (kind === KIND_INCOME) {
      const occurredAt = periodToMs(rec.period) || raw.capturedAt || ingestedAt;
      const persons = [];
      let payerRef = null;
      if (rec.payerName) {
        // Prefer the payer id for the reference; fall back to the name.
        payerRef = `person-tax-payer-${rec.payerId || rec.payerName}`;
        persons.push({
          id: payerRef,
          type: ENTITY_TYPES.PERSON,
          subtype: PERSON_SUBTYPES.MERCHANT,
          names: [rec.payerName],
          ingestedAt,
          source,
          identifiers: rec.payerId ? { "tax-payer-id": [String(rec.payerId)] } : {},
          extra: { platform: "tax", role: "扣缴义务人" },
        });
      }
      return {
        events: [
          {
            id: newId(),
            type: ENTITY_TYPES.EVENT,
            subtype: EVENT_SUBTYPES.INCOME,
            occurredAt,
            actor: "person-self",
            content: {
              title: `收入: ${rec.incomeType}${rec.period ? ` (${rec.period})` : ""}`.slice(0, 80),
              text: rec.incomeType,
            },
            ingestedAt,
            source,
            extra: {
              platform: "tax",
              kind: KIND_INCOME,
              period: rec.period || null,
              incomeType: rec.incomeType,
              amount: rec.amount,
              withheld: rec.withheld,
              payerRef,
            },
          },
        ],
        persons,
        places: [],
        items: [],
        topics: [],
      };
    }

    // declaration
    const occurredAt =
      rec.declaredMs ||
      (rec.year ? periodToMs(`${rec.year}-01`) : null) ||
      raw.capturedAt ||
      ingestedAt;
    return {
      events: [
        {
          id: newId(),
          type: ENTITY_TYPES.EVENT,
          subtype: EVENT_SUBTYPES.OTHER,
          occurredAt,
          actor: "person-self",
          content: {
            title: `个税申报: ${rec.declType}${rec.year ? ` (${rec.year})` : ""}`.slice(0, 80),
            text: rec.declType,
          },
          ingestedAt,
          source,
          extra: {
            platform: "tax",
            kind: KIND_DECLARATION,
            year: rec.year || null,
            declType: rec.declType,
            status: rec.status || null,
            settleAmount: rec.settleAmount,
          },
        },
      ],
      persons: [],
      places: [],
      items: [],
      topics: [],
    };
  }
}
434
+
435
/**
 * Fallback transport used when no `fetchFn` is injected: it never performs a
 * request and always rejects, so cookie-api mode fails loudly until a real
 * transport is configured.
 *
 * @param {object} _opts - Ignored request descriptor.
 * @returns {Promise<never>} Always rejects.
 * @throws {Error} "gov-tax: no fetchFn configured for cookie-api mode"
 */
async function defaultFetch(_opts) {
  const missingTransport = new Error(
    "gov-tax: no fetchFn configured for cookie-api mode",
  );
  throw missingTransport;
}
438
+
439
+ module.exports = {
440
+ TaxAdapter,
441
+ mapIncome,
442
+ mapDeclaration,
443
+ extractList,
444
+ periodToMs,
445
+ toAmount,
446
+ parseTime,
447
+ NAME,
448
+ VERSION,
449
+ SNAPSHOT_SCHEMA_VERSION,
450
+ VALID_SNAPSHOT_KINDS,
451
+ };