@chainlesschain/personal-data-hub 0.4.18 → 0.4.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/biz-tianyancha.test.js +159 -0
- package/__tests__/adapters/doc-camscanner.test.js +147 -0
- package/__tests__/adapters/gov-ixiamen.test.js +150 -0
- package/__tests__/adapters/gov-tax.test.js +135 -0
- package/__tests__/adapters/health-meiyou.test.js +125 -0
- package/__tests__/adapters/social-dongchedi.test.js +165 -0
- package/__tests__/adapters/video-xigua.test.js +106 -0
- package/__tests__/adapters/wework-pc.test.js +124 -0
- package/lib/adapter-guide.js +13 -3
- package/lib/adapters/biz-tianyancha/index.js +348 -0
- package/lib/adapters/doc-camscanner/index.js +102 -0
- package/lib/adapters/gov-ixiamen/index.js +380 -0
- package/lib/adapters/gov-tax/index.js +451 -0
- package/lib/adapters/health-meiyou/index.js +393 -0
- package/lib/adapters/social-dongchedi/index.js +360 -0
- package/lib/adapters/video-xigua/index.js +68 -0
- package/lib/adapters/wework-pc/index.js +31 -0
- package/lib/index.js +16 -0
- package/package.json +1 -1
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* §A14 — 天眼查 (Tianyancha, com.tianyancha.skyeye) adapter, dual-mode
|
|
3
|
+
* (snapshot + cookie-api). Phase 13+ §12.1 line-780 ROI ⭐⭐ "自查公司关联".
|
|
4
|
+
*
|
|
5
|
+
* 天眼查 personal data = the user's business-intelligence footprint: companies
|
|
6
|
+
* they monitor/关注 and their company-search history. Both are interest signals →
|
|
7
|
+
* monitor maps to a LIKE event (关注某公司), search to an INTERACTION event
|
|
8
|
+
* (搜索某公司). Mirrors the social-dongchedi two-mode shape; events-only (a
|
|
9
|
+
* company is not a Person, and a lightweight event keeps the vault model simple).
|
|
10
|
+
*
|
|
11
|
+
* 1. snapshot mode (opts.inputPath): JSON schemaVersion 1, stateless.
|
|
12
|
+
* 2. cookie-api mode (opts.account.cookies): fetch monitor list + search
|
|
13
|
+
* history from tianyancha.com via the injected `fetchFn`, paginate; a sign
|
|
14
|
+
* seam (opts.signProvider) covers tianyancha's signed-request header (auth /
|
|
15
|
+
* version token); best-effort unsigned when absent. Endpoints overridable
|
|
16
|
+
* via opts.monitorUrl / opts.searchUrl (best-effort, not field-verified —
|
|
17
|
+
* FAMILY-23 playbook). account OPTIONAL — the cookie carries identity.
|
|
18
|
+
*
|
|
19
|
+
* Snapshot schema (schemaVersion 1):
|
|
20
|
+
* {
|
|
21
|
+
* "schemaVersion": 1, "snapshottedAt": <ms>,
|
|
22
|
+
* "account": { "userId": "...", "name": "..." },
|
|
23
|
+
* "events": [
|
|
24
|
+
* { "kind": "monitor", "id": "mon-<gid>", "companyId": "...", "companyName": "...",
|
|
25
|
+
* "legalPerson": "...", "regStatus": "...", "capturedAt": <ms> },
|
|
26
|
+
* { "kind": "search", "id": "search-<id>", "query": "...", "companyName": "...",
|
|
27
|
+
* "capturedAt": <ms> }
|
|
28
|
+
* ]
|
|
29
|
+
* }
|
|
30
|
+
*/
|
|
31
|
+
|
|
32
|
+
"use strict";
|
|
33
|
+
|
|
34
|
+
const fs = require("node:fs");
|
|
35
|
+
const { newId } = require("../../ids");
|
|
36
|
+
const { ENTITY_TYPES, EVENT_SUBTYPES, CAPTURED_BY } = require("../../constants");
|
|
37
|
+
const { CookieAuth } = require("../shopping-base");
|
|
38
|
+
|
|
39
|
+
// Adapter identity, stamped onto every raw record and normalized source.
const NAME = "biz-tianyancha";
const VERSION = "0.1.0";

// Only snapshots declaring exactly this schemaVersion are accepted.
const SNAPSHOT_SCHEMA_VERSION = 1;

// The two event kinds this adapter understands.
const KIND_MONITOR = "monitor";
const KIND_SEARCH = "search";
// Frozen so the exported list cannot be mutated by consumers.
const VALID_SNAPSHOT_KINDS = Object.freeze([KIND_MONITOR, KIND_SEARCH]);

// Best-effort tianyancha.com endpoints. Overridable via opts.*Url.
const MONITOR_URL = "https://capi.tianyancha.com/cloud-monitor-app/monitor/list";
const SEARCH_URL = "https://capi.tianyancha.com/cloud-search-app/search/history";
// Page size requested in cookie-api mode; a shorter page ends pagination.
const PAGE_SIZE = 20;
|
|
51
|
+
|
|
52
|
+
/**
 * Coerce a timestamp-like value to epoch milliseconds.
 *
 * Finite numbers and all-digit strings are scaled heuristically: values above
 * 1e12 are taken as milliseconds, values in [1e9, 1e12] as seconds (multiplied
 * by 1000), and anything smaller is returned unchanged. Any other string is
 * handed to `Date.parse`.
 *
 * @param {*} v - number, digit string, or date string.
 * @returns {number|null} epoch milliseconds, or null when `v` is not a
 *   number/string or the string cannot be parsed.
 */
function parseTime(v) {
  // One scaling rule shared by the numeric and the digit-string paths.
  const toMillis = (n) => {
    if (n > 1e12) return n;
    return n >= 1e9 ? n * 1000 : n;
  };

  if (Number.isFinite(v)) return toMillis(v);
  if (typeof v !== "string") return null;
  if (/^\d+$/.test(v)) return toMillis(parseInt(v, 10));

  const parsed = Date.parse(v);
  return Number.isFinite(parsed) ? parsed : null;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Build the namespaced `originalId` for a raw record.
 *
 * @param {string} kind - event kind, embedded verbatim in the id.
 * @param {*} id - upstream identifier; a non-empty string or finite number is
 *   used as-is (numbers are stringified).
 * @returns {string} `tianyancha:<kind>:<id>`. When `id` is unusable the suffix
 *   is a generated `unknown-<timestamp>-<random>` token, so in that case the
 *   result differs between calls.
 */
function stableOriginalId(kind, id) {
  let suffix;
  if (typeof id === "string" && id.length > 0) {
    suffix = id;
  } else if (typeof id === "number" && Number.isFinite(id)) {
    suffix = String(id);
  } else {
    suffix = `unknown-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
  }
  return `tianyancha:${kind}:${suffix}`;
}
|
|
72
|
+
|
|
73
|
+
/**
 * Tianyancha adapter with two sync modes:
 *  - snapshot mode: reads a JSON snapshot file (`opts.inputPath`);
 *  - cookie-api mode: pages through the monitor and search-history endpoints
 *    via the injected `fetchFn`, using cookies from `opts.account.cookies`.
 *
 * `sync()` yields raw records; `normalize()` turns one raw record into the
 * `{ events, persons, places, items, topics }` shape.
 */
class TianyanchaAdapter {
  /**
   * @param {object} [opts]
   * @param {object} [opts.account] - optional account; `account.cookies`
   *   enables cookie-api mode, `account.userId` is echoed by `authenticate`.
   * @param {Function} [opts.fetchFn] - async `({ url, cookies, query, sign })`
   *   transport; without it cookie-api requests fail via `defaultFetch`.
   * @param {Function} [opts.signProvider] - optional async
   *   `({ url, query, cookies })` hook whose result is passed to `fetchFn` as
   *   `sign`; requests go unsigned (`sign: null`) when absent.
   * @param {string} [opts.monitorUrl] - override for the monitor-list endpoint.
   * @param {string} [opts.searchUrl] - override for the search-history endpoint.
   */
  constructor(opts = {}) {
    this.account = opts.account || null;
    this._cookieAuth =
      opts.account && opts.account.cookies
        ? new CookieAuth({ platform: "tianyancha", cookies: opts.account.cookies })
        : null;
    this._fetchFn = typeof opts.fetchFn === "function" ? opts.fetchFn : defaultFetch;
    this._signProvider =
      typeof opts.signProvider === "function" ? opts.signProvider : null;
    this._urls = {
      monitor: opts.monitorUrl || MONITOR_URL,
      search: opts.searchUrl || SEARCH_URL,
    };

    this.name = NAME;
    this.version = VERSION;
    this.capabilities = [
      "sync:snapshot",
      "sync:cookie-api",
      "parse:tianyancha-monitor",
      "parse:tianyancha-search",
    ];
    this.extractMode = "web-api";
    this.rateLimits = { perMinute: 8, perDay: 200 };
    this.dataDisclosure = {
      fields: [
        "tianyancha:monitor (companyName / legalPerson / regStatus)",
        "tianyancha:search (query / companyName)",
      ],
      sensitivity: "medium",
      legalGate: false,
      defaultInclude: { monitor: true, search: true },
    };

    // Filesystem access goes through `_deps` so it can be swapped out.
    this._deps = { fs };
  }

  /**
   * Check that one of the two input modes is usable.
   *
   * @param {object} [ctx]
   * @param {string} [ctx.inputPath] - snapshot file; takes precedence over cookies.
   * @returns {Promise<object>} `{ ok: true, mode }` on success, otherwise
   *   `{ ok: false, reason, ... }` with reason `INPUT_PATH_UNREADABLE`,
   *   `INVALID_COOKIE` or `NO_INPUT`.
   */
  async authenticate(ctx = {}) {
    if (ctx && typeof ctx.inputPath === "string" && ctx.inputPath.length > 0) {
      try {
        this._deps.fs.accessSync(ctx.inputPath, this._deps.fs.constants.R_OK);
      } catch (err) {
        return {
          ok: false,
          reason: "INPUT_PATH_UNREADABLE",
          message: `snapshot not readable at ${ctx.inputPath}: ${err.message}`,
        };
      }
      return { ok: true, mode: "snapshot-file" };
    }
    if (this._cookieAuth) {
      const ok = await this._cookieAuth.validate();
      if (!ok) return { ok: false, reason: "INVALID_COOKIE", error: "cookies missing" };
      return { ok: true, account: (this.account && this.account.userId) || null, mode: "cookie" };
    }
    return {
      ok: false,
      reason: "NO_INPUT",
      message:
        "biz-tianyancha.authenticate: needs opts.inputPath (snapshot mode) OR opts.account.cookies (cookie-api mode)",
    };
  }

  /**
   * Report adapter health. With cookie auth configured this re-runs
   * `authenticate()` (no ctx); otherwise it always reports healthy.
   *
   * @returns {Promise<object>} `{ ok: true, lastChecked }` or
   *   `{ ok: false, reason, error }`.
   */
  async healthCheck() {
    if (this._cookieAuth) {
      const r = await this.authenticate();
      return r.ok ? { ok: true, lastChecked: Date.now() } : { ok: false, reason: r.reason, error: r.error };
    }
    return { ok: true, lastChecked: Date.now() };
  }

  /**
   * Yield raw records from whichever mode is available; a snapshot path wins
   * over cookies.
   *
   * @param {object} [opts]
   * @param {string} [opts.inputPath] - snapshot file path (snapshot mode).
   * @param {object} [opts.include] - per-kind switch; `false` skips that kind.
   * @param {number} [opts.limit] - positive integer cap on yielded records.
   * @param {number} [opts.maxPages] - cookie-api only, pages per kind (default 10).
   * @throws {Error} when neither a snapshot path nor cookie auth is available.
   */
  async *sync(opts = {}) {
    if (typeof opts.inputPath === "string" && opts.inputPath.length > 0) {
      yield* this._syncViaSnapshot(opts);
      return;
    }
    if (this._cookieAuth) {
      yield* this._syncViaCookie(opts);
      return;
    }
    throw new Error(
      "biz-tianyancha.sync: needs opts.inputPath (snapshot mode) OR opts.account.cookies (cookie-api mode)",
    );
  }

  /**
   * Snapshot mode: read and parse the JSON file, then yield one raw record per
   * valid event. Events with an unknown kind, or excluded via `opts.include`,
   * are skipped.
   *
   * @throws {Error} on schemaVersion mismatch; read/parse errors propagate.
   */
  async *_syncViaSnapshot(opts) {
    const raw = this._deps.fs.readFileSync(opts.inputPath, "utf-8");
    const snapshot = JSON.parse(raw);
    if (!snapshot || typeof snapshot !== "object" || snapshot.schemaVersion !== SNAPSHOT_SCHEMA_VERSION) {
      throw new Error(
        `biz-tianyancha.sync: snapshot schemaVersion mismatch (got ${snapshot && snapshot.schemaVersion}, expected ${SNAPSHOT_SCHEMA_VERSION})`,
      );
    }
    // Events without a usable capturedAt inherit the snapshot time, else "now".
    const fallback =
      Number.isFinite(snapshot.snapshottedAt) && snapshot.snapshottedAt > 0
        ? Math.floor(snapshot.snapshottedAt)
        : Date.now();
    const account = snapshot.account && typeof snapshot.account === "object" ? snapshot.account : null;
    const include = opts.include || {};
    const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
    const events = Array.isArray(snapshot.events) ? snapshot.events : [];
    let emitted = 0;
    for (const ev of events) {
      if (emitted >= limit) return;
      if (!ev || typeof ev !== "object" || !VALID_SNAPSHOT_KINDS.includes(ev.kind)) continue;
      if (include[ev.kind] === false) continue;
      const id = (typeof ev.id === "string" && ev.id) || ev.companyId || ev.query || null;
      yield {
        adapter: NAME,
        kind: ev.kind,
        originalId: stableOriginalId(ev.kind, id),
        capturedAt: parseTime(ev.capturedAt) || fallback,
        payload: { ...ev, account },
      };
      emitted += 1;
    }
  }

  /**
   * Cookie-api mode: for each included kind, request pages of PAGE_SIZE items
   * until an empty or short page, `maxPages`, or `limit` is reached. Yields
   * nothing when cookie validation fails.
   */
  async *_syncViaCookie(opts = {}) {
    if (!(await this._cookieAuth.validate())) return;
    const cookies = this._cookieAuth.toHeader();
    const include = opts.include || {};
    const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
    const maxPages = Number.isInteger(opts.maxPages) && opts.maxPages > 0 ? opts.maxPages : 10;

    const plan = [
      { kind: KIND_MONITOR, url: this._urls.monitor, idOf: (it) => it.graphId || it.companyId || it.id },
      { kind: KIND_SEARCH, url: this._urls.search, idOf: (it) => it.id || it.keyword || it.word },
    ];

    let emitted = 0;
    for (const step of plan) {
      if (include[step.kind] === false) continue;
      for (let pageNum = 1; pageNum <= maxPages; pageNum += 1) {
        // Never sign/fetch a page whose items could not be emitted: once the
        // limit is met, another request only burns rate-limit budget.
        if (emitted >= limit) return;
        const query = { pageNum, pageSize: PAGE_SIZE };
        let sign = null;
        if (this._signProvider) {
          sign = await this._signProvider({ url: step.url, query, cookies });
        }
        const resp = await this._fetchFn({ url: step.url, cookies, query, sign });
        const items = extractData(resp);
        if (!items.length) break;
        for (const it of items) {
          if (!it || typeof it !== "object") continue;
          if (emitted >= limit) return;
          yield {
            adapter: NAME,
            kind: step.kind,
            originalId: stableOriginalId(step.kind, step.idOf(it)),
            capturedAt: parseTime(it.createTime || it.monitorTime || it.searchTime || it.gmtCreate) || Date.now(),
            // `cookie: true` tells the normalizers to read fields from `item`.
            payload: { item: it, kind: step.kind, cookie: true },
          };
          emitted += 1;
        }
        // A short page means the server has no further results for this kind.
        if (items.length < PAGE_SIZE) break;
      }
    }
  }

  /**
   * Convert one raw record from `sync()` into normalized entities.
   *
   * @param {object} raw - raw record; `raw.kind` (or `raw.payload.kind`)
   *   selects the normalizer.
   * @returns {object} `{ events, persons, places, items, topics }`.
   * @throws {Error} when the payload is missing or the kind is unknown.
   */
  normalize(raw) {
    if (!raw || !raw.payload) throw new Error("TianyanchaAdapter.normalize: payload missing");
    const ingestedAt = Date.now();
    const kind = raw.kind || raw.payload.kind;
    if (kind === KIND_MONITOR) return normalizeMonitor(raw, ingestedAt);
    if (kind === KIND_SEARCH) return normalizeSearch(raw, ingestedAt);
    throw new Error(`TianyanchaAdapter.normalize: unknown kind ${kind}`);
  }
}
|
|
244
|
+
|
|
245
|
+
// ─── cookie response helpers ─────────────────────────────────────────────────
|
|
246
|
+
|
|
247
|
+
/**
 * Pull the item array out of a cookie-api response, whatever envelope it uses.
 *
 * Lookup order: `resp.data` (array), `resp.list`, then inside an object-valued
 * `resp.data`: `list`, `items`, `resultList`, `records`.
 *
 * @param {*} resp - decoded response body.
 * @returns {Array} the first array found, or an empty array.
 */
function extractData(resp) {
  if (!resp || typeof resp !== "object") return [];
  if (Array.isArray(resp.data)) return resp.data;
  if (Array.isArray(resp.list)) return resp.list;

  const inner = resp.data;
  if (!inner || typeof inner !== "object") return [];

  for (const key of ["list", "items", "resultList", "records"]) {
    if (Array.isArray(inner[key])) return inner[key];
  }
  return [];
}
|
|
260
|
+
|
|
261
|
+
// ─── per-kind normalizers (snapshot fields OR cookie payload.item) ────────────
|
|
262
|
+
|
|
263
|
+
/**
 * Build the provenance (`source`) record attached to a normalized event.
 *
 * @param {object} raw - raw record from `sync()`; supplies `originalId` and
 *   `capturedAt`.
 * @param {number} occurredAt - used as `capturedAt` when the raw record has none.
 * @returns {object} `{ adapter, adapterVersion, originalId, capturedAt, capturedBy }`.
 */
function buildSource(raw, occurredAt) {
  const capturedAt = raw.capturedAt || occurredAt;
  return {
    adapter: NAME,
    adapterVersion: VERSION,
    originalId: raw.originalId,
    capturedAt,
    capturedBy: CAPTURED_BY.API,
  };
}
|
|
272
|
+
|
|
273
|
+
/**
 * Normalize a "monitor" raw record into a single LIKE event.
 *
 * Fields are read from `payload.item` for cookie-api records and from the
 * payload itself for snapshot records.
 *
 * @param {object} raw - raw record from `sync()`.
 * @param {number} ingestedAt - ingestion time; also the last-resort `occurredAt`.
 * @returns {object} `{ events: [event], persons: [], places: [], items: [], topics: [] }`.
 */
function normalizeMonitor(raw, ingestedAt) {
  const p = raw.payload;
  // Only unwrap `item` when it really is an object: a snapshot event that
  // happens to carry a truthy `cookie` field must not crash normalization.
  const it = p.cookie && p.item && typeof p.item === "object" ? p.item : p;
  const company = it.companyName || it.name || it.company || "";

  // Take the first candidate that actually parses. Parsing only the first
  // truthy one would discard later valid timestamps (including the time
  // already resolved by sync) whenever the first is malformed.
  let occurredAt = ingestedAt;
  for (const candidate of [it.capturedAt, it.createTime, it.monitorTime, raw.capturedAt]) {
    const parsed = candidate ? parseTime(candidate) : null;
    if (parsed) {
      occurredAt = parsed;
      break;
    }
  }

  const source = buildSource(raw, occurredAt);
  const rawCompanyId = it.companyId || it.graphId || it.id;
  return {
    events: [
      {
        id: newId(),
        type: ENTITY_TYPES.EVENT,
        subtype: EVENT_SUBTYPES.LIKE,
        occurredAt,
        actor: "person-self",
        content: { title: `关注公司: ${company}`.trim(), text: company },
        ingestedAt,
        source,
        extra: {
          platform: "tianyancha",
          companyId: rawCompanyId != null ? String(rawCompanyId) : null,
          companyName: company || null,
          legalPerson: it.legalPerson || it.legalPersonName || null,
          regStatus: it.regStatus || it.status || null,
        },
      },
    ],
    persons: [],
    places: [],
    items: [],
    topics: [],
  };
}
|
|
305
|
+
|
|
306
|
+
/**
 * Normalize a "search" raw record into a single INTERACTION event.
 *
 * Fields are read from `payload.item` for cookie-api records and from the
 * payload itself for snapshot records.
 *
 * @param {object} raw - raw record from `sync()`.
 * @param {number} ingestedAt - ingestion time; also the last-resort `occurredAt`.
 * @returns {object} `{ events: [event], persons: [], places: [], items: [], topics: [] }`.
 */
function normalizeSearch(raw, ingestedAt) {
  const p = raw.payload;
  // Only unwrap `item` when it really is an object: a snapshot event that
  // happens to carry a truthy `cookie` field must not crash normalization.
  const it = p.cookie && p.item && typeof p.item === "object" ? p.item : p;
  const q = it.query || it.keyword || it.word || it.companyName || "";

  // Take the first candidate that actually parses. Parsing only the first
  // truthy one would discard later valid timestamps (including the time
  // already resolved by sync) whenever the first is malformed.
  let occurredAt = ingestedAt;
  for (const candidate of [it.capturedAt, it.searchTime, it.createTime, raw.capturedAt]) {
    const parsed = candidate ? parseTime(candidate) : null;
    if (parsed) {
      occurredAt = parsed;
      break;
    }
  }

  const source = buildSource(raw, occurredAt);
  return {
    events: [
      {
        id: newId(),
        type: ENTITY_TYPES.EVENT,
        subtype: EVENT_SUBTYPES.INTERACTION,
        occurredAt,
        actor: "person-self",
        content: { title: `搜索企业: ${q}`.trim(), text: q },
        ingestedAt,
        source,
        extra: {
          platform: "tianyancha",
          query: q || null,
          companyName: it.companyName || null,
        },
      },
    ],
    persons: [],
    places: [],
    items: [],
    topics: [],
  };
}
|
|
336
|
+
|
|
337
|
+
/**
 * Placeholder transport used when no `fetchFn` was injected: it performs no
 * request and always fails, so cookie-api mode requires a caller-supplied
 * transport.
 *
 * @param {object} _opts - ignored.
 * @returns {Promise<never>} a promise rejected with the configuration error.
 */
function defaultFetch(_opts) {
  return Promise.reject(
    new Error("biz-tianyancha: no fetchFn configured for cookie-api mode"),
  );
}
|
|
340
|
+
|
|
341
|
+
module.exports = {
|
|
342
|
+
TianyanchaAdapter,
|
|
343
|
+
extractData,
|
|
344
|
+
NAME,
|
|
345
|
+
VERSION,
|
|
346
|
+
SNAPSHOT_SCHEMA_VERSION,
|
|
347
|
+
VALID_SNAPSHOT_KINDS,
|
|
348
|
+
};
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* §13+ — 扫描全能王 (CamScanner, com.intsig.camscanner) adapter. §12.1 ROI ⭐⭐
|
|
3
|
+
* "扫描文档归档".
|
|
4
|
+
*
|
|
5
|
+
* Thin wrapper over _document-base — CamScanner's scanned-document archive is
|
|
6
|
+
* the same shape as the doc-wps / doc-tencent-docs / doc-baidu-netdisk
|
|
7
|
+
* "own-document list": each scanned doc has a title, page count, create/modify
|
|
8
|
+
* time, and a PDF/jump url. CamScanner syncs the owner's docs to its cloud
|
|
9
|
+
* (intsig sync API, session cookie / sync token); this adapter supplies the
|
|
10
|
+
* endpoint + field mapping, the base handles snapshot + cookie-api orchestration
|
|
11
|
+
* + normalize (event POST + item DOCUMENT). Endpoint best-effort + overridable
|
|
12
|
+
* via opts.listUrl (not field-verified — FAMILY-23 playbook). A signProvider
|
|
13
|
+
* seam covers intsig's request signature; best-effort unsigned when absent.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
"use strict";
|
|
17
|
+
|
|
18
|
+
const { createDocumentAdapter, parseTime, SNAPSHOT_SCHEMA_VERSION } = require("../_document-base");
|
|
19
|
+
|
|
20
|
+
// Adapter identity handed to the shared document-adapter factory.
const NAME = "doc-camscanner";
const VERSION = "0.1.0";

// Best-effort CamScanner cloud doc-list endpoint. Overridable via opts.listUrl.
const CAMSCANNER_LIST_URL = "https://sync.intsig.net/sync/list_docs";
|
|
25
|
+
|
|
26
|
+
// CamScanner document type hints → normalized docType. A CamScanner "doc" is a
// scanned bundle of pages exportable as PDF; certificate/book/excel are OCR
// sub-modes the app offers.
const TYPE_MAP = {
  0: "scan",
  1: "scan",
  2: "certificate",
  3: "book",
  4: "excel",
  5: "ppt",
  pdf: "pdf",
  doc: "scan",
  certificate: "certificate",
  book: "book",
  excel: "excel",
  ppt: "ppt",
};

/**
 * Resolve the normalized docType for a CamScanner document record.
 *
 * The explicit type hint (`doc_type`, else `type`) is looked up in TYPE_MAP;
 * failing that, the file extension of the title decides; the default is "scan".
 *
 * @param {object} d - upstream document record.
 * @returns {string} one of the TYPE_MAP values.
 */
function mapCamScannerType(d) {
  const t = d.doc_type != null ? d.doc_type : d.type;
  // Own-property check: a plain `TYPE_MAP[t]` would also resolve inherited
  // keys, so an upstream type such as "constructor" or "toString" would return
  // a function instead of a docType string.
  if (t != null && Object.prototype.hasOwnProperty.call(TYPE_MAP, t) && TYPE_MAP[t] != null) {
    return TYPE_MAP[t];
  }
  const title = String(d.title || d.doc_title || d.pdf_name || "").toLowerCase();
  if (/\.pdf$/.test(title)) return "pdf";
  if (/\.(xlsx?|csv)$/.test(title)) return "excel";
  if (/\.(pptx?)$/.test(title)) return "ppt";
  return "scan";
}
|
|
53
|
+
|
|
54
|
+
/**
 * Locate the document array in a CamScanner list response.
 *
 * Lookup order: `resp.docs`, `resp.list`, `resp.data` (array), then
 * `resp.data.docs` and `resp.data.list`.
 *
 * @param {*} resp - decoded response body.
 * @returns {Array} the first array found, or an empty array.
 */
function extractDocs(resp) {
  if (!resp || typeof resp !== "object") return [];

  for (const key of ["docs", "list", "data"]) {
    if (Array.isArray(resp[key])) return resp[key];
  }

  const nested = resp.data;
  if (nested) {
    for (const key of ["docs", "list"]) {
      if (Array.isArray(nested[key])) return nested[key];
    }
  }
  return [];
}
|
|
63
|
+
|
|
64
|
+
/**
 * Map one upstream CamScanner document record to the normalized document
 * shape handed to the document base.
 *
 * @param {*} d - upstream document record.
 * @returns {object|null} `{ docId, title, docType, url, createdMs, updatedMs,
 *   extra: { pageNum, tags, folder } }`, or null when `d` is not an object or
 *   carries no usable id.
 */
function mapDoc(d) {
  if (!d || typeof d !== "object") return null;
  const docId = d.sync_doc_id || d.doc_id || d.docId || d.id || d.sid;
  if (!docId) return null;
  const pages = d.page_num != null ? d.page_num : d.pages != null ? d.pages : d.page_count;
  // Convert only real numbers and non-blank strings: Number(null), Number("")
  // and Number([]) are 0 and Number(true) is 1, which would report a missing
  // page count as an actual number of pages.
  const pageCount =
    typeof pages === "number" || (typeof pages === "string" && pages.trim() !== "")
      ? Number(pages)
      : NaN;
  return {
    docId: String(docId),
    title: d.title || d.doc_title || d.pdf_name || "(未命名扫描件)",
    docType: mapCamScannerType(d),
    url: d.pdf_url || d.url || d.jump_url || null,
    createdMs: parseTime(d.create_time || d.upload_time || d.ctime || d.created),
    updatedMs: parseTime(d.modify_time || d.last_modify_time || d.update_time || d.mtime),
    extra: {
      pageNum: Number.isFinite(pageCount) ? pageCount : null,
      tags: Array.isArray(d.tags) ? d.tags : d.tag != null ? [d.tag] : [],
      folder: d.dir_title || d.folder || null,
    },
  };
}
|
|
83
|
+
|
|
84
|
+
// CamScanner-specific pieces handed to the shared document-adapter factory:
// identity, default endpoint, and the two response-mapping hooks.
const camScannerAdapterSpec = {
  NAME,
  VERSION,
  platform: "camscanner",
  defaultListUrl: CAMSCANNER_LIST_URL,
  extractDocs,
  mapDoc,
};

// NOTE(review): presumably the factory returns the adapter class — confirm
// against ../_document-base.
const CamScannerDocAdapter = createDocumentAdapter(camScannerAdapterSpec);
|
|
92
|
+
|
|
93
|
+
module.exports = {
|
|
94
|
+
CamScannerDocAdapter,
|
|
95
|
+
extractDocs,
|
|
96
|
+
mapDoc,
|
|
97
|
+
mapCamScannerType,
|
|
98
|
+
TYPE_MAP,
|
|
99
|
+
NAME,
|
|
100
|
+
VERSION,
|
|
101
|
+
SNAPSHOT_SCHEMA_VERSION,
|
|
102
|
+
};
|