@jackwener/opencli 1.6.10 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -2
- package/README.zh-CN.md +15 -2
- package/dist/clis/jianyu/detail.js +20 -0
- package/dist/clis/jianyu/search.d.ts +41 -4
- package/dist/clis/jianyu/search.js +458 -96
- package/dist/clis/jianyu/search.test.js +105 -0
- package/dist/clis/jianyu/shared/china-bid-search.d.ts +12 -0
- package/dist/clis/jianyu/shared/china-bid-search.js +165 -0
- package/dist/clis/jianyu/shared/procurement-contract.d.ts +68 -0
- package/dist/clis/jianyu/shared/procurement-contract.js +324 -0
- package/dist/clis/jianyu/shared/procurement-contract.test.d.ts +1 -0
- package/dist/clis/jianyu/shared/procurement-contract.test.js +72 -0
- package/dist/clis/jianyu/shared/procurement-detail.d.ts +6 -0
- package/dist/clis/jianyu/shared/procurement-detail.js +92 -0
- package/dist/clis/jianyu/shared/procurement-detail.test.d.ts +1 -0
- package/dist/clis/jianyu/shared/procurement-detail.test.js +72 -0
- package/dist/clis/xiaoe/catalog.js +36 -0
- package/dist/src/browser/bridge.js +1 -1
- package/dist/src/browser/daemon-client.d.ts +2 -1
- package/dist/src/browser/daemon-client.js +3 -1
- package/dist/src/browser/daemon-client.test.js +0 -3
- package/dist/src/browser.test.js +0 -1
- package/dist/src/cli.js +1 -9
- package/dist/src/commands/daemon.d.ts +2 -6
- package/dist/src/commands/daemon.js +2 -58
- package/dist/src/commands/daemon.test.js +24 -120
- package/dist/src/constants.d.ts +0 -2
- package/dist/src/constants.js +0 -2
- package/dist/src/daemon.d.ts +1 -1
- package/dist/src/daemon.js +2 -15
- package/dist/src/execution.js +5 -1
- package/package.json +2 -1
- package/dist/src/daemon.test.js +0 -65
- package/dist/src/idle-manager.d.ts +0 -19
- package/dist/src/idle-manager.js +0 -54
- /package/dist/{src/daemon.test.d.ts → clis/jianyu/detail.d.ts} +0 -0
|
@@ -3,10 +3,39 @@
|
|
|
3
3
|
*/
|
|
4
4
|
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
5
5
|
import { AuthRequiredError } from '@jackwener/opencli/errors';
|
|
6
|
+
import { buildSearchCandidates, cleanText, dedupeCandidates, detectAuthPrompt, normalizeDate, searchRowsFromEntries, } from './shared/china-bid-search.js';
|
|
7
|
+
import { toProcurementSearchRecords } from './shared/procurement-contract.js';
|
|
8
|
+
/** Site identifier used when registering the command and building records. */
const SITE = 'jianyu';
/** Domain reported to the CLI registry and in auth errors. */
const DOMAIN = 'www.jianyu360.cn';
/** Preferred on-site search page; also the base for resolving root-relative links. */
const SEARCH_ENTRY = 'https://www.jianyu360.cn/jylab/supsearch/index.html';
/** Entry pages tried, in order, when building candidate search URLs. */
const SEARCH_ENTRIES = [
    SEARCH_ENTRY,
    'https://www.jianyu360.cn/list/stype/ZBGG.html',
    'https://www.jianyu360.cn/',
];
/** URL prefix of the search-index fallback; the encoded query is appended to it. */
const SEARCH_INDEX_PROXY = 'https://r.jina.ai/http://duckduckgo.com/html/?q=';
/** Words (Chinese and English) suggesting a text is a procurement notice. */
const PROCUREMENT_TITLE_HINT = /(公告|招标|采购|中标|成交|项目|投标|结果|notice|tender|procurement|bidding)/i;
/** Page-text phrases indicating a login wall or human-verification challenge. */
const AUTH_REQUIRED_HINT = /(请在下图依次点击|登录即可获得更多浏览权限|验证登录|请完成验证|图形验证码)/;
/**
 * Lower-case path prefixes treated as site navigation rather than notice pages.
 * Matching is done with `startsWith`, so '/list/' already covers every
 * '/list/...' path; more specific '/list/...' entries would be dead and are
 * therefore not listed.
 */
const NAVIGATION_PATH_PREFIXES = [
    '/product/',
    '/front/',
    '/helpcenter/',
    '/brand/',
    '/page_workdesktop/',
    '/list/',
    '/big/page/',
    '/jylab/',
    '/tags/',
    '/sitemap',
    '/datasmt/',
    '/bank/',
    '/hj/',
    '/exhibition/',
    '/swordfish/page_big_pc/search/',
];
/** Path segments substituted into '/jyapi/jybx/core/<type>/searchList'. */
const JIANYU_API_TYPES = ['fType', 'eType', 'vType', 'mType'];
|
|
10
39
|
export function buildSearchUrl(query) {
|
|
11
40
|
const url = new URL(SEARCH_ENTRY);
|
|
12
41
|
url.searchParams.set('keywords', query.trim());
|
|
@@ -14,122 +43,455 @@ export function buildSearchUrl(query) {
|
|
|
14
43
|
url.searchParams.set('searchGroup', '1');
|
|
15
44
|
return url.toString();
|
|
16
45
|
}
|
|
17
|
-
|
|
18
|
-
const
|
|
19
|
-
const
|
|
20
|
-
|
|
46
|
+
/**
 * Build the ordered, de-duplicated list of search URLs to try for a query.
 *
 * The URL from `buildSearchUrl` comes first, followed by the candidates that
 * `buildSearchCandidates` derives from SEARCH_ENTRIES and the listed query
 * parameter names. Each value is passed through `cleanText`; empty values and
 * repeats are dropped, keeping the first occurrence.
 *
 * @param {string} query - Search keyword.
 * @returns {string[]} Candidate URLs in priority order.
 */
function siteSearchCandidates(query) {
    const pool = [
        buildSearchUrl(query),
        ...buildSearchCandidates(query, SEARCH_ENTRIES, ['keywords', 'keyword', 'q', 'search', 'title']),
    ];
    // A Set keeps first-insertion order, which preserves the priority ordering.
    const unique = new Set();
    for (const raw of pool) {
        const normalized = cleanText(raw);
        if (normalized) {
            unique.add(normalized);
        }
    }
    return [...unique];
}
|
|
60
|
+
/**
 * Decide whether a URL looks like site navigation rather than a notice page.
 *
 * Empty or unparsable URLs are treated as navigation. Otherwise the path is
 * lower-cased, trailing slashes are collapsed to one, and the URL counts as
 * navigation when the path is the root or starts with any entry of
 * NAVIGATION_PATH_PREFIXES.
 *
 * @param {string} rawUrl - Absolute URL to classify.
 * @returns {boolean} True when the URL should be treated as navigation.
 */
function isLikelyNavigationUrl(rawUrl) {
    const candidate = cleanText(rawUrl);
    if (!candidate) {
        return true;
    }
    try {
        const { pathname } = new URL(candidate);
        const normalizedPath = cleanText(pathname).toLowerCase().replace(/\/+$/, '/') || '/';
        return normalizedPath === '/'
            || NAVIGATION_PATH_PREFIXES.some((prefix) => normalizedPath.startsWith(prefix));
    }
    catch {
        // Anything that cannot be parsed as a URL is not a usable result link.
        return true;
    }
}
|
|
77
|
+
/**
 * Normalize candidate rows and drop those that look irrelevant or navigational.
 *
 * A row is kept when it has a title and URL, its title/context mentions at
 * least one whitespace-separated query token (case-insensitive; an empty query
 * matches everything), and either its URL is not a navigation URL or it has
 * both a date and a procurement keyword in its title/context.
 *
 * @param {string} query - Search keyword(s).
 * @param {Array<{title?: string, url?: string, date?: string, contextText?: string}>} items
 * @returns {Array<{title: string, url: string, date: string, contextText: string}>}
 */
function filterNavigationRows(query, items) {
    const needles = cleanText(query)
        .split(/\s+/)
        .filter(Boolean)
        .map((token) => token.toLowerCase());
    return items.flatMap((item) => {
        const row = {
            title: cleanText(item.title),
            url: cleanText(item.url),
            date: normalizeDate(cleanText(item.date)),
            contextText: cleanText(item.contextText),
        };
        if (!row.title || !row.url) {
            return [];
        }
        const searchable = `${row.title} ${row.contextText}`;
        const lowered = searchable.toLowerCase();
        const mentionsQuery = needles.length === 0 || needles.some((needle) => lowered.includes(needle));
        if (!mentionsQuery) {
            return [];
        }
        if (!isLikelyNavigationUrl(row.url)) {
            return [row];
        }
        // Navigation-looking URLs survive only with strong evidence of a notice.
        const looksLikeNotice = Boolean(row.date) && PROCUREMENT_TITLE_HINT.test(searchable);
        return looksLikeNotice ? [row] : [];
    });
}
|
|
100
|
+
/**
 * Check whether the current page is showing a login or verification wall.
 *
 * The page's body text is tested against AUTH_REQUIRED_HINT first; if that does
 * not match, the decision is delegated to the shared `detectAuthPrompt` helper.
 *
 * @param {object} page - Browser page handle exposing `evaluate`.
 * @returns {Promise<boolean>} Resolves true on a text match, otherwise to whatever `detectAuthPrompt` yields.
 */
async function isAuthRequired(page) {
    const bodyText = await page.evaluate('document.body ? document.body.innerText : ""');
    return AUTH_REQUIRED_HINT.test(cleanText(bodyText)) ? true : detectAuthPrompt(page);
}
|
|
106
|
+
/**
 * Turn a link found in Jianyu data into an absolute URL.
 *
 * - `http://` / `https://` values are returned unchanged.
 * - Protocol-relative values (`//host/...`) get an `https:` scheme.
 * - Root-relative values (`/path`) are resolved against SEARCH_ENTRY.
 * - Anything else (empty, bare relative paths, unresolvable input) yields ''.
 *
 * @param {string} rawUrl - Link text as found in the source data.
 * @returns {string} Absolute URL, or '' when none can be derived.
 */
function toAbsoluteJianyuUrl(rawUrl) {
    const href = cleanText(rawUrl);
    if (!href) {
        return '';
    }
    if (/^https?:\/\//.test(href)) {
        return href;
    }
    if (href.startsWith('//')) {
        return `https:${href}`;
    }
    if (!href.startsWith('/')) {
        return '';
    }
    try {
        return new URL(href, SEARCH_ENTRY).toString();
    }
    catch {
        return '';
    }
}
|
|
124
|
+
/**
 * Extract a publication date embedded in a URL path segment.
 *
 * Looks for a segment of the form `/20YYMMDD` that is followed by `_`, `/` or
 * the end of the string, and returns the first one that is a real calendar
 * date. Segments that merely look like dates (e.g. `/20241399`, or Feb 30) are
 * skipped instead of being reported as dates.
 *
 * @param {string} rawUrl - URL text to inspect.
 * @returns {string} Date as `YYYY-MM-DD`, or '' when no valid date is found.
 */
function extractDateFromJianyuUrl(rawUrl) {
    const value = cleanText(rawUrl);
    if (!value) {
        return '';
    }
    // The terminator is a lookahead so that a '/' ending one segment can still
    // begin the next candidate segment.
    const dateSegment = /\/(20\d{2})(\d{2})(\d{2})(?=[_/]|$)/g;
    for (const [, year, month, day] of value.matchAll(dateSegment)) {
        const monthIndex = Number(month) - 1;
        const dayOfMonth = Number(day);
        // Date.UTC rolls impossible values over; a round-trip mismatch exposes them.
        const probe = new Date(Date.UTC(Number(year), monthIndex, dayOfMonth));
        if (probe.getUTCMonth() === monthIndex && probe.getUTCDate() === dayOfMonth) {
            return `${year}-${month}-${day}`;
        }
    }
    return '';
}
|
|
133
|
+
/**
 * Collect the cleaned string forms of all string/number leaves in a value.
 *
 * Arrays and plain objects are walked recursively; recursion stops once
 * `depth` exceeds 2. Null/undefined, booleans and other primitive kinds
 * contribute nothing, as do leaves that `cleanText` reduces to ''.
 *
 * @param {*} input - Value to walk.
 * @param {number} [depth=0] - Current nesting level (used by the recursion).
 * @returns {string[]} Cleaned leaf strings in traversal order.
 */
function flattenStrings(input, depth = 0) {
    if (input == null || depth > 2) {
        return [];
    }
    const kind = typeof input;
    if (kind === 'string' || kind === 'number') {
        const leaf = cleanText(String(input));
        return leaf ? [leaf] : [];
    }
    if (kind !== 'object') {
        return [];
    }
    const children = Array.isArray(input) ? input : Object.values(input);
    return children.flatMap((child) => flattenStrings(child, depth + 1));
}
|
|
148
|
+
/**
 * Return the first non-empty string/number field of `record` among `keys`.
 *
 * Keys are tried in order; values of other types, and values that `cleanText`
 * reduces to '', are skipped.
 *
 * @param {object} record - Object to read from.
 * @param {string[]} keys - Property names in priority order.
 * @returns {string} Cleaned text of the first usable field, or ''.
 */
function pickString(record, keys) {
    for (const key of keys) {
        const raw = record[key];
        const kind = typeof raw;
        if (kind !== 'string' && kind !== 'number') {
            continue;
        }
        const text = cleanText(String(raw));
        if (text) {
            return text;
        }
    }
    return '';
}
|
|
159
|
+
/**
 * Convert one raw API list entry into a `{ title, url, date, contextText }` row.
 *
 * The URL, title and date are each read from the first usable field among a
 * list of known property names. When no URL field is present, the first nested
 * string that looks like a Jianyu link is used; when no title field is present,
 * the first nested string of at least 8 characters containing a procurement
 * keyword is used. The date falls back to one embedded in the URL.
 *
 * @param {*} item - Raw entry from an API response list.
 * @returns {{title: string, url: string, date: string, contextText: string} | null}
 *   The normalized row, or null when `item` is not an object or lacks a title or URL.
 */
function normalizeApiRow(item) {
    if (!item || typeof item !== 'object') {
        return null;
    }
    const urlKeys = [
        'url',
        'detailUrl',
        'detailURL',
        'link',
        'href',
        'articleUrl',
        'newsUrl',
        'contentUrl',
        'jumpUrl',
        'sourceUrl',
    ];
    const titleKeys = [
        'title',
        'noticeTitle',
        'bidTitle',
        'projectName',
        'name',
        'articleTitle',
        'newsTitle',
        'tenderTitle',
        'contentTitle',
    ];
    const dateKeys = [
        'publishTime',
        'publishDate',
        'pubDate',
        'createTime',
        'time',
        'releaseTime',
        'date',
    ];
    const contextKeys = ['content', 'summary', 'desc', 'description', 'buyer', 'winner', 'agency', 'industry'];
    const nestedStrings = flattenStrings(item);

    let url = toAbsoluteJianyuUrl(pickString(item, urlKeys));
    if (!url) {
        const linkLike = nestedStrings.find((value) => /jianyu360\.cn|\/jybx\/|\/nologin\/content\//i.test(value));
        url = toAbsoluteJianyuUrl(linkLike || '');
    }

    let title = cleanText(pickString(item, titleKeys));
    if (!title) {
        const titleLike = nestedStrings.find((value) => value.length >= 8 && PROCUREMENT_TITLE_HINT.test(value));
        title = titleLike || '';
    }

    const date = normalizeDate(pickString(item, dateKeys)) || extractDateFromJianyuUrl(url);

    // Context is the first descriptive field plus the first six nested strings.
    const contextParts = [pickString(item, contextKeys), ...nestedStrings.slice(0, 6)];
    const contextText = cleanText(contextParts.filter(Boolean).join(' '));

    if (!title || !url) {
        return null;
    }
    return { title, url, date, contextText };
}
|
|
216
|
+
/**
 * Pull `{ title, url }` pairs out of markdown heading links.
 *
 * Only lines that, once trimmed, have the shape `## [title](url)` are used.
 * The split point is the last `](` on the line, so titles may themselves
 * contain brackets. Lines with an empty title or URL are ignored.
 *
 * @param {string} markdown - Markdown text to scan.
 * @returns {Array<{title: string, url: string}>} Links in document order.
 */
function parseSearchIndexMarkdown(markdown) {
    return markdown.split('\n').flatMap((rawLine) => {
        const line = rawLine.trim();
        if (!line.startsWith('## [')) {
            return [];
        }
        const link = line.slice(3); // drop the '## ' marker; starts at '['
        const boundary = link.lastIndexOf('](');
        if (boundary <= 0 || !link.endsWith(')')) {
            return [];
        }
        const title = cleanText(link.slice(1, boundary));
        const url = cleanText(link.slice(boundary + 2, -1));
        return title && url ? [{ title, url }] : [];
    });
}
|
|
234
|
+
/**
 * Resolve a DuckDuckGo redirect link to the URL it points at.
 *
 * Protocol-relative input gets an `https:` scheme. Links whose host is not
 * duckduckgo.com (or a subdomain of it), or that carry no `uddg` parameter,
 * are returned as-is after that normalization. Otherwise the `uddg` target is
 * returned.
 *
 * @param {string} rawUrl - Link as found in the search-index output.
 * @returns {string} Target URL, or '' for empty or unparsable input.
 */
function unwrapDuckDuckGoUrl(rawUrl) {
    const candidate = cleanText(rawUrl);
    if (!candidate) {
        return '';
    }
    const normalized = candidate.startsWith('//') ? `https:${candidate}` : candidate;
    try {
        const parsed = new URL(normalized);
        const host = parsed.hostname.toLowerCase();
        // Require an exact host or a dot-delimited subdomain; a bare suffix test
        // would also accept look-alike hosts such as 'notduckduckgo.com'.
        const isDuckDuckGo = host === 'duckduckgo.com' || host.endsWith('.duckduckgo.com');
        if (!isDuckDuckGo) {
            return normalized;
        }
        // searchParams.get() already percent-decodes the value. Decoding it a
        // second time would corrupt escapes that belong to the target URL
        // itself (e.g. an encoded '/' or '?' inside a path segment).
        const target = parsed.searchParams.get('uddg');
        return target || normalized;
    }
    catch {
        return '';
    }
}
|
|
27
|
-
function
|
|
28
|
-
const
|
|
258
|
+
/**
 * Tell whether a URL points at jianyu360.cn or one of its subdomains.
 *
 * The host must equal `jianyu360.cn` or end with `.jianyu360.cn`; a bare
 * suffix comparison would also accept unrelated domains such as
 * `evil-jianyu360.cn`.
 *
 * @param {string} rawUrl - Absolute URL to test.
 * @returns {boolean} False for empty or unparsable input.
 */
function isJianyuHost(rawUrl) {
    const value = cleanText(rawUrl);
    if (!value) {
        return false;
    }
    try {
        const host = new URL(value).hostname.toLowerCase();
        return host === 'jianyu360.cn' || host.endsWith('.jianyu360.cn');
    }
    catch {
        return false;
    }
}
|
|
269
|
+
/**
 * List the query strings to try against the search index.
 *
 * The full cleaned query comes first, followed by each of its
 * whitespace-separated tokens. Empty values and repeats are dropped, keeping
 * the first occurrence.
 *
 * @param {string} query - Search keyword(s).
 * @returns {string[]} Distinct query variants in priority order.
 */
function buildIndexQueryVariants(query) {
    const tokens = cleanText(query).split(/\s+/).filter(Boolean);
    const unique = new Set();
    for (const candidate of [cleanText(query), ...tokens]) {
        const text = cleanText(candidate);
        if (text) {
            unique.add(text);
        }
    }
    return [...unique];
}
|
|
283
|
+
/**
 * Best-effort fallback: look up Jianyu pages through the search-index proxy.
 *
 * Each query variant is sent as a `site:jianyu360.cn` search. The markdown
 * response is parsed for heading links, redirect wrappers are removed, and
 * only links on a Jianyu host are kept. Failed, non-OK or timed-out requests
 * are skipped so that one bad variant does not abort the whole lookup.
 *
 * @param {string} query - Search keyword(s).
 * @param {number} limit - Maximum number of rows to return.
 * @returns {Promise<Array<{title: string, url: string, date: string, contextText: string}>>}
 */
async function fetchDuckDuckGoIndexRows(query, limit) {
    // Bound each request so a stalled proxy cannot hang the command forever.
    const requestTimeoutMs = 15000;
    const results = [];
    const seen = new Set();
    for (const variant of buildIndexQueryVariants(query)) {
        if (results.length >= limit) {
            break;
        }
        const fullQuery = `site:jianyu360.cn ${variant}`;
        const url = `${SEARCH_INDEX_PROXY}${encodeURIComponent(fullQuery)}`;
        let responseText = '';
        try {
            const response = await fetch(url, {
                headers: {
                    Accept: 'text/plain, text/markdown, */*',
                    'User-Agent': 'opencli-jianyu-search/1.0',
                },
                signal: AbortSignal.timeout(requestTimeoutMs),
            });
            if (!response.ok) {
                continue;
            }
            responseText = await response.text();
        }
        catch {
            // Network error or timeout: this is a fallback, so try the next variant.
            continue;
        }
        for (const row of parseSearchIndexMarkdown(responseText)) {
            const unwrapped = unwrapDuckDuckGoUrl(row.url);
            const absoluteUrl = toAbsoluteJianyuUrl(unwrapped) || cleanText(unwrapped);
            if (!isJianyuHost(absoluteUrl)) {
                continue;
            }
            // Key on the same cleaned title that is stored in the result.
            const title = cleanText(row.title);
            const key = `${title}\t${absoluteUrl}`;
            if (seen.has(key)) {
                continue;
            }
            seen.add(key);
            results.push({
                title,
                url: absoluteUrl,
                date: extractDateFromJianyuUrl(absoluteUrl),
                contextText: cleanText(`${row.title} ${variant}`),
            });
            if (results.length >= limit) {
                break;
            }
        }
    }
    return results;
}
|
|
328
|
+
/**
 * Query Jianyu's in-site search endpoints from inside the browser page.
 *
 * Navigates to the search URL, waits, then runs a script in the page that
 * POSTs the same search body to `/jyapi/jybx/core/<type>/searchList` for each
 * entry of JIANYU_API_TYPES, with credentials included. The script reports a
 * `challenge` flag when any response carries `antiVerify === -1`.
 *
 * Any failure (navigation, evaluation, row collection) results in an empty,
 * non-challenge outcome rather than an exception.
 *
 * @param {object} page - Browser page handle exposing `goto`, `wait` and `evaluate`.
 * @param {string} query - Search keyword(s).
 * @param {number} limit - Requested result count; used to size the API page.
 * @returns {Promise<{rows: Array<object>, challenge: boolean}>}
 */
async function fetchJianyuApiRows(page, query, limit) {
    try {
        await page.goto(buildSearchUrl(query));
        await page.wait(2);
        // The script below executes in the page, not in Node; only the three
        // interpolated values are computed here.
        const inPageScript = `
      (async () => {
        const now = Math.floor(Date.now() / 1000);
        const body = {
          searchGroup: 1,
          reqType: 'lastNews',
          pageNum: 1,
          pageSize: Math.max(20, Math.min(${Math.max(20, limit)}, 50)),
          keyWords: ${JSON.stringify(query)},
          searchMode: 0,
          bidField: '',
          publishTime: \`\${now - 3600 * 24 * 365 * 3}-\${now}\`,
          selectType: 'title,content',
          subtype: '',
          exclusionWords: '',
          buyer: '',
          winner: '',
          agency: '',
          industry: '',
          province: '',
          city: '',
          district: '',
          buyerClass: '',
          fileExists: '',
          price: '',
          buyerTel: '',
          winnerTel: '',
        };
        const responses = [];
        const types = ${JSON.stringify([...JIANYU_API_TYPES])};
        for (const type of types) {
          try {
            const response = await fetch('/jyapi/jybx/core/' + type + '/searchList', {
              method: 'POST',
              headers: {
                Accept: 'application/json, text/plain, */*',
                'Content-Type': 'application/json',
              },
              credentials: 'include',
              body: JSON.stringify(body),
            });
            let raw = null;
            try {
              raw = await response.json();
            } catch {
              raw = null;
            }
            const dataList = raw && raw.data && Array.isArray(raw.data.list) ? raw.data.list : [];
            responses.push({
              type,
              ok: response.ok,
              status: response.status,
              payload: {
                antiVerify: raw && typeof raw.antiVerify === 'number' ? raw.antiVerify : undefined,
                error_code: raw && typeof raw.error_code === 'number' ? raw.error_code : undefined,
                hasLogin: raw && typeof raw.hasLogin === 'boolean' ? raw.hasLogin : undefined,
                textVerify: raw && typeof raw.textVerify === 'string' ? raw.textVerify.slice(0, 16) : undefined,
                list: dataList,
              },
            });
          } catch {
            responses.push({
              type,
              ok: false,
              status: 0,
            });
          }
        }
        const challenge = responses.some((item) => item && item.payload && item.payload.antiVerify === -1);
        return { challenge, responses };
      })()
    `;
        const outcome = await page.evaluate(inPageScript);
        const responseList = Array.isArray(outcome?.responses) ? outcome.responses : [];
        return {
            rows: collectApiRowsFromResponses(responseList),
            challenge: Boolean(outcome?.challenge),
        };
    }
    catch {
        return { rows: [], challenge: false };
    }
}
|
|
413
|
+
/**
 * Flatten per-endpoint API responses into a de-duplicated list of rows.
 *
 * Responses without an object `payload` holding an array `list` are ignored.
 * Each list entry goes through `normalizeApiRow`; entries it rejects are
 * dropped, and rows repeating an earlier title + URL pair are skipped.
 *
 * @param {Array<*>} responses - Response descriptors as returned by the in-page script.
 * @returns {Array<{title: string, url: string, date: string, contextText: string}>}
 */
function collectApiRowsFromResponses(responses) {
    const isObject = (value) => Boolean(value) && typeof value === 'object';
    const uniqueRows = new Map();
    for (const response of responses) {
        if (!isObject(response) || !isObject(response.payload)) {
            continue;
        }
        const entries = response.payload.list;
        if (!Array.isArray(entries)) {
            continue;
        }
        for (const entry of entries) {
            const row = normalizeApiRow(entry);
            if (!row) {
                continue;
            }
            const key = `${row.title}\t${row.url}`;
            if (!uniqueRows.has(key)) {
                uniqueRows.set(key, row);
            }
        }
    }
    // Map iteration follows insertion order, so first-seen order is preserved.
    return [...uniqueRows.values()];
}
|
|
39
439
|
/*
 * Registers the `jianyu search` command.
 *
 * Result sources, in order:
 *   1. the in-site search API, called from within the page;
 *   2. rows extracted from the candidate search pages;
 *   3. only when both are empty, the external search-index fallback.
 * If every source is empty and a verification challenge or login wall is
 * detected, an AuthRequiredError is thrown; otherwise an empty result set is
 * converted and returned.
 */
cli({
    site: SITE,
    name: 'search',
    description: '搜索剑鱼标讯公告',
    domain: DOMAIN,
    strategy: Strategy.COOKIE,
    browser: true,
    args: [
        { name: 'query', required: true, positional: true, help: 'Search keyword, e.g. "procurement"' },
        { name: 'limit', type: 'int', default: 20, help: 'Number of results (max 50)' },
    ],
    columns: ['rank', 'content_type', 'title', 'publish_time', 'project_code', 'budget_or_limit', 'url'],
    func: async (page, kwargs) => {
        const query = cleanText(kwargs.query);
        // Clamp to 1..50; a missing or non-numeric limit falls back to 20.
        const limit = Math.max(1, Math.min(Number(kwargs.limit) || 20, 50));
        const keepRelevant = (items) => dedupeCandidates(filterNavigationRows(query, items));
        const toRecords = (items) => toProcurementSearchRecords(items, {
            site: SITE,
            query,
            limit,
        });

        const apiResult = await fetchJianyuApiRows(page, query, limit);
        const apiRows = keepRelevant(apiResult.rows);
        const extractedRows = await searchRowsFromEntries(page, {
            query,
            candidateUrls: siteSearchCandidates(query),
            allowedHostFragments: ['jianyu360.cn'],
            limit,
        });
        const domRows = keepRelevant(extractedRows);
        const rows = dedupeCandidates([...apiRows, ...domRows]);
        if (rows.length > 0) {
            return toRecords(rows);
        }

        const indexedRows = keepRelevant(await fetchDuckDuckGoIndexRows(query, limit));
        if (indexedRows.length > 0) {
            return toRecords(indexedRows);
        }
        if (apiResult.challenge || await isAuthRequired(page)) {
            throw new AuthRequiredError(DOMAIN, '[taxonomy=selector_drift] site=jianyu command=search blocked by human verification / access challenge');
        }
        return toRecords(rows);
    },
});
|
|
131
485
|
/**
 * Internal helpers exposed for unit tests only.
 * `buildSearchCandidates` here refers to this module's `siteSearchCandidates`,
 * not to the shared helper of the same name.
 */
export const __test__ = {
    buildSearchCandidates: siteSearchCandidates,
    buildSearchUrl,
    normalizeDate,
    dedupeCandidates,
    filterNavigationRows,
    parseSearchIndexMarkdown,
    unwrapDuckDuckGoUrl,
    extractDateFromJianyuUrl,
    normalizeApiRow,
    fetchJianyuApiRows,
    collectApiRowsFromResponses,
};
|