@jackwener/opencli 1.7.6 → 1.7.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -8
- package/README.zh-CN.md +14 -8
- package/cli-manifest.json +469 -11
- package/clis/51job/company.js +125 -0
- package/clis/51job/detail.js +108 -0
- package/clis/51job/hot.js +55 -0
- package/clis/51job/search.js +79 -0
- package/clis/51job/utils.js +302 -0
- package/clis/51job/utils.test.js +69 -0
- package/clis/amazon/discussion.js +37 -6
- package/clis/amazon/discussion.test.js +147 -32
- package/clis/bilibili/video.js +11 -4
- package/clis/bilibili/video.test.js +51 -0
- package/clis/chatgpt/image.js +1 -1
- package/clis/chatgpt-app/ask.js +3 -19
- package/clis/chatgpt-app/ax.js +132 -1
- package/clis/chatgpt-app/ax.test.js +23 -0
- package/clis/chatgpt-app/send.js +2 -21
- package/clis/deepseek/ask.js +50 -18
- package/clis/deepseek/ask.test.js +195 -2
- package/clis/deepseek/utils.js +113 -29
- package/clis/deepseek/utils.test.js +109 -1
- package/clis/gemini/image.js +1 -1
- package/clis/instagram/download.js +1 -1
- package/clis/powerchina/search.js +250 -0
- package/clis/powerchina/search.test.js +67 -0
- package/clis/sinafinance/stock.js +5 -2
- package/clis/sinafinance/stock.test.js +59 -0
- package/clis/toutiao/articles.js +81 -0
- package/clis/toutiao/articles.test.js +23 -0
- package/clis/twitter/likes.js +3 -2
- package/clis/twitter/search.js +4 -2
- package/clis/twitter/search.test.js +4 -0
- package/clis/twitter/shared.js +28 -0
- package/clis/twitter/shared.test.js +96 -0
- package/clis/twitter/thread.js +3 -1
- package/clis/twitter/timeline.js +3 -2
- package/clis/twitter/tweets.js +3 -2
- package/clis/twitter/tweets.test.js +1 -1
- package/clis/web/read.js +25 -5
- package/clis/web/read.test.js +76 -0
- package/clis/weixin/create-draft.js +225 -0
- package/clis/weixin/drafts.js +65 -0
- package/clis/weixin/drafts.test.js +65 -0
- package/clis/weread/ai-outline.js +170 -0
- package/clis/weread/ai-outline.test.js +83 -0
- package/clis/weread/book.js +57 -44
- package/clis/weread/commands.test.js +24 -0
- package/clis/xiaoyuzhou/podcast-episodes.js +2 -2
- package/clis/xiaoyuzhou/podcast-episodes.test.js +78 -0
- package/dist/src/browser/analyze.d.ts +103 -0
- package/dist/src/browser/analyze.js +230 -0
- package/dist/src/browser/analyze.test.d.ts +1 -0
- package/dist/src/browser/analyze.test.js +164 -0
- package/dist/src/browser/article-extract.d.ts +57 -0
- package/dist/src/browser/article-extract.e2e.test.d.ts +1 -0
- package/dist/src/browser/article-extract.e2e.test.js +105 -0
- package/dist/src/browser/article-extract.js +169 -0
- package/dist/src/browser/article-extract.test.d.ts +1 -0
- package/dist/src/browser/article-extract.test.js +94 -0
- package/dist/src/browser/cdp.js +11 -2
- package/dist/src/browser/verify-fixture.d.ts +59 -0
- package/dist/src/browser/verify-fixture.js +213 -0
- package/dist/src/browser/verify-fixture.test.d.ts +1 -0
- package/dist/src/browser/verify-fixture.test.js +161 -0
- package/dist/src/cli.d.ts +32 -0
- package/dist/src/cli.js +333 -43
- package/dist/src/cli.test.js +257 -1
- package/dist/src/commanderAdapter.js +12 -0
- package/dist/src/commanderAdapter.test.js +11 -0
- package/dist/src/daemon.d.ts +3 -2
- package/dist/src/daemon.js +16 -4
- package/dist/src/daemon.test.d.ts +1 -0
- package/dist/src/daemon.test.js +19 -0
- package/dist/src/download/article-download.d.ts +12 -0
- package/dist/src/download/article-download.js +141 -17
- package/dist/src/download/article-download.test.js +196 -0
- package/dist/src/download/index.js +73 -86
- package/dist/src/errors.js +4 -2
- package/dist/src/errors.test.js +13 -0
- package/dist/src/launcher.d.ts +1 -1
- package/dist/src/launcher.js +3 -3
- package/dist/src/output.js +1 -1
- package/dist/src/output.test.js +6 -0
- package/package.json +5 -1
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 51job job detail by jobId.
|
|
3
|
+
*
|
|
4
|
+
* Navigates to `jobs.51job.com/x/<jobId>.html` (SSR page — the generic `/x/`
|
|
5
|
+
* area slug always resolves) and scrapes the structured blocks. No API
|
|
6
|
+
* surface returns the full detail page, so DOM scraping is the only path.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
10
|
+
import { CliError } from '@jackwener/opencli/errors';
|
|
11
|
+
import { JOBS_ORIGIN, requirePage, navigateTo } from './utils.js';
|
|
12
|
+
|
|
13
|
+
// Registers the `51job detail` command: navigates to the SSR job page for a
// numeric jobId, scrapes the structured blocks in page context, and returns a
// single normalized row.
cli({
    site: '51job',
    name: 'detail',
    description: '51job 职位详情(按 jobId)',
    domain: 'jobs.51job.com',
    strategy: Strategy.COOKIE,
    browser: true,
    navigateBefore: false,
    args: [
        { name: 'jobId', type: 'string', required: true, positional: true, help: '职位 ID(search 返回的 jobId)' },
    ],
    columns: [
        'jobId', 'title', 'salary', 'location', 'workYear', 'degree',
        'category', 'address', 'ageRequirement',
        'description', 'welfare',
        'company', 'companyType', 'companySize', 'companyIndustry',
        'companyUrl', 'url',
    ],
    func: async (page, kwargs) => {
        requirePage(page);
        const jobId = String(kwargs.jobId ?? '').trim();
        if (!jobId) throw new CliError('INVALID_ARGUMENT', 'jobId is required');
        if (!/^\d{6,12}$/.test(jobId)) throw new CliError('INVALID_ARGUMENT', `jobId must be a 6-12 digit number, got "${jobId}"`);

        const url = `${JOBS_ORIGIN}/x/${jobId}.html`;
        await navigateTo(page, url, 2);

        // Runs inside the page. Backslashes are doubled because this is a
        // template literal: the page receives `\s` / `\n` in the regex.
        const script = `(() => {
            const sel = s => document.querySelector(s)?.innerText?.trim() || '';
            const all = s => [...document.querySelectorAll(s)].map(e => e.innerText.trim()).filter(Boolean);
            const finalUrl = window.location.href;
            const bodyText = (document.body.innerText || '').slice(0, 400);
            if (/职位已下线|该职位已删除|页面不存在/.test(bodyText)) {
                return { error: 'EXPIRED', bodyText };
            }
            const companyA = document.querySelector('.cname a, .tCompany_sidebar .com_msg a');
            const funcs = all('.bmsg .fp');
            const pick = (prefix) => {
                const row = funcs.find(f => f.startsWith(prefix));
                return row ? row.slice(prefix.length).replace(/^[::\\s\\n]+/, '').trim() : '';
            };
            return {
                finalUrl,
                title: sel('h1') || sel('.cn .name'),
                salary: sel('.cn strong') || sel('strong'),
                meta: sel('.cn .msg.ltype') || sel('.msg.ltype'),
                description: (() => {
                    const box = document.querySelector('.bmsg.job_msg') || document.querySelector('.job_msg');
                    if (!box) return '';
                    const clone = box.cloneNode(true);
                    clone.querySelectorAll('.fp, .mt10, script, style').forEach(n => n.remove());
                    return (clone.innerText || '').trim();
                })(),
                welfare: all('.t1 span, .jtag .t1 span'),
                category: pick('职能类别'),
                address: pick('上班地址'),
                ageRequirement: pick('年龄要求'),
                company: companyA?.innerText?.trim() || '',
                companyUrl: companyA?.href || '',
                companyTag: sel('.com_tag'),
            };
        })()`;
        const data = await page.evaluate(script);
        // A null / non-object result (e.g. the evaluation was interrupted) is
        // reported as NO_DATA instead of crashing with a TypeError below.
        if (!data || typeof data !== 'object') {
            throw new CliError('NO_DATA', `Could not parse job detail for ${jobId}; page may have changed layout`);
        }
        if (data.error === 'EXPIRED') {
            throw new CliError('NO_DATA', `Job ${jobId} is offline or removed`);
        }
        if (!data.title) {
            throw new CliError('NO_DATA', `Could not parse job detail for ${jobId}; page may have changed layout`);
        }

        // meta looks like "北京-丰台区 | 3年及以上 | 本科"
        const [locRaw, workYear, degree] = (data.meta || '').split('|').map(s => s.trim());
        // companyTag looks like "国企\n\n150-500人\n\n电子技术/半导体/集成电路"
        const tagParts = (data.companyTag || '').split(/\n+/).map(s => s.trim()).filter(Boolean);

        return [{
            jobId,
            title: data.title,
            salary: data.salary || '',
            location: locRaw || '',
            workYear: workYear || '',
            degree: degree || '',
            category: data.category || '',
            address: data.address || '',
            ageRequirement: data.ageRequirement || '',
            description: data.description || '',
            welfare: (data.welfare || []).join(','),
            company: data.company || '',
            companyType: tagParts[0] || '',
            companySize: tagParts[1] || '',
            companyIndustry: tagParts.slice(2).join(' / '),
            companyUrl: data.companyUrl || '',
            url: data.finalUrl || url,
        }];
    },
});
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 51job hot / recommended feed.
|
|
3
|
+
*
|
|
4
|
+
* Same endpoint as `search`, but with empty keyword — 51job returns its
|
|
5
|
+
* own ranked recommendation list (up to ~999 for most regions).
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
9
|
+
import { CliError } from '@jackwener/opencli/errors';
|
|
10
|
+
import {
|
|
11
|
+
WE_ORIGIN, SEARCH_COLUMNS, SORT_CODES,
|
|
12
|
+
requirePage, navigateTo, pageFetchJson,
|
|
13
|
+
buildSearchUrl, mapJobItem, resolveCity, resolveCode,
|
|
14
|
+
} from './utils.js';
|
|
15
|
+
|
|
16
|
+
// Registers the `51job hot` command: queries the search-pc endpoint with an
// empty keyword from inside the page and maps the items to SEARCH_COLUMNS rows.
cli({
    site: '51job',
    name: 'hot',
    description: '51job 推荐职位(按城市/行业/排序浏览)',
    domain: 'we.51job.com',
    strategy: Strategy.COOKIE,
    browser: true,
    navigateBefore: false,
    args: [
        { name: 'area', type: 'string', default: '全国', help: '城市名或 6 位城市码(默认 "全国")' },
        { name: 'sort', type: 'string', default: '综合', help: '排序:综合 / 最新 / 薪资 / 距离' },
        { name: 'page', type: 'int', default: 1, help: '页码(1-based)' },
        { name: 'limit', type: 'int', default: 20, help: '返回条数(1-50)' },
    ],
    columns: SEARCH_COLUMNS,
    func: async (page, kwargs) => {
        requirePage(page);
        // limit is clamped to 1..50 here, so it doubles as the page size.
        const limit = Math.max(1, Math.min(Number(kwargs.limit) || 20, 50));
        const pageNum = Math.max(1, Number(kwargs.page) || 1);
        const jobArea = resolveCity(kwargs.area);
        const sortType = resolveCode(kwargs.sort, SORT_CODES, '0');

        // Only navigate when the tab is not already on the we.51job.com origin.
        const currentUrl = await page.evaluate(`(() => window.location.href)()`);
        if (!String(currentUrl).startsWith(WE_ORIGIN)) {
            await navigateTo(page, `${WE_ORIGIN}/pc/search?searchType=2`, 2);
        }

        const url = buildSearchUrl({
            keyword: '', jobArea, sortType,
            pageNum, pageSize: limit,
        });
        const data = await pageFetchJson(page, url);
        // Null-safe: a JSON `null` body becomes API_ERROR rather than a TypeError.
        const status = data?.status;
        if (status !== '1' && status !== 1) {
            throw new CliError('API_ERROR', `51job hot failed: ${data?.message ?? 'unknown'}`);
        }
        const items = data?.resultbody?.job?.items ?? [];
        if (items.length === 0) throw new CliError('NO_DATA', 'No recommended jobs returned');
        // rank continues across pages: page 2 with limit 20 starts at 21.
        return items.slice(0, limit).map((it, i) => mapJobItem(it, (pageNum - 1) * limit + i + 1));
    },
});
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 51job keyword search.
|
|
3
|
+
*
|
|
4
|
+
* Backed by `we.51job.com/api/job/search-pc`, which returns a job list with
|
|
5
|
+
* the full `jobDescribe` embedded. Needs the browser session because the
|
|
6
|
+
* Aliyun WAF in front of `we.51job.com` challenges bare fetches; the
|
|
7
|
+
* `pageFetchJson` helper runs inside the page so the WAF sees a real browser.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
11
|
+
import { CliError } from '@jackwener/opencli/errors';
|
|
12
|
+
import {
|
|
13
|
+
WE_ORIGIN, SEARCH_COLUMNS,
|
|
14
|
+
SALARY_CODES, WORKYEAR_CODES, DEGREE_CODES,
|
|
15
|
+
COMPANY_TYPE_CODES, COMPANY_SIZE_CODES, SORT_CODES,
|
|
16
|
+
requirePage, navigateTo, pageFetchJson,
|
|
17
|
+
buildSearchUrl, mapJobItem, resolveCity, resolveCode,
|
|
18
|
+
} from './utils.js';
|
|
19
|
+
|
|
20
|
+
// Registers the `51job search` command: resolves the human-readable filters to
// API codes, fetches search-pc from inside the page, and maps the items to
// SEARCH_COLUMNS rows.
cli({
    site: '51job',
    name: 'search',
    description: '51job 前程无忧关键词职位搜索',
    domain: 'we.51job.com',
    strategy: Strategy.COOKIE,
    browser: true,
    navigateBefore: false,
    args: [
        { name: 'keyword', type: 'string', required: true, positional: true, help: '搜索关键词(岗位名 / 技能 / 公司)' },
        { name: 'area', type: 'string', default: '全国', help: '城市名或 6 位城市码(如 "杭州" / "080200" / "全国")' },
        { name: 'salary', type: 'string', default: '', help: '薪资区间(如 "10-15k" / "1-1.5万" / "20-30k")' },
        { name: 'experience', type: 'string', default: '', help: '工作年限(如 "应届" / "1-3年" / "3-5年" / "5-7年")' },
        { name: 'degree', type: 'string', default: '', help: '学历要求(如 "本科" / "大专" / "硕士")' },
        { name: 'companyType', type: 'string', default: '', help: '公司性质(如 "外资" / "国企" / "民营")' },
        { name: 'companySize', type: 'string', default: '', help: '公司规模(如 "50-150" / "1000-5000")' },
        { name: 'sort', type: 'string', default: '综合', help: '排序:综合 / 最新 / 薪资 / 距离' },
        { name: 'page', type: 'int', default: 1, help: '页码(1-based)' },
        { name: 'limit', type: 'int', default: 20, help: '返回条数(1-50)' },
    ],
    columns: SEARCH_COLUMNS,
    func: async (page, kwargs) => {
        requirePage(page);
        const keyword = String(kwargs.keyword ?? '').trim();
        if (!keyword) throw new CliError('INVALID_ARGUMENT', 'keyword is required');
        // limit is clamped to 1..50 here, so it doubles as the page size.
        const limit = Math.max(1, Math.min(Number(kwargs.limit) || 20, 50));
        const pageNum = Math.max(1, Number(kwargs.page) || 1);

        const jobArea = resolveCity(kwargs.area);
        const salary = resolveCode(kwargs.salary, SALARY_CODES);
        const workYear = resolveCode(kwargs.experience, WORKYEAR_CODES);
        const degree = resolveCode(kwargs.degree, DEGREE_CODES);
        const companyType = resolveCode(kwargs.companyType, COMPANY_TYPE_CODES);
        const companySize = resolveCode(kwargs.companySize, COMPANY_SIZE_CODES);
        const sortType = resolveCode(kwargs.sort, SORT_CODES, '0');

        // Establish a WAF-clean origin. Reusing the same tab avoids the slider
        // challenge firing on every call.
        const currentUrl = await page.evaluate(`(() => window.location.href)()`);
        if (!String(currentUrl).startsWith(WE_ORIGIN)) {
            await navigateTo(page, `${WE_ORIGIN}/pc/search?keyword=${encodeURIComponent(keyword)}&searchType=2`, 2);
        }

        const url = buildSearchUrl({
            keyword, jobArea, salary, workYear, degree,
            companyType, companySize, sortType,
            pageNum, pageSize: limit,
        });

        const data = await pageFetchJson(page, url);
        // Null-safe: a JSON `null` body becomes API_ERROR rather than a TypeError.
        const status = data?.status;
        if (status !== '1' && status !== 1) {
            throw new CliError('API_ERROR', `51job search failed: ${data?.message ?? 'unknown'}`);
        }
        const items = data?.resultbody?.job?.items ?? [];
        if (items.length === 0) {
            throw new CliError('NO_DATA', `No jobs matched "${keyword}"`);
        }
        // rank continues across pages: page 2 with limit 20 starts at 21.
        return items.slice(0, limit).map((it, i) => mapJobItem(it, (pageNum - 1) * limit + i + 1));
    },
});
|
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 51job shared utilities.
|
|
3
|
+
*
|
|
4
|
+
* Key design points:
|
|
5
|
+
* - we.51job.com is protected by Aliyun WAF — bare `curl` / Node-side fetch
|
|
6
|
+
* gets a slider CAPTCHA HTML page. Only browser-context fetch (page.evaluate)
|
|
7
|
+
* with the session's cookies survives the challenge.
|
|
8
|
+
* - `document.cookie` exposes the anti-bot cookies (`acw_sc__v2`, `ssxmod_itna`
|
|
9
|
+
* etc.) — no HttpOnly/login needed for public pages.
|
|
10
|
+
* - API (`we.51job.com/api/job/search-pc`) is same-origin when we've navigated
|
|
11
|
+
* to `https://we.51job.com/...`, so fetch inside page.evaluate works.
|
|
12
|
+
* - Detail / company pages live on `jobs.51job.com` and render data into the
|
|
13
|
+
* DOM (SSR), so adapters for those navigate and scrape.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { CliError } from '@jackwener/opencli/errors';
|
|
17
|
+
|
|
18
|
+
export const WE_ORIGIN = 'https://we.51job.com';
|
|
19
|
+
export const JOBS_ORIGIN = 'https://jobs.51job.com';
|
|
20
|
+
|
|
21
|
+
/**
|
|
22
|
+
* City name / alias → 6-digit jobArea code. `000000` is the national bucket.
|
|
23
|
+
* Covers the 40 largest cities the search UI surfaces. A 6-digit input is
|
|
24
|
+
* passed through as-is; any other unknown input is rejected by `resolveCity`.
|
|
25
|
+
*/
|
|
26
|
+
// Each row is [jobArea code, ...accepted names]; rows are expanded in order
// into a flat `name -> code` lookup, so key order matches the row order.
export const CITY_CODES = Object.fromEntries([
    ['000000', '全国', 'all'],
    ['010000', '北京', 'beijing'],
    ['020000', '上海', 'shanghai'],
    ['030200', '广州', 'guangzhou'],
    ['040000', '深圳', 'shenzhen'],
    ['180200', '武汉', 'wuhan'],
    ['200200', '西安', "xi'an", 'xian'],
    ['080200', '杭州', 'hangzhou'],
    ['070200', '南京', 'nanjing'],
    ['090200', '成都', 'chengdu'],
    ['070300', '苏州', 'suzhou'],
    ['060000', '重庆', 'chongqing'],
    ['050000', '天津', 'tianjin'],
    ['190200', '长沙', 'changsha'],
    ['170200', '郑州', 'zhengzhou'],
    ['120300', '青岛', 'qingdao'],
    ['150200', '合肥', 'hefei'],
    ['110300', '厦门', 'xiamen'],
    ['070400', '无锡', 'wuxi'],
    ['120200', '济南', 'jinan'],
    ['030700', '佛山', 'foshan'],
    ['030800', '东莞', 'dongguan'],
    ['080300', '宁波', 'ningbo'],
    ['110200', '福州', 'fuzhou'],
    ['250200', '昆明', 'kunming'],
    ['230300', '大连', 'dalian'],
    ['230200', '沈阳', 'shenyang'],
    ['220200', '哈尔滨', 'haerbin', 'harbin'],
    ['160200', '石家庄', 'shijiazhuang'],
    ['260200', '贵阳', 'guiyang'],
    ['100200', '南宁', 'nanning'],
    ['130200', '南昌', 'nanchang'],
    ['240200', '长春', 'changchun'],
    ['210200', '太原', 'taiyuan'],
    ['280200', '兰州', 'lanzhou'],
    ['310200', '乌鲁木齐', 'urumqi'],
    ['270200', '海口', 'haikou'],
    ['330000', '香港', 'hongkong', 'hk'],
].flatMap(([code, ...names]) => names.map((name) => [name, code])));
|
|
66
|
+
|
|
67
|
+
/**
 * Salary label → code sent as the search `salary` filter. Several labels are
 * aliases of the same bucket (Chinese "万" form and "k" form).
 */
export const SALARY_CODES = {
    '不限': '',
    '2千以下': '01',
    '2-3千': '02',
    '3-4.5千': '03',
    '4.5-6千': '04',
    '6-8千': '05',
    '8k-1万': '06',
    '8-10k': '06',
    '1-1.5万': '07',
    '10-15k': '07',
    '1.5-2万': '08',
    '15-20k': '08',
    '2-3万': '09',
    '20-30k': '09',
    '3-5万': '10',
    '30-50k': '10',
    '5万以上': '11',
    '50k以上': '11',
};
|
|
78
|
+
|
|
79
|
+
/** Work-experience label → code sent as the search `workYear` filter. */
export const WORKYEAR_CODES = {
    '不限': '',
    '在校生': '01',
    '应届': '02',
    '1年以下': '03',
    '1-3年': '04',
    '3-5年': '05',
    '5-7年': '06',
    '7-10年': '07',
    '10年以上': '08',
};
|
|
86
|
+
|
|
87
|
+
/** Degree label → code sent as the search `degree` filter ('高中' is an alias). */
export const DEGREE_CODES = {
    '不限': '',
    '初中及以下': '01',
    '高中/中技/中专': '02',
    '高中': '02',
    '大专': '03',
    '本科': '04',
    '硕士': '05',
    '博士': '06',
};
|
|
93
|
+
|
|
94
|
+
/** Company ownership label → code sent as the search `companyType` filter. */
export const COMPANY_TYPE_CODES = {
    '不限': '',
    '外资': '01',
    '欧美': '0101',
    '日韩': '0102',
    '合资': '02',
    '国企': '03',
    '民营': '04',
    '上市公司': '05',
    '创业公司': '06',
    '事业单位': '07',
    '非营利': '08',
    '政府': '09',
};
|
|
102
|
+
|
|
103
|
+
/** Headcount label → code sent as the search `companySize` filter. */
export const COMPANY_SIZE_CODES = {
    '不限': '',
    '少于50': '01',
    '50以下': '01',
    '50-150': '02',
    '150-500': '03',
    '500-1000': '04',
    '1000-5000': '05',
    '5000-10000': '06',
    '10000以上': '07',
};
|
|
111
|
+
|
|
112
|
+
/** Sort label (Chinese or English alias) → code sent as `sortType`. */
export const SORT_CODES = {
    '综合': '0',
    'relevance': '0',
    'default': '0',
    '最新': '1',
    'new': '1',
    'newest': '1',
    '薪资': '2',
    'salary': '2',
    'pay': '2',
    '距离': '9',
    'distance': '9',
};
|
|
119
|
+
|
|
120
|
+
/**
 * Resolve a user-supplied city / area into a 6-digit jobArea code.
 *
 * Resolution order: empty / "全国" / "all" → '000000'; a 6-digit string is
 * returned unchanged; exact name match; lowercase name match; first table name
 * containing the input as a substring (case-sensitive pass first, then
 * case-insensitive).
 *
 * Lookups use own-property checks so inherited members such as "constructor"
 * are never mistaken for city names.
 *
 * @param {unknown} input - City name, alias, or 6-digit code.
 * @returns {string} The jobArea code.
 * @throws {CliError} INVALID_ARGUMENT when the input matches nothing.
 */
export function resolveCity(input) {
    if (!input) return '000000';
    const s = String(input).trim();
    const key = s.toLowerCase();
    if (!s || s === '全国' || key === 'all') return '000000';
    if (/^\d{6}$/.test(s)) return s;
    const isKnown = (name) => Object.prototype.hasOwnProperty.call(CITY_CODES, name);
    if (isKnown(s)) return CITY_CODES[s];
    if (isKnown(key)) return CITY_CODES[key];
    const names = Object.keys(CITY_CODES);
    // Case-sensitive pass first keeps the historical match order; the second
    // pass lets mixed-case partial input (e.g. "Hang") resolve as well.
    const hit = names.find((name) => name.includes(s)) ?? names.find((name) => name.includes(key));
    if (hit !== undefined) return CITY_CODES[hit];
    throw new CliError('INVALID_ARGUMENT', `Unknown city/area "${s}"`, 'Use a supported city name like "杭州" or a 6-digit city code');
}
|
|
133
|
+
|
|
134
|
+
/**
 * Resolve a human-readable filter label into its API code using `table`.
 *
 * Resolution order: exact label; lowercase label; the input already being one
 * of the table's codes; first label containing the input as a substring
 * (case-sensitive pass first, then case-insensitive). Anything else yields
 * `fallback`.
 *
 * Lookups use own-property checks so inherited members such as "constructor"
 * or "toString" are never returned as codes.
 *
 * @param {unknown} input - Label or code; null / undefined / blank → fallback.
 * @param {Record<string, string>} table - Label → code lookup.
 * @param {string} [fallback=''] - Value returned when nothing matches.
 * @returns {string} The resolved code, or `fallback`.
 */
export function resolveCode(input, table, fallback = '') {
    if (input === undefined || input === null) return fallback;
    const s = String(input).trim();
    // Blank input must not reach the substring scan: ''.includes-style
    // matching would select the first table entry.
    if (s === '') return fallback;
    const owned = (label) => Object.prototype.hasOwnProperty.call(table, label) && table[label] !== undefined;
    if (owned(s)) return table[s];
    const key = s.toLowerCase();
    if (owned(key)) return table[key];
    if (Object.values(table).includes(s)) return s;
    const entries = Object.entries(table);
    for (const [label, code] of entries) {
        if (label.includes(s)) return code;
    }
    for (const [label, code] of entries) {
        if (label.toLowerCase().includes(key)) return code;
    }
    return fallback;
}
|
|
146
|
+
|
|
147
|
+
/**
 * Guard used at the top of every adapter: fails fast when no browser page was
 * supplied.
 *
 * @param {unknown} page - The page handle passed to the adapter.
 * @throws {CliError} INTERNAL_ERROR when `page` is falsy.
 */
export function requirePage(page) {
    if (page) {
        return;
    }
    throw new CliError('INTERNAL_ERROR', 'Browser page required (adapter must set browser: true)');
}
|
|
150
|
+
|
|
151
|
+
/**
 * Load `url` in the given page, then pause so the page can settle.
 *
 * Per the original author's note, the existing session cookies are reused and
 * the first navigation on a fresh browser may hit the Aliyun WAF interstitial,
 * which is expected to clear itself because the script that sets `acw_sc__v2`
 * runs in the page.
 *
 * @param {object} page - Page handle exposing `goto(url)` and `wait({ time })`.
 * @param {string} url - Destination URL.
 * @param {number} [waitSeconds=2] - Value passed as `time` to `page.wait`.
 * @returns {Promise<void>}
 */
export async function navigateTo(page, url, waitSeconds = 2) {
    const settle = { time: waitSeconds };
    await page.goto(url);
    await page.wait(settle);
}
|
|
161
|
+
|
|
162
|
+
/**
 * Browser-context fetch: builds a script that calls
 * `fetch(url, { credentials: 'include' })`, runs it through `page.evaluate`,
 * and parses the response text as JSON on the Node side.
 *
 * @param {object} page - Page handle exposing `evaluate(script)`.
 * @param {string} url - Request URL (embedded in the script via JSON.stringify).
 * @param {object} [opts]
 * @param {string} [opts.method='GET'] - HTTP method.
 * @param {*} [opts.body=null] - Request body; omitted from the request when null.
 * @param {number} [opts.timeout=15000] - Abort timeout in milliseconds.
 * @param {object} [opts.headers={}] - Extra headers merged over `Accept: application/json`.
 * @returns {Promise<*>} The parsed JSON payload.
 * @throws {CliError} HTTP_ERROR on fetch failure, non-OK status, or an empty
 *   evaluation result; ANTI_BOT when the body looks like HTML; API_ERROR when
 *   the body is not valid JSON.
 */
export async function pageFetchJson(page, url, opts = {}) {
    const method = opts.method ?? 'GET';
    const body = opts.body ?? null;
    const headers = opts.headers ?? {};
    // The timeout is spliced into script source, so force it to a finite
    // number; anything else falls back to the default.
    const requestedTimeout = Number(opts.timeout ?? 15000);
    const timeout = Number.isFinite(requestedTimeout) ? requestedTimeout : 15000;
    const script = `
        async () => {
            const ctrl = new AbortController();
            const timer = setTimeout(() => ctrl.abort(), ${timeout});
            try {
                const resp = await fetch(${JSON.stringify(url)}, {
                    method: ${JSON.stringify(method)},
                    credentials: 'include',
                    headers: ${JSON.stringify({ Accept: 'application/json', ...headers })},
                    ${body !== null ? `body: ${JSON.stringify(body)},` : ''}
                    signal: ctrl.signal,
                });
                const text = await resp.text();
                return { ok: resp.ok, status: resp.status, text };
            } catch (e) {
                return { ok: false, status: 0, text: '', error: String(e && e.message || e) };
            } finally {
                clearTimeout(timer);
            }
        }
    `;
    const res = await page.evaluate(script);
    // A missing result would otherwise surface as a TypeError on `res.error`.
    if (!res || typeof res !== 'object') {
        throw new CliError('HTTP_ERROR', '51job fetch failed: empty result from page context');
    }
    if (res.error) throw new CliError('HTTP_ERROR', `51job fetch failed: ${res.error}`);
    if (!res.ok) throw new CliError('HTTP_ERROR', `51job HTTP ${res.status}`);
    const text = typeof res.text === 'string' ? res.text : '';
    if (text.trim().startsWith('<')) {
        throw new CliError('ANTI_BOT', '51job returned HTML (likely Aliyun WAF slider). Refresh browser session.');
    }
    try {
        return JSON.parse(text);
    } catch {
        throw new CliError('API_ERROR', `51job invalid JSON: ${text.slice(0, 200)}`);
    }
}
|
|
205
|
+
|
|
206
|
+
/**
 * Assemble the search-pc request URL from a filter bag.
 *
 * Every filter missing from `params` (null / undefined) falls back to its
 * default — the empty string for most, '000000' for `jobArea`, '0' for
 * `sortType`, 1 for `pageNum`, 20 for `pageSize`. `api_key`, `timestamp`,
 * `searchType`, `source` and `scene` are fixed by this function; per the
 * original note, `scene=7` + `source=1` mirror what the real SPA sends.
 *
 * @param {object} params - Optional filters keyed by query-parameter name.
 * @returns {string} Absolute URL under WE_ORIGIN.
 */
export function buildSearchUrl(params) {
    const valueOf = (name, dflt = '') => params[name] ?? dflt;
    // Order matters only for reproducing the same query string layout.
    const pairs = [
        ['api_key', '51job'],
        ['timestamp', String(Date.now())],
        ['keyword', valueOf('keyword')],
        ['searchType', '2'],
        ['function', valueOf('function')],
        ['industry', valueOf('industry')],
        ['jobArea', valueOf('jobArea', '000000')],
        ['jobArea2', valueOf('jobArea2')],
        ['landmark', valueOf('landmark')],
        ['metro', valueOf('metro')],
        ['salary', valueOf('salary')],
        ['workYear', valueOf('workYear')],
        ['degree', valueOf('degree')],
        ['companyType', valueOf('companyType')],
        ['companySize', valueOf('companySize')],
        ['jobType', valueOf('jobType')],
        ['issueDate', valueOf('issueDate')],
        ['sortType', valueOf('sortType', '0')],
        ['pageNum', String(valueOf('pageNum', 1))],
        ['pageSize', String(valueOf('pageSize', 20))],
        ['source', '1'],
        ['scene', '7'],
    ];
    const qs = new URLSearchParams();
    for (const [name, value] of pairs) {
        qs.set(name, value);
    }
    return `${WE_ORIGIN}/api/job/search-pc?${qs.toString()}`;
}
|
|
236
|
+
|
|
237
|
+
/**
 * Map a raw search-pc `resultbody.job.items[i]` into the canonical row shape
 * exposed to the user. Shared so `search` and `hot` stay aligned.
 *
 * Missing fields become '' (or 0 for the numeric salary bounds). `hr` is
 * "name·position", or just the name when no position is given, so the row
 * never carries a dangling separator.
 *
 * @param {object} it - Raw job item from the API.
 * @param {number} rank - 1-based position to record on the row.
 * @returns {object} Row whose keys match SEARCH_COLUMNS.
 */
export function mapJobItem(it, rank) {
    const area = it.jobAreaLevelDetail || {};
    let hr = '';
    if (it.hrName) {
        hr = it.hrPosition ? `${it.hrName}·${it.hrPosition}` : `${it.hrName}`;
    }
    return {
        rank,
        jobId: String(it.jobId ?? ''),
        title: it.jobName ?? '',
        salary: it.provideSalaryString ?? '',
        // Non-numeric or missing bounds collapse to 0.
        salaryMin: Number(it.jobSalaryMin ?? 0) || 0,
        salaryMax: Number(it.jobSalaryMax ?? 0) || 0,
        city: area.cityString ?? it.jobAreaString ?? '',
        district: area.districtString ?? '',
        workYear: it.workYearString ?? '',
        degree: it.degreeString ?? '',
        tags: Array.isArray(it.jobTags) ? it.jobTags.join(',') : '',
        company: it.companyName ?? '',
        companyFull: it.fullCompanyName ?? '',
        companyType: it.companyTypeString ?? '',
        companySize: it.companySizeString ?? '',
        industry: it.industryType1Str ?? '',
        hr,
        issueDate: it.issueDateString ?? '',
        url: it.jobHref ?? '',
        companyUrl: it.companyHref ?? '',
        encCoId: it.encCoId ?? '',
    };
}
|
|
267
|
+
|
|
268
|
+
// Column order for `search` / `hot` output; names match the keys produced by
// mapJobItem.
export const SEARCH_COLUMNS = [
    'rank',
    'jobId',
    'title',
    'salary',
    'salaryMin',
    'salaryMax',
    'city',
    'district',
    'workYear',
    'degree',
    'tags',
    'company',
    'companyFull',
    'companyType',
    'companySize',
    'industry',
    'hr',
    'issueDate',
    'url',
    'companyUrl',
    'encCoId',
];
|
|
274
|
+
|
|
275
|
+
/**
 * Turn a scraped company-page job card (`{ href, sensorsdata }`) into a row
 * fragment. `sensorsdata` is expected to be a JSON string carrying the job
 * fields.
 *
 * @param {unknown} raw - Object with string `href` and `sensorsdata` members.
 * @returns {object|null} The row fragment, or null when `raw` is not an
 *   object, either member is missing/empty, the JSON is malformed, or the
 *   payload has no `jobId`.
 */
export function parseCompanyJobCard(raw) {
    if (raw === null || typeof raw !== 'object') {
        return null;
    }
    const { href, sensorsdata } = raw;
    if (typeof href !== 'string' || href === '') {
        return null;
    }
    if (typeof sensorsdata !== 'string' || sensorsdata === '') {
        return null;
    }
    let payload;
    try {
        payload = JSON.parse(sensorsdata);
    } catch {
        return null;
    }
    if (!payload || !payload.jobId) {
        return null;
    }
    // Falsy payload fields are normalised to the empty string.
    const field = (name) => payload[name] || '';
    return {
        jobId: String(payload.jobId),
        title: field('jobTitle'),
        salary: field('jobSalary'),
        city: field('jobArea'),
        workYear: field('jobYear'),
        degree: field('jobDegree'),
        funcType: field('funcType'),
        issueDate: field('jobTime'),
        url: href,
    };
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import { describe, expect, it, vi } from 'vitest';
|
|
2
|
+
import { CliError } from '@jackwener/opencli/errors';
|
|
3
|
+
import { parseCompanyJobCard, pageFetchJson, resolveCity } from './utils.js';
|
|
4
|
+
|
|
5
|
+
// resolveCity: known inputs map to codes; unknown inputs must throw.
describe('51job resolveCity', () => {
    it('maps known city names and explicit national scope', () => {
        const cityCode = resolveCity('杭州');
        const nationalAlias = resolveCity('all');
        const rawCode = resolveCity('000000');

        expect(cityCode).toBe('080200');
        expect(nationalAlias).toBe('000000');
        expect(rawCode).toBe('000000');
    });

    it('rejects unknown non-empty inputs instead of silently widening to 全国', () => {
        const resolveUnknown = () => resolveCity('杭州z');

        expect(resolveUnknown).toThrowError(CliError);
        expect(resolveUnknown).toThrow(/Unknown city\/area/);
    });
});
|
|
17
|
+
|
|
18
|
+
// pageFetchJson: an HTML body on a 200 response is treated as a WAF challenge.
describe('51job pageFetchJson', () => {
    it('detects WAF challenge HTML and throws ANTI_BOT', async () => {
        const challengeResponse = {
            ok: true,
            status: 200,
            text: '<html><title>slider</title></html>',
        };
        const evaluate = vi.fn().mockResolvedValue(challengeResponse);
        const page = { evaluate };

        const pending = pageFetchJson(page, 'https://we.51job.com/api/job/search-pc');

        await expect(pending).rejects.toMatchObject({
            code: 'ANTI_BOT',
        });
    });
});
|
|
33
|
+
|
|
34
|
+
// parseCompanyJobCard: well-formed sensorsdata becomes a row; bad JSON → null.
describe('51job parseCompanyJobCard', () => {
    it('parses sensorsdata JSON into a stable row fragment', () => {
        const href = 'https://jobs.51job.com/shanghai/123456789.html';
        const payload = {
            jobId: '123456789',
            jobTitle: 'Senior Engineer',
            jobSalary: '20-30K',
            jobArea: '上海',
            jobYear: '3-5年',
            jobDegree: '本科',
            funcType: '后端开发',
            jobTime: '04-22',
        };

        const row = parseCompanyJobCard({ href, sensorsdata: JSON.stringify(payload) });

        expect(row).toEqual({
            jobId: '123456789',
            title: 'Senior Engineer',
            salary: '20-30K',
            city: '上海',
            workYear: '3-5年',
            degree: '本科',
            funcType: '后端开发',
            issueDate: '04-22',
            url: 'https://jobs.51job.com/shanghai/123456789.html',
        });
    });

    it('returns null on malformed sensorsdata', () => {
        const card = {
            href: 'https://jobs.51job.com/shanghai/123456789.html',
            sensorsdata: '{bad json}',
        };

        expect(parseCompanyJobCard(card)).toBeNull();
    });
});
|