@jackwener/opencli 1.7.6 → 1.7.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/README.md +17 -8
  2. package/README.zh-CN.md +14 -8
  3. package/cli-manifest.json +325 -11
  4. package/clis/51job/company.js +125 -0
  5. package/clis/51job/detail.js +108 -0
  6. package/clis/51job/hot.js +55 -0
  7. package/clis/51job/search.js +79 -0
  8. package/clis/51job/utils.js +302 -0
  9. package/clis/51job/utils.test.js +69 -0
  10. package/clis/bilibili/video.js +11 -4
  11. package/clis/bilibili/video.test.js +51 -0
  12. package/clis/chatgpt/image.js +1 -1
  13. package/clis/deepseek/ask.js +19 -13
  14. package/clis/deepseek/ask.test.js +93 -1
  15. package/clis/deepseek/utils.js +108 -23
  16. package/clis/deepseek/utils.test.js +109 -1
  17. package/clis/gemini/image.js +1 -1
  18. package/clis/instagram/download.js +1 -1
  19. package/clis/twitter/likes.js +3 -2
  20. package/clis/twitter/search.js +4 -2
  21. package/clis/twitter/search.test.js +4 -0
  22. package/clis/twitter/shared.js +28 -0
  23. package/clis/twitter/shared.test.js +96 -0
  24. package/clis/twitter/thread.js +3 -1
  25. package/clis/twitter/timeline.js +3 -2
  26. package/clis/twitter/tweets.js +3 -2
  27. package/clis/twitter/tweets.test.js +1 -1
  28. package/clis/web/read.js +25 -5
  29. package/clis/web/read.test.js +76 -0
  30. package/clis/weread/ai-outline.js +170 -0
  31. package/clis/weread/ai-outline.test.js +83 -0
  32. package/clis/weread/book.js +57 -44
  33. package/clis/weread/commands.test.js +24 -0
  34. package/clis/xiaoyuzhou/podcast-episodes.js +2 -2
  35. package/clis/xiaoyuzhou/podcast-episodes.test.js +78 -0
  36. package/dist/src/browser/analyze.d.ts +103 -0
  37. package/dist/src/browser/analyze.js +230 -0
  38. package/dist/src/browser/analyze.test.d.ts +1 -0
  39. package/dist/src/browser/analyze.test.js +164 -0
  40. package/dist/src/browser/article-extract.d.ts +57 -0
  41. package/dist/src/browser/article-extract.e2e.test.d.ts +1 -0
  42. package/dist/src/browser/article-extract.e2e.test.js +105 -0
  43. package/dist/src/browser/article-extract.js +169 -0
  44. package/dist/src/browser/article-extract.test.d.ts +1 -0
  45. package/dist/src/browser/article-extract.test.js +94 -0
  46. package/dist/src/browser/cdp.js +11 -2
  47. package/dist/src/browser/verify-fixture.d.ts +59 -0
  48. package/dist/src/browser/verify-fixture.js +213 -0
  49. package/dist/src/browser/verify-fixture.test.d.ts +1 -0
  50. package/dist/src/browser/verify-fixture.test.js +161 -0
  51. package/dist/src/cli.d.ts +32 -0
  52. package/dist/src/cli.js +333 -43
  53. package/dist/src/cli.test.js +257 -1
  54. package/dist/src/daemon.d.ts +3 -2
  55. package/dist/src/daemon.js +16 -4
  56. package/dist/src/daemon.test.d.ts +1 -0
  57. package/dist/src/daemon.test.js +19 -0
  58. package/dist/src/download/article-download.d.ts +12 -0
  59. package/dist/src/download/article-download.js +141 -17
  60. package/dist/src/download/article-download.test.js +196 -0
  61. package/dist/src/download/index.js +73 -86
  62. package/dist/src/errors.js +4 -2
  63. package/dist/src/errors.test.js +13 -0
  64. package/dist/src/launcher.d.ts +1 -1
  65. package/dist/src/launcher.js +3 -3
  66. package/dist/src/output.js +1 -1
  67. package/dist/src/output.test.js +6 -0
  68. package/package.json +5 -1
@@ -0,0 +1,125 @@
1
+ /**
2
+ * 51job company jobs + basic info by encCoId.
3
+ *
4
+ * Navigates to `jobs.51job.com/all/co<encCoId>.html`. Each job card is an
5
+ * `<a sensorsdata="…">` whose attribute is a JSON blob with jobId, title,
6
+ * salary, area, year, degree — so parsing is just JSON, not DOM-text fragile.
7
+ */
8
+
9
+ import { cli, Strategy } from '@jackwener/opencli/registry';
10
+ import { CliError } from '@jackwener/opencli/errors';
11
+ import { JOBS_ORIGIN, requirePage, navigateTo, parseCompanyJobCard } from './utils.js';
12
+
13
/**
 * `51job company <encCoId>` adapter.
 *
 * Opens `${JOBS_ORIGIN}/all/co<encCoId>.html`, collects every
 * `<a sensorsdata="…">` job card whose href looks like a job page, and returns
 * one row per unique job, each row repeating the company profile fields.
 * When no job card can be parsed, a single sentinel row (rank 0) carrying only
 * the company fields is returned instead of an empty list.
 *
 * Throws `CliError('INVALID_ARGUMENT')` for a missing/invalid encCoId and
 * `CliError('NO_DATA')` when the page reports a missing company or no company
 * name could be extracted.
 */
cli({
    site: '51job',
    name: 'company',
    description: '51job 公司简介 + 在招职位(按 encCoId)',
    domain: 'jobs.51job.com',
    strategy: Strategy.COOKIE,
    browser: true,
    navigateBefore: false,
    args: [
        { name: 'encCoId', type: 'string', required: true, positional: true, help: '加密公司 ID(search 返回的 encCoId)' },
        { name: 'limit', type: 'int', default: 20, help: '返回职位数(1-50)' },
    ],
    columns: [
        'rank', 'jobId', 'title', 'salary', 'city', 'workYear', 'degree',
        'funcType', 'issueDate', 'url',
        'companyName', 'companyType', 'companySize', 'companyIndustry',
        'companyIntro', 'companyUrl',
    ],
    func: async (page, kwargs) => {
        requirePage(page);
        const encCoId = String(kwargs.encCoId ?? '').trim();
        if (!encCoId) throw new CliError('INVALID_ARGUMENT', 'encCoId is required');
        // The id is spliced into the URL path, so only a conservative charset is accepted.
        if (!/^[A-Za-z0-9_]+$/.test(encCoId)) {
            throw new CliError('INVALID_ARGUMENT', `encCoId must be alphanumeric/underscore, got "${encCoId}"`);
        }
        // Clamp to 1..50 and truncate so a fractional value cannot admit an extra row.
        const limit = Math.max(1, Math.trunc(Math.min(Number(kwargs.limit) || 20, 50)));

        const url = `${JOBS_ORIGIN}/all/co${encCoId}.html`;
        await navigateTo(page, url, 2);

        const script = `(() => {
            const sel = s => document.querySelector(s)?.innerText?.trim() || '';
            const bodyText = (document.body.innerText || '').slice(0, 400);
            if (/公司不存在|页面不存在|账号状态异常/.test(bodyText)) {
                return { error: 'NOT_FOUND', bodyText };
            }
            const companyName = sel('h1') || sel('.cname');
            // Company introduction block
            const introEl = document.querySelector('#companyIntroRef, .c-intro');
            const companyIntro = introEl ? (introEl.innerText || '').trim() : '';
            // Job cards: anchors carrying a sensorsdata JSON blob and a job-page href.
            const links = [...document.querySelectorAll('a[sensorsdata]')]
                .filter(a => /\\/\\d{6,}\\.html/.test(a.href || ''))
                .slice(0, 60)
                .map(a => ({
                    href: a.href,
                    sensorsdata: a.getAttribute('sensorsdata') || '',
                    text: (a.innerText || '').trim(),
                }));
            // Company meta is three inline spans under .c-info.ellipsis
            // (type/size/industry) — extract them by position.
            const cInfo = document.querySelector('.c-info.ellipsis');
            const cInfoParts = cInfo
                ? [...cInfo.querySelectorAll('span')].map(s => (s.innerText || '').trim()).filter(Boolean)
                : [];
            return { companyName, companyIntro, links, cInfoParts };
        })()`;
        const data = await page.evaluate(script);
        if (data?.error === 'NOT_FOUND') {
            throw new CliError('NO_DATA', `Company ${encCoId} not found`);
        }
        // Also covers an empty evaluate result, which previously surfaced as a TypeError.
        if (!data?.companyName) {
            throw new CliError('NO_DATA', `Could not parse company page ${encCoId}; layout may have changed`);
        }

        const [companyType = '', companySize = '', companyIndustry = ''] = data.cInfoParts || [];
        // Company columns shared by every row, in column order.
        const companyFields = {
            companyName: data.companyName,
            companyType,
            companySize,
            companyIndustry,
            companyIntro: data.companyIntro || '',
            companyUrl: url,
        };

        const seen = new Set();
        const rows = [];
        for (const link of data.links || []) {
            const job = parseCompanyJobCard(link);
            if (!job || seen.has(job.jobId)) continue;
            seen.add(job.jobId);
            rows.push({ rank: rows.length + 1, ...job, ...companyFields });
            if (rows.length >= limit) break;
        }
        if (rows.length === 0) {
            // Still return a sentinel row with the company info so caller isn't left with [].
            return [{
                rank: 0,
                jobId: '',
                title: '(no active jobs)',
                salary: '', city: '', workYear: '', degree: '',
                funcType: '', issueDate: '', url: '',
                ...companyFields,
            }];
        }
        return rows;
    },
});
@@ -0,0 +1,108 @@
1
+ /**
2
+ * 51job job detail by jobId.
3
+ *
4
+ * Navigates to `jobs.51job.com/x/<jobId>.html` (SSR page — the generic `/x/`
5
+ * area slug always resolves) and scrapes the structured blocks. No API
6
+ * surface returns the full detail page, so DOM scraping is the only path.
7
+ */
8
+
9
+ import { cli, Strategy } from '@jackwener/opencli/registry';
10
+ import { CliError } from '@jackwener/opencli/errors';
11
+ import { JOBS_ORIGIN, requirePage, navigateTo } from './utils.js';
12
+
13
/**
 * `51job detail <jobId>` adapter.
 *
 * Opens `${JOBS_ORIGIN}/x/<jobId>.html`, scrapes title, salary, meta line,
 * description, welfare tags, labelled rows (category / address / age) and the
 * company block from the DOM, and returns exactly one row.
 *
 * Throws `CliError('INVALID_ARGUMENT')` when jobId is not a 6-12 digit number
 * and `CliError('NO_DATA')` when the page reports the job as offline/removed
 * or no title could be extracted.
 */
cli({
    site: '51job',
    name: 'detail',
    description: '51job 职位详情(按 jobId)',
    domain: 'jobs.51job.com',
    strategy: Strategy.COOKIE,
    browser: true,
    navigateBefore: false,
    args: [
        { name: 'jobId', type: 'string', required: true, positional: true, help: '职位 ID(search 返回的 jobId)' },
    ],
    columns: [
        'jobId', 'title', 'salary', 'location', 'workYear', 'degree',
        'category', 'address', 'ageRequirement',
        'description', 'welfare',
        'company', 'companyType', 'companySize', 'companyIndustry',
        'companyUrl', 'url',
    ],
    func: async (page, kwargs) => {
        requirePage(page);
        const jobId = String(kwargs.jobId ?? '').trim();
        if (!jobId) throw new CliError('INVALID_ARGUMENT', 'jobId is required');
        if (!/^\d{6,12}$/.test(jobId)) throw new CliError('INVALID_ARGUMENT', `jobId must be a 6-12 digit number, got "${jobId}"`);

        const url = `${JOBS_ORIGIN}/x/${jobId}.html`;
        await navigateTo(page, url, 2);

        const script = `(() => {
            const sel = s => document.querySelector(s)?.innerText?.trim() || '';
            // Tolerate matched elements that expose no innerText.
            const all = s => [...document.querySelectorAll(s)].map(e => (e.innerText || '').trim()).filter(Boolean);
            const finalUrl = window.location.href;
            const bodyText = (document.body.innerText || '').slice(0, 400);
            if (/职位已下线|该职位已删除|页面不存在/.test(bodyText)) {
                return { error: 'EXPIRED', bodyText };
            }
            const companyA = document.querySelector('.cname a, .tCompany_sidebar .com_msg a');
            const funcs = all('.bmsg .fp');
            // Value of the labelled row starting with the given prefix; the separator
            // after the label may be an ASCII or a fullwidth colon plus whitespace.
            const pick = (prefix) => {
                const row = funcs.find(f => f.startsWith(prefix));
                return row ? row.slice(prefix.length).replace(/^[:：\\s]+/, '').trim() : '';
            };
            return {
                finalUrl,
                title: sel('h1') || sel('.cn .name'),
                salary: sel('.cn strong') || sel('strong'),
                meta: sel('.cn .msg.ltype') || sel('.msg.ltype'),
                description: (() => {
                    const box = document.querySelector('.bmsg.job_msg') || document.querySelector('.job_msg');
                    if (!box) return '';
                    // Work on a clone so stripping the labelled rows does not touch the live page.
                    const clone = box.cloneNode(true);
                    clone.querySelectorAll('.fp, .mt10, script, style').forEach(n => n.remove());
                    return (clone.innerText || '').trim();
                })(),
                welfare: all('.t1 span, .jtag .t1 span'),
                category: pick('职能类别'),
                address: pick('上班地址'),
                ageRequirement: pick('年龄要求'),
                company: companyA?.innerText?.trim() || '',
                companyUrl: companyA?.href || '',
                companyTag: sel('.com_tag'),
            };
        })()`;
        const data = await page.evaluate(script);
        if (data?.error === 'EXPIRED') {
            throw new CliError('NO_DATA', `Job ${jobId} is offline or removed`);
        }
        // Also covers an empty evaluate result, which previously surfaced as a TypeError.
        if (!data?.title) {
            throw new CliError('NO_DATA', `Could not parse job detail for ${jobId}; page may have changed layout`);
        }

        // meta looks like "北京-丰台区 | 3年及以上 | 本科"
        const [locRaw, workYear, degree] = (data.meta || '').split('|').map(s => s.trim());
        // companyTag looks like "国企\n\n150-500人\n\n电子技术/半导体/集成电路"
        const tagParts = (data.companyTag || '').split(/\n+/).map(s => s.trim()).filter(Boolean);

        return [{
            jobId,
            title: data.title,
            salary: data.salary || '',
            location: locRaw || '',
            workYear: workYear || '',
            degree: degree || '',
            category: data.category || '',
            address: data.address || '',
            ageRequirement: data.ageRequirement || '',
            description: data.description || '',
            welfare: (data.welfare || []).join(','),
            company: data.company || '',
            companyType: tagParts[0] || '',
            companySize: tagParts[1] || '',
            companyIndustry: tagParts.slice(2).join(' / '),
            companyUrl: data.companyUrl || '',
            url: data.finalUrl || url,
        }];
    },
});
@@ -0,0 +1,55 @@
1
+ /**
2
+ * 51job hot / recommended feed.
3
+ *
4
+ * Same endpoint as `search`, but with empty keyword — 51job returns its
5
+ * own ranked recommendation list (up to ~999 for most regions).
6
+ */
7
+
8
+ import { cli, Strategy } from '@jackwener/opencli/registry';
9
+ import { CliError } from '@jackwener/opencli/errors';
10
+ import {
11
+ WE_ORIGIN, SEARCH_COLUMNS, SORT_CODES,
12
+ requirePage, navigateTo, pageFetchJson,
13
+ buildSearchUrl, mapJobItem, resolveCity, resolveCode,
14
+ } from './utils.js';
15
+
16
/**
 * `51job hot` adapter: the search endpoint queried with an empty keyword.
 *
 * Resolves area/sort to their codes, makes sure the tab is on `WE_ORIGIN`
 * (navigating there only when it is not), fetches one result page from inside
 * the browser and maps each item with `mapJobItem`.
 *
 * Throws `CliError('API_ERROR')` when the response status is not 1 and
 * `CliError('NO_DATA')` when the item list is empty.
 */
cli({
    site: '51job',
    name: 'hot',
    description: '51job 推荐职位(按城市/行业/排序浏览)',
    domain: 'we.51job.com',
    strategy: Strategy.COOKIE,
    browser: true,
    navigateBefore: false,
    args: [
        { name: 'area', type: 'string', default: '全国', help: '城市名或 6 位城市码(默认 "全国")' },
        { name: 'sort', type: 'string', default: '综合', help: '排序:综合 / 最新 / 薪资 / 距离' },
        { name: 'page', type: 'int', default: 1, help: '页码(1-based)' },
        { name: 'limit', type: 'int', default: 20, help: '返回条数(1-50)' },
    ],
    columns: SEARCH_COLUMNS,
    func: async (page, kwargs) => {
        requirePage(page);
        // Clamp to 1..50 and truncate: the value doubles as pageSize and rank stride.
        const limit = Math.max(1, Math.trunc(Math.min(Number(kwargs.limit) || 20, 50)));
        const pageNum = Math.max(1, Math.trunc(Number(kwargs.page) || 1));
        const jobArea = resolveCity(kwargs.area);
        const sortType = resolveCode(kwargs.sort, SORT_CODES, '0');

        // Only navigate when the tab is not already on the API's origin.
        const currentUrl = await page.evaluate(`(() => window.location.href)()`);
        if (!String(currentUrl).startsWith(WE_ORIGIN)) {
            await navigateTo(page, `${WE_ORIGIN}/pc/search?searchType=2`, 2);
        }

        const url = buildSearchUrl({
            keyword: '', jobArea, sortType,
            pageNum, pageSize: limit,
        });
        const data = await pageFetchJson(page, url);
        // Guard `data` the same way the item lookup below does.
        if (data?.status !== '1' && data?.status !== 1) {
            throw new CliError('API_ERROR', `51job hot failed: ${data?.message ?? 'unknown'}`);
        }
        const items = data?.resultbody?.job?.items ?? [];
        if (items.length === 0) throw new CliError('NO_DATA', 'No recommended jobs returned');
        // Rank continues across pages: page 2 with limit 20 starts at 21.
        const rankOffset = (pageNum - 1) * limit;
        return items.slice(0, limit).map((it, i) => mapJobItem(it, rankOffset + i + 1));
    },
});
@@ -0,0 +1,79 @@
1
+ /**
2
+ * 51job keyword search.
3
+ *
4
+ * Backed by `we.51job.com/api/job/search-pc`, which returns a job list with
5
+ * the full `jobDescribe` embedded. Needs the browser session because the
6
+ * Aliyun WAF in front of `we.51job.com` challenges bare fetches; the
7
+ * `pageFetchJson` helper runs inside the page so the WAF sees a real browser.
8
+ */
9
+
10
+ import { cli, Strategy } from '@jackwener/opencli/registry';
11
+ import { CliError } from '@jackwener/opencli/errors';
12
+ import {
13
+ WE_ORIGIN, SEARCH_COLUMNS,
14
+ SALARY_CODES, WORKYEAR_CODES, DEGREE_CODES,
15
+ COMPANY_TYPE_CODES, COMPANY_SIZE_CODES, SORT_CODES,
16
+ requirePage, navigateTo, pageFetchJson,
17
+ buildSearchUrl, mapJobItem, resolveCity, resolveCode,
18
+ } from './utils.js';
19
+
20
/**
 * `51job search <keyword>` adapter.
 *
 * Resolves every filter argument to its 51job code, makes sure the tab is on
 * `WE_ORIGIN` (navigating there only when it is not), fetches one result page
 * from inside the browser and maps each item with `mapJobItem`.
 *
 * Throws `CliError('INVALID_ARGUMENT')` for an empty keyword,
 * `CliError('API_ERROR')` when the response status is not 1 and
 * `CliError('NO_DATA')` when nothing matched.
 */
cli({
    site: '51job',
    name: 'search',
    description: '51job 前程无忧关键词职位搜索',
    domain: 'we.51job.com',
    strategy: Strategy.COOKIE,
    browser: true,
    navigateBefore: false,
    args: [
        { name: 'keyword', type: 'string', required: true, positional: true, help: '搜索关键词(岗位名 / 技能 / 公司)' },
        { name: 'area', type: 'string', default: '全国', help: '城市名或 6 位城市码(如 "杭州" / "080200" / "全国")' },
        { name: 'salary', type: 'string', default: '', help: '薪资区间(如 "10-15k" / "1-1.5万" / "20-30k")' },
        { name: 'experience', type: 'string', default: '', help: '工作年限(如 "应届" / "1-3年" / "3-5年" / "5-7年")' },
        { name: 'degree', type: 'string', default: '', help: '学历要求(如 "本科" / "大专" / "硕士")' },
        { name: 'companyType', type: 'string', default: '', help: '公司性质(如 "外资" / "国企" / "民营")' },
        { name: 'companySize', type: 'string', default: '', help: '公司规模(如 "50-150" / "1000-5000")' },
        { name: 'sort', type: 'string', default: '综合', help: '排序:综合 / 最新 / 薪资 / 距离' },
        { name: 'page', type: 'int', default: 1, help: '页码(1-based)' },
        { name: 'limit', type: 'int', default: 20, help: '返回条数(1-50)' },
    ],
    columns: SEARCH_COLUMNS,
    func: async (page, kwargs) => {
        requirePage(page);
        const keyword = String(kwargs.keyword ?? '').trim();
        if (!keyword) throw new CliError('INVALID_ARGUMENT', 'keyword is required');
        // Clamp to 1..50 and truncate: the value doubles as pageSize and rank stride.
        const limit = Math.max(1, Math.trunc(Math.min(Number(kwargs.limit) || 20, 50)));
        const pageNum = Math.max(1, Math.trunc(Number(kwargs.page) || 1));

        const jobArea = resolveCity(kwargs.area);
        const salary = resolveCode(kwargs.salary, SALARY_CODES);
        const workYear = resolveCode(kwargs.experience, WORKYEAR_CODES);
        const degree = resolveCode(kwargs.degree, DEGREE_CODES);
        const companyType = resolveCode(kwargs.companyType, COMPANY_TYPE_CODES);
        const companySize = resolveCode(kwargs.companySize, COMPANY_SIZE_CODES);
        const sortType = resolveCode(kwargs.sort, SORT_CODES, '0');

        // Establish WAF-clean origin. Reusing the same tab avoids triggering the
        // slider challenge on every call.
        const currentUrl = await page.evaluate(`(() => window.location.href)()`);
        if (!String(currentUrl).startsWith(WE_ORIGIN)) {
            await navigateTo(page, `${WE_ORIGIN}/pc/search?keyword=${encodeURIComponent(keyword)}&searchType=2`, 2);
        }

        const url = buildSearchUrl({
            keyword, jobArea, salary, workYear, degree,
            companyType, companySize, sortType,
            pageNum, pageSize: limit,
        });

        const data = await pageFetchJson(page, url);
        // Guard `data` the same way the item lookup below does.
        if (data?.status !== '1' && data?.status !== 1) {
            throw new CliError('API_ERROR', `51job search failed: ${data?.message ?? 'unknown'}`);
        }
        const items = data?.resultbody?.job?.items ?? [];
        if (items.length === 0) {
            throw new CliError('NO_DATA', `No jobs matched "${keyword}"`);
        }
        // Rank continues across pages: page 2 with limit 20 starts at 21.
        const rankOffset = (pageNum - 1) * limit;
        return items.slice(0, limit).map((it, i) => mapJobItem(it, rankOffset + i + 1));
    },
});
@@ -0,0 +1,302 @@
1
+ /**
2
+ * 51job shared utilities.
3
+ *
4
+ * Key design points:
5
+ * - we.51job.com is protected by Aliyun WAF — bare `curl` / Node-side fetch
6
+ * gets a slider CAPTCHA HTML page. Only browser-context fetch (page.evaluate)
7
+ * with the session's cookies survives the challenge.
8
+ * - `document.cookie` exposes the anti-bot cookies (`acw_sc__v2`, `ssxmod_itna`
9
+ * etc.) — no HttpOnly/login needed for public pages.
10
+ * - API (`we.51job.com/api/job/search-pc`) is same-origin when we've navigated
11
+ * to `https://we.51job.com/...`, so fetch inside page.evaluate works.
12
+ * - Detail / company pages live on `jobs.51job.com` and render data into the
13
+ * DOM (SSR), so adapters for those navigate and scrape.
14
+ */
15
+
16
+ import { CliError } from '@jackwener/opencli/errors';
17
+
18
export const WE_ORIGIN = 'https://we.51job.com';
export const JOBS_ORIGIN = 'https://jobs.51job.com';

/**
 * Expand `[code, ...labels]` rows into a flat `{ label: code }` object.
 * Labels are inserted in row order, which the partial-match loops in
 * `resolveCity` / `resolveCode` rely on (first matching key wins).
 */
const toLookup = (rows) =>
    Object.fromEntries(rows.flatMap(([code, ...labels]) => labels.map((label) => [label, code])));

/**
 * City name / alias → 6-digit jobArea code. `000000` is the national bucket.
 * Holds a fixed set of major cities with Chinese names and lowercase
 * romanized aliases.
 */
export const CITY_CODES = toLookup([
    ['000000', '全国', 'all'],
    ['010000', '北京', 'beijing'],
    ['020000', '上海', 'shanghai'],
    ['030200', '广州', 'guangzhou'],
    ['040000', '深圳', 'shenzhen'],
    ['180200', '武汉', 'wuhan'],
    ['200200', '西安', "xi'an", 'xian'],
    ['080200', '杭州', 'hangzhou'],
    ['070200', '南京', 'nanjing'],
    ['090200', '成都', 'chengdu'],
    ['070300', '苏州', 'suzhou'],
    ['060000', '重庆', 'chongqing'],
    ['050000', '天津', 'tianjin'],
    ['190200', '长沙', 'changsha'],
    ['170200', '郑州', 'zhengzhou'],
    ['120300', '青岛', 'qingdao'],
    ['150200', '合肥', 'hefei'],
    ['110300', '厦门', 'xiamen'],
    ['070400', '无锡', 'wuxi'],
    ['120200', '济南', 'jinan'],
    ['030700', '佛山', 'foshan'],
    ['030800', '东莞', 'dongguan'],
    ['080300', '宁波', 'ningbo'],
    ['110200', '福州', 'fuzhou'],
    ['250200', '昆明', 'kunming'],
    ['230300', '大连', 'dalian'],
    ['230200', '沈阳', 'shenyang'],
    ['220200', '哈尔滨', 'haerbin', 'harbin'],
    ['160200', '石家庄', 'shijiazhuang'],
    ['260200', '贵阳', 'guiyang'],
    ['100200', '南宁', 'nanning'],
    ['130200', '南昌', 'nanchang'],
    ['240200', '长春', 'changchun'],
    ['210200', '太原', 'taiyuan'],
    ['280200', '兰州', 'lanzhou'],
    ['310200', '乌鲁木齐', 'urumqi'],
    ['270200', '海口', 'haikou'],
    ['330000', '香港', 'hongkong', 'hk'],
]);

/** Salary bucket code (matches 51job's `salary` filter). */
export const SALARY_CODES = toLookup([
    ['', '不限'],
    ['01', '2千以下'],
    ['02', '2-3千'],
    ['03', '3-4.5千'],
    ['04', '4.5-6千'],
    ['05', '6-8千'],
    ['06', '8k-1万', '8-10k'],
    ['07', '1-1.5万', '10-15k'],
    ['08', '1.5-2万', '15-20k'],
    ['09', '2-3万', '20-30k'],
    ['10', '3-5万', '30-50k'],
    ['11', '5万以上', '50k以上'],
]);

/** Work experience bucket. */
export const WORKYEAR_CODES = toLookup([
    ['', '不限'],
    ['01', '在校生'],
    ['02', '应届'],
    ['03', '1年以下'],
    ['04', '1-3年'],
    ['05', '3-5年'],
    ['06', '5-7年'],
    ['07', '7-10年'],
    ['08', '10年以上'],
]);

/** Degree bucket. */
export const DEGREE_CODES = toLookup([
    ['', '不限'],
    ['01', '初中及以下'],
    ['02', '高中/中技/中专', '高中'],
    ['03', '大专'],
    ['04', '本科'],
    ['05', '硕士'],
    ['06', '博士'],
]);

/** Company ownership type. */
export const COMPANY_TYPE_CODES = toLookup([
    ['', '不限'],
    ['01', '外资'],
    ['0101', '欧美'],
    ['0102', '日韩'],
    ['02', '合资'],
    ['03', '国企'],
    ['04', '民营'],
    ['05', '上市公司'],
    ['06', '创业公司'],
    ['07', '事业单位'],
    ['08', '非营利'],
    ['09', '政府'],
]);

/** Company headcount bucket. */
export const COMPANY_SIZE_CODES = toLookup([
    ['', '不限'],
    ['01', '少于50', '50以下'],
    ['02', '50-150'],
    ['03', '150-500'],
    ['04', '500-1000'],
    ['05', '1000-5000'],
    ['06', '5000-10000'],
    ['07', '10000以上'],
]);

/** Sort strategy. */
export const SORT_CODES = toLookup([
    ['0', '综合', 'relevance', 'default'],
    ['1', '最新', 'new', 'newest'],
    ['2', '薪资', 'salary', 'pay'],
    ['9', '距离', 'distance'],
]);
119
+
120
/**
 * Resolve a user-supplied city/area into a 6-digit 51job `jobArea` code.
 *
 * Resolution order: empty / "全国" / "all" → `000000`; a 6-digit string is
 * passed through unchanged; exact key in `CITY_CODES`; lowercase key in
 * `CITY_CODES`; first `CITY_CODES` key containing the (lowercased) input.
 *
 * @param {unknown} input City name, alias, or 6-digit code.
 * @returns {string} The 6-digit area code.
 * @throws {CliError} `INVALID_ARGUMENT` when nothing matches.
 */
export function resolveCity(input) {
    if (!input) return '000000';
    const s = String(input).trim();
    const key = s.toLowerCase();
    if (!s || s === '全国' || key === 'all') return '000000';
    if (/^\d{6}$/.test(s)) return s;
    // Own-property checks only: a bare `CITY_CODES[s]` would also hit inherited
    // members such as "constructor" and return a function instead of a code.
    const hasCity = (name) => Object.prototype.hasOwnProperty.call(CITY_CODES, name);
    if (hasCity(s)) return CITY_CODES[s];
    if (hasCity(key)) return CITY_CODES[key];
    // Partial match uses the lowercased input so it is as case-insensitive as
    // the exact match above (all romanized keys are lowercase).
    for (const [name, code] of Object.entries(CITY_CODES)) {
        if (name.includes(key)) return code;
    }
    throw new CliError('INVALID_ARGUMENT', `Unknown city/area "${s}"`, 'Use a supported city name like "杭州" or a 6-digit city code');
}
133
+
134
/**
 * Resolve a user-supplied filter label into its code using `table`.
 *
 * Resolution order: exact key; lowercase key; the input itself when it is
 * already one of the table's codes; first key containing the input (or its
 * lowercase form). Anything else — including missing, empty or
 * whitespace-only input — yields `fallback`.
 *
 * @param {unknown} input Label or code typed by the user.
 * @param {Record<string, string>} table Label → code lookup.
 * @param {string} [fallback=''] Value returned when nothing matches.
 * @returns {string} The resolved code or `fallback`.
 */
export function resolveCode(input, table, fallback = '') {
    if (input === undefined || input === null) return fallback;
    const s = String(input).trim();
    // An empty needle would match every key in the partial-match loop below.
    if (s === '') return fallback;
    // Own-property checks only, so "constructor"/"toString" never resolve to
    // members inherited from Object.prototype.
    const hasLabel = (label) => Object.prototype.hasOwnProperty.call(table, label);
    if (hasLabel(s)) return table[s];
    const key = s.toLowerCase();
    if (hasLabel(key)) return table[key];
    if (Object.values(table).includes(s)) return s;
    for (const [label, code] of Object.entries(table)) {
        if (label.includes(s) || label.includes(key)) return code;
    }
    return fallback;
}
146
+
147
/**
 * Guard for adapter entry points: throws `CliError('INTERNAL_ERROR')` when no
 * browser page object was supplied, otherwise does nothing.
 */
export function requirePage(page) {
    if (page) return;
    throw new CliError('INTERNAL_ERROR', 'Browser page required (adapter must set browser: true)');
}
150
+
151
/**
 * Load `url` in the given page, then pause for `waitSeconds` (default 2) via
 * `page.wait({ time })` so the page has a moment to settle before the caller
 * starts reading from it.
 */
export async function navigateTo(page, url, waitSeconds = 2) {
    const settle = { time: waitSeconds };
    await page.goto(url);
    await page.wait(settle);
}
161
+
162
/**
 * Browser-context fetch: builds a script that runs
 * `fetch(url, { credentials: 'include', … })` with an abort timer, hands it to
 * `page.evaluate`, and parses the response text as JSON.
 *
 * @param {object} page Browser page exposing `evaluate(script)`.
 * @param {string} url Absolute URL to request.
 * @param {object} [opts]
 * @param {string} [opts.method='GET'] HTTP method.
 * @param {unknown} [opts.body] Request body; strings are sent as-is, any other
 *   value is serialized with `JSON.stringify` first.
 * @param {number} [opts.timeout=15000] Abort delay in milliseconds; a
 *   non-numeric value falls back to 15000.
 * @param {object} [opts.headers] Extra headers merged over `Accept: application/json`.
 * @returns {Promise<any>} The parsed JSON payload.
 * @throws {CliError} `HTTP_ERROR` on fetch failure, empty result or non-2xx
 *   status; `ANTI_BOT` when the body looks like HTML; `API_ERROR` on invalid JSON.
 */
export async function pageFetchJson(page, url, opts = {}) {
    const method = opts.method ?? 'GET';
    const headers = { Accept: 'application/json', ...(opts.headers ?? {}) };
    // The value is spliced into the script as a bare number, so reject anything else.
    const requestedTimeout = Number(opts.timeout ?? 15000);
    const timeout = Number.isFinite(requestedTimeout) ? requestedTimeout : 15000;
    // A non-string body embedded directly would become an object literal that
    // fetch() coerces to "[object Object]"; serialize it to JSON text up front.
    const rawBody = opts.body ?? null;
    const body = rawBody === null || typeof rawBody === 'string' ? rawBody : JSON.stringify(rawBody);
    const bodyLine = body !== null ? `body: ${JSON.stringify(body)},` : '';
    const script = `
    async () => {
      const ctrl = new AbortController();
      const timer = setTimeout(() => ctrl.abort(), ${timeout});
      try {
        const resp = await fetch(${JSON.stringify(url)}, {
          method: ${JSON.stringify(method)},
          credentials: 'include',
          headers: ${JSON.stringify(headers)},
          ${bodyLine}
          signal: ctrl.signal,
        });
        const text = await resp.text();
        return { ok: resp.ok, status: resp.status, text };
      } catch (e) {
        return { ok: false, status: 0, text: '', error: String(e && e.message || e) };
      } finally {
        clearTimeout(timer);
      }
    }
  `;
    const res = await page.evaluate(script);
    if (!res || typeof res !== 'object') {
        throw new CliError('HTTP_ERROR', '51job fetch failed: empty result from page context');
    }
    if (res.error) throw new CliError('HTTP_ERROR', `51job fetch failed: ${res.error}`);
    if (!res.ok) throw new CliError('HTTP_ERROR', `51job HTTP ${res.status}`);
    const text = String(res.text ?? '');
    // A JSON endpoint answering with markup is treated as the WAF challenge page.
    if (text.trim().startsWith('<')) {
        throw new CliError('ANTI_BOT', '51job returned HTML (likely Aliyun WAF slider). Refresh browser session.');
    }
    try {
        return JSON.parse(text);
    } catch {
        throw new CliError('API_ERROR', `51job invalid JSON: ${text.slice(0, 200)}`);
    }
}
205
+
206
/**
 * Assemble the search-pc API URL on `WE_ORIGIN`.
 *
 * Every filter missing from `params` is sent as an empty string, except
 * `jobArea` (`000000`), `sortType` (`0`), `pageNum` (1) and `pageSize` (20).
 * `api_key`, `searchType`, `source` and `scene` are fixed, and `timestamp` is
 * the current `Date.now()`. Parameters are emitted in the order listed below.
 */
export function buildSearchUrl(params) {
    const opt = (name, fallback = '') => params[name] ?? fallback;
    const query = new URLSearchParams([
        ['api_key', '51job'],
        ['timestamp', String(Date.now())],
        ['keyword', opt('keyword')],
        ['searchType', '2'],
        ['function', opt('function')],
        ['industry', opt('industry')],
        ['jobArea', opt('jobArea', '000000')],
        ['jobArea2', opt('jobArea2')],
        ['landmark', opt('landmark')],
        ['metro', opt('metro')],
        ['salary', opt('salary')],
        ['workYear', opt('workYear')],
        ['degree', opt('degree')],
        ['companyType', opt('companyType')],
        ['companySize', opt('companySize')],
        ['jobType', opt('jobType')],
        ['issueDate', opt('issueDate')],
        ['sortType', opt('sortType', '0')],
        ['pageNum', String(opt('pageNum', 1))],
        ['pageSize', String(opt('pageSize', 20))],
        ['source', '1'],
        ['scene', '7'],
    ]);
    return `${WE_ORIGIN}/api/job/search-pc?${query.toString()}`;
}
236
+
237
/**
 * Map one raw search-pc item (`resultbody.job.items[i]`) into the row shape
 * exposed to the user; shared by `search` and `hot` so both stay aligned.
 *
 * Missing fields become `''` (or `0` for the salary bounds). A null/undefined
 * item yields a row of defaults instead of throwing.
 *
 * @param {object|null|undefined} it Raw API item.
 * @param {number} rank 1-based position assigned by the caller.
 * @returns {object} Row whose keys match the search column list.
 */
export function mapJobItem(it, rank) {
    const item = it ?? {};
    const area = item.jobAreaLevelDetail || {};
    // "name·position", but without a dangling separator when the position is absent.
    let hr = '';
    if (item.hrName) {
        hr = item.hrPosition ? `${item.hrName}·${item.hrPosition}` : `${item.hrName}`;
    }
    return {
        rank,
        jobId: String(item.jobId ?? ''),
        title: item.jobName ?? '',
        salary: item.provideSalaryString ?? '',
        // Non-numeric salary bounds collapse to 0.
        salaryMin: Number(item.jobSalaryMin ?? 0) || 0,
        salaryMax: Number(item.jobSalaryMax ?? 0) || 0,
        city: area.cityString ?? item.jobAreaString ?? '',
        district: area.districtString ?? '',
        workYear: item.workYearString ?? '',
        degree: item.degreeString ?? '',
        tags: Array.isArray(item.jobTags) ? item.jobTags.join(',') : '',
        company: item.companyName ?? '',
        companyFull: item.fullCompanyName ?? '',
        companyType: item.companyTypeString ?? '',
        companySize: item.companySizeString ?? '',
        industry: item.industryType1Str ?? '',
        hr,
        issueDate: item.issueDateString ?? '',
        url: item.jobHref ?? '',
        companyUrl: item.companyHref ?? '',
        encCoId: item.encCoId ?? '',
    };
}
267
+
268
/** Column order for `search` / `hot` rows; mirrors the keys produced by `mapJobItem`. */
export const SEARCH_COLUMNS = [
    'rank',
    'jobId',
    'title',
    'salary',
    'salaryMin',
    'salaryMax',
    'city',
    'district',
    'workYear',
    'degree',
    'tags',
    'company',
    'companyFull',
    'companyType',
    'companySize',
    'industry',
    'hr',
    'issueDate',
    'url',
    'companyUrl',
    'encCoId',
];
274
+
275
/**
 * Turn one scraped company-page job link (`{ href, sensorsdata }`) into a row
 * fragment. `sensorsdata` is expected to be a JSON string; its `jobId` is
 * required and every other field defaults to `''`.
 *
 * Returns null when `raw` is not an object, when `href` / `sensorsdata` are
 * missing or not strings, when the JSON does not parse, or when it carries no
 * `jobId`.
 */
export function parseCompanyJobCard(raw) {
    if (raw === null || typeof raw !== 'object') return null;
    const { href, sensorsdata } = raw;
    if (typeof href !== 'string' || href === '') return null;
    if (typeof sensorsdata !== 'string' || sensorsdata === '') return null;

    let payload;
    try {
        payload = JSON.parse(sensorsdata);
    } catch {
        return null;
    }
    if (!payload || !payload.jobId) return null;

    const field = (name) => payload[name] || '';
    return {
        jobId: String(payload.jobId),
        title: field('jobTitle'),
        salary: field('jobSalary'),
        city: field('jobArea'),
        workYear: field('jobYear'),
        degree: field('jobDegree'),
        funcType: field('funcType'),
        issueDate: field('jobTime'),
        url: href,
    };
}