job-pro 0.7.2 → 0.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cambricon.js CHANGED
@@ -3,402 +3,30 @@
3
3
  // ============================================================
4
4
  // API DISCOVERY (probed 2026-05-16)
5
5
  //
6
- // www.cambricon.com (the corporate site) embeds links to Moka tenant URLs
7
- // in its 加入我们 / careers section. Extracted slugs:
6
+ // www.cambricon.com embeds links to Moka tenant URLs in its 加入我们 section:
8
7
  //
9
8
  // /campus-recruitment/cambricon/44201 ← campus + intern (main entry)
10
9
  // /recommendation-recruitment/cambricon/42452 (referral channel, overlaps)
11
10
  // /recommendation-recruitment/cambricon/46261 (referral channel, overlaps)
12
11
  //
13
12
  // No /social-recruitment/cambricon/<siteId> URL is published — Cambricon
14
- // only opens 校招 / 实习 publicly through Moka. The campus SSR HTML embeds
15
- // `<input id="init-data" value="{...}">` containing the full first page of
16
- // jobs + aesIv for subsequent AES-CBC paginated calls. Same pattern as
17
- // `cli/src/megvii.ts`; the heavy lifting (htmlDecode, parseInitData,
18
- // fetchPortalHtml two-fetch cookie dance, decryptMokaEnvelope) is
19
- // duplicated here for now — a shared `moka.ts` factory is worth refactoring
20
- // to once we have 6+ Moka tenants (currently megvii/deepseek/galaxyuniversal/
21
- // stepfun/moonshot/+cambricon = 6 schedule for next pass).
22
- import { extractResumeSignals, scoreOverlap, checkResume } from "./tencent.js";
23
- import { createDecipheriv } from "node:crypto";
24
- export { checkResume, extractResumeSignals, scoreOverlap };
25
- const SOURCE = "app.mokahr.com/cambricon";
26
- const ORG_SLUG = "cambricon";
27
- const CAMPUS_SITE_ID = 44201;
28
- const CAMPUS_URL = `https://app.mokahr.com/campus-recruitment/${ORG_SLUG}/${CAMPUS_SITE_ID}`;
29
- const API_ENDPOINT = "https://app.mokahr.com/api/outer/ats-apply/website/jobs/v2";
30
- const DEFAULT_HEADERS = {
31
- "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
32
- Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
33
- "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
34
- };
35
- // ---- helpers (duplicated from megvii.ts — slated for moka.ts refactor) ----
36
- function htmlDecode(s) {
37
- return s
38
- .replace(/&quot;/g, '"')
39
- .replace(/&amp;/g, "&")
40
- .replace(/&lt;/g, "<")
41
- .replace(/&gt;/g, ">")
42
- .replace(/&#x27;/g, "'")
43
- .replace(/&#39;/g, "'");
44
- }
45
- function parseInitData(html) {
46
- const m = html.match(/<input[^>]*id="init-data"[^>]*value="([^"]+)"/);
47
- if (!m)
48
- return null;
49
- try {
50
- return JSON.parse(htmlDecode(m[1]));
51
- }
52
- catch {
53
- return null;
54
- }
55
- }
56
- async function fetchPortalHtml(url) {
57
- let response;
58
- try {
59
- response = await fetch(url, { method: "GET", headers: DEFAULT_HEADERS, redirect: "manual" });
60
- }
61
- catch (err) {
62
- return { ok: false, message: `network error: ${err instanceof Error ? err.message : err}` };
63
- }
64
- const cookies = [];
65
- const headersAny = response.headers;
66
- if (typeof headersAny.getSetCookie === "function") {
67
- for (const v of headersAny.getSetCookie.call(response.headers) ?? []) {
68
- const c = v.split(";")[0];
69
- if (c)
70
- cookies.push(c);
71
- }
72
- }
73
- if (cookies.length === 0) {
74
- const raw = response.headers.get("set-cookie");
75
- if (raw)
76
- cookies.push(...raw.split(/,(?=[^;]+=)/).map((c) => c.split(";")[0].trim()));
77
- }
78
- const cookieHeader = cookies.join("; ");
79
- let r2;
80
- try {
81
- r2 = await fetch(url, {
82
- method: "GET",
83
- headers: { ...DEFAULT_HEADERS, Cookie: cookieHeader },
84
- redirect: "follow",
85
- });
86
- }
87
- catch (err) {
88
- return { ok: false, message: `network error: ${err instanceof Error ? err.message : err}` };
89
- }
90
- if (!r2.ok)
91
- return { ok: false, status: r2.status, message: `HTTP ${r2.status}` };
92
- const html = await r2.text();
93
- return { ok: true, html, cookieHeader, status: r2.status, message: "ok" };
94
- }
95
- function decryptMokaEnvelope(envelope, aesIv) {
96
- if (!envelope.data || !envelope.necromancer)
97
- return null;
98
- try {
99
- const key = Buffer.from(envelope.necromancer, "utf8");
100
- const iv = Buffer.from(aesIv, "utf8");
101
- const decipher = createDecipheriv("aes-128-cbc", key, iv);
102
- const plain = Buffer.concat([
103
- decipher.update(Buffer.from(envelope.data, "base64")),
104
- decipher.final(),
105
- ]);
106
- return JSON.parse(plain.toString("utf8"));
107
- }
108
- catch {
109
- return null;
110
- }
111
- }
112
- async function fetchEncryptedPage(pageNum, pageSize, aesIv, cookieHeader) {
113
- const url = `${API_ENDPOINT}?orgId=${encodeURIComponent(ORG_SLUG)}`;
114
- const body = {
115
- orgId: ORG_SLUG,
116
- siteId: String(CAMPUS_SITE_ID),
117
- pageNum,
118
- pageSize,
119
- needStat: true,
120
- };
121
- let response;
122
- try {
123
- response = await fetch(url, {
124
- method: "POST",
125
- headers: {
126
- ...DEFAULT_HEADERS,
127
- Accept: "application/json,*/*",
128
- "Content-Type": "application/json",
129
- Origin: "https://app.mokahr.com",
130
- Referer: CAMPUS_URL,
131
- Cookie: cookieHeader,
132
- },
133
- body: JSON.stringify(body),
134
- });
135
- }
136
- catch (err) {
137
- return { ok: false, message: `network error: ${err instanceof Error ? err.message : err}` };
138
- }
139
- if (!response.ok)
140
- return { ok: false, message: `HTTP ${response.status}` };
141
- let envelope;
142
- try {
143
- envelope = await response.json();
144
- }
145
- catch {
146
- return { ok: false, message: "bad JSON from upstream" };
147
- }
148
- const decoded = decryptMokaEnvelope(envelope, aesIv);
149
- if (!decoded || decoded.code !== 0 || !decoded.data) {
150
- return { ok: false, message: decoded?.msg || envelope?.msg || "decrypt or upstream error" };
151
- }
152
- return {
153
- ok: true,
154
- jobs: decoded.data.jobs ?? [],
155
- total: decoded.data.jobStats?.total ?? 0,
156
- message: "ok",
157
- };
158
- }
159
- function buildCityMap(groups) {
160
- const out = {};
161
- if (!groups)
162
- return out;
163
- for (const g of groups) {
164
- if (typeof g.cityId === "number" && g.label)
165
- out[g.cityId] = g.label;
166
- }
167
- return out;
168
- }
169
- function workCitiesFor(job, cityMap) {
170
- const cities = (job.locations ?? [])
171
- .map((l) => {
172
- if (typeof l.cityId === "number" && cityMap[l.cityId])
173
- return cityMap[l.cityId];
174
- return l.country || "";
175
- })
176
- .filter((s) => s.length > 0);
177
- const uniq = [];
178
- for (const c of cities)
179
- if (!uniq.includes(c))
180
- uniq.push(c);
181
- return uniq.join(" / ");
182
- }
183
- function commitmentFor(job) {
184
- if (typeof job.commitment === "string" && job.commitment.length > 0)
185
- return job.commitment;
186
- if (job.hireMode === 1)
187
- return "全职";
188
- if (job.hireMode === 2)
189
- return "实习";
190
- return "";
191
- }
192
- function summarize(job, cityMap) {
193
- return {
194
- post_id: String(job.id),
195
- title: job.title ?? "",
196
- project: job.zhineng?.name ?? "",
197
- recruit_label: commitmentFor(job),
198
- bgs: job.department?.name ?? "",
199
- work_cities: workCitiesFor(job, cityMap),
200
- apply_url: `${CAMPUS_URL}#/jobs/${encodeURIComponent(job.id)}`,
201
- };
202
- }
203
- function matchesKeyword(job, kw) {
204
- if (!kw)
205
- return true;
206
- const lc = kw.toLowerCase();
207
- return ((job.title ?? "").toLowerCase().includes(lc) ||
208
- (job.zhineng?.name ?? "").toLowerCase().includes(lc) ||
209
- (job.department?.name ?? "").toLowerCase().includes(lc));
210
- }
211
- // ---- searchPositions ----
212
- export async function searchPositions(opts = {}) {
213
- const pageSize = opts.pageSize ?? 20;
214
- const page = opts.page ?? 1;
215
- const keyword = opts.keyword ?? "";
216
- const portal = await fetchPortalHtml(CAMPUS_URL);
217
- if (!portal.ok || !portal.html) {
218
- return {
219
- ok: false,
220
- source: SOURCE,
221
- message: portal.message,
222
- query: { keyword, page, pageSize },
223
- positions: [],
224
- total: 0,
225
- };
226
- }
227
- const init = parseInitData(portal.html);
228
- if (!init || !init.jobs || !init.jobStats) {
229
- return {
230
- ok: false,
231
- source: SOURCE,
232
- message: "Moka init-data missing jobs/jobStats",
233
- query: { keyword, page, pageSize },
234
- positions: [],
235
- total: 0,
236
- };
237
- }
238
- const cityMap = buildCityMap(init.jobsGroupedByLocation);
239
- let jobs = init.jobs;
240
- const total = init.jobStats.total ?? jobs.length;
241
- if (page > 1 && init.aesIv && portal.cookieHeader) {
242
- const more = await fetchEncryptedPage(page, pageSize, init.aesIv, portal.cookieHeader);
243
- if (!more.ok || !more.jobs) {
244
- return {
245
- ok: false,
246
- source: SOURCE,
247
- message: `pagination failed: ${more.message}`,
248
- query: { keyword, page, pageSize },
249
- positions: [],
250
- total,
251
- };
252
- }
253
- jobs = more.jobs;
254
- }
255
- const filtered = jobs.filter((j) => matchesKeyword(j, keyword));
256
- const sliced = filtered.slice(0, pageSize);
257
- const positions = sliced.map((j) => summarize(j, cityMap));
258
- return {
259
- ok: true,
260
- source: SOURCE,
261
- query: { keyword, page, pageSize },
262
- page,
263
- page_size: pageSize,
264
- total,
265
- positions,
266
- };
267
- }
268
- // ---- fetchAllPositions ----
269
- export async function fetchAllPositions(opts = {}) {
270
- const pageSize = opts.pageSize ?? 20;
271
- const maxPages = Math.max(1, opts.maxPages ?? 50);
272
- const keyword = opts.keyword ?? "";
273
- const portal = await fetchPortalHtml(CAMPUS_URL);
274
- if (!portal.ok || !portal.html) {
275
- return {
276
- ok: false,
277
- source: SOURCE,
278
- message: portal.message,
279
- total: 0,
280
- fetched: 0,
281
- positions: [],
282
- };
283
- }
284
- const init = parseInitData(portal.html);
285
- if (!init || !init.jobs || !init.jobStats || !init.aesIv) {
286
- return {
287
- ok: false,
288
- source: SOURCE,
289
- message: "Moka init-data missing required fields",
290
- total: 0,
291
- fetched: 0,
292
- positions: [],
293
- };
294
- }
295
- const cityMap = buildCityMap(init.jobsGroupedByLocation);
296
- const total = init.jobStats.total ?? 0;
297
- const collected = [...init.jobs];
298
- let page = 2;
299
- while (collected.length < total && page <= maxPages) {
300
- const more = await fetchEncryptedPage(page, pageSize, init.aesIv, portal.cookieHeader ?? "");
301
- if (!more.ok || !more.jobs || more.jobs.length === 0)
302
- break;
303
- collected.push(...more.jobs);
304
- page += 1;
305
- }
306
- const filtered = collected.filter((j) => matchesKeyword(j, keyword));
307
- return {
308
- ok: true,
309
- source: SOURCE,
310
- total,
311
- fetched: filtered.length,
312
- positions: filtered.map((j) => summarize(j, cityMap)),
313
- };
314
- }
315
- // ---- fetchPositionDetail ----
316
- export async function fetchPositionDetail(postId) {
317
- return {
318
- ok: false,
319
- source: SOURCE,
320
- message: "Moka detail endpoint requires the same encrypted-session flow; not implemented. " +
321
- "Use the apply_url deeplink for the full JD.",
322
- post_id: postId,
323
- apply_url: `${CAMPUS_URL}#/jobs/${encodeURIComponent(postId)}`,
324
- };
325
- }
326
- // ---- fetchDictionaries ----
327
- export async function fetchDictionaries() {
328
- const portal = await fetchPortalHtml(CAMPUS_URL);
329
- if (!portal.ok || !portal.html) {
330
- return { ok: false, source: SOURCE, message: portal.message };
331
- }
332
- const init = parseInitData(portal.html);
333
- if (!init) {
334
- return { ok: false, source: SOURCE, message: "Moka init-data missing" };
335
- }
336
- return {
337
- ok: true,
338
- source: SOURCE,
339
- locations: init.jobsGroupedByLocation ?? [],
340
- moka_org: { slug: ORG_SLUG, id: CAMPUS_SITE_ID, url: CAMPUS_URL },
341
- };
342
- }
343
- // ---- notices (no public endpoint) ----
344
- const NOTICES_STUB_MSG = "Cambricon (寒武纪): no public notices endpoint on Moka tenant";
345
- export async function listNotices() {
346
- return {
347
- ok: false,
348
- source: SOURCE,
349
- message: NOTICES_STUB_MSG,
350
- notices: [],
351
- };
352
- }
353
- export async function getNotice(noticeId) {
354
- return {
355
- ok: false,
356
- source: SOURCE,
357
- message: NOTICES_STUB_MSG,
358
- notice_id: noticeId,
359
- };
360
- }
361
- export async function findNoticesByQuestion(question, _opts = {}) {
362
- return {
363
- ok: false,
364
- source: SOURCE,
365
- question,
366
- message: NOTICES_STUB_MSG,
367
- matches: [],
368
- };
369
- }
370
- // ---- matchResume ----
371
- export async function matchResume(text, opts = {}) {
372
- const { terms, cities } = extractResumeSignals(text ?? "");
373
- const candidates = Math.max(20, opts.candidates ?? 100);
374
- const search = await fetchAllPositions({
375
- pageSize: 20,
376
- maxPages: Math.ceil(candidates / 15),
377
- });
378
- if (!search.ok) {
379
- return {
380
- ok: false,
381
- source: SOURCE,
382
- extracted_terms: terms,
383
- city_preferences: cities,
384
- matches: [],
385
- message: search.message,
386
- };
387
- }
388
- const topN = Math.max(1, opts.topN ?? 10);
389
- const scored = search.positions
390
- .map((p) => ({
391
- p,
392
- score: scoreOverlap(`${p.title} ${p.project} ${p.bgs}`, terms, cities).score,
393
- }))
394
- .sort((a, b) => b.score - a.score)
395
- .slice(0, topN)
396
- .map((x) => x.p);
397
- return {
398
- ok: true,
399
- source: SOURCE,
400
- extracted_terms: terms,
401
- city_preferences: cities,
402
- matches: scored,
403
- };
404
- }
13
+ // only opens 校招 / 实习 publicly through Moka. Same factory as
14
+ // `cli/src/moka.ts` (used by megvii / geely / etc.).
15
+ import { createAdapter } from "./moka.js";
16
+ const adapter = createAdapter({
17
+ orgSlug: "cambricon",
18
+ label: "Cambricon",
19
+ channels: [
20
+ { siteId: 44201, kind: "campus-recruitment", recruitType: "campus" },
21
+ ],
22
+ defaultRecruitType: "campus",
23
+ });
24
+ export const searchPositions = adapter.searchPositions;
25
+ export const fetchAllPositions = adapter.fetchAllPositions;
26
+ export const fetchPositionDetail = adapter.fetchPositionDetail;
27
+ export const fetchDictionaries = adapter.fetchDictionaries;
28
+ export const listNotices = adapter.listNotices;
29
+ export const getNotice = adapter.getNotice;
30
+ export const findNoticesByQuestion = adapter.findNoticesByQuestion;
31
+ export const matchResume = adapter.matchResume;
32
+ export const checkResume = adapter.checkResume;
package/dist/cdp.js ADDED
@@ -0,0 +1,191 @@
1
+ // Headless-browser helper for adapters whose upstream is gated by anti-bot
2
+ // signatures that the CLI can't reproduce from raw HTTP.
3
+ //
4
+ // Usage pattern:
5
+ // 1. `await getBrowser()` returns a process-singleton puppeteer-core Browser
6
+ // attached to the user's system Chrome.
7
+ // 2. Call `withPage(async page => …)` to open a fresh page and run a fn
8
+ // against it; the page is closed afterwards and you receive `{ ok:true, value }`.
9
+ //
10
+ // Why puppeteer-core (not puppeteer): we attach to the user's existing
11
+ // Chrome installation; no 100MB Chromium download. Trade-off: we need a
12
+ // working Chrome executable path.
13
+ //
14
+ // Failure modes:
15
+ // * puppeteer-core not installed → ERR_MODULE_NOT_FOUND on dynamic import → caller
16
+ // receives `{ ok:false, error:{ reason:"puppeteer-not-installed", message: … } }`
17
+ // and renders it as the canonical ok:false stub.
18
+ // * No Chrome found at any well-known path → same error shape with
19
+ // `reason:"chrome-not-found"`.
20
+ // * Browser launch failed (sandbox, profile lock, …) → `reason:"launch-failed"`.
21
+ import { existsSync } from "node:fs";
22
+ const CHROME_PATHS = [
23
+ // macOS
24
+ "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
25
+ "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
26
+ "/Applications/Chromium.app/Contents/MacOS/Chromium",
27
+ // Linux
28
+ "/usr/bin/google-chrome",
29
+ "/usr/bin/google-chrome-stable",
30
+ "/usr/bin/chromium",
31
+ "/usr/bin/chromium-browser",
32
+ // Windows (when running under WSL / Git Bash)
33
+ "/c/Program Files/Google/Chrome/Application/chrome.exe",
34
+ ];
35
+ const USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36";
36
+ // ---------- singleton browser ----------
37
+ let _browser = null;
38
+ let _browserError = null;
39
+ let _launching = null;
40
+ async function loadPuppeteer() {
41
+ try {
42
+ // Dynamic import; if puppeteer-core was tree-shaken or uninstalled,
43
+ // this rejects with ERR_MODULE_NOT_FOUND.
44
+ const mod = (await import("puppeteer-core"));
45
+ return { ok: true, mod: mod.default };
46
+ }
47
+ catch (err) {
48
+ return {
49
+ ok: false,
50
+ error: {
51
+ reason: "puppeteer-not-installed",
52
+ message: "`puppeteer-core` is not installed. Install it locally with " +
53
+ "`npm i puppeteer-core` (or `pnpm add puppeteer-core`). " +
54
+ `Original error: ${err instanceof Error ? err.message : String(err)}`,
55
+ },
56
+ };
57
+ }
58
+ }
59
+ function findChrome() {
60
+ if (process.env.JOB_PRO_CHROME && existsSync(process.env.JOB_PRO_CHROME)) {
61
+ return process.env.JOB_PRO_CHROME;
62
+ }
63
+ for (const p of CHROME_PATHS) {
64
+ if (existsSync(p))
65
+ return p;
66
+ }
67
+ return null;
68
+ }
69
+ async function launchOnce() {
70
+ const pp = await loadPuppeteer();
71
+ if (!pp.ok)
72
+ return pp.error;
73
+ const chrome = findChrome();
74
+ if (!chrome) {
75
+ return {
76
+ reason: "chrome-not-found",
77
+ message: "No Chrome/Chromium executable found. Tried: " +
78
+ CHROME_PATHS.join(", ") +
79
+ ". Set $JOB_PRO_CHROME=/path/to/chrome to override.",
80
+ };
81
+ }
82
+ try {
83
+ const browser = await pp.mod.launch({
84
+ executablePath: chrome,
85
+ headless: true,
86
+ args: [
87
+ "--no-sandbox",
88
+ "--disable-blink-features=AutomationControlled",
89
+ "--disable-features=IsolateOrigins,site-per-process",
90
+ ],
91
+ });
92
+ return browser;
93
+ }
94
+ catch (err) {
95
+ return {
96
+ reason: "launch-failed",
97
+ message: `Chrome failed to launch: ${err instanceof Error ? err.message : String(err)}`,
98
+ };
99
+ }
100
+ }
101
+ /** Get a process-singleton headless browser. Subsequent calls reuse it. */
102
+ export async function getBrowser() {
103
+ if (_browser)
104
+ return { ok: true, browser: _browser };
105
+ if (_browserError)
106
+ return { ok: false, error: _browserError };
107
+ if (!_launching) {
108
+ _launching = launchOnce();
109
+ }
110
+ const result = await _launching;
111
+ _launching = null;
112
+ if ("reason" in result) {
113
+ _browserError = result;
114
+ return { ok: false, error: result };
115
+ }
116
+ _browser = result;
117
+ return { ok: true, browser: result };
118
+ }
119
+ /** Close the singleton browser (call before process exit). */
120
+ export async function closeBrowser() {
121
+ if (_browser) {
122
+ try {
123
+ await _browser.close();
124
+ }
125
+ catch {
126
+ /* ignore */
127
+ }
128
+ _browser = null;
129
+ }
130
+ }
131
+ // On Node exit, best-effort close the browser to avoid zombie processes.
132
+ let _exitHookInstalled = false;
133
+ function ensureExitHook() {
134
+ if (_exitHookInstalled)
135
+ return;
136
+ _exitHookInstalled = true;
137
+ const cleanup = () => {
138
+ if (_browser) {
139
+ try {
140
+ // synchronous best-effort kill; puppeteer launches Chrome as a child
141
+ // process tracked by the Browser object, so close() handles SIGTERM.
142
+ void _browser.close().catch(() => undefined);
143
+ }
144
+ catch {
145
+ /* ignore */
146
+ }
147
+ }
148
+ };
149
+ process.on("exit", cleanup);
150
+ process.on("SIGINT", () => {
151
+ cleanup();
152
+ process.exit(130);
153
+ });
154
+ process.on("SIGTERM", () => {
155
+ cleanup();
156
+ process.exit(143);
157
+ });
158
+ }
159
+ /** Open a page, run fn against it, and close the page. The singleton browser stays open. */
160
+ export async function withPage(fn) {
161
+ ensureExitHook();
162
+ const b = await getBrowser();
163
+ if (!b.ok)
164
+ return b;
165
+ let page = null;
166
+ try {
167
+ page = await b.browser.newPage();
168
+ await page.setUserAgent(USER_AGENT);
169
+ const value = await fn(page);
170
+ return { ok: true, value };
171
+ }
172
+ catch (err) {
173
+ return {
174
+ ok: false,
175
+ error: {
176
+ reason: "launch-failed",
177
+ message: `page operation failed: ${err instanceof Error ? err.message : String(err)}`,
178
+ },
179
+ };
180
+ }
181
+ finally {
182
+ if (page) {
183
+ try {
184
+ await page.close();
185
+ }
186
+ catch {
187
+ /* ignore */
188
+ }
189
+ }
190
+ }
191
+ }