job-pro 0.7.2 → 0.7.4

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
package/dist/megvii.js CHANGED
@@ -1,456 +1,26 @@
1
- // Thin client for 旷视科技 / Megvii / Face++ recruiting portal at app.mokahr.com.
1
+ // 旷视科技 / Megvii / Face++ careers Moka SSR + AES-128-CBC.
2
2
  //
3
- // ============================================================
4
- // HOW THIS WORKS (probed 2026-05):
5
- //
6
- // Moka social-recruitment SSR HTML at
7
- // https://app.mokahr.com/social-recruitment/megviihr/38641
8
- // embeds the entire first page of jobs INLINE in a hidden input
9
- // `<input id="init-data" value="<HTML-escaped JSON>">`. The JSON
10
- // shape is documented in the call helper below; the important keys are
11
- // `jobs[]` (first 15 entries) and `jobStats.total` (full count).
12
- //
13
- // The same SSR HTML is also emitted for the campus portal at
14
- // https://app.mokahr.com/campus_apply/megviihr/38642
15
- //
16
- // For deeper pagination the SPA POSTs to
17
- // /api/outer/ats-apply/website/jobs/v2?orgId=megviihr
18
- // with body { orgId, siteId, pageNum, pageSize, needStat:true } and
19
- // receives an AES-CBC encrypted envelope {data, necromancer}. We
20
- // decrypt using key=necromancer (raw utf8) and iv=aesIv (raw utf8,
21
- // served in the SSR HTML as a constant — observed value is the
22
- // same Moka-wide string across orgs).
23
- //
24
- // CONFIRMED MOKA ORG IDs:
25
- // Campus (校园招聘): orgSlug=megviihr, siteId=38642
26
- // URL: https://app.mokahr.com/campus_apply/megviihr/38642
27
- // Social (社会招聘): orgSlug=megviihr, siteId=38641
28
- // URL: https://app.mokahr.com/social-recruitment/megviihr/38641
29
- //
30
- // PositionSummary field mapping (Moka raw → canonical):
31
- // post_id ← job.id (UUID, used as positionId in detail deeplink)
32
- // title ← job.title
33
- // project ← job.zhineng?.name (职位类别, e.g. "算法类", "职能类")
34
- // recruit_label ← job.commitment || hireMode-derived label
35
- // bgs ← job.department?.name (部门)
36
- // work_cities ← job.locations[].cityId resolved via jobsGroupedByLocation
37
- // (concatenated with " / "); falls back to job.location.country
38
- // apply_url ← portal URL + "#/jobs/{id}"
39
- import { extractResumeSignals, scoreOverlap, checkResume } from "./tencent.js";
40
- import { createDecipheriv } from "node:crypto";
41
- export { checkResume, extractResumeSignals, scoreOverlap };
42
- const SOURCE = "app.mokahr.com/megviihr";
43
- const ORG_SLUG = "megviihr";
44
- const CAMPUS_SITE_ID = 38642;
45
- const SOCIAL_SITE_ID = 38641;
46
- const CAMPUS_URL = `https://app.mokahr.com/campus_apply/${ORG_SLUG}/${CAMPUS_SITE_ID}`;
47
- const SOCIAL_URL = `https://app.mokahr.com/social-recruitment/${ORG_SLUG}/${SOCIAL_SITE_ID}`;
48
- const API_ENDPOINT = "https://app.mokahr.com/api/outer/ats-apply/website/jobs/v2";
49
- const DEFAULT_HEADERS = {
50
- "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
51
- Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
52
- "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
53
- };
54
- // ---- helpers ----
55
- /** HTML-decode &quot; / &amp; / &lt; / &gt; / &#x27; */
56
- function htmlDecode(s) {
57
- return s
58
- .replace(/&quot;/g, '"')
59
- .replace(/&amp;/g, "&")
60
- .replace(/&lt;/g, "<")
61
- .replace(/&gt;/g, ">")
62
- .replace(/&#x27;/g, "'")
63
- .replace(/&#39;/g, "'");
64
- }
65
- /** Parse the init-data JSON blob out of Moka SSR HTML. */
66
- function parseInitData(html) {
67
- const m = html.match(/<input[^>]*id="init-data"[^>]*value="([^"]+)"/);
68
- if (!m)
69
- return null;
70
- try {
71
- return JSON.parse(htmlDecode(m[1]));
72
- }
73
- catch {
74
- return null;
75
- }
76
- }
77
- /** Fetch SSR HTML for a Moka portal URL with a fresh cookie jar in-memory. */
78
- async function fetchPortalHtml(url) {
79
- // Two-fetch dance: first request bounces with Set-Cookie + 302 to self;
80
- // we capture cookies and re-issue with them attached.
81
- let response;
82
- try {
83
- response = await fetch(url, { method: "GET", headers: DEFAULT_HEADERS, redirect: "manual" });
84
- }
85
- catch (err) {
86
- return { ok: false, message: `network error: ${err instanceof Error ? err.message : err}` };
87
- }
88
- const cookies = [];
89
- // getSetCookie() must be called bound to the Headers object (Node undici brandCheck)
90
- const headersAny = response.headers;
91
- if (typeof headersAny.getSetCookie === "function") {
92
- for (const v of headersAny.getSetCookie.call(response.headers) ?? []) {
93
- const c = v.split(";")[0];
94
- if (c)
95
- cookies.push(c);
96
- }
97
- }
98
- // Some runtimes only expose combined header
99
- if (cookies.length === 0) {
100
- const raw = response.headers.get("set-cookie");
101
- if (raw)
102
- cookies.push(...raw.split(/,(?=[^;]+=)/).map((c) => c.split(";")[0].trim()));
103
- }
104
- const cookieHeader = cookies.join("; ");
105
- // Now fetch with cookies (follow redirects automatically)
106
- let r2;
107
- try {
108
- r2 = await fetch(url, {
109
- method: "GET",
110
- headers: { ...DEFAULT_HEADERS, Cookie: cookieHeader },
111
- redirect: "follow",
112
- });
113
- }
114
- catch (err) {
115
- return { ok: false, message: `network error: ${err instanceof Error ? err.message : err}` };
116
- }
117
- if (!r2.ok) {
118
- return { ok: false, status: r2.status, message: `HTTP ${r2.status}` };
119
- }
120
- const html = await r2.text();
121
- return { ok: true, html, cookieHeader, status: r2.status, message: "ok" };
122
- }
123
- /** AES-128-CBC decrypt of Moka encrypted job payload. */
124
- function decryptMokaEnvelope(envelope, aesIv) {
125
- if (!envelope.data || !envelope.necromancer)
126
- return null;
127
- try {
128
- const key = Buffer.from(envelope.necromancer, "utf8");
129
- const iv = Buffer.from(aesIv, "utf8");
130
- const decipher = createDecipheriv("aes-128-cbc", key, iv);
131
- const plain = Buffer.concat([
132
- decipher.update(Buffer.from(envelope.data, "base64")),
133
- decipher.final(),
134
- ]);
135
- return JSON.parse(plain.toString("utf8"));
136
- }
137
- catch {
138
- return null;
139
- }
140
- }
141
- /** Fetch a deeper page via the encrypted POST endpoint. */
142
- async function fetchEncryptedPage(orgSlug, siteId, pageNum, pageSize, aesIv, cookieHeader, portalUrl) {
143
- const url = `${API_ENDPOINT}?orgId=${encodeURIComponent(orgSlug)}`;
144
- const body = {
145
- orgId: orgSlug,
146
- siteId: String(siteId),
147
- pageNum,
148
- pageSize,
149
- needStat: true,
150
- };
151
- let response;
152
- try {
153
- response = await fetch(url, {
154
- method: "POST",
155
- headers: {
156
- ...DEFAULT_HEADERS,
157
- Accept: "application/json,*/*",
158
- "Content-Type": "application/json",
159
- Origin: "https://app.mokahr.com",
160
- Referer: portalUrl,
161
- Cookie: cookieHeader,
162
- },
163
- body: JSON.stringify(body),
164
- });
165
- }
166
- catch (err) {
167
- return { ok: false, message: `network error: ${err instanceof Error ? err.message : err}` };
168
- }
169
- if (!response.ok)
170
- return { ok: false, message: `HTTP ${response.status}` };
171
- let envelope;
172
- try {
173
- envelope = await response.json();
174
- }
175
- catch {
176
- return { ok: false, message: "bad JSON from upstream" };
177
- }
178
- const decoded = decryptMokaEnvelope(envelope, aesIv);
179
- if (!decoded || decoded.code !== 0 || !decoded.data) {
180
- return { ok: false, message: decoded?.msg || envelope?.msg || "decrypt or upstream error" };
181
- }
182
- return {
183
- ok: true,
184
- jobs: decoded.data.jobs ?? [],
185
- total: decoded.data.jobStats?.total ?? 0,
186
- message: "ok",
187
- };
188
- }
189
- /** Build cityId → city label map from jobsGroupedByLocation. */
190
- function buildCityMap(groups) {
191
- const out = {};
192
- if (!groups)
193
- return out;
194
- for (const g of groups) {
195
- if (typeof g.cityId === "number" && g.label)
196
- out[g.cityId] = g.label;
197
- }
198
- return out;
199
- }
200
- function workCitiesFor(job, cityMap) {
201
- const cities = (job.locations ?? [])
202
- .map((l) => {
203
- if (typeof l.cityId === "number" && cityMap[l.cityId])
204
- return cityMap[l.cityId];
205
- return l.country || "";
206
- })
207
- .filter((s) => s.length > 0);
208
- const uniq = [];
209
- for (const c of cities)
210
- if (!uniq.includes(c))
211
- uniq.push(c);
212
- return uniq.join(" / ");
213
- }
214
- function commitmentFor(job) {
215
- if (typeof job.commitment === "string" && job.commitment.length > 0)
216
- return job.commitment;
217
- if (job.hireMode === 1)
218
- return "全职";
219
- if (job.hireMode === 2)
220
- return "实习";
221
- return "";
222
- }
223
- function summarize(job, cityMap, portalUrl) {
224
- return {
225
- post_id: String(job.id),
226
- title: job.title ?? "",
227
- project: job.zhineng?.name ?? "",
228
- recruit_label: commitmentFor(job),
229
- bgs: job.department?.name ?? "",
230
- work_cities: workCitiesFor(job, cityMap),
231
- apply_url: `${portalUrl}#/jobs/${encodeURIComponent(job.id)}`,
232
- };
233
- }
234
- function matchesKeyword(job, kw) {
235
- if (!kw)
236
- return true;
237
- const lc = kw.toLowerCase();
238
- return ((job.title ?? "").toLowerCase().includes(lc) ||
239
- (job.zhineng?.name ?? "").toLowerCase().includes(lc) ||
240
- (job.department?.name ?? "").toLowerCase().includes(lc));
241
- }
242
- function portalUrlFor(recruitType) {
243
- return recruitType === "campus" ? CAMPUS_URL : SOCIAL_URL;
244
- }
245
- function siteIdFor(recruitType) {
246
- return recruitType === "campus" ? CAMPUS_SITE_ID : SOCIAL_SITE_ID;
247
- }
248
- // ---- searchPositions ----
249
- export async function searchPositions(opts = {}) {
250
- const recruitType = opts.recruitType ?? "social";
251
- const portalUrl = portalUrlFor(recruitType);
252
- const pageSize = opts.pageSize ?? 20;
253
- const page = opts.page ?? 1;
254
- const keyword = opts.keyword ?? "";
255
- const portal = await fetchPortalHtml(portalUrl);
256
- if (!portal.ok || !portal.html) {
257
- return {
258
- ok: false,
259
- source: SOURCE,
260
- message: portal.message,
261
- query: { recruitType, keyword, page, pageSize },
262
- positions: [],
263
- total: 0,
264
- };
265
- }
266
- const init = parseInitData(portal.html);
267
- if (!init || !init.jobs || !init.jobStats) {
268
- return {
269
- ok: false,
270
- source: SOURCE,
271
- message: "Moka init-data missing jobs/jobStats",
272
- query: { recruitType, keyword, page, pageSize },
273
- positions: [],
274
- total: 0,
275
- };
276
- }
277
- const cityMap = buildCityMap(init.jobsGroupedByLocation);
278
- let jobs = init.jobs;
279
- const total = init.jobStats.total ?? jobs.length;
280
- // If caller requested page > 1, fetch via encrypted POST
281
- if (page > 1 && init.aesIv && portal.cookieHeader) {
282
- const more = await fetchEncryptedPage(ORG_SLUG, siteIdFor(recruitType), page, pageSize, init.aesIv, portal.cookieHeader, portalUrl);
283
- if (!more.ok || !more.jobs) {
284
- return {
285
- ok: false,
286
- source: SOURCE,
287
- message: `pagination failed: ${more.message}`,
288
- query: { recruitType, keyword, page, pageSize },
289
- positions: [],
290
- total,
291
- };
292
- }
293
- jobs = more.jobs;
294
- }
295
- // Client-side keyword filter — Moka server-side keyword on this endpoint
296
- // is observed to be ignored on first-page SSR, so we filter locally.
297
- const filtered = jobs.filter((j) => matchesKeyword(j, keyword));
298
- const sliced = filtered.slice(0, pageSize);
299
- const positions = sliced.map((j) => summarize(j, cityMap, portalUrl));
300
- return {
301
- ok: true,
302
- source: SOURCE,
303
- query: { recruitType, keyword, page, pageSize },
304
- page,
305
- page_size: pageSize,
306
- total,
307
- positions,
308
- };
309
- }
310
- // ---- fetchAllPositions ----
311
- export async function fetchAllPositions(opts = {}) {
312
- const recruitType = opts.recruitType ?? "social";
313
- const portalUrl = portalUrlFor(recruitType);
314
- const pageSize = opts.pageSize ?? 20;
315
- const maxPages = Math.max(1, opts.maxPages ?? 50);
316
- const keyword = opts.keyword ?? "";
317
- const portal = await fetchPortalHtml(portalUrl);
318
- if (!portal.ok || !portal.html) {
319
- return {
320
- ok: false,
321
- source: SOURCE,
322
- message: portal.message,
323
- total: 0,
324
- fetched: 0,
325
- positions: [],
326
- };
327
- }
328
- const init = parseInitData(portal.html);
329
- if (!init || !init.jobs || !init.jobStats || !init.aesIv) {
330
- return {
331
- ok: false,
332
- source: SOURCE,
333
- message: "Moka init-data missing required fields",
334
- total: 0,
335
- fetched: 0,
336
- positions: [],
337
- };
338
- }
339
- const cityMap = buildCityMap(init.jobsGroupedByLocation);
340
- const total = init.jobStats.total ?? 0;
341
- const collected = [...init.jobs];
342
- // Page 1 came from SSR; for subsequent pages use encrypted POST.
343
- // SSR returns ~15 per page; we cap with maxPages * pageSize.
344
- let page = 2;
345
- while (collected.length < total && page <= maxPages) {
346
- const more = await fetchEncryptedPage(ORG_SLUG, siteIdFor(recruitType), page, pageSize, init.aesIv, portal.cookieHeader ?? "", portalUrl);
347
- if (!more.ok || !more.jobs || more.jobs.length === 0)
348
- break;
349
- collected.push(...more.jobs);
350
- page += 1;
351
- }
352
- const filtered = collected.filter((j) => matchesKeyword(j, keyword));
353
- return {
354
- ok: true,
355
- source: SOURCE,
356
- total,
357
- fetched: filtered.length,
358
- positions: filtered.map((j) => summarize(j, cityMap, portalUrl)),
359
- };
360
- }
361
- // ---- fetchPositionDetail ----
362
- //
363
- // The Moka detail endpoint /api/outer/ats-apply/website/job is also AES-encrypted
364
- // and requires a fresh session cookie. For now we return the deeplink + a
365
- // note — keeping the verb honest rather than fake-successful.
366
- export async function fetchPositionDetail(postId) {
367
- return {
368
- ok: false,
369
- source: SOURCE,
370
- message: "Moka detail endpoint /api/outer/ats-apply/website/job requires the same encrypted-session " +
371
- "flow; not implemented in this adapter. Use the apply_url deeplink for the full JD.",
372
- post_id: postId,
373
- apply_url: `${SOCIAL_URL}#/jobs/${encodeURIComponent(postId)}`,
374
- };
375
- }
376
- // ---- fetchDictionaries ----
377
- export async function fetchDictionaries() {
378
- const portal = await fetchPortalHtml(SOCIAL_URL);
379
- if (!portal.ok || !portal.html) {
380
- return { ok: false, source: SOURCE, message: portal.message };
381
- }
382
- const init = parseInitData(portal.html);
383
- if (!init) {
384
- return { ok: false, source: SOURCE, message: "Moka init-data missing" };
385
- }
386
- return {
387
- ok: true,
388
- source: SOURCE,
389
- locations: init.jobsGroupedByLocation ?? [],
390
- moka_orgs: {
391
- campus: { slug: ORG_SLUG, id: CAMPUS_SITE_ID, url: CAMPUS_URL },
392
- social: { slug: ORG_SLUG, id: SOCIAL_SITE_ID, url: SOCIAL_URL },
393
- },
394
- };
395
- }
396
- // ---- notices (no public endpoint) ----
397
- export async function listNotices() {
398
- return {
399
- ok: false,
400
- source: SOURCE,
401
- message: "Megvii (旷视): no public notices endpoint",
402
- notices: [],
403
- };
404
- }
405
- export async function getNotice(noticeId) {
406
- return {
407
- ok: false,
408
- source: SOURCE,
409
- message: "Megvii (旷视): no public notices endpoint",
410
- notice_id: noticeId,
411
- };
412
- }
413
- export async function findNoticesByQuestion(question, _opts = {}) {
414
- return {
415
- ok: false,
416
- source: SOURCE,
417
- question,
418
- message: "Megvii (旷视): no public notices endpoint",
419
- matches: [],
420
- };
421
- }
422
- // ---- matchResume ----
423
- export async function matchResume(text, opts = {}) {
424
- const { terms, cities } = extractResumeSignals(text ?? "");
425
- const candidates = Math.max(20, opts.candidates ?? 100);
426
- const search = await fetchAllPositions({
427
- pageSize: 20,
428
- maxPages: Math.ceil(candidates / 15),
429
- });
430
- if (!search.ok) {
431
- return {
432
- ok: false,
433
- source: SOURCE,
434
- extracted_terms: terms,
435
- city_preferences: cities,
436
- matches: [],
437
- message: search.message,
438
- };
439
- }
440
- const topN = Math.max(1, opts.topN ?? 10);
441
- const scored = search.positions
442
- .map((p) => ({
443
- p,
444
- score: scoreOverlap(`${p.title} ${p.project} ${p.bgs}`, terms, cities).score,
445
- }))
446
- .sort((a, b) => b.score - a.score)
447
- .slice(0, topN)
448
- .map((x) => x.p);
449
- return {
450
- ok: true,
451
- source: SOURCE,
452
- extracted_terms: terms,
453
- city_preferences: cities,
454
- matches: scored,
455
- };
456
- }
3
+ // Two portals on the same Moka tenant `megviihr`:
4
+ // campus → https://app.mokahr.com/campus_apply/megviihr/38642
5
+ // social → https://app.mokahr.com/social-recruitment/megviihr/38641
6
+ // Probed 2026-05; ~5 visible positions (Megvii hiring is currently low).
7
+ // See cli/src/moka.ts for the shared factory.
8
+ import { createAdapter } from "./moka.js";
9
+ const adapter = createAdapter({
10
+ orgSlug: "megviihr",
11
+ label: "Megvii",
12
+ channels: [
13
+ { siteId: 38642, kind: "campus_apply", recruitType: "campus" },
14
+ { siteId: 38641, kind: "social-recruitment", recruitType: "social" },
15
+ ],
16
+ defaultRecruitType: "social",
17
+ });
18
+ export const searchPositions = adapter.searchPositions;
19
+ export const fetchAllPositions = adapter.fetchAllPositions;
20
+ export const fetchPositionDetail = adapter.fetchPositionDetail;
21
+ export const fetchDictionaries = adapter.fetchDictionaries;
22
+ export const listNotices = adapter.listNotices;
23
+ export const getNotice = adapter.getNotice;
24
+ export const findNoticesByQuestion = adapter.findNoticesByQuestion;
25
+ export const matchResume = adapter.matchResume;
26
+ export const checkResume = adapter.checkResume;