job-pro 0.7.0 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/deepseek.js CHANGED
@@ -1,39 +1,387 @@
1
- // DeepSeek (深度求索) stub adapter for `job-pro`.
1
+ // DeepSeek (深度求索) / High-Flyer (幻方量化) recruiting via app.mokahr.com.
2
2
  //
3
- // STATUS: stub-only. DeepSeek is part of High-Flyer (幻方量化) and lists
4
- // careers via the parent company on Moka social-recruitment. Probe results:
5
- // www.deepseek.com/careers → 200 HTML, no inline job data / API path
6
- // careers.deepseek.com → DNS resolves but TLS rejects from non-CN IPs
7
- // app.mokahr.com/social-recruitment/high-flyer/140576/ → Moka SPA, auth-gated
8
- // Moka public anonymous API is gated (confirmed; see Moka probe in repo
9
- // history). When DeepSeek opens a public JSON endpoint we rewrite in one pass.
10
- import { extractResumeSignals, checkResume } from "./tencent.js";
11
- export { checkResume };
12
- const SOURCE = "www.deepseek.com";
13
- const STUB_MESSAGE = "DeepSeek: no public job API — careers route through Moka social-recruitment " +
14
- "(high-flyer/140576), which requires session auth. corporate careers page is HTML only.";
15
- export async function searchPositions(_opts = {}) {
16
- return { ok: false, source: SOURCE, message: STUB_MESSAGE, query: {}, positions: [] };
17
- }
18
- export async function fetchAllPositions(_opts = {}) {
19
- return { ok: false, source: SOURCE, message: STUB_MESSAGE, total: 0, fetched: 0, positions: [] };
3
+ // ============================================================
4
+ // HOW THIS WORKS (probed 2026-05):
5
+ //
6
+ // The SSR HTML at https://app.mokahr.com/social-recruitment/high-flyer/140576
7
+ // embeds the first page of jobs in an `<input id="init-data">` blob.
8
+ // `jobStats.total` is the canonical total count. Deeper pages come from
9
+ // POST /api/outer/ats-apply/website/jobs/v2?orgId=high-flyer (AES-128-CBC
10
+ // encrypted envelope; key=necromancer, iv=aesIv from init-data).
11
+ //
12
+ // CONFIRMED MOKA ORG:
13
+ // slug=high-flyer, siteId=140576, mode=social
14
+ // Portal: https://app.mokahr.com/social-recruitment/high-flyer/140576
15
+ //
16
+ // PositionSummary field mapping:
17
+ // post_id ← job.id
18
+ // title ← job.title
19
+ // project ← job.zhineng?.name
20
+ // recruit_label ← job.commitment || hireMode label
21
+ // bgs ← job.department?.name
22
+ // work_cities ← locations[].cityId → label via jobsGroupedByLocation
23
+ // apply_url ← portal#/jobs/{id}
24
+ import { extractResumeSignals, scoreOverlap, checkResume } from "./tencent.js";
25
+ import { createDecipheriv } from "node:crypto";
26
+ export { checkResume, extractResumeSignals, scoreOverlap };
27
// Identifier placed in the `source` field of every result object this adapter returns.
const SOURCE = "app.mokahr.com/high-flyer";
// Moka organisation slug and site id of the social-recruitment portal.
const ORG_SLUG = "high-flyer";
const SITE_ID = 140576;
// Server-rendered portal page; its HTML embeds the first page of jobs.
const PORTAL_URL = `https://app.mokahr.com/social-recruitment/${ORG_SLUG}/${SITE_ID}`;
// JSON endpoint used for pages beyond the first (responses are AES-encrypted).
const API_ENDPOINT = "https://app.mokahr.com/api/outer/ats-apply/website/jobs/v2";
// Browser-like headers sent with every request. Frozen because the object is
// shared by all requests in this module; callers spread it into a fresh object
// when they need extra headers, so nothing ever needs to mutate it.
const DEFAULT_HEADERS = Object.freeze({
  "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
  Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
  "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
});
37
// Entities that may appear in the escaped attribute value, with their literal characters.
const HTML_ENTITIES = Object.freeze({
  "&quot;": '"',
  "&amp;": "&",
  "&lt;": "<",
  "&gt;": ">",
  "&#x27;": "'",
  "&#39;": "'",
});

/**
 * Decodes the small set of HTML entities listed in HTML_ENTITIES.
 *
 * Decoding happens in a single pass so that text produced by one replacement
 * is never decoded a second time (e.g. `&amp;lt;` becomes `&lt;`, not `<`).
 *
 * @param {string} s - HTML-escaped text.
 * @returns {string} The text with the known entities replaced.
 */
function htmlDecode(s) {
  return s.replace(/&(?:quot|amp|lt|gt|#x27|#39);/g, (entity) => HTML_ENTITIES[entity]);
}
46
/**
 * Extracts and parses the JSON blob stored in the `value` attribute of the
 * portal's `<input id="init-data">` element.
 *
 * @param {string} html - Portal page HTML.
 * @returns {object|null} The parsed blob, or null when the element is not
 *   found or its value is not valid JSON after entity decoding.
 */
function parseInitData(html) {
  const [, encoded] = html.match(/<input[^>]*id="init-data"[^>]*value="([^"]+)"/) ?? [];
  if (encoded === undefined) {
    return null;
  }
  let parsed;
  try {
    parsed = JSON.parse(htmlDecode(encoded));
  } catch {
    // Malformed blob: report "no data" rather than throwing.
    parsed = null;
  }
  return parsed;
}
57
/**
 * Fetches the portal HTML in two steps: a first request (redirects not
 * followed) whose only purpose is to collect `Set-Cookie` values, then a second
 * request that replays those cookies and follows redirects.
 *
 * @returns {Promise<{ok: boolean, message: string, html?: string, cookieHeader?: string}>}
 *   On success `html` is the page body and `cookieHeader` the `name=value`
 *   pairs joined with "; " (an empty string when no cookies were set). On
 *   failure only `ok: false` and `message` are present.
 */
async function fetchPortalHtml() {
  let probe;
  try {
    probe = await fetch(PORTAL_URL, { method: "GET", headers: DEFAULT_HEADERS, redirect: "manual" });
  } catch (err) {
    return { ok: false, message: `network error: ${err instanceof Error ? err.message : err}` };
  }

  const cookies = [];
  // Invoke getSetCookie() as a method so `this` is the Headers object
  // (Node's undici performs a brand check on the receiver).
  if (typeof probe.headers.getSetCookie === "function") {
    for (const raw of probe.headers.getSetCookie() ?? []) {
      const pair = raw.split(";")[0].trim();
      if (pair) {
        cookies.push(pair);
      }
    }
  }
  if (cookies.length === 0) {
    // Fallback for runtimes without getSetCookie(): the combined header joins
    // cookies with commas, so split only on commas that start a new `name=` pair.
    const combined = probe.headers.get("set-cookie");
    if (combined) {
      for (const raw of combined.split(/,(?=[^;]+=)/)) {
        const pair = raw.split(";")[0].trim();
        if (pair) {
          cookies.push(pair);
        }
      }
    }
  }

  // Only the headers of the first response are needed. Cancel its body so the
  // underlying connection is released promptly instead of waiting for GC.
  try {
    await probe.body?.cancel();
  } catch {
    // Best effort: failing to cancel must not fail the whole fetch.
  }

  const cookieHeader = cookies.join("; ");
  // Do not send an empty Cookie header when nothing was captured.
  const headers = cookieHeader ? { ...DEFAULT_HEADERS, Cookie: cookieHeader } : { ...DEFAULT_HEADERS };

  let page;
  try {
    page = await fetch(PORTAL_URL, { method: "GET", headers, redirect: "follow" });
  } catch (err) {
    return { ok: false, message: `network error: ${err instanceof Error ? err.message : err}` };
  }
  if (!page.ok) {
    return { ok: false, message: `HTTP ${page.status}` };
  }
  return { ok: true, html: await page.text(), cookieHeader, message: "ok" };
}
96
/**
 * Decrypts a Moka response envelope with AES-128-CBC and parses the result as JSON.
 *
 * The key is the envelope's `necromancer` string and the IV is `aesIv`, both
 * taken as UTF-8 bytes; `envelope.data` is the base64-encoded ciphertext.
 *
 * @param {{data?: string, necromancer?: string}|null|undefined} envelope - Response envelope.
 * @param {string} aesIv - Initialisation vector string.
 * @returns {object|null} The decrypted JSON payload, or null when the envelope
 *   is missing or incomplete, or when decryption or JSON parsing fails.
 */
function decryptMoka(envelope, aesIv) {
  // `?.` keeps a null/undefined envelope on the "return null" path instead of throwing.
  if (!envelope?.data || !envelope.necromancer) {
    return null;
  }
  try {
    const decipher = createDecipheriv(
      "aes-128-cbc",
      Buffer.from(envelope.necromancer, "utf8"),
      Buffer.from(aesIv, "utf8"),
    );
    const plain = Buffer.concat([
      decipher.update(Buffer.from(envelope.data, "base64")),
      decipher.final(),
    ]);
    return JSON.parse(plain.toString("utf8"));
  } catch {
    // Wrong key/IV length, bad padding, or non-JSON plaintext: treat as undecodable.
    return null;
  }
}
111
/**
 * Requests one page of jobs from the Moka jobs API and decrypts the response.
 *
 * @param {number} pageNum - Page number sent to the API.
 * @param {number} pageSize - Page size sent to the API.
 * @param {string} aesIv - IV used to decrypt the response envelope.
 * @param {string} cookieHeader - Cookie header value to replay; may be empty.
 * @returns {Promise<{ok: boolean, message: string, jobs?: object[], total?: number}>}
 *   On success `jobs` is the page's job list (empty when absent) and `total`
 *   is `jobStats.total` from the payload (0 when absent).
 */
async function fetchEncryptedPage(pageNum, pageSize, aesIv, cookieHeader) {
  let response;
  try {
    response = await fetch(`${API_ENDPOINT}?orgId=${encodeURIComponent(ORG_SLUG)}`, {
      method: "POST",
      headers: {
        ...DEFAULT_HEADERS,
        Accept: "application/json,*/*",
        "Content-Type": "application/json",
        Origin: "https://app.mokahr.com",
        Referer: PORTAL_URL,
        // Do not send an empty Cookie header.
        ...(cookieHeader ? { Cookie: cookieHeader } : {}),
      },
      body: JSON.stringify({
        orgId: ORG_SLUG,
        siteId: String(SITE_ID),
        pageNum,
        pageSize,
        needStat: true,
      }),
    });
  } catch (err) {
    return { ok: false, message: `network error: ${err instanceof Error ? err.message : err}` };
  }
  if (!response.ok) {
    return { ok: false, message: `HTTP ${response.status}` };
  }

  let envelope;
  try {
    envelope = await response.json();
  } catch {
    return { ok: false, message: "bad JSON" };
  }
  // A body such as `null` or a bare string is valid JSON but not an envelope;
  // reject it here so the property reads below cannot throw.
  if (envelope === null || typeof envelope !== "object") {
    return { ok: false, message: "unexpected response envelope" };
  }

  const decoded = decryptMoka(envelope, aesIv);
  if (!decoded || decoded.code !== 0 || !decoded.data) {
    return { ok: false, message: decoded?.msg || envelope.msg || "decrypt error" };
  }
  return {
    ok: true,
    // Callers spread `jobs`, so never hand back a non-array value.
    jobs: Array.isArray(decoded.data.jobs) ? decoded.data.jobs : [],
    total: decoded.data.jobStats?.total ?? 0,
    message: "ok",
  };
}
156
/**
 * Builds a `cityId -> label` lookup from a list of location groups.
 *
 * Groups whose `cityId` is not a number or whose `label` is falsy are ignored.
 *
 * @param {Iterable<{cityId?: unknown, label?: unknown}>|null|undefined} groups
 * @returns {Object} Plain object keyed by city id; empty when `groups` is falsy.
 */
function buildCityMap(groups) {
  if (!groups) {
    return {};
  }
  const usable = [...groups].filter((group) => typeof group.cityId === "number" && group.label);
  return Object.fromEntries(usable.map((group) => [group.cityId, group.label]));
}
166
/**
 * Produces a " / "-separated list of distinct location labels for a job.
 *
 * Each location resolves to the `cityMap` label for its numeric `cityId`,
 * falling back to the location's `country`; locations that resolve to nothing
 * are skipped. First-seen order is kept.
 *
 * @param {{locations?: Iterable<{cityId?: unknown, country?: unknown}>}} job
 * @param {Object} cityMap - Lookup from city id to label.
 * @returns {string} Joined labels, or "" when there are none.
 */
function workCities(job, cityMap) {
  const seen = new Set();
  for (const { cityId, country } of job.locations ?? []) {
    const mapped = typeof cityId === "number" && cityMap[cityId];
    const label = mapped || country || "";
    if (label) {
      seen.add(label);
    }
  }
  return [...seen].join(" / ");
}
175
/**
 * Picks the recruitment-type label for a job.
 *
 * A truthy `commitment` is returned as-is; otherwise `hireMode` 1 and 2 map to
 * fixed labels and anything else yields "".
 *
 * @param {{commitment?: unknown, hireMode?: unknown}} job
 * @returns {*} The `commitment` value or one of the fixed label strings.
 */
function recruitLabel(job) {
  const { commitment, hireMode } = job;
  if (commitment) {
    return commitment;
  }
  switch (hireMode) {
    case 1:
      return "全职";
    case 2:
      return "实习";
    default:
      return "";
  }
}
184
/**
 * Converts a raw Moka job object into the adapter's position summary.
 *
 * @param {object} job - Raw job; reads `id`, `title`, `zhineng.name`,
 *   `department.name`, plus whatever `recruitLabel` and `workCities` read.
 * @param {Object} cityMap - Lookup from city id to label, passed to `workCities`.
 * @returns {{post_id: string, title: string, project: string, recruit_label: *,
 *   bgs: string, work_cities: string, apply_url: string}}
 */
function summarize(job, cityMap) {
  const { id, title, zhineng, department } = job;
  const recruit = recruitLabel(job);
  const cities = workCities(job, cityMap);
  return {
    post_id: String(id),
    title: title ?? "",
    project: zhineng?.name ?? "",
    recruit_label: recruit,
    bgs: department?.name ?? "",
    work_cities: cities,
    // Deep link into the portal's single-page app for this job.
    apply_url: `${PORTAL_URL}#/jobs/${encodeURIComponent(id)}`,
  };
}
195
/**
 * Case-insensitive substring match of a keyword against a job's title,
 * `zhineng.name` and `department.name`.
 *
 * @param {{title?: string, zhineng?: {name?: string}, department?: {name?: string}}} job
 * @param {string} kw - Keyword; a falsy keyword matches every job.
 * @returns {boolean} True when any of the three fields contains the keyword.
 */
function matchesKeyword(job, kw) {
  if (!kw) {
    return true;
  }
  const needle = kw.toLowerCase();
  const haystacks = [job.title, job.zhineng?.name, job.department?.name];
  return haystacks.some((field) => (field ?? "").toLowerCase().includes(needle));
}
203
/**
 * Returns one page of positions from the portal.
 *
 * Page 1 comes from the jobs embedded in the portal HTML; later pages are
 * fetched from the encrypted jobs API. The keyword filter is applied locally
 * to the jobs of the requested page only, and `total` is the portal's overall
 * job count (`jobStats.total`), not the number of keyword matches.
 *
 * @param {{pageSize?: number, page?: number, keyword?: string}} [opts]
 *   Defaults: `pageSize` 20, `page` 1, `keyword` "".
 * @returns {Promise<object>} On success:
 *   `{ ok: true, source, query, page, page_size, total, positions }`.
 *   On failure: `{ ok: false, source, message, query, positions: [], total }`.
 */
export async function searchPositions(opts = {}) {
  const pageSize = opts.pageSize ?? 20;
  const page = opts.page ?? 1;
  const keyword = opts.keyword ?? "";
  const query = { keyword, page, pageSize };
  // Shared shape for every failure result.
  const fail = (message, total = 0) => ({
    ok: false,
    source: SOURCE,
    message,
    query,
    positions: [],
    total,
  });

  const portal = await fetchPortalHtml();
  if (!portal.ok || !portal.html) {
    return fail(portal.message);
  }
  const init = parseInitData(portal.html);
  if (!init || !init.jobs || !init.jobStats) {
    return fail("Moka init-data missing jobs/jobStats");
  }

  const cityMap = buildCityMap(init.jobsGroupedByLocation);
  const total = init.jobStats.total ?? init.jobs.length;
  let jobs = init.jobs;
  if (page > 1) {
    // Never fall back to the embedded first page here: that would return
    // page-1 jobs labelled as the requested page.
    if (!init.aesIv) {
      return fail("pagination failed: Moka init-data missing aesIv", total);
    }
    // As in fetchAllPositions, attempt the request even when no cookies were captured.
    const more = await fetchEncryptedPage(page, pageSize, init.aesIv, portal.cookieHeader ?? "");
    if (!more.ok || !more.jobs) {
      return fail(`pagination failed: ${more.message}`, total);
    }
    jobs = more.jobs;
  }

  const filtered = jobs.filter((job) => matchesKeyword(job, keyword)).slice(0, pageSize);
  return {
    ok: true,
    source: SOURCE,
    query,
    page,
    page_size: pageSize,
    total,
    positions: filtered.map((job) => summarize(job, cityMap)),
  };
}
257
/**
 * Collects positions across pages: the jobs embedded in the portal HTML first,
 * then API pages 2..maxPages until the portal's reported total is reached or a
 * page comes back empty.
 *
 * The keyword filter is applied locally after collection. `total` is the
 * portal's overall job count (`jobStats.total`), while `fetched` is the number
 * of positions returned after filtering.
 *
 * @param {{pageSize?: number, maxPages?: number, keyword?: string}} [opts]
 *   Defaults: `pageSize` 20, `maxPages` 50 (minimum 1), `keyword` "".
 * @returns {Promise<object>} On success:
 *   `{ ok: true, source, total, fetched, positions }`, plus a `message` when a
 *   later page failed and the list is therefore partial.
 *   On failure: `{ ok: false, source, message, total: 0, fetched: 0, positions: [] }`.
 */
export async function fetchAllPositions(opts = {}) {
  const pageSize = opts.pageSize ?? 20;
  const maxPages = Math.max(1, opts.maxPages ?? 50);
  const keyword = opts.keyword ?? "";
  const fail = (message) => ({
    ok: false,
    source: SOURCE,
    message,
    total: 0,
    fetched: 0,
    positions: [],
  });

  const portal = await fetchPortalHtml();
  if (!portal.ok || !portal.html) {
    return fail(portal.message);
  }
  const init = parseInitData(portal.html);
  if (!init || !init.jobs || !init.jobStats || !init.aesIv) {
    return fail("Moka init-data missing required fields");
  }

  const cityMap = buildCityMap(init.jobsGroupedByLocation);
  const total = init.jobStats.total ?? 0;
  const collected = [...init.jobs];
  // NOTE(review): the embedded first page may not contain exactly `pageSize`
  // jobs; if the API pages by offset, that could skip or repeat jobs between
  // page 1 and page 2 — confirm against the live API.
  let partialReason;
  for (let page = 2; collected.length < total && page <= maxPages; page += 1) {
    const more = await fetchEncryptedPage(page, pageSize, init.aesIv, portal.cookieHeader ?? "");
    if (!more.ok) {
      // Keep what was collected, but record why the list stops early
      // instead of silently reporting a complete result.
      partialReason = `pagination stopped at page ${page}: ${more.message}`;
      break;
    }
    if (!more.jobs || more.jobs.length === 0) {
      break;
    }
    collected.push(...more.jobs);
  }

  const filtered = collected.filter((job) => matchesKeyword(job, keyword));
  return {
    ok: true,
    source: SOURCE,
    total,
    fetched: filtered.length,
    positions: filtered.map((job) => summarize(job, cityMap)),
    ...(partialReason ? { message: partialReason } : {}),
  };
}
21
303
  export async function fetchPositionDetail(postId) {
22
- return { ok: false, source: SOURCE, message: STUB_MESSAGE, post_id: postId };
304
+ return {
305
+ ok: false,
306
+ source: SOURCE,
307
+ message: "Moka detail endpoint is also AES-encrypted and not implemented; " +
308
+ "use the apply_url deeplink for the full JD.",
309
+ post_id: postId,
310
+ apply_url: `${PORTAL_URL}#/jobs/${encodeURIComponent(postId)}`,
311
+ };
23
312
  }
24
313
  export async function fetchDictionaries() {
25
- return { ok: false, source: SOURCE, message: STUB_MESSAGE };
314
+ const portal = await fetchPortalHtml();
315
+ if (!portal.ok || !portal.html) {
316
+ return { ok: false, source: SOURCE, message: portal.message };
317
+ }
318
+ const init = parseInitData(portal.html);
319
+ if (!init) {
320
+ return { ok: false, source: SOURCE, message: "Moka init-data missing" };
321
+ }
322
+ return {
323
+ ok: true,
324
+ source: SOURCE,
325
+ locations: init.jobsGroupedByLocation ?? [],
326
+ moka_org: { slug: ORG_SLUG, siteId: SITE_ID, url: PORTAL_URL },
327
+ };
26
328
  }
27
329
  export async function listNotices() {
28
- return { ok: false, source: SOURCE, message: STUB_MESSAGE, notices: [] };
330
+ return {
331
+ ok: false,
332
+ source: SOURCE,
333
+ message: "DeepSeek: no public notices endpoint",
334
+ notices: [],
335
+ };
29
336
  }
30
337
  export async function getNotice(noticeId) {
31
- return { ok: false, source: SOURCE, message: STUB_MESSAGE, notice_id: noticeId };
338
+ return {
339
+ ok: false,
340
+ source: SOURCE,
341
+ message: "DeepSeek: no public notices endpoint",
342
+ notice_id: noticeId,
343
+ };
32
344
  }
33
345
  export async function findNoticesByQuestion(question, _opts = {}) {
34
- return { ok: false, source: SOURCE, question, message: STUB_MESSAGE, matches: [] };
346
+ return {
347
+ ok: false,
348
+ source: SOURCE,
349
+ question,
350
+ message: "DeepSeek: no public notices endpoint",
351
+ matches: [],
352
+ };
35
353
  }
36
/**
 * Ranks portal positions against signals extracted from resume text.
 *
 * Signals come from `extractResumeSignals`; each position is scored by
 * `scoreOverlap` on its title, project and bgs fields, and the `topN`
 * highest-scoring positions are returned in descending score order.
 *
 * @param {string|null|undefined} text - Resume text; nullish is treated as "".
 * @param {{candidates?: number, topN?: number}} [opts]
 *   `candidates` (default 100, minimum 20) bounds how many pages are fetched;
 *   `topN` (default 10, minimum 1) bounds how many matches are returned.
 * @returns {Promise<object>} `{ ok, source, extracted_terms, city_preferences,
 *   matches }`, plus `message` when fetching positions failed.
 */
export async function matchResume(text, opts = {}) {
  const { terms, cities } = extractResumeSignals(text ?? "");
  const common = { source: SOURCE, extracted_terms: terms, city_preferences: cities };

  const candidates = Math.max(20, opts.candidates ?? 100);
  // NOTE(review): the divisor 15 differs from the pageSize of 20 requested
  // below — presumably a conservative per-page estimate; confirm intent.
  const maxPages = Math.ceil(candidates / 15);
  const all = await fetchAllPositions({ pageSize: 20, maxPages });
  if (!all.ok) {
    return { ok: false, ...common, matches: [], message: all.message };
  }

  const topN = Math.max(1, opts.topN ?? 10);
  const scoreOf = (position) =>
    scoreOverlap(`${position.title} ${position.project} ${position.bgs}`, terms, cities).score;
  const ranked = all.positions.map((position) => ({ position, score: scoreOf(position) }));
  ranked.sort((left, right) => right.score - left.score);
  const matches = ranked.slice(0, topN).map(({ position }) => position);

  return { ok: true, ...common, matches };
}