job-pro 0.7.0 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,37 +1,389 @@
1
- // 银河通用 / Galaxy Universal (Galbot — embodied AI robotics) stub for `job-pro`.
1
+ // 银河通用 / Galaxy Universal (Galbot) recruiting via app.mokahr.com.
2
2
  //
3
- // STATUS: stub-only. Galaxy Universal lists careers via Moka social-recruitment
4
- // (orgId 165930, slug yinhetongyong), which is auth-gated for anonymous access.
5
- // Probe results:
6
- // www.galbot.com/careers, galaxyuniversal.com/careers no public API discoverable
7
- // app.mokahr.com/social-recruitment/yinhetongyong/165930 → Moka SPA, session auth required
8
- import { extractResumeSignals, checkResume } from "./tencent.js";
9
- export { checkResume };
10
- const SOURCE = "galbot.com";
11
- const STUB_MESSAGE = "Galaxy Universal / 银河通用: no public job API — Moka social-recruitment portal " +
12
- "(yinhetongyong/165930) requires session auth (verified Moka anon path is gated).";
13
- export async function searchPositions(_opts = {}) {
14
- return { ok: false, source: SOURCE, message: STUB_MESSAGE, query: {}, positions: [] };
15
- }
16
- export async function fetchAllPositions(_opts = {}) {
17
- return { ok: false, source: SOURCE, message: STUB_MESSAGE, total: 0, fetched: 0, positions: [] };
3
+ // ============================================================
4
+ // HOW THIS WORKS (probed 2026-05):
5
+ //
6
+ // www.galbot.com/careers → SPA shell that links to
7
+ // https://app.mokahr.com/social-recruitment/yinhetongyong/165929
8
+ // (corrected from the previously documented 165930, which 404s).
9
+ //
10
+ // The Moka SSR HTML embeds `<input id="init-data" value="<JSON>">`
11
+ // with the first 15 jobs and `jobStats.total`. Pagination uses
12
+ // POST /api/outer/ats-apply/website/jobs/v2?orgId=yinhetongyong
13
+ // (AES-128-CBC envelope: key=necromancer, iv=aesIv from SSR HTML).
14
+ //
15
+ // CONFIRMED MOKA ORG:
16
+ // slug=yinhetongyong, siteId=165929, mode=social
17
+ // Portal: https://app.mokahr.com/social-recruitment/yinhetongyong/165929
18
+ //
19
+ // PositionSummary field mapping:
20
+ // post_id ← job.id
21
+ // title ← job.title
22
+ // project ← job.zhineng?.name (e.g. "算法类")
23
+ // recruit_label ← job.commitment || hireMode label
24
+ // bgs ← job.department?.name (e.g. "算法中心")
25
+ // work_cities ← locations[].cityId → label via jobsGroupedByLocation
26
+ // apply_url ← portal#/jobs/{id}
27
+ import { extractResumeSignals, scoreOverlap, checkResume } from "./tencent.js";
28
+ import { createDecipheriv } from "node:crypto";
29
+ export { checkResume, extractResumeSignals, scoreOverlap };
30
// Provenance tag stamped on every result object produced by this adapter.
const SOURCE = "app.mokahr.com/yinhetongyong";
// Moka organisation slug; also sent as `orgId` to the jobs API.
const ORG_SLUG = "yinhetongyong";
// Numeric id of the social-recruitment site inside that organisation.
const SITE_ID = 165929;
// Public portal page; job deep links are built as `${PORTAL_URL}#/jobs/{id}`.
const PORTAL_URL = ["https://app.mokahr.com/social-recruitment", ORG_SLUG, SITE_ID].join("/");
// Paginated job-list endpoint (responses arrive as an AES-encrypted envelope).
const API_ENDPOINT = "https://app.mokahr.com/api/outer/ats-apply/website/jobs/v2";
// Browser-like request headers shared by every outgoing request.
const DEFAULT_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
    Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
};
40
// Entities the Moka SSR page uses when escaping the init-data attribute value.
const HTML_ENTITY_TABLE = Object.freeze({
    "&quot;": '"',
    "&amp;": "&",
    "&lt;": "<",
    "&gt;": ">",
    "&#x27;": "'",
    "&#39;": "'",
});

/**
 * Decode the small set of HTML entities that can appear in an attribute value.
 *
 * Decoding happens in a single pass so that already-decoded output is never
 * scanned again: `&amp;lt;` becomes the literal text `&lt;`, not `<`. A chain
 * of sequential replaces that handles `&amp;` before the other entities
 * double-decodes such input and can corrupt the embedded JSON.
 *
 * @param {string} s - HTML-escaped text.
 * @returns {string} The text with the supported entities replaced.
 */
function htmlDecode(s) {
    return s.replace(/&(?:quot|amp|lt|gt|#x27|#39);/g, (entity) => HTML_ENTITY_TABLE[entity]);
}
49
/**
 * Extract the JSON payload embedded in the `<input id="init-data" value="…">`
 * element of the portal HTML.
 *
 * @param {string} html - Full HTML of the portal page.
 * @returns {object|null} The parsed payload, or `null` when the element is
 *   absent or its value is not valid JSON after entity decoding.
 */
function parseInitData(html) {
    const found = html.match(/<input[^>]*id="init-data"[^>]*value="([^"]+)"/);
    if (!found) {
        return null;
    }
    const [, escapedJson] = found;
    let payload = null;
    try {
        payload = JSON.parse(htmlDecode(escapedJson));
    }
    catch {
        // Malformed JSON is reported the same way as a missing element.
        payload = null;
    }
    return payload;
}
60
/**
 * Download the portal page, replaying any cookies set by a first probe request.
 *
 * The portal is requested twice: once with redirects disabled to harvest
 * `Set-Cookie` values, then again (following redirects) with those cookies
 * attached.
 *
 * @returns {Promise<{ok: boolean, message: string, html?: string, cookieHeader?: string}>}
 *   On success `html` holds the page body and `cookieHeader` the harvested
 *   cookies joined with "; " (possibly an empty string). On failure only
 *   `ok: false` and a `message` are returned; this function does not throw
 *   for network or HTTP errors.
 */
async function fetchPortalHtml() {
    const networkFailure = (err) => ({
        ok: false,
        message: `network error: ${err instanceof Error ? err.message : err}`,
    });
    let probe;
    try {
        probe = await fetch(PORTAL_URL, { method: "GET", headers: DEFAULT_HEADERS, redirect: "manual" });
    }
    catch (err) {
        return networkFailure(err);
    }
    const cookies = [];
    if (typeof probe.headers.getSetCookie === "function") {
        for (const line of probe.headers.getSetCookie() ?? []) {
            const pair = line.split(";")[0];
            if (pair)
                cookies.push(pair);
        }
    }
    if (cookies.length === 0) {
        // Fallback for runtimes without getSetCookie(): the combined header is
        // split only on commas that start a new `name=value` pair.
        const raw = probe.headers.get("set-cookie");
        if (raw) {
            for (const part of raw.split(/,(?=[^;]+=)/)) {
                const pair = part.split(";")[0].trim();
                // Skip empty fragments so the Cookie header has no stray "; " separators.
                if (pair)
                    cookies.push(pair);
            }
        }
    }
    // The probe body is never read; cancel it so the connection is released
    // promptly instead of waiting for garbage collection.
    try {
        await probe.body?.cancel();
    }
    catch {
        // Best-effort cleanup only: a failed cancel must not fail the fetch.
    }
    const cookieHeader = cookies.join("; ");
    // Do not send an empty `Cookie:` header when nothing was harvested.
    const headers = cookieHeader
        ? { ...DEFAULT_HEADERS, Cookie: cookieHeader }
        : { ...DEFAULT_HEADERS };
    let page;
    try {
        page = await fetch(PORTAL_URL, { method: "GET", headers, redirect: "follow" });
    }
    catch (err) {
        return networkFailure(err);
    }
    if (!page.ok)
        return { ok: false, message: `HTTP ${page.status}` };
    return { ok: true, html: await page.text(), cookieHeader, message: "ok" };
}
98
/**
 * Decrypt an AES-128-CBC response envelope and parse the plaintext as JSON.
 *
 * @param {{data?: string, necromancer?: string}} envelope - `data` is the
 *   base64 ciphertext and `necromancer` the key, used as UTF-8 bytes.
 * @param {string} aesIv - Initialisation vector, used as UTF-8 bytes.
 * @returns {*} The parsed plaintext, or `null` when a field is missing or
 *   decryption/parsing fails for any reason.
 */
function decryptMoka(envelope, aesIv) {
    const { data: cipherText, necromancer: keyText } = envelope;
    if (!cipherText || !keyText) {
        return null;
    }
    try {
        const key = Buffer.from(keyText, "utf8");
        const iv = Buffer.from(aesIv, "utf8");
        const aes = createDecipheriv("aes-128-cbc", key, iv);
        const head = aes.update(Buffer.from(cipherText, "base64"));
        const tail = aes.final();
        const plainText = Buffer.concat([head, tail]).toString("utf8");
        return JSON.parse(plainText);
    }
    catch {
        // Wrong key/IV length, bad padding and invalid JSON all map to null.
        return null;
    }
}
113
/**
 * Request one page of jobs from the encrypted job-list API and decrypt it.
 *
 * @param {number} pageNum - Page number to request.
 * @param {number} pageSize - Number of jobs per page.
 * @param {string} aesIv - IV used to decrypt the response envelope.
 * @param {string} cookieHeader - Value sent as the `Cookie` request header.
 * @returns {Promise<{ok: boolean, message: string, jobs?: Array, total?: number}>}
 *   `jobs` and `total` are present only when `ok` is true.
 */
async function fetchEncryptedPage(pageNum, pageSize, aesIv, cookieHeader) {
    const url = `${API_ENDPOINT}?orgId=${encodeURIComponent(ORG_SLUG)}`;
    let response;
    try {
        const headers = {
            ...DEFAULT_HEADERS,
            Accept: "application/json,*/*",
            "Content-Type": "application/json",
            Origin: "https://app.mokahr.com",
            Referer: PORTAL_URL,
            Cookie: cookieHeader,
        };
        const payload = {
            orgId: ORG_SLUG,
            siteId: String(SITE_ID),
            pageNum,
            pageSize,
            needStat: true,
        };
        response = await fetch(url, { method: "POST", headers, body: JSON.stringify(payload) });
    }
    catch (err) {
        const detail = err instanceof Error ? err.message : err;
        return { ok: false, message: `network error: ${detail}` };
    }
    if (!response.ok) {
        return { ok: false, message: `HTTP ${response.status}` };
    }
    let envelope;
    try {
        envelope = await response.json();
    }
    catch {
        return { ok: false, message: "bad JSON" };
    }
    const decoded = decryptMoka(envelope, aesIv);
    const usable = decoded && decoded.code === 0 && decoded.data;
    if (!usable) {
        // Prefer the decrypted error text, then the envelope's, then a generic one.
        return { ok: false, message: decoded?.msg || envelope.msg || "decrypt error" };
    }
    const { jobs, jobStats } = decoded.data;
    return {
        ok: true,
        jobs: jobs ?? [],
        total: jobStats?.total ?? 0,
        message: "ok",
    };
}
158
/**
 * Build a `cityId → label` lookup from location groups.
 *
 * @param {Array<{cityId?: *, label?: string}>|null|undefined} groups
 * @returns {Object<number, string>} Entries exist only for groups with a
 *   numeric `cityId` and a non-empty `label`; later duplicates win.
 */
function buildCityMap(groups) {
    const labelByCity = {};
    if (groups) {
        for (const group of groups) {
            const hasNumericId = typeof group.cityId === "number";
            if (hasNumericId && group.label) {
                labelByCity[group.cityId] = group.label;
            }
        }
    }
    return labelByCity;
}
168
/**
 * Render a job's locations as a " / "-separated list of unique labels.
 *
 * Each location resolves to the mapped city label when its `cityId` is a
 * number present in `cityMap`, otherwise to its `country`; locations that
 * resolve to nothing are dropped. First-seen order is preserved.
 *
 * @param {{locations?: Array<{cityId?: *, country?: string}>}} job
 * @param {Object<number, string>} cityMap
 * @returns {string}
 */
function workCities(job, cityMap) {
    const seen = new Set();
    for (const place of job.locations ?? []) {
        const mapped = typeof place.cityId === "number" ? cityMap[place.cityId] : undefined;
        const label = mapped || place.country || "";
        if (label) {
            seen.add(label);
        }
    }
    return [...seen].join(" / ");
}
177
/**
 * Pick the recruitment-type label for a job.
 *
 * @param {{commitment?: string, hireMode?: number}} job
 * @returns {string} `job.commitment` when truthy; otherwise "全职" for
 *   `hireMode === 1`, "实习" for `hireMode === 2`, and "" for anything else.
 */
function recruitLabel(job) {
    if (job.commitment) {
        return job.commitment;
    }
    switch (job.hireMode) {
        case 1:
            return "全职";
        case 2:
            return "实习";
        default:
            return "";
    }
}
186
/**
 * Convert a raw job record into the adapter's position-summary shape.
 *
 * @param {object} job - Raw job record from the portal.
 * @param {Object<number, string>} cityMap - `cityId → label` lookup.
 * @returns {{post_id: string, title: string, project: string, recruit_label: string,
 *   bgs: string, work_cities: string, apply_url: string}}
 */
function summarize(job, cityMap) {
    const { id, title, zhineng, department } = job;
    const summary = {
        post_id: String(id),
        title: title ?? "",
        project: zhineng?.name ?? "",
        recruit_label: recruitLabel(job),
        bgs: department?.name ?? "",
        work_cities: workCities(job, cityMap),
        // Deep link into the portal's job-detail route.
        apply_url: `${PORTAL_URL}#/jobs/${encodeURIComponent(id)}`,
    };
    return summary;
}
197
/**
 * Case-insensitive substring match of a keyword against a job's title,
 * `zhineng.name` and `department.name`.
 *
 * @param {object} job - Raw job record.
 * @param {string} kw - Keyword; an empty or missing keyword matches every job.
 * @returns {boolean}
 */
function matchesKeyword(job, kw) {
    if (!kw) {
        return true;
    }
    const needle = kw.toLowerCase();
    const searchable = [job.title, job.zhineng?.name, job.department?.name];
    return searchable.some((text) => (text ?? "").toLowerCase().includes(needle));
}
205
/**
 * Fetch one page of positions, optionally filtered by keyword.
 *
 * The portal's server-rendered HTML carries the first jobs and the total
 * count; every other page comes from the encrypted API. The keyword filter is
 * applied only to the jobs of the fetched page, so `total` is always the
 * portal-wide count.
 *
 * Behaviour notes:
 *  - For `page > 1` the API must be usable. If the decryption IV is missing or
 *    the request fails, `ok: false` is returned instead of page-1 data
 *    mislabelled as the requested page.
 *  - For page 1, when the embedded list is shorter than the requested page,
 *    the API is asked for the full page so that page 1 and page 2 are
 *    contiguous. If that request fails, the embedded jobs are used as a
 *    best-effort fallback.
 *  - A missing cookie header no longer blocks pagination; an empty string is
 *    sent, matching what `fetchAllPositions` does.
 *
 * @param {{page?: number, pageSize?: number, keyword?: string}} [opts]
 * @returns {Promise<object>} On success `{ ok: true, source, query, page, page_size, total,
 *   positions }`; on failure `{ ok: false, source, message, query, positions: [], total }`.
 */
export async function searchPositions(opts = {}) {
    const pageSize = opts.pageSize ?? 20;
    const page = opts.page ?? 1;
    const keyword = opts.keyword ?? "";
    const query = { keyword, page, pageSize };
    const failure = (message, total = 0) => ({
        ok: false,
        source: SOURCE,
        message,
        query,
        positions: [],
        total,
    });
    const portal = await fetchPortalHtml();
    if (!portal.ok || !portal.html) {
        return failure(portal.message);
    }
    const init = parseInitData(portal.html);
    if (!init || !init.jobs || !init.jobStats) {
        return failure("Moka init-data missing jobs/jobStats");
    }
    const cityMap = buildCityMap(init.jobsGroupedByLocation);
    const total = init.jobStats.total ?? init.jobs.length;
    let jobs = init.jobs;
    // The embedded list has its own fixed size; if it is shorter than the page
    // the caller asked for, page 1 must also come from the API.
    const embeddedIsShort = init.jobs.length < Math.min(pageSize, total);
    if (page > 1 || embeddedIsShort) {
        const more = init.aesIv
            ? await fetchEncryptedPage(page, pageSize, init.aesIv, portal.cookieHeader ?? "")
            : { ok: false, message: "Moka init-data missing aesIv" };
        if (more.ok && more.jobs) {
            jobs = more.jobs;
        }
        else if (page > 1) {
            return failure(`pagination failed: ${more.message}`, total);
        }
        // page === 1: keep the embedded jobs as a best-effort result.
    }
    const filtered = jobs.filter((j) => matchesKeyword(j, keyword)).slice(0, pageSize);
    return {
        ok: true,
        source: SOURCE,
        query,
        page,
        page_size: pageSize,
        total,
        positions: filtered.map((j) => summarize(j, cityMap)),
    };
}
259
/**
 * Collect positions across pages, optionally filtered by keyword.
 *
 * The jobs embedded in the portal HTML seed the collection and the encrypted
 * API supplies the rest. The embedded list has its own size, independent of
 * `pageSize`, so the first API page is chosen as the one containing the first
 * job not yet seen, and jobs are de-duplicated by `id`. Always resuming at
 * page 2 skips jobs when the embedded list is shorter than `pageSize` and
 * duplicates them when it is longer.
 *
 * Pagination stops quietly when a page fails, comes back empty or adds nothing
 * new; whatever was collected so far is still returned with `ok: true`.
 *
 * @param {{pageSize?: number, maxPages?: number, keyword?: string}} [opts]
 *   `maxPages` is the highest API page number that may be requested.
 * @returns {Promise<object>} On success `{ ok: true, source, total, fetched, positions }`;
 *   on failure `{ ok: false, source, message, total: 0, fetched: 0, positions: [] }`.
 */
export async function fetchAllPositions(opts = {}) {
    // Guard against a zero or negative size, which would break the page arithmetic below.
    const pageSize = Math.max(1, opts.pageSize ?? 20);
    const maxPages = Math.max(1, opts.maxPages ?? 50);
    const keyword = opts.keyword ?? "";
    const failure = (message) => ({
        ok: false,
        source: SOURCE,
        message,
        total: 0,
        fetched: 0,
        positions: [],
    });
    const portal = await fetchPortalHtml();
    if (!portal.ok || !portal.html) {
        return failure(portal.message);
    }
    const init = parseInitData(portal.html);
    if (!init || !init.jobs || !init.jobStats || !init.aesIv) {
        return failure("Moka init-data missing required fields");
    }
    const cityMap = buildCityMap(init.jobsGroupedByLocation);
    const total = init.jobStats.total ?? 0;
    // Insertion-ordered and keyed by id, so overlapping pages cannot add duplicates.
    const collected = new Map(init.jobs.map((job) => [job.id, job]));
    // First API page that contains a job beyond the embedded seed list.
    let page = Math.floor(init.jobs.length / pageSize) + 1;
    while (collected.size < total && page <= maxPages) {
        const more = await fetchEncryptedPage(page, pageSize, init.aesIv, portal.cookieHeader ?? "");
        if (!more.ok || !more.jobs || more.jobs.length === 0)
            break;
        const sizeBefore = collected.size;
        for (const job of more.jobs) {
            if (!collected.has(job.id))
                collected.set(job.id, job);
        }
        // No progress means further requests would not help either.
        if (collected.size === sizeBefore)
            break;
        page += 1;
    }
    const filtered = [...collected.values()].filter((j) => matchesKeyword(j, keyword));
    return {
        ok: true,
        source: SOURCE,
        total,
        fetched: filtered.length,
        positions: filtered.map((j) => summarize(j, cityMap)),
    };
}
19
305
/**
 * Position detail is not implemented for this portal; the result always has
 * `ok: false` and carries a deep link to the job page instead.
 *
 * @param {string} postId - Job id as returned in `post_id`.
 * @returns {Promise<{ok: false, source: string, message: string, post_id: string, apply_url: string}>}
 */
export async function fetchPositionDetail(postId) {
    const explanation = "Moka detail endpoint is also AES-encrypted and not implemented; " +
        "use the apply_url deeplink for the full JD.";
    const deepLink = `${PORTAL_URL}#/jobs/${encodeURIComponent(postId)}`;
    return {
        ok: false,
        source: SOURCE,
        message: explanation,
        post_id: postId,
        apply_url: deepLink,
    };
}
22
315
/**
 * Return the location groups embedded in the portal page, together with the
 * organisation's portal coordinates.
 *
 * @returns {Promise<object>} `{ ok: true, source, locations, moka_org }` on success,
 *   `{ ok: false, source, message }` when the page or its init-data is unavailable.
 */
export async function fetchDictionaries() {
    const portal = await fetchPortalHtml();
    const pageAvailable = portal.ok && portal.html;
    if (!pageAvailable) {
        return { ok: false, source: SOURCE, message: portal.message };
    }
    const init = parseInitData(portal.html);
    return init
        ? {
            ok: true,
            source: SOURCE,
            locations: init.jobsGroupedByLocation ?? [],
            moka_org: { slug: ORG_SLUG, siteId: SITE_ID, url: PORTAL_URL },
        }
        : { ok: false, source: SOURCE, message: "Moka init-data missing" };
}
25
331
/**
 * Notices are not available for this source; always resolves to a failure
 * result with an empty `notices` list.
 *
 * @returns {Promise<{ok: false, source: string, message: string, notices: Array}>}
 */
export async function listNotices() {
    const message = "Galaxy Universal / 银河通用: no public notices endpoint";
    return { ok: false, source: SOURCE, message, notices: [] };
}
28
339
/**
 * Notices are not available for this source; always resolves to a failure
 * result that echoes the requested id.
 *
 * @param {string} noticeId - Identifier of the requested notice.
 * @returns {Promise<{ok: false, source: string, message: string, notice_id: string}>}
 */
export async function getNotice(noticeId) {
    const message = "Galaxy Universal / 银河通用: no public notices endpoint";
    return { ok: false, source: SOURCE, message, notice_id: noticeId };
}
31
347
/**
 * Notices are not available for this source; always resolves to a failure
 * result that echoes the question and has no matches.
 *
 * @param {string} question - The caller's question, returned unchanged.
 * @param {object} [_opts] - Accepted for interface parity; unused.
 * @returns {Promise<{ok: false, source: string, question: string, message: string, matches: Array}>}
 */
export async function findNoticesByQuestion(question, _opts = {}) {
    const message = "Galaxy Universal / 银河通用: no public notices endpoint";
    return { ok: false, source: SOURCE, question, message, matches: [] };
}
34
- export async function matchResume(text, _opts = {}) {
356
/**
 * Rank open positions against the signals extracted from a resume.
 *
 * @param {string} text - Resume text; `null`/`undefined` is treated as "".
 * @param {{candidates?: number, topN?: number}} [opts] - `candidates`
 *   (default 100, minimum 20) sizes the page budget passed to
 *   `fetchAllPositions`; `topN` (default 10, minimum 1) caps the matches.
 * @returns {Promise<object>} `{ ok, source, extracted_terms, city_preferences, matches }`,
 *   plus `message` when the positions could not be fetched.
 */
export async function matchResume(text, opts = {}) {
    const { terms, cities } = extractResumeSignals(text ?? "");
    const candidates = Math.max(20, opts.candidates ?? 100);
    const pageBudget = Math.ceil(candidates / 15);
    const all = await fetchAllPositions({ pageSize: 20, maxPages: pageBudget });
    if (!all.ok) {
        return {
            ok: false,
            source: SOURCE,
            extracted_terms: terms,
            city_preferences: cities,
            matches: [],
            message: all.message,
        };
    }
    const topN = Math.max(1, opts.topN ?? 10);
    // Score each position on its title, function and department text.
    const ranked = all.positions.map((position) => {
        const haystack = `${position.title} ${position.project} ${position.bgs}`;
        return { position, score: scoreOverlap(haystack, terms, cities).score };
    });
    ranked.sort((left, right) => right.score - left.score);
    const matches = ranked.slice(0, topN).map((entry) => entry.position);
    return {
        ok: true,
        source: SOURCE,
        extracted_terms: terms,
        city_preferences: cities,
        matches,
    };
}
@@ -1,66 +1,45 @@
1
- // 地平线 (Horizon Robotics) stub adapter for `job-pro`.
2
- //
3
- // STATUS: stub-only. Horizon's careers portal is hosted on Moka and gated
4
- // behind the Moka SPA's login flow; the per-slug JSON endpoint returns the
5
- // "您访问的页面不存在" Moka error page for anonymous requests.
1
+ // 地平线 (Horizon Robotics) careers adapter for `job-pro`.
6
2
  //
7
3
  // ============================================================
8
- // RECONNAISSANCE RESULTS (probed 2026-05):
9
- //
10
- // https://career.horizon.ai — 000 (no public DNS / unreachable)
11
- // https://campus.horizon.ai — 000 (no public DNS / unreachable)
12
- // https://horizon.app.mokahr.com — Moka SPA shell renders, but
13
- // /api/career/website/horizon/jobs returns {"code":-1,"message":"您访问的页面不存在"}
4
+ // API DISCOVERY (probed 2026-05-16 via puppeteer-core network capture)
14
5
  //
15
- // Feishu ATSX: horizonrobotics.jobs.feishu.cn HTTP 405 (DNS but no portal)
16
- // horizon.jobs.feishu.cn — HTTP 405
17
- // horizon-robotics.jobs.feishu.cn HTTP 400 (no portal)
18
- // Greenhouse: horizon / horizon-robotics — HTTP 404
19
- // Lever: horizonrobotics — HTTP 404
6
+ // Horizon's careers run on `wecruit.hotjob.cn`, the same Beisen Wecruit
7
+ // stack as SenseTime (see cli/src/sensetime.ts). The `/{SU…}/pb/<channel>.html`
8
+ // SPA path returns nginx 405 on any anonymous POST. The real XHR is fired
9
+ // at the sibling `/wecruit/positionInfo/listPosition/{SU…}` route.
20
10
  //
21
- // The Moka portal exists (the slug 'horizonrobotics' returns 200 page
22
- // shell) but the underlying job-list endpoint requires the Moka SPA's
23
- // user-session JWT, which is only minted post-login.
11
+ // Channels (probed 2026-05-16):
12
+ // * school — `SU6409ef49bef57c635fd390a6` (校园招聘 / 实习生) ~84 positions
13
+ // * social — `SU64819a4f2f9d2433ba8b043a` (社会招聘) ~216 positions
24
14
  //
25
- // Conclusion: no unauthenticated public API. Visit Moka careers shell at
26
- // https://app.mokahr.com/social-recruitment/horizonrobotics for the portal.
27
- import { extractResumeSignals, scoreOverlap, checkResume } from "./tencent.js";
28
- export { checkResume };
29
- const SOURCE = "app.mokahr.com/horizonrobotics";
30
- const STUB_MESSAGE = "Horizon Robotics (地平线): Moka careers portal (slug horizonrobotics) is gated — the public " +
31
- "/api/career/website/horizon/jobs endpoint returns the Moka 'page not found' error for anonymous " +
32
- "requests; positions are visible only after a candidate session is established. No Greenhouse / Lever / " +
33
- "Feishu tenant provisioned. No unauthenticated public API available.";
34
- export async function searchPositions(_opts = {}) {
35
- return { ok: false, source: SOURCE, message: STUB_MESSAGE, query: {}, positions: [] };
36
- }
37
- export async function fetchAllPositions(_opts = {}) {
38
- return { ok: false, source: SOURCE, message: STUB_MESSAGE, total: 0, fetched: 0, positions: [] };
39
- }
40
- export async function fetchPositionDetail(postId) {
41
- return { ok: false, source: SOURCE, message: STUB_MESSAGE, post_id: postId };
42
- }
43
- export async function fetchDictionaries() {
44
- return { ok: false, source: SOURCE, message: STUB_MESSAGE };
45
- }
46
- export async function listNotices() {
47
- return { ok: false, source: SOURCE, message: STUB_MESSAGE, notices: [] };
48
- }
49
- export async function getNotice(noticeId) {
50
- return { ok: false, source: SOURCE, message: STUB_MESSAGE, notice_id: noticeId };
51
- }
52
- export async function findNoticesByQuestion(question, _opts = {}) {
53
- return { ok: false, source: SOURCE, question, message: STUB_MESSAGE, matches: [] };
54
- }
55
- export async function matchResume(text, _opts = {}) {
56
- const { terms, cities } = extractResumeSignals(text ?? "");
57
- return {
58
- ok: false,
59
- source: SOURCE,
60
- extracted_terms: terms,
61
- city_preferences: cities,
62
- matches: [],
63
- message: STUB_MESSAGE,
64
- };
65
- }
66
- export { extractResumeSignals, scoreOverlap };
15
+ // Anonymous, no token, no cookie. See cli/src/wecruit.ts for the shared
16
+ // factory: POST to `/wecruit/positionInfo/listPosition/{channelId}` with
17
+ // `application/x-www-form-urlencoded` body containing
18
+ // `isFrompb=true&recruitType=<1|2>&pageSize=N&currentPage=N`. Response is
19
+ // `{ data:{ pageForm:{ totalPage, pageData[…] } }, state:"200" }`.
20
+ import { createAdapter } from "./wecruit.js";
21
// Recruiting channels passed to the shared wecruit adapter factory:
// one campus channel and one social channel.
const HORIZON_CHANNELS = [
    { channelId: "SU6409ef49bef57c635fd390a6", recruitType: "campus", pagePath: "school" },
    { channelId: "SU64819a4f2f9d2433ba8b043a", recruitType: "social", pagePath: "social" },
];
const adapter = createAdapter({
    host: "wecruit.hotjob.cn",
    label: "Horizon Robotics",
    channels: HORIZON_CHANNELS,
});
// Re-export the adapter's functions under this module's public names.
export const {
    searchPositions,
    fetchAllPositions,
    fetchPositionDetail,
    fetchDictionaries,
    listNotices,
    getNotice,
    findNoticesByQuestion,
    matchResume,
    checkResume,
} = adapter;