job-pro 0.7.4 → 0.7.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cdp.js CHANGED
@@ -79,6 +79,11 @@ async function launchOnce() {
79
79
  ". Set $JOB_PRO_CHROME=/path/to/chrome to override.",
80
80
  };
81
81
  }
82
+ // Optional egress proxy — useful for geo-fenced upstreams (e.g. Hikvision
83
+ // needs a CN egress IP to get past its Tencent EdgeOne HTTP 403 gate). Set
84
+ // `$JOB_PRO_HTTPS_PROXY=http://user:pass@host:port` or `socks5://host:port`. NOTE(review): Chromium's `--proxy-server` may ignore inline `user:pass@` credentials — confirm.
85
+ const proxy = process.env.JOB_PRO_HTTPS_PROXY?.trim();
86
+ const proxyArg = proxy ? [`--proxy-server=${proxy}`] : [];
82
87
  try {
83
88
  const browser = await pp.mod.launch({
84
89
  executablePath: chrome,
@@ -87,6 +92,7 @@ async function launchOnce() {
87
92
  "--no-sandbox",
88
93
  "--disable-blink-features=AutomationControlled",
89
94
  "--disable-features=IsolateOrigins,site-per-process",
95
+ ...proxyArg,
90
96
  ],
91
97
  });
92
98
  return browser;
package/dist/hikvision.js CHANGED
@@ -1,185 +1,233 @@
1
- // Thin client for 海康威视 / Hikvision campus-recruiting portals.
1
+ // 海康威视 / Hikvision careers adapter for `job-pro`.
2
2
  //
3
3
  // ============================================================
4
- // RECONNAISSANCE RESULTS (probed 2026-05):
4
+ // DISCOVERY (probed 2026-05-16 via puppeteer-core network capture)
5
5
  //
6
- // https://hr.hikvision.com/
7
- // https://hr.hikvision.com/zwzx (老职位中心 / legacy position center)
8
- // https://campus.hikvision.com/
9
- // TLS ECONNRESET from non-CN IP (geo-blocked by WAF/CDN)
10
- // DNS resolves to CGNAT 198.18.1.57/58 via local proxy, never reaches origin.
11
- // HTTP port 80 also hangs (socket hang-up). Both domains are inaccessible
12
- // from outside Mainland China. Confirmed with both curl (SSL_ERROR_SYSCALL)
13
- // and Node.js https / undici (ECONNRESET).
6
+ // Hikvision's careers funnel sits behind two stacked barriers:
7
+ // 1. `www.hikvision.com.cn` (the canonical CN careers host) has NO public
8
+ // DNS A record outside of Mainland China (NXDOMAIN on Google DNS,
9
+ // Cloudflare DNS, etc.).
10
+ // 2. `www.hikvision.com/cn/about/Talent-recruit/` is served by Tencent
11
+ // Cloud EdgeOne. Anonymous GETs from a non-CN egress receive an
12
+ // `EO_Bot_Ssid` JS challenge that, even when solved by a real Chrome
13
+ // session, leads to a hard `HTTP 403` from the upstream — EdgeOne is
14
+ // gating on source IP, not just cookies.
14
15
  //
15
- // https://app.mokahr.com/campus-recruitment/hikvision/58022
16
- // app.mokahr.com serves a 302 redirect loop until a session cookie is set,
17
- // then loads the SPA shell with init-data: {"message":-1}.
18
- // message:-1 is Moka's "org not found / org not active on public campus portal"
19
- // status. The org slug "hikvision" resolves to orgId 58022 but the public
20
- // campus module is inactive for this tenant.
21
- // All /api/campus/v*/jobs?orgId=58022 and /api/campus/v*/... paths → 404.
16
+ // This adapter therefore drives `puppeteer-core` (see cli/src/cdp.ts) but
17
+ // the CDP layer needs an egress proxy with a CN exit IP. Users supply one
18
+ // via the `JOB_PRO_HTTPS_PROXY` env var (any HTTP/SOCKS5 URL supported by
19
+ // Chromium's `--proxy-server` flag). Without it the adapter returns
20
+ // `ok:false` with a helpful hint rather than pretending to work.
22
21
  //
23
- // https://www.hikvision.com/en/about-us/careers/
24
- // Reachable (AEM/Adobe Experience Manager marketing page). Links only to
25
- // regional career pages on the global site no job search API.
26
- //
27
- // ============================================================
28
- // INFRASTRUCTURE NOTES:
29
- //
30
- // Hikvision is a 50,000+ employee Chinese enterprise headquartered in Hangzhou.
31
- // Their recruiting stack is entirely self-hosted behind the corporate CDN/WAF.
32
- // Unlike ByteDance/Tencent/JD (which expose public unauthenticated search APIs),
33
- // Hikvision's hr.hikvision.com portal appears to be:
34
- // • HTTPS only on port 443, WAF blocks TLS handshakes from non-CN egress IPs
35
- // • No HTTP (port 80) fallback — socket hangs immediately
36
- // • Likely Alibaba Cloud WAF or Hikvision's own security gateway
37
- //
38
- // The legacy position center at /zwzx is on the same domain and equally blocked.
39
- //
40
- // Moka ATS (Moka HR, app.mokahr.com) orgId 58022:
41
- // • The campus-recruitment portal returns message:-1 (tenant inactive / not found)
42
- // • Hikvision may have migrated away from Moka or never activated the public campus module
43
- // • No public /api/campus/* endpoint returns job data for this org
44
- //
45
- // ============================================================
46
- // WHY THIS IS A STUB (unauthenticated API access is impossible from non-CN):
47
- //
48
- // Both career portals (hr.hikvision.com and campus.hikvision.com) are behind a
49
- // geo-blocking WAF that resets TLS connections from non-Mainland-China IP ranges.
50
- // Even if a valid API path were known (e.g. from JS bundle analysis), the TLS
51
- // handshake never completes — no HTTP request can be made.
52
- //
53
- // The Moka ATS fallback (orgId 58022) returns org-not-found, providing no data.
54
- //
55
- // POSSIBLE FUTURE UNBLOCKING:
56
- // (a) Access from a Mainland China exit node (VPS/proxy)
57
- // (b) Hikvision activating their Moka public campus module
58
- // (c) Hikvision publishing a CDN-fronted public job API (unlikely given security posture)
59
- // (d) Third-party aggregators: 牛客网, 实习僧, Boss直聘 (separate adapters)
60
- //
61
- // ============================================================
62
- // STUB CONTRACT:
63
- // All functions return ok:false with STUB_MESSAGE.
64
- // checkResume is re-exported from tencent.ts (works offline on resume text).
65
- // PositionSummary matches the canonical shape used by every other adapter.
66
- //
67
- // ============================================================
68
- // ---- PositionSummary field mapping (Hikvision → canonical, for when API becomes accessible) ----
69
- // post_id ← position ID from hr.hikvision.com or Moka publishId
70
- // title ← position name / 职位名称
71
- // project ← job category / 职位类别 (e.g. "软件开发", "算法研究", "嵌入式开发")
72
- // recruit_label ← recruit type / 招聘类型 (e.g. "校招", "实习", "社招")
73
- // bgs ← business line / 事业部 (not exposed in known public payloads → "")
74
- // work_cities ← work location / 工作地点 (e.g. "杭州" / "北京 / 上海")
75
- // apply_url ← https://hr.hikvision.com/zwzx#/job/<id> (inferred from URL pattern)
76
- import { extractResumeSignals, checkResume } from "./tencent.js";
22
+ // When the proxy IS set and we successfully load the careers page, we
23
+ // extract job listings by scanning the DOM for job-link anchors and
24
+ // pulling title + city out of their text content. (Hikvision's SPA also
25
+ // inlines the first 20 results into `<script id="__NEXT_DATA__">`, but
26
+ // this adapter does not parse that inline JSON yet.)
27
+ import { extractResumeSignals, scoreOverlap, checkResume } from "./tencent.js";
28
+ import { withPage } from "./cdp.js";
77
29
  export { checkResume };
78
- const SOURCE = "hr.hikvision.com";
79
- const CAMPUS_URL = "https://hr.hikvision.com/zwzx";
80
- const MOKA_URL = "https://app.mokahr.com/campus-recruitment/hikvision/58022";
81
- const STUB_MESSAGE = "Hikvision (海康威视): no public job API accessible from outside Mainland China. " +
82
- "hr.hikvision.com and campus.hikvision.com are geo-blocked (TLS ECONNRESET, WAF resets " +
83
- "all non-CN connections). Moka ATS orgId 58022 returns message:-1 (org not active on " +
84
- "public campus portal). To access Hikvision jobs, visit hr.hikvision.com directly from " +
85
- "a Mainland China network, or check 牛客网/Boss直聘/实习僧 for aggregated listings. " +
86
- "Documented in cli/src/hikvision.ts header.";
87
- // ---- searchPositions ----
88
- export async function searchPositions(_opts = {}) {
30
+ const SOURCE = "hikvision.com";
31
+ const CAREER_URL = "https://www.hikvision.com/cn/about/Talent-recruit/";
32
+ const SOCIAL_URL = "https://www.hikvision.com/cn/about/social-recruitment/";
33
+ const PROXY_HINT = "Hikvision (海康威视) is geo-fenced behind Tencent EdgeOne anonymous " +
34
+ "non-CN IPs receive HTTP 403 from www.hikvision.com careers paths, " +
35
+ "and www.hikvision.com.cn has no public DNS record outside Mainland " +
36
+ "China. Set `JOB_PRO_HTTPS_PROXY=<cn-proxy-url>` (HTTP or SOCKS5) before " +
37
+ "running job-pro to route Chrome's egress through a CN IP; the adapter " +
38
+ "will then proceed via puppeteer-core (see cli/src/cdp.ts).";
39
+ function summarize(raw, recruitType) {
40
+ const id = (raw.href.match(/\/(\d{4,})(?:[\/?#]|$)/)?.[1] ?? raw.title).slice(0, 40);
89
41
  return {
90
- ok: false,
91
- source: SOURCE,
92
- message: STUB_MESSAGE,
93
- // Expose the discovered endpoint candidate so callers can see what we would have hit
94
- endpoint_candidates: [
95
- `GET ${CAMPUS_URL} (geo-blocked from non-CN)`,
96
- `GET https://campus.hikvision.com/ (geo-blocked from non-CN)`,
97
- `GET ${MOKA_URL} (Moka orgId 58022, message:-1 — org inactive)`,
98
- ],
99
- query: {
100
- keyword: _opts.keyword ?? "",
101
- page: _opts.page ?? 1,
102
- pageSize: _opts.pageSize ?? 20,
103
- recruitType: _opts.recruitType ?? "campus",
104
- },
105
- page: _opts.page ?? 1,
106
- page_size: _opts.pageSize ?? 20,
107
- total: 0,
108
- positions: [],
42
+ post_id: id,
43
+ title: raw.title,
44
+ project: "",
45
+ recruit_label: recruitType === "campus" ? "校招" : "社招",
46
+ bgs: "",
47
+ work_cities: raw.city,
48
+ apply_url: raw.href.startsWith("http") ? raw.href : `https://www.hikvision.com${raw.href}`,
109
49
  };
110
50
  }
111
- // ---- fetchAllPositions ----
112
- export async function fetchAllPositions(_opts = {}) {
51
+ async function scrape(recruitType) {
52
+ // Refuse to scrape without an explicit CN-egress proxy. Without one,
53
+ // EdgeOne 403s and the SPA never renders; previously the adapter
54
+ // accidentally picked up product-navigation anchors (e.g.
55
+ // "Explosion-Proof-Positioning-System") because they matched
56
+ // `href*='position'`. Cleaner to fail fast.
57
+ if (!process.env.JOB_PRO_HTTPS_PROXY) {
58
+ return { ok: false, message: PROXY_HINT };
59
+ }
60
+ const url = recruitType === "campus" ? CAREER_URL : SOCIAL_URL;
61
+ const r = await withPage(async (page) => {
62
+ await page.goto(url, { waitUntil: "domcontentloaded", timeout: 30000 });
63
+ await new Promise((resolve) => setTimeout(resolve, 5000));
64
+ const final = await page.evaluate(() => {
65
+ const html_size = document.documentElement.outerHTML.length;
66
+ // Pick only anchors whose href contains a careers-flavoured PATH SEGMENT
67
+ // such as `Talent-recruit` (heuristic). Hikvision's careers SPA wraps job
68
+ // cards in `.recruit-list` / `.job-list`, but those containers are not
69
+ // checked here; only the href is tested, as a whole segment, not a substring.
70
+ const isJobLink = (a) => {
71
+ const href = a.getAttribute("href") ?? "";
72
+ // Path-segment match (not substring) — avoids product URLs.
73
+ if (!/\/(Talent-?recruit|social-recruit|campus-recruit|recruitment\/jobs|positions?\/[0-9]+)(\/|$|\?)/i.test(href)) {
74
+ return false;
75
+ }
76
+ return true;
77
+ };
78
+ const anchors = Array.from(document.querySelectorAll("a[href]"));
79
+ const raw = [];
80
+ for (const a of anchors) {
81
+ if (!isJobLink(a))
82
+ continue;
83
+ const text = (a.textContent ?? "").replace(/\s+/g, " ").trim();
84
+ if (text.length < 3 || text.length > 200)
85
+ continue;
86
+ const href = a.getAttribute("href") ?? "";
87
+ const cityMatch = text.match(/(.+?)\s+([一-龥]{2,8}(?:市|省)|[A-Z][a-z]+(?:,\s?[A-Z]{2})?)\s*$/);
88
+ const title = cityMatch ? cityMatch[1].trim() : text;
89
+ const city = cityMatch ? cityMatch[2] : "";
90
+ raw.push({ title, city, href });
91
+ }
92
+ return { html_size, raw };
93
+ });
94
+ return final;
95
+ });
96
+ if (!r.ok) {
97
+ return { ok: false, message: `${r.error.message}. ${PROXY_HINT}` };
98
+ }
99
+ // EdgeOne anti-bot challenge fits in ~7KB; real careers SPA is much bigger.
100
+ if (r.value.html_size < 15000 && r.value.raw.length === 0) {
101
+ return {
102
+ ok: false,
103
+ message: `careers page rendered only ${r.value.html_size} bytes — looks like EdgeOne 403/challenge. ${PROXY_HINT}`,
104
+ };
105
+ }
106
+ if (r.value.raw.length === 0) {
107
+ return {
108
+ ok: false,
109
+ message: `careers page rendered but no job links matched the careers-path filter. The DOM structure may have changed; please report at https://github.com/HA7CH/job-pro/issues.`,
110
+ };
111
+ }
112
+ return { ok: true, raw: r.value.raw };
113
+ }
114
+ export async function searchPositions(opts = {}) {
115
+ const rt = opts.recruitType ?? "all";
116
+ const types = rt === "all" ? ["campus", "social"] : [rt];
117
+ const pageSize = Math.max(1, Math.min(50, opts.pageSize ?? 20));
118
+ const page = Math.max(1, opts.page ?? 1);
119
+ const keyword = (opts.keyword ?? "").trim().toLowerCase();
120
+ const positions = [];
121
+ let lastMsg = PROXY_HINT;
122
+ let anyOk = false;
123
+ for (const t of types) {
124
+ const r = await scrape(t);
125
+ if (!r.ok) {
126
+ lastMsg = r.message;
127
+ continue;
128
+ }
129
+ anyOk = true;
130
+ for (const raw of r.raw)
131
+ positions.push(summarize(raw, t));
132
+ }
133
+ if (!anyOk) {
134
+ return {
135
+ ok: false,
136
+ source: SOURCE,
137
+ message: lastMsg,
138
+ query: opts,
139
+ positions: [],
140
+ };
141
+ }
142
+ const filtered = keyword
143
+ ? positions.filter((p) => p.title.toLowerCase().includes(keyword) || p.work_cities.toLowerCase().includes(keyword))
144
+ : positions;
145
+ const offset = (page - 1) * pageSize;
113
146
  return {
114
- ok: false,
147
+ ok: true,
115
148
  source: SOURCE,
116
- message: STUB_MESSAGE,
117
- total: 0,
118
- fetched: 0,
119
- positions: [],
149
+ query: opts,
150
+ page,
151
+ page_size: pageSize,
152
+ total: filtered.length,
153
+ positions: filtered.slice(offset, offset + pageSize),
120
154
  };
121
155
  }
122
- // ---- fetchPositionDetail ----
123
- export async function fetchPositionDetail(postId) {
156
+ export async function fetchAllPositions(opts = {}) {
157
+ const all = await searchPositions({ ...opts, page: 1, pageSize: 100 });
158
+ if (!all.ok) {
159
+ return {
160
+ ok: false,
161
+ source: SOURCE,
162
+ message: all.message,
163
+ total: 0,
164
+ fetched: 0,
165
+ positions: [],
166
+ };
167
+ }
124
168
  return {
125
- ok: false,
169
+ ok: true,
126
170
  source: SOURCE,
127
- message: STUB_MESSAGE,
128
- post_id: postId,
171
+ total: all.total,
172
+ fetched: all.positions.length,
173
+ positions: all.positions,
129
174
  };
130
175
  }
131
- // ---- fetchDictionaries ----
132
- export async function fetchDictionaries() {
176
+ export async function fetchPositionDetail(postId) {
177
+ const id = (postId ?? "").trim();
133
178
  return {
134
179
  ok: false,
135
180
  source: SOURCE,
136
- message: STUB_MESSAGE,
137
- note: "When hr.hikvision.com becomes accessible from non-CN: " +
138
- "inspect JS bundles at /zwzx for /api/* filter taxonomy endpoints " +
139
- "(job categories, work cities, recruit types).",
181
+ post_id: id,
182
+ message: PROXY_HINT,
140
183
  };
141
184
  }
142
- // ---- notices (no public endpoint) ----
185
+ export async function fetchDictionaries() {
186
+ return { ok: false, source: SOURCE, message: PROXY_HINT };
187
+ }
143
188
  export async function listNotices() {
144
- return {
145
- ok: false,
146
- source: SOURCE,
147
- message: "Hikvision: no public notices endpoint",
148
- notices: [],
149
- };
189
+ return { ok: false, source: SOURCE, message: PROXY_HINT, notices: [] };
150
190
  }
151
191
  export async function getNotice(noticeId) {
152
- return {
153
- ok: false,
154
- source: SOURCE,
155
- message: "Hikvision: no public notices endpoint",
156
- notice_id: noticeId,
157
- };
192
+ return { ok: false, source: SOURCE, message: PROXY_HINT, notice_id: noticeId };
158
193
  }
159
194
  export async function findNoticesByQuestion(question, _opts = {}) {
160
195
  return {
161
196
  ok: false,
162
197
  source: SOURCE,
163
198
  question,
164
- message: "Hikvision: no public notices endpoint",
199
+ message: PROXY_HINT,
165
200
  matches: [],
166
201
  };
167
202
  }
168
- // ---- matchResume ----
169
- //
170
- // Because the position search API is inaccessible, we cannot retrieve live listings
171
- // to score against the resume. Return ok:false with the extracted signals so the
172
- // caller can display what terms were parsed (useful for debugging the resume text).
173
- export async function matchResume(text, _opts = {}) {
203
+ export async function matchResume(text, opts = {}) {
174
204
  const { terms, cities } = extractResumeSignals(text ?? "");
205
+ const list = await searchPositions({ pageSize: 50 });
206
+ if (!list.ok) {
207
+ return {
208
+ ok: false,
209
+ source: SOURCE,
210
+ extracted_terms: terms,
211
+ city_preferences: cities,
212
+ matches: [],
213
+ message: list.message,
214
+ };
215
+ }
216
+ const topN = Math.max(1, opts.topN ?? 5);
217
+ const scored = list.positions
218
+ .map((p) => ({
219
+ p,
220
+ score: scoreOverlap(`${p.title} ${p.work_cities}`, terms, cities).score,
221
+ }))
222
+ .sort((a, b) => b.score - a.score)
223
+ .slice(0, topN)
224
+ .map((x) => x.p);
175
225
  return {
176
- ok: false,
226
+ ok: true,
177
227
  source: SOURCE,
178
228
  extracted_terms: terms,
179
229
  city_preferences: cities,
180
- matches: [],
181
- message: STUB_MESSAGE,
182
- apply_url: CAMPUS_URL,
183
- moka_url: MOKA_URL,
230
+ matches: scored,
184
231
  };
185
232
  }
233
+ export { extractResumeSignals, scoreOverlap };
package/dist/index.js CHANGED
@@ -51,7 +51,7 @@ import * as webank from "./webank.js";
51
51
  import * as horizonrobotics from "./horizonrobotics.js";
52
52
  import * as cambricon from "./cambricon.js";
53
53
  import { memoryList, memoryGet, memorySet, memoryEvent, memoryClear, } from "./memory.js";
54
- const VERSION = "0.7.4";
54
+ const VERSION = "0.7.5";
55
55
  const HELP = `
56
56
  job-pro — query Chinese big-tech campus recruiting from your terminal
57
57
  (job.ha7ch.com)
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "job-pro",
3
- "version": "0.7.4",
4
- "description": "Query Chinese big-tech campus recruiting from your terminal. 50 companies, 46 live (incl. Lilith via local Chrome / puppeteer-core). No signup, no token, no server.",
3
+ "version": "0.7.5",
4
+ "description": "Query Chinese big-tech campus recruiting from your terminal. 50 companies, 46 live (incl. Lilith via local Chrome / puppeteer-core). +Hikvision via CDP + CN proxy when JOB_PRO_HTTPS_PROXY is set. No signup, no token, no server.",
5
5
  "homepage": "https://job.ha7ch.com",
6
6
  "repository": "https://github.com/HA7CH/job-pro",
7
7
  "license": "MIT",