job-pro 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,201 @@
1
+ // Thin client for Ant Group's campus-recruiting portal at talent.antgroup.com.
2
+ //
3
+ // ============================================================
4
+ // API Discovery (probed 2026-05, JS bundle + network analysis):
5
+ //
6
+ // Portal URL: https://talent.antgroup.com/campus-list (public list view)
7
+ // https://talent.antgroup.com/campus-full-list (full list view)
8
+ // JS bundles: gw.alipayobjects.com/render/p/yuyan/180020010001257966/umi.6f081e74.js
9
+ // render.alipay.com/p/yuyan/180020010001257966/p__CampusRecruitment__CRList__index.*.async.js
10
+ // render.alipay.com/p/yuyan/180020010001257966/p__CampusRecruitment__CRFullList__index.*.async.js
11
+ // Gateway host: talent.antgroup.com (Spanner CDN/WAF, Alipay's proprietary gateway)
12
+ // Backend host: antwork-prod.antgroup-inc.cn (actual API server)
13
+ //
14
+ // ============================================================
15
+ // Endpoint inventory (extracted from JS bundle module 64588 + full UMI bundle):
16
+ //
17
+ // POST /api/campus/position/search — paginated job search
18
+ // POST /api/campus/position/detail — single position detail
19
+ // POST /api/campus/position/queryDept — dept tree for a position group
20
+ // POST /api/campus/positionGroup/queryBatchConfig — batch config
21
+ // POST /api/campus/positionGroup/queryBatchDetailById — batch detail
22
+ // POST /api/searchCondition/list — filter taxonomy (categories, cities, depts)
23
+ // POST /api/searchCondition/listPositionGroup
24
+ // POST /api/searchCondition/listTalentPlan
25
+ //
26
+ // Canonical position detail URL: /campus-position?positionId=<id>
27
+ //
28
+ // ============================================================
29
+ // AUTH STATUS — GATED (Alipay OAuth / buservice SDK):
30
+ //
31
+ // EVERY endpoint (including /api/campus/position/search and
32
+ // /api/searchCondition/list) requires an authenticated Alipay/Ant Group
33
+ // session. Without login, the backend returns:
34
+ //
35
+ // { "buserviceErrorCode": "USER_NOT_LOGIN",
36
+ // "buserviceErrorMsg": "https://pubbuservice.alipay.com/…" }
37
+ //
38
+ // The buservice middleware intercepts ALL routes as a catch-all auth gate
39
+ // before any controller logic runs. There is no guest/anonymous tier.
40
+ //
41
+ // The talent.antgroup.com Spanner gateway additionally returns 405 Method
42
+ // Not Allowed for POST requests that lack valid Alipay session cookies,
43
+ // preventing even the USER_NOT_LOGIN response from being seen in most cases.
44
+ // Direct calls to antwork-prod.antgroup-inc.cn reveal the auth error clearly.
45
+ //
46
+ // ============================================================
47
+ // CSRF / session flow (observed but INSUFFICIENT for anonymous access):
48
+ //
49
+ // GET /campus-list sets:
50
+ // ALIPAYJSESSIONID=<token>; domain=.antgroup.com
51
+ // _CHIPS-ALIPAYJSESSIONID=<same_token>; samesite=none; partitioned
52
+ // spanner=<signed_value>; path=/; secure
53
+ //
54
+ // These cookies are required for CORS (Access-Control-Allow-Credentials: true)
55
+ // but the buservice SDK then validates the session against Alipay's auth
56
+ // infrastructure — a simple GET-derived cookie has no authenticated user.
57
+ // Unlike Alibaba's portal (campus-talent.alibaba.com) which only needs an
58
+ // XSRF-TOKEN for public search, Ant Group's portal requires full Alipay OAuth.
59
+ //
60
+ // ============================================================
61
+ // Ant Group vs Alibaba — KEY DIFFERENCES:
62
+ //
63
+ // Portal: talent.antgroup.com vs campus-talent.alibaba.com
64
+ // Auth: Alipay OAuth (gated) vs XSRF-TOKEN only (public search works)
65
+ // CSRF: Not sufficient alone vs Sufficient for anonymous search
66
+ // Backend host: antwork-prod.antgroup-inc.cn vs campus-talent.alibaba.com
67
+ // Auth MW: buservice SDK (blocks all) vs Spring XSRF (only mutating ops)
68
+ //
69
+ // ============================================================
70
+ // FILTER TAXONOMY (from JS bundle, not verified against live API):
71
+ // channel values: "campus_group_official_site" (zh), "en_official_site" (en)
72
+ // searchCondition/list returns: searchItems with types "workCity", "category", "dept", "recruitType"
73
+ // Position fields: id, categoryName, workLocations, graduationTime, circleNames (BU)
74
+ //
75
+ // ============================================================
76
+ // ---- PositionSummary field mapping (Ant Group → canonical) ----
77
+ // post_id ← item.id (stringified)
78
+ // title ← item.name
79
+ // project ← item.categoryName ?? "" (e.g. "技术类", "产品类")
80
+ // recruit_label ← item.recruitType ?? "" (e.g. "实习生", "校招生")
81
+ // bgs ← item.circleNames?.[0] ?? "" (BU / business unit)
82
+ // work_cities ← item.workLocations?.join(" / ") ?? ""
83
+ // apply_url ← https://talent.antgroup.com/campus-position?positionId=<id>
84
+ import { extractResumeSignals, scoreOverlap, checkResume } from "./tencent.js";
85
+ export { extractResumeSignals, scoreOverlap, checkResume };
86
// Origin of the Ant Group campus-recruiting portal.
const PORTAL_ROOT = "https://talent.antgroup.com";
// Public list view of the portal.
const CAMPUS_PAGE = PORTAL_ROOT + "/campus-list";
/**
 * Builds the canonical detail-page URL for one position.
 * @param {*} id - Position id; it is stringified and URL-encoded.
 * @returns {string} Absolute URL of the position page.
 */
const DETAIL_PAGE = (id) => {
  const encodedId = encodeURIComponent(String(id));
  return `${PORTAL_ROOT}/campus-position?positionId=${encodedId}`;
};
// ---------- stub reason constant ----------
// Human-readable explanation of why the portal cannot be queried anonymously.
const STUB_MESSAGE = [
  "Ant Group (talent.antgroup.com): all API endpoints require Alipay OAuth login. ",
  "POST /api/campus/position/search returns buserviceErrorCode=USER_NOT_LOGIN for ",
  "unauthenticated requests. The Spanner CDN gateway additionally returns HTTP 405 ",
  "for POST requests lacking a valid Alipay session cookie. No anonymous/guest tier exists. ",
  "To use this portal, the user must log in at talent.antgroup.com with an Alipay account ",
  "and supply a valid ALIPAYJSESSIONID cookie.",
].join("");
96
+ // ---------- searchPositions (stub) ----------
97
/**
 * Stubbed position search for the Ant Group portal.
 *
 * No request is sent. The function normalises the caller's options into the
 * query object it would have posted and returns it alongside an ok:false
 * result, so callers can see what was understood.
 *
 * @param {object} [opts] - Search options; null/undefined behaves like {}.
 * @param {number|string} [opts.page=1] - 1-based page; non-numeric values fall back to 1.
 * @param {number|string} [opts.pageSize=20] - Page size, clamped to 1..100; non-numeric values fall back to 20.
 * @param {string} [opts.channel="campus_group_official_site"] - Portal channel.
 * @param {*} [opts.keyword] - Free-text keyword; stringified, trimmed and cut to 60 chars.
 * @param {*} [opts.category] - Copied into the query when truthy.
 * @param {*} [opts.region] - Copied into the query when truthy.
 * @param {*} [opts.deptCode] - Copied into the query when truthy.
 * @returns {Promise<object>} Always { ok:false, source, message, query, page, page_size, total:null, positions:[] }.
 */
export async function searchPositions(opts = {}) {
  // The default parameter only covers undefined; tolerate an explicit null too.
  const options = opts ?? {};
  // Coerce to a finite integer so NaN/Infinity never leak into the echoed query.
  const toInt = (value, fallback) => {
    const parsed = Number(value ?? fallback);
    return Number.isFinite(parsed) ? Math.trunc(parsed) : fallback;
  };
  const pageSize = Math.max(1, Math.min(100, toInt(options.pageSize, 20)));
  const page = Math.max(1, toInt(options.page, 1));
  const channel = options.channel ?? "campus_group_official_site";
  const query = {
    pageIndex: page,
    pageSize,
    channel,
    language: "zh",
  };
  // Stringify first: a numeric keyword would otherwise throw on .trim().
  const keyword = options.keyword == null ? "" : String(options.keyword).trim();
  if (keyword) {
    query.keyword = keyword.slice(0, 60);
  }
  if (options.category) {
    query.category = options.category;
  }
  if (options.region) {
    query.region = options.region;
  }
  if (options.deptCode) {
    query.deptCode = options.deptCode;
  }
  return {
    ok: false,
    source: PORTAL_ROOT,
    message: STUB_MESSAGE,
    query,
    page,
    page_size: pageSize,
    total: null,
    positions: [],
  };
}
126
+ // ---------- fetchAllPositions (stub) ----------
127
/**
 * Stubbed bulk fetch for the Ant Group portal; no request is made.
 * @param {object} [opts] - Accepted for interface parity; not read.
 * @returns {Promise<object>} Always an ok:false result with zero positions.
 */
export async function fetchAllPositions(opts = {}) {
  const emptyResult = {
    ok: false,
    source: PORTAL_ROOT,
    message: STUB_MESSAGE,
    fetched: 0,
    total: null,
    positions: [],
  };
  return emptyResult;
}
137
+ // ---------- fetchPositionDetail (stub) ----------
138
/**
 * Stubbed position-detail lookup for the Ant Group portal; no request is made.
 *
 * @param {string|number} postId - Position id. It is stringified before
 *   trimming, so numeric ids no longer throw a TypeError.
 * @returns {Promise<object>} ok:false with "post_id is required" when the id
 *   is missing or blank; otherwise ok:false with the stub message, the
 *   normalised post_id and the public apply_url for that position.
 */
export async function fetchPositionDetail(postId) {
  const id = String(postId ?? "").trim();
  if (id === "") {
    return {
      ok: false,
      source: PORTAL_ROOT,
      message: "post_id is required",
    };
  }
  return {
    ok: false,
    source: PORTAL_ROOT,
    message: STUB_MESSAGE,
    post_id: id,
    apply_url: DETAIL_PAGE(id),
  };
}
155
+ // ---------- fetchDictionaries (stub) ----------
156
/**
 * Stubbed filter-taxonomy lookup for the Ant Group portal; no request is made.
 * @returns {Promise<object>} ok:false plus a note describing the gated endpoint.
 */
export async function fetchDictionaries() {
  const noteParts = [
    "Filter taxonomy (categories, cities, depts) is served via POST /api/searchCondition/list ",
    "with body {channel:'campus_group_official_site', language:'zh'}. ",
    "Response shape: { searchItems: [{type:'workCity'|'category'|'dept'|'recruitType', items:[{label,value}]}] }. ",
    "All require Alipay login.",
  ];
  return {
    ok: false,
    source: PORTAL_ROOT,
    message: STUB_MESSAGE,
    note: noteParts.join(""),
  };
}
167
+ // ---------- notices (stub) ----------
168
/**
 * Stubbed notice listing for the Ant Group portal.
 * @returns {Promise<object>} Always ok:false with a fixed explanatory message.
 */
export async function listNotices() {
  const result = { ok: false, source: PORTAL_ROOT };
  result.message = "Ant Group: no public notices endpoint";
  return result;
}
175
/**
 * Stubbed single-notice lookup for the Ant Group portal.
 * @param {*} _id - Notice id; accepted for interface parity and not read.
 * @returns {Promise<object>} Always ok:false with a fixed explanatory message.
 */
export async function getNotice(_id) {
  const result = { ok: false, source: PORTAL_ROOT };
  result.message = "Ant Group: no public notices endpoint";
  return result;
}
182
/**
 * Stubbed notice search for the Ant Group portal.
 * @param {*} _question - Accepted for interface parity; not read.
 * @param {object} [_opts] - Accepted for interface parity; not read.
 * @returns {Promise<object>} Always ok:false with an empty matches list.
 */
export async function findNoticesByQuestion(_question, _opts = {}) {
  const noMatches = [];
  return {
    ok: false,
    source: PORTAL_ROOT,
    message: "Ant Group: no public notices endpoint",
    matches: noMatches,
  };
}
190
+ // ---------- matchResume (stub) ----------
191
/**
 * Stubbed resume matcher for the Ant Group portal.
 *
 * Listings cannot be fetched, so only the signals parsed from the resume
 * text are returned; matches is always empty.
 *
 * @param {string} text - Resume text; null/undefined is treated as "".
 * @param {object} [opts] - Accepted for interface parity; not read.
 * @returns {Promise<object>} ok:false with extracted_terms, city_preferences and matches: [].
 */
export async function matchResume(text, opts = {}) {
  const signals = extractResumeSignals(text ?? "");
  return {
    ok: false,
    source: PORTAL_ROOT,
    message: STUB_MESSAGE,
    extracted_terms: signals.terms,
    city_preferences: signals.cities,
    matches: [],
  };
}
package/dist/byd.js ADDED
@@ -0,0 +1,160 @@
1
+ // Thin client stub for BYD (比亚迪) campus-recruiting portal at job.byd.com.
2
+ //
3
+ // ============================================================
4
+ // Endpoint discovery (probed 2026-05, JS bundle app.e46eb97b.js +
5
+ // chunk-e8fe.d262cda1.js, chunk-ac75.7dee0692.js, chunk-a7e5.62aed375.js,
6
+ // chunk-76ac.cedb4013.js, chunk-dbeb.0075e53e.js):
7
+ //
8
+ // Portal entry:
9
+ // https://job.byd.com/ → redirects to https://job.byd.com/portal/pc/
10
+ // https://careers.byd.com/ → Vite/Vue marketing page (static, no job listings)
11
+ // https://job.byd.com/portal/pc/ → main Vue SPA (webpack, ElementUI)
12
+ //
13
+ // Axios instance (t3Un module in app.e46eb97b.js):
14
+ // baseURL = "/portal/api"
15
+ // Interceptor: adds header Authorization: "bearer <token>" from Vuex store.
16
+ // Code 4001 → "Token无效或已过期: Not Authenticated" (auto-redirect to login).
17
+ //
18
+ // Campus-related API endpoints found in JS bundles:
19
+ // POST /portal/api/school/queryJobList → campus job list
20
+ // POST /portal/api/position/queryList → position list (also skiller/social)
21
+ // POST /portal/api/position/queryDetail → position detail
22
+ // POST /portal/api/other-info/notice/query-list → campus notices
23
+ // POST /portal/api/position/schedule/query-list → campus schedule / timeline
24
+ // GET /portal/api/siteInfo/faq → FAQ
25
+ // POST /portal/api/common/queryCodeTree → code dictionary
26
+ //
27
+ // All endpoints probed 2026-05: EVERY request returns:
28
+ // HTTP 200, body: {"code":4001,"timestamp":...,"msg":"Token无效或已过期: Not Authenticated"}
29
+ //
30
+ // Auth model:
31
+ // Requires a JWT bearer token obtained through BYD account login
32
+ // (POST /portal/api/account/login, then GET /portal/api/account/user-info).
33
+ // There is NO public/anonymous browsing API — even the FAQ and code-tree
34
+ // endpoints are gated behind a valid token.
35
+ //
36
+ // careers.byd.com investigation:
37
+ // careers.byd.com is an internationalised marketing SPA (Vite + Vue 3).
38
+ // Its BydPage-6104aa3e.js uses baseURL "/global-portal/api" with two
39
+ // known endpoints:
40
+ // GET /global-portal-api/global-material/getGlobalMaterial → 404
41
+ // GET /global-portal-api/global-country/getCountryNetwork → 404
42
+ // The site is a static landing page that links to job.byd.com; it does
43
+ // not expose any independent job-search API.
44
+ //
45
+ // ============================================================
46
+ // Summary: BYD has no publicly accessible campus-job search API.
47
+ // All API calls require a logged-in user JWT.
48
+ // This adapter is an honest stub — every function returns ok:false with
49
+ // an informative message. It will be upgraded once an authenticated
50
+ // (scrape-friendly) path is identified.
51
+ //
52
+ // ============================================================
53
+ // PositionSummary field mapping (BYD → canonical, documented for future use):
54
+ // post_id ← item.positionId or item.id (string)
55
+ // title ← item.positionName (e.g. "校招-软件开发工程师")
56
+ // project ← item.positionTypeName (职位类型, e.g. "研发")
57
+ // recruit_label ← item.recruitTypeName (e.g. "应届生" / "实习生")
58
+ // bgs ← "" (not exposed in API)
59
+ // work_cities ← item.workPlace or item.city (free-text Chinese city string)
60
+ // apply_url ← https://job.byd.com/portal/pc/school/schoolPositionApply
61
+ // ?positionId={id}
62
+ //
63
+ // ============================================================
64
+ import { extractResumeSignals, scoreOverlap, checkResume } from "./tencent.js";
65
+ export { checkResume };
66
// Host name reported as the `source` of every BYD result.
const SOURCE = "job.byd.com";
// Campus landing page users are pointed to for manual browsing.
const CAMPUS_PAGE = "https://job.byd.com/portal/pc/school/home";
// ---- stub reason ----
// Explanation of why the BYD API cannot be queried anonymously.
const STUB_REASON = [
  "BYD job.byd.com: all API endpoints require a valid JWT bearer token ",
  "(code 4001 — Token无效或已过期). No public/anonymous job search API exists. ",
  `Visit ${CAMPUS_PAGE} to browse positions after login.`,
].join("");
72
+ // ---- searchPositions ----
73
/**
 * Stubbed position search for BYD; no request is made.
 * @param {object} [_opts] - Accepted for interface parity; not read.
 * @returns {Promise<object>} Always ok:false with the campus page link and no positions.
 */
export async function searchPositions(_opts = {}) {
  const stubResult = {
    ok: false,
    source: SOURCE,
    message: STUB_REASON,
    campus_page: CAMPUS_PAGE,
    positions: [],
  };
  return stubResult;
}
82
+ // ---- fetchAllPositions ----
83
/**
 * Stubbed bulk fetch for BYD; no request is made.
 * @param {object} [_opts] - Accepted for interface parity; not read.
 * @returns {Promise<object>} Always ok:false with fetched: 0 and no positions.
 */
export async function fetchAllPositions(_opts = {}) {
  const stubResult = {
    ok: false,
    source: SOURCE,
    message: STUB_REASON,
    campus_page: CAMPUS_PAGE,
    fetched: 0,
    positions: [],
  };
  return stubResult;
}
93
+ // ---- fetchPositionDetail ----
94
/**
 * Stubbed position-detail lookup for BYD; no request is made.
 * @param {*} _postId - Accepted for interface parity; not read.
 * @returns {Promise<object>} Always ok:false with the stub reason.
 */
export async function fetchPositionDetail(_postId) {
  const stubResult = {
    ok: false,
    source: SOURCE,
    message: STUB_REASON,
  };
  return stubResult;
}
97
+ // ---- fetchDictionaries ----
98
/**
 * Stubbed dictionary lookup for BYD; no request is made.
 * @returns {Promise<object>} ok:false plus a note and the list of known, auth-gated endpoints.
 */
export async function fetchDictionaries() {
  const note = [
    "BYD: no public filter-taxonomy endpoint. ",
    "POST /portal/api/common/queryCodeTree returns 4001 without a token.",
  ].join("");
  const knownEndpoints = [
    "POST /portal/api/school/queryJobList (campus job list — auth required)",
    "POST /portal/api/position/queryList (position list — auth required)",
    "POST /portal/api/position/queryDetail (position detail — auth required)",
    "POST /portal/api/other-info/notice/query-list (notices — auth required)",
    "POST /portal/api/position/schedule/query-list (campus schedule — auth required)",
    "GET /portal/api/siteInfo/faq (FAQ — auth required)",
    "POST /portal/api/common/queryCodeTree (code tree — auth required)",
  ];
  return {
    ok: false,
    source: SOURCE,
    message: STUB_REASON,
    note,
    known_endpoints: knownEndpoints,
  };
}
116
+ // ---- listNotices ----
117
/**
 * Stubbed notice listing for BYD.
 * @returns {Promise<object>} Always ok:false, naming the auth-gated notices endpoint.
 */
export async function listNotices() {
  const result = { ok: false, source: SOURCE };
  result.message = "BYD: notices endpoint (POST /portal/api/other-info/notice/query-list) requires authentication.";
  return result;
}
124
+ // ---- getNotice ----
125
/**
 * Stubbed single-notice lookup for BYD.
 * @param {*} _id - Accepted for interface parity; not read.
 * @returns {Promise<object>} Always ok:false with a fixed explanatory message.
 */
export async function getNotice(_id) {
  const result = { ok: false, source: SOURCE };
  result.message = "BYD: no public notices endpoint (auth required).";
  return result;
}
132
+ // ---- findNoticesByQuestion ----
133
/**
 * Stubbed notice search for BYD.
 *
 * Returns an empty `matches` array in addition to the failure message so the
 * result has the same shape as the other adapters' findNoticesByQuestion and
 * callers can iterate `matches` without a guard.
 *
 * @param {*} _question - Accepted for interface parity; not read.
 * @param {object} [_opts] - Accepted for interface parity; not read.
 * @returns {Promise<object>} Always { ok:false, source, message, matches: [] }.
 */
export async function findNoticesByQuestion(_question, _opts = {}) {
  return {
    ok: false,
    source: SOURCE,
    message: "BYD: no public notices endpoint (auth required).",
    matches: [],
  };
}
140
+ // ---- matchResume ----
141
+ // Resume matching is best-effort using extractResumeSignals/scoreOverlap from
142
+ // tencent.ts, but since the position listing API is gated, we can only return
143
+ // a stub with the extracted signals and a pointer to the campus page.
144
/**
 * Stubbed resume matcher for BYD.
 *
 * Listings cannot be fetched, so the result carries only the signals parsed
 * from the resume text, a pointer to the campus page, and an empty `matches`
 * array (kept so the shape matches the other adapters' matchResume).
 *
 * @param {string} text - Resume text; null/undefined is treated as "".
 * @param {object} [opts] - Accepted for interface parity; not read.
 * @returns {Promise<object>} ok:false with message, campus_page,
 *   extracted_terms, city_preferences and matches: [].
 */
export async function matchResume(text, opts = {}) {
  // Extract signals so the caller knows what was parsed from the resume.
  const { terms, cities } = extractResumeSignals(text ?? "");
  // Only the first ten terms are echoed in the message to keep it readable.
  const preview = terms.slice(0, 10).join(", ");
  return {
    ok: false,
    source: SOURCE,
    message: "BYD: cannot search positions — API requires authentication. " +
      `Extracted resume signals: [${preview}]. ` +
      "Visit the campus page to search manually.",
    campus_page: CAMPUS_PAGE,
    extracted_terms: terms,
    city_preferences: cities,
    matches: [],
  };
}
159
+ // ---- re-export helpers so the tencent resume signals are accessible ----
160
+ export { extractResumeSignals, scoreOverlap };
package/dist/index.js CHANGED
@@ -19,8 +19,13 @@ import * as huawei from "./huawei.js";
19
19
  import * as weibo from "./weibo.js";
20
20
  import * as mihoyo from "./mihoyo.js";
21
21
  import * as pingan from "./pingan.js";
22
+ import * as sensetime from "./sensetime.js";
23
+ import * as trip from "./trip.js";
24
+ import * as unitree from "./unitree.js";
25
+ import * as byd from "./byd.js";
26
+ import * as antgroup from "./antgroup.js";
22
27
  import { memoryList, memoryGet, memorySet, memoryEvent, memoryClear, } from "./memory.js";
23
- const VERSION = "0.5.0";
28
+ const VERSION = "0.6.0";
24
29
  const HELP = `
25
30
  job-pro — query Chinese big-tech campus recruiting from your terminal
26
31
  (job.ha7ch.com)
@@ -50,6 +55,11 @@ COMPANIES
50
55
  weibo career.sina.com.cn (Weibo / 微博 — auth-gated, limited)
51
56
  mihoyo campus.mihoyo.com (miHoYo / 米哈游 — SPA, limited)
52
57
  pingan campus.pingan.com (Ping An / 平安 — unified group)
58
+ sensetime hr.sensetime.com (SenseTime / 商汤 — auth-gated, limited)
59
+ trip careers.ctrip.com (Trip.com / 携程)
60
+ unitree www.unitree.com (Unitree / 宇树科技 — robotics)
61
+ byd job.byd.com (BYD / 比亚迪 — JWT-gated, limited)
62
+ antgroup talent.antgroup.com (Ant Group / 蚂蚁集团 — OAuth-gated, limited)
53
63
 
54
64
  VERBS (same surface for every company)
55
65
  search <kw> search openings (free text)
@@ -208,6 +218,11 @@ const ADAPTERS = {
208
218
  weibo: weibo,
209
219
  mihoyo: mihoyo,
210
220
  pingan: pingan,
221
+ sensetime: sensetime,
222
+ trip: trip,
223
+ unitree: unitree,
224
+ byd: byd,
225
+ antgroup: antgroup,
211
226
  };
212
227
  async function runCompany(adapter, company, rawArgs) {
213
228
  const [verb, ...rest] = rawArgs;
@@ -0,0 +1,186 @@
1
+ // Thin client for 商汤科技 / SenseTime campus-recruiting portal at hr.sensetime.com.
2
+ //
3
+ // ============================================================
4
+ // RECONNAISSANCE RESULTS (probed 2026-05):
5
+ //
6
+ // https://hr.sensetime.com/
7
+ // → 302 redirect to /SU60fa3bdabef57c1023fc1cbc/pb/social.html
8
+ // Platform: "PB" / self-hosted Chinese HRIS (not Feishu, not Workday)
9
+ // Vendor fingerprint: /pb/js/vendor.js + /pb/js/{page}.js webpack bundles
10
+ //
11
+ // https://careers.sensetime.com/
12
+ // https://campus.sensetime.com/
13
+ // → SSL handshake failure (geo-blocked / Apple Private Relay conflict)
14
+ //
15
+ // ============================================================
16
+ // PORTAL STRUCTURE (from JS bundle analysis):
17
+ //
18
+ // The SPA serves these page bundles:
19
+ // /pb/js/social.js → 社招 (social/full-time hire) page
20
+ // /pb/js/school.js → 校园 (campus / new-grad + intern) page
21
+ // /pb/js/home.js → 首页 (home hub) page
22
+ //
23
+ // Channel IDs embedded in JS bundles:
24
+ // SU60fa3bdabef57c1023fc1cbc — social (社招) channel (main redirect target)
25
+ // SU6710d7c21c240e54e1f82a1b — campus (校园) channel (school.html)
26
+ //
27
+ // recruitType values (from bundle analysis):
28
+ // 1 = 校园/campus (new-grad), used by school.js
29
+ // 2 = 社招/social (full-time hire), used by social.js
30
+ //
31
+ // ============================================================
32
+ // API DISCOVERY (probed 2026-05, paths extracted from social.js + school.js bundles):
33
+ //
34
+ // Discovered paths (relative to origin+channelBase):
35
+ // POST /positionInfo/listPosition/{channelId}
36
+ // Payload: { isFrompb: true, recruitType: 1|2, pageSize: N, currentPage: N,
37
+ // postName?: str, postKey?: str, workPlace?: {...}, category?: {...} }
38
+ // Response: { state: "200", data: { pageForm: { pageData: [...], currentPage: N },
39
+ // positonNum: N } }
40
+ //
41
+ // POST /positionInfo/listSearchTerm/{channelId}
42
+ // Returns filter taxonomies (work cities, departments, job types)
43
+ //
44
+ // POST /positionInfo/listPositionDetail/{channelId}
45
+ // Payload: { postId: str, recruitType: N }
46
+ // Returns full JD for a single posting
47
+ //
48
+ // POST /positionInfo/UnassignedPostDetail/{channelId}
49
+ // Returns detail for positions with unassigned departments
50
+ //
51
+ // GET /suite/post/search/condition/{channelId}
52
+ // Returns search filter configuration
53
+ //
54
+ // Constructed API base:
55
+ // https://hr.sensetime.com/{channelId}/pb/{apiPath}/{channelId}
56
+ // (the Nginx proxy at /SU.../pb/ maps sub-paths to the backend)
57
+ //
58
+ // ============================================================
59
+ // WHY THIS IS A STUB (unauthenticated access is impossible):
60
+ //
61
+ // Every POST request to the above paths returns HTTP 405 Method Not Allowed,
62
+ // regardless of Origin, Referer, Content-Type, or User-Agent headers.
63
+ // GET requests return the SPA HTML shell (client-side routing catch-all).
64
+ //
65
+ // The Nginx WAF at hr.sensetime.com blocks all unauthenticated POST requests.
66
+ // The API requires a valid session cookie / JWT obtained via:
67
+ // POST /login/ or POST /ssoLogin
68
+ // These are enterprise SSO flows (phone OTP, WeChat OAuth, or SAML enterprise SSO)
69
+ // that cannot be automated without a real account.
70
+ //
71
+ // This is fundamentally different from ByteDance/Tencent/Feishu portals, which
72
+ // allow anonymous POST to their search endpoints without any session cookie.
73
+ //
74
+ // Recommendation: Monitor for:
75
+ // (a) A future public campus API at campus.sensetime.com
76
+ // (b) A Feishu Recruiting migration (SenseTime does use Feishu internally)
77
+ // (c) Third-party job boards (牛客, 实习僧) that scrape SenseTime listings
78
+ //
79
+ // ============================================================
80
+ // STUB CONTRACT: All functions return ok:false with STUB_MESSAGE.
81
+ // checkResume is re-exported from tencent.ts (works offline on resume text).
82
+ // When/if SenseTime opens a public API, rewrite this file — the export shape
83
+ // is already locked in by the PositionSummary interface below.
84
+ import { extractResumeSignals, checkResume } from "./tencent.js";
85
+ export { checkResume };
86
// Host name reported as the `source` of every SenseTime result.
const SOURCE = "hr.sensetime.com";
// Campus (school) page of the portal.
const CAMPUS_URL = "https://hr.sensetime.com/SU6710d7c21c240e54e1f82a1b/pb/school.html";
// Explanation of why the SenseTime API cannot be queried anonymously.
const STUB_MESSAGE = [
  "SenseTime (商汤): no public job API — hr.sensetime.com POSTs are blocked by WAF (HTTP 405) ",
  "without a valid session cookie; campus.sensetime.com and careers.sensetime.com are ",
  "geo-blocked (SSL failure). The HRIS platform (PB/PushB, channel SU6710d7c21c240e54e1f82a1b) ",
  "requires enterprise SSO (phone OTP / WeChat OAuth). ",
  "Documented in cli/src/sensetime.ts header.",
].join("");
93
+ // ---- searchPositions ----
94
/**
 * Stubbed position search for SenseTime; no request is made.
 *
 * Echoes the endpoint and the payload that would have been posted.
 *
 * @param {object} [_opts] - Search options.
 * @param {number} [_opts.recruitType=1] - Copied into the query.
 * @param {number} [_opts.pageSize=20] - Copied into the query.
 * @param {number} [_opts.page=1] - Copied into the query as currentPage.
 * @param {string} [_opts.keyword] - When truthy, sent as postKey.
 * @returns {Promise<object>} Always ok:false with endpoint, query, positions: [] and total: 0.
 */
export async function searchPositions(_opts = {}) {
  const channelId = "SU6710d7c21c240e54e1f82a1b";
  const query = {
    isFrompb: true,
    recruitType: _opts.recruitType ?? 1,
    pageSize: _opts.pageSize ?? 20,
    currentPage: _opts.page ?? 1,
  };
  if (_opts.keyword) {
    query.postKey = _opts.keyword;
  }
  return {
    ok: false,
    source: SOURCE,
    message: STUB_MESSAGE,
    // Expose the discovered endpoint so callers can see what we would have hit
    endpoint: `POST https://hr.sensetime.com/${channelId}/pb/positionInfo/listPosition/${channelId}`,
    query,
    positions: [],
    total: 0,
  };
}
112
+ // ---- fetchAllPositions ----
113
/**
 * Stubbed bulk fetch for SenseTime; no request is made.
 * @param {object} [_opts] - Accepted for interface parity; not read.
 * @returns {Promise<object>} Always ok:false with total: 0, fetched: 0 and no positions.
 */
export async function fetchAllPositions(_opts = {}) {
  const stubResult = {
    ok: false,
    source: SOURCE,
    message: STUB_MESSAGE,
    total: 0,
    fetched: 0,
    positions: [],
  };
  return stubResult;
}
123
+ // ---- fetchPositionDetail ----
124
/**
 * Stubbed position-detail lookup for SenseTime; no request is made.
 * @param {*} postId - Echoed back unchanged as post_id.
 * @returns {Promise<object>} Always ok:false with the stub message and the given post_id.
 */
export async function fetchPositionDetail(postId) {
  const stubResult = { ok: false, source: SOURCE, message: STUB_MESSAGE };
  stubResult.post_id = postId;
  return stubResult;
}
132
+ // ---- fetchDictionaries ----
133
+ //
134
+ // When accessible, POST /positionInfo/listSearchTerm/{channelId} returns:
135
+ // { state: "200", data: { projectList, provinceList, orgList, postTypeList, salaryList } }
136
/**
 * Stubbed dictionary lookup for SenseTime; no request is made.
 * @returns {Promise<object>} Always ok:false plus a note naming the endpoint to use once accessible.
 */
export async function fetchDictionaries() {
  const note = "When API becomes accessible: POST /positionInfo/listSearchTerm/{channelId}";
  return {
    ok: false,
    source: SOURCE,
    message: STUB_MESSAGE,
    note,
  };
}
144
+ // ---- notices (no public endpoint) ----
145
/**
 * Stubbed notice listing for SenseTime.
 * @returns {Promise<object>} Always ok:false with an empty notices list.
 */
export async function listNotices() {
  const noNotices = [];
  return {
    ok: false,
    source: SOURCE,
    message: "SenseTime: no public notices endpoint",
    notices: noNotices,
  };
}
153
/**
 * Stubbed single-notice lookup for SenseTime.
 * @param {*} noticeId - Echoed back unchanged as notice_id.
 * @returns {Promise<object>} Always ok:false with a fixed message and the given notice_id.
 */
export async function getNotice(noticeId) {
  const result = {
    ok: false,
    source: SOURCE,
    message: "SenseTime: no public notices endpoint",
  };
  result.notice_id = noticeId;
  return result;
}
161
/**
 * Stubbed notice search for SenseTime.
 * @param {*} question - Echoed back unchanged in the result.
 * @param {object} [_opts] - Accepted for interface parity; not read.
 * @returns {Promise<object>} Always ok:false with the question and an empty matches list.
 */
export async function findNoticesByQuestion(question, _opts = {}) {
  const result = { ok: false, source: SOURCE, question };
  result.message = "SenseTime: no public notices endpoint";
  result.matches = [];
  return result;
}
170
+ // ---- matchResume ----
171
+ //
172
+ // Because the position search API is inaccessible, we cannot retrieve live listings
173
+ // to score against the resume. Return ok:false with the extracted signals so the
174
+ // caller can display what terms were parsed (useful for debugging the resume text).
175
/**
 * Stubbed resume matcher for SenseTime.
 *
 * Listings cannot be fetched, so only the signals parsed from the resume
 * text are returned; matches is always empty.
 *
 * @param {string} text - Resume text; null/undefined is treated as "".
 * @param {object} [_opts] - Accepted for interface parity; not read.
 * @returns {Promise<object>} ok:false with extracted_terms, city_preferences,
 *   matches: [], the stub message and the campus page as apply_url.
 */
export async function matchResume(text, _opts = {}) {
  const signals = extractResumeSignals(text ?? "");
  return {
    ok: false,
    source: SOURCE,
    extracted_terms: signals.terms,
    city_preferences: signals.cities,
    matches: [],
    message: STUB_MESSAGE,
    apply_url: CAMPUS_URL,
  };
}
package/dist/trip.js ADDED
@@ -0,0 +1,365 @@
1
+ // Thin client for Trip.com / Ctrip (携程) public campus-recruiting API.
2
+ //
3
+ // Both portals are backed by the same API server:
4
+ // careers.ctrip.com — Chinese domestic portal (携程招聘)
5
+ // careers.trip.com — International portal (Trip.com Group Careers)
6
+ //
7
+ // This adapter targets careers.ctrip.com since it hosts the authoritative
8
+ // Chinese campus job feed. All JSON endpoints are unauthenticated; the server
9
+ // validates the presence of a mandatory `condition` wrapper in the POST body.
10
+ //
11
+ // ============================================================
12
+ // Endpoint inventory (probed 2026-05, JS bundle main.ad2ffe67.js):
13
+ //
14
+ // POST https://careers.ctrip.com/api/hrrecruit/getJobAd
15
+ // Payload (all fields inside a "condition" key):
16
+ // { condition: {
17
+ // pageIndex: <int>, // 1-based
18
+ // pageSize: <int>, // max tested: 100
19
+ // category: "2", // "2"=校招/campus, "1"=社招/social hire
20
+ // searchText: <string>, // keyword filter (free-text)
21
+ // city: <string>, // e.g. "CO0009" = Shanghai
22
+ // jobFamilyGroupCode: n/a // rejected with 202 — do not send
23
+ // } }
24
+ // Response: { retCode:"201", retMessage:"调用成功",
25
+ // retValue:{ total:<int>, recruitJobAdList:[...] } }
26
+ // retCode "201" = success (not HTTP 201).
27
+ // retCode "501" = validation error (missing `condition`).
28
+ // retCode "202" = data-validation error (bad field value).
29
+ //
30
+ // POST https://careers.ctrip.com/api/hrrecruit/getJobCount
31
+ // Payload: { source:"ctrip" }
32
+ // Response: retValue: [{categoryCode:"Categroy_1",total:44}, ...]
33
+ // Used for statistics only; not required for job search.
34
+ //
35
+ // IMPORTANT QUIRKS:
36
+ // 1. The `keyword` field (inside condition) crashes the server with a
37
+ // NullPointerException when combined with pagination. Use `searchText`
38
+ // instead — it is the working search field.
39
+ // 2. Combining `searchText` with `category` is accepted by the server but
40
+ // the server ignores searchText (returns all campus results). Keyword
41
+ // filtering therefore works only without the category filter.
42
+ // Practical consequence: when campusOnly is true (the default), keyword is applied client-side
43
+ // on the title after fetching the full campus set.
44
+ // 3. `category:"2"` (校招/fresh graduates) gives ~112 positions;
45
+ // no intern-only category exists (intern jobs appear mixed inside category 1
46
+ // or surface via keyword "实习" across all listings).
47
+ //
48
+ // ============================================================
49
+ // Field mapping (API response → PositionSummary)
50
+ // post_id ← item.id (numeric string, e.g. "27655163")
51
+ // title ← item.jobTitle (may include code suffix "(MJ034955)")
52
+ // project ← item.jobFamilyGroupName (e.g. "Software development")
53
+ // recruit_label ← item.kindName (e.g. "Fresh Graduates")
54
+ // bgs ← item.buName (BU = Business Unit, e.g. "International business")
55
+ // work_cities ← item.cityName
56
+ // apply_url ← https://careers.ctrip.com/campus/job-detail/<jobId>
57
+ // (uses UUID `jobId`, not numeric `id`)
58
+ //
59
+ // ============================================================
60
+ // Category/filter values probed 2026-05:
61
+ // category "1" = 社招 (social/experienced hire) ~657 positions
62
+ // category "2" = 校招 (campus / fresh graduates) ~112 positions
63
+ // No category (omit field) = all listings ~769 positions
64
+ //
65
+ // City codes (from item.city in responses):
66
+ // CO0009 = Shanghai CO0001 = Beijing CO0013 = Xiamen
67
+ // CO0004 = Shenzhen CO0006 = Chengdu (+ many others not enumerated)
68
+ //
69
+ // jobFamilyGroupName values seen in responses:
70
+ // "Software development", "Admin", "Business development",
71
+ // "Marketing & PR", "Finance", "Data & Analytics", "Product management"
72
+ //
73
+ // ============================================================
74
+ // Workday dead-end investigation:
75
+ // trip.wd1.myworkdayjobs.com — resolves and is behind Cloudflare but
76
+ // all POST attempts to /wday/cxs/trip/<slug>/jobs return HTTP 422 (no slug
77
+ // identifiable without an active UI page). The Workday tenant appears to be
78
+ // a legacy artifact from Trip.com's international hiring pre-2024.
79
+ // Not used in this adapter.
80
+ import { extractResumeSignals, scoreOverlap, checkResume } from "./tencent.js";
81
+ export { checkResume };
82
// Base URL of the JSON API; endpoint paths such as "/getJobAd" are appended.
const API_ROOT = "https://careers.ctrip.com/api/hrrecruit";
// Campus landing page: used as the Referer and as the fallback apply link.
const CAMPUS_PAGE = "https://careers.ctrip.com/campus";
// Builds the public detail-page link for a posting's `jobId`.
const DETAIL_PAGE = (jobId) => {
    const encodedId = encodeURIComponent(jobId);
    return `https://careers.ctrip.com/campus/job-detail/${encodedId}`;
};
// Request headers sent with every API call.
const DEFAULT_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
    "Accept": "application/json, text/plain, */*",
    "Content-Type": "application/json",
    "Origin": "https://careers.ctrip.com",
    "Referer": CAMPUS_PAGE,
};
92
/**
 * POST a JSON body to one hrrecruit endpoint and normalize the reply.
 *
 * Never throws: transport failures, non-2xx statuses, unparseable bodies and
 * bodies that are not JSON objects all come back as `{ ok: false, message }`.
 *
 * @param {string} path - Endpoint path appended to API_ROOT (e.g. "/getJobAd").
 * @param {object} body - Request payload; serialized with JSON.stringify.
 * @returns {Promise<{ok: boolean, data?: any, message: string}>} `data` is
 *   the upstream `retValue` and is only set when `retCode` is "201".
 */
async function call(path, body) {
    // One formatter for thrown values so every error message reads the same.
    const describe = (err) => (err instanceof Error ? err.message : String(err));
    let response;
    try {
        response = await fetch(`${API_ROOT}${path}`, {
            method: "POST",
            headers: DEFAULT_HEADERS,
            body: JSON.stringify(body),
        });
    }
    catch (err) {
        return { ok: false, message: `network error: ${describe(err)}` };
    }
    if (!response.ok) {
        return { ok: false, message: `HTTP ${response.status}: ${response.statusText}` };
    }
    let payload;
    try {
        payload = await response.json();
    }
    catch (err) {
        return { ok: false, message: `bad JSON: ${describe(err)}` };
    }
    // A syntactically valid body can still be `null` or a scalar; reading
    // `.retCode` from it would throw, so reject it explicitly.
    if (payload === null || typeof payload !== "object") {
        return { ok: false, message: "bad JSON: response body is not an object" };
    }
    // retCode "201" = success; any other value is an error.
    const ok = payload.retCode === "201";
    return {
        ok,
        data: ok ? payload.retValue : undefined,
        message: payload.retMessage || (ok ? "ok" : `upstream error (code ${payload.retCode})`),
    };
}
126
/**
 * Map one raw API posting onto the adapter's position-summary shape.
 *
 * Missing fields become empty strings. The apply link uses `jobId` when it
 * is present and falls back to the campus landing page otherwise.
 *
 * @param {object} item - One entry of the API's `recruitJobAdList`.
 * @returns {{post_id: string, title: string, project: string,
 *   recruit_label: string, bgs: string, work_cities: string, apply_url: string}}
 */
function summarizePosition(item) {
    const detailKey = item.jobId ?? "";
    const businessUnit = item.buName ?? "";
    let applyUrl = CAMPUS_PAGE;
    if (detailKey) {
        applyUrl = DETAIL_PAGE(detailKey);
    }
    return {
        post_id: String(item.id ?? ""),
        title: item.jobTitle ?? "",
        project: item.jobFamilyGroupName ?? "",
        recruit_label: item.kindName ?? "",
        bgs: businessUnit.trim(),
        work_cities: item.cityName ?? "",
        apply_url: applyUrl,
    };
}
139
+ // ---------- searchPositions ----------
140
/**
 * Search job postings, one page at a time.
 *
 * Server quirks this function works around:
 *  - the `keyword` request field crashes the server, so free text is sent as
 *    `searchText`;
 *  - `searchText` is ignored whenever `category` is set, so in campus mode
 *    the keyword has to be matched against titles on the client.
 *
 * In campus mode with a keyword the whole campus feed is scanned, filtered by
 * title, and only then paginated. `total` and `page` therefore describe the
 * filtered result set rather than one arbitrary server page.
 *
 * @param {object} [opts]
 * @param {number} [opts.page=1] - 1-based page number.
 * @param {number} [opts.pageSize=20] - Page size, clamped to 1..100.
 * @param {string} [opts.keyword] - Free-text filter (trimmed, max 60 chars).
 * @param {boolean} [opts.campusOnly=true] - Restrict to category "2" (campus).
 * @param {string} [opts.cityCode] - City code such as "CO0009".
 * @returns {Promise<object>} `{ ok, source, query, page, page_size, total,
 *   positions }` on success, or `{ ok: false, message, source, query,
 *   positions: [] }` on failure. `query` is the condition built from the
 *   requested page and page size.
 */
export async function searchPositions(opts = {}) {
    const source = "careers.ctrip.com";
    const pageSize = Math.max(1, Math.min(100, opts.pageSize ?? 20));
    const page = Math.max(1, opts.page ?? 1);
    const keyword = (opts.keyword ?? "").trim().slice(0, 60);
    const campusOnly = opts.campusOnly !== false; // default true
    const condition = { pageIndex: page, pageSize };
    if (campusOnly) {
        // searchText is ignored by the server once category is set; do not send it.
        condition.category = "2";
    }
    else if (keyword) {
        condition.searchText = keyword;
    }
    if (opts.cityCode?.trim()) {
        condition.city = opts.cityCode.trim();
    }
    const failure = (message) => ({ ok: false, message, source, query: condition, positions: [] });

    if (!(campusOnly && keyword)) {
        // The server does all filtering and paging for this request.
        const response = await call("/getJobAd", { condition });
        if (!response.ok || !response.data) {
            return failure(response.message);
        }
        const rows = response.data.recruitJobAdList ?? [];
        return {
            ok: true,
            source,
            query: condition,
            page,
            page_size: pageSize,
            total: response.data.total ?? rows.length,
            positions: rows.map(summarizePosition),
        };
    }

    // Campus + keyword: scan the feed, keep title matches, paginate locally.
    const scanPageSize = 100; // largest page size the API is known to accept
    const scanMaxPages = 10; // safety cap so a misbehaving upstream cannot loop forever
    const needle = keyword.toLowerCase();
    const matched = [];
    let scanned = 0;
    for (let pageIndex = 1; pageIndex <= scanMaxPages; pageIndex++) {
        const response = await call("/getJobAd", {
            condition: { ...condition, pageIndex, pageSize: scanPageSize },
        });
        if (!response.ok || !response.data) {
            return failure(response.message);
        }
        const rows = response.data.recruitJobAdList ?? [];
        scanned += rows.length;
        for (const row of rows) {
            if ((row.jobTitle ?? "").toLowerCase().includes(needle)) {
                matched.push(row);
            }
        }
        const reachedEnd = rows.length < scanPageSize || scanned >= (response.data.total ?? Infinity);
        if (reachedEnd) {
            break;
        }
    }
    const start = (page - 1) * pageSize;
    return {
        ok: true,
        source,
        query: condition,
        page,
        page_size: pageSize,
        total: matched.length,
        positions: matched.slice(start, start + pageSize).map(summarizePosition),
    };
}
192
+ // ---------- fetchAllPositions ----------
193
/**
 * Page through searchPositions() and collect every posting.
 *
 * In campus mode a keyword is matched against titles on the client. Doing
 * that per page makes the per-page `total` meaningless and stops the paging
 * loop early, so here the unfiltered feed is collected first and the keyword
 * is applied once to the combined list.
 *
 * @param {object} [opts] - Same options as searchPositions(), plus:
 * @param {number} [opts.pageSize=100] - Page size, clamped to 1..100.
 * @param {number} [opts.maxPages=5] - Upper bound on pages requested.
 * @returns {Promise<object>} `{ ok: true, source, total, fetched, positions }`
 *   or, when a page fails, `{ ok: false, message, source, fetched, positions }`
 *   carrying whatever was collected before the failure.
 */
export async function fetchAllPositions(opts = {}) {
    const source = "careers.ctrip.com";
    const pageSize = Math.max(1, Math.min(100, opts.pageSize ?? 100));
    const maxPages = Math.max(1, opts.maxPages ?? 5);
    const campusOnly = opts.campusOnly !== false;
    const needle = (opts.keyword ?? "").trim().slice(0, 60).toLowerCase();
    const filterLocally = campusOnly && needle !== "";
    // When filtering locally, ask for the raw feed so page totals stay meaningful.
    const pageOpts = filterLocally ? { ...opts, keyword: "" } : opts;
    const applyKeyword = (positions) => filterLocally
        ? positions.filter((p) => (p.title ?? "").toLowerCase().includes(needle))
        : positions;
    const bucket = [];
    let total;
    for (let page = 1; page <= maxPages; page++) {
        const result = await searchPositions({ ...pageOpts, page, pageSize });
        if (!result.ok) {
            const partial = applyKeyword(bucket);
            return {
                ok: false,
                message: result.message,
                source,
                fetched: partial.length,
                positions: partial,
            };
        }
        if (total === undefined) {
            total = result.total;
        }
        if (!result.positions.length) {
            break;
        }
        bucket.push(...result.positions);
        if (total !== undefined && bucket.length >= total) {
            break;
        }
    }
    const positions = applyKeyword(bucket);
    return {
        ok: true,
        source,
        total: filterLocally ? positions.length : (total ?? bucket.length),
        fetched: positions.length,
        positions,
    };
}
225
+ // ---------- fetchPositionDetail ----------
226
+ // The API exposes the full `requirements` HTML in the search response itself,
227
+ // so detail is derived from the search list without a separate round-trip.
228
+ // We page through the campus listing to find the matching id.
229
/**
 * Look up one campus posting by its numeric id.
 *
 * There is no separate detail request here: the list response already
 * carries the `requirements` HTML, so the campus feed is paged through
 * (up to 5 pages of 100) until the id is found.
 *
 * @param {string|number} postId - The posting's `id`; numbers are accepted
 *   and compared as strings.
 * @returns {Promise<object>} On success `{ ok: true, source, post_id, job_id,
 *   title, requirements_html, recruit_label, job_family, bu, city,
 *   publish_date, apply_url }`. On failure `{ ok: false, source, post_id?,
 *   message }`, where `message` is the upstream error when a request failed
 *   and a "not found" notice only when the scan completed without a match.
 */
export async function fetchPositionDetail(postId) {
    const source = "careers.ctrip.com";
    // String() first so a numeric id does not throw on .trim().
    const id = String(postId ?? "").trim();
    if (!id) {
        return { ok: false, source, message: "post_id is required" };
    }
    const pageSize = 100;
    const maxPages = 5;
    for (let page = 1; page <= maxPages; page++) {
        const condition = { pageIndex: page, pageSize, category: "2" };
        const resp = await call("/getJobAd", { condition });
        if (!resp.ok || !resp.data) {
            // Report the real failure instead of claiming the post does not exist.
            return {
                ok: false,
                source,
                post_id: id,
                message: resp.ok ? "upstream returned no data" : resp.message,
            };
        }
        const items = resp.data.recruitJobAdList ?? [];
        const found = items.find((p) => String(p.id) === id);
        if (found) {
            const summary = summarizePosition(found);
            return {
                ok: true,
                source,
                post_id: id,
                job_id: found.jobId ?? "",
                title: found.jobTitle ?? "",
                requirements_html: found.requirements ?? "",
                recruit_label: found.kindName ?? "",
                job_family: found.jobFamilyGroupName ?? "",
                bu: found.buName ?? "",
                city: found.cityName ?? "",
                publish_date: found.publishDate ?? "",
                apply_url: summary.apply_url,
            };
        }
        // A short page is the last page.
        if (items.length < pageSize) {
            break;
        }
    }
    return {
        ok: false,
        source,
        post_id: id,
        message: `post ${id} not found in campus search results (searched up to ${maxPages * pageSize} posts)`,
    };
}
269
+ // ---------- fetchDictionaries ----------
270
/**
 * Describe the filter values this adapter knows about.
 *
 * Calls /getJobCount for per-category totals and pairs them with a
 * hard-coded list of the two known category codes.
 *
 * @returns {Promise<object>} `{ ok, source, campus_page, categories,
 *   job_count_by_family, message, note }`; `ok` mirrors the API call and
 *   `job_count_by_family` is empty when the call failed.
 */
export async function fetchDictionaries() {
    const counts = await call("/getJobCount", { source: "ctrip" });
    const succeeded = counts.ok;
    const campusCategory = { category: "2", label: "校招 / Campus (Fresh Graduates)", note: "~112 positions as of 2026-05" };
    const socialCategory = { category: "1", label: "社招 / Social (Experienced Hire)", note: "~657 positions" };
    let countRows = [];
    if (succeeded) {
        countRows = counts.data ?? [];
    }
    let message = counts.message;
    if (succeeded) {
        message = "ok";
    }
    return {
        ok: succeeded,
        source: "careers.ctrip.com",
        campus_page: CAMPUS_PAGE,
        categories: [campusCategory, socialCategory],
        job_count_by_family: countRows,
        message,
        note: "Filter taxonomy: use category='2' for campus jobs in searchPositions(). " +
            "City codes are in item.city of API responses (e.g. CO0009=Shanghai, CO0001=Beijing).",
    };
}
289
+ // ---------- notices (no public endpoint) ----------
290
// Template for the "no notices" answer. It is copied on every call so that a
// caller mutating its result cannot change what later callers receive.
const STUB_NOTICE = {
    ok: false,
    source: "careers.ctrip.com",
    message: "Trip.com / Ctrip: no public notices/announcements endpoint",
};
/**
 * There is no notices endpoint for this source; always reports failure.
 *
 * @returns {Promise<object>} A fresh `{ ok: false, source, message, notices: [] }`.
 */
export async function listNotices() {
    return { ...STUB_NOTICE, notices: [] };
}
/**
 * There is no notices endpoint for this source; always reports failure.
 *
 * @param {string} _id - Requested notice id, echoed back as `notice_id`.
 * @returns {Promise<object>} `{ ok: false, source, message, notice_id }`.
 */
export async function getNotice(_id) {
    return {
        ok: false,
        source: "careers.ctrip.com",
        message: "Trip.com / Ctrip: no public notices endpoint",
        notice_id: _id,
    };
}
/**
 * There is no notices endpoint for this source; always reports failure.
 *
 * @param {string} _question - Question text, echoed back as `question`.
 * @param {object} [_opts] - Accepted for signature parity; not read.
 * @returns {Promise<object>} `{ ok: false, source, question, message, matches: [] }`.
 */
export async function findNoticesByQuestion(_question, _opts = {}) {
    return {
        ok: false,
        source: "careers.ctrip.com",
        question: _question,
        message: "Trip.com / Ctrip: no public notices endpoint",
        matches: [],
    };
}
312
+ // ---------- matchResume ----------
313
/**
 * Rank campus postings by keyword overlap with a resume.
 *
 * Signals are extracted from the text, up to two pages of 100 campus
 * postings are fetched, and each posting's summary fields are scored against
 * the signals. When nothing scores above zero, the head of the listing is
 * returned instead, with a reason saying so.
 *
 * @param {string} text - Resume text; null/undefined is treated as "".
 * @param {object} [opts]
 * @param {number} [opts.topN=5] - Matches to return (at least 1).
 * @param {number} [opts.candidates=50] - Shortlist size (at least topN).
 * @returns {Promise<object>} `{ ok: true, source, extracted_terms,
 *   city_preferences, keyword_used, matches, note }`, or an `{ ok: false, … }`
 *   envelope when no signals were found or the listing fetch failed.
 */
export async function matchResume(text, opts = {}) {
    const source = "careers.ctrip.com";
    const resumeText = text ?? "";
    const topN = Math.max(1, opts.topN ?? 5);
    const candidates = Math.max(topN, opts.candidates ?? 50);
    const { terms, cities } = extractResumeSignals(resumeText);
    if (!terms.length) {
        return {
            ok: false,
            source,
            message: "could not extract any technical signals from the text",
            preview: resumeText.slice(0, 120),
        };
    }
    // Reported back as `keyword_used`; the listing fetch below is unfiltered.
    const keyword = terms.slice(0, 3).join(" ");
    const list = await fetchAllPositions({ campusOnly: true, pageSize: 100, maxPages: 2 });
    if (!list.ok) {
        return { ok: false, source, message: list.message, positions: [] };
    }
    // Keep only postings whose summary text overlaps the resume signals.
    const scored = list.positions.flatMap((position) => {
        const blob = [
            position.title,
            position.project,
            position.recruit_label,
            position.bgs,
            position.work_cities,
        ].join(" ");
        const { score, reasons } = scoreOverlap(blob, terms, cities);
        return score > 0 ? [{ score, position, reasons }] : [];
    });
    scored.sort((left, right) => right.score - left.score);
    let shortlist = scored.slice(0, Math.max(topN, candidates));
    if (shortlist.length === 0) {
        // Nothing overlapped: surface the head of the listing instead.
        shortlist = list.positions
            .slice(0, candidates)
            .map((position) => ({ score: 0, position, reasons: [] }));
    }
    const matches = shortlist.slice(0, topN).map((entry) => {
        const hasReasons = entry.reasons.length > 0;
        const match_reasons = hasReasons
            ? entry.reasons.slice(0, 5)
            : ["no specific keyword overlap — surfaced from campus listing"];
        return { ...entry.position, match_reasons };
    });
    return {
        ok: true,
        source,
        extracted_terms: terms,
        city_preferences: cities,
        keyword_used: keyword,
        matches,
        note: "match_reasons surfaces overlapping keywords, not a probability of getting an interview. " +
            "The only authority on selection is HR.",
    };
}
364
+ // Export helpers so other modules can import them from trip.js
365
+ export { extractResumeSignals, scoreOverlap };
@@ -0,0 +1,389 @@
1
+ // Thin client for 宇树科技 (Unitree Robotics) campus recruiting.
2
+ //
3
+ // ============================================================
4
+ // API DISCOVERY (probed 2026-05)
5
+ //
6
+ // Infrastructure:
7
+ // https://www.unitree.com/position/ (and /cn/position/) →
8
+ // Nuxt 3 SPA that inlines all job listings in the server-rendered HTML.
9
+ // The apiBase revealed in window.__NUXT__.config is:
10
+ // https://api.unitree.com/website
11
+ // with routes GET_JOB_LIST: "/job/list" and GET_JOB_DETAIL: "/job/info"
12
+ // (found in /_nuxt/Cd6-Y0rS.js bundle, 2026-05).
13
+ //
14
+ // Dead ends probed:
15
+ // career.unitree.com — resolves to 198.18.x.x (IANA reserved / unreachable)
16
+ // unitree.app.mokahr.com — same IANA block; no Moka tenant
17
+ // https://api.unitree.com/website/job/list (GET or POST, any headers) →
18
+ // HTTP 567 "请求已被站点的安全策略拦截" from Tencent Cloud EdgeOne WAF.
19
+ // The WAF blocks all non-browser clients regardless of UA/Referer/Origin spoofing.
20
+ // The endpoint is real (the SPA uses it from a browser context) but is entirely
21
+ // inaccessible to server-side HTTP clients.
22
+ //
23
+ // WORKING APPROACH — parse SSR HTML from www.unitree.com/position/:
24
+ // The Nuxt SPA is configured with ssr:false in its __NUXT_DATA__ state
25
+ // (serverRendered:false), yet the site's CDN pre-renders the page HTML via
26
+ // a build-time static pass. The full position list (typically ~20-25 jobs)
27
+ // is embedded verbatim in the returned HTML, including job IDs, titles, city,
28
+ // category, department, and hot/urgent flags.
29
+ //
30
+ // HTML job entry format (stripped from tags):
31
+ // {Title}({JobCode}) 热招 [急招] {City} | {Category} | {Department} {JD text...}
32
+ // Some newer listings omit the job code:
33
+ // {Title} 热招 [急招] {City} | {Category} | {Department} {JD text...}
34
+ //
35
+ // Job detail deep-links use SPA routing at /position/{JobCode} or /cn/position/{JobCode}.
36
+ // These return 404 from the CDN (SPA-only routes) but are still the canonical apply URLs.
37
+ //
38
+ // ============================================================
39
+ // PositionSummary field mapping (canonical keys — matches all other adapters):
40
+ // post_id — job code (e.g. "J10034") or a slug derived from the title
41
+ // title — position title (Chinese)
42
+ // project — job category (e.g. "技术类" / "销售类")
43
+ // recruit_label — "热招" / "热招|急招" / "" depending on status flags
44
+ // bgs — department (e.g. "研发部" / "销售服务体系")
45
+ // work_cities — work location (e.g. "杭州市")
46
+ // apply_url — deep link to the SPA position page
47
+ // ============================================================
48
+ import { extractResumeSignals, scoreOverlap, checkResume } from "./tencent.js";
49
+ export { checkResume };
50
// Label attached to every result envelope produced by this adapter.
const SOURCE = "unitree.com";
// Careers listing page that is fetched and parsed.
const POSITION_PAGE = "https://www.unitree.com/position/";
// Chinese-language variant of the same listing page.
const POSITION_PAGE_CN = "https://www.unitree.com/cn/position/";
// Builds the per-position link for a job code such as "J10034".
const DETAIL_URL = (jobCode) => {
    const encodedCode = encodeURIComponent(jobCode);
    return `https://www.unitree.com/position/${encodedCode}`;
};
// Request headers sent when fetching the listing page.
const DEFAULT_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
};
59
+ // ---------- HTML parser ----------
60
/**
 * Reduce an HTML fragment to plain text: tags become spaces, the five named
 * entities the page uses are decoded, and whitespace runs collapse to one
 * space.
 *
 * Entities are decoded in a single pass. Decoding `&amp;` in a separate
 * earlier step would turn escaped text such as `&amp;lt;` into `<` instead
 * of the literal `&lt;`.
 *
 * @param {string} html - Raw HTML.
 * @returns {string} Text with tags removed and whitespace normalized.
 */
function stripTags(html) {
    const entityText = { nbsp: " ", amp: "&", lt: "<", gt: ">", quot: '"' };
    return html
        .replace(/<[^>]+>/g, " ")
        .replace(/&(nbsp|amp|lt|gt|quot);/g, (_entity, name) => entityText[name])
        .replace(/\s+/g, " ");
}
70
/**
 * Derive a stand-in id from a title for listings that carry no job code.
 *
 * Every character that is neither a word character nor in the 一-鿿 range
 * becomes a hyphen, hyphen runs collapse, a leading/trailing hyphen is
 * dropped, and the result is cut to 40 characters.
 *
 * @param {string} title - Position title.
 * @returns {string} The slug.
 */
function slugify(title) {
    const hyphenated = title.replace(/[^\w一-鿿]/g, "-");
    const collapsed = hyphenated.replace(/-+/g, "-");
    const trimmed = collapsed.replace(/^-|-$/g, "");
    return trimmed.slice(0, 40);
}
78
/**
 * Extract position summaries from the listing page's HTML.
 *
 * The page is flattened to text with stripTags() and scanned twice:
 *  1. entries anchored by a job code such as "(J10034)" — the title is read
 *     from the text just before the code, flags and metadata from just after;
 *  2. entries without a code, recognized by "<title> 热招 [急招] <city> | <category> | <dept>".
 * Ids seen in either pass are remembered so each position is emitted once.
 *
 * @param {string} html - Raw HTML of the listing page.
 * @returns {Array<object>} Summaries with `post_id`, `title`, `project`,
 *   `recruit_label`, `bgs`, `work_cities` and `apply_url`.
 */
function parsePositions(html) {
    const text = stripTags(html);
    const knownIds = new Set();
    const results = [];

    // ---- Pass 1: entries with an explicit job code like (J10034) ----
    for (const hit of text.matchAll(/\(J(\d+)\)/g)) {
        const jobCode = `J${hit[1]}`;
        if (knownIds.has(jobCode)) {
            continue;
        }
        knownIds.add(jobCode);
        const codeStart = hit.index;
        const codeEnd = codeStart + hit[0].length;

        // Title: the last title-looking cluster in the 140 characters before the code.
        const lead = text.slice(Math.max(0, codeStart - 140), codeStart);
        const leadMatch = lead.match(/([A-Za-z+#()一-鿿][^\s·|。;:(]{1,50}(?:\/[^\s|·。;:(]{2,20})?)\s*$/);
        const rawTitle = leadMatch ? leadMatch[1].trim() : "";
        // Drop description text that bled in: keep the segment after the last 。 or ;
        const title = rawTitle.split(/[。;]\s*/).pop()?.trim() ?? rawTitle;

        // Flags are looked for in the first 30 characters after the code,
        // metadata in the first 200.
        const tail = text.slice(codeEnd, codeEnd + 200);
        const flagWindow = tail.slice(0, 30);
        const labels = ["热招", "急招"].filter((flag) => flagWindow.includes(flag));
        const meta = tail.match(/([一-鿿]{2,6}市)\s*\|\s*([^|]{2,20}?)\s*\|\s*([^\s|·。]{2,30})/);
        // Without a metadata line the city falls back to 杭州市 and the rest stay empty.
        const [city, category, dept] = meta
            ? [meta[1].trim(), meta[2].trim(), meta[3].trim()]
            : ["杭州市", "", ""];

        results.push({
            post_id: jobCode,
            title: title || jobCode,
            project: category,
            recruit_label: labels.join("|"),
            bgs: dept,
            work_cities: city,
            apply_url: DETAIL_URL(jobCode),
        });
    }

    // ---- Pass 2: entries without a (Jxxxxx) code ----
    const uncoded = /([^\s·|:;。]{3,30}(?:工程师|设计师|经理|专员|研究员|架构师|科学家|运营|专家|分析师|顾问))\s+热招(?!\s*\()(\s*急招)?\s+([一-鿿]{2,6}市)\s*\|\s*([^|]{2,20}?)\s*\|\s*([^\s|·。]{2,30})/g;
    for (const hit of text.matchAll(uncoded)) {
        const title = hit[1].trim();
        const slug = slugify(title);
        if (knownIds.has(slug)) {
            continue;
        }
        knownIds.add(slug);
        const urgent = Boolean(hit[2]?.trim());
        results.push({
            post_id: slug,
            title,
            project: hit[4].trim(),
            recruit_label: urgent ? "热招|急招" : "热招",
            bgs: hit[5].trim(),
            work_cities: hit[3].trim(),
            // No job code means no deep link; point at the listing page.
            apply_url: POSITION_PAGE,
        });
    }
    return results;
}
153
+ // ---------- fetch helper ----------
154
/**
 * Download the listing page.
 *
 * Never throws: a transport failure, a non-2xx status, or a failure while
 * reading the body all come back as `{ ok: false, message }`.
 *
 * @returns {Promise<{ok: boolean, html?: string, message: string}>}
 */
async function fetchPositionHtml() {
    // Turn a thrown value into a failure envelope with a stage prefix.
    const fail = (stage, err) => ({
        ok: false,
        message: `${stage}: ${err instanceof Error ? err.message : String(err)}`,
    });
    let response;
    try {
        response = await fetch(POSITION_PAGE, { headers: DEFAULT_HEADERS });
    }
    catch (err) {
        return fail("network error", err);
    }
    if (!response.ok) {
        return { ok: false, message: `HTTP ${response.status}: ${response.statusText}` };
    }
    try {
        const html = await response.text();
        return { ok: true, html, message: "ok" };
    }
    catch (err) {
        return fail("body read error", err);
    }
}
180
// ---------- In-process cache ----------
// The position list rarely changes, so a successful parse is reused for
// five minutes before the page is fetched again.
let _posCache = null;
/**
 * Return every position parsed from the listing page, using the in-process
 * cache when it is still fresh.
 *
 * Only a non-empty parse is cached: if a fetch succeeds but yields no
 * positions, the next call fetches again instead of serving an empty list for
 * the whole TTL. Callers receive a copy of the array, so pushing to or
 * truncating a result cannot alter the cache.
 *
 * @returns {Promise<{ok: boolean, positions: Array<object>, message: string, total: number}>}
 */
async function getAllPositions() {
    const ttlMs = 5 * 60 * 1000;
    const now = Date.now();
    if (_posCache && now - _posCache.fetchedAt < ttlMs) {
        return {
            ok: true,
            positions: [..._posCache.positions],
            message: "ok (cached)",
            total: _posCache.positions.length,
        };
    }
    const result = await fetchPositionHtml();
    if (!result.ok || !result.html) {
        return { ok: false, positions: [], message: result.message, total: 0 };
    }
    const positions = parsePositions(result.html);
    _posCache = positions.length > 0 ? { positions, fetchedAt: now } : null;
    return { ok: true, positions: [...positions], message: "ok", total: positions.length };
}
197
+ // ---------- searchPositions ----------
198
/**
 * Search the scraped position list and return one page of it.
 *
 * The keyword is matched case-insensitively against each position's title,
 * category, department, city and id; paging is done locally.
 *
 * @param {object} [opts]
 * @param {number} [opts.page=1] - 1-based page number.
 * @param {number} [opts.pageSize=20] - Page size, clamped to 1..100.
 * @param {string} [opts.keyword] - Substring filter.
 * @returns {Promise<object>} `{ ok: true, source, page, page_size, total,
 *   positions }`, or `{ ok: false, source, message, apply_url, positions: [] }`
 *   when the list could not be loaded.
 */
export async function searchPositions(opts = {}) {
    const pageSize = Math.max(1, Math.min(100, opts.pageSize ?? 20));
    const page = Math.max(1, opts.page ?? 1);
    const keyword = (opts.keyword ?? "").trim().toLowerCase();
    const pool = await getAllPositions();
    if (!pool.ok) {
        return {
            ok: false,
            source: SOURCE,
            message: pool.message,
            apply_url: POSITION_PAGE,
            positions: [],
        };
    }
    const mentionsKeyword = (position) => {
        const fields = [position.title, position.project, position.bgs, position.work_cities, position.post_id];
        return fields.join(" ").toLowerCase().includes(keyword);
    };
    const filtered = keyword ? pool.positions.filter(mentionsKeyword) : pool.positions;
    const offset = (page - 1) * pageSize;
    return {
        ok: true,
        source: SOURCE,
        page,
        page_size: pageSize,
        total: filtered.length,
        positions: filtered.slice(offset, offset + pageSize),
    };
}
232
+ // ---------- fetchAllPositions ----------
233
/**
 * Return the whole scraped position list, optionally filtered by keyword.
 *
 * The keyword is matched case-insensitively against each position's title,
 * category, department, city and id. Without a keyword the pool's own
 * `positions` array is returned as-is.
 *
 * @param {object} [opts]
 * @param {string} [opts.keyword] - Substring filter.
 * @returns {Promise<object>} `{ ok: true, source, total, fetched, positions }`,
 *   or `{ ok: false, source, message, apply_url, fetched: 0, positions: [] }`
 *   when the list could not be loaded.
 */
export async function fetchAllPositions(opts = {}) {
    const keyword = (opts.keyword ?? "").trim().toLowerCase();
    const pool = await getAllPositions();
    if (!pool.ok) {
        return {
            ok: false,
            source: SOURCE,
            message: pool.message,
            apply_url: POSITION_PAGE,
            fetched: 0,
            positions: [],
        };
    }
    let positions = pool.positions;
    if (keyword) {
        positions = pool.positions.filter((position) => {
            const fields = [position.title, position.project, position.bgs, position.work_cities, position.post_id];
            return fields.join(" ").toLowerCase().includes(keyword);
        });
    }
    const count = positions.length;
    return { ok: true, source: SOURCE, total: count, fetched: count, positions };
}
262
+ // ---------- fetchPositionDetail ----------
263
+ // The SSR HTML contains the JD text for each position but does not expose it
264
+ // in a clean structured field. We extract a best-effort description snippet.
265
/**
 * Build a detail record for one position from the listing page.
 *
 * The page text is searched for the position's anchor and a best-effort
 * description snippet is cut from what follows the "city | category | dept"
 * line. The remaining fields come from the parsed position list.
 *
 * Anchor selection:
 *  - ids of the form J<digits> are job codes and appear as "(J12345)";
 *  - any other id is a title slug, so the title of the matching parsed
 *    position is searched for first, and the de-hyphenated id is a fallback.
 * Testing only for a leading "J" would misread slugs such as "Java开发工程师"
 * as job codes.
 *
 * @param {string|number} postId - Job code or title slug.
 * @returns {Promise<object>} `{ ok: true, source, post_id, title, project,
 *   bgs, recruit_label, description, work_cities, apply_url }`, or
 *   `{ ok: false, source, post_id?, message }`.
 */
export async function fetchPositionDetail(postId) {
    // String() first so a non-string id does not throw on .trim().
    const id = String(postId ?? "").trim();
    if (!id) {
        return { ok: false, source: SOURCE, message: "post_id is required" };
    }
    const result = await fetchPositionHtml();
    if (!result.ok || !result.html) {
        return { ok: false, source: SOURCE, post_id: id, message: result.message };
    }
    const text = stripTags(result.html);
    const pool = await getAllPositions();
    const pos = pool.positions.find((p) => p.post_id === id);
    const isJobCode = /^J\d+$/.test(id);
    const anchors = isJobCode ? [`(${id})`] : [pos?.title, id.replace(/-/g, "")];
    let idx = -1;
    for (const anchor of anchors) {
        if (!anchor) {
            continue;
        }
        idx = text.indexOf(anchor);
        if (idx !== -1) {
            break;
        }
    }
    if (idx === -1) {
        return {
            ok: false,
            source: SOURCE,
            post_id: id,
            message: `post ${id} not found in current page snapshot`,
        };
    }
    // Extract up to 600 chars of JD text following the city|category|dept line
    const after = text.slice(idx, idx + 800);
    const descMatch = after.match(/[一-鿿]{2,5}市\s*\|\s*[^|]+\|\s*[^\s|]+\s+(.{50,600})/);
    const description = descMatch ? descMatch[1].trim() : "";
    return {
        ok: true,
        source: SOURCE,
        post_id: id,
        title: pos?.title ?? id,
        project: pos?.project ?? "",
        bgs: pos?.bgs ?? "",
        recruit_label: pos?.recruit_label ?? "",
        description,
        work_cities: pos?.work_cities ?? "",
        apply_url: pos?.apply_url ?? DETAIL_URL(id),
    };
}
305
+ // ---------- fetchDictionaries ----------
306
+ // Returns the known static taxonomy (Unitree does not expose a filter catalog).
307
/**
 * Report the static filter values known for this source, together with how
 * many positions the scraper currently sees.
 *
 * The category, department and city lists are hard-coded; only `ok`,
 * `positions_scraped` and `message` reflect the live scrape.
 *
 * @returns {Promise<object>} `{ ok, source, scrape_url, note,
 *   positions_scraped, categories, departments, cities, message }`.
 */
export async function fetchDictionaries() {
    const pool = await getAllPositions();
    const explanation = "Unitree's ATS API (api.unitree.com/website) is protected by Tencent Cloud EdgeOne WAF " +
        "(HTTP 567) and is inaccessible from server-side clients. " +
        "Job listings are parsed from the SSR HTML of www.unitree.com/position/ instead.";
    const { ok, total, message } = pool;
    return {
        ok,
        source: SOURCE,
        scrape_url: POSITION_PAGE,
        note: explanation,
        positions_scraped: total,
        categories: ["技术类", "销售类"],
        departments: ["研发部", "销售服务体系"],
        cities: ["杭州市"],
        message,
    };
}
323
+ // ---------- notices (no public endpoint) ----------
324
// Builds the "no notices" answer. A new object is created for every call so
// that a caller mutating its result cannot change what later callers receive.
function makeNoticesStub() {
    return {
        ok: false,
        source: SOURCE,
        message: "Unitree: no public notices or announcement endpoint available",
    };
}
/**
 * There is no notices endpoint for this source; always reports failure.
 *
 * @returns {Promise<object>} `{ ok: false, source, message, notices: [] }`.
 */
export async function listNotices() {
    return { ...makeNoticesStub(), notices: [] };
}
/**
 * There is no notices endpoint for this source; always reports failure.
 *
 * @param {string} _id - Requested notice id, echoed back as `notice_id`.
 * @returns {Promise<object>} `{ ok: false, source, message, notice_id }`.
 */
export async function getNotice(_id) {
    return { ...makeNoticesStub(), notice_id: _id };
}
/**
 * There is no notices endpoint for this source; always reports failure.
 *
 * @param {string} _question - Question text, echoed back as `question`.
 * @param {object} [_opts] - Accepted for signature parity; not read.
 * @returns {Promise<object>} `{ ok: false, source, message, question, matches: [] }`.
 */
export async function findNoticesByQuestion(_question, _opts = {}) {
    return { ...makeNoticesStub(), question: _question, matches: [] };
}
338
+ // ---------- matchResume ----------
339
+ // Extract technical signals from resume text, filter the scraped position list,
340
+ // and return top N by keyword overlap score.
341
/**
 * Rank scraped positions by keyword overlap with a resume.
 *
 * Signals are extracted from the text and every position's summary fields
 * are scored against them. When nothing scores above zero, the head of the
 * list is returned instead, with a reason saying so.
 *
 * @param {string} text - Resume text; null/undefined is treated as "".
 * @param {object} [opts]
 * @param {number} [opts.topN=5] - Matches to return (at least 1).
 * @param {number} [opts.candidates=20] - Shortlist size (at least topN).
 * @returns {Promise<object>} `{ ok: true, source, extracted_terms,
 *   city_preferences, matches, note }`, or an `{ ok: false, … }` envelope
 *   when no signals were found or the position list could not be loaded.
 */
export async function matchResume(text, opts = {}) {
    const resumeText = text ?? "";
    const topN = Math.max(1, opts.topN ?? 5);
    const candidates = Math.max(topN, opts.candidates ?? 20);
    const { terms, cities } = extractResumeSignals(resumeText);
    if (!terms.length) {
        return {
            ok: false,
            source: SOURCE,
            message: "could not extract any technical signals from the text",
            preview: resumeText.slice(0, 120),
        };
    }
    const pool = await getAllPositions();
    if (!pool.ok) {
        return { ok: false, source: SOURCE, message: pool.message, positions: [] };
    }
    // Keep only positions whose summary text overlaps the resume signals.
    const scored = pool.positions.flatMap((position) => {
        const blob = [
            position.title,
            position.project,
            position.bgs,
            position.work_cities,
            position.recruit_label,
        ].join(" ");
        const { score, reasons } = scoreOverlap(blob, terms, cities);
        return score > 0 ? [{ score, position, reasons }] : [];
    });
    scored.sort((left, right) => right.score - left.score);
    let shortlist = scored.slice(0, Math.max(topN, candidates));
    if (shortlist.length === 0) {
        // Nothing overlapped: surface the head of the list instead.
        shortlist = pool.positions
            .slice(0, candidates)
            .map((position) => ({ score: 0, position, reasons: [] }));
    }
    const matches = shortlist.slice(0, topN).map((entry) => {
        const hasReasons = entry.reasons.length > 0;
        const match_reasons = hasReasons
            ? entry.reasons.slice(0, 5)
            : ["no specific keyword overlap — surfaced from full position list"];
        return { ...entry.position, match_reasons };
    });
    return {
        ok: true,
        source: SOURCE,
        extracted_terms: terms,
        city_preferences: cities,
        matches,
        note: "match_reasons surfaces overlapping keywords, not a probability of getting an interview. " +
            "The only authority on selection is HR.",
    };
}
389
+ export { extractResumeSignals, scoreOverlap };
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "job-pro",
3
- "version": "0.5.0",
4
- "description": "Query Chinese big-tech campus recruiting from your terminal. 19 companies: Tencent, ByteDance, Alibaba, Meituan, Xiaohongshu, JD, Kuaishou, Xiaomi, Baidu, NetEase, Didi, Bilibili, NIO, MiniMax, Huawei, Ping An, PDD, Weibo, miHoYo no signup, no token, no server.",
3
+ "version": "0.6.0",
4
+ "description": "Query Chinese big-tech campus recruiting from your terminal. 24 companies (18 live, 6 stub): Tencent, ByteDance, Alibaba, Meituan, Xiaohongshu, JD, Kuaishou, Xiaomi, Baidu, NetEase, Didi, Bilibili, NIO, MiniMax, Huawei, Ping An, Trip.com, Unitree + 6 auth-gated stubs (PDD, Weibo, miHoYo, SenseTime, BYD, Ant Group). No signup, no token, no server.",
5
5
  "homepage": "https://job.ha7ch.com",
6
6
  "repository": "https://github.com/HA7CH/job-pro",
7
7
  "license": "MIT",