jd-intel 0.4.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -7
- package/package.json +2 -2
- package/registry/workday.json +8 -0
- package/src/adapters/index.js +2 -0
- package/src/adapters/workday.js +198 -0
- package/src/cli.js +55 -8
- package/src/index.js +43 -9
- package/src/registry.js +28 -2
package/README.md
CHANGED
|
@@ -3,7 +3,8 @@
|
|
|
3
3
|
[](LICENSE)
|
|
4
4
|
[](https://nodejs.org)
|
|
5
5
|
[](https://www.npmjs.com/package/jd-intel)
|
|
6
|
-
[](https://www.npmjs.com/package/jd-intel)
|
|
7
|
+
[](https://www.npmjs.com/package/jd-intel-mcp)
|
|
7
8
|
[](https://github.com/prPMDev/jd-intel/stargazers)
|
|
8
9
|
|
|
9
10
|
> **Stop pasting job descriptions into AI assistants. Let your AI fetch them directly.**
|
|
@@ -41,7 +42,7 @@ Because scraping breaks where jd-intel doesn't:
|
|
|
41
42
|
- **Full JDs when browsing fails.** SPA-rendered boards, slow loads, auth walls, and geo-restrictions block a browser. They don't block a public API call.
|
|
42
43
|
- **Structured data, not HTML soup.** Salary, location type, department, and clean markdown, normalized across every ATS.
|
|
43
44
|
- **No keys, no browser.** Public APIs only. Runs anywhere your AI does.
|
|
44
|
-
- **One schema, every platform.** Greenhouse, Lever, Ashby, SmartRecruiters, TeamTailor, Recruitee return the same shape.
|
|
45
|
+
- **One schema, every platform.** Greenhouse, Lever, Ashby, SmartRecruiters, TeamTailor, Recruitee, Workday return the same shape.
|
|
45
46
|
|
|
46
47
|
---
|
|
47
48
|
|
|
@@ -209,8 +210,8 @@ No custom parsing per company.
|
|
|
209
210
|
| SmartRecruiters | Shipped | Enterprise and mid-market |
|
|
210
211
|
| TeamTailor | Shipped | European startups and scale-ups |
|
|
211
212
|
| Recruitee | Shipped | Dutch / EU SMBs and scale-ups |
|
|
213
|
+
| Workday | Shipped | Large enterprises (registry-keyed) |
|
|
212
214
|
| Personio | Planned | German / EU mid-market |
|
|
213
|
-
| Workday | Planned | Large enterprises (scoped scraper) |
|
|
214
215
|
|
|
215
216
|
Adding a new ATS is a single adapter file. See [Contributing](#contributing).
|
|
216
217
|
|
|
@@ -235,10 +236,10 @@ All filters AND together. Deep dive on patterns and gotchas: [docs/filters.md](d
|
|
|
235
236
|
|
|
236
237
|
**Shipped**
|
|
237
238
|
- Library, CLI, and MCP server (three surfaces of one toolkit)
|
|
238
|
-
- Greenhouse, Ashby, Lever, SmartRecruiters, TeamTailor, Recruitee adapters
|
|
239
|
+
- Greenhouse, Ashby, Lever, SmartRecruiters, TeamTailor, Recruitee, Workday adapters
|
|
239
240
|
- Title, topic, location, and date filters
|
|
240
241
|
- Salary extraction from JD text
|
|
241
|
-
- Verified company registry (
|
|
242
|
+
- Verified company registry (160+ companies)
|
|
242
243
|
|
|
243
244
|
**Next**
|
|
244
245
|
- Personio adapter (German / EU mid-market)
|
|
@@ -246,7 +247,6 @@ All filters AND together. Deep dive on patterns and gotchas: [docs/filters.md](d
|
|
|
246
247
|
|
|
247
248
|
**Planned**
|
|
248
249
|
- Workable adapter (parked — needs SPA shortcode resolution)
|
|
249
|
-
- Workday support (scoped scraper — large enterprise universe)
|
|
250
250
|
- Temporal tracking (when roles open, close, reopen)
|
|
251
251
|
- Change detection
|
|
252
252
|
- Resume-aware fit scoring
|
|
@@ -257,7 +257,7 @@ All filters AND together. Deep dive on patterns and gotchas: [docs/filters.md](d
|
|
|
257
257
|
|
|
258
258
|
**Add a company to the registry:** submit a PR to the appropriate file in `registry/`.
|
|
259
259
|
|
|
260
|
-
**Add an ATS adapter:** new file in `src/adapters/`. One adapter, one file. Follow the pattern of the existing
|
|
260
|
+
**Add an ATS adapter:** new file in `src/adapters/`. One adapter, one file. Follow the pattern of the existing adapters.
|
|
261
261
|
|
|
262
262
|
**Request a company:** [open an issue](https://github.com/prPMDev/jd-intel/issues/new). Tell me who's missing.
|
|
263
263
|
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "jd-intel",
|
|
3
|
-
"version": "0.4.1",
|
|
4
|
-
"description": "Fetch and normalize job descriptions across every major ATS (Greenhouse, Lever, Ashby) — for your AI assistant, no copy-paste.",
|
|
3
|
+
"version": "0.6.0",
|
|
4
|
+
"description": "Fetch and normalize job descriptions across every major ATS (Greenhouse, Lever, Ashby, Workday, and more) — for your AI assistant, no copy-paste.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/index.js",
|
|
7
7
|
"bin": {
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
[
|
|
2
|
+
{"slug": "cisco", "name": "Cisco", "sector": "networking", "config": {"tenant": "cisco", "env": "wd5", "site": "Cisco_Careers"}},
|
|
3
|
+
{"slug": "salesforce", "name": "Salesforce", "sector": "crm / saas", "config": {"tenant": "salesforce", "env": "wd12", "site": "External_Career_Site"}},
|
|
4
|
+
{"slug": "bankofamerica", "name": "Bank of America", "sector": "banking", "config": {"tenant": "ghr", "env": "wd1", "site": "Lateral-US"}},
|
|
5
|
+
{"slug": "adobe", "name": "Adobe", "sector": "creative software", "config": {"tenant": "adobe", "env": "wd5", "site": "external_experienced"}},
|
|
6
|
+
{"slug": "nvidia", "name": "Nvidia", "sector": "semiconductors", "config": {"tenant": "nvidia", "env": "wd5", "site": "NVIDIAExternalCareerSite"}},
|
|
7
|
+
{"slug": "servicetitan", "name": "ServiceTitan", "sector": "vertical saas", "config": {"tenant": "servicetitan", "env": "wd1", "site": "ServiceTitan"}}
|
|
8
|
+
]
|
package/src/adapters/index.js
CHANGED
|
@@ -4,6 +4,7 @@ export { fetchAshby, hasAshby } from './ashby.js';
|
|
|
4
4
|
export { fetchSmartrecruiters, hasSmartrecruiters } from './smartrecruiters.js';
|
|
5
5
|
export { fetchTeamtailor, hasTeamtailor } from './teamtailor.js';
|
|
6
6
|
export { fetchRecruitee, hasRecruitee } from './recruitee.js';
|
|
7
|
+
export { fetchWorkday, hasWorkday } from './workday.js';
|
|
7
8
|
|
|
8
9
|
export const ADAPTERS = {
|
|
9
10
|
greenhouse: { fetch: (...args) => import('./greenhouse.js').then(m => m.fetchGreenhouse(...args)), has: (...args) => import('./greenhouse.js').then(m => m.hasGreenhouse(...args)) },
|
|
@@ -12,6 +13,7 @@ export const ADAPTERS = {
|
|
|
12
13
|
smartrecruiters: { fetch: (...args) => import('./smartrecruiters.js').then(m => m.fetchSmartrecruiters(...args)), has: (...args) => import('./smartrecruiters.js').then(m => m.hasSmartrecruiters(...args)) },
|
|
13
14
|
teamtailor: { fetch: (...args) => import('./teamtailor.js').then(m => m.fetchTeamtailor(...args)), has: (...args) => import('./teamtailor.js').then(m => m.hasTeamtailor(...args)) },
|
|
14
15
|
recruitee: { fetch: (...args) => import('./recruitee.js').then(m => m.fetchRecruitee(...args)), has: (...args) => import('./recruitee.js').then(m => m.hasRecruitee(...args)) },
|
|
16
|
+
workday: { fetch: (...args) => import('./workday.js').then(m => m.fetchWorkday(...args)), has: (...args) => import('./workday.js').then(m => m.hasWorkday(...args)) },
|
|
15
17
|
};
|
|
16
18
|
|
|
17
19
|
export const ATS_NAMES = Object.keys(ADAPTERS);
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
import { normalize, stripHtml } from '../normalizer.js';
|
|
2
|
+
|
|
3
|
+
const MAX_DETAIL_FETCHES = 100; // upper bound on per-posting detail requests per call
const LIST_PAGE_SIZE = 20; // postings requested per list page
const LIST_PAGE_HARD_CAP = 100; // <= 2000 list items scanned per request

/**
 * Fetch jobs from a Workday tenant via the public "CXS" JSON API.
 *
 * Workday's career-site SPA calls an unauthenticated JSON API. No
 * official docs, but it's stable and has no anti-bot at modest volume.
 *
 * REGISTRY-ONLY. Workday is keyed by an opaque {tenant, env, site}
 * triple that is NOT derivable from the company name (Bank of America's
 * tenant is `ghr`). So this adapter only works when called with
 * ctx.config; without a complete triple it returns [] immediately and
 * performs no network request.
 *
 * Two-step: a list endpoint (title / locationsText / postedOn, NO
 * descriptions) plus a per-posting detail endpoint for the full JD.
 * Enterprise tenants are huge (Salesforce ~1398 jobs), so list-evaluable
 * filters are applied BEFORE detail-hydrating and the detail set is
 * capped at MAX_DETAIL_FETCHES.
 *
 * @param {string} slug - normalized company slug (registry routing key)
 * @param {object} [ctx] - { config:{tenant,env,site}, companyName, filterContext }
 * @returns {Promise<Array>} Normalized job objects ([] when config is
 *   missing/incomplete or the list endpoint answers 404)
 * @throws {Error} when the FIRST list page answers with a non-404 HTTP
 *   error. Later list-page HTTP errors keep what was already collected.
 */
export async function fetchWorkday(slug, ctx = {}) {
  const cfg = ctx.config;
  if (!cfg || !cfg.tenant || !cfg.env || !cfg.site) return []; // registry-only guard

  const { tenant, env, site } = cfg;
  const base = `https://${tenant}.${env}.myworkdayjobs.com/wday/cxs/${tenant}/${site}`;
  const fc = ctx.filterContext || {};

  // 1. Page the cheap list (no descriptions in list responses).
  const postings = [];
  let offset = 0;
  let pages = 0;
  // Last positive `total` the API reported. Trusting each page's own
  // `total` ends paging early whenever a later page omits it or sends 0.
  // NOTE(review): some tenants reportedly send `total` only on the first
  // page — confirm against live tenants.
  let knownTotal = 0;
  while (pages < LIST_PAGE_HARD_CAP) {
    const resp = await fetch(`${base}/jobs`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ appliedFacets: {}, limit: LIST_PAGE_SIZE, offset, searchText: '' }),
    });

    if (!resp.ok) {
      if (resp.status === 404) return []; // wrong site / no such board
      if (offset === 0) {
        throw new Error(`Workday API error for ${slug} (${tenant}/${env}/${site}): ${resp.status}`);
      }
      break; // mid-paging failure: keep what we have
    }

    const data = (await resp.json()) || {};
    const page = data.jobPostings || [];
    postings.push(...page);
    pages += 1;
    offset += LIST_PAGE_SIZE;

    if (typeof data.total === 'number' && data.total > 0) knownTotal = data.total;
    if (page.length === 0) break;
    // With a known total, stop once it is covered. With no total ever
    // reported, a short page is the only end-of-list signal available.
    const finished = knownTotal > 0 ? offset >= knownTotal : page.length < LIST_PAGE_SIZE;
    if (finished) break;
  }

  // 2. Filter-aware candidate selection BEFORE the N+1 detail cost.
  //    The list carries title/locationsText/postedOn — enough to apply
  //    titleFilter, location, and recency without descriptions.
  let candidates = postings;

  if (fc.titleFilter) {
    const re = new RegExp(fc.titleFilter, 'i');
    candidates = candidates.filter(p => re.test(p.title || ''));
  }
  if (Array.isArray(fc.locationIncludes) && fc.locationIncludes.length > 0) {
    const inc = fc.locationIncludes.map(s => String(s).toLowerCase());
    candidates = candidates.filter(p => {
      const loc = (p.locationsText || '').toLowerCase();
      return inc.some(s => loc.includes(s));
    });
  }
  if (Array.isArray(fc.locationExcludes) && fc.locationExcludes.length > 0) {
    const exc = fc.locationExcludes.map(s => String(s).toLowerCase());
    candidates = candidates.filter(p => {
      const loc = (p.locationsText || '').toLowerCase();
      return !exc.some(s => loc.includes(s));
    });
  }
  if (typeof fc.postedWithinDays === 'number') {
    candidates = candidates.filter(p => withinDays(p.postedOn, fc.postedWithinDays));
  }

  // 3. Bound the detail-fetch set.
  //    NOTE: huge-tenant coverage is intentionally capped for v1
  //    (Salesforce ~1398 postings). A description `filter` is applied
  //    by the library AFTER this returns, so for that case we keep the
  //    full backstop instead of truncating tightly to `limit` (which
  //    could hydrate jobs that all fail the regex while better matches
  //    go unscanned). Proper fix (smart pagination / rate-limited
  //    concurrency / surfaced truncation) is tracked in #26, to be
  //    designed alongside retry/rate-limit work (#7).
  const limit = typeof fc.limit === 'number' && fc.limit > 0 ? fc.limit : 100;
  const cap = fc.filter ? MAX_DETAIL_FETCHES : Math.min(limit, MAX_DETAIL_FETCHES);
  candidates = candidates.slice(0, cap);

  // 4. Hydrate descriptions via the per-posting detail endpoint.
  const jobs = await Promise.all(candidates.map(async (p) => {
    const externalPath = p.externalPath || ''; // already begins with '/job/...'
    let info = {};
    // Without a path there is no detail URL; requesting `base` itself
    // would only waste a round trip, so fall back to list fields.
    if (externalPath) {
      try {
        // externalPath already carries the '/job/...' segment, so it is
        // concatenated directly onto the CXS base. Inserting another
        // '/job' here yields '/job/job/...' which Workday rejects (422).
        const dResp = await fetch(`${base}${externalPath}`);
        if (dResp.ok) {
          const detail = (await dResp.json()) || {};
          info = detail.jobPostingInfo || {};
        }
      } catch {
        // Deliberate best-effort: a failed detail request falls back to
        // list fields with an empty description.
      }
    }

    return normalize({
      companySlug: slug,
      company: ctx.companyName || slug,
      title: p.title || info.title || '',
      department: '',
      location: info.location || p.locationsText || '',
      description: stripHtml(info.jobDescription || ''),
      url: `https://${tenant}.${env}.myworkdayjobs.com/${site}${externalPath}`,
      postedAt: parseWorkdayDate(info.startDate) || normalizePostedOn(p.postedOn),
      salary: null, // normalizer extracts from description text
      metadata: {
        workdayTenant: tenant,
        workdayEnv: env,
        workdaySite: site,
        externalPath,
      },
    }, 'workday');
  }));

  return jobs;
}
|
|
143
|
+
|
|
144
|
+
/**
 * Workday list `postedOn` is a relative string ("Posted Today",
 * "Posted 5 Days Ago", "Posted 30+ Days Ago"). Decide membership in
 * the last N days WITHOUT a network call.
 *
 * Anything undecidable is KEPT (returns true): a missing or unparseable
 * `postedOn`, and a NaN window. The library re-filters on the real
 * postedAt after hydration, so a false-keep here is corrected
 * downstream, whereas a false-drop would be permanent.
 *
 * @param {string} postedOn - relative posting label from the list API
 * @param {number} days - size of the recency window, in days
 * @returns {boolean} false only when the label clearly falls outside the window
 */
function withinDays(postedOn, days) {
  if (!postedOn) return true;
  // NaN passes a `typeof === 'number'` guard but makes every comparison
  // below false, which would silently drop every parseable posting.
  if (Number.isNaN(days)) return true;

  const label = String(postedOn).toLowerCase();
  if (label.includes('today')) return days >= 0;
  if (label.includes('yesterday')) return days >= 1;

  // "30+ days ago" is a lower bound; it is compared by its number, so it
  // is dropped only when even that lower bound exceeds the window.
  const m = label.match(/(\d+)\+?\s*days?\s*ago/);
  if (m) return Number.parseInt(m[1], 10) <= days;

  return true;
}
|
|
160
|
+
|
|
161
|
+
/**
 * Coerce a Workday list `postedOn` into an approximate ISO timestamp so
 * the library's postedWithinDays re-filter has a value to compare.
 *
 * Relative labels ("Posted Today", "Posted Yesterday",
 * "Posted 5 Days Ago", "Posted 30+ Days Ago") are resolved first with a
 * deterministic match. Only when none matches is the value handed to
 * `Date`, whose parsing of non-ISO text is implementation-defined and
 * therefore should not be the first thing tried on free-form labels.
 *
 * @param {*} v - list `postedOn` value (relative label or date-like value)
 * @returns {string|null} ISO 8601 string, or null when nothing usable
 *   can be derived (including a day count too large to form a valid date)
 */
function normalizePostedOn(v) {
  if (!v) return null;

  const MS_PER_DAY = 86400000;
  const label = String(v).toLowerCase();

  let daysAgo = null;
  if (label.includes('today')) {
    daysAgo = 0;
  } else if (label.includes('yesterday')) {
    daysAgo = 1;
  } else {
    const m = label.match(/(\d+)\+?\s*days?\s*ago/);
    if (m) daysAgo = Number.parseInt(m[1], 10);
  }

  if (daysAgo !== null) {
    const approx = new Date(Date.now() - daysAgo * MS_PER_DAY);
    // toISOString() throws RangeError on an invalid date, so guard it.
    return Number.isFinite(approx.getTime()) ? approx.toISOString() : null;
  }

  const direct = new Date(v);
  return Number.isFinite(direct.getTime()) ? direct.toISOString() : null;
}
|
|
180
|
+
|
|
181
|
+
/**
 * Convert a Workday detail `startDate` (e.g. "2026-05-01" or
 * "May 1, 2026") to an ISO 8601 string.
 *
 * @param {*} s - raw `startDate` value from the detail payload
 * @returns {string|null} ISO string, or null when the value is empty
 *   or `Date` cannot parse it
 */
function parseWorkdayDate(s) {
  if (!s) return null;

  const epochMs = new Date(s).getTime();
  if (!Number.isFinite(epochMs)) return null;

  return new Date(epochMs).toISOString();
}
|
|
190
|
+
|
|
191
|
+
/**
 * Presence probe for Workday — deliberately a constant.
 *
 * A Workday board is addressed by a {tenant, env, site} triple that
 * cannot be derived from a company name, so there is nothing to probe.
 * Always resolving to false keeps name-based detection from ever
 * selecting Workday.
 *
 * @returns {Promise<boolean>} always resolves to false
 */
export async function hasWorkday() {
  // No network I/O on purpose: the answer does not depend on any input.
  return false;
}
|
package/src/cli.js
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* jd-intel CLI
|
|
5
5
|
*
|
|
6
6
|
* Usage:
|
|
7
|
-
* jd-intel fetch <company> [--ats
|
|
7
|
+
* jd-intel fetch <company> [--ats <platform>] [--filter keyword|pattern]
|
|
8
8
|
* jd-intel detect <company>
|
|
9
9
|
* jd-intel registry search <query>
|
|
10
10
|
*/
|
|
@@ -18,12 +18,12 @@ async function main() {
|
|
|
18
18
|
switch (command) {
|
|
19
19
|
case 'fetch': {
|
|
20
20
|
const company = args[0];
|
|
21
|
-
if (!company) { console.error('Usage: jd-intel fetch <company> [--ats
|
|
21
|
+
if (!company) { console.error('Usage: jd-intel fetch <company> [--ats <platform>] (omit --ats to auto-detect; run "jd-intel" for the platform list)'); process.exit(1); }
|
|
22
22
|
const getArg = (flag) => {
|
|
23
23
|
const idx = args.indexOf(flag);
|
|
24
24
|
return idx >= 0 ? args[idx + 1] : undefined;
|
|
25
25
|
};
|
|
26
|
-
|
|
26
|
+
let ats = getArg('--ats');
|
|
27
27
|
const titleFilter = getArg('--title-filter');
|
|
28
28
|
const filter = getArg('--filter');
|
|
29
29
|
const postedWithinRaw = getArg('--posted-within-days');
|
|
@@ -35,6 +35,28 @@ async function main() {
|
|
|
35
35
|
const limitRaw = getArg('--limit');
|
|
36
36
|
const limit = limitRaw !== undefined ? Number(limitRaw) : undefined;
|
|
37
37
|
|
|
38
|
+
// Workday is keyed by a {tenant, env, site} triple, not a slug.
|
|
39
|
+
// Supplying it here makes a Workday board reachable without a
|
|
40
|
+
// registry entry; presence of the flags infers --ats workday.
|
|
41
|
+
const wdTenant = getArg('--workday-tenant');
|
|
42
|
+
const wdEnv = getArg('--workday-env');
|
|
43
|
+
const wdSite = getArg('--workday-site');
|
|
44
|
+
let config;
|
|
45
|
+
if (wdTenant || wdEnv || wdSite) {
|
|
46
|
+
if (!wdTenant || !wdEnv || !wdSite) {
|
|
47
|
+
console.error('Workday needs all three: --workday-tenant, --workday-env, --workday-site.');
|
|
48
|
+
console.error('Find them in the careers URL: https://{tenant}.{env}.myworkdayjobs.com/{site}');
|
|
49
|
+
console.error('e.g. https://expedia.wd108.myworkdayjobs.com/search -> --workday-tenant expedia --workday-env wd108 --workday-site search');
|
|
50
|
+
process.exit(1);
|
|
51
|
+
}
|
|
52
|
+
if (ats && ats !== 'workday') {
|
|
53
|
+
console.error(`--ats ${ats} conflicts with the --workday-* flags (workday is inferred). Drop one.`);
|
|
54
|
+
process.exit(1);
|
|
55
|
+
}
|
|
56
|
+
config = { tenant: wdTenant, env: wdEnv, site: wdSite };
|
|
57
|
+
ats = 'workday';
|
|
58
|
+
}
|
|
59
|
+
|
|
38
60
|
const parts = [];
|
|
39
61
|
if (titleFilter) parts.push(`title: ${titleFilter}`);
|
|
40
62
|
if (filter) parts.push(`topic: ${filter}`);
|
|
@@ -43,10 +65,23 @@ async function main() {
|
|
|
43
65
|
if (locationExcludes) parts.push(`loc-: ${locationExcludes.join('|')}`);
|
|
44
66
|
const suffix = parts.length ? ` [${parts.join(', ')}]` : '';
|
|
45
67
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
});
|
|
68
|
+
const atsLabel = config
|
|
69
|
+
? ` (workday: ${config.tenant}/${config.env}/${config.site})`
|
|
70
|
+
: ats ? ` (${ats})` : ' (auto-detect)';
|
|
71
|
+
console.log(`Fetching jobs from ${company}${atsLabel}${suffix}...`);
|
|
72
|
+
let jobs;
|
|
73
|
+
try {
|
|
74
|
+
jobs = await fetchJobs({
|
|
75
|
+
company, ats, config, titleFilter, filter, postedWithinDays, locationIncludes, locationExcludes, limit,
|
|
76
|
+
});
|
|
77
|
+
} catch (err) {
|
|
78
|
+
if (config) {
|
|
79
|
+
console.error(`Could not reach that Workday board (${config.tenant}/${config.env}/${config.site}): ${err.message}`);
|
|
80
|
+
console.error('Verify the triple against the careers URL: https://{tenant}.{env}.myworkdayjobs.com/{site}');
|
|
81
|
+
process.exit(1);
|
|
82
|
+
}
|
|
83
|
+
throw err;
|
|
84
|
+
}
|
|
50
85
|
console.log(`Found ${jobs.length} jobs\n`);
|
|
51
86
|
|
|
52
87
|
for (const job of jobs.slice(0, 20)) {
|
|
@@ -112,7 +147,18 @@ Usage:
|
|
|
112
147
|
jd-intel registry search <query>
|
|
113
148
|
|
|
114
149
|
Fetch options:
|
|
115
|
-
--ats
|
|
150
|
+
--ats <platform> Skip auto-detect. One of: greenhouse, lever,
|
|
151
|
+
ashby, smartrecruiters, teamtailor, recruitee,
|
|
152
|
+
workday. Omit to auto-detect (registry-backed).
|
|
153
|
+
--workday-tenant T Workday is keyed by a {tenant, env, site}
|
|
154
|
+
--workday-env wdN triple, not a slug. Registered Workday
|
|
155
|
+
--workday-site S companies work via auto-detect or --ats
|
|
156
|
+
workday; for any other Workday board pass
|
|
157
|
+
all three, read from the careers URL
|
|
158
|
+
https://{tenant}.{env}.myworkdayjobs.com/{site}
|
|
159
|
+
e.g. https://expedia.wd108.myworkdayjobs.com/search
|
|
160
|
+
-> --workday-tenant expedia --workday-env wd108
|
|
161
|
+
--workday-site search
|
|
116
162
|
--title-filter pattern Regex matched against TITLE only (role identity)
|
|
117
163
|
--filter pattern Regex matched across title, department, description (topic/scope)
|
|
118
164
|
--posted-within-days N Only jobs posted in the last N days
|
|
@@ -132,6 +178,7 @@ Examples:
|
|
|
132
178
|
jd-intel fetch stripe --title-filter "product manager" --filter "growth|platform"
|
|
133
179
|
jd-intel fetch ramp --location-include "United States,US,Remote - US" --location-exclude "London,Dublin"
|
|
134
180
|
jd-intel fetch notion --ats ashby --title-filter engineer --posted-within-days 14
|
|
181
|
+
jd-intel fetch expedia --workday-tenant expedia --workday-env wd108 --workday-site search
|
|
135
182
|
jd-intel detect figma
|
|
136
183
|
jd-intel registry search fintech`);
|
|
137
184
|
}
|
package/src/index.js
CHANGED
|
@@ -2,11 +2,12 @@
|
|
|
2
2
|
* jd-intel — JD intelligence toolkit: fetch, normalize, and search job descriptions across every major ATS.
|
|
3
3
|
*
|
|
4
4
|
* Fetches, normalizes, and enriches job data from public ATS APIs
|
|
5
|
-
* (Greenhouse, Lever, Ashby
|
|
5
|
+
* (Greenhouse, Lever, Ashby, SmartRecruiters, Teamtailor, Recruitee,
|
|
6
|
+
* Workday) into a unified schema.
|
|
6
7
|
*/
|
|
7
8
|
|
|
8
9
|
import { ADAPTERS, ATS_NAMES } from './adapters/index.js';
|
|
9
|
-
import { loadRegistry, searchRegistry, detectAts, findAtsBySlug } from './registry.js';
|
|
10
|
+
import { loadRegistry, searchRegistry, detectAts, findAtsBySlug, findEntryBySlug } from './registry.js';
|
|
10
11
|
import { applyFilters } from './filters.js';
|
|
11
12
|
|
|
12
13
|
/**
|
|
@@ -15,6 +16,7 @@ import { applyFilters } from './filters.js';
|
|
|
15
16
|
* @param {Object} options
|
|
16
17
|
* @param {string} options.company - Company slug or name
|
|
17
18
|
* @param {string} [options.ats] - Specific ATS platform. If omitted, auto-detects.
|
|
19
|
+
* @param {object} [options.config] - Adapter-specific config (e.g. Workday {tenant, env, site}). Bypasses the registry; the only way to reach a Workday company not in the registry.
|
|
18
20
|
* @param {string} [options.titleFilter] - Regex matched against title only. Use for role identity ("product manager", "staff engineer").
|
|
19
21
|
* @param {string} [options.filter] - Regex matched across title, department, description. Use for topic/scope.
|
|
20
22
|
* @param {number} [options.postedWithinDays] - Only return jobs posted within N days.
|
|
@@ -26,6 +28,7 @@ import { applyFilters } from './filters.js';
|
|
|
26
28
|
export async function fetchJobs({
|
|
27
29
|
company,
|
|
28
30
|
ats,
|
|
31
|
+
config,
|
|
29
32
|
titleFilter,
|
|
30
33
|
filter,
|
|
31
34
|
postedWithinDays,
|
|
@@ -38,21 +41,51 @@ export async function fetchJobs({
|
|
|
38
41
|
// Unified slug normalization: strip all non-alphanumeric (matches detectAts)
|
|
39
42
|
const slug = company.toLowerCase().replace(/[^a-z0-9]/g, '');
|
|
40
43
|
|
|
44
|
+
// Filter context is passed as an additive 2nd arg to adapters. Existing
|
|
45
|
+
// adapters declare fetch{Name}(slug) and ignore extra positional args
|
|
46
|
+
// (JS no-op), so this is backward-compatible. Filter-aware adapters
|
|
47
|
+
// (e.g. Workday) use it to avoid mass detail-fetching on huge tenants.
|
|
48
|
+
const filterContext = { titleFilter, filter, postedWithinDays, locationIncludes, locationExcludes, limit };
|
|
49
|
+
|
|
41
50
|
let jobs;
|
|
42
51
|
if (ats) {
|
|
43
52
|
const adapter = ADAPTERS[ats];
|
|
44
53
|
if (!adapter) throw new Error(`Unknown ATS: ${ats}. Supported: ${ATS_NAMES.join(', ')}`);
|
|
45
|
-
|
|
54
|
+
// Explicit ATS: an explicitly passed config wins (the only path that
|
|
55
|
+
// can reach a Workday company not in the registry). With no explicit
|
|
56
|
+
// config, fall back to the registry so config-keyed adapters
|
|
57
|
+
// (Workday) and canonically-cased registry slugs (SmartRecruiters
|
|
58
|
+
// "Visa") also work on the explicit path, not just under auto-detect.
|
|
59
|
+
let fetchSlug = slug;
|
|
60
|
+
let cfg = config;
|
|
61
|
+
let companyName;
|
|
62
|
+
if (!cfg) {
|
|
63
|
+
const hit = await findEntryBySlug(slug);
|
|
64
|
+
if (hit && hit.ats === ats) {
|
|
65
|
+
fetchSlug = hit.entry.slug;
|
|
66
|
+
cfg = hit.entry.config;
|
|
67
|
+
companyName = hit.entry.name;
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
jobs = await adapter.fetch(fetchSlug, { config: cfg, companyName, filterContext });
|
|
46
71
|
} else {
|
|
47
72
|
// Consult registry first — if we know which ATS this company uses,
|
|
48
73
|
// skip probing the others (saves API calls, clearer error semantics).
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
74
|
+
// The registry entry carries the canonical slug (so the adapter is
|
|
75
|
+
// called with the ATS's own casing, e.g. SmartRecruiters "Visa") and
|
|
76
|
+
// any adapter-specific config (the Workday {tenant,env,site} triple).
|
|
77
|
+
const hit = await findEntryBySlug(slug);
|
|
78
|
+
if (hit) {
|
|
79
|
+
jobs = await ADAPTERS[hit.ats].fetch(hit.entry.slug, {
|
|
80
|
+
config: config || hit.entry.config,
|
|
81
|
+
companyName: hit.entry.name,
|
|
82
|
+
filterContext,
|
|
83
|
+
});
|
|
52
84
|
} else {
|
|
53
|
-
// Discovery mode: company not in registry, probe all adapters
|
|
85
|
+
// Discovery mode: company not in registry, probe all adapters.
|
|
86
|
+
// (Registry-only adapters like Workday bail here via their guard.)
|
|
54
87
|
const results = await Promise.allSettled(
|
|
55
|
-
Object.entries(ADAPTERS).map(async ([name, adapter]) => adapter.fetch(slug))
|
|
88
|
+
Object.entries(ADAPTERS).map(async ([name, adapter]) => adapter.fetch(slug, { filterContext }))
|
|
56
89
|
);
|
|
57
90
|
jobs = results
|
|
58
91
|
.filter(r => r.status === 'fulfilled')
|
|
@@ -79,7 +112,7 @@ export { detectAts } from './registry.js';
|
|
|
79
112
|
|
|
80
113
|
/**
|
|
81
114
|
* Look up which ATS a slug belongs to in the registry (cached, no network).
|
|
82
|
-
* Returns the ATS name ("greenhouse"
|
|
115
|
+
* Returns the ATS name (e.g. "greenhouse", "workday") or null if not in registry.
|
|
83
116
|
*/
|
|
84
117
|
export { findAtsBySlug } from './registry.js';
|
|
85
118
|
|
|
@@ -91,6 +124,7 @@ export const registry = {
|
|
|
91
124
|
search: searchRegistry,
|
|
92
125
|
detect: detectAts,
|
|
93
126
|
findAtsBySlug,
|
|
127
|
+
findEntryBySlug,
|
|
94
128
|
};
|
|
95
129
|
|
|
96
130
|
// Re-export individual adapters for direct use
|
package/src/registry.js
CHANGED
|
@@ -25,7 +25,7 @@ export async function loadRegistry(ats) {
|
|
|
25
25
|
|
|
26
26
|
// Load all
|
|
27
27
|
const all = {};
|
|
28
|
-
for (const platform of ['greenhouse', 'lever', 'ashby']) {
|
|
28
|
+
for (const platform of ['greenhouse', 'lever', 'ashby', 'smartrecruiters', 'teamtailor', 'recruitee', 'workday']) {
|
|
29
29
|
try {
|
|
30
30
|
const data = await readFile(join(REGISTRY_DIR, `${platform}.json`), 'utf-8');
|
|
31
31
|
all[platform] = JSON.parse(data);
|
|
@@ -58,14 +58,40 @@ export async function searchRegistry(query) {
|
|
|
58
58
|
return results;
|
|
59
59
|
}
|
|
60
60
|
|
|
61
|
+
// Registry slugs are stored in each ATS's canonical form (SmartRecruiters
// uses PascalCase, e.g. "Visa"), while callers pass a lowercased,
// alphanumeric-only slug. Both sides are reduced to this normalized key
// before comparison so registry-first routing still matches them.
const normSlug = (s) => {
  const lowered = String(s).toLowerCase();
  return lowered.replace(/[^a-z0-9]+/g, '');
};
|
|
66
|
+
|
|
61
67
|
/**
 * Resolve a company slug to the ATS whose registry lists it.
 *
 * Matching compares normalized slugs (see normSlug), so case and
 * punctuation differences between the caller's slug and the registry's
 * canonical slug do not matter. Platforms are checked in the order the
 * loaded registry object enumerates them; the first match wins.
 *
 * @param {string} slug - company slug to look up
 * @returns {Promise<string|null>} ATS name (e.g., "greenhouse"), or null
 *   if no registry lists the slug
 */
export async function findAtsBySlug(slug) {
  const registry = await loadRegistry();
  const wanted = normSlug(slug);

  const platform = Object.keys(registry).find(
    (name) => registry[name].some((company) => normSlug(company.slug) === wanted),
  );

  return platform === undefined ? null : platform;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Resolve a company slug to its full registry entry plus owning ATS.
 *
 * Where findAtsBySlug yields only the ATS name, this hands back the
 * entry object itself so callers can read adapter-specific fields such
 * as the Workday {tenant, env, site} config. Slugs are compared in
 * normalized form (see normSlug); the first matching platform wins.
 *
 * @param {string} slug - company slug to look up
 * @returns {Promise<{ats: string, entry: object}|null>} match, or null
 *   if no registry lists the slug
 */
export async function findEntryBySlug(slug) {
  const registry = await loadRegistry();
  const wanted = normSlug(slug);

  for (const ats of Object.keys(registry)) {
    const entry = registry[ats].find((company) => normSlug(company.slug) === wanted);
    if (entry) {
      return { ats, entry };
    }
  }

  return null;
}
|