jd-intel 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -3,7 +3,8 @@
3
3
  [![MIT License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
4
4
  [![Node.js 18+](https://img.shields.io/badge/node-18%2B-green.svg)](https://nodejs.org)
5
5
  [![npm](https://img.shields.io/npm/v/jd-intel.svg)](https://www.npmjs.com/package/jd-intel)
6
- [![npm downloads](https://img.shields.io/npm/dw/jd-intel-mcp.svg)](https://www.npmjs.com/package/jd-intel-mcp)
6
+ [![jd-intel downloads](https://badgen.net/npm/dt/jd-intel?label=jd-intel)](https://www.npmjs.com/package/jd-intel)
7
+ [![jd-intel-mcp downloads](https://badgen.net/npm/dt/jd-intel-mcp?label=jd-intel-mcp)](https://www.npmjs.com/package/jd-intel-mcp)
7
8
  [![GitHub stars](https://img.shields.io/github/stars/prPMDev/jd-intel.svg?style=flat)](https://github.com/prPMDev/jd-intel/stargazers)
8
9
 
9
10
  > **Stop pasting job descriptions into AI assistants. Let your AI fetch them directly.**
@@ -41,7 +42,7 @@ Because scraping breaks where jd-intel doesn't:
41
42
  - **Full JDs when browsing fails.** SPA-rendered boards, slow loads, auth walls, and geo-restrictions block a browser. They don't block a public API call.
42
43
  - **Structured data, not HTML soup.** Salary, location type, department, and clean markdown, normalized across every ATS.
43
44
  - **No keys, no browser.** Public APIs only. Runs anywhere your AI does.
44
- - **One schema, every platform.** Greenhouse, Lever, Ashby, SmartRecruiters, TeamTailor, Recruitee return the same shape.
45
+ - **One schema, every platform.** Greenhouse, Lever, Ashby, SmartRecruiters, TeamTailor, Recruitee, Workday return the same shape.
45
46
 
46
47
  ---
47
48
 
@@ -209,8 +210,8 @@ No custom parsing per company.
209
210
  | SmartRecruiters | Shipped | Enterprise and mid-market |
210
211
  | TeamTailor | Shipped | European startups and scale-ups |
211
212
  | Recruitee | Shipped | Dutch / EU SMBs and scale-ups |
213
+ | Workday | Shipped | Large enterprises (registry-keyed) |
212
214
  | Personio | Planned | German / EU mid-market |
213
- | Workday | Planned | Large enterprises (scoped scraper) |
214
215
 
215
216
  Adding a new ATS is a single adapter file. See [Contributing](#contributing).
216
217
 
@@ -235,10 +236,10 @@ All filters AND together. Deep dive on patterns and gotchas: [docs/filters.md](d
235
236
 
236
237
  **Shipped**
237
238
  - Library, CLI, and MCP server (three surfaces of one toolkit)
238
- - Greenhouse, Ashby, Lever, SmartRecruiters, TeamTailor, Recruitee adapters
239
+ - Greenhouse, Ashby, Lever, SmartRecruiters, TeamTailor, Recruitee, Workday adapters
239
240
  - Title, topic, location, and date filters
240
241
  - Salary extraction from JD text
241
- - Verified company registry (155+ companies)
242
+ - Verified company registry (160+ companies)
242
243
 
243
244
  **Next**
244
245
  - Personio adapter (German / EU mid-market)
@@ -246,7 +247,6 @@ All filters AND together. Deep dive on patterns and gotchas: [docs/filters.md](d
246
247
 
247
248
  **Planned**
248
249
  - Workable adapter (parked — needs SPA shortcode resolution)
249
- - Workday support (scoped scraper — large enterprise universe)
250
250
  - Temporal tracking (when roles open, close, reopen)
251
251
  - Change detection
252
252
  - Resume-aware fit scoring
@@ -257,7 +257,7 @@ All filters AND together. Deep dive on patterns and gotchas: [docs/filters.md](d
257
257
 
258
258
  **Add a company to the registry:** submit a PR to the appropriate file in `registry/`.
259
259
 
260
- **Add an ATS adapter:** new file in `src/adapters/`. One adapter, one file. Follow the pattern of the existing three.
260
+ **Add an ATS adapter:** new file in `src/adapters/`. One adapter, one file. Follow the pattern of the existing adapters.
261
261
 
262
262
  **Request a company:** [open an issue](https://github.com/prPMDev/jd-intel/issues/new). Tell me who's missing.
263
263
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "jd-intel",
3
- "version": "0.4.0",
4
- "description": "Fetch and normalize job descriptions across every major ATS (Greenhouse, Lever, Ashby) — for your AI assistant, no copy-paste.",
3
+ "version": "0.5.0",
4
+ "description": "Fetch and normalize job descriptions across every major ATS (Greenhouse, Lever, Ashby, Workday, and more) — for your AI assistant, no copy-paste.",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
7
7
  "bin": {
@@ -0,0 +1,8 @@
1
+ [
2
+ {"slug": "cisco", "name": "Cisco", "sector": "networking", "config": {"tenant": "cisco", "env": "wd5", "site": "Cisco_Careers"}},
3
+ {"slug": "salesforce", "name": "Salesforce", "sector": "crm / saas", "config": {"tenant": "salesforce", "env": "wd12", "site": "External_Career_Site"}},
4
+ {"slug": "bankofamerica", "name": "Bank of America", "sector": "banking", "config": {"tenant": "ghr", "env": "wd1", "site": "Lateral-US"}},
5
+ {"slug": "adobe", "name": "Adobe", "sector": "creative software", "config": {"tenant": "adobe", "env": "wd5", "site": "external_experienced"}},
6
+ {"slug": "nvidia", "name": "Nvidia", "sector": "semiconductors", "config": {"tenant": "nvidia", "env": "wd5", "site": "NVIDIAExternalCareerSite"}},
7
+ {"slug": "servicetitan", "name": "ServiceTitan", "sector": "vertical saas", "config": {"tenant": "servicetitan", "env": "wd1", "site": "ServiceTitan"}}
8
+ ]
@@ -4,6 +4,7 @@ export { fetchAshby, hasAshby } from './ashby.js';
4
4
  export { fetchSmartrecruiters, hasSmartrecruiters } from './smartrecruiters.js';
5
5
  export { fetchTeamtailor, hasTeamtailor } from './teamtailor.js';
6
6
  export { fetchRecruitee, hasRecruitee } from './recruitee.js';
7
+ export { fetchWorkday, hasWorkday } from './workday.js';
7
8
 
8
9
  export const ADAPTERS = {
9
10
  greenhouse: { fetch: (...args) => import('./greenhouse.js').then(m => m.fetchGreenhouse(...args)), has: (...args) => import('./greenhouse.js').then(m => m.hasGreenhouse(...args)) },
@@ -12,6 +13,7 @@ export const ADAPTERS = {
12
13
  smartrecruiters: { fetch: (...args) => import('./smartrecruiters.js').then(m => m.fetchSmartrecruiters(...args)), has: (...args) => import('./smartrecruiters.js').then(m => m.hasSmartrecruiters(...args)) },
13
14
  teamtailor: { fetch: (...args) => import('./teamtailor.js').then(m => m.fetchTeamtailor(...args)), has: (...args) => import('./teamtailor.js').then(m => m.hasTeamtailor(...args)) },
14
15
  recruitee: { fetch: (...args) => import('./recruitee.js').then(m => m.fetchRecruitee(...args)), has: (...args) => import('./recruitee.js').then(m => m.hasRecruitee(...args)) },
16
+ workday: { fetch: (...args) => import('./workday.js').then(m => m.fetchWorkday(...args)), has: (...args) => import('./workday.js').then(m => m.hasWorkday(...args)) },
15
17
  };
16
18
 
17
19
  export const ATS_NAMES = Object.keys(ADAPTERS);
@@ -0,0 +1,198 @@
1
+ import { normalize, stripHtml } from '../normalizer.js';
2
+
const MAX_DETAIL_FETCHES = 100; // upper bound on per-posting detail requests
const LIST_PAGE_SIZE = 20; // postings requested per list call
const LIST_PAGE_HARD_CAP = 100; // <= 2000 list items scanned per request

/**
 * Fetch jobs from a Workday tenant via the public "CXS" JSON API.
 *
 * REGISTRY-ONLY. Workday is keyed by a {tenant, env, site} triple that
 * is not derivable from the company name, so this adapter only does
 * work when `ctx.config` carries all three values. Without them it
 * returns [] immediately and performs no network call.
 *
 * Two-step flow:
 *   1. Page the list endpoint (title / locationsText / postedOn, no
 *      descriptions).
 *   2. Apply the list-evaluable filters, cap the candidate set, then
 *      fetch the per-posting detail endpoint for the full description.
 *
 * Failure semantics:
 *   - First list page returns 404            -> [] (wrong site / no board)
 *   - First list page fails any other way    -> throws
 *   - Any failure on a later list page       -> stop paging, keep what
 *                                               was already collected
 *   - A detail request fails                 -> that job is still
 *                                               returned, built from the
 *                                               list fields with an
 *                                               empty description
 *
 * @param {string} slug - normalized company slug (registry routing key)
 * @param {object} [ctx] - { config:{tenant,env,site}, companyName, filterContext }
 * @returns {Promise<Array>} Normalized job objects
 * @throws {Error} when the first list page cannot be fetched (non-404)
 */
export async function fetchWorkday(slug, ctx = {}) {
  const cfg = ctx.config;
  if (!cfg || !cfg.tenant || !cfg.env || !cfg.site) return []; // registry-only guard

  const { tenant, env, site } = cfg;
  const base = `https://${tenant}.${env}.myworkdayjobs.com/wday/cxs/${tenant}/${site}`;
  const fc = ctx.filterContext || {};

  // 1. Page the cheap list (no descriptions in list responses).
  const postings = [];
  let knownTotal = 0;
  let offset = 0;
  for (let pages = 0; pages < LIST_PAGE_HARD_CAP; pages += 1) {
    const isFirstPage = offset === 0;
    let data;
    try {
      const resp = await fetch(`${base}/jobs`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ appliedFacets: {}, limit: LIST_PAGE_SIZE, offset, searchText: '' }),
      });

      if (!resp.ok) {
        // Only the first page decides "no such board" vs. hard error.
        // A failure further in must not throw away earlier pages.
        if (!isFirstPage) break;
        if (resp.status === 404) return []; // wrong site / no such board
        throw new Error(`Workday API error for ${slug} (${tenant}/${env}/${site}): ${resp.status}`);
      }

      data = await resp.json();
    } catch (err) {
      // Network / JSON errors follow the same rule as HTTP errors:
      // fatal on the first page, best-effort afterwards.
      if (isFirstPage) throw err;
      break;
    }

    const body = data || {};
    const page = Array.isArray(body.jobPostings) ? body.jobPostings : [];
    postings.push(...page);
    offset += LIST_PAGE_SIZE;

    // Remember the last positive total instead of trusting every page.
    // NOTE(review): some tenants appear to report `total` only on the
    // first page (0 afterwards) — confirm; carrying the value forward
    // is harmless when every page does report it.
    if (typeof body.total === 'number' && body.total > 0) knownTotal = body.total;

    if (page.length === 0) break;
    const reachedEnd = knownTotal > 0
      ? offset >= knownTotal
      : page.length < LIST_PAGE_SIZE; // no total at all: stop on a short page
    if (reachedEnd) break;
  }

  // 2. Filter-aware candidate selection BEFORE the N+1 detail cost.
  //    The list carries title/locationsText/postedOn — enough to apply
  //    titleFilter, location, and recency without descriptions.
  let candidates = postings;

  if (fc.titleFilter) {
    const re = new RegExp(fc.titleFilter, 'i');
    candidates = candidates.filter(p => re.test(p.title || ''));
  }
  if (Array.isArray(fc.locationIncludes) && fc.locationIncludes.length > 0) {
    const inc = fc.locationIncludes.map(s => String(s).toLowerCase());
    candidates = candidates.filter(p => {
      const loc = (p.locationsText || '').toLowerCase();
      return inc.some(s => loc.includes(s));
    });
  }
  if (Array.isArray(fc.locationExcludes) && fc.locationExcludes.length > 0) {
    const exc = fc.locationExcludes.map(s => String(s).toLowerCase());
    candidates = candidates.filter(p => {
      const loc = (p.locationsText || '').toLowerCase();
      return !exc.some(s => loc.includes(s));
    });
  }
  if (typeof fc.postedWithinDays === 'number') {
    candidates = candidates.filter(p => withinDays(p.postedOn, fc.postedWithinDays));
  }

  // 3. Bound the detail-fetch set.
  //    A description `filter` is applied by the caller AFTER this
  //    returns, so in that case keep the full backstop rather than
  //    truncating to `limit` (which could hydrate only non-matching
  //    jobs while better matches go unscanned). Smarter pagination and
  //    rate-limited concurrency are tracked in #26 / #7.
  const limit = typeof fc.limit === 'number' && fc.limit > 0 ? fc.limit : 100;
  const cap = fc.filter ? MAX_DETAIL_FETCHES : Math.min(limit, MAX_DETAIL_FETCHES);
  candidates = candidates.slice(0, cap);

  // 4. Hydrate descriptions via the per-posting detail endpoint.
  const jobs = await Promise.all(candidates.map(async (p) => {
    const externalPath = p.externalPath || ''; // already begins with '/job/...'
    let info = {};
    // Without a path there is no detail resource; requesting `base`
    // alone would only waste a round trip.
    if (externalPath) {
      try {
        // externalPath already carries the '/job/...' segment, so it is
        // concatenated directly onto the CXS base. Inserting another
        // '/job' here yields '/job/job/...' which Workday rejects (422).
        const dResp = await fetch(`${base}${externalPath}`);
        if (dResp.ok) {
          const detail = await dResp.json();
          info = detail.jobPostingInfo || {};
        }
      } catch {
        // Deliberate best-effort: fall back to list fields with an
        // empty description rather than failing the whole batch.
      }
    }

    return normalize({
      companySlug: slug,
      company: ctx.companyName || slug,
      title: p.title || info.title || '',
      department: '',
      location: info.location || p.locationsText || '',
      description: stripHtml(info.jobDescription || ''),
      url: `https://${tenant}.${env}.myworkdayjobs.com/${site}${externalPath}`,
      postedAt: parseWorkdayDate(info.startDate) || normalizePostedOn(p.postedOn),
      salary: null, // normalizer extracts from description text
      metadata: {
        workdayTenant: tenant,
        workdayEnv: env,
        workdaySite: site,
        externalPath,
      },
    }, 'workday');
  }));

  return jobs;
}
143
+
/**
 * Parse a Workday list `postedOn` label ("Posted Today",
 * "Posted Yesterday", "Posted 5 Days Ago", "Posted 30+ Days Ago") into
 * a whole number of days. Shared by withinDays() and
 * normalizePostedOn() so the two cannot drift apart.
 *
 * "30+" is read as exactly 30: the lower bound is all the label offers.
 *
 * @param {*} label - relative label; coerced with String()
 * @returns {number|null} days ago, or null when the label is not relative
 */
function parseRelativeDaysAgo(label) {
  const s = String(label).toLowerCase();
  if (/today/.test(s)) return 0;
  if (/yesterday/.test(s)) return 1;
  const m = s.match(/(\d+)\+?\s*days?\s*ago/);
  return m ? Number.parseInt(m[1], 10) : null;
}

/**
 * Decide, without a network call, whether a Workday list `postedOn`
 * label falls inside the last `days` days.
 *
 * Missing or unparseable labels are kept (true): this is only a cheap
 * pre-filter, so it errs on the side of keeping a posting.
 *
 * @param {*} postedOn - relative label from the list response
 * @param {number} days - window size in days
 * @returns {boolean} false only when the label is provably older than `days`
 */
function withinDays(postedOn, days) {
  if (!postedOn) return true;
  const daysAgo = parseRelativeDaysAgo(postedOn);
  if (daysAgo === null) return true;
  return daysAgo <= days;
}

/**
 * Coerce a Workday list `postedOn` value into an approximate ISO
 * timestamp so date-based filtering has a value to compare.
 *
 * The relative label is tried FIRST. Parsing free-form text with
 * `new Date(...)` is implementation-defined, so a lenient engine could
 * otherwise turn a label such as "Posted 5 Days Ago" into an arbitrary
 * calendar date. Only values that are not relative labels fall through
 * to the Date constructor.
 *
 * @param {*} v - relative label, date string, or Date-compatible value
 * @returns {string|null} ISO 8601 timestamp, or null when nothing parses
 */
function normalizePostedOn(v) {
  if (!v) return null;
  const daysAgo = parseRelativeDaysAgo(v);
  if (daysAgo !== null) {
    return new Date(Date.now() - daysAgo * 86400000).toISOString();
  }
  const direct = new Date(v);
  return Number.isFinite(direct.getTime()) ? direct.toISOString() : null;
}
180
+
/**
 * Convert a Workday detail `startDate` ("2026-05-01" or "May 1, 2026")
 * into an ISO 8601 timestamp.
 *
 * ISO date-only strings are interpreted as UTC midnight by the Date
 * constructor, but a "Month D, YYYY" string is interpreted in the
 * host's LOCAL timezone. Serialising that with toISOString() would
 * shift the calendar date on any host that is not on UTC, so that one
 * shape is re-anchored to UTC midnight of the same calendar date. Both
 * documented forms therefore produce the same result everywhere.
 *
 * @param {*} s - date string (or any Date-compatible value)
 * @returns {string|null} ISO 8601 timestamp, or null if unparseable
 */
function parseWorkdayDate(s) {
  if (!s) return null;
  const parsed = new Date(s);
  if (!Number.isFinite(parsed.getTime())) return null;

  // Deliberately narrow: only a bare "Month D, YYYY" with no time or
  // zone information is re-anchored; everything else is left as parsed.
  const isMonthNameDate = typeof s === 'string'
    && /^[A-Za-z]{3,9}\.?\s+\d{1,2},?\s+\d{4}$/.test(s.trim());
  if (isMonthNameDate) {
    const utcMidnight = Date.UTC(parsed.getFullYear(), parsed.getMonth(), parsed.getDate());
    return new Date(utcMidnight).toISOString();
  }
  return parsed.toISOString();
}
190
+
/**
 * Presence probe for Workday — a constant "no".
 *
 * Workday boards are addressed by a {tenant, env, site} triple that
 * cannot be guessed from a company name, so there is nothing to probe.
 * The function takes no parameters, performs no I/O, and always
 * resolves to false.
 *
 * @returns {Promise<boolean>} always resolves to false
 */
export async function hasWorkday() {
  const isProbeable = false; // registry-only: never discoverable by name
  return isProbeable;
}
package/src/index.js CHANGED
@@ -6,7 +6,7 @@
6
6
  */
7
7
 
8
8
  import { ADAPTERS, ATS_NAMES } from './adapters/index.js';
9
- import { loadRegistry, searchRegistry, detectAts, findAtsBySlug } from './registry.js';
9
+ import { loadRegistry, searchRegistry, detectAts, findAtsBySlug, findEntryBySlug } from './registry.js';
10
10
  import { applyFilters } from './filters.js';
11
11
 
12
12
  /**
@@ -38,21 +38,34 @@ export async function fetchJobs({
38
38
  // Unified slug normalization: strip all non-alphanumeric (matches detectAts)
39
39
  const slug = company.toLowerCase().replace(/[^a-z0-9]/g, '');
40
40
 
41
+ // Filter context is passed as an additive 2nd arg to adapters. Existing
42
+ // adapters declare fetch{Name}(slug) and ignore extra positional args
43
+ // (JS no-op), so this is backward-compatible. Filter-aware adapters
44
+ // (e.g. Workday) use it to avoid mass detail-fetching on huge tenants.
45
+ const filterContext = { titleFilter, filter, postedWithinDays, locationIncludes, locationExcludes, limit };
46
+
41
47
  let jobs;
42
48
  if (ats) {
43
49
  const adapter = ADAPTERS[ats];
44
50
  if (!adapter) throw new Error(`Unknown ATS: ${ats}. Supported: ${ATS_NAMES.join(', ')}`);
45
- jobs = await adapter.fetch(slug);
51
+ jobs = await adapter.fetch(slug, { filterContext });
46
52
  } else {
47
53
  // Consult registry first — if we know which ATS this company uses,
48
54
  // skip probing the others (saves API calls, clearer error semantics).
49
- const known = await findAtsBySlug(slug);
50
- if (known) {
51
- jobs = await ADAPTERS[known].fetch(slug);
55
+ // The full entry is needed so adapter-specific config (e.g. the
56
+ // Workday {tenant,env,site} triple) reaches the adapter.
57
+ const hit = await findEntryBySlug(slug);
58
+ if (hit) {
59
+ jobs = await ADAPTERS[hit.ats].fetch(slug, {
60
+ config: hit.entry.config,
61
+ companyName: hit.entry.name,
62
+ filterContext,
63
+ });
52
64
  } else {
53
- // Discovery mode: company not in registry, probe all adapters
65
+ // Discovery mode: company not in registry, probe all adapters.
66
+ // (Registry-only adapters like Workday bail here via their guard.)
54
67
  const results = await Promise.allSettled(
55
- Object.entries(ADAPTERS).map(async ([name, adapter]) => adapter.fetch(slug))
68
+ Object.entries(ADAPTERS).map(async ([name, adapter]) => adapter.fetch(slug, { filterContext }))
56
69
  );
57
70
  jobs = results
58
71
  .filter(r => r.status === 'fulfilled')
@@ -91,6 +104,7 @@ export const registry = {
91
104
  search: searchRegistry,
92
105
  detect: detectAts,
93
106
  findAtsBySlug,
107
+ findEntryBySlug,
94
108
  };
95
109
 
96
110
  // Re-export individual adapters for direct use
package/src/registry.js CHANGED
@@ -25,7 +25,7 @@ export async function loadRegistry(ats) {
25
25
 
26
26
  // Load all
27
27
  const all = {};
28
- for (const platform of ['greenhouse', 'lever', 'ashby']) {
28
+ for (const platform of ['greenhouse', 'lever', 'ashby', 'smartrecruiters', 'teamtailor', 'recruitee', 'workday']) {
29
29
  try {
30
30
  const data = await readFile(join(REGISTRY_DIR, `${platform}.json`), 'utf-8');
31
31
  all[platform] = JSON.parse(data);
@@ -70,6 +70,24 @@ export async function findAtsBySlug(slug) {
70
70
  return null;
71
71
  }
72
72
 
/**
 * Look up the full registry entry for a slug, together with the ATS it
 * was found under.
 *
 * Returns the whole entry (not just the ATS name) so callers can read
 * adapter-specific config such as the Workday {tenant, env, site}
 * triple. Platforms are scanned in the order loadRegistry() yields
 * them and the first match wins.
 *
 * A platform whose payload is not an array, or an entry that is
 * null/undefined, is skipped rather than allowed to throw: one bad
 * registry file must not break lookups for every other platform.
 *
 * @param {string} slug - normalized company slug
 * @returns {Promise<{ats: string, entry: object}|null>} the match, or
 *   null when no platform lists the slug
 */
export async function findEntryBySlug(slug) {
  const all = await loadRegistry();
  for (const [ats, companies] of Object.entries(all || {})) {
    if (!Array.isArray(companies)) continue; // malformed registry payload
    const entry = companies.find(c => c != null && c.slug === slug);
    if (entry) return { ats, entry };
  }
  return null;
}
90
+
73
91
  /**
74
92
  * Auto-detect which ATS a company uses.
75
93
  */