npm - jd-intel - Versions diffs - 0.4.0 → 0.5.0 - Mend

jd-intel 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/README.md CHANGED Viewed

@@ -3,7 +3,8 @@
 [![MIT License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
 [![Node.js 18+](https://img.shields.io/badge/node-18%2B-green.svg)](https://nodejs.org)
 [![npm](https://img.shields.io/npm/v/jd-intel.svg)](https://www.npmjs.com/package/jd-intel)
-[![npm downloads](https://img.shields.io/npm/dw/jd-intel-mcp.svg)](https://www.npmjs.com/package/jd-intel-mcp)
+[![jd-intel downloads](https://badgen.net/npm/dt/jd-intel?label=jd-intel)](https://www.npmjs.com/package/jd-intel)
+[![jd-intel-mcp downloads](https://badgen.net/npm/dt/jd-intel-mcp?label=jd-intel-mcp)](https://www.npmjs.com/package/jd-intel-mcp)
 [![GitHub stars](https://img.shields.io/github/stars/prPMDev/jd-intel.svg?style=flat)](https://github.com/prPMDev/jd-intel/stargazers)
 > **Stop pasting job descriptions into AI assistants. Let your AI fetch them directly.**
@@ -41,7 +42,7 @@ Because scraping breaks where jd-intel doesn't:
 - **Full JDs when browsing fails.** SPA-rendered boards, slow loads, auth walls, and geo-restrictions block a browser. They don't block a public API call.
 - **Structured data, not HTML soup.** Salary, location type, department, and clean markdown, normalized across every ATS.
 - **No keys, no browser.** Public APIs only. Runs anywhere your AI does.
-- **One schema, every platform.** Greenhouse, Lever, Ashby, SmartRecruiters, TeamTailor, Recruitee return the same shape.
+- **One schema, every platform.** Greenhouse, Lever, Ashby, SmartRecruiters, TeamTailor, Recruitee, Workday return the same shape.
 ---
@@ -209,8 +210,8 @@ No custom parsing per company.
 | SmartRecruiters | Shipped | Enterprise and mid-market |
 | TeamTailor | Shipped | European startups and scale-ups |
 | Recruitee | Shipped | Dutch / EU SMBs and scale-ups |
+| Workday | Shipped | Large enterprises (registry-keyed) |
 | Personio | Planned | German / EU mid-market |
-| Workday | Planned | Large enterprises (scoped scraper) |
 Adding a new ATS is a single adapter file. See [Contributing](#contributing).
@@ -235,10 +236,10 @@ All filters AND together. Deep dive on patterns and gotchas: [docs/filters.md](d
 **Shipped**
 - Library, CLI, and MCP server (three surfaces of one toolkit)
-- Greenhouse, Ashby, Lever, SmartRecruiters, TeamTailor, Recruitee adapters
+- Greenhouse, Ashby, Lever, SmartRecruiters, TeamTailor, Recruitee, Workday adapters
 - Title, topic, location, and date filters
 - Salary extraction from JD text
-- Verified company registry (155+ companies)
+- Verified company registry (160+ companies)
 **Next**
 - Personio adapter (German / EU mid-market)
@@ -246,7 +247,6 @@ All filters AND together. Deep dive on patterns and gotchas: [docs/filters.md](d
 **Planned**
 - Workable adapter (parked — needs SPA shortcode resolution)
-- Workday support (scoped scraper — large enterprise universe)
 - Temporal tracking (when roles open, close, reopen)
 - Change detection
 - Resume-aware fit scoring
@@ -257,7 +257,7 @@ All filters AND together. Deep dive on patterns and gotchas: [docs/filters.md](d
 **Add a company to the registry:** submit a PR to the appropriate file in `registry/`.
-**Add an ATS adapter:** new file in `src/adapters/`. One adapter, one file. Follow the pattern of the existing three.
+**Add an ATS adapter:** new file in `src/adapters/`. One adapter, one file. Follow the pattern of the existing adapters.
 **Request a company:** [open an issue](https://github.com/prPMDev/jd-intel/issues/new). Tell me who's missing.

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "jd-intel",
-  "version": "0.4.0",
-  "description": "Fetch and normalize job descriptions across every major ATS (Greenhouse, Lever, Ashby) — for your AI assistant, no copy-paste.",
+  "version": "0.5.0",
+  "description": "Fetch and normalize job descriptions across every major ATS (Greenhouse, Lever, Ashby, Workday, and more) — for your AI assistant, no copy-paste.",
   "type": "module",
   "main": "src/index.js",
   "bin": {

package/registry/workday.json ADDED Viewed

@@ -0,0 +1,8 @@
+[
+  {"slug": "cisco",         "name": "Cisco",           "sector": "networking",        "config": {"tenant": "cisco",        "env": "wd5",  "site": "Cisco_Careers"}},
+  {"slug": "salesforce",    "name": "Salesforce",      "sector": "crm / saas",        "config": {"tenant": "salesforce",   "env": "wd12", "site": "External_Career_Site"}},
+  {"slug": "bankofamerica", "name": "Bank of America", "sector": "banking",           "config": {"tenant": "ghr",          "env": "wd1",  "site": "Lateral-US"}},
+  {"slug": "adobe",         "name": "Adobe",           "sector": "creative software", "config": {"tenant": "adobe",        "env": "wd5",  "site": "external_experienced"}},
+  {"slug": "nvidia",        "name": "Nvidia",          "sector": "semiconductors",    "config": {"tenant": "nvidia",       "env": "wd5",  "site": "NVIDIAExternalCareerSite"}},
+  {"slug": "servicetitan",  "name": "ServiceTitan",    "sector": "vertical saas",     "config": {"tenant": "servicetitan", "env": "wd1",  "site": "ServiceTitan"}}
+]

package/src/adapters/index.js CHANGED Viewed

@@ -4,6 +4,7 @@ export { fetchAshby, hasAshby } from './ashby.js';
 export { fetchSmartrecruiters, hasSmartrecruiters } from './smartrecruiters.js';
 export { fetchTeamtailor, hasTeamtailor } from './teamtailor.js';
 export { fetchRecruitee, hasRecruitee } from './recruitee.js';
+export { fetchWorkday, hasWorkday } from './workday.js';
 export const ADAPTERS = {
   greenhouse: { fetch: (...args) => import('./greenhouse.js').then(m => m.fetchGreenhouse(...args)), has: (...args) => import('./greenhouse.js').then(m => m.hasGreenhouse(...args)) },
@@ -12,6 +13,7 @@ export const ADAPTERS = {
   smartrecruiters: { fetch: (...args) => import('./smartrecruiters.js').then(m => m.fetchSmartrecruiters(...args)), has: (...args) => import('./smartrecruiters.js').then(m => m.hasSmartrecruiters(...args)) },
   teamtailor: { fetch: (...args) => import('./teamtailor.js').then(m => m.fetchTeamtailor(...args)), has: (...args) => import('./teamtailor.js').then(m => m.hasTeamtailor(...args)) },
   recruitee: { fetch: (...args) => import('./recruitee.js').then(m => m.fetchRecruitee(...args)), has: (...args) => import('./recruitee.js').then(m => m.hasRecruitee(...args)) },
+  workday: { fetch: (...args) => import('./workday.js').then(m => m.fetchWorkday(...args)), has: (...args) => import('./workday.js').then(m => m.hasWorkday(...args)) },
 };
 export const ATS_NAMES = Object.keys(ADAPTERS);

package/src/adapters/workday.js ADDED Viewed

@@ -0,0 +1,198 @@
+import { normalize, stripHtml } from '../normalizer.js';
+const MAX_DETAIL_FETCHES = 100; // upper bound on per-posting detail requests per call
+const LIST_PAGE_SIZE = 20; // postings requested per list page (also the offset step)
+const LIST_PAGE_HARD_CAP = 100; // <= 2000 list items scanned per request
+/**
+ * Fetch jobs from a Workday tenant via the public "CXS" JSON API.
+ *
+ * Workday's career-site SPA calls an unauthenticated JSON API. No
+ * official docs, but it's stable and has no anti-bot at modest volume.
+ *
+ * REGISTRY-ONLY. Workday is keyed by an opaque {tenant, env, site}
+ * triple that is NOT derivable from the company name (Bank of America's
+ * tenant is `ghr`). So this adapter only works when called with
+ * ctx.config from a registry entry; discovery-mode probing (no config)
+ * bails instantly with zero network — see the guard below and
+ * hasWorkday().
+ *
+ * Two-step like SmartRecruiters: a list endpoint (title/location/
+ * postedOn, NO descriptions) plus a per-posting detail endpoint for
+ * the full JD. Enterprise tenants are huge (Salesforce ~1398 jobs), so
+ * we apply list-evaluable filters BEFORE detail-hydrating and cap the
+ * detail set.
+ *
+ * Paging stops at the first of: an empty page, the offset reaching the
+ * tenant's reported `total`, a short page when no `total` is known, or
+ * LIST_PAGE_HARD_CAP pages.
+ *
+ * @param {string} slug - normalized company slug (registry routing key)
+ * @param {object} [ctx] - { config:{tenant,env,site}, companyName, filterContext }
+ * @returns {Promise<Array>} Normalized job objects; [] when config is
+ *   missing/incomplete or the list endpoint answers 404
+ * @throws {Error} when the FIRST list request returns a non-OK status
+ *   other than 404. Network-level rejections from the list requests and
+ *   a SyntaxError from an invalid `titleFilter` pattern also propagate.
+ */
+export async function fetchWorkday(slug, ctx = {}) {
+  const cfg = ctx.config;
+  if (!cfg || !cfg.tenant || !cfg.env || !cfg.site) return []; // registry-only guard
+  const { tenant, env, site } = cfg;
+  const base = `https://${tenant}.${env}.myworkdayjobs.com/wday/cxs/${tenant}/${site}`;
+  const fc = ctx.filterContext || {};
+  // 1. Page the cheap list (no descriptions in list responses).
+  const postings = [];
+  let offset = 0;
+  let pages = 0;
+  // First positive `total` seen. It is remembered rather than re-read
+  // from every page so that a later page which omits `total` (or
+  // reports 0) cannot end paging while postings remain.
+  // NOTE(review): some tenants appear to report `total` only on the
+  // first page — confirm against live tenants.
+  let knownTotal = null;
+  while (pages < LIST_PAGE_HARD_CAP) {
+    const resp = await fetch(`${base}/jobs`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({ appliedFacets: {}, limit: LIST_PAGE_SIZE, offset, searchText: '' }),
+    });
+    if (!resp.ok) {
+      if (resp.status === 404) return []; // wrong site / no such board
+      if (offset === 0) {
+        throw new Error(`Workday API error for ${slug} (${tenant}/${env}/${site}): ${resp.status}`);
+      }
+      break; // mid-paging failure: keep what we have
+    }
+    const data = await resp.json();
+    const page = data.jobPostings || [];
+    postings.push(...page);
+    pages += 1;
+    offset += LIST_PAGE_SIZE;
+    if (knownTotal === null && typeof data.total === 'number' && data.total > 0) {
+      knownTotal = data.total;
+    }
+    if (page.length === 0) break;
+    // With a known total, stop once it is covered; without one, a short
+    // page is the only available end-of-list signal.
+    const exhausted = knownTotal !== null ? offset >= knownTotal : page.length < LIST_PAGE_SIZE;
+    if (exhausted) break;
+  }
+  // 2. Filter-aware candidate selection BEFORE the N+1 detail cost.
+  //    The list carries title/locationsText/postedOn — enough to apply
+  //    titleFilter, location, and recency without descriptions.
+  let candidates = postings;
+  if (fc.titleFilter) {
+    const re = new RegExp(fc.titleFilter, 'i');
+    candidates = candidates.filter(p => re.test(p.title || ''));
+  }
+  if (Array.isArray(fc.locationIncludes) && fc.locationIncludes.length > 0) {
+    const inc = fc.locationIncludes.map(s => String(s).toLowerCase());
+    candidates = candidates.filter(p => {
+      const loc = (p.locationsText || '').toLowerCase();
+      return inc.some(s => loc.includes(s));
+    });
+  }
+  if (Array.isArray(fc.locationExcludes) && fc.locationExcludes.length > 0) {
+    const exc = fc.locationExcludes.map(s => String(s).toLowerCase());
+    candidates = candidates.filter(p => {
+      const loc = (p.locationsText || '').toLowerCase();
+      return !exc.some(s => loc.includes(s));
+    });
+  }
+  if (typeof fc.postedWithinDays === 'number') {
+    candidates = candidates.filter(p => withinDays(p.postedOn, fc.postedWithinDays));
+  }
+  // 3. Bound the detail-fetch set.
+  //    NOTE: huge-tenant coverage is intentionally capped for v1
+  //    (Salesforce ~1398 postings). A description `filter` is applied
+  //    by the library AFTER this returns, so for that case we keep the
+  //    full backstop instead of truncating tightly to `limit` (which
+  //    could hydrate jobs that all fail the regex while better matches
+  //    go unscanned). Proper fix (smart pagination / rate-limited
+  //    concurrency / surfaced truncation) is tracked in #26, to be
+  //    designed alongside retry/rate-limit work (#7).
+  const limit = typeof fc.limit === 'number' && fc.limit > 0 ? fc.limit : 100;
+  const cap = fc.filter ? MAX_DETAIL_FETCHES : Math.min(limit, MAX_DETAIL_FETCHES);
+  candidates = candidates.slice(0, cap);
+  // 4. Hydrate descriptions via the per-posting detail endpoint.
+  //    All detail requests are started together; the set is bounded by
+  //    `cap` above.
+  const jobs = await Promise.all(candidates.map(async (p) => {
+    const externalPath = p.externalPath || ''; // already begins with '/job/...'
+    let info = {};
+    try {
+      // externalPath already carries the '/job/...' segment, so it is
+      // concatenated directly onto the CXS base. Inserting another
+      // '/job' here yields '/job/job/...' which Workday rejects (422).
+      const dResp = await fetch(`${base}${externalPath}`);
+      if (dResp.ok) {
+        const detail = await dResp.json();
+        info = detail.jobPostingInfo || {};
+      }
+    } catch {
+      // Deliberate best-effort: a failed detail request must not sink
+      // the whole batch. Fall back to list fields, empty description.
+    }
+    return normalize({
+      companySlug: slug,
+      company: ctx.companyName || slug,
+      title: p.title || info.title || '',
+      department: '',
+      location: info.location || p.locationsText || '',
+      description: stripHtml(info.jobDescription || ''),
+      url: `https://${tenant}.${env}.myworkdayjobs.com/${site}${externalPath}`,
+      postedAt: parseWorkdayDate(info.startDate) || normalizePostedOn(p.postedOn),
+      salary: null, // normalizer extracts from description text
+      metadata: {
+        workdayTenant: tenant,
+        workdayEnv: env,
+        workdaySite: site,
+        externalPath,
+      },
+    }, 'workday');
+  }));
+  return jobs;
+}
+/**
+ * List-stage recency check that needs no network call.
+ *
+ * Workday's list `postedOn` is a relative label ("Posted Today",
+ * "Posted 5 Days Ago", "Posted 30+ Days Ago"). This decides whether
+ * such a label falls inside the last `days` days. A missing or
+ * unrecognized label is kept (true): the library re-filters on the
+ * real postedAt after hydration, so a false-keep here is corrected
+ * downstream.
+ *
+ * @param {string} postedOn - relative label from the list response
+ * @param {number} days - size of the recency window, in days
+ * @returns {boolean} true when the posting should stay a candidate
+ */
+function withinDays(postedOn, days) {
+  // Nothing to judge by: keep the posting.
+  if (!postedOn) return true;
+  const label = String(postedOn).toLowerCase();
+  if (label.includes('today')) return days >= 0;
+  if (label.includes('yesterday')) return days >= 1;
+  // "N Days Ago" / "N+ Days Ago": compare the stated day count.
+  const match = /(\d+)\+?\s*days?\s*ago/.exec(label);
+  return match ? parseInt(match[1], 10) <= days : true;
+}
+/**
+ * Coerce a Workday list `postedOn` into an approximate ISO date so the
+ * library's postedWithinDays re-filter has a value to compare.
+ *
+ * Relative labels ("Posted Today", "Posted Yesterday", "Posted 5 Days
+ * Ago", "Posted 30+ Days Ago") are recognized FIRST and turned into
+ * "now minus N days". Only values that are not relative labels are
+ * handed to the Date parser. Parsing of non-ISO strings is
+ * implementation-defined, so a relative label must never depend on the
+ * engine rejecting it.
+ *
+ * @param {*} v - list `postedOn` value (relative label, or anything
+ *   `new Date()` accepts)
+ * @returns {string|null} ISO timestamp, or null when `v` is empty or
+ *   cannot be interpreted
+ */
+function normalizePostedOn(v) {
+  if (!v) return null;
+  const s = String(v).toLowerCase();
+  let daysAgo = null;
+  if (/today/.test(s)) daysAgo = 0;
+  else if (/yesterday/.test(s)) daysAgo = 1;
+  else {
+    const m = s.match(/(\d+)\+?\s*days?\s*ago/);
+    if (m) daysAgo = Number.parseInt(m[1], 10);
+  }
+  if (daysAgo !== null) {
+    // 86400000 ms = one day; "N+" labels are approximated as exactly N.
+    return new Date(Date.now() - daysAgo * 86400000).toISOString();
+  }
+  // Not a relative label: accept it only if it parses as a real date.
+  const direct = new Date(v);
+  return Number.isFinite(direct.getTime()) ? direct.toISOString() : null;
+}
+/**
+ * Convert a Workday detail `startDate` ("2026-05-01" or "May 1, 2026")
+ * to an ISO timestamp.
+ *
+ * @param {string} s - raw `startDate` value from the detail response
+ * @returns {string|null} ISO string, or null when `s` is empty or does
+ *   not parse to a valid date
+ */
+function parseWorkdayDate(s) {
+  if (!s) return null;
+  const parsed = new Date(s);
+  // An unparseable input yields an Invalid Date whose time is NaN.
+  if (!Number.isFinite(parsed.getTime())) return null;
+  return parsed.toISOString();
+}
+/**
+ * Probe hook for Workday — intentionally a constant "no".
+ *
+ * The {tenant,env,site} triple can't be derived from a company name,
+ * so there is nothing to probe. Answering false keeps detect_ats from
+ * ever selecting Workday; discovery-mode fetchJobs then bails through
+ * the adapter's config guard.
+ *
+ * @returns {Promise<boolean>} always resolves to false
+ */
+export async function hasWorkday() {
+  const probeable = false; // registry-only invariant
+  return probeable;
+}

package/src/index.js CHANGED Viewed

@@ -6,7 +6,7 @@
  */
 import { ADAPTERS, ATS_NAMES } from './adapters/index.js';
-import { loadRegistry, searchRegistry, detectAts, findAtsBySlug } from './registry.js';
+import { loadRegistry, searchRegistry, detectAts, findAtsBySlug, findEntryBySlug } from './registry.js';
 import { applyFilters } from './filters.js';
 /**
@@ -38,21 +38,34 @@ export async function fetchJobs({
   // Unified slug normalization: strip all non-alphanumeric (matches detectAts)
   const slug = company.toLowerCase().replace(/[^a-z0-9]/g, '');
+  // Filter context is passed as an additive 2nd arg to adapters. Existing
+  // adapters declare fetch{Name}(slug) and ignore extra positional args
+  // (JS no-op), so this is backward-compatible. Filter-aware adapters
+  // (e.g. Workday) use it to avoid mass detail-fetching on huge tenants.
+  const filterContext = { titleFilter, filter, postedWithinDays, locationIncludes, locationExcludes, limit };
   let jobs;
   if (ats) {
     const adapter = ADAPTERS[ats];
     if (!adapter) throw new Error(`Unknown ATS: ${ats}. Supported: ${ATS_NAMES.join(', ')}`);
-    jobs = await adapter.fetch(slug);
+    jobs = await adapter.fetch(slug, { filterContext });
   } else {
     // Consult registry first — if we know which ATS this company uses,
     // skip probing the others (saves API calls, clearer error semantics).
-    const known = await findAtsBySlug(slug);
-    if (known) {
-      jobs = await ADAPTERS[known].fetch(slug);
+    // The full entry is needed so adapter-specific config (e.g. the
+    // Workday {tenant,env,site} triple) reaches the adapter.
+    const hit = await findEntryBySlug(slug);
+    if (hit) {
+      jobs = await ADAPTERS[hit.ats].fetch(slug, {
+        config: hit.entry.config,
+        companyName: hit.entry.name,
+        filterContext,
+      });
     } else {
-      // Discovery mode: company not in registry, probe all adapters
+      // Discovery mode: company not in registry, probe all adapters.
+      // (Registry-only adapters like Workday bail here via their guard.)
       const results = await Promise.allSettled(
-        Object.entries(ADAPTERS).map(async ([name, adapter]) => adapter.fetch(slug))
+        Object.entries(ADAPTERS).map(async ([name, adapter]) => adapter.fetch(slug, { filterContext }))
       );
       jobs = results
         .filter(r => r.status === 'fulfilled')
@@ -91,6 +104,7 @@ export const registry = {
   search: searchRegistry,
   detect: detectAts,
   findAtsBySlug,
+  findEntryBySlug,
 };
 // Re-export individual adapters for direct use

package/src/registry.js CHANGED Viewed

@@ -25,7 +25,7 @@ export async function loadRegistry(ats) {
   // Load all
   const all = {};
-  for (const platform of ['greenhouse', 'lever', 'ashby']) {
+  for (const platform of ['greenhouse', 'lever', 'ashby', 'smartrecruiters', 'teamtailor', 'recruitee', 'workday']) {
     try {
       const data = await readFile(join(REGISTRY_DIR, `${platform}.json`), 'utf-8');
       all[platform] = JSON.parse(data);
@@ -70,6 +70,24 @@ export async function findAtsBySlug(slug) {
   return null;
 }
+/**
+ * Resolve a slug to its full registry entry together with the ATS it
+ * is filed under.
+ *
+ * findAtsBySlug returns only the ATS name; this returns the entry
+ * itself so callers can read adapter-specific config (e.g. the Workday
+ * {tenant, env, site} triple). It is additive: findAtsBySlug is left
+ * untouched because it has other callers.
+ *
+ * @param {string} slug - company slug to match against each entry's `slug`
+ * @returns {Promise<{ats: string, entry: object}|null>} first match in
+ *   registry iteration order, or null when no entry has that slug
+ */
+export async function findEntryBySlug(slug) {
+  const registryByAts = await loadRegistry();
+  for (const ats of Object.keys(registryByAts)) {
+    const entry = registryByAts[ats].find((company) => company.slug === slug);
+    if (entry) return { ats, entry };
+  }
+  return null;
+}
 /**
  * Auto-detect which ATS a company uses.
  */