jd-intel 0.5.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.js CHANGED
@@ -1,116 +1,136 @@
1
- /**
2
- * jd-intel — JD intelligence toolkit: fetch, normalize, and search job descriptions across every major ATS.
3
- *
4
- * Fetches, normalizes, and enriches job data from public ATS APIs
5
- * (Greenhouse, Lever, Ashby) into a unified schema.
6
- */
7
-
8
- import { ADAPTERS, ATS_NAMES } from './adapters/index.js';
9
- import { loadRegistry, searchRegistry, detectAts, findAtsBySlug, findEntryBySlug } from './registry.js';
10
- import { applyFilters } from './filters.js';
11
-
12
- /**
13
- * Fetch jobs from a company's ATS board.
14
- *
15
- * @param {Object} options
16
- * @param {string} options.company - Company slug or name
17
- * @param {string} [options.ats] - Specific ATS platform. If omitted, auto-detects.
18
- * @param {string} [options.titleFilter] - Regex matched against title only. Use for role identity ("product manager", "staff engineer").
19
- * @param {string} [options.filter] - Regex matched across title, department, description. Use for topic/scope.
20
- * @param {number} [options.postedWithinDays] - Only return jobs posted within N days.
21
- * @param {string[]} [options.locationIncludes] - Keep jobs whose location contains any of these (case-insensitive).
22
- * @param {string[]} [options.locationExcludes] - Drop jobs whose location contains any of these (case-insensitive).
23
- * @param {number} [options.limit=100] - Maximum jobs to return after filtering.
24
- * @returns {Promise<Array>} Normalized, filtered job objects
25
- */
26
- export async function fetchJobs({
27
- company,
28
- ats,
29
- titleFilter,
30
- filter,
31
- postedWithinDays,
32
- locationIncludes,
33
- locationExcludes,
34
- limit = 100,
35
- } = {}) {
36
- if (!company) throw new Error('Company slug required');
37
-
38
- // Unified slug normalization: strip all non-alphanumeric (matches detectAts)
39
- const slug = company.toLowerCase().replace(/[^a-z0-9]/g, '');
40
-
41
- // Filter context is passed as an additive 2nd arg to adapters. Existing
42
- // adapters declare fetch{Name}(slug) and ignore extra positional args
43
- // (JS no-op), so this is backward-compatible. Filter-aware adapters
44
- // (e.g. Workday) use it to avoid mass detail-fetching on huge tenants.
45
- const filterContext = { titleFilter, filter, postedWithinDays, locationIncludes, locationExcludes, limit };
46
-
47
- let jobs;
48
- if (ats) {
49
- const adapter = ADAPTERS[ats];
50
- if (!adapter) throw new Error(`Unknown ATS: ${ats}. Supported: ${ATS_NAMES.join(', ')}`);
51
- jobs = await adapter.fetch(slug, { filterContext });
52
- } else {
53
- // Consult registry first if we know which ATS this company uses,
54
- // skip probing the others (saves API calls, clearer error semantics).
55
- // The full entry is needed so adapter-specific config (e.g. the
56
- // Workday {tenant,env,site} triple) reaches the adapter.
57
- const hit = await findEntryBySlug(slug);
58
- if (hit) {
59
- jobs = await ADAPTERS[hit.ats].fetch(slug, {
60
- config: hit.entry.config,
61
- companyName: hit.entry.name,
62
- filterContext,
63
- });
64
- } else {
65
- // Discovery mode: company not in registry, probe all adapters.
66
- // (Registry-only adapters like Workday bail here via their guard.)
67
- const results = await Promise.allSettled(
68
- Object.entries(ADAPTERS).map(async ([name, adapter]) => adapter.fetch(slug, { filterContext }))
69
- );
70
- jobs = results
71
- .filter(r => r.status === 'fulfilled')
72
- .flatMap(r => r.value);
73
- }
74
- }
75
-
76
- return applyFilters(jobs, { titleFilter, filter, postedWithinDays, locationIncludes, locationExcludes, limit });
77
- }
78
-
79
- /**
80
- * Search for companies in the registry.
81
- */
82
- export async function search({ keyword, location, ats } = {}) {
83
- // For now, search is registry-based. With SQLite store, this becomes a full-text search.
84
- if (!keyword) throw new Error('Keyword required');
85
- return searchRegistry(keyword);
86
- }
87
-
88
- /**
89
- * Detect which ATS platform a company uses (probes each adapter).
90
- */
91
- export { detectAts } from './registry.js';
92
-
93
- /**
94
- * Look up which ATS a slug belongs to in the registry (cached, no network).
95
- * Returns the ATS name ("greenhouse" | "lever" | "ashby") or null if not in registry.
96
- */
97
- export { findAtsBySlug } from './registry.js';
98
-
99
- /**
100
- * Registry management.
101
- */
102
- export const registry = {
103
- load: loadRegistry,
104
- search: searchRegistry,
105
- detect: detectAts,
106
- findAtsBySlug,
107
- findEntryBySlug,
108
- };
109
-
110
- // Re-export individual adapters for direct use
111
- export { fetchGreenhouse } from './adapters/greenhouse.js';
112
- export { fetchLever } from './adapters/lever.js';
113
- export { fetchAshby } from './adapters/ashby.js';
114
-
115
- // Re-export filter logic for reuse (e.g., by the MCP server)
116
- export { applyFilters } from './filters.js';
1
+ /**
2
+ * jd-intel — JD intelligence toolkit: fetch, normalize, and search job descriptions across every major ATS.
3
+ *
4
+ * Fetches, normalizes, and enriches job data from public ATS APIs
5
+ * (Greenhouse, Lever, Ashby, SmartRecruiters, Teamtailor, Recruitee,
6
+ * Workday) into a unified schema.
7
+ */
8
+
9
+ import { ADAPTERS, ATS_NAMES } from './adapters/index.js';
10
+ import { loadRegistry, searchRegistry, detectAts, findAtsBySlug, findEntryBySlug } from './registry.js';
11
+ import { applyFilters } from './filters.js';
12
+
/**
 * Fetch jobs from a company's ATS board.
 *
 * Routing:
 *  - `ats` given: that adapter is used. An explicit `config` wins; without
 *    one, a registry entry for the same ATS supplies the canonical slug,
 *    config and company name.
 *  - `ats` omitted, company found in the registry: the registry's ATS and
 *    canonical slug are used.
 *  - `ats` omitted, company not in the registry: every adapter is probed
 *    and the results of those that succeed are merged; failures are ignored.
 *
 * @param {Object} options
 * @param {string} options.company - Company slug or name
 * @param {string} [options.ats] - Specific ATS platform. If omitted, auto-detects.
 * @param {object} [options.config] - Adapter-specific config (e.g. Workday {tenant, env, site}). Bypasses the registry; the only way to reach a Workday company not in the registry.
 * @param {string} [options.titleFilter] - Regex matched against title only. Use for role identity ("product manager", "staff engineer").
 * @param {string} [options.filter] - Regex matched across title, department, description. Use for topic/scope.
 * @param {number} [options.postedWithinDays] - Only return jobs posted within N days.
 * @param {string[]} [options.locationIncludes] - Keep jobs whose location contains any of these (case-insensitive).
 * @param {string[]} [options.locationExcludes] - Drop jobs whose location contains any of these (case-insensitive).
 * @param {number} [options.limit=100] - Maximum jobs to return after filtering.
 * @returns {Promise<Array>} Normalized, filtered job objects
 * @throws {Error} If `company` is missing or `ats` does not name a supported adapter.
 */
export async function fetchJobs({
  company,
  ats,
  config,
  titleFilter,
  filter,
  postedWithinDays,
  locationIncludes,
  locationExcludes,
  limit = 100,
} = {}) {
  if (!company) throw new Error('Company slug required');

  // Unified slug normalization: strip all non-alphanumeric (matches detectAts).
  // String() keeps a truthy non-string (e.g. a numeric id) from failing with
  // an opaque "toLowerCase is not a function".
  const slug = String(company).toLowerCase().replace(/[^a-z0-9]/g, '');

  // Filter context is passed as an additive 2nd arg to adapters. Existing
  // adapters declare fetch{Name}(slug) and ignore extra positional args
  // (JS no-op), so this is backward-compatible. Filter-aware adapters
  // (e.g. Workday) use it to avoid mass detail-fetching on huge tenants.
  const filterContext = { titleFilter, filter, postedWithinDays, locationIncludes, locationExcludes, limit };

  let jobs;
  if (ats) {
    // Own-property lookup: a bare ADAPTERS[ats] also resolves inherited keys
    // such as "toString" or "constructor", which would slip past the guard
    // below and blow up later with "adapter.fetch is not a function".
    const adapter = Object.prototype.hasOwnProperty.call(ADAPTERS, ats) ? ADAPTERS[ats] : undefined;
    if (!adapter) throw new Error(`Unknown ATS: ${ats}. Supported: ${ATS_NAMES.join(', ')}`);

    // Explicit ATS: an explicitly passed config wins (the only path that
    // can reach a Workday company not in the registry). With no explicit
    // config, fall back to the registry so config-keyed adapters
    // (Workday) and canonically-cased registry slugs (SmartRecruiters
    // "Visa") also work on the explicit path, not just under auto-detect.
    let fetchSlug = slug;
    let cfg = config;
    let companyName;
    if (!cfg) {
      const hit = await findEntryBySlug(slug);
      // Only trust the registry entry when it belongs to the requested ATS.
      if (hit && hit.ats === ats) {
        fetchSlug = hit.entry.slug;
        cfg = hit.entry.config;
        companyName = hit.entry.name;
      }
    }
    jobs = await adapter.fetch(fetchSlug, { config: cfg, companyName, filterContext });
  } else {
    // Consult registry first — if we know which ATS this company uses,
    // skip probing the others (saves API calls, clearer error semantics).
    // The registry entry carries the canonical slug (so the adapter is
    // called with the ATS's own casing, e.g. SmartRecruiters "Visa") and
    // any adapter-specific config (the Workday {tenant,env,site} triple).
    const hit = await findEntryBySlug(slug);
    if (hit) {
      jobs = await ADAPTERS[hit.ats].fetch(hit.entry.slug, {
        config: config || hit.entry.config,
        companyName: hit.entry.name,
        filterContext,
      });
    } else {
      // Discovery mode: company not in registry, probe all adapters.
      // (Registry-only adapters like Workday bail here via their guard.)
      // NOTE(review): an explicit `config` is not forwarded on this path —
      // confirm whether that is intended.
      const results = await Promise.allSettled(
        // The async wrapper turns a synchronous throw inside an adapter into
        // a rejection, so one broken adapter cannot abort the whole probe.
        Object.values(ADAPTERS).map(async (adapter) => adapter.fetch(slug, { filterContext }))
      );
      jobs = results
        .filter((r) => r.status === 'fulfilled')
        .flatMap((r) => r.value);
    }
  }

  return applyFilters(jobs, { titleFilter, filter, postedWithinDays, locationIncludes, locationExcludes, limit });
}
98
+
/**
 * Search for companies in the registry.
 *
 * @param {Object} options
 * @param {string} options.keyword - Text handed to the registry search.
 * @param {string} [options.location] - Accepted but not applied yet.
 * @param {string} [options.ats] - When given, keep only matches whose `ats` equals this platform.
 * @returns {Promise<Array>} Matching registry entries.
 * @throws {Error} If `keyword` is missing.
 */
export async function search({ keyword, location, ats } = {}) {
  // For now, search is registry-based. With SQLite store, this becomes a full-text search.
  if (!keyword) throw new Error('Keyword required');
  const matches = await searchRegistry(keyword);
  // `ats` used to be accepted and silently ignored; honor it when supplied.
  return ats ? matches.filter((entry) => entry.ats === ats) : matches;
}
107
+
108
+ /**
109
+ * Detect which ATS platform a company uses (probes each adapter).
110
+ */
111
+ export { detectAts } from './registry.js';
112
+
113
+ /**
114
+ * Look up which ATS a slug belongs to in the registry (cached, no network).
115
+ * Returns the ATS name (e.g. "greenhouse", "workday") or null if not in registry.
116
+ */
117
+ export { findAtsBySlug } from './registry.js';
118
+
/**
 * Registry helpers grouped under a single namespace object:
 * loading, searching, ATS detection and the two slug look-ups.
 */
export const registry = {
  load: loadRegistry,
  search: searchRegistry,
  detect: detectAts,
  findAtsBySlug: findAtsBySlug,
  findEntryBySlug: findEntryBySlug,
};
129
+
130
+ // Re-export individual adapters for direct use
131
+ export { fetchGreenhouse } from './adapters/greenhouse.js';
132
+ export { fetchLever } from './adapters/lever.js';
133
+ export { fetchAshby } from './adapters/ashby.js';
134
+
135
+ // Re-export filter logic for reuse (e.g., by the MCP server)
136
+ export { applyFilters } from './filters.js';
package/src/registry.js CHANGED
@@ -1,106 +1,114 @@
1
- import { readFile } from 'node:fs/promises';
2
- import { join, dirname } from 'node:path';
3
- import { fileURLToPath } from 'node:url';
4
-
5
- const __dirname = dirname(fileURLToPath(import.meta.url));
6
- const REGISTRY_DIR = join(__dirname, '..', 'registry');
7
-
8
- let cache = {};
9
-
10
- /**
11
- * Load company registry for a specific ATS or all ATS platforms.
12
- */
13
- export async function loadRegistry(ats) {
14
- if (ats && cache[ats]) return cache[ats];
15
-
16
- if (ats) {
17
- try {
18
- const data = await readFile(join(REGISTRY_DIR, `${ats}.json`), 'utf-8');
19
- cache[ats] = JSON.parse(data);
20
- return cache[ats];
21
- } catch {
22
- return [];
23
- }
24
- }
25
-
26
- // Load all
27
- const all = {};
28
- for (const platform of ['greenhouse', 'lever', 'ashby', 'smartrecruiters', 'teamtailor', 'recruitee', 'workday']) {
29
- try {
30
- const data = await readFile(join(REGISTRY_DIR, `${platform}.json`), 'utf-8');
31
- all[platform] = JSON.parse(data);
32
- cache[platform] = all[platform];
33
- } catch {
34
- all[platform] = [];
35
- }
36
- }
37
- return all;
38
- }
39
-
40
- /**
41
- * Search registry for companies matching a query.
42
- */
43
- export async function searchRegistry(query) {
44
- const all = await loadRegistry();
45
- const lower = query.toLowerCase();
46
- const results = [];
47
-
48
- for (const [ats, companies] of Object.entries(all)) {
49
- for (const company of companies) {
50
- const name = (company.name || company.slug || '').toLowerCase();
51
- const sector = (company.sector || '').toLowerCase();
52
- if (name.includes(lower) || sector.includes(lower)) {
53
- results.push({ ...company, ats });
54
- }
55
- }
56
- }
57
-
58
- return results;
59
- }
60
-
61
- /**
62
- * Look up which ATS a slug belongs to in the registry.
63
- * Returns the ATS name (e.g., "greenhouse") or null if not in registry.
64
- */
65
- export async function findAtsBySlug(slug) {
66
- const all = await loadRegistry();
67
- for (const [ats, companies] of Object.entries(all)) {
68
- if (companies.some(c => c.slug === slug)) return ats;
69
- }
70
- return null;
71
- }
72
-
73
- /**
74
- * Look up the full registry entry for a slug, with its ATS.
75
- * Unlike findAtsBySlug (returns just the ats name), this returns the
76
- * whole entry so callers can read adapter-specific config (e.g. the
77
- * Workday {tenant, env, site} triple). Additive — does not change
78
- * findAtsBySlug, which has other callers.
79
- *
80
- * @returns {Promise<{ats: string, entry: object}|null>}
81
- */
82
- export async function findEntryBySlug(slug) {
83
- const all = await loadRegistry();
84
- for (const [ats, companies] of Object.entries(all)) {
85
- const entry = companies.find(c => c.slug === slug);
86
- if (entry) return { ats, entry };
87
- }
88
- return null;
89
- }
90
-
91
- /**
92
- * Auto-detect which ATS a company uses.
93
- */
94
- export async function detectAts(companyName) {
95
- const { ADAPTERS } = await import('./adapters/index.js');
96
- const slug = companyName.toLowerCase().replace(/[^a-z0-9]/g, '');
97
-
98
- const results = [];
99
- const checks = Object.entries(ADAPTERS).map(async ([ats, adapter]) => {
100
- const found = await adapter.has(slug);
101
- if (found) results.push({ ats, slug });
102
- });
103
-
104
- await Promise.allSettled(checks);
105
- return results;
106
- }
1
+ import { readFile } from 'node:fs/promises';
2
+ import { join, dirname } from 'node:path';
3
+ import { fileURLToPath } from 'node:url';
4
+
5
+ const __dirname = dirname(fileURLToPath(import.meta.url));
6
+ const REGISTRY_DIR = join(__dirname, '..', 'registry');
7
+
8
+ let cache = {};
9
+
// Platforms that have a `<name>.json` file in REGISTRY_DIR, in the order the
// load-all result lists them.
const REGISTRY_PLATFORMS = ['greenhouse', 'lever', 'ashby', 'smartrecruiters', 'teamtailor', 'recruitee', 'workday'];

/**
 * Read and parse one platform's registry file, memoizing the parsed value
 * in `cache`. A missing or unparsable file yields a fresh empty array and is
 * NOT cached, so a later call retries the read.
 */
async function readPlatformRegistry(platform) {
  // Own-property check: `cache` is a plain object, so a bare cache[platform]
  // would hand back inherited members for names like "constructor".
  if (Object.prototype.hasOwnProperty.call(cache, platform) && cache[platform]) {
    return cache[platform];
  }
  try {
    const data = await readFile(join(REGISTRY_DIR, `${platform}.json`), 'utf-8');
    cache[platform] = JSON.parse(data);
    return cache[platform];
  } catch {
    // Deliberate best-effort: an absent or broken registry file means
    // "no known companies" rather than a hard failure.
    return [];
  }
}

/**
 * Load company registry for a specific ATS or all ATS platforms.
 *
 * Both paths share the same per-platform cache, so repeated look-ups do not
 * re-read and re-parse the registry files.
 *
 * @param {string} [ats] - Platform name. When omitted, every known platform is loaded.
 * @returns {Promise<Array|Object>} The parsed registry for `ats`, or an object
 *   mapping each known platform name to its parsed registry.
 */
export async function loadRegistry(ats) {
  if (ats) return readPlatformRegistry(ats);

  // Load all — reads are independent, so run them concurrently.
  const lists = await Promise.all(REGISTRY_PLATFORMS.map((platform) => readPlatformRegistry(platform)));
  const all = {};
  REGISTRY_PLATFORMS.forEach((platform, i) => {
    all[platform] = lists[i];
  });
  return all;
}
39
+
/**
 * Find registry companies whose name (or slug, when no name is set) or
 * sector contains `query`, ignoring case.
 *
 * @param {string} query - Text to look for.
 * @returns {Promise<Array<object>>} Shallow copies of the matching entries,
 *   each with an added `ats` key naming the platform it was found under.
 */
export async function searchRegistry(query) {
  const registryByAts = await loadRegistry();
  const needle = query.toLowerCase();

  const isMatch = (company) => {
    const label = (company.name || company.slug || '').toLowerCase();
    const sector = (company.sector || '').toLowerCase();
    return label.includes(needle) || sector.includes(needle);
  };

  return Object.entries(registryByAts).flatMap(([ats, companies]) =>
    companies.filter(isMatch).map((company) => ({ ...company, ats }))
  );
}
60
+
// Registry slugs keep each ATS's own spelling (SmartRecruiters is
// PascalCase, e.g. "Visa"), whereas callers hand in a lowercased,
// alphanumeric-only slug. Both sides are reduced to the same normalized
// form before comparing so registry-first routing still finds them.
function normSlug(s) {
  return String(s).toLowerCase().replace(/[^a-z0-9]/g, '');
}

/**
 * Resolve the ATS a slug is registered under.
 *
 * @param {string} slug - Company slug; compared in normalized form.
 * @returns {Promise<string|null>} ATS name (e.g., "greenhouse") of the first
 *   platform holding a matching entry, or null when no platform has one.
 */
export async function findAtsBySlug(slug) {
  const registryByAts = await loadRegistry();
  const wanted = normSlug(slug);
  const match = Object.entries(registryByAts).find(([, companies]) =>
    companies.some((company) => normSlug(company.slug) === wanted)
  );
  return match ? match[0] : null;
}
79
+
/**
 * Resolve a slug to its complete registry entry together with the ATS it
 * is listed under.
 *
 * findAtsBySlug only yields the ATS name; this variant hands back the entry
 * itself so callers can reach adapter-specific config (e.g. the Workday
 * {tenant, env, site} triple). It is additive and leaves findAtsBySlug
 * untouched for its existing callers.
 *
 * @param {string} slug - Company slug; compared in normalized form.
 * @returns {Promise<{ats: string, entry: object}|null>} First match, or null.
 */
export async function findEntryBySlug(slug) {
  const registryByAts = await loadRegistry();
  const wanted = normSlug(slug);
  for (const ats of Object.keys(registryByAts)) {
    const entry = registryByAts[ats].find((company) => normSlug(company.slug) === wanted);
    if (entry) {
      return { ats, entry };
    }
  }
  return null;
}
98
+
/**
 * Auto-detect which ATS a company uses by asking every adapter whether it
 * has a board for the normalized company slug.
 *
 * @param {string} companyName - Company name or slug; lowercased and stripped
 *   to [a-z0-9] before probing.
 * @returns {Promise<Array<{ats: string, slug: string}>>} One item per adapter
 *   whose `has(slug)` resolved truthy, listed in ADAPTERS order. An adapter
 *   whose probe throws or rejects is treated as "not found".
 */
export async function detectAts(companyName) {
  // Adapters are imported lazily, at call time.
  // NOTE(review): presumably to avoid an import cycle with the adapters — confirm.
  const { ADAPTERS } = await import('./adapters/index.js');
  const slug = companyName.toLowerCase().replace(/[^a-z0-9]/g, '');

  const adapters = Object.entries(ADAPTERS);
  // Collect outcomes by position instead of pushing from inside each probe:
  // pushing made the result order depend on which network call finished first.
  const probes = await Promise.allSettled(
    adapters.map(async ([, adapter]) => adapter.has(slug))
  );

  return adapters
    .filter((_, i) => probes[i].status === 'fulfilled' && probes[i].value)
    .map(([ats]) => ({ ats, slug }));
}