jobcrawl 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/.prettierrc.json +10 -0
  2. package/CHANGELOG.md +40 -0
  3. package/README.md +232 -0
  4. package/dist/core/aggregators/yc.d.ts +7 -0
  5. package/dist/core/aggregators/yc.js +320 -0
  6. package/dist/core/browser.d.ts +30 -0
  7. package/dist/core/browser.js +196 -0
  8. package/dist/core/cache.d.ts +13 -0
  9. package/dist/core/cache.js +41 -0
  10. package/dist/core/detect-provider.d.ts +7 -0
  11. package/dist/core/detect-provider.js +125 -0
  12. package/dist/core/discover-careers.d.ts +18 -0
  13. package/dist/core/discover-careers.js +92 -0
  14. package/dist/core/extract-jobs.d.ts +14 -0
  15. package/dist/core/extract-jobs.js +36 -0
  16. package/dist/core/fetch-page.d.ts +11 -0
  17. package/dist/core/fetch-page.js +39 -0
  18. package/dist/core/format-output.d.ts +2 -0
  19. package/dist/core/format-output.js +59 -0
  20. package/dist/core/match-jobs.d.ts +6 -0
  21. package/dist/core/match-jobs.js +43 -0
  22. package/dist/core/providers/ashby.d.ts +6 -0
  23. package/dist/core/providers/ashby.js +58 -0
  24. package/dist/core/providers/generic.d.ts +6 -0
  25. package/dist/core/providers/generic.js +294 -0
  26. package/dist/core/providers/greenhouse.d.ts +6 -0
  27. package/dist/core/providers/greenhouse.js +47 -0
  28. package/dist/core/providers/lever.d.ts +7 -0
  29. package/dist/core/providers/lever.js +60 -0
  30. package/dist/core/providers/yc.d.ts +7 -0
  31. package/dist/core/providers/yc.js +320 -0
  32. package/dist/core/resolve-iframe.d.ts +6 -0
  33. package/dist/core/resolve-iframe.js +51 -0
  34. package/dist/core/save-raw.d.ts +4 -0
  35. package/dist/core/save-raw.js +13 -0
  36. package/dist/data/companies.d.ts +9 -0
  37. package/dist/data/companies.js +2849 -0
  38. package/dist/entrypoints/cli/app.d.ts +3 -0
  39. package/dist/entrypoints/cli/app.js +91 -0
  40. package/dist/entrypoints/cli/components/crawl-view.d.ts +1 -0
  41. package/dist/entrypoints/cli/components/crawl-view.js +94 -0
  42. package/dist/entrypoints/cli/components/discover-view.d.ts +1 -0
  43. package/dist/entrypoints/cli/components/discover-view.js +67 -0
  44. package/dist/entrypoints/cli/crawl-aggregators.d.ts +26 -0
  45. package/dist/entrypoints/cli/crawl-aggregators.js +76 -0
  46. package/dist/entrypoints/cli/crawl-url.d.ts +26 -0
  47. package/dist/entrypoints/cli/crawl-url.js +54 -0
  48. package/dist/entrypoints/cli/crawl.d.ts +32 -0
  49. package/dist/entrypoints/cli/crawl.js +108 -0
  50. package/dist/entrypoints/cli/discover.d.ts +10 -0
  51. package/dist/entrypoints/cli/discover.js +69 -0
  52. package/dist/entrypoints/cli/index.d.ts +2 -0
  53. package/dist/entrypoints/cli/index.js +197 -0
  54. package/dist/entrypoints/cli/init.d.ts +9 -0
  55. package/dist/entrypoints/cli/init.js +94 -0
  56. package/dist/entrypoints/cli/plain.d.ts +6 -0
  57. package/dist/entrypoints/cli/plain.js +77 -0
  58. package/dist/events.d.ts +114 -0
  59. package/dist/events.js +17 -0
  60. package/dist/orchestrators/crawl-all.d.ts +2 -0
  61. package/dist/orchestrators/crawl-all.js +66 -0
  62. package/dist/orchestrators/discover-all.d.ts +10 -0
  63. package/dist/orchestrators/discover-all.js +39 -0
  64. package/dist/threads/pool.d.ts +5 -0
  65. package/dist/threads/pool.js +23 -0
  66. package/dist/threads/process-url.d.ts +9 -0
  67. package/dist/threads/process-url.js +229 -0
  68. package/dist/types/index.d.ts +83 -0
  69. package/dist/types/index.js +6 -0
  70. package/dist/utils/config.d.ts +17 -0
  71. package/dist/utils/config.js +57 -0
  72. package/dist/utils/google-search.d.ts +19 -0
  73. package/dist/utils/google-search.js +139 -0
  74. package/dist/utils/llm.d.ts +8 -0
  75. package/dist/utils/llm.js +25 -0
  76. package/package.json +42 -0
  77. package/src/core/aggregators/yc.ts +415 -0
  78. package/src/core/browser.ts +239 -0
  79. package/src/core/detect-provider.ts +162 -0
  80. package/src/core/discover-careers.ts +117 -0
  81. package/src/core/extract-jobs.ts +50 -0
  82. package/src/core/fetch-page.ts +41 -0
  83. package/src/core/format-output.ts +80 -0
  84. package/src/core/match-jobs.ts +56 -0
  85. package/src/core/providers/ashby.ts +84 -0
  86. package/src/core/providers/generic.ts +332 -0
  87. package/src/core/providers/greenhouse.ts +74 -0
  88. package/src/core/providers/lever.ts +90 -0
  89. package/src/core/resolve-iframe.ts +59 -0
  90. package/src/core/save-raw.ts +18 -0
  91. package/src/data/companies.ts +2859 -0
  92. package/src/entrypoints/cli/app.tsx +173 -0
  93. package/src/entrypoints/cli/components/crawl-view.tsx +163 -0
  94. package/src/entrypoints/cli/components/discover-view.tsx +138 -0
  95. package/src/entrypoints/cli/crawl-aggregators.ts +112 -0
  96. package/src/entrypoints/cli/crawl-url.ts +87 -0
  97. package/src/entrypoints/cli/crawl.ts +163 -0
  98. package/src/entrypoints/cli/discover.ts +96 -0
  99. package/src/entrypoints/cli/index.ts +252 -0
  100. package/src/entrypoints/cli/init.ts +117 -0
  101. package/src/entrypoints/cli/plain.ts +104 -0
  102. package/src/events.ts +79 -0
  103. package/src/orchestrators/crawl-all.ts +96 -0
  104. package/src/orchestrators/discover-all.ts +61 -0
  105. package/src/threads/pool.ts +29 -0
  106. package/src/threads/process-url.ts +312 -0
  107. package/src/types/index.ts +110 -0
  108. package/src/utils/config.ts +79 -0
  109. package/src/utils/google-search.ts +155 -0
  110. package/src/utils/llm.ts +33 -0
  111. package/test/integration/process-url.test.ts +301 -0
  112. package/test/integration/providers/ashby.test.ts +163 -0
  113. package/test/integration/providers/greenhouse.test.ts +191 -0
  114. package/test/integration/providers/lever.test.ts +188 -0
  115. package/test/unit/config.test.ts +64 -0
  116. package/test/unit/detect-provider.test.ts +165 -0
  117. package/test/unit/events.test.ts +104 -0
  118. package/test/unit/format-output.test.ts +165 -0
  119. package/test/unit/match-jobs.test.ts +257 -0
  120. package/test/unit/pool.test.ts +74 -0
  121. package/test/unit/providers/generic.test.ts +139 -0
  122. package/test/unit/resolve-iframe.test.ts +100 -0
  123. package/tsconfig.json +19 -0
  124. package/vitest.config.ts +7 -0
@@ -0,0 +1,43 @@
1
/**
 * Filter jobs against search criteria. Case-insensitive substring matching.
 *
 * A filter only rejects a job that actually carries the field the filter
 * inspects: jobs with a missing title, location, work mode or department
 * pass that filter (missing data never excludes a job).
 *
 * @param jobs - Jobs to filter; the array itself is not mutated.
 * @param criteria - Search criteria. `keywords` / `excludeKeywords` are
 *   matched against the title, `location` and `departments` against the
 *   corresponding job fields, `workMode` is the list of allowed modes.
 *   Absent lists and empty-string entries are ignored.
 * @returns A new array holding the jobs that pass every active filter.
 */
export function matchJobs(jobs, criteria) {
    // Every string "includes" the empty string, so a stray "" entry would
    // match every title (and, in excludeKeywords, reject every job).
    // Drop empty entries and lowercase once, outside the per-job loop.
    const toTerms = (terms) => (terms ?? [])
        .filter((term) => term.length > 0)
        .map((term) => term.toLowerCase());
    const keywords = toTerms(criteria.keywords);
    const excludeKeywords = toTerms(criteria.excludeKeywords);
    const departments = toTerms(criteria.departments);
    const allowedModes = criteria.workMode ?? [];
    const wantedLocation = criteria.location
        ? criteria.location.toLowerCase()
        : null;
    return jobs.filter((job) => {
        // Title filters only apply when the job has a title to inspect.
        const title = typeof job.title === "string" ? job.title.toLowerCase() : null;
        if (title !== null) {
            // Keywords: at least one must appear in the title.
            if (keywords.length > 0 && !keywords.some((kw) => title.includes(kw))) {
                return false;
            }
            // Exclude keywords: reject if any appears in the title.
            if (excludeKeywords.some((kw) => title.includes(kw))) {
                return false;
            }
        }
        // Location: substring match (include if job has no location).
        if (wantedLocation && job.location) {
            if (!job.location.toLowerCase().includes(wantedLocation)) {
                return false;
            }
        }
        // Work mode: must be in the allowed set (include if null).
        if (allowedModes.length > 0 && job.workMode) {
            if (!allowedModes.includes(job.workMode)) {
                return false;
            }
        }
        // Departments: substring match (include if null).
        if (departments.length > 0 && job.department) {
            const dept = job.department.toLowerCase();
            if (!departments.some((d) => dept.includes(d))) {
                return false;
            }
        }
        return true;
    });
}
@@ -0,0 +1,6 @@
1
+ import type { Job, SearchCriteria } from "../../types/index.js";
2
/**
 * Fetch jobs from Ashby's public posting API.
 * No auth required. Returns all jobs as a single dump.
 *
 * @param boardToken - Ashby job-board identifier; it is interpolated into the
 *   API URL and reused as every job's `company`.
 * @param sourceUrl - Recorded unchanged on every returned job as `sourceUrl`.
 * @param _criteria - Not consulted by this extractor; no filtering happens here.
 * @param saveRaw - When true, the parsed API payload is passed to
 *   `saveRawResponse` before the jobs are mapped. Optional.
 * @returns One `Job` per entry of the payload's `jobs` array, with `provider`
 *   set to "ashby".
 * @throws Error when the API answers with a non-OK HTTP status.
 */
export declare function extractAshbyJobs(boardToken: string, sourceUrl: string, _criteria: SearchCriteria, saveRaw?: boolean): Promise<Job[]>;
@@ -0,0 +1,58 @@
1
+ import { createHash } from "node:crypto";
2
+ import { saveRawResponse } from "../save-raw.js";
3
/**
 * Fetch jobs from Ashby's public posting API.
 * No auth required. Returns all jobs as a single dump.
 *
 * @param boardToken - Ashby job-board identifier; used in the API URL and as
 *   each job's `company`.
 * @param sourceUrl - Recorded unchanged on every returned job.
 * @param _criteria - Unused; this extractor does not filter.
 * @param saveRaw - When true, the parsed payload is handed to
 *   `saveRawResponse` before mapping.
 * @returns One job per entry of the payload's `jobs` array.
 * @throws Error when the API answers with a non-OK status, or when the
 *   payload does not contain a `jobs` array.
 */
export async function extractAshbyJobs(boardToken, sourceUrl, _criteria, saveRaw = false) {
    const apiUrl = `https://api.ashbyhq.com/posting-api/job-board/${boardToken}`;
    const response = await fetch(apiUrl);
    if (!response.ok) {
        throw new Error(`Ashby API returned ${response.status} for board "${boardToken}"`);
    }
    const data = (await response.json());
    // Validate the shape up front (as the Lever extractor does) so an
    // unexpected payload yields a descriptive error instead of an opaque
    // "cannot read properties of undefined" TypeError.
    if (!data || !Array.isArray(data.jobs)) {
        throw new Error(`Ashby API returned unexpected format for board "${boardToken}"`);
    }
    if (saveRaw)
        await saveRawResponse("ashby", boardToken, data);
    // One timestamp for the whole batch: all jobs came from the same response.
    const extractedAt = new Date().toISOString();
    return data.jobs.map((aj) => {
        // Short, stable id derived from the posting URL, title and board.
        const id = createHash("sha256")
            .update(`${aj.jobUrl}:${aj.title}:${boardToken}`)
            .digest("hex")
            .slice(0, 12);
        return {
            id,
            title: aj.title,
            company: boardToken,
            location: aj.location ?? null,
            workMode: inferWorkMode(aj),
            department: aj.department ?? aj.team ?? null,
            url: aj.jobUrl,
            sourceUrl,
            provider: "ashby",
            // Keep only a short preview of the plain-text description.
            description: aj.descriptionPlain?.slice(0, 200) ?? null,
            postedAt: aj.publishedAt ?? null,
            extractedAt,
            raw: aj,
        };
    });
}
38
/**
 * Derive the work mode of an Ashby posting.
 * Precedence: the explicit `isRemote` flag, then `workplaceType`, then
 * "remote"/"hybrid" hints in the location text. Returns null when none apply.
 */
function inferWorkMode(job) {
    if (job.isRemote === true) {
        return "remote";
    }
    // Needle found in workplaceType -> resulting mode, checked in order.
    const workplaceRules = [
        ["remote", "remote"],
        ["hybrid", "hybrid"],
        ["onsite", "onsite"],
        ["on-site", "onsite"],
    ];
    const workplace = job.workplaceType ? job.workplaceType.toLowerCase() : "";
    const rule = workplaceRules.find(([needle]) => workplace.includes(needle));
    if (rule) {
        return rule[1];
    }
    // Fall back to the free-text location.
    const place = job.location ? job.location.toLowerCase() : "";
    if (place.includes("remote")) {
        return "remote";
    }
    return place.includes("hybrid") ? "hybrid" : null;
}
@@ -0,0 +1,6 @@
1
+ import type { Job } from "../../types/index.js";
2
/**
 * Heuristic extraction from rendered HTML.
 * Two strategies: (1) job-like <a> links, (2) repeated container elements.
 *
 * @param html - Rendered page markup to parse.
 * @param sourceUrl - URL of the page. Used to resolve relative links, to drop
 *   links that point back at the page itself, and recorded on each job.
 * @returns The link-based jobs when that strategy finds at least three;
 *   otherwise whichever strategy found more jobs (link-based wins a tie).
 */
export declare function extractGenericJobs(html: string, sourceUrl: string): Job[];
@@ -0,0 +1,294 @@
1
+ import { createHash } from "node:crypto";
2
+ import { parse } from "node-html-parser";
3
/**
 * Extract jobs from rendered HTML using heuristics.
 * The link-based strategy runs first; the repeated-container strategy is
 * only consulted when fewer than three real job links were found.
 */
export function extractGenericJobs(html, sourceUrl) {
    const dom = parse(html);
    const pageUrl = normalizeUrl(sourceUrl);
    // Strategy 1: job-like links, minus those that point back at this page.
    const fromLinks = extractFromLinks(dom, sourceUrl).filter((job) => normalizeUrl(job.url) !== pageUrl);
    if (fromLinks.length >= 3) {
        return fromLinks;
    }
    // Strategy 2: repeated structured containers (custom career pages).
    const fromContainers = extractFromContainers(dom, sourceUrl);
    if (fromContainers.length > fromLinks.length) {
        return fromContainers;
    }
    return fromLinks;
}
21
/**
 * Reduce a URL to a comparable form by dropping its query string and any
 * trailing slashes.
 *
 * The query is removed first: stripping slashes first would miss a slash
 * that sits directly before the `?` (e.g. `/careers/?ref=x`), leaving two
 * spellings of the same page unequal.
 */
function normalizeUrl(url) {
    return url.replace(/\?.*$/, "").replace(/\/+$/, "");
}
24
+ // ---------------------------------------------------------------------------
25
+ // Strategy 1: Link-based extraction
26
+ // ---------------------------------------------------------------------------
27
/**
 * Strategy 1: build jobs from anchors whose href looks like a job posting.
 *
 * Each qualifying link becomes one job: the link text is the title, the href
 * (resolved against `sourceUrl`) is the job URL, the parent element's text is
 * scanned for a location, and the nearest preceding heading is used as the
 * department. Links resolving to an already-seen URL are skipped.
 *
 * @param root - Parsed document root (must support `querySelectorAll`).
 * @param sourceUrl - Page URL; base for relative hrefs and source of `company`.
 * @returns Jobs in document order.
 */
function extractFromLinks(root, sourceUrl) {
    const jobs = [];
    const seen = new Set();
    const links = root.querySelectorAll("a[href]");
    for (const link of links) {
        const href = link.getAttribute("href");
        if (!href)
            continue;
        if (!isJobLink(href))
            continue;
        // Very short or very long link text is unlikely to be a job title.
        const title = link.textContent.trim();
        if (!title || title.length < 3 || title.length > 200)
            continue;
        let fullUrl;
        try {
            fullUrl = new URL(href, sourceUrl).href;
        }
        catch {
            // Unresolvable href: ignore this link.
            continue;
        }
        if (seen.has(fullUrl))
            continue;
        seen.add(fullUrl);
        const id = createHash("sha256")
            .update(`${fullUrl}:${title}`)
            .digest("hex")
            .slice(0, 12);
        const parent = link.parentNode;
        const parentText = parent?.textContent ?? "";
        const location = extractLocation(parentText);
        const department = findDepartmentHeading(link);
        jobs.push({
            id,
            title,
            // Strip only a leading "www." label; an unanchored replace would
            // also cut "www." out of the middle of a hostname.
            company: new URL(sourceUrl).hostname.replace(/^www\./, ""),
            location,
            workMode: location ? inferWorkMode(location) : null,
            department,
            url: fullUrl,
            sourceUrl,
            provider: "generic",
            description: null,
            postedAt: null,
            extractedAt: new Date().toISOString(),
            raw: null,
        });
    }
    return jobs;
}
76
+ // ---------------------------------------------------------------------------
77
+ // Strategy 2: Repeated container extraction
78
+ // ---------------------------------------------------------------------------
79
/** Class-attribute terms that suggest an element belongs to a job listing. */
const JOB_CLASS_RE = /role|job|position|opening|career|listing|vacancy/i;
/**
 * Strategy 2: build jobs from repeated "card" containers.
 *
 * Elements whose class attribute matches {@link JOB_CLASS_RE} are grouped by
 * their first class name. The group with the most card-sized members wins,
 * and each of its cards yields at most one job. All jobs point at
 * `sourceUrl`, since this strategy has no per-job link.
 *
 * @param root - Parsed document root (must support `querySelectorAll`).
 * @param sourceUrl - Page URL; used as each job's URL and source of `company`.
 * @returns Jobs from the best group, or an empty array when no group has at
 *   least three cards.
 */
function extractFromContainers(root, sourceUrl) {
    const jobs = [];
    const seen = new Set();
    // Collect page headings — used to filter out section/department headers
    const headings = new Set(root
        .querySelectorAll("h1, h2, h3, h4, h5, h6")
        .map((h) => h.textContent.trim().toLowerCase())
        .filter((t) => t.length > 0));
    // Group elements by their first CSS class when the class attribute
    // contains job-related terms
    const groups = new Map();
    for (const el of root.querySelectorAll("[class]")) {
        const cls = el.getAttribute("class") ?? "";
        if (!JOB_CLASS_RE.test(cls))
            continue;
        // Trim first: a leading space in the attribute would otherwise make
        // the "first class" an empty string and merge unrelated elements.
        const key = cls.trim().split(/\s+/)[0];
        const arr = groups.get(key);
        if (arr)
            arr.push(el);
        else
            groups.set(key, [el]);
    }
    // Pick the single best group: most cards after filtering.
    // Processing multiple groups causes wrong departments when parent/child
    // groups both qualify (the parent finds the PREVIOUS section's heading).
    let bestCards = [];
    for (const [, elements] of groups) {
        if (elements.length < 3)
            continue;
        // Filter to "job card" sized containers: must have 2-6 non-SVG <p> tags.
        // This excludes single-text sub-components (title/location wrappers)
        // and large section wrappers that contain many jobs.
        const cards = elements.filter((el) => {
            const pCount = el
                .querySelectorAll("p")
                .filter((p) => !isInsideSvg(p)).length;
            return pCount >= 2 && pCount <= 6;
        });
        if (cards.length > bestCards.length) {
            bestCards = cards;
        }
    }
    if (bestCards.length < 3)
        return jobs;
    for (const el of bestCards) {
        const title = findTitle(el);
        if (!title)
            continue;
        if (isLikelyLocation(title))
            continue;
        if (headings.has(title.toLowerCase()))
            continue;
        // Without a per-job URL the id is derived from page URL + title, so
        // cards repeating a title collapse into one job.
        const id = createHash("sha256")
            .update(`${sourceUrl}:${title}`)
            .digest("hex")
            .slice(0, 12);
        if (seen.has(id))
            continue;
        seen.add(id);
        const location = findLocationText(el);
        const department = findDepartmentHeading(el);
        jobs.push({
            id,
            title,
            // Strip only a leading "www." label, never one inside the hostname.
            company: new URL(sourceUrl).hostname.replace(/^www\./, ""),
            location,
            workMode: location ? inferWorkMode(location) : null,
            department,
            url: sourceUrl,
            sourceUrl,
            provider: "generic",
            description: null,
            postedAt: null,
            extractedAt: new Date().toISOString(),
            raw: null,
        });
    }
    return jobs;
}
158
/**
 * Reject strings that look like location/city text rather than job titles.
 * E.g. "SF or NYC", "NYC or Remote (EST)", "SF, NYC, or Remote (USA)"
 *
 * A string qualifies when every word is either an upper-case 2-3 letter
 * abbreviation (SF, NYC) or one of a few connective/location keywords.
 * The abbreviation check is case-sensitive on purpose: matching it
 * case-insensitively would treat any short word as a city code and reject
 * real titles such as "Web Dev".
 *
 * @param text - Candidate title text.
 * @returns True when the text consists solely of location-like words.
 */
function isLikelyLocation(text) {
    const words = text.split(/[\s,()]+/).filter(Boolean);
    if (words.length === 0)
        return false;
    const abbreviation = /^[A-Z]{2,3}$/;
    const keyword = /^(or|and|remote|usa|us|est|pst|cst|mst)$/i;
    return words.every((w) => abbreviation.test(w) || keyword.test(w));
}
169
/**
 * Locate the job title within a container element.
 * Returns the trimmed text of the first heading or <p> that is outside any
 * <svg>, is 3-200 characters long and is not a generic label; null if no
 * element qualifies.
 */
function findTitle(container) {
    const genericLabel = /^(location|department|team|type|apply|learn more|remote|hybrid|onsite|on-site)$/i;
    for (const node of container.querySelectorAll("h1, h2, h3, h4, h5, h6, p")) {
        // SVG <desc>/<title> content is noise, not a job title.
        if (isInsideSvg(node)) {
            continue;
        }
        const text = node.textContent.trim();
        const lengthOk = text.length >= 3 && text.length <= 200;
        if (lengthOk && !genericLabel.test(text)) {
            return text;
        }
    }
    return null;
}
190
/**
 * Find location text inside a container element.
 *
 * First looks for a descendant whose class contains "location" and uses the
 * text of its first <p> (or its own text). A leading "location" label,
 * including an optional colon separator, is stripped. Falls back to the
 * second <p> that is not inside an <svg>.
 *
 * @param container - Job card element.
 * @returns Location text (2-100 characters), or null when none is found.
 */
function findLocationText(container) {
    // Try class-based: element whose class contains "location"
    for (const el of container.querySelectorAll("[class]")) {
        const cls = el.getAttribute("class") ?? "";
        if (!/location/i.test(cls))
            continue;
        if (isInsideSvg(el))
            continue;
        // Get text from <p> children, or direct text
        const p = el.querySelector("p");
        const text = (p ?? el).textContent.trim();
        // Strip a leading "location" label (e.g. SVG <desc> noise or a visible
        // "Location:" caption). The optional colon is consumed too, so
        // "Location: NYC" yields "NYC" rather than ": NYC".
        const cleaned = text.replace(/^location\s*:?\s*/i, "").trim();
        if (cleaned.length >= 2 && cleaned.length <= 100)
            return cleaned;
    }
    // Fallback: second <p> that isn't inside an SVG
    const paragraphs = container.querySelectorAll("p");
    let count = 0;
    for (const p of paragraphs) {
        if (isInsideSvg(p))
            continue;
        count++;
        if (count === 2) {
            const text = p.textContent.trim();
            if (text.length >= 2 && text.length <= 100)
                return text;
        }
    }
    return null;
}
225
/**
 * Report whether an element is an <svg> or has an <svg> ancestor.
 * Walks the `parentNode` chain starting at the element itself.
 */
function isInsideSvg(el) {
    for (let current = el; current; current = current.parentNode) {
        const tagName = current.rawTagName?.toLowerCase();
        if (tagName === "svg") {
            return true;
        }
    }
    return false;
}
234
/**
 * Decide whether an href looks like a link to a job posting.
 * True when the lower-cased href contains one of the known job path
 * segments (each bounded by slashes on both sides).
 */
function isJobLink(href) {
    const jobSegments = [
        "/jobs/",
        "/job/",
        "/careers/",
        "/positions/",
        "/position/",
        "/openings/",
        "/opening/",
        "/apply/",
        "/role/",
        "/roles/",
        "/vacancies/",
        "/vacancy/",
    ];
    const haystack = href.toLowerCase();
    for (const segment of jobSegments) {
        if (haystack.includes(segment)) {
            return true;
        }
    }
    return false;
}
252
/**
 * Pull a location hint out of free text near a job link.
 * Prefers a labelled value ("location: X", "based in X", "located in X");
 * otherwise returns the first work-mode word found. Null when neither occurs.
 */
function extractLocation(text) {
    // Labelled form: the captured value stops at the first comma or newline.
    const labelled = text.match(/(?:location|based in|located in)[:\s]+([^,\n]{3,50})/i);
    if (labelled) {
        return labelled[1]?.trim() ?? labelled[0].trim();
    }
    // Bare work-mode keyword: return the word exactly as it appears.
    const modeWord = text.match(/(?:remote|hybrid|onsite|on-site)/i);
    return modeWord ? modeWord[0].trim() : null;
}
265
/**
 * Find the heading that most plausibly names the department of an element.
 *
 * Starting at the element's parent and climbing up to five ancestors, it
 * inspects up to five preceding siblings of each ancestor. A sibling that is
 * a heading, or that contains one, supplies the result (handles cases like
 * <div class="info"><h3>Dept</h3></div>). Returns the trimmed heading text,
 * or null when the text is empty or no heading is found.
 */
function findDepartmentHeading(el) {
    const maxLevels = 5;
    const maxSiblings = 5;
    let ancestor = el.parentNode;
    let level = 0;
    while (level < maxLevels && ancestor) {
        let candidate = ancestor.previousElementSibling;
        let inspected = 0;
        while (inspected < maxSiblings && candidate) {
            const tagName = candidate.rawTagName?.toLowerCase();
            const isHeading = Boolean(tagName) && /^h[1-6]$/.test(tagName);
            // The sibling itself wins; otherwise take its first nested heading.
            const heading = isHeading
                ? candidate
                : candidate.querySelector("h1, h2, h3, h4, h5, h6");
            if (heading) {
                return heading.textContent.trim() || null;
            }
            candidate = candidate.previousElementSibling;
            inspected += 1;
        }
        ancestor = ancestor.parentNode;
        level += 1;
    }
    return null;
}
287
/**
 * Infer a work mode from location text scraped off a generic career page.
 *
 * The location extractor in this module can return a bare "onsite" /
 * "on-site" keyword, so those are mapped here as well; otherwise that
 * information would be dropped.
 *
 * @param location - Non-empty location text.
 * @returns "remote", "hybrid" or "onsite" (checked in that order), or null
 *   when the text carries no work-mode hint.
 */
function inferWorkMode(location) {
    const lower = location.toLowerCase();
    if (lower.includes("remote"))
        return "remote";
    if (lower.includes("hybrid"))
        return "hybrid";
    if (lower.includes("onsite") || lower.includes("on-site"))
        return "onsite";
    return null;
}
@@ -0,0 +1,6 @@
1
+ import type { Job, SearchCriteria } from "../../types/index.js";
2
/**
 * Fetch jobs from Greenhouse's public JSON API.
 * No auth required. Returns all jobs as a single dump.
 *
 * @param boardToken - Greenhouse board identifier; it is interpolated into
 *   the API URL and reused as every job's `company`.
 * @param sourceUrl - Recorded unchanged on every returned job as `sourceUrl`.
 * @param _criteria - Not consulted by this extractor; no filtering happens here.
 * @param saveRaw - When true, the parsed API payload is passed to
 *   `saveRawResponse` before the jobs are mapped. Optional.
 * @returns One `Job` per entry of the payload's `jobs` array, with `provider`
 *   set to "greenhouse". `postedAt` carries the posting's `updated_at` value.
 * @throws Error when the API answers with a non-OK HTTP status.
 */
export declare function extractGreenhouseJobs(boardToken: string, sourceUrl: string, _criteria: SearchCriteria, saveRaw?: boolean): Promise<Job[]>;
@@ -0,0 +1,47 @@
1
+ import { createHash } from "node:crypto";
2
+ import { saveRawResponse } from "../save-raw.js";
3
/**
 * Fetch jobs from Greenhouse's public JSON API.
 * No auth required. Returns all jobs as a single dump.
 *
 * @param boardToken - Greenhouse board identifier; used in the API URL and as
 *   each job's `company`.
 * @param sourceUrl - Recorded unchanged on every returned job.
 * @param _criteria - Unused; this extractor does not filter.
 * @param saveRaw - When true, the parsed payload is handed to
 *   `saveRawResponse` before mapping.
 * @returns One job per entry of the payload's `jobs` array.
 * @throws Error when the API answers with a non-OK status, or when the
 *   payload does not contain a `jobs` array.
 */
export async function extractGreenhouseJobs(boardToken, sourceUrl, _criteria, saveRaw = false) {
    const apiUrl = `https://boards-api.greenhouse.io/v1/boards/${boardToken}/jobs`;
    const response = await fetch(apiUrl);
    if (!response.ok) {
        throw new Error(`Greenhouse API returned ${response.status} for board "${boardToken}"`);
    }
    const data = (await response.json());
    // Validate the shape up front (as the Lever extractor does) so an
    // unexpected payload yields a descriptive error instead of an opaque
    // "cannot read properties of undefined" TypeError.
    if (!data || !Array.isArray(data.jobs)) {
        throw new Error(`Greenhouse API returned unexpected format for board "${boardToken}"`);
    }
    if (saveRaw)
        await saveRawResponse("greenhouse", boardToken, data);
    // One timestamp for the whole batch: all jobs came from the same response.
    const extractedAt = new Date().toISOString();
    return data.jobs.map((gj) => {
        // Short, stable id derived from the posting URL, title and board.
        const id = createHash("sha256")
            .update(`${gj.absolute_url}:${gj.title}:${boardToken}`)
            .digest("hex")
            .slice(0, 12);
        return {
            id,
            title: gj.title,
            company: boardToken,
            location: gj.location?.name ?? null,
            workMode: inferWorkMode(gj.location?.name),
            // Only the first listed department is kept.
            department: gj.departments?.[0]?.name ?? null,
            url: gj.absolute_url,
            sourceUrl,
            provider: "greenhouse",
            description: null,
            postedAt: gj.updated_at ?? null,
            extractedAt,
            raw: gj,
        };
    });
}
38
/**
 * Infer a work mode from a Greenhouse location name.
 * Returns "remote" or "hybrid" when the name mentions it (remote is checked
 * first); null for a missing name or one without either hint.
 */
function inferWorkMode(location) {
    const text = location ? location.toLowerCase() : "";
    for (const mode of ["remote", "hybrid"]) {
        if (text.includes(mode)) {
            return mode;
        }
    }
    return null;
}
@@ -0,0 +1,7 @@
1
+ import type { Job, SearchCriteria } from "../../types/index.js";
2
/**
 * Fetch jobs from Lever's public postings API.
 * No auth required, but many companies don't have it enabled (returns 404).
 * Must fall through gracefully.
 *
 * @param companySlug - Lever company identifier; it is interpolated into the
 *   API URL and reused as every job's `company`.
 * @param sourceUrl - Recorded unchanged on every returned job as `sourceUrl`.
 * @param _criteria - Not consulted by this extractor; no filtering happens here.
 * @param saveRaw - When true, the parsed API payload is passed to
 *   `saveRawResponse` before the postings are mapped. Optional.
 * @returns One `Job` per posting in the response array, with `provider` set
 *   to "lever".
 * @throws Error when the API answers with a non-OK HTTP status (including the
 *   404 case above) or when the payload is not an array.
 *   NOTE(review): the graceful fall-through is presumably the caller's job,
 *   since this function itself throws — confirm.
 */
export declare function extractLeverJobs(companySlug: string, sourceUrl: string, _criteria: SearchCriteria, saveRaw?: boolean): Promise<Job[]>;
@@ -0,0 +1,60 @@
1
+ import { createHash } from "node:crypto";
2
+ import { saveRawResponse } from "../save-raw.js";
3
/**
 * Fetch jobs from Lever's public postings API.
 * No auth required, but many companies don't have it enabled (returns 404).
 * Must fall through gracefully.
 *
 * @param companySlug - Lever company identifier; used in the API URL and as
 *   each job's `company`.
 * @param sourceUrl - Recorded unchanged on every returned job.
 * @param _criteria - Unused; this extractor does not filter.
 * @param saveRaw - When true, the parsed payload is handed to
 *   `saveRawResponse` before mapping.
 * @returns One job per posting in the response array.
 * @throws Error when the API answers with a non-OK status or the payload is
 *   not an array.
 */
export async function extractLeverJobs(companySlug, sourceUrl, _criteria, saveRaw = false) {
    const apiUrl = `https://api.lever.co/v0/postings/${companySlug}`;
    const response = await fetch(apiUrl);
    if (!response.ok) {
        throw new Error(`Lever API returned ${response.status} for "${companySlug}"`);
    }
    const data = (await response.json());
    if (!Array.isArray(data)) {
        throw new Error(`Lever API returned unexpected format for "${companySlug}"`);
    }
    if (saveRaw)
        await saveRawResponse("lever", companySlug, data);
    // One timestamp for the whole batch: all postings came from the same response.
    const extractedAt = new Date().toISOString();
    return data.map((lp) => {
        // Short, stable id derived from the posting URL, title and company.
        const id = createHash("sha256")
            .update(`${lp.hostedUrl}:${lp.text}:${companySlug}`)
            .digest("hex")
            .slice(0, 12);
        // toISOString() throws a RangeError on an invalid date; one posting
        // with an unparseable createdAt must not abort the whole extraction.
        const created = lp.createdAt ? new Date(lp.createdAt) : null;
        const postedAt = created && !Number.isNaN(created.getTime())
            ? created.toISOString()
            : null;
        return {
            id,
            title: lp.text,
            company: companySlug,
            location: lp.categories?.location ?? null,
            workMode: inferWorkMode(lp),
            department: lp.categories?.department ?? lp.categories?.team ?? null,
            url: lp.hostedUrl,
            sourceUrl,
            provider: "lever",
            description: null,
            postedAt,
            extractedAt,
            raw: lp,
        };
    });
}
42
/**
 * Derive the work mode of a Lever posting.
 * `workplaceType` takes precedence; if it gives no answer, the location
 * category is scanned for "remote"/"hybrid". Returns null when neither helps.
 */
function inferWorkMode(posting) {
    // Needle found in workplaceType -> resulting mode, checked in order.
    const workplaceRules = [
        ["remote", "remote"],
        ["hybrid", "hybrid"],
        ["onsite", "onsite"],
        ["on-site", "onsite"],
    ];
    const workplace = posting.workplaceType ? posting.workplaceType.toLowerCase() : "";
    const rule = workplaceRules.find(([needle]) => workplace.includes(needle));
    if (rule) {
        return rule[1];
    }
    // Fall back to the free-text location category.
    const place = posting.categories?.location?.toLowerCase();
    if (!place) {
        return null;
    }
    if (place.includes("remote")) {
        return "remote";
    }
    return place.includes("hybrid") ? "hybrid" : null;
}
@@ -0,0 +1,7 @@
1
+ import type { Job, SearchCriteria } from "../../types/index.js";
2
/**
 * Fetch jobs from YC's Work at a Startup via Algolia.
 * Maps SearchCriteria to Algolia filters. Falls back to URL query params
 * for backward compatibility with direct WaaS URLs.
 *
 * @param sourceUrl - NOTE(review): presumably the Work at a Startup URL whose
 *   query params serve as the fallback described above; the implementation
 *   is outside this view — confirm.
 * @param criteria - Search criteria to translate into Algolia filters.
 * @param saveRaw - Optional flag. NOTE(review): the sibling provider
 *   extractors use a flag of this name to persist the raw API payload;
 *   confirm this one behaves the same.
 * @returns A promise resolving to the extracted jobs.
 */
export declare function extractYcJobs(sourceUrl: string, criteria: SearchCriteria, saveRaw?: boolean): Promise<Job[]>;