jobcrawl 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/.prettierrc.json +10 -0
  2. package/CHANGELOG.md +40 -0
  3. package/README.md +232 -0
  4. package/dist/core/aggregators/yc.d.ts +7 -0
  5. package/dist/core/aggregators/yc.js +320 -0
  6. package/dist/core/browser.d.ts +30 -0
  7. package/dist/core/browser.js +196 -0
  8. package/dist/core/cache.d.ts +13 -0
  9. package/dist/core/cache.js +41 -0
  10. package/dist/core/detect-provider.d.ts +7 -0
  11. package/dist/core/detect-provider.js +125 -0
  12. package/dist/core/discover-careers.d.ts +18 -0
  13. package/dist/core/discover-careers.js +92 -0
  14. package/dist/core/extract-jobs.d.ts +14 -0
  15. package/dist/core/extract-jobs.js +36 -0
  16. package/dist/core/fetch-page.d.ts +11 -0
  17. package/dist/core/fetch-page.js +39 -0
  18. package/dist/core/format-output.d.ts +2 -0
  19. package/dist/core/format-output.js +59 -0
  20. package/dist/core/match-jobs.d.ts +6 -0
  21. package/dist/core/match-jobs.js +43 -0
  22. package/dist/core/providers/ashby.d.ts +6 -0
  23. package/dist/core/providers/ashby.js +58 -0
  24. package/dist/core/providers/generic.d.ts +6 -0
  25. package/dist/core/providers/generic.js +294 -0
  26. package/dist/core/providers/greenhouse.d.ts +6 -0
  27. package/dist/core/providers/greenhouse.js +47 -0
  28. package/dist/core/providers/lever.d.ts +7 -0
  29. package/dist/core/providers/lever.js +60 -0
  30. package/dist/core/providers/yc.d.ts +7 -0
  31. package/dist/core/providers/yc.js +320 -0
  32. package/dist/core/resolve-iframe.d.ts +6 -0
  33. package/dist/core/resolve-iframe.js +51 -0
  34. package/dist/core/save-raw.d.ts +4 -0
  35. package/dist/core/save-raw.js +13 -0
  36. package/dist/data/companies.d.ts +9 -0
  37. package/dist/data/companies.js +2849 -0
  38. package/dist/entrypoints/cli/app.d.ts +3 -0
  39. package/dist/entrypoints/cli/app.js +91 -0
  40. package/dist/entrypoints/cli/components/crawl-view.d.ts +1 -0
  41. package/dist/entrypoints/cli/components/crawl-view.js +94 -0
  42. package/dist/entrypoints/cli/components/discover-view.d.ts +1 -0
  43. package/dist/entrypoints/cli/components/discover-view.js +67 -0
  44. package/dist/entrypoints/cli/crawl-aggregators.d.ts +26 -0
  45. package/dist/entrypoints/cli/crawl-aggregators.js +76 -0
  46. package/dist/entrypoints/cli/crawl-url.d.ts +26 -0
  47. package/dist/entrypoints/cli/crawl-url.js +54 -0
  48. package/dist/entrypoints/cli/crawl.d.ts +32 -0
  49. package/dist/entrypoints/cli/crawl.js +108 -0
  50. package/dist/entrypoints/cli/discover.d.ts +10 -0
  51. package/dist/entrypoints/cli/discover.js +69 -0
  52. package/dist/entrypoints/cli/index.d.ts +2 -0
  53. package/dist/entrypoints/cli/index.js +197 -0
  54. package/dist/entrypoints/cli/init.d.ts +9 -0
  55. package/dist/entrypoints/cli/init.js +94 -0
  56. package/dist/entrypoints/cli/plain.d.ts +6 -0
  57. package/dist/entrypoints/cli/plain.js +77 -0
  58. package/dist/events.d.ts +114 -0
  59. package/dist/events.js +17 -0
  60. package/dist/orchestrators/crawl-all.d.ts +2 -0
  61. package/dist/orchestrators/crawl-all.js +66 -0
  62. package/dist/orchestrators/discover-all.d.ts +10 -0
  63. package/dist/orchestrators/discover-all.js +39 -0
  64. package/dist/threads/pool.d.ts +5 -0
  65. package/dist/threads/pool.js +23 -0
  66. package/dist/threads/process-url.d.ts +9 -0
  67. package/dist/threads/process-url.js +229 -0
  68. package/dist/types/index.d.ts +83 -0
  69. package/dist/types/index.js +6 -0
  70. package/dist/utils/config.d.ts +17 -0
  71. package/dist/utils/config.js +57 -0
  72. package/dist/utils/google-search.d.ts +19 -0
  73. package/dist/utils/google-search.js +139 -0
  74. package/dist/utils/llm.d.ts +8 -0
  75. package/dist/utils/llm.js +25 -0
  76. package/package.json +42 -0
  77. package/src/core/aggregators/yc.ts +415 -0
  78. package/src/core/browser.ts +239 -0
  79. package/src/core/detect-provider.ts +162 -0
  80. package/src/core/discover-careers.ts +117 -0
  81. package/src/core/extract-jobs.ts +50 -0
  82. package/src/core/fetch-page.ts +41 -0
  83. package/src/core/format-output.ts +80 -0
  84. package/src/core/match-jobs.ts +56 -0
  85. package/src/core/providers/ashby.ts +84 -0
  86. package/src/core/providers/generic.ts +332 -0
  87. package/src/core/providers/greenhouse.ts +74 -0
  88. package/src/core/providers/lever.ts +90 -0
  89. package/src/core/resolve-iframe.ts +59 -0
  90. package/src/core/save-raw.ts +18 -0
  91. package/src/data/companies.ts +2859 -0
  92. package/src/entrypoints/cli/app.tsx +173 -0
  93. package/src/entrypoints/cli/components/crawl-view.tsx +163 -0
  94. package/src/entrypoints/cli/components/discover-view.tsx +138 -0
  95. package/src/entrypoints/cli/crawl-aggregators.ts +112 -0
  96. package/src/entrypoints/cli/crawl-url.ts +87 -0
  97. package/src/entrypoints/cli/crawl.ts +163 -0
  98. package/src/entrypoints/cli/discover.ts +96 -0
  99. package/src/entrypoints/cli/index.ts +252 -0
  100. package/src/entrypoints/cli/init.ts +117 -0
  101. package/src/entrypoints/cli/plain.ts +104 -0
  102. package/src/events.ts +79 -0
  103. package/src/orchestrators/crawl-all.ts +96 -0
  104. package/src/orchestrators/discover-all.ts +61 -0
  105. package/src/threads/pool.ts +29 -0
  106. package/src/threads/process-url.ts +312 -0
  107. package/src/types/index.ts +110 -0
  108. package/src/utils/config.ts +79 -0
  109. package/src/utils/google-search.ts +155 -0
  110. package/src/utils/llm.ts +33 -0
  111. package/test/integration/process-url.test.ts +301 -0
  112. package/test/integration/providers/ashby.test.ts +163 -0
  113. package/test/integration/providers/greenhouse.test.ts +191 -0
  114. package/test/integration/providers/lever.test.ts +188 -0
  115. package/test/unit/config.test.ts +64 -0
  116. package/test/unit/detect-provider.test.ts +165 -0
  117. package/test/unit/events.test.ts +104 -0
  118. package/test/unit/format-output.test.ts +165 -0
  119. package/test/unit/match-jobs.test.ts +257 -0
  120. package/test/unit/pool.test.ts +74 -0
  121. package/test/unit/providers/generic.test.ts +139 -0
  122. package/test/unit/resolve-iframe.test.ts +100 -0
  123. package/tsconfig.json +19 -0
  124. package/vitest.config.ts +7 -0
@@ -0,0 +1,162 @@
1
+ import type { ProviderDetection } from "../types/index.js";
2
+
3
/**
 * Identify which ATS (applicant tracking system) backs a careers page.
 *
 * The page URL is checked first (direct ATS board URLs). Only when the URL
 * yields no known provider is the markup scanned for embedded-ATS signatures
 * (iframes, embed scripts, DOM markers).
 *
 * @param html - Markup of the page; consulted only if the URL is inconclusive.
 * @param url - Address of the page.
 * @returns The URL-based detection when it names a provider, otherwise
 *          whatever the HTML-based detection returns.
 */
export function detectProvider(html: string, url: string): ProviderDetection {
  const fromUrl = detectFromUrl(url);
  return fromUrl.provider === "unknown" ? detectFromHtml(html) : fromUrl;
}
16
+
17
/**
 * Detect a provider from a direct ATS board URL.
 *
 * Recognised shapes:
 *  - Greenhouse: boards.greenhouse.io/{token}, job-boards.greenhouse.io/{token}
 *  - Lever:      jobs.lever.co/{company}
 *  - Ashby:      jobs.ashbyhq.com/{company}
 *  - Workday:    {tenant}.wd{n}.myworkdayjobs.com
 *  - BambooHR:   {company}.bamboohr.com
 *
 * @param url - Page URL. A string that cannot be parsed as an absolute URL
 *              yields the "unknown" result instead of throwing, so the caller
 *              can still fall back to HTML-based detection.
 * @returns The provider and its board token, or
 *          `{ provider: "unknown", boardToken: null }`.
 */
function detectFromUrl(url: string): ProviderDetection {
  const noMatch: ProviderDetection = { provider: "unknown", boardToken: null };

  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return noMatch;
  }

  const host = parsed.hostname;
  const firstPathSegment = parsed.pathname.split("/").filter(Boolean)[0];
  const firstHostLabel = host.split(".")[0];

  // Path-token providers: the board token is the first path segment.
  if (host === "boards.greenhouse.io" || host === "job-boards.greenhouse.io") {
    if (firstPathSegment) {
      return { provider: "greenhouse", boardToken: firstPathSegment };
    }
  }

  if (host === "jobs.lever.co" && firstPathSegment) {
    return { provider: "lever", boardToken: firstPathSegment };
  }

  if (host === "jobs.ashbyhq.com" && firstPathSegment) {
    return { provider: "ashby", boardToken: firstPathSegment };
  }

  // Subdomain providers: the token is the left-most host label. Match with
  // endsWith (not includes) so look-alike hosts such as
  // "x.myworkdayjobs.com.example.org" are not mistaken for the real service.
  if (host.endsWith(".myworkdayjobs.com") && firstHostLabel) {
    return { provider: "workday", boardToken: firstHostLabel };
  }

  if (host.endsWith(".bamboohr.com") && firstHostLabel) {
    return { provider: "bamboohr", boardToken: firstHostLabel };
  }

  return noMatch;
}
55
+
56
/**
 * Detect an ATS embedded in a page on a custom domain by scanning its markup.
 *
 * Checks run in a fixed order and the first one that yields a board token
 * wins: Greenhouse (iframe, API script, `grnhse_app` container), Lever
 * (iframe, `data-lever-company`), Ashby (iframe, embed script), Workday
 * (iframe), BambooHR (iframe).
 *
 * @param html - Raw page markup.
 * @returns The provider and board token, or
 *          `{ provider: "unknown", boardToken: null }` when nothing matches.
 */
function detectFromHtml(html: string): ProviderDetection {
  // Greenhouse iframe: hosted board or embed widget, on either board host.
  const greenhouseIframe = html.match(
    /iframe[^>]+src=["']([^"']*(?:boards|job-boards)\.greenhouse\.io[^"']*)/i
  );
  if (greenhouseIframe) {
    const token = greenhouseTokenFromIframeSrc(greenhouseIframe[1]);
    if (token) return { provider: "greenhouse", boardToken: token };
  }

  // Greenhouse API script: .../v1/boards/{token}/...
  const greenhouseEmbed = html.match(
    /src=["'][^"']*api\.greenhouse\.io\/v1\/boards\/([^/"']+)/i
  );
  if (greenhouseEmbed) {
    return { provider: "greenhouse", boardToken: greenhouseEmbed[1] };
  }

  // Greenhouse embed container: token lives in a settings assignment or in
  // the embed script's `for` query parameter.
  if (html.includes('id="grnhse_app"') || html.includes("id='grnhse_app'")) {
    const tokenMatch = html.match(
      /Grnhse\.Settings\.board_token\s*=\s*["']([^"']+)/
    );
    if (tokenMatch) {
      return { provider: "greenhouse", boardToken: tokenMatch[1] };
    }
    const scriptSrc = html.match(
      /src=["'][^"']*(?:boards|job-boards)\.greenhouse\.io\/embed\/job_board\/js\?for=([^"'&]+)/i
    );
    if (scriptSrc) {
      return { provider: "greenhouse", boardToken: scriptSrc[1] };
    }
  }

  // Lever iframe
  const leverIframe = html.match(
    /iframe[^>]+src=["']([^"']*jobs\.lever\.co[^"']*)/i
  );
  if (leverIframe) {
    const token = extractTokenFromUrl(leverIframe[1], "jobs.lever.co");
    if (token) return { provider: "lever", boardToken: token };
  }

  // Lever data attributes
  if (html.includes("data-lever-")) {
    const leverCompany = html.match(/data-lever-company=["']([^"']+)/i);
    if (leverCompany) {
      return { provider: "lever", boardToken: leverCompany[1] };
    }
  }

  // Ashby iframe
  const ashbyIframe = html.match(
    /iframe[^>]+src=["']([^"']*jobs\.ashbyhq\.com[^"']*)/i
  );
  if (ashbyIframe) {
    const token = extractTokenFromUrl(ashbyIframe[1], "jobs.ashbyhq.com");
    if (token) return { provider: "ashby", boardToken: token };
  }

  // Ashby embed script carrying data-ashby-job-board-id
  const ashbyScript = html.match(
    /src=["'][^"']*ashbyhq\.com\/[^"']*["'][^>]*data-ashby-job-board-id=["']([^"']+)/i
  );
  if (ashbyScript) {
    return { provider: "ashby", boardToken: ashbyScript[1] };
  }

  // Workday iframe: tenant is the left-most host label before ".wd{n}."
  const workdayIframe = html.match(
    /iframe[^>]+src=["']([^"']*\.myworkdayjobs\.com[^"']*)/i
  );
  if (workdayIframe) {
    const tenant = workdayIframe[1].match(/\/\/([^.]+)\.wd\d+\.myworkdayjobs/);
    if (tenant) return { provider: "workday", boardToken: tenant[1] };
  }

  // BambooHR iframe: company is the left-most host label
  const bambooIframe = html.match(
    /iframe[^>]+src=["']([^"']*\.bamboohr\.com[^"']*)/i
  );
  if (bambooIframe) {
    const company = bambooIframe[1].match(/\/\/([^.]+)\.bamboohr\.com/);
    if (company) return { provider: "bamboohr", boardToken: company[1] };
  }

  return { provider: "unknown", boardToken: null };
}

/**
 * Pull the board token out of a Greenhouse iframe `src`.
 *
 * Handles both board hosts, and both URL shapes: a hosted board
 * (`/{token}/...`) and the embed widget (`/embed/job_board?for={token}`).
 * For the embed shape the first path segment is the literal "embed", so the
 * token is read from the `for` query parameter instead.
 *
 * @returns The token, or null when none can be determined.
 */
function greenhouseTokenFromIframeSrc(src: string): string | null {
  const pathToken =
    extractTokenFromUrl(src, "boards.greenhouse.io") ??
    extractTokenFromUrl(src, "job-boards.greenhouse.io");
  if (pathToken !== "embed") return pathToken;

  // `(?:amp;)?` tolerates an HTML-entity-encoded "&amp;" separator.
  const forParam = src.match(/[?&](?:amp;)?for=([^&#]+)/i);
  return forParam?.[1] ?? null;
}

/**
 * Return the first path segment of an iframe `src` when it points at the
 * given host.
 *
 * @param iframeSrc - The `src` value; protocol-relative (`//host/...`) values
 *                    are treated as https.
 * @param hostname - Exact hostname the URL must have.
 * @returns The first non-empty path segment, or null when the URL is invalid,
 *          targets another host, or has no path segment.
 */
function extractTokenFromUrl(
  iframeSrc: string,
  hostname: string
): string | null {
  const absolute = iframeSrc.startsWith("//") ? `https:${iframeSrc}` : iframeSrc;
  try {
    const url = new URL(absolute);
    if (url.hostname !== hostname) return null;
    const [first] = url.pathname.split("/").filter(Boolean);
    return first || null;
  } catch {
    // Not an absolute URL (e.g. a relative src): no token to extract.
    return null;
  }
}
@@ -0,0 +1,117 @@
1
+ import {
2
+ webSearch,
3
+ guessCareerUrls,
4
+ type SearchResult,
5
+ } from "../utils/google-search.js";
6
+ import { askClaude } from "../utils/llm.js";
7
+ import { probePage } from "./fetch-page.js";
8
+
9
/** Outcome of a career-page discovery attempt for one company. */
export interface DiscoverResult {
  /** Company name exactly as it was passed to the discovery call. */
  company: string;
  /** Discovered career page URL, or null when none was found. */
  url: string | null;
  /** Whether the URL has been confirmed to be a career page. */
  verified: boolean;
  /** Failure description, or null when discovery produced a URL. */
  error: string | null;
}
15
+
16
/**
 * Find the career page URL for a company.
 *
 * Attempts, in order:
 *  1. Web search, then an LLM pick among the search results.
 *  2. Heuristic URL guesses; the first candidate returned is used.
 *
 * This function never throws: any exception raised along the way is reported
 * through the `error` field. Note that an exception in step 1 ends discovery
 * immediately; step 2 is only reached when step 1 completes without a URL.
 *
 * @param company - Company name to look up.
 * @returns A result whose `verified` flag is always false; nothing here
 *          verifies the URL.
 */
export async function discoverCareerPage(
  company: string
): Promise<DiscoverResult> {
  const outcome = (
    url: string | null,
    error: string | null
  ): DiscoverResult => ({ company, url, verified: false, error });

  try {
    // Step 1: search + LLM selection
    const hits = await webSearch(`"${company}" careers jobs`);
    const picked = hits.length > 0 ? await pickCareerUrl(company, hits) : null;
    if (picked) return outcome(picked, null);

    // Step 2: heuristic guesses
    const candidates = await guessCareerUrls(company);
    if (candidates.length > 0) return outcome(candidates[0].url, null);

    return outcome(null, "Could not find career page");
  } catch (err) {
    return outcome(null, err instanceof Error ? err.message : String(err));
  }
}
63
+
64
/**
 * Heuristically check that a URL serves a career page.
 *
 * The page is fetched with a plain HTTP probe and its lower-cased markup is
 * searched for a fixed list of hiring-related phrases and ATS hostnames.
 * Matching is by substring, so overlapping signals each count (for example
 * "jobs.lever.co" also satisfies "jobs").
 *
 * @param url - Page to probe.
 * @returns true when at least two signals are present; false otherwise or
 *          when the probe fails for any reason.
 */
export async function verifyCareerPage(url: string): Promise<boolean> {
  const signals = [
    "careers",
    "jobs",
    "openings",
    "positions",
    "join our team",
    "join us",
    "we're hiring",
    "open roles",
    "apply now",
    "job-boards.greenhouse.io",
    "boards.greenhouse.io",
    "jobs.lever.co",
    "jobs.ashbyhq.com",
    "grnhse_app",
  ];

  try {
    const { html } = await probePage(url);
    const haystack = html.toLowerCase();

    let hits = 0;
    for (const signal of signals) {
      if (haystack.includes(signal)) hits += 1;
    }
    return hits >= 2;
  } catch {
    return false;
  }
}
95
+
96
/**
 * Ask the LLM to choose the official careers page among search results.
 *
 * @param company - Company name, used in the prompt.
 * @param results - Search results offered to the model.
 * @returns The first http(s) URL found in the model's reply, with any
 *          trailing Markdown/sentence punctuation removed, or null when the
 *          reply contains no usable URL (including the literal "null" reply).
 */
async function pickCareerUrl(
  company: string,
  results: SearchResult[]
): Promise<string | null> {
  const formatted = results
    .map((r, i) => `${i + 1}. ${r.title}\n URL: ${r.url}\n ${r.snippet}`)
    .join("\n\n");

  const prompt = `Given these search results for "${company} careers", return ONLY the URL of their official careers/jobs page. Not individual job postings or third-party job boards (Indeed, LinkedIn, Glassdoor).

Search results:
${formatted}

Respond with ONLY the URL, nothing else. If you can't determine it, respond with "null".`;

  const response = await askClaude(prompt);

  // Backticks are excluded so a reply wrapped in inline code does not leak
  // the closing backtick into the URL.
  const urlMatch = response.match(/https?:\/\/[^\s"'<>`]+/);
  if (!urlMatch) return null;

  // Models sometimes answer with a sentence or a Markdown link; drop the
  // punctuation that ends up glued to the end of the URL in those cases.
  const candidate = urlMatch[0].replace(/[)\]}.,;:!?*]+$/, "");

  try {
    new URL(candidate); // reject leftovers that are not a parseable URL
    return candidate;
  } catch {
    return null;
  }
}
@@ -0,0 +1,50 @@
1
+ import type { Job, Provider, Aggregator, SearchCriteria } from "../types/index.js";
2
+ import { extractGreenhouseJobs } from "./providers/greenhouse.js";
3
+ import { extractAshbyJobs } from "./providers/ashby.js";
4
+ import { extractLeverJobs } from "./providers/lever.js";
5
+ import { extractYcJobs } from "./aggregators/yc.js";
6
+ import { extractGenericJobs } from "./providers/generic.js";
7
+
8
/**
 * Fetch jobs through a provider's JSON API. Requires a known provider and
 * board token.
 *
 * The search criteria are forwarded untouched so that a provider extractor
 * can filter on the server side if it supports that.
 *
 * @param provider - ATS provider to query.
 * @param boardToken - Board token / slug identifying the company board.
 * @param sourceUrl - Page the board was discovered on; forwarded to the extractor.
 * @param criteria - Search criteria, forwarded to the extractor.
 * @param saveRaw - Forwarded to the extractor (defaults to false).
 * @throws Error when the provider has no API extractor.
 */
export async function extractViaApi(
  provider: Provider,
  boardToken: string,
  sourceUrl: string,
  criteria: SearchCriteria,
  saveRaw = false
): Promise<Job[]> {
  if (provider === "greenhouse") {
    return extractGreenhouseJobs(boardToken, sourceUrl, criteria, saveRaw);
  }
  if (provider === "ashby") {
    return extractAshbyJobs(boardToken, sourceUrl, criteria, saveRaw);
  }
  if (provider === "lever") {
    return extractLeverJobs(boardToken, sourceUrl, criteria, saveRaw);
  }
  throw new Error(`No API extractor for provider "${provider}"`);
}
30
+
31
/**
 * Fetch jobs through an aggregator, i.e. a source that lists jobs across
 * many companies.
 *
 * @param aggregator - Aggregator to query.
 * @param criteria - Search criteria, forwarded to the aggregator extractor.
 * @param saveRaw - Forwarded to the extractor (defaults to false).
 * @throws Error when the aggregator is not supported. Without this, a value
 *         that slipped past the type system (e.g. unchecked CLI input) would
 *         make the function resolve to `undefined` instead of a job list.
 */
export async function extractViaAggregator(
  aggregator: Aggregator,
  criteria: SearchCriteria,
  saveRaw = false
): Promise<Job[]> {
  switch (aggregator) {
    case "yc":
      return extractYcJobs("https://workatastartup.com", criteria, saveRaw);
    default:
      throw new Error(`No extractor for aggregator "${String(aggregator)}"`);
  }
}
44
+
45
/**
 * Pull jobs out of rendered markup with the generic extractor. This is the
 * path taken when no ATS API is available for the page.
 *
 * @param html - Rendered page markup.
 * @param sourceUrl - Address of the page, forwarded to the generic extractor.
 * @returns Whatever the generic extractor finds.
 */
export function extractFromHtml(html: string, sourceUrl: string): Job[] {
  const jobs = extractGenericJobs(html, sourceUrl);
  return jobs;
}
@@ -0,0 +1,41 @@
1
+ import { BrowserSession, BrowserNotAvailableError } from "./browser.js";
2
+ import type { FetchResult } from "../types/index.js";
3
+
4
/**
 * Lightweight page fetch: one HTTP GET with a desktop-browser User-Agent,
 * following redirects, without any JavaScript rendering.
 *
 * The HTTP status is not inspected; the body is returned whatever the
 * response code was.
 *
 * @param url - Page to fetch.
 * @returns The response body as text and the URL after redirects.
 */
export async function probePage(url: string): Promise<FetchResult> {
  const userAgent =
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";

  const response = await fetch(url, {
    headers: { "User-Agent": userAgent },
    redirect: "follow",
  });

  return { html: await response.text(), finalUrl: response.url };
}
19
+
20
/**
 * Fetch a page through a browser session so that client-side JavaScript has
 * run before the markup is read.
 *
 * When the session reports that the browser is not available
 * (BrowserNotAvailableError), the plain HTTP probe is used instead. Any other
 * error is rethrown. The session is closed in every case.
 *
 * @param url - Page to render.
 * @returns Rendered markup and the session's final URL (or the probe result
 *          on fallback).
 */
export async function renderPage(url: string): Promise<FetchResult> {
  const browser = new BrowserSession();
  try {
    await browser.open(url);
    const [html, finalUrl] = await Promise.all([
      browser.getHtml(),
      browser.getUrl(),
    ]);
    return { html, finalUrl };
  } catch (err) {
    if (!(err instanceof BrowserNotAvailableError)) {
      throw err;
    }
    return probePage(url);
  } finally {
    // NOTE(review): if close() can itself throw, that error would replace the
    // result or error above. Confirm against BrowserSession.close().
    await browser.close();
  }
}
@@ -0,0 +1,80 @@
1
+ import type { Job, OutputFormat } from "../types/index.js";
2
+
3
/**
 * Render a list of jobs in the requested output format.
 *
 * @param jobs - Jobs to render.
 * @param format - "json" (pretty-printed, 2-space indent), "table", "csv" or
 *                 "markdown".
 * @returns The rendered text.
 * @throws Error when `format` is not one of the supported values. Without
 *         this, a value that bypassed the type system (e.g. unchecked CLI
 *         input) would make the function return `undefined`.
 */
export function formatOutput(jobs: Job[], format: OutputFormat): string {
  switch (format) {
    case "json":
      return JSON.stringify(jobs, null, 2);

    case "table":
      return formatTable(jobs);

    case "csv":
      return formatCsv(jobs);

    case "markdown":
      return formatMarkdown(jobs);

    default:
      throw new Error(`Unsupported output format "${String(format)}"`);
  }
}
18
+
19
/**
 * Render jobs as a fixed-width text table with " | " column separators.
 *
 * Columns: Company, Title, Location, Department, URL. Missing location or
 * department is shown as "-". Each column is as wide as its longest value
 * (header included) but never wider than 50 characters; longer cells are cut
 * to fit.
 *
 * @returns The table, or "No matching jobs found." for an empty list.
 */
function formatTable(jobs: Job[]): string {
  if (jobs.length === 0) return "No matching jobs found.";

  const maxWidth = 50;
  const headers = ["Company", "Title", "Location", "Department", "URL"];
  const rows = jobs.map((job) => [
    job.company,
    job.title,
    job.location ?? "-",
    job.department ?? "-",
    job.url,
  ]);

  // Widest entry per column, capped at maxWidth.
  const widths = headers.map((header, col) => {
    let widest = header.length;
    for (const row of rows) {
      widest = Math.max(widest, row[col].length);
    }
    return Math.min(maxWidth, widest);
  });

  // Cut/pad every cell to its column width. Headers are never longer than
  // their column, so the cut is a no-op for them.
  const renderRow = (cells: string[]): string =>
    cells
      .map((cell, col) => cell.slice(0, widths[col]).padEnd(widths[col]))
      .join(" | ");

  const output = [
    renderRow(headers),
    widths.map((w) => "-".repeat(w)).join(" | "),
  ];
  for (const row of rows) {
    output.push(renderRow(row));
  }
  return output.join("\n");
}
44
+
45
/**
 * Render jobs as CSV: a header line followed by one line per job.
 *
 * Columns: company, title, location, department, workMode, url, postedAt.
 * Null values become empty fields; every field goes through csvEscape.
 * An empty job list yields the header line alone.
 */
function formatCsv(jobs: Job[]): string {
  const lines = ["company,title,location,department,workMode,url,postedAt"];

  for (const job of jobs) {
    const fields = [
      job.company,
      job.title,
      job.location ?? "",
      job.department ?? "",
      job.workMode ?? "",
      job.url,
      job.postedAt ?? "",
    ];
    lines.push(fields.map((field) => csvEscape(field)).join(","));
  }

  return lines.join("\n");
}
60
+
61
/**
 * Quote a CSV field when needed.
 *
 * A field containing a comma, a double quote, or a line break (LF or CR) is
 * wrapped in double quotes with embedded quotes doubled; anything else is
 * returned unchanged. CR is included because a bare carriage return inside an
 * unquoted field is read as a record break by many CSV parsers.
 *
 * @param value - Raw field text.
 * @returns The field, quoted and escaped if necessary.
 */
function csvEscape(value: string): string {
  if (/[",\r\n]/.test(value)) {
    return `"${value.replace(/"/g, '""')}"`;
  }
  return value;
}
67
+
68
/**
 * Render jobs as a Markdown table with columns Company, Title (linked to the
 * job URL), Location and Department. Missing location or department is shown
 * as "-".
 *
 * Cell text is escaped so that job data cannot break the table: pipes are
 * backslash-escaped, line breaks become spaces, square brackets in the link
 * text are backslash-escaped, and characters that would end the link target
 * or the cell (parentheses, pipe, whitespace) are percent-encoded in the URL.
 *
 * @returns The table, or "No matching jobs found." for an empty list.
 */
function formatMarkdown(jobs: Job[]): string {
  if (jobs.length === 0) return "No matching jobs found.";

  // A raw "|" ends the cell and a raw newline ends the row.
  const cell = (text: string): string =>
    text.replace(/\r\n|\r|\n/g, " ").replace(/\|/g, "\\|");

  // Inside [..] an unescaped bracket can terminate the link text early.
  const linkText = (text: string): string =>
    cell(text).replace(/[\[\]]/g, "\\$&");

  // Inside (..) a ")" or whitespace ends the target; "|" ends the cell.
  const linkTarget = (url: string): string =>
    url.replace(
      /[()|\s]/g,
      (ch) => `%${ch.charCodeAt(0).toString(16).toUpperCase().padStart(2, "0")}`
    );

  const lines = [
    "| Company | Title | Location | Department |",
    "| --- | --- | --- | --- |",
    ...jobs.map(
      (j) =>
        `| ${cell(j.company)} | [${linkText(j.title)}](${linkTarget(j.url)}) | ${cell(j.location ?? "-")} | ${cell(j.department ?? "-")} |`
    ),
  ];
  return lines.join("\n");
}
@@ -0,0 +1,56 @@
1
+ import type { Job, SearchCriteria } from "../types/index.js";
2
+
3
/**
 * Keep only the jobs that satisfy the search criteria.
 *
 * All text comparisons are case-insensitive substring checks. A criterion is
 * skipped when it is empty/unset, and the location, work-mode and department
 * criteria are also skipped for a job that lacks the corresponding field, so
 * missing data never excludes a job.
 *
 * Rules, applied in order:
 *  - keywords: the title must contain at least one keyword
 *  - excludeKeywords: the title must contain none of them
 *  - location: the job location must contain the requested location
 *  - workMode: the job's work mode must be in the allowed list
 *  - departments: the job department must contain at least one entry
 *
 * @returns A new array with the matching jobs, in their original order.
 */
export function matchJobs(jobs: Job[], criteria: SearchCriteria): Job[] {
  // True when `text` contains any of `needles`, ignoring case.
  const containsAny = (text: string, needles: string[]): boolean => {
    const lowered = text.toLowerCase();
    return needles.some((needle) => lowered.includes(needle.toLowerCase()));
  };

  return jobs.filter((job) => {
    const { keywords, excludeKeywords, location, workMode, departments } =
      criteria;

    if (keywords.length > 0 && !containsAny(job.title, keywords)) {
      return false;
    }

    if (excludeKeywords.length > 0 && containsAny(job.title, excludeKeywords)) {
      return false;
    }

    if (
      location &&
      job.location &&
      !job.location.toLowerCase().includes(location.toLowerCase())
    ) {
      return false;
    }

    if (
      workMode &&
      workMode.length > 0 &&
      job.workMode &&
      !workMode.includes(job.workMode)
    ) {
      return false;
    }

    if (
      departments &&
      departments.length > 0 &&
      job.department &&
      !containsAny(job.department, departments)
    ) {
      return false;
    }

    return true;
  });
}
@@ -0,0 +1,84 @@
1
+ import { createHash } from "node:crypto";
2
+ import type { Job, SearchCriteria } from "../../types/index.js";
3
+ import { saveRawResponse } from "../save-raw.js";
4
+
5
/** One posting as it appears in the Ashby job-board API payload. */
interface AshbyJob {
  id: string;
  /** Copied to `Job.title`. */
  title: string;
  /** Preferred source for `Job.department`. */
  department: string | null;
  /** Used for `Job.department` when `department` is null. */
  team: string | null;
  employmentType: string | null;
  /** Copied to `Job.location`; also scanned for "remote"/"hybrid". */
  location: string | null;
  /** When true, the job's work mode is "remote". */
  isRemote: boolean | null;
  /** Scanned for "remote", "hybrid", "onsite"/"on-site". */
  workplaceType: string | null;
  /** Copied to `Job.url`. */
  jobUrl: string;
  /** Copied to `Job.postedAt`. */
  publishedAt: string | null;
  /** First 200 characters become `Job.description`. */
  descriptionPlain?: string;
}
18
+
19
/** Top-level shape of the Ashby job-board API payload, as consumed here. */
interface AshbyResponse {
  /** Every posting on the board. */
  jobs: Array<AshbyJob>;
}
22
+
23
/**
 * Fetch every posting of an Ashby job board through the public posting API
 * and convert each one to a `Job`.
 *
 * The request carries no credentials. All postings are returned; the search
 * criteria are accepted for signature parity with the other extractors but
 * are not used here.
 *
 * @param boardToken - Ashby board identifier; also used as `Job.company`.
 * @param sourceUrl - Page the board was discovered on, copied to each job.
 * @param _criteria - Unused.
 * @param saveRaw - When true, the raw payload is handed to saveRawResponse.
 * @returns One `Job` per posting. The job id is the first 12 hex characters
 *          of a SHA-256 over the posting URL, title and board token.
 * @throws Error when the API responds with a non-2xx status, or when the
 *         payload does not contain a `jobs` array.
 */
export async function extractAshbyJobs(
  boardToken: string,
  sourceUrl: string,
  _criteria: SearchCriteria,
  saveRaw = false
): Promise<Job[]> {
  const apiUrl = `https://api.ashbyhq.com/posting-api/job-board/${boardToken}`;
  const response = await fetch(apiUrl);

  if (!response.ok) {
    throw new Error(
      `Ashby API returned ${response.status} for board "${boardToken}"`
    );
  }

  const data = (await response.json()) as AshbyResponse;

  // Save before validating so an unexpected payload can still be inspected.
  if (saveRaw) await saveRawResponse("ashby", boardToken, data);

  // The cast above is unchecked; fail with a clear message rather than a
  // "cannot read properties of undefined" TypeError further down.
  const postings: unknown = data?.jobs;
  if (!Array.isArray(postings)) {
    throw new Error(
      `Ashby API returned an unexpected payload for board "${boardToken}" (no "jobs" array)`
    );
  }

  return data.jobs.map((aj): Job => {
    const id = createHash("sha256")
      .update(`${aj.jobUrl}:${aj.title}:${boardToken}`)
      .digest("hex")
      .slice(0, 12);

    return {
      id,
      title: aj.title,
      company: boardToken,
      location: aj.location ?? null,
      workMode: inferWorkMode(aj),
      department: aj.department ?? aj.team ?? null,
      url: aj.jobUrl,
      sourceUrl,
      provider: "ashby",
      description: aj.descriptionPlain?.slice(0, 200) ?? null,
      postedAt: aj.publishedAt ?? null,
      extractedAt: new Date().toISOString(),
      raw: aj as unknown as Record<string, unknown>,
    };
  });
}
69
+
70
/**
 * Derive a work mode from an Ashby posting.
 *
 * Precedence: an explicit `isRemote === true` wins; then `workplaceType` is
 * scanned (case-insensitively) for "remote", "hybrid", "onsite"/"on-site";
 * finally `location` is scanned for "remote" or "hybrid".
 *
 * @returns The inferred mode, or null when no signal is found.
 */
function inferWorkMode(job: AshbyJob): "remote" | "onsite" | "hybrid" | null {
  if (job.isRemote === true) return "remote";

  // An empty string matches none of the keywords, same as a missing field.
  const workplace = job.workplaceType ? job.workplaceType.toLowerCase() : "";
  if (workplace.includes("remote")) return "remote";
  if (workplace.includes("hybrid")) return "hybrid";
  if (workplace.includes("onsite") || workplace.includes("on-site")) {
    return "onsite";
  }

  const place = job.location ? job.location.toLowerCase() : "";
  if (place.includes("remote")) return "remote";
  if (place.includes("hybrid")) return "hybrid";

  return null;
}