jobcrawl 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. package/.prettierrc.json +10 -0
  2. package/CHANGELOG.md +40 -0
  3. package/README.md +232 -0
  4. package/dist/core/aggregators/yc.d.ts +7 -0
  5. package/dist/core/aggregators/yc.js +320 -0
  6. package/dist/core/browser.d.ts +30 -0
  7. package/dist/core/browser.js +196 -0
  8. package/dist/core/cache.d.ts +13 -0
  9. package/dist/core/cache.js +41 -0
  10. package/dist/core/detect-provider.d.ts +7 -0
  11. package/dist/core/detect-provider.js +125 -0
  12. package/dist/core/discover-careers.d.ts +18 -0
  13. package/dist/core/discover-careers.js +92 -0
  14. package/dist/core/extract-jobs.d.ts +14 -0
  15. package/dist/core/extract-jobs.js +36 -0
  16. package/dist/core/fetch-page.d.ts +11 -0
  17. package/dist/core/fetch-page.js +39 -0
  18. package/dist/core/format-output.d.ts +2 -0
  19. package/dist/core/format-output.js +59 -0
  20. package/dist/core/match-jobs.d.ts +6 -0
  21. package/dist/core/match-jobs.js +43 -0
  22. package/dist/core/providers/ashby.d.ts +6 -0
  23. package/dist/core/providers/ashby.js +58 -0
  24. package/dist/core/providers/generic.d.ts +6 -0
  25. package/dist/core/providers/generic.js +294 -0
  26. package/dist/core/providers/greenhouse.d.ts +6 -0
  27. package/dist/core/providers/greenhouse.js +47 -0
  28. package/dist/core/providers/lever.d.ts +7 -0
  29. package/dist/core/providers/lever.js +60 -0
  30. package/dist/core/providers/yc.d.ts +7 -0
  31. package/dist/core/providers/yc.js +320 -0
  32. package/dist/core/resolve-iframe.d.ts +6 -0
  33. package/dist/core/resolve-iframe.js +51 -0
  34. package/dist/core/save-raw.d.ts +4 -0
  35. package/dist/core/save-raw.js +13 -0
  36. package/dist/data/companies.d.ts +9 -0
  37. package/dist/data/companies.js +2849 -0
  38. package/dist/entrypoints/cli/app.d.ts +3 -0
  39. package/dist/entrypoints/cli/app.js +91 -0
  40. package/dist/entrypoints/cli/components/crawl-view.d.ts +1 -0
  41. package/dist/entrypoints/cli/components/crawl-view.js +94 -0
  42. package/dist/entrypoints/cli/components/discover-view.d.ts +1 -0
  43. package/dist/entrypoints/cli/components/discover-view.js +67 -0
  44. package/dist/entrypoints/cli/crawl-aggregators.d.ts +26 -0
  45. package/dist/entrypoints/cli/crawl-aggregators.js +76 -0
  46. package/dist/entrypoints/cli/crawl-url.d.ts +26 -0
  47. package/dist/entrypoints/cli/crawl-url.js +54 -0
  48. package/dist/entrypoints/cli/crawl.d.ts +32 -0
  49. package/dist/entrypoints/cli/crawl.js +108 -0
  50. package/dist/entrypoints/cli/discover.d.ts +10 -0
  51. package/dist/entrypoints/cli/discover.js +69 -0
  52. package/dist/entrypoints/cli/index.d.ts +2 -0
  53. package/dist/entrypoints/cli/index.js +197 -0
  54. package/dist/entrypoints/cli/init.d.ts +9 -0
  55. package/dist/entrypoints/cli/init.js +94 -0
  56. package/dist/entrypoints/cli/plain.d.ts +6 -0
  57. package/dist/entrypoints/cli/plain.js +77 -0
  58. package/dist/events.d.ts +114 -0
  59. package/dist/events.js +17 -0
  60. package/dist/orchestrators/crawl-all.d.ts +2 -0
  61. package/dist/orchestrators/crawl-all.js +66 -0
  62. package/dist/orchestrators/discover-all.d.ts +10 -0
  63. package/dist/orchestrators/discover-all.js +39 -0
  64. package/dist/threads/pool.d.ts +5 -0
  65. package/dist/threads/pool.js +23 -0
  66. package/dist/threads/process-url.d.ts +9 -0
  67. package/dist/threads/process-url.js +229 -0
  68. package/dist/types/index.d.ts +83 -0
  69. package/dist/types/index.js +6 -0
  70. package/dist/utils/config.d.ts +17 -0
  71. package/dist/utils/config.js +57 -0
  72. package/dist/utils/google-search.d.ts +19 -0
  73. package/dist/utils/google-search.js +139 -0
  74. package/dist/utils/llm.d.ts +8 -0
  75. package/dist/utils/llm.js +25 -0
  76. package/package.json +42 -0
  77. package/src/core/aggregators/yc.ts +415 -0
  78. package/src/core/browser.ts +239 -0
  79. package/src/core/detect-provider.ts +162 -0
  80. package/src/core/discover-careers.ts +117 -0
  81. package/src/core/extract-jobs.ts +50 -0
  82. package/src/core/fetch-page.ts +41 -0
  83. package/src/core/format-output.ts +80 -0
  84. package/src/core/match-jobs.ts +56 -0
  85. package/src/core/providers/ashby.ts +84 -0
  86. package/src/core/providers/generic.ts +332 -0
  87. package/src/core/providers/greenhouse.ts +74 -0
  88. package/src/core/providers/lever.ts +90 -0
  89. package/src/core/resolve-iframe.ts +59 -0
  90. package/src/core/save-raw.ts +18 -0
  91. package/src/data/companies.ts +2859 -0
  92. package/src/entrypoints/cli/app.tsx +173 -0
  93. package/src/entrypoints/cli/components/crawl-view.tsx +163 -0
  94. package/src/entrypoints/cli/components/discover-view.tsx +138 -0
  95. package/src/entrypoints/cli/crawl-aggregators.ts +112 -0
  96. package/src/entrypoints/cli/crawl-url.ts +87 -0
  97. package/src/entrypoints/cli/crawl.ts +163 -0
  98. package/src/entrypoints/cli/discover.ts +96 -0
  99. package/src/entrypoints/cli/index.ts +252 -0
  100. package/src/entrypoints/cli/init.ts +117 -0
  101. package/src/entrypoints/cli/plain.ts +104 -0
  102. package/src/events.ts +79 -0
  103. package/src/orchestrators/crawl-all.ts +96 -0
  104. package/src/orchestrators/discover-all.ts +61 -0
  105. package/src/threads/pool.ts +29 -0
  106. package/src/threads/process-url.ts +312 -0
  107. package/src/types/index.ts +110 -0
  108. package/src/utils/config.ts +79 -0
  109. package/src/utils/google-search.ts +155 -0
  110. package/src/utils/llm.ts +33 -0
  111. package/test/integration/process-url.test.ts +301 -0
  112. package/test/integration/providers/ashby.test.ts +163 -0
  113. package/test/integration/providers/greenhouse.test.ts +191 -0
  114. package/test/integration/providers/lever.test.ts +188 -0
  115. package/test/unit/config.test.ts +64 -0
  116. package/test/unit/detect-provider.test.ts +165 -0
  117. package/test/unit/events.test.ts +104 -0
  118. package/test/unit/format-output.test.ts +165 -0
  119. package/test/unit/match-jobs.test.ts +257 -0
  120. package/test/unit/pool.test.ts +74 -0
  121. package/test/unit/providers/generic.test.ts +139 -0
  122. package/test/unit/resolve-iframe.test.ts +100 -0
  123. package/tsconfig.json +19 -0
  124. package/vitest.config.ts +7 -0
@@ -0,0 +1,165 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { detectProvider } from "../../src/core/detect-provider.js";
3
+
4
/**
 * Table-driven suite for `detectProvider(html, url)`.
 *
 * Every case passes an HTML snippet and a page URL to `detectProvider` and
 * compares the returned value against a `{ provider, boardToken }` object
 * with `toEqual`. Cases are registered in declaration order.
 */
describe("detectProvider", () => {
  type Detection = { provider: string; boardToken: string | null };
  type UrlCase = { title: string; url: string; expected: Detection };
  type HtmlCase = {
    title: string;
    html: string;
    url?: string;
    expected: Detection;
  };

  // Page URL that matches no provider; used whenever only the HTML matters.
  const NEUTRAL_PAGE_URL = "https://example.com/careers";

  // Builders return a fresh expectation object for every case.
  const hit = (provider: string, boardToken: string): Detection => ({
    provider,
    boardToken,
  });
  const miss = (): Detection => ({ provider: "unknown", boardToken: null });

  describe("URL-based detection", () => {
    // The HTML argument is always empty here, so only the URL can match.
    const urlCases: UrlCase[] = [
      {
        title: "detects Greenhouse from boards.greenhouse.io URL",
        url: "https://boards.greenhouse.io/acme",
        expected: hit("greenhouse", "acme"),
      },
      {
        title: "detects Greenhouse from job-boards.greenhouse.io URL",
        url: "https://job-boards.greenhouse.io/stripe",
        expected: hit("greenhouse", "stripe"),
      },
      {
        title: "extracts first path segment as token, ignoring deeper paths",
        url: "https://boards.greenhouse.io/acme/jobs/12345",
        expected: hit("greenhouse", "acme"),
      },
      {
        title: "detects Lever from jobs.lever.co URL",
        url: "https://jobs.lever.co/figma",
        expected: hit("lever", "figma"),
      },
      {
        title: "detects Ashby from jobs.ashbyhq.com URL",
        url: "https://jobs.ashbyhq.com/ramp",
        expected: hit("ashby", "ramp"),
      },
      {
        title: "detects Workday from myworkdayjobs.com URL",
        url: "https://netflix.wd5.myworkdayjobs.com/jobs",
        expected: hit("workday", "netflix"),
      },
      {
        title: "detects BambooHR from bamboohr.com URL",
        url: "https://acme.bamboohr.com/careers",
        expected: hit("bamboohr", "acme"),
      },
      {
        title:
          "returns unknown for workatastartup.com (YC is an aggregator, not a provider)",
        url: "https://www.workatastartup.com/jobs",
        expected: miss(),
      },
      {
        title: "returns unknown for unrecognized URLs",
        url: "https://example.com/careers",
        expected: miss(),
      },
    ];

    for (const { title, url, expected } of urlCases) {
      it(title, () => {
        expect(detectProvider("", url)).toEqual(expected);
      });
    }
  });

  describe("HTML-based detection", () => {
    // `url` defaults to NEUTRAL_PAGE_URL so that only the markup can match.
    const htmlCases: HtmlCase[] = [
      {
        title: "detects Greenhouse iframe embed",
        html: `<iframe src="https://boards.greenhouse.io/acme/jobs"></iframe>`,
        expected: hit("greenhouse", "acme"),
      },
      {
        // extractTokenFromUrl checks against "boards.greenhouse.io" but the
        // iframe has "job-boards.greenhouse.io", so nothing is detected.
        title:
          "does not detect job-boards.greenhouse.io iframe (hostname mismatch in extractTokenFromUrl)",
        html: `<iframe src="https://job-boards.greenhouse.io/stripe"></iframe>`,
        expected: miss(),
      },
      {
        title: "detects Greenhouse API embed script",
        html: `<script src="https://api.greenhouse.io/v1/boards/acme/embed/job_board"></script>`,
        expected: hit("greenhouse", "acme"),
      },
      {
        title: "detects Greenhouse grnhse_app div with board_token setting",
        // Multi-line snippet with a leading and a trailing newline.
        html: [
          "",
          `<div id="grnhse_app"></div>`,
          `<script>Grnhse.Settings.board_token = "mycompany";</script>`,
          "",
        ].join("\n"),
        expected: hit("greenhouse", "mycompany"),
      },
      {
        title:
          "detects Greenhouse grnhse_app div with embed script for= param",
        // Multi-line snippet with a leading and a trailing newline.
        html: [
          "",
          `<div id="grnhse_app"></div>`,
          `<script src="https://boards.greenhouse.io/embed/job_board/js?for=acme"></script>`,
          "",
        ].join("\n"),
        expected: hit("greenhouse", "acme"),
      },
      {
        title: "detects Lever iframe embed",
        html: `<iframe src="https://jobs.lever.co/figma"></iframe>`,
        expected: hit("lever", "figma"),
      },
      {
        title: "detects Lever data-lever attribute",
        html: `<div data-lever-company="figma" data-lever-widget="jobs"></div>`,
        expected: hit("lever", "figma"),
      },
      {
        title: "detects Ashby iframe embed",
        html: `<iframe src="https://jobs.ashbyhq.com/ramp"></iframe>`,
        expected: hit("ashby", "ramp"),
      },
      {
        title: "detects Ashby embed script with data attribute",
        html: `<script src="https://jobs.ashbyhq.com/embed.js" data-ashby-job-board-id="ramp"></script>`,
        expected: hit("ashby", "ramp"),
      },
      {
        title: "detects Workday iframe embed",
        html: `<iframe src="https://netflix.wd5.myworkdayjobs.com/en-US/jobs"></iframe>`,
        expected: hit("workday", "netflix"),
      },
      {
        title: "detects BambooHR iframe embed",
        html: `<iframe src="https://acme.bamboohr.com/careers/list"></iframe>`,
        expected: hit("bamboohr", "acme"),
      },
      {
        title: "returns unknown when no ATS signatures found",
        html: `<html><body><h1>Jobs</h1><p>We're hiring!</p></body></html>`,
        expected: miss(),
      },
      {
        // The page URL points at Greenhouse while the markup embeds Lever;
        // the URL match is the one expected to win.
        title: "URL detection takes priority over HTML detection",
        html: `<iframe src="https://jobs.lever.co/other"></iframe>`,
        url: "https://boards.greenhouse.io/acme",
        expected: hit("greenhouse", "acme"),
      },
      {
        title: "handles protocol-relative iframe src (//)",
        html: `<iframe src="//boards.greenhouse.io/acme"></iframe>`,
        expected: hit("greenhouse", "acme"),
      },
    ];

    for (const { title, html, url = NEUTRAL_PAGE_URL, expected } of htmlCases) {
      it(title, () => {
        expect(detectProvider(html, url)).toEqual(expected);
      });
    }
  });
});
@@ -0,0 +1,104 @@
1
+ import { describe, it, expect, beforeEach } from "vitest";
2
+ import { bus } from "../../src/events.js";
3
+
4
/**
 * Behavioral checks for the imported `bus`: subscribing with `on`,
 * unsubscribing with `off` / `removeAllListeners`, and payload delivery
 * through `emit`.
 */
describe("TypedEventBus", () => {
  // Every test uses the same imported `bus`, so begin each one with no
  // listeners registered.
  beforeEach(() => {
    bus.removeAllListeners();
  });

  it("emits and receives events", () => {
    const seen: Array<{ total: number }> = [];
    bus.on("crawl:start", (event) => seen.push(event));

    bus.emit("crawl:start", { total: 5 });

    expect(seen).toHaveLength(1);
    expect(seen[0]).toEqual({ total: 5 });
  });

  it("supports multiple listeners for same event", () => {
    let invocations = 0;
    // Two distinct function objects, so both registrations are independent.
    bus.on("crawl:start", () => {
      invocations += 1;
    });
    bus.on("crawl:start", () => {
      invocations += 1;
    });

    bus.emit("crawl:start", { total: 1 });

    expect(invocations).toBe(2);
  });

  it("only notifies listeners of the emitted event", () => {
    const fired = { start: false, complete: false };
    bus.on("crawl:start", () => {
      fired.start = true;
    });
    bus.on("crawl:complete", () => {
      fired.complete = true;
    });

    bus.emit("crawl:start", { total: 1 });

    expect(fired.start).toBe(true);
    expect(fired.complete).toBe(false);
  });

  it("removes specific listener with off()", () => {
    let invocations = 0;
    const listener = () => {
      invocations += 1;
    };

    bus.on("crawl:start", listener);
    bus.emit("crawl:start", { total: 1 });
    expect(invocations).toBe(1);

    bus.off("crawl:start", listener);
    bus.emit("crawl:start", { total: 2 });
    // The counter must stay at 1: the listener was detached before this emit.
    expect(invocations).toBe(1);
  });

  it("removeAllListeners() clears everything", () => {
    let anyListenerRan = false;
    const markRan = () => {
      anyListenerRan = true;
    };
    bus.on("crawl:start", () => markRan());
    bus.on("crawl:complete", () => markRan());

    bus.removeAllListeners();

    bus.emit("crawl:start", { total: 1 });
    bus.emit("crawl:complete", {
      totalJobs: 0,
      totalMatched: 0,
      totalUrls: 0,
      failedUrls: 0,
    });

    expect(anyListenerRan).toBe(false);
  });

  it("passes typed payloads correctly", () => {
    type UrlDone = {
      urlId: string;
      company: string;
      matched: number;
      total: number;
    };
    // Builds a new object on each call so the emitted payload and the
    // expected value are never the same reference.
    const sample = (): UrlDone => ({
      urlId: "u_abc12",
      company: "Acme",
      matched: 5,
      total: 10,
    });

    let captured: UrlDone | null = null;
    bus.on("url:done", (event) => {
      captured = event;
    });

    bus.emit("url:done", sample());

    expect(captured).toEqual(sample());
  });

  it("handles error events", () => {
    let lastMessage = "";
    bus.on("error", (event) => {
      lastMessage = event.message;
    });

    bus.emit("error", { message: "something went wrong" });

    expect(lastMessage).toBe("something went wrong");
  });
});
@@ -0,0 +1,165 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { formatOutput } from "../../src/core/format-output.js";
3
+ import type { Job } from "../../src/types/index.js";
4
+
5
/**
 * Builds a `Job` fixture for the formatter tests.
 *
 * @param overrides - Fields that replace the defaults below; any key present
 *   in `overrides` wins, including keys explicitly set to `null`.
 * @returns A new object made of the defaults with `overrides` applied on top.
 */
function makeJob(overrides: Partial<Job> = {}): Job {
  const defaults: Job = {
    id: "abc123",
    title: "Software Engineer",
    company: "Acme Corp",
    location: "San Francisco, CA",
    workMode: "onsite",
    department: "Engineering",
    url: "https://example.com/jobs/1",
    sourceUrl: "https://example.com/careers",
    provider: "greenhouse",
    description: null,
    postedAt: "2026-01-15T00:00:00.000Z",
    extractedAt: "2026-01-20T00:00:00.000Z",
  };

  return { ...defaults, ...overrides };
}
22
+
23
/**
 * Suite for `formatOutput(jobs, format)` covering the "json", "table",
 * "csv" and "markdown" formats.
 */
describe("formatOutput", () => {
  // Shared fixture: one fully populated job followed by one whose location,
  // department and postedAt are null, so null handling shows up in the
  // second data row of every format.
  const jobs = [
    makeJob(),
    makeJob({
      id: "def456",
      title: "Product Designer",
      company: "Startup Inc",
      location: null,
      department: null,
      workMode: "remote",
      url: "https://example.com/jobs/2",
      postedAt: null,
    }),
  ];

  describe("json", () => {
    it("returns pretty-printed JSON", () => {
      const result = formatOutput(jobs, "json");
      const parsed = JSON.parse(result);
      expect(parsed).toHaveLength(2);
      expect(parsed[0].title).toBe("Software Engineer");
      expect(parsed[1].title).toBe("Product Designer");
    });

    it("returns empty array JSON for no jobs", () => {
      const result = formatOutput([], "json");
      expect(JSON.parse(result)).toEqual([]);
    });
  });

  describe("table", () => {
    it("returns formatted table with headers", () => {
      const result = formatOutput(jobs, "table");
      const lines = result.split("\n");
      // Line 0 is the header row.
      expect(lines[0]).toContain("Company");
      expect(lines[0]).toContain("Title");
      expect(lines[0]).toContain("Location");
      expect(lines[0]).toContain("Department");
      expect(lines[0]).toContain("URL");
      // separator line
      expect(lines[1]).toMatch(/^-+/);
      // data rows, in fixture order
      expect(lines[2]).toContain("Acme Corp");
      expect(lines[3]).toContain("Startup Inc");
    });

    it("shows dashes for null values", () => {
      const result = formatOutput(jobs, "table");
      const lines = result.split("\n");
      // Second job has null location and department
      expect(lines[3]).toContain("-");
    });

    it("returns message for empty jobs", () => {
      const result = formatOutput([], "table");
      expect(result).toBe("No matching jobs found.");
    });
  });

  describe("csv", () => {
    it("includes header row", () => {
      const result = formatOutput(jobs, "csv");
      const lines = result.split("\n");
      expect(lines[0]).toBe(
        "company,title,location,department,workMode,url,postedAt"
      );
    });

    it("outputs correct data rows", () => {
      const result = formatOutput(jobs, "csv");
      const lines = result.split("\n");
      expect(lines).toHaveLength(3); // header + 2 rows
      expect(lines[1]).toContain("Acme Corp");
      expect(lines[1]).toContain("Software Engineer");
    });

    it("escapes values containing commas", () => {
      const jobsWithComma = [makeJob({ company: "Acme, Inc" })];
      const result = formatOutput(jobsWithComma, "csv");
      const lines = result.split("\n");
      expect(lines[1]).toContain('"Acme, Inc"');
    });

    it("escapes values containing double quotes", () => {
      const jobsWithQuote = [makeJob({ title: 'Senior "Staff" Engineer' })];
      const result = formatOutput(jobsWithQuote, "csv");
      const lines = result.split("\n");
      // Embedded quotes are doubled and the whole field is wrapped in quotes.
      expect(lines[1]).toContain('"Senior ""Staff"" Engineer"');
    });

    it("escapes values containing newlines", () => {
      const jobsWithNewline = [makeJob({ company: "Line1\nLine2" })];
      const result = formatOutput(jobsWithNewline, "csv");
      // Checked against the whole output: splitting on "\n" would cut the
      // quoted field in two.
      expect(result).toContain('"Line1\nLine2"');
    });

    it("uses empty string for null values", () => {
      const result = formatOutput(
        [makeJob({ location: null, department: null, postedAt: null })],
        "csv"
      );
      const lines = result.split("\n");
      const fields = lines[1].split(",");
      // Column order follows the header asserted in "includes header row":
      // company,title,location,department,workMode,url,postedAt.
      // location (index 2), department (index 3) and postedAt (index 6) were
      // all passed as null, so each of them should be empty.
      expect(fields[2]).toBe("");
      expect(fields[3]).toBe("");
      expect(fields[6]).toBe("");
    });
  });

  describe("markdown", () => {
    it("returns markdown table with headers", () => {
      const result = formatOutput(jobs, "markdown");
      const lines = result.split("\n");
      expect(lines[0]).toBe("| Company | Title | Location | Department |");
      expect(lines[1]).toBe("| --- | --- | --- | --- |");
    });

    it("wraps title in link", () => {
      const result = formatOutput(jobs, "markdown");
      expect(result).toContain(
        "[Software Engineer](https://example.com/jobs/1)"
      );
    });

    it("shows dashes for null values", () => {
      const result = formatOutput(jobs, "markdown");
      const lines = result.split("\n");
      // Second job row has null location and department
      expect(lines[3]).toContain("| - |");
    });

    it("returns message for empty jobs", () => {
      const result = formatOutput([], "markdown");
      expect(result).toBe("No matching jobs found.");
    });
  });
});
@@ -0,0 +1,257 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { matchJobs } from "../../src/core/match-jobs.js";
3
+ import type { Job, SearchCriteria } from "../../src/types/index.js";
4
+
5
/**
 * Builds a `Job` fixture for the matcher tests.
 *
 * @param overrides - Fields that replace the baseline values; a key present
 *   in `overrides` wins even when its value is `null`.
 * @returns A new object: the baseline job with `overrides` layered on top.
 */
function makeJob(overrides: Partial<Job> = {}): Job {
  const baseline: Job = {
    id: "abc123",
    title: "Software Engineer",
    company: "Acme",
    location: "San Francisco, CA",
    workMode: "onsite",
    department: "Engineering",
    url: "https://example.com/jobs/1",
    sourceUrl: "https://example.com/careers",
    provider: "greenhouse",
    description: null,
    postedAt: null,
    extractedAt: "2026-01-01T00:00:00.000Z",
  };

  return { ...baseline, ...overrides };
}
22
+
23
/**
 * Builds a `SearchCriteria` with every filter switched off: the two keyword
 * lists are empty arrays and every other field is `null`.
 *
 * @param overrides - Criteria fields to set on top of the blank baseline.
 * @returns A new criteria object; the keyword arrays are created per call.
 */
function emptyCriteria(overrides: Partial<SearchCriteria> = {}): SearchCriteria {
  const blank: SearchCriteria = {
    keywords: [],
    excludeKeywords: [],
    location: null,
    workMode: null,
    departments: null,
    role: null,
    roleType: null,
    jobType: null,
    minExperience: null,
    companyStage: null,
    industry: null,
    companySize: null,
    hasSalary: null,
    hasEquity: null,
    hasInterviewProcess: null,
    visaSponsorship: null,
  };

  return { ...blank, ...overrides };
}
44
+
45
/**
 * Suite for `matchJobs(jobs, criteria)`: keyword, exclude-keyword, location,
 * work-mode and department filters, alone and combined.
 */
describe("matchJobs", () => {
  // Fixture of four jobs. The last one has null location and null workMode,
  // which the tests below expect to pass the location and work-mode filters.
  const jobs = [
    makeJob({ title: "Software Engineer", department: "Engineering" }),
    makeJob({
      title: "Senior Product Designer",
      department: "Design",
      location: "Remote",
      workMode: "remote",
    }),
    makeJob({
      title: "Data Scientist",
      department: "Data",
      location: "New York, NY",
    }),
    makeJob({
      title: "Engineering Manager",
      department: "Engineering",
      location: null,
      workMode: null,
    }),
  ];

  // Runs the matcher over the shared fixture with the given criteria fields.
  const search = (overrides: Partial<SearchCriteria> = {}) =>
    matchJobs(jobs, emptyCriteria(overrides));

  // Projects a result list down to its titles, keeping order.
  const titlesOf = (list: ReadonlyArray<{ title: string }>) =>
    list.map((job) => job.title);

  it("returns all jobs when no criteria specified", () => {
    expect(search()).toHaveLength(4);
  });

  describe("keyword matching", () => {
    it("filters by keyword in title (case-insensitive)", () => {
      const matched = search({ keywords: ["engineer"] });
      expect(matched).toHaveLength(2);
      expect(titlesOf(matched)).toEqual([
        "Software Engineer",
        "Engineering Manager",
      ]);
    });

    it("matches any keyword (OR logic)", () => {
      const matched = search({ keywords: ["designer", "scientist"] });
      expect(matched).toHaveLength(2);
    });

    it("is case-insensitive", () => {
      const matched = search({ keywords: ["SOFTWARE"] });
      expect(matched).toHaveLength(1);
      expect(matched[0].title).toBe("Software Engineer");
    });
  });

  describe("exclude keywords", () => {
    it("excludes jobs matching exclude keywords", () => {
      const kept = search({ excludeKeywords: ["manager"] });
      expect(kept).toHaveLength(3);
      // No surviving title may mention "Manager".
      expect(titlesOf(kept).some((title) => title.includes("Manager"))).toBe(
        false
      );
    });

    it("exclude is case-insensitive", () => {
      const kept = search({ excludeKeywords: ["SENIOR"] });
      expect(kept).toHaveLength(3);
    });

    it("applies both keywords and excludeKeywords together", () => {
      const matched = search({
        keywords: ["engineer"],
        excludeKeywords: ["manager"],
      });
      expect(matched).toHaveLength(1);
      expect(matched[0].title).toBe("Software Engineer");
    });
  });

  describe("location matching", () => {
    it("filters by location substring", () => {
      const matched = search({ location: "San Francisco" });
      // SF job + null-location job
      expect(matched).toHaveLength(2);
    });

    it("includes jobs with null location (doesn't exclude for missing data)", () => {
      const matched = search({ location: "Tokyo" });
      // Only the null-location job passes (null is included)
      expect(matched).toHaveLength(1);
      expect(matched[0].location).toBeNull();
    });

    it("is case-insensitive", () => {
      const matched = search({ location: "new york" });
      // NY job + null-location job
      expect(matched).toHaveLength(2);
    });
  });

  describe("work mode matching", () => {
    it("filters by work mode", () => {
      const matched = search({ workMode: ["remote"] });
      // remote job + null-workMode job
      expect(matched).toHaveLength(2);
    });

    it("includes jobs with null workMode", () => {
      const matched = search({ workMode: ["hybrid"] });
      // Only null-workMode job passes
      expect(matched).toHaveLength(1);
      expect(matched[0].workMode).toBeNull();
    });

    it("matches multiple work modes (OR logic)", () => {
      const matched = search({ workMode: ["remote", "onsite"] });
      // remote (1) + onsite (default in 2 jobs) + null passes through (1) = 4
      expect(matched).toHaveLength(4);
    });
  });

  describe("department matching", () => {
    it("filters by department substring", () => {
      const matched = search({ departments: ["engineering"] });
      // "Software Engineer" (Engineering) + "Engineering Manager" (Engineering) = 2
      // No null-dept jobs in the test data
      expect(matched).toHaveLength(2);
    });

    it("includes jobs with null department", () => {
      // Dedicated fixture: the shared one has no null-department job.
      const nullDeptJobs = [
        makeJob({ department: null }),
        makeJob({ department: "Sales" }),
      ];
      const matched = matchJobs(
        nullDeptJobs,
        emptyCriteria({ departments: ["Engineering"] })
      );
      expect(matched).toHaveLength(1);
      expect(matched[0].department).toBeNull();
    });

    it("matches any department (OR logic)", () => {
      const matched = search({ departments: ["engineering", "design"] });
      // Eng (2) + Design (1) = 3 (no null-dept jobs in test data)
      expect(matched).toHaveLength(3);
    });
  });

  describe("combined criteria", () => {
    it("applies all criteria as AND filters", () => {
      const matched = search({
        keywords: ["engineer"],
        location: "San Francisco",
        workMode: ["onsite"],
        departments: ["engineering"],
      });
      // "Software Engineer" matches all. "Engineering Manager" also matches:
      // keyword "engineer" in "Engineering Manager", null location passes,
      // null workMode passes, dept "Engineering" matches.
      const titles = titlesOf(matched);
      expect(matched).toHaveLength(2);
      expect(titles).toContain("Software Engineer");
      expect(titles).toContain("Engineering Manager");
    });
  });

  it("returns empty array when no jobs match", () => {
    expect(search({ keywords: ["nonexistent"] })).toHaveLength(0);
  });

  it("handles empty jobs array", () => {
    const matched = matchJobs([], emptyCriteria({ keywords: ["engineer"] }));
    expect(matched).toHaveLength(0);
  });
});