jobcrawl 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/.prettierrc.json +10 -0
  2. package/CHANGELOG.md +40 -0
  3. package/README.md +232 -0
  4. package/dist/core/aggregators/yc.d.ts +7 -0
  5. package/dist/core/aggregators/yc.js +320 -0
  6. package/dist/core/browser.d.ts +30 -0
  7. package/dist/core/browser.js +196 -0
  8. package/dist/core/cache.d.ts +13 -0
  9. package/dist/core/cache.js +41 -0
  10. package/dist/core/detect-provider.d.ts +7 -0
  11. package/dist/core/detect-provider.js +125 -0
  12. package/dist/core/discover-careers.d.ts +18 -0
  13. package/dist/core/discover-careers.js +92 -0
  14. package/dist/core/extract-jobs.d.ts +14 -0
  15. package/dist/core/extract-jobs.js +36 -0
  16. package/dist/core/fetch-page.d.ts +11 -0
  17. package/dist/core/fetch-page.js +39 -0
  18. package/dist/core/format-output.d.ts +2 -0
  19. package/dist/core/format-output.js +59 -0
  20. package/dist/core/match-jobs.d.ts +6 -0
  21. package/dist/core/match-jobs.js +43 -0
  22. package/dist/core/providers/ashby.d.ts +6 -0
  23. package/dist/core/providers/ashby.js +58 -0
  24. package/dist/core/providers/generic.d.ts +6 -0
  25. package/dist/core/providers/generic.js +294 -0
  26. package/dist/core/providers/greenhouse.d.ts +6 -0
  27. package/dist/core/providers/greenhouse.js +47 -0
  28. package/dist/core/providers/lever.d.ts +7 -0
  29. package/dist/core/providers/lever.js +60 -0
  30. package/dist/core/providers/yc.d.ts +7 -0
  31. package/dist/core/providers/yc.js +320 -0
  32. package/dist/core/resolve-iframe.d.ts +6 -0
  33. package/dist/core/resolve-iframe.js +51 -0
  34. package/dist/core/save-raw.d.ts +4 -0
  35. package/dist/core/save-raw.js +13 -0
  36. package/dist/data/companies.d.ts +9 -0
  37. package/dist/data/companies.js +2849 -0
  38. package/dist/entrypoints/cli/app.d.ts +3 -0
  39. package/dist/entrypoints/cli/app.js +91 -0
  40. package/dist/entrypoints/cli/components/crawl-view.d.ts +1 -0
  41. package/dist/entrypoints/cli/components/crawl-view.js +94 -0
  42. package/dist/entrypoints/cli/components/discover-view.d.ts +1 -0
  43. package/dist/entrypoints/cli/components/discover-view.js +67 -0
  44. package/dist/entrypoints/cli/crawl-aggregators.d.ts +26 -0
  45. package/dist/entrypoints/cli/crawl-aggregators.js +76 -0
  46. package/dist/entrypoints/cli/crawl-url.d.ts +26 -0
  47. package/dist/entrypoints/cli/crawl-url.js +54 -0
  48. package/dist/entrypoints/cli/crawl.d.ts +32 -0
  49. package/dist/entrypoints/cli/crawl.js +108 -0
  50. package/dist/entrypoints/cli/discover.d.ts +10 -0
  51. package/dist/entrypoints/cli/discover.js +69 -0
  52. package/dist/entrypoints/cli/index.d.ts +2 -0
  53. package/dist/entrypoints/cli/index.js +197 -0
  54. package/dist/entrypoints/cli/init.d.ts +9 -0
  55. package/dist/entrypoints/cli/init.js +94 -0
  56. package/dist/entrypoints/cli/plain.d.ts +6 -0
  57. package/dist/entrypoints/cli/plain.js +77 -0
  58. package/dist/events.d.ts +114 -0
  59. package/dist/events.js +17 -0
  60. package/dist/orchestrators/crawl-all.d.ts +2 -0
  61. package/dist/orchestrators/crawl-all.js +66 -0
  62. package/dist/orchestrators/discover-all.d.ts +10 -0
  63. package/dist/orchestrators/discover-all.js +39 -0
  64. package/dist/threads/pool.d.ts +5 -0
  65. package/dist/threads/pool.js +23 -0
  66. package/dist/threads/process-url.d.ts +9 -0
  67. package/dist/threads/process-url.js +229 -0
  68. package/dist/types/index.d.ts +83 -0
  69. package/dist/types/index.js +6 -0
  70. package/dist/utils/config.d.ts +17 -0
  71. package/dist/utils/config.js +57 -0
  72. package/dist/utils/google-search.d.ts +19 -0
  73. package/dist/utils/google-search.js +139 -0
  74. package/dist/utils/llm.d.ts +8 -0
  75. package/dist/utils/llm.js +25 -0
  76. package/package.json +42 -0
  77. package/src/core/aggregators/yc.ts +415 -0
  78. package/src/core/browser.ts +239 -0
  79. package/src/core/detect-provider.ts +162 -0
  80. package/src/core/discover-careers.ts +117 -0
  81. package/src/core/extract-jobs.ts +50 -0
  82. package/src/core/fetch-page.ts +41 -0
  83. package/src/core/format-output.ts +80 -0
  84. package/src/core/match-jobs.ts +56 -0
  85. package/src/core/providers/ashby.ts +84 -0
  86. package/src/core/providers/generic.ts +332 -0
  87. package/src/core/providers/greenhouse.ts +74 -0
  88. package/src/core/providers/lever.ts +90 -0
  89. package/src/core/resolve-iframe.ts +59 -0
  90. package/src/core/save-raw.ts +18 -0
  91. package/src/data/companies.ts +2859 -0
  92. package/src/entrypoints/cli/app.tsx +173 -0
  93. package/src/entrypoints/cli/components/crawl-view.tsx +163 -0
  94. package/src/entrypoints/cli/components/discover-view.tsx +138 -0
  95. package/src/entrypoints/cli/crawl-aggregators.ts +112 -0
  96. package/src/entrypoints/cli/crawl-url.ts +87 -0
  97. package/src/entrypoints/cli/crawl.ts +163 -0
  98. package/src/entrypoints/cli/discover.ts +96 -0
  99. package/src/entrypoints/cli/index.ts +252 -0
  100. package/src/entrypoints/cli/init.ts +117 -0
  101. package/src/entrypoints/cli/plain.ts +104 -0
  102. package/src/events.ts +79 -0
  103. package/src/orchestrators/crawl-all.ts +96 -0
  104. package/src/orchestrators/discover-all.ts +61 -0
  105. package/src/threads/pool.ts +29 -0
  106. package/src/threads/process-url.ts +312 -0
  107. package/src/types/index.ts +110 -0
  108. package/src/utils/config.ts +79 -0
  109. package/src/utils/google-search.ts +155 -0
  110. package/src/utils/llm.ts +33 -0
  111. package/test/integration/process-url.test.ts +301 -0
  112. package/test/integration/providers/ashby.test.ts +163 -0
  113. package/test/integration/providers/greenhouse.test.ts +191 -0
  114. package/test/integration/providers/lever.test.ts +188 -0
  115. package/test/unit/config.test.ts +64 -0
  116. package/test/unit/detect-provider.test.ts +165 -0
  117. package/test/unit/events.test.ts +104 -0
  118. package/test/unit/format-output.test.ts +165 -0
  119. package/test/unit/match-jobs.test.ts +257 -0
  120. package/test/unit/pool.test.ts +74 -0
  121. package/test/unit/providers/generic.test.ts +139 -0
  122. package/test/unit/resolve-iframe.test.ts +100 -0
  123. package/tsconfig.json +19 -0
  124. package/vitest.config.ts +7 -0
@@ -0,0 +1,173 @@
1
+ import React, { useState, useEffect } from "react";
2
+ import { Text, Box, useApp } from "ink";
3
+ import { bus } from "../../events.js";
4
+ import { CrawlView } from "./components/crawl-view.js";
5
+ import { DiscoverView } from "./components/discover-view.js";
6
+
7
/**
 * Every renderable state of the CLI, discriminated by `type`.
 *
 * `idle`, `crawl` and `discover` select a progress view; the remaining
 * variants carry an output payload (JSON, plain text, a table, a styled
 * message, or an error message).
 */
type ViewState =
  // Progress-view selectors (no payload).
  | { type: "idle" }
  | { type: "crawl" }
  | { type: "discover" }
  // Output payloads.
  | { type: "json"; data: unknown }
  | { type: "text"; text: string }
  | {
      type: "table";
      headers: string[];
      rows: string[][];
      footer?: string;
    }
  | {
      type: "message";
      text: string;
      style?: "success" | "warning" | "dim";
    }
  | { type: "error"; message: string };
16
+
17
/**
 * Root Ink component for the CLI.
 *
 * Attaches every bus listener, then starts `run()`, and renders the progress
 * views plus the most recent output payload.
 *
 * Both progress views are mounted from the very first render so that their
 * own bus subscriptions exist before `run()` can emit anything. Previously
 * `DiscoverView` was mounted only after the first `discover:searching` event
 * switched the view, which meant it subscribed too late and missed that event
 * (and any others emitted before React re-rendered). It is now always mounted
 * and merely hidden until the view becomes "discover".
 *
 * @param run - The command to execute. When it resolves, `done` is emitted
 *   with `exitCode: 0`; when it rejects, `error` is emitted with the failure
 *   message, followed by `done` with `exitCode: 1`.
 */
export function App({ run }: { run: () => Promise<void> }) {
  const { exit } = useApp();
  const [view, setView] = useState<ViewState>({ type: "idle" });
  const [output, setOutput] = useState<ViewState | null>(null);
  const [error, setError] = useState<string | null>(null);

  useEffect(() => {
    // Crawl events → switch to crawl view
    const onCrawlStart = () => setView({ type: "crawl" });

    // Discover events → switch to discover view
    const onDiscoverSearching = () => setView({ type: "discover" });

    // Output events → store separately so they don't replace the progress view
    const onOutputJson = (p: { data: unknown }) =>
      setOutput({ type: "json", data: p.data });
    const onOutputText = (p: { text: string }) =>
      setOutput({ type: "text", text: p.text });
    const onOutputTable = (p: {
      headers: string[];
      rows: string[][];
      footer?: string;
    }) => setOutput({ type: "table", ...p });
    const onOutputMessage = (p: {
      text: string;
      style?: "success" | "warning" | "dim";
    }) => setOutput({ type: "message", ...p });

    // Error + lifecycle
    const onError = (p: { message: string }) => setError(p.message);
    const onDone = () => {
      // Exit is deferred by 16 ms rather than called synchronously —
      // presumably so the final frame can render first (TODO confirm).
      // NOTE(review): the `exitCode` carried by the `done` payload is not
      // consumed here; verify that the caller maps it to the process exit code.
      setTimeout(() => exit(), 16);
    };

    bus.on("crawl:start", onCrawlStart);
    bus.on("discover:searching", onDiscoverSearching);
    bus.on("output:json", onOutputJson);
    bus.on("output:text", onOutputText);
    bus.on("output:table", onOutputTable);
    bus.on("output:message", onOutputMessage);
    bus.on("error", onError);
    bus.on("done", onDone);

    // Execute command after all listeners are attached
    run()
      .then(() => bus.emit("done", { exitCode: 0 }))
      .catch((err: unknown) => {
        bus.emit("error", {
          message: err instanceof Error ? err.message : String(err),
        });
        bus.emit("done", { exitCode: 1 });
      });

    return () => {
      bus.off("crawl:start", onCrawlStart);
      bus.off("discover:searching", onDiscoverSearching);
      bus.off("output:json", onOutputJson);
      bus.off("output:text", onOutputText);
      bus.off("output:table", onOutputTable);
      bus.off("output:message", onOutputMessage);
      bus.off("error", onError);
      bus.off("done", onDone);
    };
  }, [exit]);

  return (
    <Box flexDirection="column">
      {error && <Text color="red">Error: {error}</Text>}
      {/* Always mount the progress views so they subscribe before run() fires
          events. Child useEffect runs before parent useEffect, guaranteeing
          the subscriptions are active when the first event arrives. */}
      <CrawlView />
      {/* Hidden (but subscribed) until a discover event selects this view.
          The "idle" and "crawl" views render nothing of their own, so only
          "discover" needs handling here. */}
      <Box
        flexDirection="column"
        display={view.type === "discover" ? "flex" : "none"}
      >
        <DiscoverView />
      </Box>
      {output && <ViewRenderer view={output} />}
    </Box>
  );
}
94
+
95
/**
 * Renders a single {@link ViewState}.
 *
 * `idle` and `crawl` render nothing; `discover` mounts the discover progress
 * view; every other variant renders its payload as text or as a table.
 */
function ViewRenderer({ view }: { view: ViewState }) {
  switch (view.type) {
    // Nothing to draw for these two states.
    case "idle":
    case "crawl":
      return null;

    case "discover":
      return <DiscoverView />;

    case "json": {
      const pretty = JSON.stringify(view.data, null, 2);
      return <Text>{pretty}</Text>;
    }

    case "text":
      return <Text>{view.text}</Text>;

    case "table": {
      const { headers, rows, footer } = view;
      return <TableView headers={headers} rows={rows} footer={footer} />;
    }

    case "message": {
      // success → green, warning → yellow, anything else → default colour.
      let color: string | undefined;
      if (view.style === "success") {
        color = "green";
      } else if (view.style === "warning") {
        color = "yellow";
      }
      return (
        <Text color={color} dimColor={view.style === "dim"}>
          {view.text}
        </Text>
      );
    }

    case "error":
      return <Text color="red">{view.message}</Text>;
  }
}
140
+
141
/**
 * Renders a fixed-width text table: a bold header line, a dashed separator,
 * one line per row, and an optional dimmed footer.
 *
 * Each column is as wide as its longest header/cell, capped at 50 characters.
 * Both headers and cells longer than the column width are truncated so every
 * line stays aligned (previously only cells were truncated, so an over-long
 * header pushed the header row out of alignment).
 *
 * @param headers - Column titles; their count defines the number of sized columns.
 * @param rows - Table body. Missing cells are treated as empty strings; cells
 *   beyond the last header are printed as-is, without truncation or padding.
 * @param footer - Optional line rendered dimmed below the table.
 */
function TableView({
  headers,
  rows,
  footer,
}: {
  headers: string[];
  rows: string[][];
  footer?: string;
}) {
  const maxColumnWidth = 50;

  // Plain loop instead of Math.max(...spread): spreading one argument per row
  // can exceed the engine's argument limit on very large tables.
  const widths = headers.map((header, col) => {
    let widest = header.length;
    for (const row of rows) {
      widest = Math.max(widest, (row[col] ?? "").length);
    }
    return Math.min(maxColumnWidth, widest);
  });

  /** Truncate/pad `text` to the width of column `col` (as-is if the column has no header). */
  const fit = (text: string | undefined, col: number): string => {
    const value = text ?? "";
    const width = widths[col];
    return width === undefined ? value : value.slice(0, width).padEnd(width);
  };

  const headerLine = headers.map((h, i) => fit(h, i)).join(" | ");
  const separator = widths.map((w) => "-".repeat(w)).join("-+-");

  return (
    <Box flexDirection="column">
      <Text bold>{headerLine}</Text>
      <Text>{separator}</Text>
      {rows.map((row, i) => (
        <Text key={i}>{row.map((cell, j) => fit(cell, j)).join(" | ")}</Text>
      ))}
      {footer && <Text dimColor>{footer}</Text>}
    </Box>
  );
}
@@ -0,0 +1,163 @@
1
+ import React, { useState, useEffect } from "react";
2
+ import { Text, Box } from "ink";
3
+ import Spinner from "ink-spinner";
4
+ import { bus } from "../../../events.js";
5
+
6
/** One progress line shown under a company while its URL is processed. */
interface StepLine {
  /** Label rendered for this step. */
  text: string;
  /** `active` steps show a spinner; `done` / `failed` show a check / cross. */
  status: "active" | "done" | "failed";
}
10
+
11
/** Progress state for a single URL, keyed elsewhere by its `urlId`. */
interface UrlThread {
  /** Company name displayed as the heading of the step list. */
  company: string;
  /** Steps in the order they were reported. */
  steps: StepLine[];
}
15
+
16
/** Totals delivered by the `crawl:complete` event and shown as the final line. */
type CrawlSummary = {
  /** Part of the payload; not rendered by the crawl view. */
  totalJobs: number;
  /** Number of matched jobs; also decides the summary colour. */
  totalMatched: number;
  /** Number of URLs crawled. */
  totalUrls: number;
  /** Number of URLs that failed; shown only when greater than zero. */
  failedUrls: number;
};
22
+
23
/**
 * Live crawl progress: one block per URL (company heading plus its steps),
 * followed by a summary line once `crawl:complete` has been received.
 *
 * State is driven entirely by bus events; the component subscribes on mount
 * and unsubscribes on unmount.
 */
export function CrawlView() {
  const [threads, setThreads] = useState<Map<string, UrlThread>>(new Map());
  const [summary, setSummary] = useState<CrawlSummary | null>(null);

  useEffect(() => {
    /**
     * Turn every active step of `urlId` into `settleAs`, then optionally
     * append one more line. Unknown `urlId`s leave the state untouched.
     */
    const settle = (
      urlId: string,
      settleAs: "done" | "failed",
      appended?: StepLine
    ) => {
      setThreads((current) => {
        const existing = current.get(urlId);
        if (!existing) return current;

        const steps: StepLine[] = existing.steps.map((line) =>
          line.status === "active" ? { ...line, status: settleAs } : line
        );
        if (appended) steps.push(appended);

        return new Map(current).set(urlId, { ...existing, steps });
      });
    };

    /** Complete the running step and start a new active one. */
    const startStep = (urlId: string, text: string) =>
      settle(urlId, "done", { text, status: "active" });

    // Subscribed for symmetry only; nothing to do when the crawl starts.
    const onCrawlStart = () => {};

    // A fetch (re)creates the thread with a single active "fetching" step.
    const onFetching = (p: { urlId: string; url: string; company: string }) => {
      setThreads((current) =>
        new Map(current).set(p.urlId, {
          company: p.company,
          steps: [{ text: "fetching", status: "active" }],
        })
      );
    };

    const onDetecting = (p: { urlId: string; provider: string }) => {
      startStep(p.urlId, `detected ${p.provider}`);
    };

    const onExtracting = (p: { urlId: string; jobCount?: number }) => {
      const suffix = p.jobCount === undefined ? "" : ` (${p.jobCount} jobs)`;
      startStep(p.urlId, `extracting${suffix}`);
    };

    const onMatching = (p: { urlId: string; matched: number; total: number }) => {
      startStep(p.urlId, `matching ${p.matched}/${p.total}`);
    };

    const onDone = (p: {
      urlId: string;
      company: string;
      matched: number;
      total: number;
    }) => {
      settle(p.urlId, "done", {
        text: `${p.matched}/${p.total} matched`,
        status: "done",
      });
    };

    const onFailed = (p: { urlId: string; company: string; error: string }) => {
      // An empty error message adds no extra line.
      settle(
        p.urlId,
        "failed",
        p.error ? { text: p.error, status: "failed" } : undefined
      );
    };

    const onComplete = (p: CrawlSummary) => setSummary(p);

    bus.on("crawl:start", onCrawlStart);
    bus.on("url:fetching", onFetching);
    bus.on("url:detecting", onDetecting);
    bus.on("url:extracting", onExtracting);
    bus.on("url:matching", onMatching);
    bus.on("url:done", onDone);
    bus.on("url:failed", onFailed);
    bus.on("crawl:complete", onComplete);

    return () => {
      bus.off("crawl:start", onCrawlStart);
      bus.off("url:fetching", onFetching);
      bus.off("url:detecting", onDetecting);
      bus.off("url:extracting", onExtracting);
      bus.off("url:matching", onMatching);
      bus.off("url:done", onDone);
      bus.off("url:failed", onFailed);
      bus.off("crawl:complete", onComplete);
    };
  }, []);

  const plural = summary && summary.totalUrls === 1 ? "" : "s";

  return (
    <Box flexDirection="column">
      {Array.from(threads, ([urlId, thread]) => (
        <Box key={urlId} flexDirection="column" marginBottom={1}>
          <Text bold>{thread.company}</Text>
          {thread.steps.map((step, i) => (
            <StepRow key={i} step={step} />
          ))}
        </Box>
      ))}

      {summary && (
        <Box marginTop={0}>
          <Text color={summary.totalMatched > 0 ? "green" : "yellow"}>
            {`\u2714 ${summary.totalMatched} jobs matched across ${summary.totalUrls} URL${plural}`}
            {summary.failedUrls > 0 && (
              <Text color="yellow"> ({summary.failedUrls} failed)</Text>
            )}
          </Text>
        </Box>
      )}
    </Box>
  );
}
140
+
141
/**
 * One progress line: a status marker followed by the step text.
 *
 * done → green check with dimmed text, failed → red cross with red text,
 * active → cyan spinner with plain text.
 */
function StepRow({ step }: { step: StepLine }) {
  let marker: React.ReactNode;
  let label: React.ReactNode;

  switch (step.status) {
    case "done":
      marker = <Text color="green">{"\u2714"}</Text>;
      label = <Text dimColor>{step.text}</Text>;
      break;
    case "failed":
      marker = <Text color="red">{"\u2718"}</Text>;
      label = <Text color="red">{step.text}</Text>;
      break;
    default:
      marker = (
        <Text color="cyan">
          <Spinner type="dots" />
        </Text>
      );
      label = step.text;
  }

  return (
    <Text>
      {" "}{marker} {label}
    </Text>
  );
}
@@ -0,0 +1,138 @@
1
+ import React, { useState, useEffect } from "react";
2
+ import { Text, Box } from "ink";
3
+ import Spinner from "ink-spinner";
4
+ import { bus } from "../../../events.js";
5
+
6
/**
 * Where a company currently stands in career-page discovery,
 * discriminated by `state`.
 */
type CompanyStatus =
  // A search is in progress.
  | { state: "searching" }
  // A candidate URL is being verified.
  | { state: "verifying"; url: string }
  // A career page was found at `url`.
  | { state: "found"; url: string }
  // Nothing was found; `reason` is shown to the user.
  | { state: "not-found"; reason: string };
11
+
12
/**
 * Live discovery progress: one row per company, in first-seen order,
 * followed by a "found/total" summary once `discover:complete` arrives.
 *
 * The component subscribes to the `discover:*` bus events on mount and
 * unsubscribes on unmount. The summary's `notFound` count is stored but not
 * rendered.
 */
export function DiscoverView() {
  const [companies, setCompanies] = useState<Map<string, CompanyStatus>>(
    new Map()
  );
  const [summary, setSummary] = useState<{
    found: number;
    notFound: number;
    total: number;
  } | null>(null);

  useEffect(() => {
    /** Store the latest status for `company` in a fresh copy of the map. */
    const record = (company: string, status: CompanyStatus) => {
      setCompanies((current) => new Map(current).set(company, status));
    };

    const onSearching = (p: { company: string }) =>
      record(p.company, { state: "searching" });

    const onVerifying = (p: { company: string; url: string }) =>
      record(p.company, { state: "verifying", url: p.url });

    const onFound = (p: { company: string; url: string }) =>
      record(p.company, { state: "found", url: p.url });

    const onNotFound = (p: { company: string; reason: string }) =>
      record(p.company, { state: "not-found", reason: p.reason });

    const onComplete = (p: {
      found: number;
      notFound: number;
      total: number;
    }) => setSummary(p);

    bus.on("discover:searching", onSearching);
    bus.on("discover:verifying", onVerifying);
    bus.on("discover:found", onFound);
    bus.on("discover:not-found", onNotFound);
    bus.on("discover:complete", onComplete);

    return () => {
      bus.off("discover:searching", onSearching);
      bus.off("discover:verifying", onVerifying);
      bus.off("discover:found", onFound);
      bus.off("discover:not-found", onNotFound);
      bus.off("discover:complete", onComplete);
    };
  }, []);

  return (
    <Box flexDirection="column">
      {Array.from(companies, ([company, status]) => (
        <CompanyRow key={company} company={company} status={status} />
      ))}

      {summary && (
        <Box marginTop={1}>
          <Text color="green">
            {`\u2713 ${summary.found}/${summary.total} career pages found`}
          </Text>
        </Box>
      )}
    </Box>
  );
}
94
+
95
/**
 * One discovery row laid out as `<marker> <company> <detail>`.
 *
 * searching / verifying show a spinner with dimmed detail text, found shows
 * a green check with the URL, not-found shows a red cross with the reason.
 */
function CompanyRow({
  company,
  status,
}: {
  company: string;
  status: CompanyStatus;
}) {
  let marker: React.ReactNode;
  let detail: React.ReactNode;

  switch (status.state) {
    case "searching":
      marker = <Spinner type="dots" />;
      detail = <Text dimColor>searching...</Text>;
      break;

    case "verifying":
      marker = <Spinner type="dots" />;
      detail = <Text dimColor>verifying {status.url}</Text>;
      break;

    case "found":
      marker = <Text color="green">{"\u2713"}</Text>;
      detail = (
        <Text dimColor>
          {"\u2192"} {status.url}
        </Text>
      );
      break;

    case "not-found":
      marker = <Text color="red">{"\u2717"}</Text>;
      detail = (
        <Text color="red">
          {"\u2014"} {status.reason}
        </Text>
      );
      break;
  }

  return (
    <Text>
      {marker} {company} {detail}
    </Text>
  );
}
@@ -0,0 +1,112 @@
1
+ import { bus } from "../../events.js";
2
+ import { extractViaAggregator } from "../../core/extract-jobs.js";
3
+ import { matchJobs } from "../../core/match-jobs.js";
4
+ import { formatOutput } from "../../core/format-output.js";
5
+ import type {
6
+ Aggregator,
7
+ SearchCriteria,
8
+ OutputFormat,
9
+ } from "../../types/index.js";
10
+
11
/**
 * Options accepted by `crawlAggregatorsCommand`. Every field is optional;
 * omitted filters become `null` (or an empty list for the keyword fields)
 * in the search criteria.
 */
interface CrawlAggregatorsOptions {
  // --- Keyword filters ---
  /** Keywords to match; defaults to an empty list. */
  keywords?: string[];
  /** Keywords to exclude; defaults to an empty list. */
  exclude?: string[];

  // --- Location / work mode ---
  location?: string;
  /** Each truthy flag adds the corresponding work mode to the criteria. */
  remote?: boolean;
  onsite?: boolean;
  hybrid?: boolean;

  // --- Role filters ---
  department?: string[];
  role?: string[];
  roleType?: string[];
  jobType?: string[];
  /** Raw string values; parsed as base-10 integers by the command. */
  minExperience?: string[];

  // --- Company filters ---
  companyStage?: string[];
  industry?: string[];
  companySize?: string[];

  // --- Listing attribute filters ---
  hasSalary?: boolean;
  hasEquity?: boolean;
  hasInterviewProcess?: boolean;
  visaSponsorship?: boolean;

  // --- Output ---
  /** Output format; the command falls back to "json" when omitted. */
  output?: OutputFormat;
  /** When set, the formatted output is written to this file path. */
  out?: string;
  /** Forwarded to the aggregator extraction call. */
  saveRaw?: boolean;
}
34
+
35
/**
 * Crawls the requested aggregators, filters their jobs against the criteria
 * built from `opts`, and either writes the formatted result to `opts.out` or
 * emits it as an `output:text` event.
 *
 * Progress is reported through `aggregator:start`, `aggregator:done` and
 * `aggregator:failed` bus events. Aggregators run one after another and the
 * first failure aborts the command.
 *
 * @param aggregatorNames - Aggregators to crawl; only "yc" is recognised.
 * @param opts - Filters and output settings.
 * @throws Error if a name is not a known aggregator, if no aggregator is
 *   given, if a `minExperience` value is not numeric, or if an aggregator
 *   fails (the original message is included).
 */
export async function crawlAggregatorsCommand(
  aggregatorNames: string[],
  opts: CrawlAggregatorsOptions
): Promise<void> {
  const valid: Aggregator[] = [];
  for (const name of aggregatorNames) {
    if (name === "yc") {
      valid.push(name);
    } else {
      throw new Error(`Unknown aggregator: "${name}". Available: yc`);
    }
  }

  if (valid.length === 0) {
    throw new Error("No aggregators specified. Available: yc");
  }

  // Reject non-numeric values up front: parseInt would otherwise yield NaN,
  // which would be passed into the criteria unnoticed.
  const minExperience = opts.minExperience
    ? opts.minExperience.map((raw) => {
        const years = parseInt(raw, 10);
        if (Number.isNaN(years)) {
          throw new Error(
            `Invalid minimum experience: "${raw}". Expected a number.`
          );
        }
        return years;
      })
    : null;

  const criteria: SearchCriteria = {
    keywords: opts.keywords ?? [],
    excludeKeywords: opts.exclude ?? [],
    location: opts.location ?? null,
    workMode: buildWorkMode(opts),
    departments: opts.department ?? null,
    role: opts.role ?? null,
    roleType: opts.roleType ?? null,
    jobType: opts.jobType ?? null,
    minExperience,
    companyStage: opts.companyStage ?? null,
    industry: opts.industry ?? null,
    companySize: opts.companySize ?? null,
    hasSalary: opts.hasSalary ?? null,
    hasEquity: opts.hasEquity ?? null,
    hasInterviewProcess: opts.hasInterviewProcess ?? null,
    visaSponsorship: opts.visaSponsorship ?? null,
  };

  const format = opts.output ?? "json";
  const allJobs = [];

  for (const aggregator of valid) {
    bus.emit("aggregator:start", { type: aggregator });
    try {
      const jobs = await extractViaAggregator(aggregator, criteria, opts.saveRaw);
      const matched = matchJobs(jobs, criteria);
      bus.emit("aggregator:done", { type: aggregator, jobCount: matched.length });
      allJobs.push(...matched);
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      bus.emit("aggregator:failed", { type: aggregator, error: message });
      throw new Error(`Aggregator "${aggregator}" failed: ${message}`);
    }
  }

  const output = formatOutput(allJobs, format);

  if (opts.out) {
    // Loaded lazily: only needed when writing to a file.
    const { writeFile } = await import("node:fs/promises");
    await writeFile(opts.out, output + "\n");
    bus.emit("output:message", {
      text: `Wrote ${allJobs.length} jobs to ${opts.out}`,
      style: "success",
    });
  } else {
    bus.emit("output:text", { text: output });
  }
}
103
+
104
/**
 * Collects the work modes whose flag is truthy in `opts`, in the fixed order
 * remote, onsite, hybrid.
 *
 * @returns The selected modes, or `null` when none is selected.
 */
function buildWorkMode(
  opts: CrawlAggregatorsOptions
): ("remote" | "onsite" | "hybrid")[] | null {
  const candidates = ["remote", "onsite", "hybrid"] as const;
  const selected = candidates.filter((mode) => opts[mode]);
  return selected.length === 0 ? null : selected;
}
@@ -0,0 +1,87 @@
1
+ import { bus } from "../../events.js";
2
+ import { processTarget } from "../../threads/process-url.js";
3
+ import { formatOutput } from "../../core/format-output.js";
4
+ import type { SearchCriteria, OutputFormat } from "../../types/index.js";
5
+
6
/**
 * Options accepted by `crawlUrlCommand`. Every field is optional; omitted
 * filters become `null` (or an empty list for the keyword fields) in the
 * search criteria.
 */
interface CrawlUrlOptions {
  // --- Keyword filters ---
  /** Keywords to match; defaults to an empty list. */
  keywords?: string[];
  /** Keywords to exclude; defaults to an empty list. */
  exclude?: string[];

  // --- Location / work mode ---
  location?: string;
  /** Each truthy flag adds the corresponding work mode to the criteria. */
  remote?: boolean;
  onsite?: boolean;
  hybrid?: boolean;

  // --- Role filters ---
  department?: string[];
  role?: string[];
  roleType?: string[];
  jobType?: string[];
  /** Raw string values; parsed as base-10 integers by the command. */
  minExperience?: string[];

  // --- Company filters ---
  companyStage?: string[];
  industry?: string[];
  companySize?: string[];

  // --- Listing attribute filters ---
  hasSalary?: boolean;
  hasEquity?: boolean;
  hasInterviewProcess?: boolean;
  visaSponsorship?: boolean;

  // --- Output ---
  /** Output format; the command falls back to "json" when omitted. */
  output?: OutputFormat;
  /** When set, the formatted output is written to this file path. */
  out?: string;
  /** Forwarded to the URL processing call. */
  saveRaw?: boolean;
}
29
+
30
/**
 * Crawls a single URL with the criteria built from `opts`, then either
 * writes the formatted jobs to `opts.out` or emits them as an `output:text`
 * event.
 *
 * @param url - The page to crawl.
 * @param opts - Filters and output settings.
 * @throws Error if a `minExperience` value is not numeric, or if processing
 *   the URL reports an error.
 */
export async function crawlUrlCommand(
  url: string,
  opts: CrawlUrlOptions
): Promise<void> {
  // Reject non-numeric values up front: parseInt would otherwise yield NaN,
  // which would be passed into the criteria unnoticed.
  const minExperience = opts.minExperience
    ? opts.minExperience.map((raw) => {
        const years = parseInt(raw, 10);
        if (Number.isNaN(years)) {
          throw new Error(
            `Invalid minimum experience: "${raw}". Expected a number.`
          );
        }
        return years;
      })
    : null;

  const criteria: SearchCriteria = {
    keywords: opts.keywords ?? [],
    excludeKeywords: opts.exclude ?? [],
    location: opts.location ?? null,
    workMode: buildWorkMode(opts),
    departments: opts.department ?? null,
    role: opts.role ?? null,
    roleType: opts.roleType ?? null,
    jobType: opts.jobType ?? null,
    minExperience,
    companyStage: opts.companyStage ?? null,
    industry: opts.industry ?? null,
    companySize: opts.companySize ?? null,
    hasSalary: opts.hasSalary ?? null,
    hasEquity: opts.hasEquity ?? null,
    hasInterviewProcess: opts.hasInterviewProcess ?? null,
    visaSponsorship: opts.visaSponsorship ?? null,
  };

  const format = opts.output ?? "json";

  const result = await processTarget({ url }, criteria, {
    saveRaw: opts.saveRaw,
  });

  // Failures are reported on the result rather than thrown; surface them.
  if (result.error) {
    throw new Error(`Failed to crawl ${url}: ${result.error}`);
  }

  const output = formatOutput(result.jobs, format);

  if (opts.out) {
    // Loaded lazily: only needed when writing to a file.
    const { writeFile } = await import("node:fs/promises");
    await writeFile(opts.out, output + "\n");
    bus.emit("output:message", {
      text: `Wrote ${result.jobs.length} jobs to ${opts.out}`,
      style: "success",
    });
  } else {
    bus.emit("output:text", { text: output });
  }
}
78
+
79
/**
 * Translates the `remote` / `onsite` / `hybrid` flags into a list of work
 * modes, always in that order.
 *
 * @returns The enabled modes, or `null` if no flag is truthy.
 */
function buildWorkMode(
  opts: CrawlUrlOptions
): ("remote" | "onsite" | "hybrid")[] | null {
  type WorkMode = "remote" | "onsite" | "hybrid";

  const flags: [WorkMode, boolean | undefined][] = [
    ["remote", opts.remote],
    ["onsite", opts.onsite],
    ["hybrid", opts.hybrid],
  ];

  const enabled: WorkMode[] = [];
  for (const [mode, isOn] of flags) {
    if (isOn) enabled.push(mode);
  }

  if (enabled.length > 0) {
    return enabled;
  }
  return null;
}