jobcrawl 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. package/.prettierrc.json +10 -0
  2. package/CHANGELOG.md +40 -0
  3. package/README.md +232 -0
  4. package/dist/core/aggregators/yc.d.ts +7 -0
  5. package/dist/core/aggregators/yc.js +320 -0
  6. package/dist/core/browser.d.ts +30 -0
  7. package/dist/core/browser.js +196 -0
  8. package/dist/core/cache.d.ts +13 -0
  9. package/dist/core/cache.js +41 -0
  10. package/dist/core/detect-provider.d.ts +7 -0
  11. package/dist/core/detect-provider.js +125 -0
  12. package/dist/core/discover-careers.d.ts +18 -0
  13. package/dist/core/discover-careers.js +92 -0
  14. package/dist/core/extract-jobs.d.ts +14 -0
  15. package/dist/core/extract-jobs.js +36 -0
  16. package/dist/core/fetch-page.d.ts +11 -0
  17. package/dist/core/fetch-page.js +39 -0
  18. package/dist/core/format-output.d.ts +2 -0
  19. package/dist/core/format-output.js +59 -0
  20. package/dist/core/match-jobs.d.ts +6 -0
  21. package/dist/core/match-jobs.js +43 -0
  22. package/dist/core/providers/ashby.d.ts +6 -0
  23. package/dist/core/providers/ashby.js +58 -0
  24. package/dist/core/providers/generic.d.ts +6 -0
  25. package/dist/core/providers/generic.js +294 -0
  26. package/dist/core/providers/greenhouse.d.ts +6 -0
  27. package/dist/core/providers/greenhouse.js +47 -0
  28. package/dist/core/providers/lever.d.ts +7 -0
  29. package/dist/core/providers/lever.js +60 -0
  30. package/dist/core/providers/yc.d.ts +7 -0
  31. package/dist/core/providers/yc.js +320 -0
  32. package/dist/core/resolve-iframe.d.ts +6 -0
  33. package/dist/core/resolve-iframe.js +51 -0
  34. package/dist/core/save-raw.d.ts +4 -0
  35. package/dist/core/save-raw.js +13 -0
  36. package/dist/data/companies.d.ts +9 -0
  37. package/dist/data/companies.js +2849 -0
  38. package/dist/entrypoints/cli/app.d.ts +3 -0
  39. package/dist/entrypoints/cli/app.js +91 -0
  40. package/dist/entrypoints/cli/components/crawl-view.d.ts +1 -0
  41. package/dist/entrypoints/cli/components/crawl-view.js +94 -0
  42. package/dist/entrypoints/cli/components/discover-view.d.ts +1 -0
  43. package/dist/entrypoints/cli/components/discover-view.js +67 -0
  44. package/dist/entrypoints/cli/crawl-aggregators.d.ts +26 -0
  45. package/dist/entrypoints/cli/crawl-aggregators.js +76 -0
  46. package/dist/entrypoints/cli/crawl-url.d.ts +26 -0
  47. package/dist/entrypoints/cli/crawl-url.js +54 -0
  48. package/dist/entrypoints/cli/crawl.d.ts +32 -0
  49. package/dist/entrypoints/cli/crawl.js +108 -0
  50. package/dist/entrypoints/cli/discover.d.ts +10 -0
  51. package/dist/entrypoints/cli/discover.js +69 -0
  52. package/dist/entrypoints/cli/index.d.ts +2 -0
  53. package/dist/entrypoints/cli/index.js +197 -0
  54. package/dist/entrypoints/cli/init.d.ts +9 -0
  55. package/dist/entrypoints/cli/init.js +94 -0
  56. package/dist/entrypoints/cli/plain.d.ts +6 -0
  57. package/dist/entrypoints/cli/plain.js +77 -0
  58. package/dist/events.d.ts +114 -0
  59. package/dist/events.js +17 -0
  60. package/dist/orchestrators/crawl-all.d.ts +2 -0
  61. package/dist/orchestrators/crawl-all.js +66 -0
  62. package/dist/orchestrators/discover-all.d.ts +10 -0
  63. package/dist/orchestrators/discover-all.js +39 -0
  64. package/dist/threads/pool.d.ts +5 -0
  65. package/dist/threads/pool.js +23 -0
  66. package/dist/threads/process-url.d.ts +9 -0
  67. package/dist/threads/process-url.js +229 -0
  68. package/dist/types/index.d.ts +83 -0
  69. package/dist/types/index.js +6 -0
  70. package/dist/utils/config.d.ts +17 -0
  71. package/dist/utils/config.js +57 -0
  72. package/dist/utils/google-search.d.ts +19 -0
  73. package/dist/utils/google-search.js +139 -0
  74. package/dist/utils/llm.d.ts +8 -0
  75. package/dist/utils/llm.js +25 -0
  76. package/package.json +42 -0
  77. package/src/core/aggregators/yc.ts +415 -0
  78. package/src/core/browser.ts +239 -0
  79. package/src/core/detect-provider.ts +162 -0
  80. package/src/core/discover-careers.ts +117 -0
  81. package/src/core/extract-jobs.ts +50 -0
  82. package/src/core/fetch-page.ts +41 -0
  83. package/src/core/format-output.ts +80 -0
  84. package/src/core/match-jobs.ts +56 -0
  85. package/src/core/providers/ashby.ts +84 -0
  86. package/src/core/providers/generic.ts +332 -0
  87. package/src/core/providers/greenhouse.ts +74 -0
  88. package/src/core/providers/lever.ts +90 -0
  89. package/src/core/resolve-iframe.ts +59 -0
  90. package/src/core/save-raw.ts +18 -0
  91. package/src/data/companies.ts +2859 -0
  92. package/src/entrypoints/cli/app.tsx +173 -0
  93. package/src/entrypoints/cli/components/crawl-view.tsx +163 -0
  94. package/src/entrypoints/cli/components/discover-view.tsx +138 -0
  95. package/src/entrypoints/cli/crawl-aggregators.ts +112 -0
  96. package/src/entrypoints/cli/crawl-url.ts +87 -0
  97. package/src/entrypoints/cli/crawl.ts +163 -0
  98. package/src/entrypoints/cli/discover.ts +96 -0
  99. package/src/entrypoints/cli/index.ts +252 -0
  100. package/src/entrypoints/cli/init.ts +117 -0
  101. package/src/entrypoints/cli/plain.ts +104 -0
  102. package/src/events.ts +79 -0
  103. package/src/orchestrators/crawl-all.ts +96 -0
  104. package/src/orchestrators/discover-all.ts +61 -0
  105. package/src/threads/pool.ts +29 -0
  106. package/src/threads/process-url.ts +312 -0
  107. package/src/types/index.ts +110 -0
  108. package/src/utils/config.ts +79 -0
  109. package/src/utils/google-search.ts +155 -0
  110. package/src/utils/llm.ts +33 -0
  111. package/test/integration/process-url.test.ts +301 -0
  112. package/test/integration/providers/ashby.test.ts +163 -0
  113. package/test/integration/providers/greenhouse.test.ts +191 -0
  114. package/test/integration/providers/lever.test.ts +188 -0
  115. package/test/unit/config.test.ts +64 -0
  116. package/test/unit/detect-provider.test.ts +165 -0
  117. package/test/unit/events.test.ts +104 -0
  118. package/test/unit/format-output.test.ts +165 -0
  119. package/test/unit/match-jobs.test.ts +257 -0
  120. package/test/unit/pool.test.ts +74 -0
  121. package/test/unit/providers/generic.test.ts +139 -0
  122. package/test/unit/resolve-iframe.test.ts +100 -0
  123. package/tsconfig.json +19 -0
  124. package/vitest.config.ts +7 -0
@@ -0,0 +1,163 @@
1
+ import { existsSync } from "node:fs";
2
+ import { readFile } from "node:fs/promises";
3
+ import { bus } from "../../events.js";
4
+ import { crawlAll } from "../../orchestrators/crawl-all.js";
5
+ import { loadConfig, parseUrlList } from "../../utils/config.js";
6
+ import { formatOutput } from "../../core/format-output.js";
7
+ import { CONFIG_FILE } from "./init.js";
8
+ import type { Config } from "../../utils/config.js";
9
+ import type {
10
+ Target,
11
+ Aggregator,
12
+ SearchCriteria,
13
+ OutputFormat,
14
+ } from "../../types/index.js";
15
+
16
+ interface CrawlOptions {
17
+ urls?: string[];
18
+ file?: string;
19
+ aggregators?: string[];
20
+ keywords?: string[];
21
+ exclude?: string[];
22
+ location?: string;
23
+ remote?: boolean;
24
+ onsite?: boolean;
25
+ hybrid?: boolean;
26
+ department?: string[];
27
+ role?: string[];
28
+ roleType?: string[];
29
+ jobType?: string[];
30
+ minExperience?: string[];
31
+ companyStage?: string[];
32
+ industry?: string[];
33
+ companySize?: string[];
34
+ hasSalary?: boolean;
35
+ hasEquity?: boolean;
36
+ hasInterviewProcess?: boolean;
37
+ visaSponsorship?: boolean;
38
+ output?: OutputFormat;
39
+ out?: string;
40
+ concurrency?: string;
41
+ saveRaw?: boolean;
42
+ networkTimeout?: string;
43
+ maxBubbleLevels?: string;
44
+ }
45
+
46
+ export async function crawlCommand(opts: CrawlOptions): Promise<void> {
47
+ const { targets, config } = await resolveTargets(opts);
48
+
49
+ if (targets.length === 0) {
50
+ throw new Error(
51
+ "No targets provided. Use --urls, --file, or run `jobcrawl init` to set up default targets."
52
+ );
53
+ }
54
+
55
+ const criteria: SearchCriteria = {
56
+ keywords: opts.keywords ?? [],
57
+ excludeKeywords: opts.exclude ?? [],
58
+ location: opts.location ?? null,
59
+ workMode: buildWorkMode(opts),
60
+ departments: opts.department ?? null,
61
+ role: opts.role ?? null,
62
+ roleType: opts.roleType ?? null,
63
+ jobType: opts.jobType ?? null,
64
+ minExperience: opts.minExperience
65
+ ? opts.minExperience.map((v) => parseInt(v, 10))
66
+ : null,
67
+ companyStage: opts.companyStage ?? null,
68
+ industry: opts.industry ?? null,
69
+ companySize: opts.companySize ?? null,
70
+ hasSalary: opts.hasSalary ?? null,
71
+ hasEquity: opts.hasEquity ?? null,
72
+ hasInterviewProcess: opts.hasInterviewProcess ?? null,
73
+ visaSponsorship: opts.visaSponsorship ?? null,
74
+ };
75
+
76
+ const concurrency = opts.concurrency ? parseInt(opts.concurrency, 10) : 5;
77
+ const format = opts.output ?? "json";
78
+
79
+ // Parse --aggregators flag
80
+ const aggregators: Aggregator[] | undefined = opts.aggregators?.map((name) => {
81
+ if (name === "yc") return name;
82
+ throw new Error(`Unknown aggregator: "${name}". Available: yc`);
83
+ });
84
+
85
+ // Browser options: CLI flags override config defaults
86
+ const configBrowser = config?.defaults?.browser;
87
+ const browser = {
88
+ networkTimeout: opts.networkTimeout
89
+ ? parseInt(opts.networkTimeout, 10)
90
+ : configBrowser?.networkTimeout,
91
+ maxBubbleLevels: opts.maxBubbleLevels
92
+ ? parseInt(opts.maxBubbleLevels, 10)
93
+ : configBrowser?.maxBubbleLevels,
94
+ };
95
+
96
+ const result = await crawlAll(targets, criteria, {
97
+ concurrency,
98
+ saveRaw: opts.saveRaw,
99
+ browser,
100
+ }, aggregators);
101
+
102
+ const output = formatOutput(result.jobs, format);
103
+
104
+ if (opts.out) {
105
+ const { writeFile } = await import("node:fs/promises");
106
+ await writeFile(opts.out, output + "\n");
107
+ bus.emit("output:message", {
108
+ text: `Wrote ${result.jobs.length} jobs to ${opts.out}`,
109
+ style: "success",
110
+ });
111
+ } else {
112
+ bus.emit("output:text", { text: output });
113
+ }
114
+ }
115
+
116
+ async function resolveTargets(
117
+ opts: CrawlOptions
118
+ ): Promise<{ targets: Target[]; config: Config | null }> {
119
+ const targets: Target[] = [];
120
+ let config: Config | null = null;
121
+
122
+ // From --urls flag
123
+ if (opts.urls) {
124
+ for (const url of opts.urls) {
125
+ targets.push({ url });
126
+ }
127
+ }
128
+
129
+ // From --file flag
130
+ if (opts.file) {
131
+ config = await loadConfig(opts.file);
132
+ targets.push(...config.companies);
133
+ }
134
+
135
+ // From ~/.jobcrawl/config.yaml (default config)
136
+ if (targets.length === 0 && existsSync(CONFIG_FILE)) {
137
+ config = await loadConfig(CONFIG_FILE);
138
+ targets.push(...config.companies);
139
+ }
140
+
141
+ // Always load config for defaults even when using --urls
142
+ if (!config && existsSync(CONFIG_FILE)) {
143
+ config = await loadConfig(CONFIG_FILE);
144
+ }
145
+
146
+ // From stdin (if not a TTY and no other input)
147
+ if (targets.length === 0 && !process.stdin.isTTY) {
148
+ const input = await readFile("/dev/stdin", "utf-8");
149
+ targets.push(...parseUrlList(input));
150
+ }
151
+
152
+ return { targets, config };
153
+ }
154
+
155
+ function buildWorkMode(
156
+ opts: CrawlOptions
157
+ ): ("remote" | "onsite" | "hybrid")[] | null {
158
+ const modes: ("remote" | "onsite" | "hybrid")[] = [];
159
+ if (opts.remote) modes.push("remote");
160
+ if (opts.onsite) modes.push("onsite");
161
+ if (opts.hybrid) modes.push("hybrid");
162
+ return modes.length > 0 ? modes : null;
163
+ }
@@ -0,0 +1,96 @@
1
+ import { readFile } from "node:fs/promises";
2
+ import { bus } from "../../events.js";
3
+ import { discoverAll } from "../../orchestrators/discover-all.js";
4
+ import type { UrlTarget } from "../../types/index.js";
5
+ import yaml from "js-yaml";
6
+
7
+ interface DiscoverOptions {
8
+ companies?: string[];
9
+ file?: string;
10
+ output?: "urls" | "yaml" | "json";
11
+ out?: string;
12
+ verify?: boolean;
13
+ concurrency?: string;
14
+ }
15
+
16
+ export async function discoverCommand(opts: DiscoverOptions): Promise<void> {
17
+ const companies = await resolveCompanies(opts);
18
+
19
+ if (companies.length === 0) {
20
+ throw new Error("No company names provided. Use --companies or --file.");
21
+ }
22
+
23
+ const concurrency = opts.concurrency ? parseInt(opts.concurrency, 10) : 3;
24
+ const format = opts.output ?? "urls";
25
+
26
+ const result = await discoverAll(companies, {
27
+ concurrency,
28
+ verify: opts.verify ?? false,
29
+ });
30
+
31
+ const found = result.results.filter((r) => r.url !== null);
32
+ const output = formatDiscoverOutput(found, format);
33
+
34
+ if (opts.out) {
35
+ const { writeFile } = await import("node:fs/promises");
36
+ await writeFile(opts.out, output + "\n");
37
+ bus.emit("output:message", {
38
+ text: `Wrote ${found.length} targets to ${opts.out}`,
39
+ style: "success",
40
+ });
41
+ } else {
42
+ bus.emit("output:text", { text: output });
43
+ }
44
+ }
45
+
46
+ async function resolveCompanies(opts: DiscoverOptions): Promise<string[]> {
47
+ const companies: string[] = [];
48
+
49
+ if (opts.companies) {
50
+ companies.push(...opts.companies);
51
+ }
52
+
53
+ if (opts.file) {
54
+ const content = await readFile(opts.file, "utf-8");
55
+ const lines = content
56
+ .split("\n")
57
+ .map((l) => l.trim())
58
+ .filter((l) => l.length > 0 && !l.startsWith("#"));
59
+ companies.push(...lines);
60
+ }
61
+
62
+ // stdin if no other input and not a TTY
63
+ if (companies.length === 0 && !process.stdin.isTTY) {
64
+ const input = await readFile("/dev/stdin", "utf-8");
65
+ const lines = input
66
+ .split("\n")
67
+ .map((l) => l.trim())
68
+ .filter((l) => l.length > 0 && !l.startsWith("#"));
69
+ companies.push(...lines);
70
+ }
71
+
72
+ return companies;
73
+ }
74
+
75
+ function formatDiscoverOutput(
76
+ found: Array<{ company: string; url: string | null }>,
77
+ format: "urls" | "yaml" | "json"
78
+ ): string {
79
+ const targets: UrlTarget[] = found
80
+ .filter((r): r is { company: string; url: string } => r.url !== null)
81
+ .map((r) => ({ url: r.url, company: r.company }));
82
+
83
+ switch (format) {
84
+ case "urls":
85
+ return targets.map((t) => t.url).join("\n");
86
+
87
+ case "json":
88
+ return JSON.stringify(targets, null, 2);
89
+
90
+ case "yaml":
91
+ return yaml.dump({
92
+ targets: targets.map((t) => ({ url: t.url, company: t.company })),
93
+ defaults: { concurrency: 5 },
94
+ });
95
+ }
96
+ }
@@ -0,0 +1,252 @@
1
+ #!/usr/bin/env node
2
+
3
+ import React from "react";
4
+ import { render } from "ink";
5
+ import { Command } from "commander";
6
+ import { bus } from "../../events.js";
7
+ import { mountPlain } from "./plain.js";
8
+ import { App } from "./app.js";
9
+ import { crawlUrlCommand } from "./crawl-url.js";
10
+ import { crawlCommand } from "./crawl.js";
11
+ import { crawlAggregatorsCommand } from "./crawl-aggregators.js";
12
+ import { discoverCommand } from "./discover.js";
13
+ import { initCommand } from "./init.js";
14
+
15
+ /**
16
+ * Lifecycle manager. Mounts Ink (TTY) or plain text (piped) subscriber,
17
+ * executes the command, and handles teardown.
18
+ */
19
+ function wrap(
20
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
21
+ fn: (...args: any[]) => Promise<void>
22
+ ): (...args: unknown[]) => void {
23
+ return (...args: unknown[]) => {
24
+ if (process.stdout.isTTY) {
25
+ // Interactive: mount Ink (handles run + done + cleanup internally)
26
+ const run = () => fn(...args);
27
+ const instance = render(React.createElement(App, { run }));
28
+ const onDone = () => {
29
+ bus.off("done", onDone);
30
+ setTimeout(() => instance.unmount(), 32);
31
+ };
32
+ bus.on("done", onDone);
33
+ } else {
34
+ // Piped: plain text subscriber
35
+ const cleanup = mountPlain();
36
+
37
+ fn(...args)
38
+ .then(() => {
39
+ bus.emit("done", { exitCode: 0 });
40
+ })
41
+ .catch((err: unknown) => {
42
+ bus.emit("error", {
43
+ message: err instanceof Error ? err.message : String(err),
44
+ });
45
+ bus.emit("done", { exitCode: 1 });
46
+ })
47
+ .finally(() => {
48
+ cleanup();
49
+ });
50
+ }
51
+ };
52
+ }
53
+
54
+ function addFilterOptions(cmd: Command): Command {
55
+ return cmd
56
+ .option("--role <roles...>", "Role filter (e.g. engineering, design)")
57
+ .option("--role-type <types...>", "Role sub-type (e.g. backend, frontend)")
58
+ .option("--job-type <types...>", "Job type (e.g. fulltime, internship)")
59
+ .option("--min-experience <years...>", "Min years experience (e.g. 0, 3)")
60
+ .option("--company-stage <stages...>", "Company stage (e.g. seed, growth)")
61
+ .option("--industry <industries...>", "Industry filter")
62
+ .option("--company-size <sizes...>", "Company size (e.g. 1-10, 11-50)")
63
+ .option("--has-salary", "Only jobs with salary listed")
64
+ .option("--has-equity", "Only jobs with equity")
65
+ .option("--has-interview-process", "Only jobs with interview process")
66
+ .option("--visa-sponsorship", "Only jobs not requiring US visa");
67
+ }
68
+
69
+ const program = new Command("jobcrawl")
70
+ .version("1.0.0")
71
+ .description("Crawl career pages for jobs matching your search criteria");
72
+
73
+ addFilterOptions(
74
+ program
75
+ .command("crawl-url <url>")
76
+ .description("Crawl a single career page URL and return matching jobs")
77
+ .option("--keywords <terms...>", "Job title keywords to match")
78
+ .option("--exclude <terms...>", "Keywords to exclude")
79
+ .option("--location <location>", "Location filter")
80
+ .option("--remote", "Only remote jobs")
81
+ .option("--onsite", "Only onsite jobs")
82
+ .option("--hybrid", "Only hybrid jobs")
83
+ .option("--department <depts...>", "Department filter")
84
+ .option(
85
+ "--output <format>",
86
+ "Output format: json, table, markdown, csv",
87
+ "json"
88
+ )
89
+ .option("-o, --out <file>", "Write output to file")
90
+ .option("--save-raw", "Save raw API responses to ~/.jobcrawl/raw/")
91
+ ).action(
92
+ wrap(async (url: string, opts: Record<string, unknown>) => {
93
+ await crawlUrlCommand(url as string, opts);
94
+ })
95
+ );
96
+
97
+ addFilterOptions(
98
+ program
99
+ .command("crawl")
100
+ .description("Crawl multiple career pages and return matching jobs")
101
+ .option("--urls <urls...>", "Career page URLs to crawl")
102
+ .option("--file <path>", "Config file with targets (YAML/JSON)")
103
+ .option("--keywords <terms...>", "Job title keywords to match")
104
+ .option("--exclude <terms...>", "Keywords to exclude")
105
+ .option("--location <location>", "Location filter")
106
+ .option("--remote", "Only remote jobs")
107
+ .option("--onsite", "Only onsite jobs")
108
+ .option("--hybrid", "Only hybrid jobs")
109
+ .option("--department <depts...>", "Department filter")
110
+ .option(
111
+ "--output <format>",
112
+ "Output format: json, table, markdown, csv",
113
+ "json"
114
+ )
115
+ .option("-o, --out <file>", "Write output to file")
116
+ .option("--concurrency <n>", "Max concurrent crawls", "5")
117
+ .option("--save-raw", "Save raw API responses to ~/.jobcrawl/raw/")
118
+ .option("--aggregators <names...>", "Also run aggregators (e.g. yc)")
119
+ .option("--network-timeout <ms>", "Timeout for browser network commands (ms)")
120
+ .option("--max-bubble-levels <n>", "Max parent levels to try when clicking job cards")
121
+ ).action(
122
+ wrap(async (opts: Record<string, unknown>) => {
123
+ await crawlCommand(opts);
124
+ })
125
+ );
126
+
127
+ addFilterOptions(
128
+ program
129
+ .command("crawl-aggregators <aggregators...>")
130
+ .description("Crawl aggregator sources (e.g. yc) for matching jobs")
131
+ .option("--keywords <terms...>", "Job title keywords to match")
132
+ .option("--exclude <terms...>", "Keywords to exclude")
133
+ .option("--location <location>", "Location filter")
134
+ .option("--remote", "Only remote jobs")
135
+ .option("--onsite", "Only onsite jobs")
136
+ .option("--hybrid", "Only hybrid jobs")
137
+ .option("--department <depts...>", "Department filter")
138
+ .option(
139
+ "--output <format>",
140
+ "Output format: json, table, markdown, csv",
141
+ "json"
142
+ )
143
+ .option("-o, --out <file>", "Write output to file")
144
+ .option("--save-raw", "Save raw API responses to ~/.jobcrawl/raw/")
145
+ ).action(
146
+ wrap(async (aggregators: string[], opts: Record<string, unknown>) => {
147
+ await crawlAggregatorsCommand(aggregators, opts);
148
+ })
149
+ );
150
+
151
+ program
152
+ .command("discover")
153
+ .description("Find career page URLs from company names")
154
+ .option("--companies <names...>", "Company names to search for")
155
+ .option("--file <path>", "File with company names (one per line)")
156
+ .option("--output <format>", "Output format: urls, yaml, json", "urls")
157
+ .option("-o, --out <file>", "Write output to file")
158
+ .option("--verify", "Verify each discovered URL is a real career page")
159
+ .option("--concurrency <n>", "Max concurrent searches", "3")
160
+ .action(
161
+ wrap(async (opts: Record<string, unknown>) => {
162
+ await discoverCommand(opts);
163
+ })
164
+ );
165
+
166
+ program
167
+ .command("init")
168
+ .description("Create config file at ~/.jobcrawl/config.yaml")
169
+ .option("--force", "Overwrite existing config")
170
+ .action(
171
+ wrap(async (opts: Record<string, unknown>) => {
172
+ await initCommand(opts);
173
+ })
174
+ );
175
+
176
+ program
177
+ .command("detect <url>")
178
+ .description("Detect which ATS provider a career page uses")
179
+ .action(
180
+ wrap(async (url: string) => {
181
+ const { probePage } = await import("../../core/fetch-page.js");
182
+ const { detectProvider } = await import("../../core/detect-provider.js");
183
+ const { html, finalUrl } = await probePage(url);
184
+ const result = detectProvider(html, finalUrl);
185
+ bus.emit("output:json", { data: { url, finalUrl, ...result } });
186
+ })
187
+ );
188
+
189
+ addFilterOptions(
190
+ program
191
+ .command("match <file>")
192
+ .description("Filter a jobs JSON file against search criteria")
193
+ .option("--keywords <terms...>", "Job title keywords to match")
194
+ .option("--exclude <terms...>", "Keywords to exclude")
195
+ .option("--location <location>", "Location filter")
196
+ .option("--remote", "Only remote jobs")
197
+ .option("--onsite", "Only onsite jobs")
198
+ .option("--hybrid", "Only hybrid jobs")
199
+ .option("--department <depts...>", "Department filter")
200
+ .option(
201
+ "--output <format>",
202
+ "Output format: json, table, markdown, csv",
203
+ "json"
204
+ )
205
+ ).action(
206
+ wrap(async (file: string, opts: Record<string, unknown>) => {
207
+ const { readFile } = await import("node:fs/promises");
208
+ const { matchJobs } = await import("../../core/match-jobs.js");
209
+ const { formatOutput } = await import("../../core/format-output.js");
210
+ const jobs = JSON.parse(await readFile(file, "utf-8"));
211
+ const criteria = {
212
+ keywords: (opts.keywords as string[]) ?? [],
213
+ excludeKeywords: (opts.exclude as string[]) ?? [],
214
+ location: (opts.location as string) ?? null,
215
+ workMode: (() => {
216
+ const modes = [
217
+ ...(opts.remote ? ["remote" as const] : []),
218
+ ...(opts.onsite ? ["onsite" as const] : []),
219
+ ...(opts.hybrid ? ["hybrid" as const] : []),
220
+ ];
221
+ return modes.length ? modes : null;
222
+ })(),
223
+ departments: (opts.department as string[]) ?? null,
224
+ role: (opts.role as string[]) ?? null,
225
+ roleType: (opts.roleType as string[]) ?? null,
226
+ jobType: (opts.jobType as string[]) ?? null,
227
+ minExperience: opts.minExperience
228
+ ? (opts.minExperience as string[]).map((v) => parseInt(v, 10))
229
+ : null,
230
+ companyStage: (opts.companyStage as string[]) ?? null,
231
+ industry: (opts.industry as string[]) ?? null,
232
+ companySize: (opts.companySize as string[]) ?? null,
233
+ hasSalary: (opts.hasSalary as boolean) ?? null,
234
+ hasEquity: (opts.hasEquity as boolean) ?? null,
235
+ hasInterviewProcess: (opts.hasInterviewProcess as boolean) ?? null,
236
+ visaSponsorship: (opts.visaSponsorship as boolean) ?? null,
237
+ };
238
+ const matched = matchJobs(jobs, criteria);
239
+ const output = formatOutput(
240
+ matched,
241
+ (opts.output as string as "json") ?? "json"
242
+ );
243
+ bus.emit("output:text", { text: output });
244
+ })
245
+ );
246
+
247
+ // Handle exit
248
+ bus.on("done", (p) => {
249
+ process.exitCode = p.exitCode;
250
+ });
251
+
252
+ program.parse();
@@ -0,0 +1,117 @@
1
+ import { existsSync } from "node:fs";
2
+ import { mkdir, writeFile } from "node:fs/promises";
3
+ import { homedir } from "node:os";
4
+ import { join } from "node:path";
5
+ import { bus } from "../../events.js";
6
+ import { companies } from "../../data/companies.js";
7
+
8
+ const CONFIG_DIR = join(homedir(), ".jobcrawl");
9
+ const CONFIG_FILE = join(CONFIG_DIR, "config.yaml");
10
+ const CREDENTIALS_FILE = join(CONFIG_DIR, "credentials.json");
11
+ const RAW_DIR = join(CONFIG_DIR, "raw");
12
+
13
+ function generateDefaultConfig(): string {
14
+ const lines: string[] = [
15
+ "# jobcrawl config — auto-generated by `jobcrawl init`",
16
+ "#",
17
+ "# Aggregators (cross-company search engines):",
18
+ "# Run with: jobcrawl crawl-aggregators yc",
19
+ "# Or alongside companies: jobcrawl crawl --aggregators yc",
20
+ "",
21
+ "aggregators:",
22
+ " - type: yc",
23
+ " enabled: true",
24
+ "",
25
+ "# Companies (add your own or modify existing ones):",
26
+ "#",
27
+ "# Slug-based (recommended):",
28
+ "# - company: Company Name",
29
+ "# slug: board-slug",
30
+ "# provider: greenhouse | ashby | lever (optional — auto-detected if omitted)",
31
+ "# fallback: https://... (optional — used when no ATS API matches)",
32
+ "#",
33
+ "# URL-based:",
34
+ "# - url: https://example.com/careers",
35
+ "# company: Company Name",
36
+ "",
37
+ "companies:",
38
+ ];
39
+
40
+ let currentSection = "";
41
+
42
+ for (const c of companies) {
43
+ if (c.section !== currentSection) {
44
+ currentSection = c.section;
45
+ if (currentSection) {
46
+ lines.push("");
47
+ lines.push(` # --- ${currentSection} ---`);
48
+ }
49
+ }
50
+
51
+ lines.push(` - company: ${c.company}`);
52
+ lines.push(` slug: ${c.slug}`);
53
+ if (c.provider) {
54
+ lines.push(` provider: ${c.provider}`);
55
+ }
56
+ if (c.fallback) {
57
+ lines.push(` fallback: ${c.fallback}`);
58
+ }
59
+ }
60
+
61
+ lines.push("");
62
+ lines.push("defaults:");
63
+ lines.push(" concurrency: 5");
64
+ lines.push("");
65
+
66
+ return lines.join("\n");
67
+ }
68
+
69
+ export { CONFIG_DIR, CONFIG_FILE, CREDENTIALS_FILE, RAW_DIR };
70
+
71
+ interface InitOptions {
72
+ force?: boolean;
73
+ }
74
+
75
+ export async function initCommand(opts: InitOptions): Promise<void> {
76
+ const created: string[] = [];
77
+ const skipped: string[] = [];
78
+
79
+ await mkdir(RAW_DIR, { recursive: true, mode: 0o700 });
80
+ created.push(`${RAW_DIR}/`);
81
+
82
+ if (!existsSync(CONFIG_FILE) || opts.force) {
83
+ await writeFile(CONFIG_FILE, generateDefaultConfig());
84
+ created.push(CONFIG_FILE);
85
+ } else {
86
+ skipped.push(CONFIG_FILE);
87
+ }
88
+
89
+ if (!existsSync(CREDENTIALS_FILE) || opts.force) {
90
+ const defaultCredentials = {
91
+ yc: {
92
+ algoliaAppId: "",
93
+ algoliaApiKey: "",
94
+ },
95
+ };
96
+ await writeFile(
97
+ CREDENTIALS_FILE,
98
+ JSON.stringify(defaultCredentials, null, 2) + "\n",
99
+ { mode: 0o600 },
100
+ );
101
+ created.push(CREDENTIALS_FILE);
102
+ } else {
103
+ skipped.push(CREDENTIALS_FILE);
104
+ }
105
+
106
+ const lines: string[] = [];
107
+ for (const f of created) lines.push(`Created ${f}`);
108
+ for (const f of skipped) lines.push(`Skipped ${f} (already exists)`);
109
+ lines.push(
110
+ "",
111
+ `Add your YC Algolia credentials to ${CREDENTIALS_FILE}, then run:`,
112
+ "",
113
+ " jobcrawl crawl --keywords \"engineer\"",
114
+ );
115
+
116
+ bus.emit("output:message", { text: lines.join("\n"), style: "success" });
117
+ }