jobcrawl 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.prettierrc.json +10 -0
- package/CHANGELOG.md +40 -0
- package/README.md +232 -0
- package/dist/core/aggregators/yc.d.ts +7 -0
- package/dist/core/aggregators/yc.js +320 -0
- package/dist/core/browser.d.ts +30 -0
- package/dist/core/browser.js +196 -0
- package/dist/core/cache.d.ts +13 -0
- package/dist/core/cache.js +41 -0
- package/dist/core/detect-provider.d.ts +7 -0
- package/dist/core/detect-provider.js +125 -0
- package/dist/core/discover-careers.d.ts +18 -0
- package/dist/core/discover-careers.js +92 -0
- package/dist/core/extract-jobs.d.ts +14 -0
- package/dist/core/extract-jobs.js +36 -0
- package/dist/core/fetch-page.d.ts +11 -0
- package/dist/core/fetch-page.js +39 -0
- package/dist/core/format-output.d.ts +2 -0
- package/dist/core/format-output.js +59 -0
- package/dist/core/match-jobs.d.ts +6 -0
- package/dist/core/match-jobs.js +43 -0
- package/dist/core/providers/ashby.d.ts +6 -0
- package/dist/core/providers/ashby.js +58 -0
- package/dist/core/providers/generic.d.ts +6 -0
- package/dist/core/providers/generic.js +294 -0
- package/dist/core/providers/greenhouse.d.ts +6 -0
- package/dist/core/providers/greenhouse.js +47 -0
- package/dist/core/providers/lever.d.ts +7 -0
- package/dist/core/providers/lever.js +60 -0
- package/dist/core/providers/yc.d.ts +7 -0
- package/dist/core/providers/yc.js +320 -0
- package/dist/core/resolve-iframe.d.ts +6 -0
- package/dist/core/resolve-iframe.js +51 -0
- package/dist/core/save-raw.d.ts +4 -0
- package/dist/core/save-raw.js +13 -0
- package/dist/data/companies.d.ts +9 -0
- package/dist/data/companies.js +2849 -0
- package/dist/entrypoints/cli/app.d.ts +3 -0
- package/dist/entrypoints/cli/app.js +91 -0
- package/dist/entrypoints/cli/components/crawl-view.d.ts +1 -0
- package/dist/entrypoints/cli/components/crawl-view.js +94 -0
- package/dist/entrypoints/cli/components/discover-view.d.ts +1 -0
- package/dist/entrypoints/cli/components/discover-view.js +67 -0
- package/dist/entrypoints/cli/crawl-aggregators.d.ts +26 -0
- package/dist/entrypoints/cli/crawl-aggregators.js +76 -0
- package/dist/entrypoints/cli/crawl-url.d.ts +26 -0
- package/dist/entrypoints/cli/crawl-url.js +54 -0
- package/dist/entrypoints/cli/crawl.d.ts +32 -0
- package/dist/entrypoints/cli/crawl.js +108 -0
- package/dist/entrypoints/cli/discover.d.ts +10 -0
- package/dist/entrypoints/cli/discover.js +69 -0
- package/dist/entrypoints/cli/index.d.ts +2 -0
- package/dist/entrypoints/cli/index.js +197 -0
- package/dist/entrypoints/cli/init.d.ts +9 -0
- package/dist/entrypoints/cli/init.js +94 -0
- package/dist/entrypoints/cli/plain.d.ts +6 -0
- package/dist/entrypoints/cli/plain.js +77 -0
- package/dist/events.d.ts +114 -0
- package/dist/events.js +17 -0
- package/dist/orchestrators/crawl-all.d.ts +2 -0
- package/dist/orchestrators/crawl-all.js +66 -0
- package/dist/orchestrators/discover-all.d.ts +10 -0
- package/dist/orchestrators/discover-all.js +39 -0
- package/dist/threads/pool.d.ts +5 -0
- package/dist/threads/pool.js +23 -0
- package/dist/threads/process-url.d.ts +9 -0
- package/dist/threads/process-url.js +229 -0
- package/dist/types/index.d.ts +83 -0
- package/dist/types/index.js +6 -0
- package/dist/utils/config.d.ts +17 -0
- package/dist/utils/config.js +57 -0
- package/dist/utils/google-search.d.ts +19 -0
- package/dist/utils/google-search.js +139 -0
- package/dist/utils/llm.d.ts +8 -0
- package/dist/utils/llm.js +25 -0
- package/package.json +42 -0
- package/src/core/aggregators/yc.ts +415 -0
- package/src/core/browser.ts +239 -0
- package/src/core/detect-provider.ts +162 -0
- package/src/core/discover-careers.ts +117 -0
- package/src/core/extract-jobs.ts +50 -0
- package/src/core/fetch-page.ts +41 -0
- package/src/core/format-output.ts +80 -0
- package/src/core/match-jobs.ts +56 -0
- package/src/core/providers/ashby.ts +84 -0
- package/src/core/providers/generic.ts +332 -0
- package/src/core/providers/greenhouse.ts +74 -0
- package/src/core/providers/lever.ts +90 -0
- package/src/core/resolve-iframe.ts +59 -0
- package/src/core/save-raw.ts +18 -0
- package/src/data/companies.ts +2859 -0
- package/src/entrypoints/cli/app.tsx +173 -0
- package/src/entrypoints/cli/components/crawl-view.tsx +163 -0
- package/src/entrypoints/cli/components/discover-view.tsx +138 -0
- package/src/entrypoints/cli/crawl-aggregators.ts +112 -0
- package/src/entrypoints/cli/crawl-url.ts +87 -0
- package/src/entrypoints/cli/crawl.ts +163 -0
- package/src/entrypoints/cli/discover.ts +96 -0
- package/src/entrypoints/cli/index.ts +252 -0
- package/src/entrypoints/cli/init.ts +117 -0
- package/src/entrypoints/cli/plain.ts +104 -0
- package/src/events.ts +79 -0
- package/src/orchestrators/crawl-all.ts +96 -0
- package/src/orchestrators/discover-all.ts +61 -0
- package/src/threads/pool.ts +29 -0
- package/src/threads/process-url.ts +312 -0
- package/src/types/index.ts +110 -0
- package/src/utils/config.ts +79 -0
- package/src/utils/google-search.ts +155 -0
- package/src/utils/llm.ts +33 -0
- package/test/integration/process-url.test.ts +301 -0
- package/test/integration/providers/ashby.test.ts +163 -0
- package/test/integration/providers/greenhouse.test.ts +191 -0
- package/test/integration/providers/lever.test.ts +188 -0
- package/test/unit/config.test.ts +64 -0
- package/test/unit/detect-provider.test.ts +165 -0
- package/test/unit/events.test.ts +104 -0
- package/test/unit/format-output.test.ts +165 -0
- package/test/unit/match-jobs.test.ts +257 -0
- package/test/unit/pool.test.ts +74 -0
- package/test/unit/providers/generic.test.ts +139 -0
- package/test/unit/resolve-iframe.test.ts +100 -0
- package/tsconfig.json +19 -0
- package/vitest.config.ts +7 -0
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import React from "react";
|
|
3
|
+
import { render } from "ink";
|
|
4
|
+
import { Command } from "commander";
|
|
5
|
+
import { bus } from "../../events.js";
|
|
6
|
+
import { mountPlain } from "./plain.js";
|
|
7
|
+
import { App } from "./app.js";
|
|
8
|
+
import { crawlUrlCommand } from "./crawl-url.js";
|
|
9
|
+
import { crawlCommand } from "./crawl.js";
|
|
10
|
+
import { crawlAggregatorsCommand } from "./crawl-aggregators.js";
|
|
11
|
+
import { discoverCommand } from "./discover.js";
|
|
12
|
+
import { initCommand } from "./init.js";
|
|
13
|
+
/**
 * Lifecycle manager. Mounts Ink (TTY) or plain text (piped) subscriber,
 * executes the command, and handles teardown.
 */
function wrap(
// eslint-disable-next-line @typescript-eslint/no-explicit-any
fn) {
    return (...args) => {
        if (!process.stdout.isTTY) {
            // Piped: plain text subscriber renders all bus events.
            const unsubscribe = mountPlain();
            fn(...args)
                .then(() => {
                bus.emit("done", { exitCode: 0 });
            })
                .catch((err) => {
                const message = err instanceof Error ? err.message : String(err);
                bus.emit("error", { message });
                bus.emit("done", { exitCode: 1 });
            })
                .finally(() => {
                unsubscribe();
            });
            return;
        }
        // Interactive: mount Ink (handles run + done + cleanup internally)
        const instance = render(React.createElement(App, { run: () => fn(...args) }));
        const handleDone = () => {
            bus.off("done", handleDone);
            // Brief delay lets Ink paint its final frame before unmounting.
            setTimeout(() => instance.unmount(), 32);
        };
        bus.on("done", handleDone);
    };
}
|
|
50
|
+
/**
 * Attach the shared job-filter options to a commander command.
 * Returns the command so callers can keep chaining.
 */
function addFilterOptions(cmd) {
    const filterOptions = [
        ["--role <roles...>", "Role filter (e.g. engineering, design)"],
        ["--role-type <types...>", "Role sub-type (e.g. backend, frontend)"],
        ["--job-type <types...>", "Job type (e.g. fulltime, internship)"],
        ["--min-experience <years...>", "Min years experience (e.g. 0, 3)"],
        ["--company-stage <stages...>", "Company stage (e.g. seed, growth)"],
        ["--industry <industries...>", "Industry filter"],
        ["--company-size <sizes...>", "Company size (e.g. 1-10, 11-50)"],
        ["--has-salary", "Only jobs with salary listed"],
        ["--has-equity", "Only jobs with equity"],
        ["--has-interview-process", "Only jobs with interview process"],
        ["--visa-sponsorship", "Only jobs not requiring US visa"],
    ];
    let result = cmd;
    for (const [flags, description] of filterOptions) {
        result = result.option(flags, description);
    }
    return result;
}
|
|
64
|
+
// ---------------------------------------------------------------------------
// CLI wiring. Every subcommand action is passed through `wrap`, so its events
// reach either the Ink UI (interactive TTY) or the plain-text subscriber
// (piped output).
// ---------------------------------------------------------------------------
const program = new Command("jobcrawl")
    // Fix: report the actual package version (0.1.0), not a stale "1.0.0".
    // Keep in sync with package.json.
    .version("0.1.0")
    .description("Crawl career pages for jobs matching your search criteria");
// crawl-url: crawl a single career page.
addFilterOptions(program
    .command("crawl-url <url>")
    .description("Crawl a single career page URL and return matching jobs")
    .option("--keywords <terms...>", "Job title keywords to match")
    .option("--exclude <terms...>", "Keywords to exclude")
    .option("--location <location>", "Location filter")
    .option("--remote", "Only remote jobs")
    .option("--onsite", "Only onsite jobs")
    .option("--hybrid", "Only hybrid jobs")
    .option("--department <depts...>", "Department filter")
    .option("--output <format>", "Output format: json, table, markdown, csv", "json")
    .option("-o, --out <file>", "Write output to file")
    .option("--save-raw", "Save raw API responses to ~/.jobcrawl/raw/")).action(wrap(async (url, opts) => {
    await crawlUrlCommand(url, opts);
}));
// crawl: crawl many targets from flags and/or a config file.
addFilterOptions(program
    .command("crawl")
    .description("Crawl multiple career pages and return matching jobs")
    .option("--urls <urls...>", "Career page URLs to crawl")
    .option("--file <path>", "Config file with targets (YAML/JSON)")
    .option("--keywords <terms...>", "Job title keywords to match")
    .option("--exclude <terms...>", "Keywords to exclude")
    .option("--location <location>", "Location filter")
    .option("--remote", "Only remote jobs")
    .option("--onsite", "Only onsite jobs")
    .option("--hybrid", "Only hybrid jobs")
    .option("--department <depts...>", "Department filter")
    .option("--output <format>", "Output format: json, table, markdown, csv", "json")
    .option("-o, --out <file>", "Write output to file")
    .option("--concurrency <n>", "Max concurrent crawls", "5")
    .option("--save-raw", "Save raw API responses to ~/.jobcrawl/raw/")
    .option("--aggregators <names...>", "Also run aggregators (e.g. yc)")
    .option("--network-timeout <ms>", "Timeout for browser network commands (ms)")
    .option("--max-bubble-levels <n>", "Max parent levels to try when clicking job cards")).action(wrap(async (opts) => {
    await crawlCommand(opts);
}));
// crawl-aggregators: aggregator-only crawl.
addFilterOptions(program
    .command("crawl-aggregators <aggregators...>")
    .description("Crawl aggregator sources (e.g. yc) for matching jobs")
    .option("--keywords <terms...>", "Job title keywords to match")
    .option("--exclude <terms...>", "Keywords to exclude")
    .option("--location <location>", "Location filter")
    .option("--remote", "Only remote jobs")
    .option("--onsite", "Only onsite jobs")
    .option("--hybrid", "Only hybrid jobs")
    .option("--department <depts...>", "Department filter")
    .option("--output <format>", "Output format: json, table, markdown, csv", "json")
    .option("-o, --out <file>", "Write output to file")
    .option("--save-raw", "Save raw API responses to ~/.jobcrawl/raw/")).action(wrap(async (aggregators, opts) => {
    await crawlAggregatorsCommand(aggregators, opts);
}));
// discover: company names -> career page URLs.
program
    .command("discover")
    .description("Find career page URLs from company names")
    .option("--companies <names...>", "Company names to search for")
    .option("--file <path>", "File with company names (one per line)")
    .option("--output <format>", "Output format: urls, yaml, json", "urls")
    .option("-o, --out <file>", "Write output to file")
    .option("--verify", "Verify each discovered URL is a real career page")
    .option("--concurrency <n>", "Max concurrent searches", "3")
    .action(wrap(async (opts) => {
    await discoverCommand(opts);
}));
// init: scaffold ~/.jobcrawl/ config + credentials.
program
    .command("init")
    .description("Create config file at ~/.jobcrawl/config.yaml")
    .option("--force", "Overwrite existing config")
    .action(wrap(async (opts) => {
    await initCommand(opts);
}));
// detect: report which ATS provider a page uses. Heavy modules are
// dynamically imported so other commands don't pay their load cost.
program
    .command("detect <url>")
    .description("Detect which ATS provider a career page uses")
    .action(wrap(async (url) => {
    const { probePage } = await import("../../core/fetch-page.js");
    const { detectProvider } = await import("../../core/detect-provider.js");
    const { html, finalUrl } = await probePage(url);
    const result = detectProvider(html, finalUrl);
    bus.emit("output:json", { data: { url, finalUrl, ...result } });
}));
// match: offline filtering of a previously saved jobs JSON file.
addFilterOptions(program
    .command("match <file>")
    .description("Filter a jobs JSON file against search criteria")
    .option("--keywords <terms...>", "Job title keywords to match")
    .option("--exclude <terms...>", "Keywords to exclude")
    .option("--location <location>", "Location filter")
    .option("--remote", "Only remote jobs")
    .option("--onsite", "Only onsite jobs")
    .option("--hybrid", "Only hybrid jobs")
    .option("--department <depts...>", "Department filter")
    .option("--output <format>", "Output format: json, table, markdown, csv", "json")).action(wrap(async (file, opts) => {
    const { readFile } = await import("node:fs/promises");
    const { matchJobs } = await import("../../core/match-jobs.js");
    const { formatOutput } = await import("../../core/format-output.js");
    const jobs = JSON.parse(await readFile(file, "utf-8"));
    // Translate CLI flags into the matcher's criteria shape.
    // `null` consistently means "no filter on this dimension".
    const criteria = {
        keywords: opts.keywords ?? [],
        excludeKeywords: opts.exclude ?? [],
        location: opts.location ?? null,
        workMode: (() => {
            const modes = [
                ...(opts.remote ? ["remote"] : []),
                ...(opts.onsite ? ["onsite"] : []),
                ...(opts.hybrid ? ["hybrid"] : []),
            ];
            return modes.length ? modes : null;
        })(),
        departments: opts.department ?? null,
        role: opts.role ?? null,
        roleType: opts.roleType ?? null,
        jobType: opts.jobType ?? null,
        minExperience: opts.minExperience
            ? opts.minExperience.map((v) => parseInt(v, 10))
            : null,
        companyStage: opts.companyStage ?? null,
        industry: opts.industry ?? null,
        companySize: opts.companySize ?? null,
        hasSalary: opts.hasSalary ?? null,
        hasEquity: opts.hasEquity ?? null,
        hasInterviewProcess: opts.hasInterviewProcess ?? null,
        visaSponsorship: opts.visaSponsorship ?? null,
    };
    const matched = matchJobs(jobs, criteria);
    const output = formatOutput(matched, opts.output ?? "json");
    bus.emit("output:text", { text: output });
}));
// Handle exit: record the exit code instead of calling process.exit(),
// letting the process terminate naturally.
bus.on("done", (p) => {
    process.exitCode = p.exitCode;
});
program.parse();
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
declare const CONFIG_DIR: string;
|
|
2
|
+
declare const CONFIG_FILE: string;
|
|
3
|
+
declare const CREDENTIALS_FILE: string;
|
|
4
|
+
declare const RAW_DIR: string;
|
|
5
|
+
export { CONFIG_DIR, CONFIG_FILE, CREDENTIALS_FILE, RAW_DIR };
|
|
6
|
+
interface InitOptions {
|
|
7
|
+
force?: boolean;
|
|
8
|
+
}
|
|
9
|
+
export declare function initCommand(opts: InitOptions): Promise<void>;
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import { existsSync } from "node:fs";
|
|
2
|
+
import { mkdir, writeFile } from "node:fs/promises";
|
|
3
|
+
import { homedir } from "node:os";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import { bus } from "../../events.js";
|
|
6
|
+
import { companies } from "../../data/companies.js";
|
|
7
|
+
// Root directory for all jobcrawl state: ~/.jobcrawl
const CONFIG_DIR = join(homedir(), ".jobcrawl");
// Main YAML config (aggregators, companies, defaults); written by `jobcrawl init`.
const CONFIG_FILE = join(CONFIG_DIR, "config.yaml");
// Credentials skeleton (YC Algolia keys); created below with mode 0o600.
const CREDENTIALS_FILE = join(CONFIG_DIR, "credentials.json");
// Destination for raw API responses when --save-raw is passed.
const RAW_DIR = join(CONFIG_DIR, "raw");
|
|
11
|
+
/**
 * Build the default ~/.jobcrawl/config.yaml contents as one string.
 *
 * Layout: commented usage header, an `aggregators:` section (YC enabled),
 * the bundled company list grouped by section banners, then `defaults:`.
 *
 * NOTE(review): the YAML nesting depends on the exact leading spaces inside
 * these string literals — confirm they match what utils/config parses.
 */
function generateDefaultConfig() {
    const lines = [
        "# jobcrawl config — auto-generated by `jobcrawl init`",
        "#",
        "# Aggregators (cross-company search engines):",
        "# Run with: jobcrawl crawl-aggregators yc",
        "# Or alongside companies: jobcrawl crawl --aggregators yc",
        "",
        "aggregators:",
        " - type: yc",
        " enabled: true",
        "",
        "# Companies (add your own or modify existing ones):",
        "#",
        "# Slug-based (recommended):",
        "# - company: Company Name",
        "# slug: board-slug",
        "# provider: greenhouse | ashby | lever (optional — auto-detected if omitted)",
        "# fallback: https://... (optional — used when no ATS API matches)",
        "#",
        "# URL-based:",
        "# - url: https://example.com/careers",
        "# company: Company Name",
        "",
        "companies:",
    ];
    // Emit a `# --- <section> ---` banner whenever the section changes
    // (companies are assumed pre-grouped by section — TODO confirm).
    let currentSection = "";
    for (const c of companies) {
        if (c.section !== currentSection) {
            currentSection = c.section;
            if (currentSection) {
                lines.push("");
                lines.push(` # --- ${currentSection} ---`);
            }
        }
        lines.push(` - company: ${c.company}`);
        lines.push(` slug: ${c.slug}`);
        // provider/fallback are optional per-company fields.
        if (c.provider) {
            lines.push(` provider: ${c.provider}`);
        }
        if (c.fallback) {
            lines.push(` fallback: ${c.fallback}`);
        }
    }
    lines.push("");
    lines.push("defaults:");
    lines.push(" concurrency: 5");
    lines.push("");
    // The trailing "" makes join() end the file with a newline.
    return lines.join("\n");
}
|
|
61
|
+
export { CONFIG_DIR, CONFIG_FILE, CREDENTIALS_FILE, RAW_DIR };
|
|
62
|
+
export async function initCommand(opts) {
|
|
63
|
+
const created = [];
|
|
64
|
+
const skipped = [];
|
|
65
|
+
await mkdir(RAW_DIR, { recursive: true, mode: 0o700 });
|
|
66
|
+
created.push(`${RAW_DIR}/`);
|
|
67
|
+
if (!existsSync(CONFIG_FILE) || opts.force) {
|
|
68
|
+
await writeFile(CONFIG_FILE, generateDefaultConfig());
|
|
69
|
+
created.push(CONFIG_FILE);
|
|
70
|
+
}
|
|
71
|
+
else {
|
|
72
|
+
skipped.push(CONFIG_FILE);
|
|
73
|
+
}
|
|
74
|
+
if (!existsSync(CREDENTIALS_FILE) || opts.force) {
|
|
75
|
+
const defaultCredentials = {
|
|
76
|
+
yc: {
|
|
77
|
+
algoliaAppId: "",
|
|
78
|
+
algoliaApiKey: "",
|
|
79
|
+
},
|
|
80
|
+
};
|
|
81
|
+
await writeFile(CREDENTIALS_FILE, JSON.stringify(defaultCredentials, null, 2) + "\n", { mode: 0o600 });
|
|
82
|
+
created.push(CREDENTIALS_FILE);
|
|
83
|
+
}
|
|
84
|
+
else {
|
|
85
|
+
skipped.push(CREDENTIALS_FILE);
|
|
86
|
+
}
|
|
87
|
+
const lines = [];
|
|
88
|
+
for (const f of created)
|
|
89
|
+
lines.push(`Created ${f}`);
|
|
90
|
+
for (const f of skipped)
|
|
91
|
+
lines.push(`Skipped ${f} (already exists)`);
|
|
92
|
+
lines.push("", `Add your YC Algolia credentials to ${CREDENTIALS_FILE}, then run:`, "", " jobcrawl crawl --keywords \"engineer\"");
|
|
93
|
+
bus.emit("output:message", { text: lines.join("\n"), style: "success" });
|
|
94
|
+
}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import { bus } from "../../events.js";
|
|
2
|
+
/**
 * Mount plain-text event subscribers for piped (non-TTY) output.
 * Data goes to stdout, status/progress to stderr.
 * Returns a cleanup function that unsubscribes all handlers.
 */
export function mountPlain() {
    const subscriptions = [];
    const subscribe = (event, handler) => {
        bus.on(event, handler);
        subscriptions.push([event, handler]);
    };
    const toErr = (line) => process.stderr.write(line + "\n");
    const toOut = (line) => process.stdout.write(line + "\n");
    // Status events → stderr
    subscribe("url:fetching", (p) => toErr(` [${p.urlId}] ${p.company}: fetching`));
    subscribe("url:detecting", (p) => toErr(` [${p.urlId}] provider: ${p.provider}`));
    subscribe("url:extracting", (p) => {
        const count = p.jobCount !== undefined ? ` (${p.jobCount} jobs)` : "";
        toErr(` [${p.urlId}] extracting${count}`);
    });
    subscribe("url:done", (p) => toErr(` [${p.urlId}] ${p.company}: ${p.matched}/${p.total} matched`));
    subscribe("url:failed", (p) => toErr(` [${p.urlId}] ${p.company}: FAILED — ${p.error}`));
    subscribe("crawl:start", (p) => toErr(`Crawling ${p.total} URLs...`));
    subscribe("crawl:complete", (p) => toErr(`Done. ${p.totalMatched} jobs matched across ${p.totalUrls} URLs (${p.failedUrls} failed)`));
    // Discover events → stderr
    subscribe("discover:searching", (p) => toErr(` Searching for ${p.company}...`));
    subscribe("discover:found", (p) => toErr(` ✓ ${p.company} → ${p.url}`));
    subscribe("discover:not-found", (p) => toErr(` ✗ ${p.company}: ${p.reason}`));
    // Output events → stdout
    subscribe("output:json", (p) => toOut(JSON.stringify(p.data, null, 2)));
    subscribe("output:text", (p) => toOut(p.text));
    subscribe("output:table", (p) => {
        // Simple table rendering: size each column to its widest cell.
        const widths = p.headers.map((h, i) => Math.max(h.length, ...p.rows.map((r) => (r[i] ?? "").length)));
        const headerLine = p.headers.map((h, i) => h.padEnd(widths[i])).join(" | ");
        const separator = widths.map((w) => "-".repeat(w)).join("-+-");
        process.stdout.write(headerLine + "\n" + separator + "\n");
        for (const row of p.rows) {
            process.stdout.write(row.map((c, i) => (c ?? "").padEnd(widths[i])).join(" | ") + "\n");
        }
        if (p.footer)
            toOut(p.footer);
    });
    subscribe("output:message", (p) => toErr(p.text));
    subscribe("error", (p) => toErr(`Error: ${p.message}`));
    // Cleanup: detach everything attached above.
    return () => {
        for (const [event, handler] of subscriptions) {
            bus.off(event, handler);
        }
    };
}
|
package/dist/events.d.ts
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
/**
 * Central event map: every event name the CLI can publish on the shared bus,
 * keyed to its payload shape. Producers (orchestrators, commands) emit these;
 * consumers (Ink UI or the plain-text subscriber) render them.
 */
export type Events = {
    /** A company-name search has started. */
    "discover:searching": {
        company: string;
    };
    /** A career-page URL was found for a company. */
    "discover:found": {
        company: string;
        url: string;
    };
    /** No usable career-page URL; `reason` is human-readable. */
    "discover:not-found": {
        company: string;
        reason: string;
    };
    /** A found URL is being verified as a real career page. */
    "discover:verifying": {
        company: string;
        url: string;
    };
    /** Discovery finished; counts across all companies. */
    "discover:complete": {
        found: number;
        notFound: number;
        total: number;
    };
    /** A crawl run began over `total` URLs. */
    "crawl:start": {
        total: number;
    };
    /** Fetching one target URL. `urlId` correlates all later url:* events. */
    "url:fetching": {
        urlId: string;
        url: string;
        company: string;
    };
    /** ATS provider detected for this target. */
    "url:detecting": {
        urlId: string;
        provider: string;
    };
    /** Extracting jobs; `jobCount` is present once known. */
    "url:extracting": {
        urlId: string;
        jobCount?: number;
    };
    /** Matching jobs against criteria: `matched` of `total`. */
    "url:matching": {
        urlId: string;
        matched: number;
        total: number;
    };
    /** Target finished: `matched` of `total` jobs kept. */
    "url:done": {
        urlId: string;
        company: string;
        matched: number;
        total: number;
    };
    /** Target failed; `error` is a human-readable message. */
    "url:failed": {
        urlId: string;
        company: string;
        error: string;
    };
    /** Rendering the page in a browser. */
    "url:rendering": {
        urlId: string;
        url: string;
    };
    /** Resolving `count` candidate URLs from the page. */
    "url:resolving-urls": {
        urlId: string;
        count: number;
    };
    /** Probing a configured target against a specific ATS provider. */
    "target:probing": {
        urlId: string;
        provider: string;
    };
    /** An aggregator (e.g. "yc") started. */
    "aggregator:start": {
        type: string;
    };
    /** An aggregator finished with `jobCount` matched jobs. */
    "aggregator:done": {
        type: string;
        jobCount: number;
    };
    /** An aggregator failed; other sources continue. */
    "aggregator:failed": {
        type: string;
        error: string;
    };
    /** Whole crawl finished; aggregate totals. */
    "crawl:complete": {
        totalJobs: number;
        totalMatched: number;
        totalUrls: number;
        failedUrls: number;
    };
    /** Final JSON payload for stdout. */
    "output:json": {
        data: unknown;
    };
    /** Tabular output: headers, row cells, optional footer line. */
    "output:table": {
        headers: string[];
        rows: string[][];
        footer?: string;
    };
    /** Pre-formatted text for stdout. */
    "output:text": {
        text: string;
    };
    /** Human-facing status message (stderr in plain mode). */
    "output:message": {
        text: string;
        style?: "success" | "warning" | "dim";
    };
    /** A fatal error message. */
    error: {
        message: string;
    };
    /** Command finished; consumers should tear down and exit with this code. */
    done: {
        exitCode: number;
    };
};
/** Union of all event names. */
type EventName = keyof Events;
/** Type-safe facade over a Node EventEmitter (implementation in events.js). */
declare class TypedEventBus {
    private emitter;
    emit<K extends EventName>(event: K, payload: Events[K]): void;
    on<K extends EventName>(event: K, handler: (payload: Events[K]) => void): void;
    off<K extends EventName>(event: K, handler: (payload: Events[K]) => void): void;
    removeAllListeners(): void;
}
/** Process-wide singleton bus shared by the whole CLI. */
export declare const bus: TypedEventBus;
export {};
|
package/dist/events.js
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { EventEmitter } from "node:events";
|
|
2
|
+
class TypedEventBus {
|
|
3
|
+
emitter = new EventEmitter();
|
|
4
|
+
emit(event, payload) {
|
|
5
|
+
this.emitter.emit(event, payload);
|
|
6
|
+
}
|
|
7
|
+
on(event, handler) {
|
|
8
|
+
this.emitter.on(event, handler);
|
|
9
|
+
}
|
|
10
|
+
off(event, handler) {
|
|
11
|
+
this.emitter.off(event, handler);
|
|
12
|
+
}
|
|
13
|
+
removeAllListeners() {
|
|
14
|
+
this.emitter.removeAllListeners();
|
|
15
|
+
}
|
|
16
|
+
}
|
|
17
|
+
export const bus = new TypedEventBus();
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import { bus } from "../events.js";
|
|
2
|
+
import { runPool } from "../threads/pool.js";
|
|
3
|
+
import { processTarget } from "../threads/process-url.js";
|
|
4
|
+
import { extractViaAggregator } from "../core/extract-jobs.js";
|
|
5
|
+
import { matchJobs } from "../core/match-jobs.js";
|
|
6
|
+
/**
 * Run each aggregator in sequence, matching its extracted jobs against
 * `criteria`. A failing aggregator is reported on the bus and skipped;
 * results from the others are still accumulated.
 */
async function runAggregators(aggregators, criteria, saveRaw) {
    const collected = [];
    for (const type of aggregators) {
        bus.emit("aggregator:start", { type });
        try {
            const extracted = await extractViaAggregator(type, criteria, saveRaw);
            const matched = matchJobs(extracted, criteria);
            bus.emit("aggregator:done", { type, jobCount: matched.length });
            collected.push(...matched);
        }
        catch (err) {
            bus.emit("aggregator:failed", {
                type,
                error: err instanceof Error ? err.message : String(err),
            });
        }
    }
    return collected;
}
|
|
23
|
+
/**
 * Remove duplicate jobs, keyed by (url, title).
 * When both a provider result and a YC aggregator result share a key, the
 * provider result wins — provider data is richer. Within the same tier,
 * the first occurrence is kept.
 */
function dedup(jobs) {
    const byKey = new Map();
    for (const job of jobs) {
        const key = `${job.url}::${job.title}`;
        const current = byKey.get(key);
        // Store when the slot is empty, or overwrite an aggregator ("yc") entry.
        if (current === undefined || current.provider === "yc") {
            byKey.set(key, job);
        }
    }
    return [...byKey.values()];
}
|
|
35
|
+
/**
 * Crawl every target (and any requested aggregators) with bounded
 * concurrency, then merge and dedup the results.
 * Emits crawl:start / crawl:complete on the bus.
 * Returns matched jobs, the raw per-target results, and total duration.
 */
export async function crawlAll(targets, criteria, options, aggregators) {
    const startedAt = Date.now();
    bus.emit("crawl:start", { total: targets.length });
    const tasks = targets.map((target) => () => processTarget(target, criteria, {
        saveRaw: options.saveRaw,
        browser: options.browser,
    }));
    // Aggregators and per-target crawls run in parallel.
    const [aggregatorJobs, results] = await Promise.all([
        aggregators && aggregators.length > 0
            ? runAggregators(aggregators, criteria, options.saveRaw)
            : Promise.resolve([]),
        runPool(tasks, options.concurrency),
    ]);
    const matchedTargetJobs = [];
    const everyTargetJob = [];
    let failedCount = 0;
    for (const result of results) {
        matchedTargetJobs.push(...result.jobs);
        everyTargetJob.push(...result.allJobs);
        if (result.error !== null) {
            failedCount += 1;
        }
    }
    // Merge and dedup aggregator + target jobs (provider results win).
    const jobs = dedup([...matchedTargetJobs, ...aggregatorJobs]);
    const allJobs = dedup([...everyTargetJob, ...aggregatorJobs]);
    bus.emit("crawl:complete", {
        totalJobs: allJobs.length,
        totalMatched: jobs.length,
        totalUrls: targets.length,
        failedUrls: failedCount,
    });
    return {
        jobs,
        results,
        totalDurationMs: Date.now() - startedAt,
    };
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { type DiscoverResult } from "../core/discover-careers.js";
/** Aggregate outcome of discovering career pages for a batch of companies. */
export interface DiscoverAllResult {
    /** One entry per input company (order not guaranteed — TODO confirm). */
    results: DiscoverResult[];
    /** Companies whose career-page URL was found (url !== null). */
    found: number;
    /** Companies with no usable URL (url === null). */
    notFound: number;
}
/**
 * Discover career-page URLs for `companies` with bounded concurrency.
 * `verify` additionally checks each found URL is a real career page.
 */
export declare function discoverAll(companies: string[], options: {
    concurrency: number;
    verify: boolean;
}): Promise<DiscoverAllResult>;
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { bus } from "../events.js";
|
|
2
|
+
import { runPool } from "../threads/pool.js";
|
|
3
|
+
import { discoverCareerPage, verifyCareerPage, } from "../core/discover-careers.js";
|
|
4
|
+
/**
 * Discover career-page URLs for many companies with bounded concurrency.
 * Progress is reported on the bus; returns per-company results plus
 * found/not-found counts. When `options.verify` is set, a found URL that
 * fails verification is downgraded to a not-found result.
 */
export async function discoverAll(companies, options) {
    const discoverOne = async (company) => {
        bus.emit("discover:searching", { company });
        const result = await discoverCareerPage(company);
        if (!result.url) {
            bus.emit("discover:not-found", {
                company,
                reason: result.error ?? "Not found",
            });
            return result;
        }
        if (options.verify) {
            bus.emit("discover:verifying", { company, url: result.url });
            const valid = await verifyCareerPage(result.url);
            if (!valid) {
                bus.emit("discover:not-found", {
                    company,
                    reason: "URL found but failed verification",
                });
                return { ...result, url: null, error: "Failed verification" };
            }
        }
        bus.emit("discover:found", { company, url: result.url });
        return result;
    };
    const tasks = companies.map((company) => () => discoverOne(company));
    const results = await runPool(tasks, options.concurrency);
    const found = results.filter((r) => r.url !== null).length;
    const notFound = results.length - found;
    bus.emit("discover:complete", {
        found,
        notFound,
        total: companies.length,
    });
    return { results, found, notFound };
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
 * Bounded concurrency pool. Runs up to `concurrency` tasks simultaneously.
 *
 * Fix: results are now returned in the same order as `tasks` rather than in
 * completion order, so callers get deterministic output regardless of how
 * long each task takes.
 *
 * @param tasks zero-argument functions, each returning a promise
 * @param concurrency maximum number of tasks in flight at once
 * @returns the tasks' resolved values, in input order
 */
export async function runPool(tasks, concurrency) {
    const results = new Array(tasks.length);
    const executing = new Set();
    for (const [index, task] of tasks.entries()) {
        const p = task()
            .then((result) => {
            // Store at the task's original index to preserve input order.
            results[index] = result;
        })
            .finally(() => {
            executing.delete(p);
        });
        executing.add(p);
        // Throttle: wait for some in-flight task before starting the next.
        if (executing.size >= concurrency) {
            await Promise.race(executing);
        }
    }
    await Promise.all(executing);
    return results;
}
|