jobcrawl 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. package/.prettierrc.json +10 -0
  2. package/CHANGELOG.md +40 -0
  3. package/README.md +232 -0
  4. package/dist/core/aggregators/yc.d.ts +7 -0
  5. package/dist/core/aggregators/yc.js +320 -0
  6. package/dist/core/browser.d.ts +30 -0
  7. package/dist/core/browser.js +196 -0
  8. package/dist/core/cache.d.ts +13 -0
  9. package/dist/core/cache.js +41 -0
  10. package/dist/core/detect-provider.d.ts +7 -0
  11. package/dist/core/detect-provider.js +125 -0
  12. package/dist/core/discover-careers.d.ts +18 -0
  13. package/dist/core/discover-careers.js +92 -0
  14. package/dist/core/extract-jobs.d.ts +14 -0
  15. package/dist/core/extract-jobs.js +36 -0
  16. package/dist/core/fetch-page.d.ts +11 -0
  17. package/dist/core/fetch-page.js +39 -0
  18. package/dist/core/format-output.d.ts +2 -0
  19. package/dist/core/format-output.js +59 -0
  20. package/dist/core/match-jobs.d.ts +6 -0
  21. package/dist/core/match-jobs.js +43 -0
  22. package/dist/core/providers/ashby.d.ts +6 -0
  23. package/dist/core/providers/ashby.js +58 -0
  24. package/dist/core/providers/generic.d.ts +6 -0
  25. package/dist/core/providers/generic.js +294 -0
  26. package/dist/core/providers/greenhouse.d.ts +6 -0
  27. package/dist/core/providers/greenhouse.js +47 -0
  28. package/dist/core/providers/lever.d.ts +7 -0
  29. package/dist/core/providers/lever.js +60 -0
  30. package/dist/core/providers/yc.d.ts +7 -0
  31. package/dist/core/providers/yc.js +320 -0
  32. package/dist/core/resolve-iframe.d.ts +6 -0
  33. package/dist/core/resolve-iframe.js +51 -0
  34. package/dist/core/save-raw.d.ts +4 -0
  35. package/dist/core/save-raw.js +13 -0
  36. package/dist/data/companies.d.ts +9 -0
  37. package/dist/data/companies.js +2849 -0
  38. package/dist/entrypoints/cli/app.d.ts +3 -0
  39. package/dist/entrypoints/cli/app.js +91 -0
  40. package/dist/entrypoints/cli/components/crawl-view.d.ts +1 -0
  41. package/dist/entrypoints/cli/components/crawl-view.js +94 -0
  42. package/dist/entrypoints/cli/components/discover-view.d.ts +1 -0
  43. package/dist/entrypoints/cli/components/discover-view.js +67 -0
  44. package/dist/entrypoints/cli/crawl-aggregators.d.ts +26 -0
  45. package/dist/entrypoints/cli/crawl-aggregators.js +76 -0
  46. package/dist/entrypoints/cli/crawl-url.d.ts +26 -0
  47. package/dist/entrypoints/cli/crawl-url.js +54 -0
  48. package/dist/entrypoints/cli/crawl.d.ts +32 -0
  49. package/dist/entrypoints/cli/crawl.js +108 -0
  50. package/dist/entrypoints/cli/discover.d.ts +10 -0
  51. package/dist/entrypoints/cli/discover.js +69 -0
  52. package/dist/entrypoints/cli/index.d.ts +2 -0
  53. package/dist/entrypoints/cli/index.js +197 -0
  54. package/dist/entrypoints/cli/init.d.ts +9 -0
  55. package/dist/entrypoints/cli/init.js +94 -0
  56. package/dist/entrypoints/cli/plain.d.ts +6 -0
  57. package/dist/entrypoints/cli/plain.js +77 -0
  58. package/dist/events.d.ts +114 -0
  59. package/dist/events.js +17 -0
  60. package/dist/orchestrators/crawl-all.d.ts +2 -0
  61. package/dist/orchestrators/crawl-all.js +66 -0
  62. package/dist/orchestrators/discover-all.d.ts +10 -0
  63. package/dist/orchestrators/discover-all.js +39 -0
  64. package/dist/threads/pool.d.ts +5 -0
  65. package/dist/threads/pool.js +23 -0
  66. package/dist/threads/process-url.d.ts +9 -0
  67. package/dist/threads/process-url.js +229 -0
  68. package/dist/types/index.d.ts +83 -0
  69. package/dist/types/index.js +6 -0
  70. package/dist/utils/config.d.ts +17 -0
  71. package/dist/utils/config.js +57 -0
  72. package/dist/utils/google-search.d.ts +19 -0
  73. package/dist/utils/google-search.js +139 -0
  74. package/dist/utils/llm.d.ts +8 -0
  75. package/dist/utils/llm.js +25 -0
  76. package/package.json +42 -0
  77. package/src/core/aggregators/yc.ts +415 -0
  78. package/src/core/browser.ts +239 -0
  79. package/src/core/detect-provider.ts +162 -0
  80. package/src/core/discover-careers.ts +117 -0
  81. package/src/core/extract-jobs.ts +50 -0
  82. package/src/core/fetch-page.ts +41 -0
  83. package/src/core/format-output.ts +80 -0
  84. package/src/core/match-jobs.ts +56 -0
  85. package/src/core/providers/ashby.ts +84 -0
  86. package/src/core/providers/generic.ts +332 -0
  87. package/src/core/providers/greenhouse.ts +74 -0
  88. package/src/core/providers/lever.ts +90 -0
  89. package/src/core/resolve-iframe.ts +59 -0
  90. package/src/core/save-raw.ts +18 -0
  91. package/src/data/companies.ts +2859 -0
  92. package/src/entrypoints/cli/app.tsx +173 -0
  93. package/src/entrypoints/cli/components/crawl-view.tsx +163 -0
  94. package/src/entrypoints/cli/components/discover-view.tsx +138 -0
  95. package/src/entrypoints/cli/crawl-aggregators.ts +112 -0
  96. package/src/entrypoints/cli/crawl-url.ts +87 -0
  97. package/src/entrypoints/cli/crawl.ts +163 -0
  98. package/src/entrypoints/cli/discover.ts +96 -0
  99. package/src/entrypoints/cli/index.ts +252 -0
  100. package/src/entrypoints/cli/init.ts +117 -0
  101. package/src/entrypoints/cli/plain.ts +104 -0
  102. package/src/events.ts +79 -0
  103. package/src/orchestrators/crawl-all.ts +96 -0
  104. package/src/orchestrators/discover-all.ts +61 -0
  105. package/src/threads/pool.ts +29 -0
  106. package/src/threads/process-url.ts +312 -0
  107. package/src/types/index.ts +110 -0
  108. package/src/utils/config.ts +79 -0
  109. package/src/utils/google-search.ts +155 -0
  110. package/src/utils/llm.ts +33 -0
  111. package/test/integration/process-url.test.ts +301 -0
  112. package/test/integration/providers/ashby.test.ts +163 -0
  113. package/test/integration/providers/greenhouse.test.ts +191 -0
  114. package/test/integration/providers/lever.test.ts +188 -0
  115. package/test/unit/config.test.ts +64 -0
  116. package/test/unit/detect-provider.test.ts +165 -0
  117. package/test/unit/events.test.ts +104 -0
  118. package/test/unit/format-output.test.ts +165 -0
  119. package/test/unit/match-jobs.test.ts +257 -0
  120. package/test/unit/pool.test.ts +74 -0
  121. package/test/unit/providers/generic.test.ts +139 -0
  122. package/test/unit/resolve-iframe.test.ts +100 -0
  123. package/tsconfig.json +19 -0
  124. package/vitest.config.ts +7 -0
@@ -0,0 +1,9 @@
import { type Target, type UrlTarget, type SearchCriteria, type CrawlResult, type BrowserOptions } from "../types/index.js";
/**
 * Crawl a single careers URL and return matched jobs.
 * Resolves with an error-bearing CrawlResult instead of rejecting.
 */
export declare function processUrl(target: UrlTarget, criteria: SearchCriteria, options?: {
    saveRaw?: boolean;
    browser?: BrowserOptions;
}): Promise<CrawlResult>;
/**
 * Crawl any target: slug targets probe ATS providers directly,
 * URL targets go through the processUrl pipeline.
 */
export declare function processTarget(target: Target, criteria: SearchCriteria, options?: {
    saveRaw?: boolean;
    browser?: BrowserOptions;
}): Promise<CrawlResult>;
@@ -0,0 +1,229 @@
1
+ import { createHash } from "node:crypto";
2
+ import { setTimeout as delay } from "node:timers/promises";
3
+ import { bus } from "../events.js";
4
+ import { probePage } from "../core/fetch-page.js";
5
+ import { BrowserSession, BrowserNotAvailableError, resolveJobUrls, } from "../core/browser.js";
6
+ import { detectProvider } from "../core/detect-provider.js";
7
+ import { extractViaApi, extractFromHtml } from "../core/extract-jobs.js";
8
+ import { matchJobs } from "../core/match-jobs.js";
9
+ import { isSlugTarget, } from "../types/index.js";
/** Yield to the event loop so Ink can re-render between steps. */
const tick = () => delay(0);
/** Derive a short, stable identifier for a URL (sha256 hex prefix). */
function urlId(url) {
    const digest = createHash("sha256").update(url).digest("hex");
    return `u_${digest.slice(0, 5)}`;
}
/**
 * Crawl a single careers URL and return matched jobs.
 *
 * Pipeline: probe via plain HTTP GET → detect the ATS provider from the
 * static HTML → extract via the provider's JSON API (Tier 1) or fall back
 * to a rendered-browser HTML extraction (Tier 2) → filter with matchJobs.
 *
 * Progress events ("url:fetching", "url:detecting", "url:extracting",
 * "url:matching", then "url:done" or "url:failed") are emitted on the shared
 * bus, each followed by `await tick()` so the Ink UI can re-render.
 *
 * Never rejects: any error is caught and reported as a CrawlResult with
 * `error` set and empty job lists.
 */
export async function processUrl(target, criteria, options) {
    const id = urlId(target.url);
    // Display name: explicit override, otherwise the URL's hostname.
    const company = target.company ?? new URL(target.url).hostname;
    const start = Date.now();
    try {
        // Step 1: Quick probe — simple HTTP GET (no browser)
        bus.emit("url:fetching", { urlId: id, url: target.url, company });
        await tick();
        const probe = await probePage(target.url);
        // Step 2: Detect provider from static HTML
        const detection = detectProvider(probe.html, probe.finalUrl);
        bus.emit("url:detecting", {
            urlId: id,
            provider: detection.provider,
        });
        await tick();
        let jobs;
        if (detection.provider !== "unknown" && detection.boardToken) {
            // Tier 1: ATS JSON API — no browser needed
            bus.emit("url:extracting", { urlId: id });
            await tick();
            try {
                jobs = await extractViaApi(detection.provider, detection.boardToken, target.url, criteria, options?.saveRaw);
            }
            catch {
                // API failed, fall through to Tier 2
                jobs = await fallbackToHtml(id, target.url, company, criteria, options?.saveRaw, options?.browser);
            }
        }
        else {
            // Tier 2: Render with agent-browser, then extract
            jobs = await fallbackToHtml(id, target.url, company, criteria, options?.saveRaw, options?.browser);
        }
        // Apply company name override
        if (target.company) {
            for (const job of jobs) {
                job.company = target.company;
            }
        }
        // Match against criteria
        const matched = matchJobs(jobs, criteria);
        bus.emit("url:matching", {
            urlId: id,
            matched: matched.length,
            total: jobs.length,
        });
        await tick();
        bus.emit("url:done", {
            urlId: id,
            company,
            matched: matched.length,
            total: jobs.length,
        });
        return {
            url: target.url,
            provider: detection.provider,
            jobs: matched,
            allJobs: jobs,
            error: null,
            durationMs: Date.now() - start,
        };
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        bus.emit("url:failed", { urlId: id, company, error: message });
        // Error result still carries timing; provider is unknown at this point.
        return {
            url: target.url,
            provider: "unknown",
            jobs: [],
            allJobs: [],
            error: message,
            durationMs: Date.now() - start,
        };
    }
}
/** Canonical public board URL for each probeable ATS provider. */
const PROVIDER_SOURCE_URLS = {
    greenhouse(slug) {
        return `https://boards.greenhouse.io/${slug}`;
    },
    ashby(slug) {
        return `https://jobs.ashbyhq.com/${slug}`;
    },
    lever(slug) {
        return `https://jobs.lever.co/${slug}`;
    },
};
/** Providers to try, in order, when a slug target has no provider hint. */
const PROBE_ORDER = ["greenhouse", "ashby", "lever"];
/**
 * Dispatch a crawl target to the right pipeline: slug targets go through
 * the ATS-probing path, URL targets through the fetch/detect/extract path.
 */
export async function processTarget(target, criteria, options) {
    return isSlugTarget(target)
        ? processSlug(target, criteria, options)
        : processUrl(target, criteria, options);
}
/**
 * Crawl a slug-style target by probing ATS providers directly.
 *
 * Tries each candidate provider's JSON API with the target's slug — the
 * hinted provider if given, otherwise every provider in PROBE_ORDER — and
 * returns results from the first probe that succeeds. If all probes fail,
 * falls back to crawling `target.fallback` as a plain URL when provided.
 *
 * Emits the same bus events as processUrl, plus "target:probing" when a
 * provider probe fails and the next one is tried. Never rejects.
 */
async function processSlug(target, criteria, options) {
    const id = "s_" + createHash("sha256").update(target.slug).digest("hex").slice(0, 5);
    const company = target.company;
    const start = Date.now();
    try {
        // Build provider order: hint first, then probe all
        const providers = [];
        if (target.provider) {
            providers.push(target.provider);
        }
        else {
            for (const p of PROBE_ORDER) {
                providers.push(p);
            }
        }
        // Try each provider
        for (const provider of providers) {
            // Public board URL for display/events; falls back to the bare slug
            // for providers without a known URL pattern.
            const sourceUrl = PROVIDER_SOURCE_URLS[provider]?.(target.slug) ?? target.slug;
            bus.emit("url:fetching", { urlId: id, url: sourceUrl, company });
            await tick();
            bus.emit("url:detecting", { urlId: id, provider });
            await tick();
            bus.emit("url:extracting", { urlId: id });
            await tick();
            try {
                const jobs = await extractViaApi(provider, target.slug, sourceUrl, criteria, options?.saveRaw);
                // Apply company name
                for (const job of jobs) {
                    job.company = company;
                }
                const matched = matchJobs(jobs, criteria);
                bus.emit("url:matching", { urlId: id, matched: matched.length, total: jobs.length });
                await tick();
                bus.emit("url:done", { urlId: id, company, matched: matched.length, total: jobs.length });
                return {
                    url: sourceUrl,
                    provider,
                    jobs: matched,
                    allJobs: jobs,
                    error: null,
                    durationMs: Date.now() - start,
                };
            }
            catch {
                // This provider didn't work, try next
                bus.emit("target:probing", { urlId: id, provider });
                await tick();
            }
        }
        // All providers failed — try fallback URL
        if (target.fallback) {
            return processUrl({ url: target.fallback, company: target.company }, criteria, options);
        }
        // No fallback
        const message = `No ATS provider found for slug "${target.slug}" and no fallback URL provided`;
        bus.emit("url:failed", { urlId: id, company, error: message });
        return {
            url: target.slug,
            provider: "unknown",
            jobs: [],
            allJobs: [],
            error: message,
            durationMs: Date.now() - start,
        };
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        bus.emit("url:failed", { urlId: id, company, error: message });
        return {
            url: target.slug,
            provider: "unknown",
            jobs: [],
            allJobs: [],
            error: message,
            durationMs: Date.now() - start,
        };
    }
}
/**
 * Tier 2 extraction: render the page in a browser session and extract jobs
 * from the resulting HTML.
 *
 * After rendering, provider detection is re-run on the rendered HTML —
 * client-side apps often reveal their ATS only after hydration — and if an
 * ATS is found the JSON API path is used instead. When all extracted jobs
 * share the page URL (container extraction), per-job URLs are resolved via
 * click-capture with resolveJobUrls.
 *
 * If no browser is available (BrowserNotAvailableError), degrades to a plain
 * HTTP GET plus generic HTML extraction. Other errors propagate. The browser
 * session is always closed, even on error.
 */
async function fallbackToHtml(id, url, company, criteria, saveRaw, browser) {
    const session = new BrowserSession({
        networkTimeout: browser?.networkTimeout,
    });
    try {
        bus.emit("url:rendering", { urlId: id, url });
        await tick();
        await session.open(url);
        const [html, finalUrl] = await Promise.all([
            session.getHtml(),
            session.getUrl(),
        ]);
        // Re-detect on rendered HTML
        const recheck = detectProvider(html, finalUrl);
        if (recheck.provider !== "unknown" && recheck.boardToken) {
            bus.emit("url:detecting", { urlId: id, provider: recheck.provider });
            await tick();
            return extractViaApi(recheck.provider, recheck.boardToken, url, criteria, saveRaw);
        }
        bus.emit("url:extracting", { urlId: id });
        await tick();
        let jobs = extractFromHtml(html, finalUrl);
        // Click-capture: resolve individual URLs for container-extracted jobs
        const allShareSourceUrl = jobs.length > 0 && jobs.every((j) => j.url === finalUrl);
        if (allShareSourceUrl) {
            bus.emit("url:resolving-urls", { urlId: id, count: jobs.length });
            await tick();
            jobs = await resolveJobUrls(jobs, finalUrl, session, {
                maxBubbleLevels: browser?.maxBubbleLevels,
            });
        }
        return jobs;
    }
    catch (err) {
        if (err instanceof BrowserNotAvailableError) {
            // Graceful fallback: simple HTTP GET + generic HTML extraction.
            // Skip ATS re-detection — the caller already tried the API path.
            bus.emit("url:fetching", { urlId: id, url, company });
            await tick();
            const probe = await probePage(url);
            bus.emit("url:extracting", { urlId: id });
            await tick();
            return extractFromHtml(probe.html, url);
        }
        throw err;
    }
    finally {
        await session.close();
    }
}
@@ -0,0 +1,83 @@
/** ATS platforms the crawler can extract from. */
export type Provider = "greenhouse" | "lever" | "ashby" | "workday" | "bamboohr" | "workable" | "generic";
/** Multi-company job boards crawled as a whole. */
export type Aggregator = "yc";
/** Per-aggregator entry in the user config. */
export interface AggregatorConfig {
    type: Aggregator;
    enabled: boolean;
}
/** A single normalized job posting, regardless of source. */
export interface Job {
    id: string;
    title: string;
    company: string;
    location: string | null;
    workMode: "remote" | "onsite" | "hybrid" | null;
    department: string | null;
    /** Link to the individual posting. */
    url: string;
    /** Page or endpoint the job was extracted from. */
    sourceUrl: string;
    provider: Provider | Aggregator | "unknown";
    description: string | null;
    postedAt: string | null;
    extractedAt: string;
    /** Raw provider payload, when available. */
    raw: Record<string, unknown> | null;
}
/** User-supplied filters for matching jobs; null fields mean "no filter". */
export interface SearchCriteria {
    keywords: string[];
    excludeKeywords: string[];
    location: string | null;
    workMode: ("remote" | "onsite" | "hybrid")[] | null;
    departments: string[] | null;
    role: string[] | null;
    roleType: string[] | null;
    jobType: string[] | null;
    minExperience: number[] | null;
    companyStage: string[] | null;
    industry: string[] | null;
    companySize: string[] | null;
    hasSalary: boolean | null;
    hasEquity: boolean | null;
    hasInterviewProcess: boolean | null;
    visaSponsorship: boolean | null;
}
/** Target addressed by an ATS board slug, with optional provider hint. */
export interface SlugTarget {
    company: string;
    slug: string;
    provider?: Provider;
    /** Careers URL to crawl if no ATS provider responds for the slug. */
    fallback?: string;
}
/** Target addressed by a careers-page URL. */
export interface UrlTarget {
    url: string;
    company?: string;
}
export type Target = SlugTarget | UrlTarget;
export declare function isSlugTarget(t: Target): t is SlugTarget;
export declare function isUrlTarget(t: Target): t is UrlTarget;
/** Tuning knobs for the rendered-browser fallback. */
export interface BrowserOptions {
    networkTimeout?: number;
    maxBubbleLevels?: number;
}
export interface CrawlOptions {
    concurrency: number;
    saveRaw?: boolean;
    browser?: BrowserOptions;
}
/** Outcome of crawling one target; `error` is non-null on failure. */
export interface CrawlResult {
    url: string;
    provider: Provider | "unknown";
    /** Jobs that passed the SearchCriteria filter. */
    jobs: Job[];
    /** Every job extracted, before filtering. */
    allJobs: Job[];
    error: string | null;
    durationMs: number;
}
export interface CrawlAllResult {
    jobs: Job[];
    results: CrawlResult[];
    totalDurationMs: number;
}
/** Result of ATS detection; boardToken is null when none was found. */
export interface ProviderDetection {
    provider: Provider | "unknown";
    boardToken: string | null;
}
/** Fetched page HTML plus the post-redirect URL. */
export interface FetchResult {
    html: string;
    finalUrl: string;
}
export type OutputFormat = "json" | "table" | "markdown" | "csv";
@@ -0,0 +1,6 @@
/** Type guard: target addressed by an ATS board slug. */
export function isSlugTarget(t) {
    return Reflect.has(t, "slug");
}
/** Type guard: target addressed by a careers-page URL. */
export function isUrlTarget(t) {
    return Reflect.has(t, "url");
}
@@ -0,0 +1,17 @@
import type { Target, AggregatorConfig } from "../types/index.js";
/** Shape of the user's jobcrawl config file (YAML or JSON). */
export interface Config {
    aggregators?: AggregatorConfig[];
    /** Companies/URLs to crawl. */
    companies: Target[];
    /** Optional default runtime settings. */
    defaults?: {
        concurrency?: number;
        browser?: {
            networkTimeout?: number;
            maxBubbleLevels?: number;
        };
    };
}
/** Load and validate a config file; `.json` parses as JSON, otherwise YAML. */
export declare function loadConfig(filePath: string): Promise<Config>;
/**
 * Parse URLs from stdin (newline-delimited).
 */
export declare function parseUrlList(input: string): Target[];
@@ -0,0 +1,57 @@
1
+ import { readFile } from "node:fs/promises";
2
+ import yaml from "js-yaml";
3
+ import { z } from "zod/v4";
// Slug target: company crawled via its ATS board slug, with an optional
// provider hint and a fallback careers URL.
const SlugTargetSchema = z.object({
    company: z.string(),
    slug: z.string(),
    provider: z
        .enum(["greenhouse", "lever", "ashby", "workday", "bamboohr", "workable"])
        .optional(),
    fallback: z.url().optional(),
});
// URL target: crawl a careers page directly; company name is optional.
const UrlTargetSchema = z.object({
    url: z.url(),
    company: z.string().optional(),
});
const TargetSchema = z.union([SlugTargetSchema, UrlTargetSchema]);
// Aggregators default to enabled when listed.
const AggregatorSchema = z.object({
    type: z.enum(["yc"]),
    enabled: z.boolean().default(true),
});
const BrowserSchema = z.object({
    networkTimeout: z.number().int().positive().optional(),
    maxBubbleLevels: z.number().int().min(0).max(10).optional(),
});
// Top-level config schema; `companies` is the only required key.
const ConfigSchema = z.object({
    aggregators: z.array(AggregatorSchema).optional(),
    companies: z.array(TargetSchema),
    defaults: z
        .object({
        concurrency: z.number().int().positive().optional(),
        browser: BrowserSchema.optional(),
    })
        .optional(),
});
/**
 * Load and validate a config file.
 * Files ending in `.json` are parsed as JSON; everything else as YAML.
 * Throws when the file cannot be read, parsed, or fails schema validation.
 */
export async function loadConfig(filePath) {
    const content = await readFile(filePath, "utf-8");
    let parsed;
    if (filePath.endsWith(".json")) {
        parsed = JSON.parse(content);
    }
    else {
        // YAML (default for .yaml, .yml, or anything else)
        parsed = yaml.load(content);
    }
    const result = ConfigSchema.parse(parsed);
    return result;
}
/**
 * Parse URLs from stdin (newline-delimited).
 */
export function parseUrlList(input) {
    const targets = [];
    for (const rawLine of input.split("\n")) {
        const line = rawLine.trim();
        // Skip blank lines and #-comments.
        if (line.length === 0 || line.startsWith("#")) {
            continue;
        }
        targets.push({ url: line });
    }
    return targets;
}
@@ -0,0 +1,19 @@
/** A single web-search hit. */
export interface SearchResult {
    title: string;
    url: string;
    /** Result snippet (may be empty, e.g. for heuristic hits). */
    snippet: string;
}
/**
 * Web search via DuckDuckGo HTML lite.
 * Falls back to heuristic URL guessing if DDG blocks the request.
 *
 * NOTE: Both Google and DDG block plain HTTP scraping with captchas.
 * For reliable search, this needs agent-browser (real Chrome).
 * The heuristic fallback covers most tech companies.
 */
export declare function webSearch(query: string): Promise<SearchResult[]>;
/**
 * Heuristic career URL guessing for common tech companies.
 * Tries common career page patterns and verifies they return 200.
 */
export declare function guessCareerUrls(company: string): Promise<SearchResult[]>;
@@ -0,0 +1,139 @@
1
+ import { probePage } from "../core/fetch-page.js";
2
+ import { parse } from "node-html-parser";
/**
 * Web search via DuckDuckGo HTML lite.
 * Falls back to heuristic URL guessing if DDG blocks the request.
 *
 * NOTE: Both Google and DDG block plain HTTP scraping with captchas.
 * For reliable search, this needs agent-browser (real Chrome).
 * The heuristic fallback covers most tech companies.
 */
export async function webSearch(query) {
    let results = [];
    try {
        results = await ddgSearch(query);
    }
    catch {
        // DDG blocked or unreachable — deliberately return no results
        // rather than propagating; callers treat [] as "search unavailable".
    }
    return results;
}
/**
 * Scrape DuckDuckGo's HTML endpoint for search results.
 *
 * Parses `a.result__a` links out of html.duckduckgo.com, unwraps DDG's
 * `/l/?uddg=` redirect links, drops duckduckgo.com-internal and malformed
 * URLs, dedupes by URL, and returns at most 10 results with title + snippet.
 * Throws if the page fetch itself fails (caller treats that as "blocked").
 */
async function ddgSearch(query) {
    const encoded = encodeURIComponent(query);
    const url = `https://html.duckduckgo.com/html/?q=${encoded}`;
    const { html } = await probePage(url);
    const root = parse(html);
    const results = [];
    // DDG lite results use .result__a links
    const resultLinks = root.querySelectorAll("a.result__a");
    for (const link of resultLinks) {
        const href = link.getAttribute("href") ?? "";
        // Only absolute or protocol-relative links are usable.
        if (!href.startsWith("http") && !href.startsWith("//"))
            continue;
        let targetUrl;
        if (href.includes("duckduckgo.com/l/?")) {
            // Redirect link: the real destination is in the `uddg` query param.
            try {
                const params = new URL(href.startsWith("//") ? `https:${href}` : href)
                    .searchParams;
                const uddg = params.get("uddg");
                if (!uddg)
                    continue;
                targetUrl = uddg;
            }
            catch {
                continue;
            }
        }
        else {
            targetUrl = href.startsWith("//") ? `https:${href}` : href;
        }
        // Drop malformed URLs and DDG-internal destinations.
        try {
            const parsed = new URL(targetUrl);
            if (parsed.hostname.includes("duckduckgo.com"))
                continue;
        }
        catch {
            continue;
        }
        const title = link.textContent.trim();
        if (!title || title.length < 3)
            continue;
        // Snippet lives in a sibling element inside the enclosing .result div.
        const resultDiv = link.closest(".result");
        const snippetEl = resultDiv?.querySelector(".result__snippet");
        const snippet = snippetEl?.textContent?.trim().slice(0, 200) ?? "";
        if (results.some((r) => r.url === targetUrl))
            continue;
        results.push({ title, url: targetUrl, snippet });
    }
    return results.slice(0, 10);
}
/**
 * Heuristic career URL guessing for common tech companies.
 * Tries common career page patterns and verifies they return 200.
 *
 * Candidates are built from a guessed domain (/careers and /jobs variants)
 * plus well-known ATS board URLs derived from the company name. Each is
 * probed with a HEAD request (5s timeout, redirects followed); the first
 * 2xx hit wins and its post-redirect URL is returned. Returns [] when
 * nothing responds.
 */
export async function guessCareerUrls(company) {
    const domain = guessDomain(company);
    const candidates = [
        `https://www.${domain}/careers`,
        `https://${domain}/careers`,
        `https://www.${domain}/careers/`,
        `https://${domain}/careers/`,
        `https://www.${domain}/jobs`,
        `https://${domain}/jobs`,
        // ATS board URLs for well-known companies
        `https://jobs.ashbyhq.com/${company.toLowerCase().replace(/\s+/g, "")}`,
        `https://boards.greenhouse.io/${company.toLowerCase().replace(/\s+/g, "")}`,
        `https://jobs.lever.co/${company.toLowerCase().replace(/\s+/g, "")}`,
    ];
    const results = [];
    for (const url of candidates) {
        try {
            const response = await fetch(url, {
                method: "HEAD",
                redirect: "follow",
                headers: {
                    // Browser-like UA — some sites reject default fetch agents.
                    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
                },
                signal: AbortSignal.timeout(5000),
            });
            if (response.ok) {
                results.push({
                    title: `${company} Careers`,
                    url: response.url, // use final URL after redirects
                    snippet: "",
                });
                break; // first hit is enough
            }
        }
        catch {
            // timeout or network error, try next
        }
    }
    return results;
}
/**
 * Guess a company's primary web domain from its name.
 *
 * Known tech-company names map to curated domains; anything else is
 * normalized (lowercased, non-alphanumerics stripped) with ".com" appended.
 *
 * Fix: look up overrides with Object.hasOwn instead of a truthiness check,
 * so names that collide with Object.prototype keys (e.g. "constructor")
 * cannot return an inherited non-string value.
 */
function guessDomain(company) {
    // Common tech company name → domain mappings
    const overrides = {
        openai: "openai.com",
        deepmind: "deepmind.google",
        "deep mind": "deepmind.google",
        xai: "x.ai",
        "x.ai": "x.ai",
        ai21: "ai21.com",
        "ai21 labs": "ai21.com",
        "hugging face": "huggingface.co",
        huggingface: "huggingface.co",
        "eleven labs": "elevenlabs.io",
        elevenlabs: "elevenlabs.io",
        "luma ai": "lumalabs.ai",
        "together ai": "together.ai",
        "character.ai": "character.ai",
        "character ai": "character.ai",
        "stability ai": "stability.ai",
        "mistral ai": "mistral.ai",
        "inflection ai": "inflection.ai",
    };
    const lower = company.toLowerCase();
    // Own-key check only — avoids matching inherited prototype properties.
    if (Object.hasOwn(overrides, lower))
        return overrides[lower];
    // Default: lowercase, remove spaces/punctuation, add .com
    return lower.replace(/[^a-z0-9]/g, "") + ".com";
}
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Ask Claude to extract a structured answer from context.
3
+ * Uses Haiku for cheap, fast classification tasks.
4
+ */
5
+ export declare function askClaude(prompt: string, options?: {
6
+ model?: string;
7
+ maxTokens?: number;
8
+ }): Promise<string>;
@@ -0,0 +1,25 @@
1
+ import Anthropic from "@anthropic-ai/sdk";
// Lazily-created singleton Anthropic client (credentials come from the SDK's
// default environment handling).
let client = null;
/** Return the shared client, creating it on first use. */
function getClient() {
    if (!client) {
        client = new Anthropic();
    }
    return client;
}
/**
 * Ask Claude to extract a structured answer from context.
 * Uses Haiku for cheap, fast classification tasks.
 *
 * @param {string} prompt - User message sent verbatim to the model.
 * @param {{model?: string, maxTokens?: number}} [options] - Model id override
 *   (default "claude-haiku-4-5-20251001") and max output tokens (default 256).
 * @returns {Promise<string>} Trimmed text of the first text block in the reply.
 * @throws {Error} If the response contains no text block.
 */
export async function askClaude(prompt, options) {
    const anthropic = getClient();
    const response = await anthropic.messages.create({
        model: options?.model ?? "claude-haiku-4-5-20251001",
        max_tokens: options?.maxTokens ?? 256,
        messages: [{ role: "user", content: prompt }],
    });
    // Fix: `response.content[0]` threw an opaque TypeError when content was
    // empty and missed text when the first block was non-text; scan for the
    // first text block instead.
    const block = response.content.find((b) => b.type === "text");
    if (block) {
        return block.text.trim();
    }
    throw new Error("Unexpected response format from Claude API");
}
package/package.json ADDED
@@ -0,0 +1,42 @@
1
+ {
2
+ "name": "jobcrawl",
3
+ "version": "0.1.0",
4
+ "description": "Find your next role",
5
+ "license": "ISC",
6
+ "author": "",
7
+ "type": "module",
8
+ "bin": {
9
+ "jobcrawl": "dist/entrypoints/cli/index.js"
10
+ },
11
+ "scripts": {
12
+ "build": "tsc",
13
+ "postbuild": "chmod +x dist/entrypoints/cli/index.js",
14
+ "dev": "tsx src/entrypoints/cli/index.ts",
15
+ "format": "prettier --write .",
16
+ "format:check": "prettier --check .",
17
+ "test": "vitest run",
18
+ "test:watch": "vitest"
19
+ },
20
+ "dependencies": {
21
+ "@anthropic-ai/sdk": "^0.81.0",
22
+ "commander": "^14.0.3",
23
+ "ink": "^6.8.0",
24
+ "ink-spinner": "^5.0.0",
25
+ "js-yaml": "^4.1.1",
26
+ "node-html-parser": "^7.1.0",
27
+ "react": "^19.2.4",
28
+ "zod": "^4.3.6"
29
+ },
30
+ "optionalDependencies": {
31
+ "agent-browser": "^0.23.4"
32
+ },
33
+ "devDependencies": {
34
+ "@types/js-yaml": "^4.0.9",
35
+ "@types/node": "^25.5.0",
36
+ "@types/react": "^19.2.14",
37
+ "prettier": "^3.8.1",
38
+ "tsx": "^4.21.0",
39
+ "typescript": "^6.0.2",
40
+ "vitest": "^4.1.2"
41
+ }
42
+ }