jobspy-js 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/README.md +158 -0
  2. package/dist/cli/index.d.ts +1 -0
  3. package/dist/cli/index.js +98 -0
  4. package/dist/cli/index.js.map +1 -0
  5. package/dist/index.d.ts +2 -0
  6. package/dist/index.js +13 -0
  7. package/dist/index.js.map +1 -0
  8. package/dist/mcp/index.d.ts +1 -0
  9. package/dist/mcp/index.js +113 -0
  10. package/dist/mcp/index.js.map +1 -0
  11. package/dist/scraper.d.ts +58 -0
  12. package/dist/scrapers/base.d.ts +15 -0
  13. package/dist/scrapers/bayt/index.d.ts +13 -0
  14. package/dist/scrapers/bdjobs/index.d.ts +14 -0
  15. package/dist/scrapers/glassdoor/constants.d.ts +3 -0
  16. package/dist/scrapers/glassdoor/index.d.ts +19 -0
  17. package/dist/scrapers/google/constants.d.ts +2 -0
  18. package/dist/scrapers/google/index.d.ts +21 -0
  19. package/dist/scrapers/google-careers/index.d.ts +13 -0
  20. package/dist/scrapers/indeed/constants.d.ts +2 -0
  21. package/dist/scrapers/indeed/index.d.ts +19 -0
  22. package/dist/scrapers/linkedin/constants.d.ts +1 -0
  23. package/dist/scrapers/linkedin/index.d.ts +20 -0
  24. package/dist/scrapers/naukri/constants.d.ts +1 -0
  25. package/dist/scrapers/naukri/index.d.ts +19 -0
  26. package/dist/scrapers/ziprecruiter/constants.d.ts +2 -0
  27. package/dist/scrapers/ziprecruiter/index.d.ts +15 -0
  28. package/dist/shared/scraper-CuXnl6Gf.js +2445 -0
  29. package/dist/shared/scraper-CuXnl6Gf.js.map +1 -0
  30. package/dist/types.d.ts +134 -0
  31. package/dist/utils.d.ts +46 -0
  32. package/package.json +66 -0
  33. package/src/cli/index.ts +127 -0
  34. package/src/index.ts +19 -0
  35. package/src/mcp/index.ts +154 -0
  36. package/src/scraper.ts +280 -0
  37. package/src/scrapers/base.ts +41 -0
  38. package/src/scrapers/bayt/index.ts +113 -0
  39. package/src/scrapers/bdjobs/index.ts +132 -0
  40. package/src/scrapers/glassdoor/constants.ts +90 -0
  41. package/src/scrapers/glassdoor/index.ts +344 -0
  42. package/src/scrapers/google/constants.ts +34 -0
  43. package/src/scrapers/google/index.ts +289 -0
  44. package/src/scrapers/google-careers/index.ts +237 -0
  45. package/src/scrapers/indeed/constants.ts +112 -0
  46. package/src/scrapers/indeed/index.ts +265 -0
  47. package/src/scrapers/linkedin/constants.ts +10 -0
  48. package/src/scrapers/linkedin/index.ts +311 -0
  49. package/src/scrapers/naukri/constants.ts +14 -0
  50. package/src/scrapers/naukri/index.ts +268 -0
  51. package/src/scrapers/ziprecruiter/constants.ts +31 -0
  52. package/src/scrapers/ziprecruiter/index.ts +224 -0
  53. package/src/types.ts +381 -0
  54. package/src/utils.ts +289 -0
  55. package/tests/integration/bayt.test.ts +29 -0
  56. package/tests/integration/bdjobs.test.ts +29 -0
  57. package/tests/integration/glassdoor.test.ts +30 -0
  58. package/tests/integration/google-careers.test.ts +30 -0
  59. package/tests/integration/google.test.ts +31 -0
  60. package/tests/integration/indeed.test.ts +30 -0
  61. package/tests/integration/linkedin.test.ts +29 -0
  62. package/tests/integration/naukri.test.ts +29 -0
  63. package/tests/integration/ziprecruiter.test.ts +29 -0
  64. package/tsconfig.json +16 -0
  65. package/vite.config.ts +35 -0
package/README.md ADDED
@@ -0,0 +1,158 @@
1
+ # jobspy-js
2
+
3
+ TypeScript port of [JobSpy](https://github.com/speedyapply/JobSpy) — scrape job postings from LinkedIn, Indeed, Glassdoor, Google Jobs, Google Careers, ZipRecruiter, Bayt, Naukri & BDJobs.
4
+
5
+ Uses [wreq-js](https://github.com/nicehash/wreq-js) for browser TLS fingerprint emulation (Chrome/Firefox/Safari).
6
+
7
+ ## Features
8
+
9
+ - **9 job boards** — LinkedIn, Indeed, Glassdoor, Google Jobs, Google Careers, ZipRecruiter, Bayt, Naukri, BDJobs
10
+ - **3 interfaces** — SDK, CLI, MCP server
11
+ - **Browser emulation** — wreq-js with full TLS fingerprinting (JA3/JA4)
12
+ - **Proxy rotation** — built-in rotating proxy support
13
+ - **Concurrent scraping** — all sites scraped in parallel
14
+ - **Salary extraction** — parses compensation from descriptions when not provided directly
15
+ - **60+ countries** — Indeed/Glassdoor regional domain support
16
+
17
+ ## Supported Sites
18
+
19
+ | Site | Key | Notes |
20
+ |------|-----|-------|
21
+ | LinkedIn | `linkedin` | HTML scraping |
22
+ | Indeed | `indeed` | GraphQL API |
23
+ | Glassdoor | `glassdoor` | GraphQL API |
24
+ | Google Jobs | `google` | Playwright (headless Chrome); requires clean residential IP or proxy |
25
+ | Google Careers | `google_careers` | Plain HTTP; scrapes jobs at Google the company |
26
+ | ZipRecruiter | `zip_recruiter` | Web scraping |
27
+ | Bayt | `bayt` | HTML scraping |
28
+ | Naukri | `naukri` | REST API |
29
+ | BDJobs | `bdjobs` | REST API |
30
+
31
+ ## Installation
32
+
33
+ ```bash
34
+ npm install jobspy-js
35
+ ```
36
+
37
+ > **Google Jobs** (`google`) uses [Playwright](https://playwright.dev) to execute JavaScript. After installing, run:
38
+ > ```bash
39
+ > npx playwright install chromium
40
+ > ```
41
+
42
+ ## SDK Usage
43
+
44
+ ```ts
45
+ import { scrapeJobs } from "jobspy-js";
46
+
47
+ const result = await scrapeJobs({
48
+ site_name: ["indeed", "linkedin"],
49
+ search_term: "software engineer",
50
+ location: "San Francisco, CA",
51
+ results_wanted: 20,
52
+ });
53
+
54
+ console.log(`Found ${result.jobs.length} jobs`);
55
+ for (const job of result.jobs) {
56
+ console.log(`${job.title} at ${job.company} — ${job.job_url}`);
57
+ }
58
+ ```
59
+
60
+ ### Parameters
61
+
62
+ | Parameter | Type | Default | Description |
63
+ |-----------|------|---------|-------------|
64
+ | `site_name` | `string[]` | all sites | Job boards to scrape |
65
+ | `search_term` | `string` | — | Job title / search query |
66
+ | `location` | `string` | — | Job location (e.g. `"San Francisco, CA"`) |
67
+ | `distance` | `number` | `50` | Search radius in miles |
68
+ | `is_remote` | `boolean` | `false` | Filter for remote jobs |
69
+ | `job_type` | `string` | — | `fulltime`, `parttime`, `contract`, `internship` |
70
+ | `results_wanted` | `number` | `15` | Results per site |
71
+ | `country_indeed` | `string` | `"usa"` | Country for Indeed/Glassdoor |
72
+ | `hours_old` | `number` | — | Filter jobs posted within N hours |
73
+ | `description_format` | `string` | `"markdown"` | `markdown`, `html`, or `plain` |
74
+ | `proxies` | `string \| string[]` | — | Proxy servers (`user:pass@host:port`) |
75
+ | `linkedin_fetch_description` | `boolean` | `false` | Fetch full LinkedIn descriptions (slower) |
76
+ | `enforce_annual_salary` | `boolean` | `false` | Convert all salaries to annual |
77
+
78
+ ## CLI
79
+
80
+ ```bash
81
+ # Search for React jobs on LinkedIn
82
+ npx jobspy -s linkedin -q "react developer" -l "New York, NY" -n 20
83
+
84
+ # Multiple sites, remote only, output to file
85
+ npx jobspy -s linkedin indeed -q "typescript" -r -o results.json
86
+
87
+ # CSV output
88
+ npx jobspy -s indeed -q "python" -o jobs.csv
89
+
90
+ # Google Careers (jobs at Google)
91
+ npx jobspy -s google_careers -q "software engineer" -l "USA" -n 10
92
+ ```
93
+
94
+ Run `npx jobspy --help` for all options.
95
+
96
+ ## MCP Server
97
+
98
+ Add to your MCP client config:
99
+
100
+ ```json
101
+ {
102
+ "mcpServers": {
103
+ "jobspy": {
104
+ "command": "npx",
105
+ "args": ["-y", "jobspy-js", "--mcp"]
106
+ }
107
+ }
108
+ }
109
+ ```
110
+
111
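+ The MCP server exposes a `scrape_jobs` tool that accepts the core SDK search parameters (`site_name`, `search_term`, `google_search_term`, `location`, `distance`, `is_remote`, `job_type`, `results_wanted`, `country_indeed`, `hours_old`, `description_format`, `linkedin_fetch_description`). SDK-only options such as `proxies` and `enforce_annual_salary` are not available through the tool.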
112
+
113
+ ## Development
114
+
115
+ ```bash
116
+ git clone https://github.com/borgius/jobspy-js.git
117
+ cd jobspy-js
118
+ npm install
119
+
120
+ # Build
121
+ npm run build
122
+
123
+ # Type check
124
+ npm run typecheck
125
+
126
+ # Run CLI from source
127
+ npm run cli -- -s linkedin -q "react" -n 5
128
+
129
+ # Run tests
130
+ npm test
131
+ ```
132
+
133
+ ## Project Structure
134
+
135
+ ```
136
+ src/
137
+ ├── index.ts # SDK entry point
138
+ ├── scraper.ts # Main scrapeJobs() orchestrator
139
+ ├── types.ts # All types, enums, country config
140
+ ├── utils.ts # Logger, proxy rotation, HTML helpers
141
+ ├── cli/index.ts # CLI (commander)
142
+ ├── mcp/index.ts # MCP server
143
+ └── scrapers/
144
+ ├── base.ts # Abstract Scraper base class
145
+ ├── indeed/ # GraphQL API
146
+ ├── linkedin/ # HTML scraping
147
+ ├── glassdoor/ # GraphQL API
148
+ ├── google/ # Playwright headless Chrome
149
+ ├── google-careers/ # Plain HTTP; AF_initDataCallback JSON parsing
150
+ ├── ziprecruiter/ # Web scraping
151
+ ├── bayt/ # HTML scraping
152
+ ├── naukri/ # REST API
153
+ └── bdjobs/ # REST API
154
+ ```
155
+
156
+ ## License
157
+
158
+ MIT
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,98 @@
1
+ #!/usr/bin/env node
2
+ import { Command } from "commander";
3
+ import { s as scrapeJobs } from "../shared/scraper-CuXnl6Gf.js";
4
+ import { writeFileSync } from "node:fs";
5
/**
 * Parses a CLI option value as a base-10 integer.
 *
 * @param value Raw option string as delivered by commander.
 * @param flag  Flag name, used only to build the error message.
 * @returns The parsed integer.
 * @throws {Error} When the value does not begin with a valid integer, so a
 *   typo such as `-n ten` is reported instead of forwarding NaN to the scraper.
 */
function parseIntOption(value, flag) {
  const parsed = Number.parseInt(value, 10);
  if (Number.isNaN(parsed)) {
    throw new Error(
      `Invalid value for ${flag}: expected an integer, got "${value}"`
    );
  }
  return parsed;
}

// Command-line definition: declares every flag, then runs scrapeJobs() with
// the parsed options and either writes the results to a file or prints a
// fixed-width summary table to stdout.
const program = new Command();
program
  .name("jobspy")
  .description(
    "Job scraper for LinkedIn, Indeed, Glassdoor, Google, Google Careers, ZipRecruiter, Bayt, Naukri & BDJobs"
  )
  // NOTE(review): the package is published as 1.1.0 but the CLI reports
  // 1.0.0 — confirm whether this should track package.json.
  .version("1.0.0")
  .option(
    "-s, --site <sites...>",
    "Job boards to scrape (linkedin, indeed, zip_recruiter, glassdoor, google, google_careers, bayt, naukri, bdjobs)"
  )
  .option("-q, --search-term <term>", "Search term")
  .option("--google-search-term <term>", "Google-specific search term")
  .option("-l, --location <location>", "Job location")
  .option("-d, --distance <miles>", "Distance in miles", "50")
  .option("-r, --remote", "Filter for remote jobs")
  .option(
    "-t, --job-type <type>",
    "Job type (fulltime, parttime, contract, internship)"
  )
  .option("--easy-apply", "Filter for easy apply jobs")
  .option("-n, --results <count>", "Number of results wanted", "15")
  .option("-c, --country <country>", "Country for Indeed/Glassdoor", "usa")
  .option("-p, --proxies <proxies...>", "Proxy servers (user:pass@host:port)")
  .option(
    "--format <format>",
    "Description format (markdown, html, plain)",
    "markdown"
  )
  .option("--linkedin-fetch-description", "Fetch full LinkedIn descriptions")
  .option("--linkedin-company-ids <ids...>", "LinkedIn company IDs to filter")
  .option("--offset <offset>", "Start from offset", "0")
  .option("--hours-old <hours>", "Filter jobs posted within N hours")
  .option("--enforce-annual-salary", "Convert all salaries to annual")
  .option("-v, --verbose <level>", "Verbosity (0=errors, 1=warnings, 2=all)", "0")
  .option("-o, --output <file>", "Output file path (JSON or CSV based on extension)")
  .action(async (opts) => {
    try {
      // Numeric flags arrive as strings; parse them inside the try block so a
      // malformed value is reported through the same error path as a failed scrape.
      const result = await scrapeJobs({
        site_name: opts.site,
        search_term: opts.searchTerm,
        google_search_term: opts.googleSearchTerm,
        location: opts.location,
        distance: parseIntOption(opts.distance, "--distance"),
        is_remote: opts.remote ?? false,
        job_type: opts.jobType,
        easy_apply: opts.easyApply,
        results_wanted: parseIntOption(opts.results, "--results"),
        country_indeed: opts.country,
        proxies: opts.proxies,
        description_format: opts.format,
        linkedin_fetch_description: opts.linkedinFetchDescription,
        linkedin_company_ids: opts.linkedinCompanyIds?.map(Number),
        offset: parseIntOption(opts.offset, "--offset"),
        hours_old: opts.hoursOld
          ? parseIntOption(opts.hoursOld, "--hours-old")
          : undefined,
        enforce_annual_salary: opts.enforceAnnualSalary ?? false,
        verbose: parseIntOption(opts.verbose, "--verbose")
      });
      console.log(`Found ${result.jobs.length} jobs`);

      if (opts.output) {
        // The file extension selects the serialisation: ".csv" -> CSV, anything else -> JSON.
        const outPath = opts.output;
        const payload = outPath.endsWith(".csv")
          ? jobsToCsv(result.jobs)
          : JSON.stringify(result.jobs, null, 2);
        writeFileSync(outPath, payload);
        console.log(`Results written to ${outPath}`);
      } else {
        // Fixed-width summary table. Every column falls back to "" before
        // padding so a missing field cannot shift the columns to its right.
        for (const job of result.jobs) {
          const line = [
            (job.site ?? "").padEnd(14),
            (job.title ?? "").slice(0, 40).padEnd(42),
            (job.company ?? "").slice(0, 20).padEnd(22),
            (job.location ?? "").slice(0, 25).padEnd(27),
            job.date_posted ?? ""
          ].join("");
          console.log(line);
        }
      }
    } catch (e) {
      // A thrown value is not guaranteed to be an Error instance.
      const message = e instanceof Error ? e.message : String(e);
      console.error(`Error: ${message}`);
      process.exit(1);
    }
  });
80
/**
 * Serialises an array of flat job records into CSV text.
 *
 * The header row is the union of the keys of every record, in first-seen
 * order, so a field that only appears on later records is still emitted
 * (records lacking a key get an empty cell). Cells and header names are
 * quoted when they contain a comma, a double quote, a carriage return or a
 * line feed; embedded double quotes are doubled.
 *
 * @param jobs Records to serialise; `null`/`undefined` values become empty cells.
 * @returns CSV text with "\n" row separators and no trailing newline, or an
 *   empty string when `jobs` is empty.
 */
function jobsToCsv(jobs) {
  if (jobs.length === 0) return "";

  // Union of keys across all records, preserving the order of first appearance.
  const headers = [];
  const seen = new Set();
  for (const job of jobs) {
    for (const key of Object.keys(job)) {
      if (!seen.has(key)) {
        seen.add(key);
        headers.push(key);
      }
    }
  }

  const escape = (val) => {
    if (val == null) return "";
    const str = String(val);
    // A bare "\r" also terminates a record for many CSV readers, so it must be quoted too.
    return /[",\r\n]/.test(str) ? `"${str.replace(/"/g, '""')}"` : str;
  };

  const lines = [headers.map((h) => escape(h)).join(",")];
  for (const job of jobs) {
    lines.push(headers.map((h) => escape(job[h])).join(","));
  }
  return lines.join("\n");
}
97
+ program.parse();
98
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sources":["../../src/cli/index.ts"],"sourcesContent":["import { Command } from \"commander\";\nimport { scrapeJobs } from \"../scraper\";\nimport { Site } from \"../types\";\nimport { writeFileSync } from \"node:fs\";\n\nconst program = new Command();\n\nprogram\n .name(\"jobspy\")\n .description(\n \"Job scraper for LinkedIn, Indeed, Glassdoor, Google, ZipRecruiter, Bayt, Naukri & BDJobs\",\n )\n .version(\"1.0.0\")\n .option(\n \"-s, --site <sites...>\",\n \"Job boards to scrape (linkedin, indeed, zip_recruiter, glassdoor, google, bayt, naukri, bdjobs)\",\n )\n .option(\"-q, --search-term <term>\", \"Search term\")\n .option(\"--google-search-term <term>\", \"Google-specific search term\")\n .option(\"-l, --location <location>\", \"Job location\")\n .option(\"-d, --distance <miles>\", \"Distance in miles\", \"50\")\n .option(\"-r, --remote\", \"Filter for remote jobs\")\n .option(\n \"-t, --job-type <type>\",\n \"Job type (fulltime, parttime, contract, internship)\",\n )\n .option(\"--easy-apply\", \"Filter for easy apply jobs\")\n .option(\"-n, --results <count>\", \"Number of results wanted\", \"15\")\n .option(\n \"-c, --country <country>\",\n \"Country for Indeed/Glassdoor\",\n \"usa\",\n )\n .option(\n \"-p, --proxies <proxies...>\",\n \"Proxy servers (user:pass@host:port)\",\n )\n .option(\n \"--format <format>\",\n \"Description format (markdown, html, plain)\",\n \"markdown\",\n )\n .option(\"--linkedin-fetch-description\", \"Fetch full LinkedIn descriptions\")\n .option(\n \"--linkedin-company-ids <ids...>\",\n \"LinkedIn company IDs to filter\",\n )\n .option(\"--offset <offset>\", \"Start from offset\", \"0\")\n .option(\n \"--hours-old <hours>\",\n \"Filter jobs posted within N hours\",\n )\n .option(\"--enforce-annual-salary\", \"Convert all salaries to annual\")\n .option(\"-v, --verbose <level>\", \"Verbosity (0=errors, 1=warnings, 2=all)\", \"0\")\n .option(\"-o, --output <file>\", \"Output file path (JSON or CSV 
based on extension)\")\n .action(async (opts) => {\n try {\n const result = await scrapeJobs({\n site_name: opts.site,\n search_term: opts.searchTerm,\n google_search_term: opts.googleSearchTerm,\n location: opts.location,\n distance: parseInt(opts.distance),\n is_remote: opts.remote ?? false,\n job_type: opts.jobType,\n easy_apply: opts.easyApply,\n results_wanted: parseInt(opts.results),\n country_indeed: opts.country,\n proxies: opts.proxies,\n description_format: opts.format,\n linkedin_fetch_description: opts.linkedinFetchDescription,\n linkedin_company_ids: opts.linkedinCompanyIds?.map(Number),\n offset: parseInt(opts.offset),\n hours_old: opts.hoursOld ? parseInt(opts.hoursOld) : undefined,\n enforce_annual_salary: opts.enforceAnnualSalary ?? false,\n verbose: parseInt(opts.verbose),\n });\n\n console.log(`Found ${result.jobs.length} jobs`);\n\n if (opts.output) {\n const outPath = opts.output as string;\n if (outPath.endsWith(\".csv\")) {\n writeFileSync(outPath, jobsToCsv(result.jobs));\n console.log(`Results written to ${outPath}`);\n } else {\n writeFileSync(outPath, JSON.stringify(result.jobs, null, 2));\n console.log(`Results written to ${outPath}`);\n }\n } else {\n // Print summary table to stdout\n for (const job of result.jobs) {\n const line = [\n job.site?.padEnd(14),\n (job.title ?? \"\").slice(0, 40).padEnd(42),\n (job.company ?? \"\").slice(0, 20).padEnd(22),\n (job.location ?? \"\").slice(0, 25).padEnd(27),\n job.date_posted ?? 
\"\",\n ].join(\"\");\n console.log(line);\n }\n }\n } catch (e: any) {\n console.error(`Error: ${e.message}`);\n process.exit(1);\n }\n });\n\nfunction jobsToCsv(jobs: any[]): string {\n if (jobs.length === 0) return \"\";\n const headers = Object.keys(jobs[0]);\n const escape = (val: any): string => {\n if (val == null) return \"\";\n const str = String(val);\n if (str.includes(\",\") || str.includes('\"') || str.includes(\"\\n\")) {\n return `\"${str.replace(/\"/g, '\"\"')}\"`;\n }\n return str;\n };\n const lines = [headers.join(\",\")];\n for (const job of jobs) {\n lines.push(headers.map((h) => escape(job[h])).join(\",\"));\n }\n return lines.join(\"\\n\");\n}\n\nprogram.parse();\n"],"names":[],"mappings":";;;;AAKA,MAAM,UAAU,IAAI,QAAA;AAEpB,QACG,KAAK,QAAQ,EACb;AAAA,EACC;AACF,EACC,QAAQ,OAAO,EACf;AAAA,EACC;AAAA,EACA;AACF,EACC,OAAO,4BAA4B,aAAa,EAChD,OAAO,+BAA+B,6BAA6B,EACnE,OAAO,6BAA6B,cAAc,EAClD,OAAO,0BAA0B,qBAAqB,IAAI,EAC1D,OAAO,gBAAgB,wBAAwB,EAC/C;AAAA,EACC;AAAA,EACA;AACF,EACC,OAAO,gBAAgB,4BAA4B,EACnD,OAAO,yBAAyB,4BAA4B,IAAI,EAChE;AAAA,EACC;AAAA,EACA;AAAA,EACA;AACF,EACC;AAAA,EACC;AAAA,EACA;AACF,EACC;AAAA,EACC;AAAA,EACA;AAAA,EACA;AACF,EACC,OAAO,gCAAgC,kCAAkC,EACzE;AAAA,EACC;AAAA,EACA;AACF,EACC,OAAO,qBAAqB,qBAAqB,GAAG,EACpD;AAAA,EACC;AAAA,EACA;AACF,EACC,OAAO,2BAA2B,gCAAgC,EAClE,OAAO,yBAAyB,2CAA2C,GAAG,EAC9E,OAAO,uBAAuB,mDAAmD,EACjF,OAAO,OAAO,SAAS;AACtB,MAAI;AACF,UAAM,SAAS,MAAM,WAAW;AAAA,MAC9B,WAAW,KAAK;AAAA,MAChB,aAAa,KAAK;AAAA,MAClB,oBAAoB,KAAK;AAAA,MACzB,UAAU,KAAK;AAAA,MACf,UAAU,SAAS,KAAK,QAAQ;AAAA,MAChC,WAAW,KAAK,UAAU;AAAA,MAC1B,UAAU,KAAK;AAAA,MACf,YAAY,KAAK;AAAA,MACjB,gBAAgB,SAAS,KAAK,OAAO;AAAA,MACrC,gBAAgB,KAAK;AAAA,MACrB,SAAS,KAAK;AAAA,MACd,oBAAoB,KAAK;AAAA,MACzB,4BAA4B,KAAK;AAAA,MACjC,sBAAsB,KAAK,oBAAoB,IAAI,MAAM;AAAA,MACzD,QAAQ,SAAS,KAAK,MAAM;AAAA,MAC5B,WAAW,KAAK,WAAW,SAAS,KAAK,QAAQ,IAAI;AAAA,MACrD,uBAAuB,KAAK,uBAAuB;AAAA,MACnD,SAAS,SAAS,KAAK,OAAO;AAAA,IAAA,CAC/B;AAED,YAAQ,IAAI,SAAS,OAAO,KAAK,MAAM,OAAO;AAE9C,QAAI,KAAK,QAAQ;AACf,YAAM,UAAU,KAAK;AACrB,UAAI,
QAAQ,SAAS,MAAM,GAAG;AAC5B,sBAAc,SAAS,UAAU,OAAO,IAAI,CAAC;AAC7C,gBAAQ,IAAI,sBAAsB,OAAO,EAAE;AAAA,MAC7C,OAAO;AACL,sBAAc,SAAS,KAAK,UAAU,OAAO,MAAM,MAAM,CAAC,CAAC;AAC3D,gBAAQ,IAAI,sBAAsB,OAAO,EAAE;AAAA,MAC7C;AAAA,IACF,OAAO;AAEL,iBAAW,OAAO,OAAO,MAAM;AAC7B,cAAM,OAAO;AAAA,UACX,IAAI,MAAM,OAAO,EAAE;AAAA,WAClB,IAAI,SAAS,IAAI,MAAM,GAAG,EAAE,EAAE,OAAO,EAAE;AAAA,WACvC,IAAI,WAAW,IAAI,MAAM,GAAG,EAAE,EAAE,OAAO,EAAE;AAAA,WACzC,IAAI,YAAY,IAAI,MAAM,GAAG,EAAE,EAAE,OAAO,EAAE;AAAA,UAC3C,IAAI,eAAe;AAAA,QAAA,EACnB,KAAK,EAAE;AACT,gBAAQ,IAAI,IAAI;AAAA,MAClB;AAAA,IACF;AAAA,EACF,SAAS,GAAQ;AACf,YAAQ,MAAM,UAAU,EAAE,OAAO,EAAE;AACnC,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF,CAAC;AAEH,SAAS,UAAU,MAAqB;AACtC,MAAI,KAAK,WAAW,EAAG,QAAO;AAC9B,QAAM,UAAU,OAAO,KAAK,KAAK,CAAC,CAAC;AACnC,QAAM,SAAS,CAAC,QAAqB;AACnC,QAAI,OAAO,KAAM,QAAO;AACxB,UAAM,MAAM,OAAO,GAAG;AACtB,QAAI,IAAI,SAAS,GAAG,KAAK,IAAI,SAAS,GAAG,KAAK,IAAI,SAAS,IAAI,GAAG;AAChE,aAAO,IAAI,IAAI,QAAQ,MAAM,IAAI,CAAC;AAAA,IACpC;AACA,WAAO;AAAA,EACT;AACA,QAAM,QAAQ,CAAC,QAAQ,KAAK,GAAG,CAAC;AAChC,aAAW,OAAO,MAAM;AACtB,UAAM,KAAK,QAAQ,IAAI,CAAC,MAAM,OAAO,IAAI,CAAC,CAAC,CAAC,EAAE,KAAK,GAAG,CAAC;AAAA,EACzD;AACA,SAAO,MAAM,KAAK,IAAI;AACxB;AAEA,QAAQ,MAAA;"}
@@ -0,0 +1,2 @@
1
+ export { Site, JobType, CompensationInterval, DescriptionFormat, SalarySource, type JobPost, type JobResponse, type Compensation, type Location, type ScrapeJobsParams, type ScraperInput, type Country, DESIRED_COLUMNS, getCountry, displayLocation, } from './types';
2
+ export { scrapeJobs } from './scraper';
package/dist/index.js ADDED
@@ -0,0 +1,13 @@
1
+ import { C, D, a, J, S, b, d, g, s } from "./shared/scraper-CuXnl6Gf.js";
2
+ export {
3
+ C as CompensationInterval,
4
+ D as DESIRED_COLUMNS,
5
+ a as DescriptionFormat,
6
+ J as JobType,
7
+ S as SalarySource,
8
+ b as Site,
9
+ d as displayLocation,
10
+ g as getCountry,
11
+ s as scrapeJobs
12
+ };
13
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sources":[],"sourcesContent":[],"names":[],"mappings":";"}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,113 @@
1
+ #!/usr/bin/env node
2
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
4
+ import { z } from "zod";
5
+ import { s as scrapeJobs } from "../shared/scraper-CuXnl6Gf.js";
6
/**
 * Renders one job record as a short multi-line text entry.
 *
 * @param job   Flat job record returned by scrapeJobs().
 * @param index Zero-based position of the job in the result list.
 * @returns Lines joined with "\n": numbered title, company, location, URL,
 *   then posting date, salary range and job type when present.
 */
function formatJobSummary(job, index) {
  const parts = [
    `${index + 1}. **${job.title}**`,
    `   Company: ${job.company ?? "N/A"}`,
    `   Location: ${job.location ?? "N/A"}${job.is_remote ? " (Remote)" : ""}`,
    `   URL: ${job.job_url}`
  ];
  if (job.date_posted) parts.push(`   Posted: ${job.date_posted}`);
  // The salary line is only shown when both ends of the range are set.
  if (job.min_amount && job.max_amount) {
    const currency = job.currency ?? "$";
    parts.push(
      `   Salary: ${currency}${job.min_amount.toLocaleString()} - ${currency}${job.max_amount.toLocaleString()} (${job.interval ?? "yearly"})`
    );
  }
  if (job.job_type) parts.push(`   Type: ${job.job_type}`);
  return parts.join("\n");
}

// MCP server instance exposing a single "scrape_jobs" tool.
// NOTE(review): the package is published as 1.1.0 but the server reports
// 1.0.0 — confirm whether this should track package.json.
const server = new McpServer({
  name: "jobspy",
  version: "1.0.0"
});

server.tool(
  "scrape_jobs",
  "Scrape job listings from multiple job boards (LinkedIn, Indeed, Glassdoor, Google, Google Careers, ZipRecruiter, Bayt, Naukri, BDJobs)",
  {
    // "google_careers" is a documented site key (README site table and CLI
    // example), so the tool schema has to accept it as well.
    site_name: z
      .array(
        z.enum([
          "linkedin",
          "indeed",
          "zip_recruiter",
          "glassdoor",
          "google",
          "google_careers",
          "bayt",
          "naukri",
          "bdjobs"
        ])
      )
      .optional()
      .describe(
        "Job boards to scrape. Defaults to all. Options: linkedin, indeed, zip_recruiter, glassdoor, google, google_careers, bayt, naukri, bdjobs"
      ),
    search_term: z.string().optional().describe("Search term / job title to search for"),
    google_search_term: z
      .string()
      .optional()
      .describe("Google-specific search term (overrides search_term for Google)"),
    location: z.string().optional().describe("Job location (e.g. 'San Francisco, CA')"),
    distance: z.number().optional().default(50).describe("Search radius in miles"),
    is_remote: z.boolean().optional().default(false).describe("Filter for remote jobs"),
    job_type: z
      .enum(["fulltime", "parttime", "contract", "internship"])
      .optional()
      .describe("Filter by job type"),
    // NOTE(review): the README documents a default of 15 for the SDK, while
    // this tool defaults to 10 — confirm the difference is intentional.
    results_wanted: z
      .number()
      .optional()
      .default(10)
      .describe("Number of results to return per site"),
    country_indeed: z
      .string()
      .optional()
      .default("usa")
      .describe("Country for Indeed/Glassdoor (e.g. 'usa', 'uk', 'canada')"),
    hours_old: z.number().optional().describe("Filter jobs posted within the last N hours"),
    description_format: z
      .enum(["markdown", "html", "plain"])
      .optional()
      .default("markdown")
      .describe("Format for job descriptions"),
    linkedin_fetch_description: z
      .boolean()
      .optional()
      .default(false)
      .describe("Fetch full descriptions from LinkedIn (slower)")
  },
  async (params) => {
    try {
      const result = await scrapeJobs({
        site_name: params.site_name,
        search_term: params.search_term,
        google_search_term: params.google_search_term,
        location: params.location,
        distance: params.distance,
        is_remote: params.is_remote,
        job_type: params.job_type,
        results_wanted: params.results_wanted,
        country_indeed: params.country_indeed,
        hours_old: params.hours_old,
        description_format: params.description_format,
        linkedin_fetch_description: params.linkedin_fetch_description
      });

      if (result.jobs.length === 0) {
        return {
          content: [
            {
              type: "text",
              text: "No jobs found matching the search criteria."
            }
          ]
        };
      }

      // One text block: a count line followed by the per-job entries,
      // separated by blank lines.
      const summary = result.jobs.map(formatJobSummary).join("\n\n");
      return {
        content: [
          {
            type: "text",
            text: `Found ${result.jobs.length} jobs:\n\n${summary}`
          }
        ]
      };
    } catch (e) {
      // Report failures as a tool error result rather than throwing; a
      // thrown value is not guaranteed to be an Error instance.
      const message = e instanceof Error ? e.message : String(e);
      return {
        content: [
          {
            type: "text",
            text: `Error scraping jobs: ${message}`
          }
        ],
        isError: true
      };
    }
  }
);
105
/**
 * Entry point: attaches the MCP server to a freshly created stdio transport
 * and resolves once the connection has been established.
 */
async function main() {
  await server.connect(new StdioServerTransport());
}

// Any startup failure is logged and terminates the process with exit code 1.
main().catch((error) => {
  console.error("MCP server error:", error);
  process.exit(1);
});
113
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sources":["../../src/mcp/index.ts"],"sourcesContent":["import { McpServer } from \"@modelcontextprotocol/sdk/server/mcp.js\";\nimport { StdioServerTransport } from \"@modelcontextprotocol/sdk/server/stdio.js\";\nimport { z } from \"zod\";\nimport { scrapeJobs } from \"../scraper\";\n\nconst server = new McpServer({\n name: \"jobspy\",\n version: \"1.0.0\",\n});\n\nserver.tool(\n \"scrape_jobs\",\n \"Scrape job listings from multiple job boards (LinkedIn, Indeed, Glassdoor, Google, ZipRecruiter, Bayt, Naukri, BDJobs)\",\n {\n site_name: z\n .array(\n z.enum([\n \"linkedin\",\n \"indeed\",\n \"zip_recruiter\",\n \"glassdoor\",\n \"google\",\n \"bayt\",\n \"naukri\",\n \"bdjobs\",\n ]),\n )\n .optional()\n .describe(\n \"Job boards to scrape. Defaults to all. Options: linkedin, indeed, zip_recruiter, glassdoor, google, bayt, naukri, bdjobs\",\n ),\n search_term: z\n .string()\n .optional()\n .describe(\"Search term / job title to search for\"),\n google_search_term: z\n .string()\n .optional()\n .describe(\"Google-specific search term (overrides search_term for Google)\"),\n location: z.string().optional().describe(\"Job location (e.g. 'San Francisco, CA')\"),\n distance: z\n .number()\n .optional()\n .default(50)\n .describe(\"Search radius in miles\"),\n is_remote: z.boolean().optional().default(false).describe(\"Filter for remote jobs\"),\n job_type: z\n .enum([\"fulltime\", \"parttime\", \"contract\", \"internship\"])\n .optional()\n .describe(\"Filter by job type\"),\n results_wanted: z\n .number()\n .optional()\n .default(10)\n .describe(\"Number of results to return per site\"),\n country_indeed: z\n .string()\n .optional()\n .default(\"usa\")\n .describe(\"Country for Indeed/Glassdoor (e.g. 
'usa', 'uk', 'canada')\"),\n hours_old: z\n .number()\n .optional()\n .describe(\"Filter jobs posted within the last N hours\"),\n description_format: z\n .enum([\"markdown\", \"html\", \"plain\"])\n .optional()\n .default(\"markdown\")\n .describe(\"Format for job descriptions\"),\n linkedin_fetch_description: z\n .boolean()\n .optional()\n .default(false)\n .describe(\"Fetch full descriptions from LinkedIn (slower)\"),\n },\n async (params) => {\n try {\n const result = await scrapeJobs({\n site_name: params.site_name,\n search_term: params.search_term,\n google_search_term: params.google_search_term,\n location: params.location,\n distance: params.distance,\n is_remote: params.is_remote,\n job_type: params.job_type,\n results_wanted: params.results_wanted,\n country_indeed: params.country_indeed,\n hours_old: params.hours_old,\n description_format: params.description_format,\n linkedin_fetch_description: params.linkedin_fetch_description,\n });\n\n if (result.jobs.length === 0) {\n return {\n content: [\n {\n type: \"text\" as const,\n text: \"No jobs found matching the search criteria.\",\n },\n ],\n };\n }\n\n // Format jobs as a readable summary + structured data\n const summary = result.jobs\n .map((job, i) => {\n const parts = [\n `${i + 1}. **${job.title}**`,\n ` Company: ${job.company ?? \"N/A\"}`,\n ` Location: ${job.location ?? \"N/A\"}${job.is_remote ? \" (Remote)\" : \"\"}`,\n ` URL: ${job.job_url}`,\n ];\n if (job.date_posted) parts.push(` Posted: ${job.date_posted}`);\n if (job.min_amount && job.max_amount) {\n parts.push(\n ` Salary: ${job.currency ?? \"$\"}${job.min_amount.toLocaleString()} - ${job.currency ?? \"$\"}${job.max_amount.toLocaleString()} (${job.interval ?? 
\"yearly\"})`,\n );\n }\n if (job.job_type) parts.push(` Type: ${job.job_type}`);\n return parts.join(\"\\n\");\n })\n .join(\"\\n\\n\");\n\n return {\n content: [\n {\n type: \"text\" as const,\n text: `Found ${result.jobs.length} jobs:\\n\\n${summary}`,\n },\n ],\n };\n } catch (e: any) {\n return {\n content: [\n {\n type: \"text\" as const,\n text: `Error scraping jobs: ${e.message}`,\n },\n ],\n isError: true,\n };\n }\n },\n);\n\nasync function main() {\n const transport = new StdioServerTransport();\n await server.connect(transport);\n}\n\nmain().catch((e) => {\n console.error(\"MCP server error:\", e);\n process.exit(1);\n});\n"],"names":[],"mappings":";;;;;AAKA,MAAM,SAAS,IAAI,UAAU;AAAA,EAC3B,MAAM;AAAA,EACN,SAAS;AACX,CAAC;AAED,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EACR;AAAA,MACC,EAAE,KAAK;AAAA,QACL;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MAAA,CACD;AAAA,IAAA,EAEF,WACA;AAAA,MACC;AAAA,IAAA;AAAA,IAEJ,aAAa,EACV,OAAA,EACA,SAAA,EACA,SAAS,uCAAuC;AAAA,IACnD,oBAAoB,EACjB,OAAA,EACA,SAAA,EACA,SAAS,gEAAgE;AAAA,IAC5E,UAAU,EAAE,OAAA,EAAS,SAAA,EAAW,SAAS,yCAAyC;AAAA,IAClF,UAAU,EACP,OAAA,EACA,SAAA,EACA,QAAQ,EAAE,EACV,SAAS,wBAAwB;AAAA,IACpC,WAAW,EAAE,QAAA,EAAU,SAAA,EAAW,QAAQ,KAAK,EAAE,SAAS,wBAAwB;AAAA,IAClF,UAAU,EACP,KAAK,CAAC,YAAY,YAAY,YAAY,YAAY,CAAC,EACvD,WACA,SAAS,oBAAoB;AAAA,IAChC,gBAAgB,EACb,OAAA,EACA,SAAA,EACA,QAAQ,EAAE,EACV,SAAS,sCAAsC;AAAA,IAClD,gBAAgB,EACb,OAAA,EACA,SAAA,EACA,QAAQ,KAAK,EACb,SAAS,2DAA2D;AAAA,IACvE,WAAW,EACR,OAAA,EACA,SAAA,EACA,SAAS,4CAA4C;AAAA,IACxD,oBAAoB,EACjB,KAAK,CAAC,YAAY,QAAQ,OAAO,CAAC,EAClC,WACA,QAAQ,UAAU,EAClB,SAAS,6BAA6B;AAAA,IACzC,4BAA4B,EACzB,UACA,SAAA,EACA,QAAQ,KAAK,EACb,SAAS,gDAAgD;AAAA,EAAA;AAAA,EAE9D,OAAO,WAAW;AAChB,QAAI;AACF,YAAM,SAAS,MAAM,WAAW;AAAA,QAC9B,WAAW,OAAO;AAAA,QAClB,aAAa,OAAO;AAAA,QACpB,oBAAoB,OAAO;AAAA,QAC3B,UAAU,OAAO;AAAA,QACjB,UAAU,OAAO;AAAA,QACjB,WAAW,OAAO;AAAA,QAClB,UAAU,OAAO;AAAA,QACjB,gBAAgB,OAAO;AAAA,QACvB,gBAAgB,OAAO;AAAA,QACvB,WAAW,OAAO;AAAA,QAClB,oBAAoB,OAAO;AAAA,QAC3B,
4BAA4B,OAAO;AAAA,MAAA,CACpC;AAED,UAAI,OAAO,KAAK,WAAW,GAAG;AAC5B,eAAO;AAAA,UACL,SAAS;AAAA,YACP;AAAA,cACE,MAAM;AAAA,cACN,MAAM;AAAA,YAAA;AAAA,UACR;AAAA,QACF;AAAA,MAEJ;AAGA,YAAM,UAAU,OAAO,KACpB,IAAI,CAAC,KAAK,MAAM;AACf,cAAM,QAAQ;AAAA,UACZ,GAAG,IAAI,CAAC,OAAO,IAAI,KAAK;AAAA,UACxB,eAAe,IAAI,WAAW,KAAK;AAAA,UACnC,gBAAgB,IAAI,YAAY,KAAK,GAAG,IAAI,YAAY,cAAc,EAAE;AAAA,UACxE,WAAW,IAAI,OAAO;AAAA,QAAA;AAExB,YAAI,IAAI,YAAa,OAAM,KAAK,cAAc,IAAI,WAAW,EAAE;AAC/D,YAAI,IAAI,cAAc,IAAI,YAAY;AACpC,gBAAM;AAAA,YACJ,cAAc,IAAI,YAAY,GAAG,GAAG,IAAI,WAAW,gBAAgB,MAAM,IAAI,YAAY,GAAG,GAAG,IAAI,WAAW,gBAAgB,KAAK,IAAI,YAAY,QAAQ;AAAA,UAAA;AAAA,QAE/J;AACA,YAAI,IAAI,SAAU,OAAM,KAAK,YAAY,IAAI,QAAQ,EAAE;AACvD,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,CAAC,EACA,KAAK,MAAM;AAEd,aAAO;AAAA,QACL,SAAS;AAAA,UACP;AAAA,YACE,MAAM;AAAA,YACN,MAAM,SAAS,OAAO,KAAK,MAAM;AAAA;AAAA,EAAa,OAAO;AAAA,UAAA;AAAA,QACvD;AAAA,MACF;AAAA,IAEJ,SAAS,GAAQ;AACf,aAAO;AAAA,QACL,SAAS;AAAA,UACP;AAAA,YACE,MAAM;AAAA,YACN,MAAM,wBAAwB,EAAE,OAAO;AAAA,UAAA;AAAA,QACzC;AAAA,QAEF,SAAS;AAAA,MAAA;AAAA,IAEb;AAAA,EACF;AACF;AAEA,eAAe,OAAO;AACpB,QAAM,YAAY,IAAI,qBAAA;AACtB,QAAM,OAAO,QAAQ,SAAS;AAChC;AAEA,OAAO,MAAM,CAAC,MAAM;AAClB,UAAQ,MAAM,qBAAqB,CAAC;AACpC,UAAQ,KAAK,CAAC;AAChB,CAAC;"}
@@ -0,0 +1,58 @@
1
+ import { ScrapeJobsParams } from './types';
2
+ export interface ScrapeJobsResult {
3
+ jobs: FlatJobRecord[];
4
+ }
5
+ export interface FlatJobRecord {
6
+ id?: string;
7
+ site: string;
8
+ job_url: string;
9
+ job_url_direct?: string;
10
+ title: string;
11
+ company?: string;
12
+ location?: string;
13
+ date_posted?: string;
14
+ job_type?: string;
15
+ salary_source?: string;
16
+ interval?: string;
17
+ min_amount?: number;
18
+ max_amount?: number;
19
+ currency?: string;
20
+ is_remote?: boolean;
21
+ job_level?: string;
22
+ job_function?: string;
23
+ listing_type?: string;
24
+ emails?: string;
25
+ description?: string;
26
+ company_industry?: string;
27
+ company_url?: string;
28
+ company_logo?: string;
29
+ company_url_direct?: string;
30
+ company_addresses?: string;
31
+ company_num_employees?: string;
32
+ company_revenue?: string;
33
+ company_description?: string;
34
+ skills?: string;
35
+ experience_range?: string;
36
+ company_rating?: number;
37
+ company_reviews_count?: number;
38
+ vacancy_count?: number;
39
+ work_from_home_type?: string;
40
+ }
41
+ /**
42
+ * Scrapes jobs from multiple job boards concurrently.
43
+ *
44
+ * @example
45
+ * ```ts
46
+ * import { scrapeJobs } from "jobspy-js";
47
+ *
48
+ * const result = await scrapeJobs({
49
+ * site_name: ["indeed", "linkedin"],
50
+ * search_term: "software engineer",
51
+ * location: "San Francisco, CA",
52
+ * results_wanted: 20,
53
+ * });
54
+ *
55
+ * console.log(`Found ${result.jobs.length} jobs`);
56
+ * ```
57
+ */
58
+ export declare function scrapeJobs(params?: ScrapeJobsParams): Promise<ScrapeJobsResult>;
@@ -0,0 +1,15 @@
1
+ import { Session } from 'wreq-js';
2
+ import { JobResponse, ScraperInput, Site } from '../types';
3
+ import { ProxyRotator } from '../utils';
4
+ export declare abstract class Scraper {
5
+ readonly site: Site;
6
+ protected proxyRotator: ProxyRotator;
7
+ protected session: Session;
8
+ constructor(site: Site, options?: {
9
+ proxies?: string | string[] | null;
10
+ });
11
+ protected initSession(browser?: string, insecure?: boolean): Promise<void>;
12
+ protected fetchWithProxy(url: string, init?: any): Promise<any>;
13
+ abstract scrape(input: ScraperInput): Promise<JobResponse>;
14
+ close(): Promise<void>;
15
+ }
@@ -0,0 +1,13 @@
1
+ import { Scraper } from '../base';
2
+ import { JobResponse, ScraperInput } from '../../types';
3
+ export declare class Bayt extends Scraper {
4
+ private baseUrl;
5
+ private delay;
6
+ private bandDelay;
7
+ constructor(options?: {
8
+ proxies?: string | string[] | null;
9
+ });
10
+ scrape(input: ScraperInput): Promise<JobResponse>;
11
+ private fetchJobs;
12
+ private extractJobInfo;
13
+ }
@@ -0,0 +1,14 @@
1
+ import { Scraper } from '../base';
2
+ import { JobResponse, ScraperInput } from '../../types';
3
+ export declare class BDJobs extends Scraper {
4
+ private searchUrl;
5
+ private jobDetailsBaseUrl;
6
+ private scraper_input;
7
+ constructor(options?: {
8
+ proxies?: string | string[] | null;
9
+ });
10
+ scrape(input: ScraperInput): Promise<JobResponse>;
11
+ private fetchPage;
12
+ private processItem;
13
+ private parseLocation;
14
+ }
@@ -0,0 +1,3 @@
1
+ export declare const HEADERS: Record<string, string>;
2
+ export declare const QUERY_TEMPLATE = "\n query JobSearchResultsQuery(\n $excludeJobListingIds: [Long!],\n $keyword: String,\n $locationId: Int,\n $locationType: LocationTypeEnum,\n $numJobsToShow: Int!,\n $pageCursor: String,\n $pageNumber: Int,\n $filterParams: [FilterParams],\n $originalPageUrl: String,\n $seoFriendlyUrlInput: String,\n $parameterUrlInput: String,\n $seoUrl: Boolean\n ) {\n jobListings(\n contextHolder: {\n searchParams: {\n excludeJobListingIds: $excludeJobListingIds,\n keyword: $keyword,\n locationId: $locationId,\n locationType: $locationType,\n numPerPage: $numJobsToShow,\n pageCursor: $pageCursor,\n pageNumber: $pageNumber,\n filterParams: $filterParams,\n originalPageUrl: $originalPageUrl,\n seoFriendlyUrlInput: $seoFriendlyUrlInput,\n parameterUrlInput: $parameterUrlInput,\n seoUrl: $seoUrl,\n searchType: SR\n }\n }\n ) {\n jobListings {\n ...JobView\n __typename\n }\n paginationCursors { cursor pageNumber __typename }\n totalJobsCount\n __typename\n }\n }\n\n fragment JobView on JobListingSearchResult {\n jobview {\n header {\n adOrderId advertiserType adOrderSponsorshipLevel ageInDays\n divisionEmployerName easyApply\n employer { id name shortName __typename }\n employerNameFromSearch goc gocConfidence gocId\n jobCountryId jobLink jobResultTrackingKey jobTitleText\n locationName locationType locId needsCommission\n payCurrency payPeriod\n payPeriodAdjustedPay { p10 p50 p90 __typename }\n rating salarySource savedJobId sponsored __typename\n }\n job { description importConfigId jobTitleId jobTitleText listingId __typename }\n jobListingAdminDetails {\n cpcVal importConfigId jobListingId jobSourceId\n userEligibleForAdminJobDetails __typename\n }\n overview { shortName squareLogoUrl __typename }\n __typename\n }\n __typename\n }";
3
+ export declare const FALLBACK_TOKEN = "Ft6oHEWlRZrxDww95Cpazw:0pGUrkb2y3TyOpAIqF2vbPmUXoXVkD3oEGDVkvfeCerceQ5-n8mBg3BovySUIjmCPHCaW0H2nQVdqzbtsYqf4Q:wcqRqeegRUa9MVLJGyujVXB7vWFPjdaS1CtrrzJq-ok";
@@ -0,0 +1,19 @@
1
+ import { Scraper } from '../base';
2
+ import { JobResponse, ScraperInput } from '../../types';
3
+ export declare class Glassdoor extends Scraper {
4
+ private baseUrl;
5
+ private scraper_input;
6
+ private seenUrls;
7
+ private jobsPerPage;
8
+ private maxPages;
9
+ constructor(options?: {
10
+ proxies?: string | string[] | null;
11
+ });
12
+ scrape(input: ScraperInput): Promise<JobResponse>;
13
+ private getCsrfToken;
14
+ private fetchJobsPage;
15
+ private processJob;
16
+ private parseCompensation;
17
+ private fetchDescription;
18
+ private getLocation;
19
+ }
@@ -0,0 +1,2 @@
1
+ export declare const HEADERS_INITIAL: Record<string, string>;
2
+ export declare const HEADERS_JOBS: Record<string, string>;
@@ -0,0 +1,21 @@
1
+ import { Scraper } from '../base';
2
+ import { JobResponse, ScraperInput } from '../../types';
3
+ export declare class Google extends Scraper {
4
+ private searchUrl;
5
+ private jobsUrl;
6
+ private scraper_input;
7
+ private seenUrls;
8
+ private jobsPerPage;
9
+ private browser;
10
+ private pw_page;
11
+ constructor(options?: {
12
+ proxies?: string | string[] | null;
13
+ });
14
+ scrape(input: ScraperInput): Promise<JobResponse>;
15
+ private getInitialCursorAndJobs;
16
+ private findInitialJobs;
17
+ private getJobsNextPage;
18
+ private parseJobs;
19
+ private findJobInfo;
20
+ private parseJob;
21
+ }
@@ -0,0 +1,13 @@
1
+ import { Scraper } from '../base';
2
+ import { JobResponse, ScraperInput } from '../../types';
3
+ export declare class GoogleCareers extends Scraper {
4
+ private baseUrl;
5
+ private scraper_input;
6
+ private jobsPerPage;
7
+ constructor(options?: {
8
+ proxies?: string | string[] | null;
9
+ });
10
+ scrape(input: ScraperInput): Promise<JobResponse>;
11
+ private buildUrl;
12
+ private parseJob;
13
+ }
@@ -0,0 +1,2 @@
1
+ export declare const JOB_SEARCH_QUERY = "\n query GetJobData {\n jobSearch(\n {what}\n {location}\n limit: 100\n {cursor}\n sort: RELEVANCE\n {filters}\n ) {\n pageInfo {\n nextCursor\n }\n results {\n trackingKey\n job {\n source {\n name\n }\n key\n title\n datePublished\n dateOnIndeed\n description {\n html\n }\n location {\n countryName\n countryCode\n admin1Code\n city\n postalCode\n streetAddress\n formatted {\n short\n long\n }\n }\n compensation {\n estimated {\n currencyCode\n baseSalary {\n unitOfWork\n range {\n ... on Range {\n min\n max\n }\n }\n }\n }\n baseSalary {\n unitOfWork\n range {\n ... on Range {\n min\n max\n }\n }\n }\n currencyCode\n }\n attributes {\n key\n label\n }\n employer {\n relativeCompanyPageUrl\n name\n dossier {\n employerDetails {\n addresses\n industry\n employeesLocalizedLabel\n revenueLocalizedLabel\n briefDescription\n ceoName\n ceoPhotoUrl\n }\n images {\n headerImageUrl\n squareLogoUrl\n }\n links {\n corporateWebsite\n }\n }\n }\n recruit {\n viewJobUrl\n detailedSalary\n workSchedule\n }\n }\n }\n }\n }\n ";
2
+ export declare const API_HEADERS: Record<string, string>;
@@ -0,0 +1,19 @@
1
+ import { Scraper } from '../base';
2
+ import { JobResponse, ScraperInput } from '../../types';
3
+ export declare class Indeed extends Scraper {
4
+ private apiUrl;
5
+ private baseUrl;
6
+ private apiCountryCode;
7
+ private seenUrls;
8
+ private scraper_input;
9
+ constructor(options?: {
10
+ proxies?: string | string[] | null;
11
+ });
12
+ scrape(input: ScraperInput): Promise<JobResponse>;
13
+ private scrapePage;
14
+ private buildFilters;
15
+ private processJob;
16
+ private getJobType;
17
+ private getCompensation;
18
+ private isRemote;
19
+ }
@@ -0,0 +1 @@
1
+ export declare const HEADERS: Record<string, string>;
@@ -0,0 +1,20 @@
1
+ import { Scraper } from '../base';
2
+ import { JobResponse, ScraperInput } from '../../types';
3
+ export declare class LinkedIn extends Scraper {
4
+ private baseUrl;
5
+ private delay;
6
+ private bandDelay;
7
+ private scraper_input;
8
+ private urlRegex;
9
+ constructor(options?: {
10
+ proxies?: string | string[] | null;
11
+ });
12
+ scrape(input: ScraperInput): Promise<JobResponse>;
13
+ private processJob;
14
+ private getJobDetails;
15
+ private parseCriteria;
16
+ private parseJobType;
17
+ private parseJobUrlDirect;
18
+ private getLocation;
19
+ private isJobRemote;
20
+ }