jobspy-js 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +158 -0
- package/dist/cli/index.d.ts +1 -0
- package/dist/cli/index.js +98 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +13 -0
- package/dist/index.js.map +1 -0
- package/dist/mcp/index.d.ts +1 -0
- package/dist/mcp/index.js +113 -0
- package/dist/mcp/index.js.map +1 -0
- package/dist/scraper.d.ts +58 -0
- package/dist/scrapers/base.d.ts +15 -0
- package/dist/scrapers/bayt/index.d.ts +13 -0
- package/dist/scrapers/bdjobs/index.d.ts +14 -0
- package/dist/scrapers/glassdoor/constants.d.ts +3 -0
- package/dist/scrapers/glassdoor/index.d.ts +19 -0
- package/dist/scrapers/google/constants.d.ts +2 -0
- package/dist/scrapers/google/index.d.ts +21 -0
- package/dist/scrapers/google-careers/index.d.ts +13 -0
- package/dist/scrapers/indeed/constants.d.ts +2 -0
- package/dist/scrapers/indeed/index.d.ts +19 -0
- package/dist/scrapers/linkedin/constants.d.ts +1 -0
- package/dist/scrapers/linkedin/index.d.ts +20 -0
- package/dist/scrapers/naukri/constants.d.ts +1 -0
- package/dist/scrapers/naukri/index.d.ts +19 -0
- package/dist/scrapers/ziprecruiter/constants.d.ts +2 -0
- package/dist/scrapers/ziprecruiter/index.d.ts +15 -0
- package/dist/shared/scraper-CuXnl6Gf.js +2445 -0
- package/dist/shared/scraper-CuXnl6Gf.js.map +1 -0
- package/dist/types.d.ts +134 -0
- package/dist/utils.d.ts +46 -0
- package/package.json +66 -0
- package/src/cli/index.ts +127 -0
- package/src/index.ts +19 -0
- package/src/mcp/index.ts +154 -0
- package/src/scraper.ts +280 -0
- package/src/scrapers/base.ts +41 -0
- package/src/scrapers/bayt/index.ts +113 -0
- package/src/scrapers/bdjobs/index.ts +132 -0
- package/src/scrapers/glassdoor/constants.ts +90 -0
- package/src/scrapers/glassdoor/index.ts +344 -0
- package/src/scrapers/google/constants.ts +34 -0
- package/src/scrapers/google/index.ts +289 -0
- package/src/scrapers/google-careers/index.ts +237 -0
- package/src/scrapers/indeed/constants.ts +112 -0
- package/src/scrapers/indeed/index.ts +265 -0
- package/src/scrapers/linkedin/constants.ts +10 -0
- package/src/scrapers/linkedin/index.ts +311 -0
- package/src/scrapers/naukri/constants.ts +14 -0
- package/src/scrapers/naukri/index.ts +268 -0
- package/src/scrapers/ziprecruiter/constants.ts +31 -0
- package/src/scrapers/ziprecruiter/index.ts +224 -0
- package/src/types.ts +381 -0
- package/src/utils.ts +289 -0
- package/tests/integration/bayt.test.ts +29 -0
- package/tests/integration/bdjobs.test.ts +29 -0
- package/tests/integration/glassdoor.test.ts +30 -0
- package/tests/integration/google-careers.test.ts +30 -0
- package/tests/integration/google.test.ts +31 -0
- package/tests/integration/indeed.test.ts +30 -0
- package/tests/integration/linkedin.test.ts +29 -0
- package/tests/integration/naukri.test.ts +29 -0
- package/tests/integration/ziprecruiter.test.ts +29 -0
- package/tsconfig.json +16 -0
- package/vite.config.ts +35 -0
package/README.md
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
# jobspy-js
|
|
2
|
+
|
|
3
|
+
TypeScript port of [JobSpy](https://github.com/speedyapply/JobSpy) — scrape job postings from LinkedIn, Indeed, Glassdoor, Google Jobs, Google Careers, ZipRecruiter, Bayt, Naukri & BDJobs.
|
|
4
|
+
|
|
5
|
+
Uses [wreq-js](https://github.com/nicehash/wreq-js) for browser TLS fingerprint emulation (Chrome/Firefox/Safari).
|
|
6
|
+
|
|
7
|
+
## Features
|
|
8
|
+
|
|
9
|
+
- **9 job boards** — LinkedIn, Indeed, Glassdoor, Google Jobs, Google Careers, ZipRecruiter, Bayt, Naukri, BDJobs
|
|
10
|
+
- **3 interfaces** — SDK, CLI, MCP server
|
|
11
|
+
- **Browser emulation** — wreq-js with full TLS fingerprinting (JA3/JA4)
|
|
12
|
+
- **Proxy rotation** — built-in rotating proxy support
|
|
13
|
+
- **Concurrent scraping** — all sites scraped in parallel
|
|
14
|
+
- **Salary extraction** — parses compensation from descriptions when not provided directly
|
|
15
|
+
- **60+ countries** — Indeed/Glassdoor regional domain support
|
|
16
|
+
|
|
17
|
+
## Supported Sites
|
|
18
|
+
|
|
19
|
+
| Site | Key | Notes |
|
|
20
|
+
|------|-----|-------|
|
|
21
|
+
| LinkedIn | `linkedin` | HTML scraping |
|
|
22
|
+
| Indeed | `indeed` | GraphQL API |
|
|
23
|
+
| Glassdoor | `glassdoor` | GraphQL API |
|
|
24
|
+
| Google Jobs | `google` | Playwright (headless Chrome); requires clean residential IP or proxy |
|
|
25
|
+
| Google Careers | `google_careers` | Plain HTTP; scrapes jobs at Google (the company) |
|
|
26
|
+
| ZipRecruiter | `zip_recruiter` | Web scraping |
|
|
27
|
+
| Bayt | `bayt` | HTML scraping |
|
|
28
|
+
| Naukri | `naukri` | REST API |
|
|
29
|
+
| BDJobs | `bdjobs` | REST API |
|
|
30
|
+
|
|
31
|
+
## Installation
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
npm install jobspy-js
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
> **Google Jobs** (`google`) uses [Playwright](https://playwright.dev) to execute JavaScript. After installing, run:
|
|
38
|
+
> ```bash
|
|
39
|
+
> npx playwright install chromium
|
|
40
|
+
> ```
|
|
41
|
+
|
|
42
|
+
## SDK Usage
|
|
43
|
+
|
|
44
|
+
```ts
|
|
45
|
+
import { scrapeJobs } from "jobspy-js";
|
|
46
|
+
|
|
47
|
+
const result = await scrapeJobs({
|
|
48
|
+
site_name: ["indeed", "linkedin"],
|
|
49
|
+
search_term: "software engineer",
|
|
50
|
+
location: "San Francisco, CA",
|
|
51
|
+
results_wanted: 20,
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
console.log(`Found ${result.jobs.length} jobs`);
|
|
55
|
+
for (const job of result.jobs) {
|
|
56
|
+
console.log(`${job.title} at ${job.company} — ${job.job_url}`);
|
|
57
|
+
}
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Parameters
|
|
61
|
+
|
|
62
|
+
| Parameter | Type | Default | Description |
|
|
63
|
+
|-----------|------|---------|-------------|
|
|
64
|
+
| `site_name` | `string[]` | all sites | Job boards to scrape |
|
|
65
|
+
| `search_term` | `string` | — | Job title / search query |
|
|
66
|
+
| `location` | `string` | — | Job location (e.g. `"San Francisco, CA"`) |
|
|
67
|
+
| `distance` | `number` | `50` | Search radius in miles |
|
|
68
|
+
| `is_remote` | `boolean` | `false` | Filter for remote jobs |
|
|
69
|
+
| `job_type` | `string` | — | `fulltime`, `parttime`, `contract`, `internship` |
|
|
70
|
+
| `results_wanted` | `number` | `15` | Results per site |
|
|
71
|
+
| `country_indeed` | `string` | `"usa"` | Country for Indeed/Glassdoor |
|
|
72
|
+
| `hours_old` | `number` | — | Filter jobs posted within N hours |
|
|
73
|
+
| `description_format` | `string` | `"markdown"` | `markdown`, `html`, or `plain` |
|
|
74
|
+
| `proxies` | `string \| string[]` | — | Proxy servers (`user:pass@host:port`) |
|
|
75
|
+
| `linkedin_fetch_description` | `boolean` | `false` | Fetch full LinkedIn descriptions (slower) |
|
|
76
|
+
| `enforce_annual_salary` | `boolean` | `false` | Convert all salaries to annual |
|
|
77
|
+
|
|
78
|
+
## CLI
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
# Search for React jobs on LinkedIn
|
|
82
|
+
npx jobspy -s linkedin -q "react developer" -l "New York, NY" -n 20
|
|
83
|
+
|
|
84
|
+
# Multiple sites, remote only, output to file
|
|
85
|
+
npx jobspy -s linkedin indeed -q "typescript" -r -o results.json
|
|
86
|
+
|
|
87
|
+
# CSV output
|
|
88
|
+
npx jobspy -s indeed -q "python" -o jobs.csv
|
|
89
|
+
|
|
90
|
+
# Google Careers (jobs at Google)
|
|
91
|
+
npx jobspy -s google_careers -q "software engineer" -l "USA" -n 10
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
Run `npx jobspy --help` for all options.
|
|
95
|
+
|
|
96
|
+
## MCP Server
|
|
97
|
+
|
|
98
|
+
Add to your MCP client config:
|
|
99
|
+
|
|
100
|
+
```json
|
|
101
|
+
{
|
|
102
|
+
"mcpServers": {
|
|
103
|
+
"jobspy": {
|
|
104
|
+
"command": "npx",
|
|
105
|
+
"args": ["-y", "jobspy-js", "--mcp"]
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
The MCP server exposes a `scrape_jobs` tool covering the core SDK search parameters (sites, search term, location, distance, remote/job-type filters, result count, country, recency, and description format).
|
|
112
|
+
|
|
113
|
+
## Development
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
git clone https://github.com/borgius/jobspy-js.git
|
|
117
|
+
cd jobspy-js
|
|
118
|
+
npm install
|
|
119
|
+
|
|
120
|
+
# Build
|
|
121
|
+
npm run build
|
|
122
|
+
|
|
123
|
+
# Type check
|
|
124
|
+
npm run typecheck
|
|
125
|
+
|
|
126
|
+
# Run CLI from source
|
|
127
|
+
npm run cli -- -s linkedin -q "react" -n 5
|
|
128
|
+
|
|
129
|
+
# Run tests
|
|
130
|
+
npm test
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
## Project Structure
|
|
134
|
+
|
|
135
|
+
```
|
|
136
|
+
src/
|
|
137
|
+
├── index.ts # SDK entry point
|
|
138
|
+
├── scraper.ts # Main scrapeJobs() orchestrator
|
|
139
|
+
├── types.ts # All types, enums, country config
|
|
140
|
+
├── utils.ts # Logger, proxy rotation, HTML helpers
|
|
141
|
+
├── cli/index.ts # CLI (commander)
|
|
142
|
+
├── mcp/index.ts # MCP server
|
|
143
|
+
└── scrapers/
|
|
144
|
+
├── base.ts # Abstract Scraper base class
|
|
145
|
+
├── indeed/ # GraphQL API
|
|
146
|
+
├── linkedin/ # HTML scraping
|
|
147
|
+
├── glassdoor/ # GraphQL API
|
|
148
|
+
├── google/ # Playwright headless Chrome
|
|
149
|
+
├── google-careers/ # Plain HTTP; AF_initDataCallback JSON parsing
|
|
150
|
+
├── ziprecruiter/ # Web scraping
|
|
151
|
+
├── bayt/ # HTML scraping
|
|
152
|
+
├── naukri/ # REST API
|
|
153
|
+
└── bdjobs/ # REST API
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
## License
|
|
157
|
+
|
|
158
|
+
MIT
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { Command } from "commander";
|
|
3
|
+
import { s as scrapeJobs } from "../shared/scraper-CuXnl6Gf.js";
|
|
4
|
+
import { writeFileSync } from "node:fs";
|
|
5
|
+
// CLI entry point: declares every flag, forwards the parsed options to
// scrapeJobs(), then either writes results to a JSON/CSV file (-o) or prints
// a fixed-width summary table to stdout. Exits 1 on any scraping error.
const program = new Command();

program
  .name("jobspy")
  .description(
    "Job scraper for LinkedIn, Indeed, Glassdoor, Google, ZipRecruiter, Bayt, Naukri & BDJobs"
  )
  // Keep in sync with package.json (jobspy-js 1.1.0); was stale at "1.0.0".
  .version("1.1.0")
  .option(
    "-s, --site <sites...>",
    // google_careers was missing from the help text even though the README
    // documents it and the package ships that scraper.
    "Job boards to scrape (linkedin, indeed, zip_recruiter, glassdoor, google, google_careers, bayt, naukri, bdjobs)"
  )
  .option("-q, --search-term <term>", "Search term")
  .option("--google-search-term <term>", "Google-specific search term")
  .option("-l, --location <location>", "Job location")
  .option("-d, --distance <miles>", "Distance in miles", "50")
  .option("-r, --remote", "Filter for remote jobs")
  .option(
    "-t, --job-type <type>",
    "Job type (fulltime, parttime, contract, internship)"
  )
  .option("--easy-apply", "Filter for easy apply jobs")
  .option("-n, --results <count>", "Number of results wanted", "15")
  .option("-c, --country <country>", "Country for Indeed/Glassdoor", "usa")
  .option("-p, --proxies <proxies...>", "Proxy servers (user:pass@host:port)")
  .option(
    "--format <format>",
    "Description format (markdown, html, plain)",
    "markdown"
  )
  .option("--linkedin-fetch-description", "Fetch full LinkedIn descriptions")
  .option("--linkedin-company-ids <ids...>", "LinkedIn company IDs to filter")
  .option("--offset <offset>", "Start from offset", "0")
  .option("--hours-old <hours>", "Filter jobs posted within N hours")
  .option("--enforce-annual-salary", "Convert all salaries to annual")
  .option("-v, --verbose <level>", "Verbosity (0=errors, 1=warnings, 2=all)", "0")
  .option("-o, --output <file>", "Output file path (JSON or CSV based on extension)")
  .action(async (opts) => {
    try {
      const result = await scrapeJobs({
        site_name: opts.site,
        search_term: opts.searchTerm,
        google_search_term: opts.googleSearchTerm,
        location: opts.location,
        distance: parseInt(opts.distance, 10),
        is_remote: opts.remote ?? false,
        job_type: opts.jobType,
        easy_apply: opts.easyApply,
        results_wanted: parseInt(opts.results, 10),
        country_indeed: opts.country,
        proxies: opts.proxies,
        description_format: opts.format,
        linkedin_fetch_description: opts.linkedinFetchDescription,
        linkedin_company_ids: opts.linkedinCompanyIds?.map(Number),
        offset: parseInt(opts.offset, 10),
        hours_old: opts.hoursOld ? parseInt(opts.hoursOld, 10) : void 0,
        enforce_annual_salary: opts.enforceAnnualSalary ?? false,
        verbose: parseInt(opts.verbose, 10)
      });
      console.log(`Found ${result.jobs.length} jobs`);
      if (opts.output) {
        const outPath = opts.output;
        // Serialization is chosen by extension: .csv -> CSV, otherwise JSON.
        const payload = outPath.endsWith(".csv")
          ? jobsToCsv(result.jobs)
          : JSON.stringify(result.jobs, null, 2);
        writeFileSync(outPath, payload);
        console.log(`Results written to ${outPath}`);
      } else {
        // No output file: print a fixed-width summary table to stdout.
        for (const job of result.jobs) {
          const line = [
            job.site?.padEnd(14),
            (job.title ?? "").slice(0, 40).padEnd(42),
            (job.company ?? "").slice(0, 20).padEnd(22),
            (job.location ?? "").slice(0, 25).padEnd(27),
            job.date_posted ?? ""
          ].join("");
          console.log(line);
        }
      }
    } catch (e) {
      // Thrown values are not guaranteed to be Error instances.
      console.error(`Error: ${e instanceof Error ? e.message : String(e)}`);
      process.exit(1);
    }
  });
|
|
80
|
+
/**
 * Serializes an array of flat job records to RFC 4180-style CSV text.
 *
 * Headers are taken from the keys of the first record; null/undefined values
 * become empty fields; fields containing a separator, quote, or line break
 * are wrapped in double quotes with embedded quotes doubled.
 *
 * @param {Array<Record<string, unknown>>} jobs flat job records
 * @returns {string} CSV text, or "" when the array is empty
 */
function jobsToCsv(jobs) {
  if (jobs.length === 0) return "";
  const headers = Object.keys(jobs[0]);
  const escape = (val) => {
    if (val == null) return "";
    const str = String(val);
    // \r added to the quoting condition: bare carriage returns also break
    // rows in CSV consumers, not just \n.
    if (/[",\n\r]/.test(str)) {
      return `"${str.replace(/"/g, '""')}"`;
    }
    return str;
  };
  const lines = [headers.join(",")];
  for (const job of jobs) {
    lines.push(headers.map((h) => escape(job[h])).join(","));
  }
  return lines.join("\n");
}
|
|
97
|
+
// Parse process.argv and dispatch the action handler defined above.
program.parse();
|
|
98
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sources":["../../src/cli/index.ts"],"sourcesContent":["import { Command } from \"commander\";\nimport { scrapeJobs } from \"../scraper\";\nimport { Site } from \"../types\";\nimport { writeFileSync } from \"node:fs\";\n\nconst program = new Command();\n\nprogram\n .name(\"jobspy\")\n .description(\n \"Job scraper for LinkedIn, Indeed, Glassdoor, Google, ZipRecruiter, Bayt, Naukri & BDJobs\",\n )\n .version(\"1.0.0\")\n .option(\n \"-s, --site <sites...>\",\n \"Job boards to scrape (linkedin, indeed, zip_recruiter, glassdoor, google, bayt, naukri, bdjobs)\",\n )\n .option(\"-q, --search-term <term>\", \"Search term\")\n .option(\"--google-search-term <term>\", \"Google-specific search term\")\n .option(\"-l, --location <location>\", \"Job location\")\n .option(\"-d, --distance <miles>\", \"Distance in miles\", \"50\")\n .option(\"-r, --remote\", \"Filter for remote jobs\")\n .option(\n \"-t, --job-type <type>\",\n \"Job type (fulltime, parttime, contract, internship)\",\n )\n .option(\"--easy-apply\", \"Filter for easy apply jobs\")\n .option(\"-n, --results <count>\", \"Number of results wanted\", \"15\")\n .option(\n \"-c, --country <country>\",\n \"Country for Indeed/Glassdoor\",\n \"usa\",\n )\n .option(\n \"-p, --proxies <proxies...>\",\n \"Proxy servers (user:pass@host:port)\",\n )\n .option(\n \"--format <format>\",\n \"Description format (markdown, html, plain)\",\n \"markdown\",\n )\n .option(\"--linkedin-fetch-description\", \"Fetch full LinkedIn descriptions\")\n .option(\n \"--linkedin-company-ids <ids...>\",\n \"LinkedIn company IDs to filter\",\n )\n .option(\"--offset <offset>\", \"Start from offset\", \"0\")\n .option(\n \"--hours-old <hours>\",\n \"Filter jobs posted within N hours\",\n )\n .option(\"--enforce-annual-salary\", \"Convert all salaries to annual\")\n .option(\"-v, --verbose <level>\", \"Verbosity (0=errors, 1=warnings, 2=all)\", \"0\")\n .option(\"-o, --output <file>\", \"Output file path (JSON or CSV 
based on extension)\")\n .action(async (opts) => {\n try {\n const result = await scrapeJobs({\n site_name: opts.site,\n search_term: opts.searchTerm,\n google_search_term: opts.googleSearchTerm,\n location: opts.location,\n distance: parseInt(opts.distance),\n is_remote: opts.remote ?? false,\n job_type: opts.jobType,\n easy_apply: opts.easyApply,\n results_wanted: parseInt(opts.results),\n country_indeed: opts.country,\n proxies: opts.proxies,\n description_format: opts.format,\n linkedin_fetch_description: opts.linkedinFetchDescription,\n linkedin_company_ids: opts.linkedinCompanyIds?.map(Number),\n offset: parseInt(opts.offset),\n hours_old: opts.hoursOld ? parseInt(opts.hoursOld) : undefined,\n enforce_annual_salary: opts.enforceAnnualSalary ?? false,\n verbose: parseInt(opts.verbose),\n });\n\n console.log(`Found ${result.jobs.length} jobs`);\n\n if (opts.output) {\n const outPath = opts.output as string;\n if (outPath.endsWith(\".csv\")) {\n writeFileSync(outPath, jobsToCsv(result.jobs));\n console.log(`Results written to ${outPath}`);\n } else {\n writeFileSync(outPath, JSON.stringify(result.jobs, null, 2));\n console.log(`Results written to ${outPath}`);\n }\n } else {\n // Print summary table to stdout\n for (const job of result.jobs) {\n const line = [\n job.site?.padEnd(14),\n (job.title ?? \"\").slice(0, 40).padEnd(42),\n (job.company ?? \"\").slice(0, 20).padEnd(22),\n (job.location ?? \"\").slice(0, 25).padEnd(27),\n job.date_posted ?? 
\"\",\n ].join(\"\");\n console.log(line);\n }\n }\n } catch (e: any) {\n console.error(`Error: ${e.message}`);\n process.exit(1);\n }\n });\n\nfunction jobsToCsv(jobs: any[]): string {\n if (jobs.length === 0) return \"\";\n const headers = Object.keys(jobs[0]);\n const escape = (val: any): string => {\n if (val == null) return \"\";\n const str = String(val);\n if (str.includes(\",\") || str.includes('\"') || str.includes(\"\\n\")) {\n return `\"${str.replace(/\"/g, '\"\"')}\"`;\n }\n return str;\n };\n const lines = [headers.join(\",\")];\n for (const job of jobs) {\n lines.push(headers.map((h) => escape(job[h])).join(\",\"));\n }\n return lines.join(\"\\n\");\n}\n\nprogram.parse();\n"],"names":[],"mappings":";;;;AAKA,MAAM,UAAU,IAAI,QAAA;AAEpB,QACG,KAAK,QAAQ,EACb;AAAA,EACC;AACF,EACC,QAAQ,OAAO,EACf;AAAA,EACC;AAAA,EACA;AACF,EACC,OAAO,4BAA4B,aAAa,EAChD,OAAO,+BAA+B,6BAA6B,EACnE,OAAO,6BAA6B,cAAc,EAClD,OAAO,0BAA0B,qBAAqB,IAAI,EAC1D,OAAO,gBAAgB,wBAAwB,EAC/C;AAAA,EACC;AAAA,EACA;AACF,EACC,OAAO,gBAAgB,4BAA4B,EACnD,OAAO,yBAAyB,4BAA4B,IAAI,EAChE;AAAA,EACC;AAAA,EACA;AAAA,EACA;AACF,EACC;AAAA,EACC;AAAA,EACA;AACF,EACC;AAAA,EACC;AAAA,EACA;AAAA,EACA;AACF,EACC,OAAO,gCAAgC,kCAAkC,EACzE;AAAA,EACC;AAAA,EACA;AACF,EACC,OAAO,qBAAqB,qBAAqB,GAAG,EACpD;AAAA,EACC;AAAA,EACA;AACF,EACC,OAAO,2BAA2B,gCAAgC,EAClE,OAAO,yBAAyB,2CAA2C,GAAG,EAC9E,OAAO,uBAAuB,mDAAmD,EACjF,OAAO,OAAO,SAAS;AACtB,MAAI;AACF,UAAM,SAAS,MAAM,WAAW;AAAA,MAC9B,WAAW,KAAK;AAAA,MAChB,aAAa,KAAK;AAAA,MAClB,oBAAoB,KAAK;AAAA,MACzB,UAAU,KAAK;AAAA,MACf,UAAU,SAAS,KAAK,QAAQ;AAAA,MAChC,WAAW,KAAK,UAAU;AAAA,MAC1B,UAAU,KAAK;AAAA,MACf,YAAY,KAAK;AAAA,MACjB,gBAAgB,SAAS,KAAK,OAAO;AAAA,MACrC,gBAAgB,KAAK;AAAA,MACrB,SAAS,KAAK;AAAA,MACd,oBAAoB,KAAK;AAAA,MACzB,4BAA4B,KAAK;AAAA,MACjC,sBAAsB,KAAK,oBAAoB,IAAI,MAAM;AAAA,MACzD,QAAQ,SAAS,KAAK,MAAM;AAAA,MAC5B,WAAW,KAAK,WAAW,SAAS,KAAK,QAAQ,IAAI;AAAA,MACrD,uBAAuB,KAAK,uBAAuB;AAAA,MACnD,SAAS,SAAS,KAAK,OAAO;AAAA,IAAA,CAC/B;AAED,YAAQ,IAAI,SAAS,OAAO,KAAK,MAAM,OAAO;AAE9C,QAAI,KAAK,QAAQ;AACf,YAAM,UAAU,KAAK;AACrB,UAAI,
QAAQ,SAAS,MAAM,GAAG;AAC5B,sBAAc,SAAS,UAAU,OAAO,IAAI,CAAC;AAC7C,gBAAQ,IAAI,sBAAsB,OAAO,EAAE;AAAA,MAC7C,OAAO;AACL,sBAAc,SAAS,KAAK,UAAU,OAAO,MAAM,MAAM,CAAC,CAAC;AAC3D,gBAAQ,IAAI,sBAAsB,OAAO,EAAE;AAAA,MAC7C;AAAA,IACF,OAAO;AAEL,iBAAW,OAAO,OAAO,MAAM;AAC7B,cAAM,OAAO;AAAA,UACX,IAAI,MAAM,OAAO,EAAE;AAAA,WAClB,IAAI,SAAS,IAAI,MAAM,GAAG,EAAE,EAAE,OAAO,EAAE;AAAA,WACvC,IAAI,WAAW,IAAI,MAAM,GAAG,EAAE,EAAE,OAAO,EAAE;AAAA,WACzC,IAAI,YAAY,IAAI,MAAM,GAAG,EAAE,EAAE,OAAO,EAAE;AAAA,UAC3C,IAAI,eAAe;AAAA,QAAA,EACnB,KAAK,EAAE;AACT,gBAAQ,IAAI,IAAI;AAAA,MAClB;AAAA,IACF;AAAA,EACF,SAAS,GAAQ;AACf,YAAQ,MAAM,UAAU,EAAE,OAAO,EAAE;AACnC,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF,CAAC;AAEH,SAAS,UAAU,MAAqB;AACtC,MAAI,KAAK,WAAW,EAAG,QAAO;AAC9B,QAAM,UAAU,OAAO,KAAK,KAAK,CAAC,CAAC;AACnC,QAAM,SAAS,CAAC,QAAqB;AACnC,QAAI,OAAO,KAAM,QAAO;AACxB,UAAM,MAAM,OAAO,GAAG;AACtB,QAAI,IAAI,SAAS,GAAG,KAAK,IAAI,SAAS,GAAG,KAAK,IAAI,SAAS,IAAI,GAAG;AAChE,aAAO,IAAI,IAAI,QAAQ,MAAM,IAAI,CAAC;AAAA,IACpC;AACA,WAAO;AAAA,EACT;AACA,QAAM,QAAQ,CAAC,QAAQ,KAAK,GAAG,CAAC;AAChC,aAAW,OAAO,MAAM;AACtB,UAAM,KAAK,QAAQ,IAAI,CAAC,MAAM,OAAO,IAAI,CAAC,CAAC,CAAC,EAAE,KAAK,GAAG,CAAC;AAAA,EACzD;AACA,SAAO,MAAM,KAAK,IAAI;AACxB;AAEA,QAAQ,MAAA;"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
export { Site, JobType, CompensationInterval, DescriptionFormat, SalarySource, type JobPost, type JobResponse, type Compensation, type Location, type ScrapeJobsParams, type ScraperInput, type Country, DESIRED_COLUMNS, getCountry, displayLocation, } from './types';
|
|
2
|
+
export { scrapeJobs } from './scraper';
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
// Public SDK surface: re-export the minified shared-chunk bindings under
// their documented names in a single pass-through statement.
export {
  C as CompensationInterval,
  D as DESIRED_COLUMNS,
  a as DescriptionFormat,
  J as JobType,
  S as SalarySource,
  b as Site,
  d as displayLocation,
  g as getCountry,
  s as scrapeJobs
} from "./shared/scraper-CuXnl6Gf.js";
|
|
13
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sources":[],"sourcesContent":[],"names":[],"mappings":";"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
3
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
4
|
+
import { z } from "zod";
|
|
5
|
+
import { s as scrapeJobs } from "../shared/scraper-CuXnl6Gf.js";
|
|
6
|
+
// MCP server exposing job scraping as a single "scrape_jobs" tool over stdio.
const server = new McpServer({
  name: "jobspy",
  // Keep in sync with package.json (jobspy-js 1.1.0); was stale at "1.0.0".
  version: "1.1.0"
});

server.tool(
  "scrape_jobs",
  "Scrape job listings from multiple job boards (LinkedIn, Indeed, Glassdoor, Google, ZipRecruiter, Bayt, Naukri, BDJobs)",
  {
    site_name: z.array(
      z.enum([
        "linkedin",
        "indeed",
        "zip_recruiter",
        "glassdoor",
        "google",
        // google_careers was missing even though the package ships that
        // scraper and the README documents the key.
        "google_careers",
        "bayt",
        "naukri",
        "bdjobs"
      ])
    ).optional().describe(
      "Job boards to scrape. Defaults to all. Options: linkedin, indeed, zip_recruiter, glassdoor, google, google_careers, bayt, naukri, bdjobs"
    ),
    search_term: z.string().optional().describe("Search term / job title to search for"),
    google_search_term: z.string().optional().describe("Google-specific search term (overrides search_term for Google)"),
    location: z.string().optional().describe("Job location (e.g. 'San Francisco, CA')"),
    distance: z.number().optional().default(50).describe("Search radius in miles"),
    is_remote: z.boolean().optional().default(false).describe("Filter for remote jobs"),
    job_type: z.enum(["fulltime", "parttime", "contract", "internship"]).optional().describe("Filter by job type"),
    results_wanted: z.number().optional().default(10).describe("Number of results to return per site"),
    country_indeed: z.string().optional().default("usa").describe("Country for Indeed/Glassdoor (e.g. 'usa', 'uk', 'canada')"),
    hours_old: z.number().optional().describe("Filter jobs posted within the last N hours"),
    description_format: z.enum(["markdown", "html", "plain"]).optional().default("markdown").describe("Format for job descriptions"),
    linkedin_fetch_description: z.boolean().optional().default(false).describe("Fetch full descriptions from LinkedIn (slower)")
  },
  async (params) => {
    try {
      const result = await scrapeJobs({
        site_name: params.site_name,
        search_term: params.search_term,
        google_search_term: params.google_search_term,
        location: params.location,
        distance: params.distance,
        is_remote: params.is_remote,
        job_type: params.job_type,
        results_wanted: params.results_wanted,
        country_indeed: params.country_indeed,
        hours_old: params.hours_old,
        description_format: params.description_format,
        linkedin_fetch_description: params.linkedin_fetch_description
      });
      if (result.jobs.length === 0) {
        return {
          content: [
            {
              type: "text",
              text: "No jobs found matching the search criteria."
            }
          ]
        };
      }
      // Render each job as a small numbered markdown-style summary block.
      const summary = result.jobs.map((job, i) => {
        const parts = [
          `${i + 1}. **${job.title}**`,
          `   Company: ${job.company ?? "N/A"}`,
          `   Location: ${job.location ?? "N/A"}${job.is_remote ? " (Remote)" : ""}`,
          `   URL: ${job.job_url}`
        ];
        if (job.date_posted) parts.push(`   Posted: ${job.date_posted}`);
        if (job.min_amount && job.max_amount) {
          parts.push(
            `   Salary: ${job.currency ?? "$"}${job.min_amount.toLocaleString()} - ${job.currency ?? "$"}${job.max_amount.toLocaleString()} (${job.interval ?? "yearly"})`
          );
        }
        if (job.job_type) parts.push(`   Type: ${job.job_type}`);
        return parts.join("\n");
      }).join("\n\n");
      return {
        content: [
          {
            type: "text",
            text: `Found ${result.jobs.length} jobs:\n\n${summary}`
          }
        ]
      };
    } catch (e) {
      // Thrown values are not guaranteed to be Error instances.
      return {
        content: [
          {
            type: "text",
            text: `Error scraping jobs: ${e instanceof Error ? e.message : String(e)}`
          }
        ],
        isError: true
      };
    }
  }
);
|
|
105
|
+
/** Binds the MCP server to a stdio transport and starts serving requests. */
async function main() {
  await server.connect(new StdioServerTransport());
}

// Fire-and-forget startup; any connect failure is fatal.
main().catch((err) => {
  console.error("MCP server error:", err);
  process.exit(1);
});
|
|
113
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sources":["../../src/mcp/index.ts"],"sourcesContent":["import { McpServer } from \"@modelcontextprotocol/sdk/server/mcp.js\";\nimport { StdioServerTransport } from \"@modelcontextprotocol/sdk/server/stdio.js\";\nimport { z } from \"zod\";\nimport { scrapeJobs } from \"../scraper\";\n\nconst server = new McpServer({\n name: \"jobspy\",\n version: \"1.0.0\",\n});\n\nserver.tool(\n \"scrape_jobs\",\n \"Scrape job listings from multiple job boards (LinkedIn, Indeed, Glassdoor, Google, ZipRecruiter, Bayt, Naukri, BDJobs)\",\n {\n site_name: z\n .array(\n z.enum([\n \"linkedin\",\n \"indeed\",\n \"zip_recruiter\",\n \"glassdoor\",\n \"google\",\n \"bayt\",\n \"naukri\",\n \"bdjobs\",\n ]),\n )\n .optional()\n .describe(\n \"Job boards to scrape. Defaults to all. Options: linkedin, indeed, zip_recruiter, glassdoor, google, bayt, naukri, bdjobs\",\n ),\n search_term: z\n .string()\n .optional()\n .describe(\"Search term / job title to search for\"),\n google_search_term: z\n .string()\n .optional()\n .describe(\"Google-specific search term (overrides search_term for Google)\"),\n location: z.string().optional().describe(\"Job location (e.g. 'San Francisco, CA')\"),\n distance: z\n .number()\n .optional()\n .default(50)\n .describe(\"Search radius in miles\"),\n is_remote: z.boolean().optional().default(false).describe(\"Filter for remote jobs\"),\n job_type: z\n .enum([\"fulltime\", \"parttime\", \"contract\", \"internship\"])\n .optional()\n .describe(\"Filter by job type\"),\n results_wanted: z\n .number()\n .optional()\n .default(10)\n .describe(\"Number of results to return per site\"),\n country_indeed: z\n .string()\n .optional()\n .default(\"usa\")\n .describe(\"Country for Indeed/Glassdoor (e.g. 
'usa', 'uk', 'canada')\"),\n hours_old: z\n .number()\n .optional()\n .describe(\"Filter jobs posted within the last N hours\"),\n description_format: z\n .enum([\"markdown\", \"html\", \"plain\"])\n .optional()\n .default(\"markdown\")\n .describe(\"Format for job descriptions\"),\n linkedin_fetch_description: z\n .boolean()\n .optional()\n .default(false)\n .describe(\"Fetch full descriptions from LinkedIn (slower)\"),\n },\n async (params) => {\n try {\n const result = await scrapeJobs({\n site_name: params.site_name,\n search_term: params.search_term,\n google_search_term: params.google_search_term,\n location: params.location,\n distance: params.distance,\n is_remote: params.is_remote,\n job_type: params.job_type,\n results_wanted: params.results_wanted,\n country_indeed: params.country_indeed,\n hours_old: params.hours_old,\n description_format: params.description_format,\n linkedin_fetch_description: params.linkedin_fetch_description,\n });\n\n if (result.jobs.length === 0) {\n return {\n content: [\n {\n type: \"text\" as const,\n text: \"No jobs found matching the search criteria.\",\n },\n ],\n };\n }\n\n // Format jobs as a readable summary + structured data\n const summary = result.jobs\n .map((job, i) => {\n const parts = [\n `${i + 1}. **${job.title}**`,\n ` Company: ${job.company ?? \"N/A\"}`,\n ` Location: ${job.location ?? \"N/A\"}${job.is_remote ? \" (Remote)\" : \"\"}`,\n ` URL: ${job.job_url}`,\n ];\n if (job.date_posted) parts.push(` Posted: ${job.date_posted}`);\n if (job.min_amount && job.max_amount) {\n parts.push(\n ` Salary: ${job.currency ?? \"$\"}${job.min_amount.toLocaleString()} - ${job.currency ?? \"$\"}${job.max_amount.toLocaleString()} (${job.interval ?? 
\"yearly\"})`,\n );\n }\n if (job.job_type) parts.push(` Type: ${job.job_type}`);\n return parts.join(\"\\n\");\n })\n .join(\"\\n\\n\");\n\n return {\n content: [\n {\n type: \"text\" as const,\n text: `Found ${result.jobs.length} jobs:\\n\\n${summary}`,\n },\n ],\n };\n } catch (e: any) {\n return {\n content: [\n {\n type: \"text\" as const,\n text: `Error scraping jobs: ${e.message}`,\n },\n ],\n isError: true,\n };\n }\n },\n);\n\nasync function main() {\n const transport = new StdioServerTransport();\n await server.connect(transport);\n}\n\nmain().catch((e) => {\n console.error(\"MCP server error:\", e);\n process.exit(1);\n});\n"],"names":[],"mappings":";;;;;AAKA,MAAM,SAAS,IAAI,UAAU;AAAA,EAC3B,MAAM;AAAA,EACN,SAAS;AACX,CAAC;AAED,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EACR;AAAA,MACC,EAAE,KAAK;AAAA,QACL;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MAAA,CACD;AAAA,IAAA,EAEF,WACA;AAAA,MACC;AAAA,IAAA;AAAA,IAEJ,aAAa,EACV,OAAA,EACA,SAAA,EACA,SAAS,uCAAuC;AAAA,IACnD,oBAAoB,EACjB,OAAA,EACA,SAAA,EACA,SAAS,gEAAgE;AAAA,IAC5E,UAAU,EAAE,OAAA,EAAS,SAAA,EAAW,SAAS,yCAAyC;AAAA,IAClF,UAAU,EACP,OAAA,EACA,SAAA,EACA,QAAQ,EAAE,EACV,SAAS,wBAAwB;AAAA,IACpC,WAAW,EAAE,QAAA,EAAU,SAAA,EAAW,QAAQ,KAAK,EAAE,SAAS,wBAAwB;AAAA,IAClF,UAAU,EACP,KAAK,CAAC,YAAY,YAAY,YAAY,YAAY,CAAC,EACvD,WACA,SAAS,oBAAoB;AAAA,IAChC,gBAAgB,EACb,OAAA,EACA,SAAA,EACA,QAAQ,EAAE,EACV,SAAS,sCAAsC;AAAA,IAClD,gBAAgB,EACb,OAAA,EACA,SAAA,EACA,QAAQ,KAAK,EACb,SAAS,2DAA2D;AAAA,IACvE,WAAW,EACR,OAAA,EACA,SAAA,EACA,SAAS,4CAA4C;AAAA,IACxD,oBAAoB,EACjB,KAAK,CAAC,YAAY,QAAQ,OAAO,CAAC,EAClC,WACA,QAAQ,UAAU,EAClB,SAAS,6BAA6B;AAAA,IACzC,4BAA4B,EACzB,UACA,SAAA,EACA,QAAQ,KAAK,EACb,SAAS,gDAAgD;AAAA,EAAA;AAAA,EAE9D,OAAO,WAAW;AAChB,QAAI;AACF,YAAM,SAAS,MAAM,WAAW;AAAA,QAC9B,WAAW,OAAO;AAAA,QAClB,aAAa,OAAO;AAAA,QACpB,oBAAoB,OAAO;AAAA,QAC3B,UAAU,OAAO;AAAA,QACjB,UAAU,OAAO;AAAA,QACjB,WAAW,OAAO;AAAA,QAClB,UAAU,OAAO;AAAA,QACjB,gBAAgB,OAAO;AAAA,QACvB,gBAAgB,OAAO;AAAA,QACvB,WAAW,OAAO;AAAA,QAClB,oBAAoB,OAAO;AAAA,QAC3B,
4BAA4B,OAAO;AAAA,MAAA,CACpC;AAED,UAAI,OAAO,KAAK,WAAW,GAAG;AAC5B,eAAO;AAAA,UACL,SAAS;AAAA,YACP;AAAA,cACE,MAAM;AAAA,cACN,MAAM;AAAA,YAAA;AAAA,UACR;AAAA,QACF;AAAA,MAEJ;AAGA,YAAM,UAAU,OAAO,KACpB,IAAI,CAAC,KAAK,MAAM;AACf,cAAM,QAAQ;AAAA,UACZ,GAAG,IAAI,CAAC,OAAO,IAAI,KAAK;AAAA,UACxB,eAAe,IAAI,WAAW,KAAK;AAAA,UACnC,gBAAgB,IAAI,YAAY,KAAK,GAAG,IAAI,YAAY,cAAc,EAAE;AAAA,UACxE,WAAW,IAAI,OAAO;AAAA,QAAA;AAExB,YAAI,IAAI,YAAa,OAAM,KAAK,cAAc,IAAI,WAAW,EAAE;AAC/D,YAAI,IAAI,cAAc,IAAI,YAAY;AACpC,gBAAM;AAAA,YACJ,cAAc,IAAI,YAAY,GAAG,GAAG,IAAI,WAAW,gBAAgB,MAAM,IAAI,YAAY,GAAG,GAAG,IAAI,WAAW,gBAAgB,KAAK,IAAI,YAAY,QAAQ;AAAA,UAAA;AAAA,QAE/J;AACA,YAAI,IAAI,SAAU,OAAM,KAAK,YAAY,IAAI,QAAQ,EAAE;AACvD,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,CAAC,EACA,KAAK,MAAM;AAEd,aAAO;AAAA,QACL,SAAS;AAAA,UACP;AAAA,YACE,MAAM;AAAA,YACN,MAAM,SAAS,OAAO,KAAK,MAAM;AAAA;AAAA,EAAa,OAAO;AAAA,UAAA;AAAA,QACvD;AAAA,MACF;AAAA,IAEJ,SAAS,GAAQ;AACf,aAAO;AAAA,QACL,SAAS;AAAA,UACP;AAAA,YACE,MAAM;AAAA,YACN,MAAM,wBAAwB,EAAE,OAAO;AAAA,UAAA;AAAA,QACzC;AAAA,QAEF,SAAS;AAAA,MAAA;AAAA,IAEb;AAAA,EACF;AACF;AAEA,eAAe,OAAO;AACpB,QAAM,YAAY,IAAI,qBAAA;AACtB,QAAM,OAAO,QAAQ,SAAS;AAChC;AAEA,OAAO,MAAM,CAAC,MAAM;AAClB,UAAQ,MAAM,qBAAqB,CAAC;AACpC,UAAQ,KAAK,CAAC;AAChB,CAAC;"}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import { ScrapeJobsParams } from './types';
|
|
2
|
+
export interface ScrapeJobsResult {
|
|
3
|
+
jobs: FlatJobRecord[];
|
|
4
|
+
}
|
|
5
|
+
export interface FlatJobRecord {
|
|
6
|
+
id?: string;
|
|
7
|
+
site: string;
|
|
8
|
+
job_url: string;
|
|
9
|
+
job_url_direct?: string;
|
|
10
|
+
title: string;
|
|
11
|
+
company?: string;
|
|
12
|
+
location?: string;
|
|
13
|
+
date_posted?: string;
|
|
14
|
+
job_type?: string;
|
|
15
|
+
salary_source?: string;
|
|
16
|
+
interval?: string;
|
|
17
|
+
min_amount?: number;
|
|
18
|
+
max_amount?: number;
|
|
19
|
+
currency?: string;
|
|
20
|
+
is_remote?: boolean;
|
|
21
|
+
job_level?: string;
|
|
22
|
+
job_function?: string;
|
|
23
|
+
listing_type?: string;
|
|
24
|
+
emails?: string;
|
|
25
|
+
description?: string;
|
|
26
|
+
company_industry?: string;
|
|
27
|
+
company_url?: string;
|
|
28
|
+
company_logo?: string;
|
|
29
|
+
company_url_direct?: string;
|
|
30
|
+
company_addresses?: string;
|
|
31
|
+
company_num_employees?: string;
|
|
32
|
+
company_revenue?: string;
|
|
33
|
+
company_description?: string;
|
|
34
|
+
skills?: string;
|
|
35
|
+
experience_range?: string;
|
|
36
|
+
company_rating?: number;
|
|
37
|
+
company_reviews_count?: number;
|
|
38
|
+
vacancy_count?: number;
|
|
39
|
+
work_from_home_type?: string;
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Scrapes jobs from multiple job boards concurrently.
|
|
43
|
+
*
|
|
44
|
+
* @example
|
|
45
|
+
* ```ts
|
|
46
|
+
* import { scrapeJobs } from "jobspy-js";
|
|
47
|
+
*
|
|
48
|
+
* const result = await scrapeJobs({
|
|
49
|
+
* site_name: ["indeed", "linkedin"],
|
|
50
|
+
* search_term: "software engineer",
|
|
51
|
+
* location: "San Francisco, CA",
|
|
52
|
+
* results_wanted: 20,
|
|
53
|
+
* });
|
|
54
|
+
*
|
|
55
|
+
* console.log(`Found ${result.jobs.length} jobs`);
|
|
56
|
+
* ```
|
|
57
|
+
*/
|
|
58
|
+
export declare function scrapeJobs(params?: ScrapeJobsParams): Promise<ScrapeJobsResult>;
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { Session } from 'wreq-js';
|
|
2
|
+
import { JobResponse, ScraperInput, Site } from '../types';
|
|
3
|
+
import { ProxyRotator } from '../utils';
|
|
4
|
+
export declare abstract class Scraper {
|
|
5
|
+
readonly site: Site;
|
|
6
|
+
protected proxyRotator: ProxyRotator;
|
|
7
|
+
protected session: Session;
|
|
8
|
+
constructor(site: Site, options?: {
|
|
9
|
+
proxies?: string | string[] | null;
|
|
10
|
+
});
|
|
11
|
+
protected initSession(browser?: string, insecure?: boolean): Promise<void>;
|
|
12
|
+
protected fetchWithProxy(url: string, init?: any): Promise<any>;
|
|
13
|
+
abstract scrape(input: ScraperInput): Promise<JobResponse>;
|
|
14
|
+
close(): Promise<void>;
|
|
15
|
+
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { Scraper } from '../base';
|
|
2
|
+
import { JobResponse, ScraperInput } from '../../types';
|
|
3
|
+
export declare class Bayt extends Scraper {
|
|
4
|
+
private baseUrl;
|
|
5
|
+
private delay;
|
|
6
|
+
private bandDelay;
|
|
7
|
+
constructor(options?: {
|
|
8
|
+
proxies?: string | string[] | null;
|
|
9
|
+
});
|
|
10
|
+
scrape(input: ScraperInput): Promise<JobResponse>;
|
|
11
|
+
private fetchJobs;
|
|
12
|
+
private extractJobInfo;
|
|
13
|
+
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { Scraper } from '../base';
|
|
2
|
+
import { JobResponse, ScraperInput } from '../../types';
|
|
3
|
+
export declare class BDJobs extends Scraper {
|
|
4
|
+
private searchUrl;
|
|
5
|
+
private jobDetailsBaseUrl;
|
|
6
|
+
private scraper_input;
|
|
7
|
+
constructor(options?: {
|
|
8
|
+
proxies?: string | string[] | null;
|
|
9
|
+
});
|
|
10
|
+
scrape(input: ScraperInput): Promise<JobResponse>;
|
|
11
|
+
private fetchPage;
|
|
12
|
+
private processItem;
|
|
13
|
+
private parseLocation;
|
|
14
|
+
}
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
export declare const HEADERS: Record<string, string>;
|
|
2
|
+
export declare const QUERY_TEMPLATE = "\n query JobSearchResultsQuery(\n $excludeJobListingIds: [Long!],\n $keyword: String,\n $locationId: Int,\n $locationType: LocationTypeEnum,\n $numJobsToShow: Int!,\n $pageCursor: String,\n $pageNumber: Int,\n $filterParams: [FilterParams],\n $originalPageUrl: String,\n $seoFriendlyUrlInput: String,\n $parameterUrlInput: String,\n $seoUrl: Boolean\n ) {\n jobListings(\n contextHolder: {\n searchParams: {\n excludeJobListingIds: $excludeJobListingIds,\n keyword: $keyword,\n locationId: $locationId,\n locationType: $locationType,\n numPerPage: $numJobsToShow,\n pageCursor: $pageCursor,\n pageNumber: $pageNumber,\n filterParams: $filterParams,\n originalPageUrl: $originalPageUrl,\n seoFriendlyUrlInput: $seoFriendlyUrlInput,\n parameterUrlInput: $parameterUrlInput,\n seoUrl: $seoUrl,\n searchType: SR\n }\n }\n ) {\n jobListings {\n ...JobView\n __typename\n }\n paginationCursors { cursor pageNumber __typename }\n totalJobsCount\n __typename\n }\n }\n\n fragment JobView on JobListingSearchResult {\n jobview {\n header {\n adOrderId advertiserType adOrderSponsorshipLevel ageInDays\n divisionEmployerName easyApply\n employer { id name shortName __typename }\n employerNameFromSearch goc gocConfidence gocId\n jobCountryId jobLink jobResultTrackingKey jobTitleText\n locationName locationType locId needsCommission\n payCurrency payPeriod\n payPeriodAdjustedPay { p10 p50 p90 __typename }\n rating salarySource savedJobId sponsored __typename\n }\n job { description importConfigId jobTitleId jobTitleText listingId __typename }\n jobListingAdminDetails {\n cpcVal importConfigId jobListingId jobSourceId\n userEligibleForAdminJobDetails __typename\n }\n overview { shortName squareLogoUrl __typename }\n __typename\n }\n __typename\n }";
|
|
3
|
+
export declare const FALLBACK_TOKEN = "Ft6oHEWlRZrxDww95Cpazw:0pGUrkb2y3TyOpAIqF2vbPmUXoXVkD3oEGDVkvfeCerceQ5-n8mBg3BovySUIjmCPHCaW0H2nQVdqzbtsYqf4Q:wcqRqeegRUa9MVLJGyujVXB7vWFPjdaS1CtrrzJq-ok";
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { Scraper } from '../base';
|
|
2
|
+
import { JobResponse, ScraperInput } from '../../types';
|
|
3
|
+
export declare class Glassdoor extends Scraper {
|
|
4
|
+
private baseUrl;
|
|
5
|
+
private scraper_input;
|
|
6
|
+
private seenUrls;
|
|
7
|
+
private jobsPerPage;
|
|
8
|
+
private maxPages;
|
|
9
|
+
constructor(options?: {
|
|
10
|
+
proxies?: string | string[] | null;
|
|
11
|
+
});
|
|
12
|
+
scrape(input: ScraperInput): Promise<JobResponse>;
|
|
13
|
+
private getCsrfToken;
|
|
14
|
+
private fetchJobsPage;
|
|
15
|
+
private processJob;
|
|
16
|
+
private parseCompensation;
|
|
17
|
+
private fetchDescription;
|
|
18
|
+
private getLocation;
|
|
19
|
+
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { Scraper } from '../base';
|
|
2
|
+
import { JobResponse, ScraperInput } from '../../types';
|
|
3
|
+
export declare class Google extends Scraper {
|
|
4
|
+
private searchUrl;
|
|
5
|
+
private jobsUrl;
|
|
6
|
+
private scraper_input;
|
|
7
|
+
private seenUrls;
|
|
8
|
+
private jobsPerPage;
|
|
9
|
+
private browser;
|
|
10
|
+
private pw_page;
|
|
11
|
+
constructor(options?: {
|
|
12
|
+
proxies?: string | string[] | null;
|
|
13
|
+
});
|
|
14
|
+
scrape(input: ScraperInput): Promise<JobResponse>;
|
|
15
|
+
private getInitialCursorAndJobs;
|
|
16
|
+
private findInitialJobs;
|
|
17
|
+
private getJobsNextPage;
|
|
18
|
+
private parseJobs;
|
|
19
|
+
private findJobInfo;
|
|
20
|
+
private parseJob;
|
|
21
|
+
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { Scraper } from '../base';
|
|
2
|
+
import { JobResponse, ScraperInput } from '../../types';
|
|
3
|
+
export declare class GoogleCareers extends Scraper {
|
|
4
|
+
private baseUrl;
|
|
5
|
+
private scraper_input;
|
|
6
|
+
private jobsPerPage;
|
|
7
|
+
constructor(options?: {
|
|
8
|
+
proxies?: string | string[] | null;
|
|
9
|
+
});
|
|
10
|
+
scrape(input: ScraperInput): Promise<JobResponse>;
|
|
11
|
+
private buildUrl;
|
|
12
|
+
private parseJob;
|
|
13
|
+
}
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
export declare const JOB_SEARCH_QUERY = "\n query GetJobData {\n jobSearch(\n {what}\n {location}\n limit: 100\n {cursor}\n sort: RELEVANCE\n {filters}\n ) {\n pageInfo {\n nextCursor\n }\n results {\n trackingKey\n job {\n source {\n name\n }\n key\n title\n datePublished\n dateOnIndeed\n description {\n html\n }\n location {\n countryName\n countryCode\n admin1Code\n city\n postalCode\n streetAddress\n formatted {\n short\n long\n }\n }\n compensation {\n estimated {\n currencyCode\n baseSalary {\n unitOfWork\n range {\n ... on Range {\n min\n max\n }\n }\n }\n }\n baseSalary {\n unitOfWork\n range {\n ... on Range {\n min\n max\n }\n }\n }\n currencyCode\n }\n attributes {\n key\n label\n }\n employer {\n relativeCompanyPageUrl\n name\n dossier {\n employerDetails {\n addresses\n industry\n employeesLocalizedLabel\n revenueLocalizedLabel\n briefDescription\n ceoName\n ceoPhotoUrl\n }\n images {\n headerImageUrl\n squareLogoUrl\n }\n links {\n corporateWebsite\n }\n }\n }\n recruit {\n viewJobUrl\n detailedSalary\n workSchedule\n }\n }\n }\n }\n }\n ";
|
|
2
|
+
export declare const API_HEADERS: Record<string, string>;
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { Scraper } from '../base';
|
|
2
|
+
import { JobResponse, ScraperInput } from '../../types';
|
|
3
|
+
export declare class Indeed extends Scraper {
|
|
4
|
+
private apiUrl;
|
|
5
|
+
private baseUrl;
|
|
6
|
+
private apiCountryCode;
|
|
7
|
+
private seenUrls;
|
|
8
|
+
private scraper_input;
|
|
9
|
+
constructor(options?: {
|
|
10
|
+
proxies?: string | string[] | null;
|
|
11
|
+
});
|
|
12
|
+
scrape(input: ScraperInput): Promise<JobResponse>;
|
|
13
|
+
private scrapePage;
|
|
14
|
+
private buildFilters;
|
|
15
|
+
private processJob;
|
|
16
|
+
private getJobType;
|
|
17
|
+
private getCompensation;
|
|
18
|
+
private isRemote;
|
|
19
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const HEADERS: Record<string, string>;
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { Scraper } from '../base';
|
|
2
|
+
import { JobResponse, ScraperInput } from '../../types';
|
|
3
|
+
export declare class LinkedIn extends Scraper {
|
|
4
|
+
private baseUrl;
|
|
5
|
+
private delay;
|
|
6
|
+
private bandDelay;
|
|
7
|
+
private scraper_input;
|
|
8
|
+
private urlRegex;
|
|
9
|
+
constructor(options?: {
|
|
10
|
+
proxies?: string | string[] | null;
|
|
11
|
+
});
|
|
12
|
+
scrape(input: ScraperInput): Promise<JobResponse>;
|
|
13
|
+
private processJob;
|
|
14
|
+
private getJobDetails;
|
|
15
|
+
private parseCriteria;
|
|
16
|
+
private parseJobType;
|
|
17
|
+
private parseJobUrlDirect;
|
|
18
|
+
private getLocation;
|
|
19
|
+
private isJobRemote;
|
|
20
|
+
}
|