jobcrawl 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.prettierrc.json +10 -0
- package/CHANGELOG.md +40 -0
- package/README.md +232 -0
- package/dist/core/aggregators/yc.d.ts +7 -0
- package/dist/core/aggregators/yc.js +320 -0
- package/dist/core/browser.d.ts +30 -0
- package/dist/core/browser.js +196 -0
- package/dist/core/cache.d.ts +13 -0
- package/dist/core/cache.js +41 -0
- package/dist/core/detect-provider.d.ts +7 -0
- package/dist/core/detect-provider.js +125 -0
- package/dist/core/discover-careers.d.ts +18 -0
- package/dist/core/discover-careers.js +92 -0
- package/dist/core/extract-jobs.d.ts +14 -0
- package/dist/core/extract-jobs.js +36 -0
- package/dist/core/fetch-page.d.ts +11 -0
- package/dist/core/fetch-page.js +39 -0
- package/dist/core/format-output.d.ts +2 -0
- package/dist/core/format-output.js +59 -0
- package/dist/core/match-jobs.d.ts +6 -0
- package/dist/core/match-jobs.js +43 -0
- package/dist/core/providers/ashby.d.ts +6 -0
- package/dist/core/providers/ashby.js +58 -0
- package/dist/core/providers/generic.d.ts +6 -0
- package/dist/core/providers/generic.js +294 -0
- package/dist/core/providers/greenhouse.d.ts +6 -0
- package/dist/core/providers/greenhouse.js +47 -0
- package/dist/core/providers/lever.d.ts +7 -0
- package/dist/core/providers/lever.js +60 -0
- package/dist/core/providers/yc.d.ts +7 -0
- package/dist/core/providers/yc.js +320 -0
- package/dist/core/resolve-iframe.d.ts +6 -0
- package/dist/core/resolve-iframe.js +51 -0
- package/dist/core/save-raw.d.ts +4 -0
- package/dist/core/save-raw.js +13 -0
- package/dist/data/companies.d.ts +9 -0
- package/dist/data/companies.js +2849 -0
- package/dist/entrypoints/cli/app.d.ts +3 -0
- package/dist/entrypoints/cli/app.js +91 -0
- package/dist/entrypoints/cli/components/crawl-view.d.ts +1 -0
- package/dist/entrypoints/cli/components/crawl-view.js +94 -0
- package/dist/entrypoints/cli/components/discover-view.d.ts +1 -0
- package/dist/entrypoints/cli/components/discover-view.js +67 -0
- package/dist/entrypoints/cli/crawl-aggregators.d.ts +26 -0
- package/dist/entrypoints/cli/crawl-aggregators.js +76 -0
- package/dist/entrypoints/cli/crawl-url.d.ts +26 -0
- package/dist/entrypoints/cli/crawl-url.js +54 -0
- package/dist/entrypoints/cli/crawl.d.ts +32 -0
- package/dist/entrypoints/cli/crawl.js +108 -0
- package/dist/entrypoints/cli/discover.d.ts +10 -0
- package/dist/entrypoints/cli/discover.js +69 -0
- package/dist/entrypoints/cli/index.d.ts +2 -0
- package/dist/entrypoints/cli/index.js +197 -0
- package/dist/entrypoints/cli/init.d.ts +9 -0
- package/dist/entrypoints/cli/init.js +94 -0
- package/dist/entrypoints/cli/plain.d.ts +6 -0
- package/dist/entrypoints/cli/plain.js +77 -0
- package/dist/events.d.ts +114 -0
- package/dist/events.js +17 -0
- package/dist/orchestrators/crawl-all.d.ts +2 -0
- package/dist/orchestrators/crawl-all.js +66 -0
- package/dist/orchestrators/discover-all.d.ts +10 -0
- package/dist/orchestrators/discover-all.js +39 -0
- package/dist/threads/pool.d.ts +5 -0
- package/dist/threads/pool.js +23 -0
- package/dist/threads/process-url.d.ts +9 -0
- package/dist/threads/process-url.js +229 -0
- package/dist/types/index.d.ts +83 -0
- package/dist/types/index.js +6 -0
- package/dist/utils/config.d.ts +17 -0
- package/dist/utils/config.js +57 -0
- package/dist/utils/google-search.d.ts +19 -0
- package/dist/utils/google-search.js +139 -0
- package/dist/utils/llm.d.ts +8 -0
- package/dist/utils/llm.js +25 -0
- package/package.json +42 -0
- package/src/core/aggregators/yc.ts +415 -0
- package/src/core/browser.ts +239 -0
- package/src/core/detect-provider.ts +162 -0
- package/src/core/discover-careers.ts +117 -0
- package/src/core/extract-jobs.ts +50 -0
- package/src/core/fetch-page.ts +41 -0
- package/src/core/format-output.ts +80 -0
- package/src/core/match-jobs.ts +56 -0
- package/src/core/providers/ashby.ts +84 -0
- package/src/core/providers/generic.ts +332 -0
- package/src/core/providers/greenhouse.ts +74 -0
- package/src/core/providers/lever.ts +90 -0
- package/src/core/resolve-iframe.ts +59 -0
- package/src/core/save-raw.ts +18 -0
- package/src/data/companies.ts +2859 -0
- package/src/entrypoints/cli/app.tsx +173 -0
- package/src/entrypoints/cli/components/crawl-view.tsx +163 -0
- package/src/entrypoints/cli/components/discover-view.tsx +138 -0
- package/src/entrypoints/cli/crawl-aggregators.ts +112 -0
- package/src/entrypoints/cli/crawl-url.ts +87 -0
- package/src/entrypoints/cli/crawl.ts +163 -0
- package/src/entrypoints/cli/discover.ts +96 -0
- package/src/entrypoints/cli/index.ts +252 -0
- package/src/entrypoints/cli/init.ts +117 -0
- package/src/entrypoints/cli/plain.ts +104 -0
- package/src/events.ts +79 -0
- package/src/orchestrators/crawl-all.ts +96 -0
- package/src/orchestrators/discover-all.ts +61 -0
- package/src/threads/pool.ts +29 -0
- package/src/threads/process-url.ts +312 -0
- package/src/types/index.ts +110 -0
- package/src/utils/config.ts +79 -0
- package/src/utils/google-search.ts +155 -0
- package/src/utils/llm.ts +33 -0
- package/test/integration/process-url.test.ts +301 -0
- package/test/integration/providers/ashby.test.ts +163 -0
- package/test/integration/providers/greenhouse.test.ts +191 -0
- package/test/integration/providers/lever.test.ts +188 -0
- package/test/unit/config.test.ts +64 -0
- package/test/unit/detect-provider.test.ts +165 -0
- package/test/unit/events.test.ts +104 -0
- package/test/unit/format-output.test.ts +165 -0
- package/test/unit/match-jobs.test.ts +257 -0
- package/test/unit/pool.test.ts +74 -0
- package/test/unit/providers/generic.test.ts +139 -0
- package/test/unit/resolve-iframe.test.ts +100 -0
- package/tsconfig.json +19 -0
- package/vitest.config.ts +7 -0
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import { readFileSync, existsSync } from "node:fs";
|
|
3
|
+
import { homedir } from "node:os";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import { saveRawResponse } from "../save-raw.js";
|
|
6
|
+
// Number of hits requested per Algolia page (sent as `hitsPerPage`).
const HITS_PER_PAGE = 100;
// Index queried when there is no keyword.
const INDEX_BY_DATE = "WaaSPublicCompanyJob_created_at_desc_production";
// Index queried when a keyword is present.
const INDEX_BY_RELEVANCE = "WaaSPublicCompanyJob_production";

// --- Value mapping tables ---
// Each table maps a search-criteria value (key) to the token placed in the
// Algolia filter string (value). The tables are frozen because they are shared
// module-level state that must never be modified at runtime.

// criteria.role -> `role` filter value.
const ROLE_MAP = Object.freeze({
    engineering: "eng",
    design: "design",
    product: "product",
    science: "science",
    sales: "sales",
    marketing: "marketing",
    support: "support",
    operations: "operations",
    "recruiting-hr": "recruiting",
    finance: "finance",
    legal: "legal",
});

// criteria.roleType -> `eng_type` filter value (used when the role is "eng").
const ENG_TYPE_MAP = Object.freeze({
    android: "android",
    backend: "be",
    "data-science": "data_sci",
    devops: "devops",
    "embedded-systems": "embedded",
    "engineering-manager": "eng_mgmt",
    frontend: "fe",
    "full-stack": "fs",
    ios: "ios",
    "machine-learning": "ml",
    "qa-engineer": "qa",
    robotics: "robotics",
    hardware: "hw",
    electrical: "electrical",
    mechanical: "mechanical",
    bioengineering: "bio",
    "chemical-engineering": "chemical",
});

// criteria.roleType -> `design_type` filter value (used when the role is "design").
const DESIGN_TYPE_MAP = Object.freeze({
    "web-design": "web",
    "mobile-design": "mobile",
    "product-design": "product",
    "ui-ux": "ui_ux",
    "user-research": "user_research",
    "brand-graphic-design": "brand_graphic",
    illustration: "illustration",
    animation: "animation",
    hardware: "hardware",
    "3d-ar-vr": "ar_vr",
    "design-manager": "design_mgmt",
});

// criteria.roleType -> `science_type` filter value (used when the role is "science").
const SCIENCE_TYPE_MAP = Object.freeze({
    biology: "bio",
    biotechnology: "biotech",
    chemistry: "chem",
    genetics: "genetics",
    healthcare: "health",
    immunology: "immuno",
    laboratory: "lab",
    oncology: "onc",
    pharmacology: "pharma",
    "process-engineer": "process",
    research: "research",
});

// criteria.jobType -> `job_type` filter value.
const JOB_TYPE_MAP = Object.freeze({
    fulltime: "fulltime",
    internship: "intern",
    contract: "contract",
});

// criteria.companyStage -> `company_waas_stage` filter value.
const COMPANY_STAGE_MAP = Object.freeze({
    seed: "seed",
    "series-a": "series_a",
    growth: "growth",
    scale: "scale",
});

// criteria.workMode -> `remote` filter value.
const WORK_MODE_MAP = Object.freeze({
    remote: "only",
    onsite: "no",
    hybrid: "yes",
});

// Algolia role -> name of the filter field that holds that role's sub-type.
const ROLE_TYPE_FIELD_MAP = Object.freeze({
    eng: "eng_type",
    design: "design_type",
    science: "science_type",
});

// Algolia role -> table used to translate criteria.roleType for that role.
const ROLE_TYPE_VALUE_MAP = Object.freeze({
    eng: ENG_TYPE_MAP,
    design: DESIGN_TYPE_MAP,
    science: SCIENCE_TYPE_MAP,
});

// criteria.companySize bucket -> complete numeric filter clause on `company_team_size`.
const COMPANY_SIZE_MAP = Object.freeze({
    "1-10": "company_team_size <= 10",
    "11-50": "company_team_size: 11 TO 50",
    "51-300": "company_team_size: 51 TO 300",
    "301+": "company_team_size >= 301",
});
|
|
100
|
+
// --- Filter helpers ---
|
|
101
|
+
/**
 * Translate each value through a lookup table, leaving values that have no
 * entry unchanged.
 *
 * Only the table's own keys are consulted, so a value that happens to match an
 * inherited property name (e.g. "constructor" or "toString") is passed through
 * as-is instead of being replaced by the inherited member.
 *
 * @param {string[]} values - Values to translate.
 * @param {Object<string, string>} map - Lookup table (value -> replacement).
 * @returns {Array} A new array of the same length as `values`.
 */
function mapValues(values, map) {
    const hasOwn = Object.prototype.hasOwnProperty;
    return values.map((v) => (hasOwn.call(map, v) ? map[v] ?? v : v));
}
|
|
104
|
+
/**
 * Render a parenthesised OR-group that matches `field` against any of `values`.
 *
 * @param {string} field - Filter attribute name.
 * @param {Array} values - Values to match; each becomes one `field:value` clause.
 * @param {boolean} [quoted=false] - Wrap every value in double quotes.
 * @returns {string} e.g. `(role:eng OR role:design)`; `()` when `values` is empty.
 */
function orFilter(field, values, quoted = false) {
    const clauses = [];
    for (const value of values) {
        const rendered = quoted ? `"${value}"` : `${value}`;
        clauses.push(`${field}:${rendered}`);
    }
    const joined = clauses.join(" OR ");
    return `(${joined})`;
}
|
|
108
|
+
/**
 * Build the Algolia `filters` expression for a job search.
 *
 * Every criterion becomes one parenthesised clause and the clauses are joined
 * with " AND ". When a criterion is absent from `criteria`, the corresponding
 * query parameter of `sourceUrl` is used instead (role, role_type, remote,
 * locations, jobType, minExperience, companyStage, industry); a parameter
 * value of "any" means "no filter".
 *
 * @param {object} criteria - Search criteria; every field read here is optional.
 * @param {string} sourceUrl - Absolute URL whose query string supplies fallbacks.
 * @returns {string} The filter expression, or "" when no clause applies.
 * @throws {TypeError} If `sourceUrl` cannot be parsed as an absolute URL.
 */
function buildFilters(criteria, sourceUrl) {
    const params = new URL(sourceUrl).searchParams;
    const filters = [];
    // Look up only a table's own keys so that inherited names such as
    // "constructor" are never mistaken for a mapping.
    const ownEntry = (table, key) => Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
    // role
    const roles = criteria.role
        ? mapValues(criteria.role, ROLE_MAP)
        : fallbackArray(params, "role");
    if (roles.length > 0)
        filters.push(orFilter("role", roles));
    // roleType → field depends on role
    if (criteria.roleType && criteria.role) {
        for (const algoliaRole of roles) {
            const field = ownEntry(ROLE_TYPE_FIELD_MAP, algoliaRole);
            const valueMap = ownEntry(ROLE_TYPE_VALUE_MAP, algoliaRole);
            if (!field || !valueMap)
                continue;
            const mapped = mapValues(criteria.roleType, valueMap);
            // An empty roleType list would otherwise produce the invalid clause "()".
            if (mapped.length > 0)
                filters.push(orFilter(field, mapped));
        }
    }
    else {
        // The URL fallback always targets eng_type.
        const roleTypeParam = params.get("role_type");
        if (roleTypeParam && roleTypeParam !== "any")
            filters.push(`(eng_type:${roleTypeParam})`);
    }
    // workMode → remote
    const remoteModes = criteria.workMode
        ? mapValues(criteria.workMode, WORK_MODE_MAP)
        : fallbackArray(params, "remote");
    if (remoteModes.length > 0)
        filters.push(orFilter("remote", remoteModes));
    // location
    const location = criteria.location ?? params.get("locations");
    if (location && location !== "any")
        filters.push(`(locations_for_search:"${location}")`);
    // jobType
    const jobTypes = criteria.jobType
        ? mapValues(criteria.jobType, JOB_TYPE_MAP)
        : fallbackArray(params, "jobType");
    if (jobTypes.length > 0)
        filters.push(orFilter("job_type", jobTypes, true));
    // minExperience
    const minExps = criteria.minExperience
        ? criteria.minExperience.map(String)
        : fallbackArray(params, "minExperience");
    if (minExps.length > 0)
        filters.push(orFilter("min_experience", minExps));
    // companyStage
    const stages = criteria.companyStage
        ? mapValues(criteria.companyStage, COMPANY_STAGE_MAP)
        : fallbackArray(params, "companyStage");
    if (stages.length > 0)
        filters.push(orFilter("company_waas_stage", stages));
    // industry
    const industries = criteria.industry ?? fallbackQuotedArray(params, "industry");
    if (industries.length > 0)
        filters.push(orFilter("company_parent_sector", industries, true));
    // companySize → range syntax; unknown size buckets are dropped
    if (criteria.companySize && criteria.companySize.length > 0) {
        const rangeClauses = criteria.companySize
            .map((s) => ownEntry(COMPANY_SIZE_MAP, s))
            .filter(Boolean);
        if (rangeClauses.length > 0)
            filters.push(`(${rangeClauses.join(" OR ")})`);
    }
    // hasSalary
    if (criteria.hasSalary)
        filters.push("(has_salary:true)");
    // hasEquity
    if (criteria.hasEquity)
        filters.push("(has_equity:true)");
    // hasInterviewProcess
    if (criteria.hasInterviewProcess)
        filters.push("(has_interview_process:true)");
    // visaSponsorship
    if (criteria.visaSponsorship)
        filters.push("(us_visa_required:none OR us_visa_required:possible)");
    return filters.join(" AND ");
}
|
|
188
|
+
/**
 * Read a single query parameter as a filter-value list.
 *
 * @param {URLSearchParams} params - Query parameters to read from.
 * @param {string} key - Parameter name.
 * @returns {string[]} `[value]`, or `[]` when the parameter is missing, empty or "any".
 */
function fallbackArray(params, key) {
    const raw = params.get(key);
    const isUsable = Boolean(raw) && raw !== "any";
    return isUsable ? [raw] : [];
}
|
|
194
|
+
/**
 * Read a single query parameter as a list of values destined for a quoted
 * filter. The value itself is returned unquoted; quoting is left to the caller.
 *
 * @param {URLSearchParams} params - Query parameters to read from.
 * @param {string} key - Parameter name.
 * @returns {string[]} `[value]`, or `[]` when the parameter is missing, empty or "any".
 */
function fallbackQuotedArray(params, key) {
    const raw = params.get(key);
    if (raw && raw !== "any") {
        return [raw];
    }
    return [];
}
|
|
200
|
+
// --- Algolia API ---
|
|
201
|
+
/**
 * Request one page of results from the Algolia multi-query endpoint.
 *
 * The search parameters are serialised as a URL-encoded string and sent in the
 * `params` field of a single-request JSON body. The application id and API key
 * travel in the query string and are percent-encoded so that reserved
 * characters in either value cannot alter the URL.
 *
 * @param {string} appId - Algolia application id (also used in the host name).
 * @param {string} apiKey - Algolia API key.
 * @param {string} query - Search text; may be "".
 * @param {string} filters - Algolia filter expression; may be "".
 * @param {string} indexName - Index to query.
 * @param {number} page - Zero-based page number.
 * @returns {Promise<object>} The parsed JSON response body.
 * @throws {Error} When the HTTP status is not 2xx.
 */
async function fetchPage(appId, apiKey, query, filters, indexName, page) {
    const algoliaParams = new URLSearchParams({
        query,
        page: String(page),
        filters,
        attributesToRetrieve: JSON.stringify(["*"]),
        attributesToHighlight: JSON.stringify([]),
        attributesToSnippet: JSON.stringify([]),
        hitsPerPage: String(HITS_PER_PAGE),
        clickAnalytics: "true",
        distinct: "true",
    });
    const agent = encodeURIComponent("Algolia for JavaScript (3.35.1); Browser");
    const url = `https://${appId}-dsn.algolia.net/1/indexes/*/queries` +
        `?x-algolia-agent=${agent}` +
        `&x-algolia-application-id=${encodeURIComponent(appId)}` +
        `&x-algolia-api-key=${encodeURIComponent(apiKey)}`;
    const response = await fetch(url, {
        method: "POST",
        headers: {
            accept: "application/json",
            // The body is JSON even though the declared content type is form-urlencoded.
            "content-type": "application/x-www-form-urlencoded",
            Origin: "https://www.workatastartup.com",
            Referer: "https://www.workatastartup.com/",
        },
        body: JSON.stringify({
            requests: [
                {
                    indexName,
                    params: algoliaParams.toString(),
                },
            ],
        }),
    });
    if (!response.ok) {
        throw new Error(`Algolia API returned ${response.status}`);
    }
    return (await response.json());
}
|
|
236
|
+
// --- Job mapping ---
|
|
237
|
+
/**
 * Convert one Algolia hit into a job record.
 *
 * The record id is the first 12 hex characters of the SHA-256 digest of
 * `"<objectID>:<title>:yc"`. The description is cut to 200 characters and the
 * untouched hit is kept under `raw`.
 *
 * @param {object} hit - Algolia hit.
 * @param {string} sourceUrl - URL the crawl started from; copied to the record.
 * @returns {object} The job record.
 */
function mapHit(hit, sourceUrl) {
    const fingerprint = `${hit.objectID}:${hit.title}:yc`;
    const digest = createHash("sha256").update(fingerprint).digest("hex");
    // NOTE(review): `search_path` is used verbatim; if it is a site-relative
    // path the resulting `url` is relative too — confirm against real hits.
    const defaultUrl = `https://www.workatastartup.com/jobs/${hit.objectID}`;
    const firstLocation = hit.locations_for_search?.[0];
    const shortDescription = hit.description?.slice(0, 200);
    return {
        id: digest.slice(0, 12),
        title: hit.title,
        company: hit.company_name,
        location: firstLocation ?? null,
        workMode: inferWorkMode(hit.remote),
        department: hit.role ?? null,
        url: hit.search_path ?? defaultUrl,
        sourceUrl,
        provider: "yc",
        description: shortDescription ?? null,
        postedAt: hit.created_at ?? null,
        extractedAt: new Date().toISOString(),
        raw: hit,
    };
}
|
|
260
|
+
/**
 * Translate the `remote` value of an Algolia hit into a work mode.
 *
 * This is the inverse of the mapping used when building the `remote` filter
 * (remote → "only", hybrid → "yes", onsite → "no"), so a job fetched with a
 * given work-mode filter is labelled with that same work mode. The comparison
 * is case-insensitive.
 *
 * NOTE(review): assumes the job record's `workMode` accepts "hybrid" and
 * "onsite" as well as "remote" — confirm against the shared type definitions.
 *
 * @param {unknown} remote - Raw `remote` field of the hit.
 * @returns {"remote" | "hybrid" | "onsite" | null} The work mode, or null when
 *   the value is missing, not a string, or not recognised.
 */
function inferWorkMode(remote) {
    // Guard against missing or non-string values instead of throwing on them.
    if (typeof remote !== "string")
        return null;
    switch (remote.toLowerCase()) {
        case "only":
            return "remote";
        case "yes":
            return "hybrid";
        case "no":
            return "onsite";
        default:
            return null;
    }
}
|
|
270
|
+
// --- Entry point ---
|
|
271
|
+
/**
 * Collect jobs from YC's Work at a Startup through its Algolia search index.
 *
 * Credentials come from the YC_ALGOLIA_APP_ID / YC_ALGOLIA_API_KEY environment
 * variables; anything missing is filled in from ~/.jobcrawl/credentials.json
 * (`yc.algoliaAppId`, `yc.algoliaApiKey`). Each keyword is run as its own
 * query, every result page is fetched in order, and hits are de-duplicated by
 * `objectID`. With no keywords a single empty query is run against the
 * date-ordered index. Filters are derived from `criteria`, falling back to the
 * query parameters of `sourceUrl`.
 *
 * @param {string} sourceUrl - Work at a Startup URL the crawl was started from.
 * @param {object} criteria - Search criteria; `keywords` must be an array.
 * @param {boolean} [saveRaw=false] - Also persist the raw, de-duplicated hits.
 * @returns {Promise<object[]>} One job record per unique hit, in discovery order.
 * @throws {Error} When credentials are missing or an Algolia request fails.
 */
export async function extractYcJobs(sourceUrl, criteria, saveRaw = false) {
    // Environment variables win; the credentials file only fills the gaps.
    let appId = process.env.YC_ALGOLIA_APP_ID;
    let apiKey = process.env.YC_ALGOLIA_API_KEY;
    if (!(appId && apiKey)) {
        const credPath = join(homedir(), ".jobcrawl", "credentials.json");
        if (existsSync(credPath)) {
            const creds = JSON.parse(readFileSync(credPath, "utf-8"));
            appId = appId || creds.yc?.algoliaAppId;
            apiKey = apiKey || creds.yc?.algoliaApiKey;
        }
    }
    if (!(appId && apiKey)) {
        throw new Error("YC provider requires credentials. Run `jobcrawl init` and add your Algolia keys to ~/.jobcrawl/credentials.json");
    }
    const hasKeywords = criteria.keywords.length > 0;
    const queries = hasKeywords ? criteria.keywords : [""];
    const filters = buildFilters(criteria, sourceUrl);
    const jobsById = new Map();
    const rawHits = [];
    // Record every hit whose objectID has not been seen in an earlier page or query.
    const collect = (hits) => {
        for (const hit of hits) {
            if (jobsById.has(hit.objectID))
                continue;
            jobsById.set(hit.objectID, mapHit(hit, sourceUrl));
            rawHits.push(hit);
        }
    };
    for (const query of queries) {
        // Keyword searches use the relevance index; the empty query uses the date-ordered one.
        const indexName = query ? INDEX_BY_RELEVANCE : INDEX_BY_DATE;
        const firstResponse = await fetchPage(appId, apiKey, query, filters, indexName, 0);
        const firstResult = firstResponse.results[0];
        collect(firstResult.hits);
        // The page count reported by the first page bounds the remaining requests.
        for (let page = 1; page < firstResult.nbPages; page++) {
            const nextResponse = await fetchPage(appId, apiKey, query, filters, indexName, page);
            collect(nextResponse.results[0].hits);
        }
    }
    if (saveRaw) {
        await saveRawResponse("yc", "workatastartup", rawHits);
    }
    return Array.from(jobsById.values());
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { ProviderDetection } from "../types/index.js";
|
|
2
|
+
/**
 * When a career page embeds an ATS via iframe, extract the board token
 * from the iframe src URL so we can call the ATS JSON API directly.
 *
 * @param iframeSrc - The iframe `src` value; protocol-relative (`//host/...`)
 *   URLs are accepted.
 * @returns The detected provider and its board token, or `null` when the URL
 *   is invalid or does not match a recognised ATS pattern.
 */
|
|
6
|
+
export declare function resolveIframeToken(iframeSrc: string): ProviderDetection | null;
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/**
 * When a career page embeds an ATS via iframe, extract the board token
 * from the iframe src URL so we can call the ATS JSON API directly.
 *
 * Recognised patterns:
 * - Greenhouse embed: any URL whose path contains `/embed/job_board`, token in `?for=`
 * - Greenhouse board: `boards.greenhouse.io/{token}` or `job-boards.greenhouse.io/{token}`
 *   (for other `/embed/...` paths on these hosts the token is read from `?for=`)
 * - Lever: `jobs.lever.co/{company}`
 * - Ashby: `jobs.ashbyhq.com/{company}`
 * - Workday: `{tenant}.wd{n}.myworkdayjobs.com`
 * - BambooHR: `{company}.bamboohr.com`
 *
 * @param {string} iframeSrc - The iframe `src`; protocol-relative URLs are accepted.
 * @returns {{provider: string, boardToken: string} | null} The detected provider
 *   and token, or null when the URL is invalid or unrecognised.
 */
export function resolveIframeToken(iframeSrc) {
    let url;
    try {
        url = new URL(iframeSrc.startsWith("//") ? `https:${iframeSrc}` : iframeSrc);
    }
    catch {
        // Invalid URL (or a non-string src)
        return null;
    }
    const host = url.hostname;
    const firstSegment = url.pathname.split("/").filter(Boolean)[0];
    const embedToken = url.searchParams.get("for");
    // Greenhouse embed: boards.greenhouse.io/embed/job_board/js?for={token}
    if (url.pathname.includes("/embed/job_board") && embedToken) {
        return { provider: "greenhouse", boardToken: embedToken };
    }
    // Greenhouse iframe: boards.greenhouse.io/{token} or job-boards.greenhouse.io/{token}
    if (host === "boards.greenhouse.io" || host === "job-boards.greenhouse.io") {
        // "embed" is a route prefix, never a board token; embed routes carry
        // the token in the `for` query parameter instead.
        const token = firstSegment === "embed" ? embedToken : firstSegment;
        if (token)
            return { provider: "greenhouse", boardToken: token };
    }
    // Lever iframe: jobs.lever.co/{company}
    if (host === "jobs.lever.co" && firstSegment) {
        return { provider: "lever", boardToken: firstSegment };
    }
    // Ashby iframe: jobs.ashbyhq.com/{company}
    if (host === "jobs.ashbyhq.com" && firstSegment) {
        return { provider: "ashby", boardToken: firstSegment };
    }
    // The first host label is the tenant / company for the two providers below.
    const firstLabel = host.split(".")[0];
    // Workday iframe: {tenant}.wd{n}.myworkdayjobs.com
    // Match on the host suffix so look-alike hosts such as
    // "x.myworkdayjobs.com.evil.example" are not accepted.
    if (host.endsWith(".myworkdayjobs.com") && firstLabel) {
        return { provider: "workday", boardToken: firstLabel };
    }
    // BambooHR iframe: {company}.bamboohr.com
    if (host.endsWith(".bamboohr.com") && firstLabel) {
        return { provider: "bamboohr", boardToken: firstLabel };
    }
    return null;
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { mkdir, writeFile } from "node:fs/promises";
|
|
2
|
+
import { homedir } from "node:os";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
/**
 * Save a raw API response to ~/.jobcrawl/raw/{provider}/{boardToken}_{timestamp}.json
 *
 * `provider` and `boardToken` can originate from crawled pages, so every
 * character outside `[A-Za-z0-9._-]` is replaced with "_" before the values
 * are used in the path. This keeps the file inside the raw directory even when
 * a token contains path separators or `..` segments.
 *
 * @param {string} provider - Provider name; becomes the sub-directory.
 * @param {string} boardToken - Board identifier; becomes the file-name prefix.
 * @param {unknown} data - JSON-serialisable payload, written with 2-space indentation.
 * @returns {Promise<void>} Resolves once the file has been written.
 */
export async function saveRawResponse(provider, boardToken, data) {
    const sanitize = (value) => String(value).replace(/[^A-Za-z0-9._-]/g, "_");
    // A directory name of "", "." or ".." would still escape or collapse the path.
    const cleanedProvider = sanitize(provider);
    const providerDir = ["", ".", ".."].includes(cleanedProvider) ? "_" : cleanedProvider;
    // ":" and "." in the ISO timestamp are replaced so the name is valid on every platform.
    const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
    const dir = join(homedir(), ".jobcrawl", "raw", providerDir);
    await mkdir(dir, { recursive: true });
    const filename = `${sanitize(boardToken)}_${timestamp}.json`;
    await writeFile(join(dir, filename), JSON.stringify(data, null, 2));
}
|