@servation/job-search-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,829 @@
1
+ import { globalState, addRefinerLog } from "./config.js";
2
+ import { readDb, writeDb } from "./db.js";
3
+ import { communitySlugToName, matchesKeywords, isBlocklistedRole, matchesLocation, stripHtmlCommunity, } from "./utils.js";
4
+ const LINKEDIN_UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"; // Desktop-Chrome User-Agent string sent with the LinkedIn requests made in this module.
5
/**
 * Refresh the cached company directories and URL templates from the remote registry.
 *
 * The call is throttled: once an attempt has been made (successful or not) no new
 * request is issued for twelve hours. Only fields of the expected shape are copied
 * into `globalState`; anything else in the payload is ignored.
 *
 * The request is bounded by an abort timeout so a stalled connection cannot block
 * the caller forever (every other fetch in this module is bounded the same way).
 *
 * @returns {Promise<void>} Resolves when the attempt is finished; never rejects.
 */
export async function updateCompanyDirectoriesFromRegistry() {
    const now = Date.now();
    const TWELVE_HOURS_MS = 12 * 60 * 60 * 1000;
    const REGISTRY_TIMEOUT_MS = 10000;
    if (now - globalState.lastRegistryFetchTime < TWELVE_HOURS_MS && globalState.lastRegistryFetchTime !== 0) {
        return; // memory cache is fresh
    }
    console.error("[Registry] Checking for company directory updates from remote registry...");
    const ctrl = new AbortController();
    const tid = setTimeout(() => ctrl.abort(), REGISTRY_TIMEOUT_MS);
    try {
        const response = await fetch("https://raw.githubusercontent.com/Servation/job-search-agent-slugs/main/slugs.json", {
            headers: { "User-Agent": "Mozilla/5.0" },
            signal: ctrl.signal,
        });
        if (response.ok) {
            const data = await response.json();
            if (data) {
                if (Array.isArray(data.greenhouse))
                    globalState.cachedGreenhouseSlugs = data.greenhouse;
                if (Array.isArray(data.lever))
                    globalState.cachedLeverSlugs = data.lever;
                if (Array.isArray(data.ashby))
                    globalState.cachedAshbySlugs = data.ashby;
                if (Array.isArray(data.workday))
                    globalState.cachedWorkdayDirectory = data.workday;
                if (Array.isArray(data.smartrecruiters))
                    globalState.cachedSmartRecruitersDirectory = data.smartrecruiters;
                if (data.templates) {
                    if (data.templates.workdaySearch)
                        globalState.templates.workdaySearch = data.templates.workdaySearch;
                    if (data.templates.workdayDetails)
                        globalState.templates.workdayDetails = data.templates.workdayDetails;
                    if (data.templates.smartrecruitersPostings)
                        globalState.templates.smartrecruitersPostings = data.templates.smartrecruitersPostings;
                    if (data.templates.smartrecruitersDetails)
                        globalState.templates.smartrecruitersDetails = data.templates.smartrecruitersDetails;
                }
                globalState.lastRegistryFetchTime = now;
                console.error("[Registry] Updated company directories from remote registry.");
                return;
            }
        }
    }
    catch (err) {
        console.error("[Registry] Remote registry update failed (using static lists):", err?.message);
    }
    finally {
        // Runs on success, failure and early return, so the timer never outlives the call.
        clearTimeout(tid);
    }
    // Set the timestamp even on failure to avoid hammering the request within a run.
    globalState.lastRegistryFetchTime = now;
}
52
/**
 * Run an async, array-returning mapper over `items` in batches and concatenate the results.
 *
 * Each batch runs concurrently; rejected items are skipped (best effort). A 200ms
 * pause is inserted between batches, but not after the last one.
 *
 * @param {Array} items - Inputs to process.
 * @param {(item: any) => Promise<Iterable>} fn - Mapper; its resolved value is flattened into the result.
 * @param {number} batchSize - Items per batch. Values below 1 or non-numeric values fall back to 1,
 *   because a zero/negative step would never advance the loop.
 * @returns {Promise<Array>} Flattened results in input order.
 */
async function batchPromises(items, fn, batchSize) {
    const size = typeof batchSize === "number" && batchSize >= 1 ? Math.floor(batchSize) : 1;
    const results = [];
    for (let i = 0; i < items.length; i += size) {
        const batch = items.slice(i, i + size);
        const settled = await Promise.allSettled(batch.map((item) => fn(item)));
        for (const outcome of settled) {
            if (outcome.status !== "fulfilled")
                continue;
            // Element-wise push: spreading a very large array into push() can exceed
            // the engine's argument limit and throw a RangeError.
            for (const entry of outcome.value) {
                results.push(entry);
            }
        }
        if (i + size < items.length) {
            await new Promise((r) => setTimeout(r, 200));
        }
    }
    return results;
}
68
/**
 * Fold Workday fetch outcomes into the persisted dynamic directory.
 *
 * A failed fetch increments the matching entry's `consecutiveFailures`; at five the
 * entry is logged via `addRefinerLog` and removed. A successful fetch resets a
 * non-zero counter. The database is written back only when something changed.
 * Companies that are not present in the stored directory are ignored.
 *
 * @param {Array<{company: {tenant: string}, failed: boolean}>} failures - Outcome per company.
 */
function applyDynamicCompanyFailures(failures) {
    if (failures.length === 0) {
        return;
    }
    const db = readDb();
    if (!db.workdayDirectory) {
        return;
    }
    // Case-insensitive tenant -> array position lookup.
    const positionByTenant = new Map();
    db.workdayDirectory.forEach((entry, position) => {
        positionByTenant.set(entry.tenant.toLowerCase(), position);
    });
    let dirty = false;
    for (const outcome of failures) {
        const position = positionByTenant.get(outcome.company.tenant.toLowerCase());
        if (position === undefined) {
            continue;
        }
        const entry = db.workdayDirectory[position];
        if (!entry) {
            continue; // already pruned earlier in this pass
        }
        if (!outcome.failed) {
            if (entry.consecutiveFailures && entry.consecutiveFailures > 0) {
                entry.consecutiveFailures = 0;
                dirty = true;
            }
            continue;
        }
        entry.consecutiveFailures = (entry.consecutiveFailures || 0) + 1;
        if (entry.consecutiveFailures >= 5) {
            addRefinerLog(`Removed broken company board: ${entry.name}.`);
            db.workdayDirectory[position] = null;
        }
        dirty = true;
    }
    if (!dirty) {
        return;
    }
    db.workdayDirectory = db.workdayDirectory.filter(Boolean);
    writeDb(db);
}
105
/**
 * Collect matching jobs from the Greenhouse boards API for every slug.
 *
 * Boards are fetched through `batchPromises` (8 per batch), each request bounded by
 * an 8s abort timeout. A board that fails, times out or answers non-2xx contributes
 * no jobs.
 *
 * @param {string[]} slugs - Greenhouse board slugs.
 * @param {string[]} keywords - Passed to `matchesKeywords` with the job title.
 * @param {string[]} targetRoles - Passed to `isBlocklistedRole`.
 * @param {string} searchLocation - Passed to `matchesLocation`.
 * @param {boolean} prefersRemote - Passed to `matchesLocation`.
 * @param {number} [yearsOfExperience=0] - Passed to `isBlocklistedRole`.
 * @returns {Promise<object[]>} Normalised job records with `source: "greenhouse"`.
 */
export async function fetchGreenhouseJobs(slugs, keywords, targetRoles, searchLocation, prefersRemote, yearsOfExperience = 0) {
    const isWanted = (posting) => {
        const title = posting.title || "";
        const place = posting.location?.name || "";
        return (matchesKeywords(title, keywords) &&
            !isBlocklistedRole(title, targetRoles, yearsOfExperience) &&
            matchesLocation(place, searchLocation, prefersRemote));
    };
    const loadBoard = async (slug) => {
        const aborter = new AbortController();
        const timer = setTimeout(() => aborter.abort(), 8000);
        try {
            const endpoint = `https://boards-api.greenhouse.io/v1/boards/${slug}/jobs?content=true`;
            const res = await fetch(endpoint, {
                signal: aborter.signal,
                headers: { "User-Agent": "Mozilla/5.0" },
            });
            clearTimeout(timer);
            if (!res.ok) {
                return [];
            }
            const payload = await res.json();
            const companyName = communitySlugToName(slug);
            const toRecord = (posting) => ({
                title: posting.title || "Unknown Role",
                company: companyName,
                location: posting.location?.name || "Not specified",
                description: stripHtmlCommunity(posting.content || "").slice(0, 15000),
                url: posting.absolute_url || "",
                postedAt: posting.updated_at || new Date().toISOString(),
                type: "Full-Time",
                isRemote: (posting.location?.name || "").toLowerCase().includes("remote"),
                source: "greenhouse",
            });
            return (payload.jobs || []).filter(isWanted).map(toRecord);
        }
        catch {
            clearTimeout(timer);
            return [];
        }
    };
    return batchPromises(slugs, loadBoard, 8);
}
145
/**
 * Collect matching jobs from the Lever postings API for every slug.
 *
 * Boards are fetched through `batchPromises` (2 per batch), each request bounded by
 * an 8s abort timeout. Failures, non-2xx answers and non-array payloads yield no jobs.
 *
 * @param {string[]} slugs - Lever company slugs.
 * @param {string[]} keywords - Passed to `matchesKeywords` with the posting title.
 * @param {string[]} targetRoles - Passed to `isBlocklistedRole`.
 * @param {string} searchLocation - Passed to `matchesLocation`.
 * @param {boolean} prefersRemote - Passed to `matchesLocation`.
 * @param {number} [yearsOfExperience=0] - Passed to `isBlocklistedRole`.
 * @returns {Promise<object[]>} Normalised job records with `source: "lever"`.
 */
export async function fetchLeverJobs(slugs, keywords, targetRoles, searchLocation, prefersRemote, yearsOfExperience = 0) {
    const placeOf = (posting) => posting.categories?.location || posting.location || "";
    const isWanted = (posting) => {
        const title = posting.text || "";
        const place = placeOf(posting);
        return (matchesKeywords(title, keywords) &&
            !isBlocklistedRole(title, targetRoles, yearsOfExperience) &&
            matchesLocation(place, searchLocation, prefersRemote));
    };
    const loadBoard = async (slug) => {
        const aborter = new AbortController();
        const timer = setTimeout(() => aborter.abort(), 8000);
        try {
            const res = await fetch(`https://api.lever.co/v0/postings/${slug}?mode=json`, {
                signal: aborter.signal,
                headers: { "User-Agent": "Mozilla/5.0" },
            });
            clearTimeout(timer);
            if (!res.ok) {
                return [];
            }
            const postings = await res.json();
            if (!Array.isArray(postings)) {
                return [];
            }
            const companyName = communitySlugToName(slug);
            const toRecord = (posting) => {
                const range = posting.salaryRange;
                // A salary string is produced only when both bounds are present and non-zero.
                let salary;
                if (range?.min && range?.max) {
                    salary = `${range.currency || "USD"} ${Math.round(range.min / 1000)}k–${Math.round(range.max / 1000)}k`;
                }
                const place = placeOf(posting);
                const postedAt = posting.createdAt
                    ? new Date(posting.createdAt).toISOString()
                    : new Date().toISOString();
                return {
                    title: posting.text || "Unknown Role",
                    company: companyName,
                    location: place || "Not specified",
                    description: (posting.descriptionPlain || stripHtmlCommunity(posting.description || "")).slice(0, 15000),
                    url: posting.hostedUrl || posting.applyUrl || "",
                    applyUrl: posting.applyUrl,
                    postedAt,
                    type: posting.categories?.commitment || "Full-Time",
                    salary,
                    isRemote: posting.workplaceType === "remote" || place.toLowerCase().includes("remote"),
                    source: "lever",
                };
            };
            return postings.filter(isWanted).map(toRecord);
        }
        catch {
            clearTimeout(timer);
            return [];
        }
    };
    return batchPromises(slugs, loadBoard, 2);
}
196
/**
 * Collect matching, listed jobs from the Ashby public job-board API for every slug.
 *
 * Boards are fetched through `batchPromises` (4 per batch), each request bounded by
 * a 15s abort timeout. Failures and non-2xx answers yield no jobs.
 *
 * Improvements over the previous revision (all fall back to the old behaviour when
 * the field is absent):
 *  - `postedAt` uses the posting's `publishedAt` instead of always being "now";
 *  - `url` prefers the API-provided `jobUrl` over a hand-built one;
 *  - `isRemote` also honours the boolean `isRemote` field;
 *  - salary also reads `scrapeableCompensationSalarySummary` / `compensationTierSummary`.
 *
 * @param {string[]} slugs - Ashby job-board names.
 * @param {string[]} keywords - Passed to `matchesKeywords` with the job title.
 * @param {string[]} targetRoles - Passed to `isBlocklistedRole`.
 * @param {string} searchLocation - Passed to `matchesLocation`.
 * @param {boolean} prefersRemote - Passed to `matchesLocation`.
 * @param {number} [yearsOfExperience=0] - Passed to `isBlocklistedRole`.
 * @returns {Promise<object[]>} Normalised job records with `source: "ashby"`.
 */
export async function fetchAshbyJobs(slugs, keywords, targetRoles, searchLocation, prefersRemote, yearsOfExperience = 0) {
    return batchPromises(slugs, async (slug) => {
        const ctrl = new AbortController();
        const tid = setTimeout(() => ctrl.abort(), 15000);
        try {
            const res = await fetch(`https://api.ashbyhq.com/posting-api/job-board/${slug}?includeCompensation=true`, {
                signal: ctrl.signal,
                headers: { "User-Agent": "Mozilla/5.0" },
            });
            clearTimeout(tid);
            if (!res.ok)
                return [];
            const data = await res.json();
            const name = communitySlugToName(slug);
            return (data.jobs || [])
                .filter((j) => {
                if (!j.isListed)
                    return false;
                const title = j.title || "";
                const locName = j.location || "";
                return (matchesKeywords(title, keywords) &&
                    !isBlocklistedRole(title, targetRoles, yearsOfExperience) &&
                    matchesLocation(locName, searchLocation, prefersRemote));
            })
                .map((j) => {
                const isRemote = j.isRemote === true ||
                    j.workplaceType === "Remote" ||
                    (j.location || "").toLowerCase().includes("remote");
                const desc = (j.descriptionPlain || (j.descriptionHtml ? stripHtmlCommunity(j.descriptionHtml) : "")).slice(0, 15000);
                let salaryStr = "Not specified";
                const comp = j.compensation;
                if (comp) {
                    const summary = comp.summary || comp.scrapeableCompensationSalarySummary || comp.compensationTierSummary;
                    if (summary) {
                        salaryStr = summary;
                    }
                    else if (comp.minValue && comp.maxValue) {
                        const cur = comp.currencyCode || "USD";
                        salaryStr = `${cur} ${Math.round(comp.minValue / 1000)}k–${Math.round(comp.maxValue / 1000)}k`;
                    }
                }
                return {
                    title: j.title || "Unknown Role",
                    company: name,
                    location: j.location || "Remote",
                    description: desc,
                    url: j.jobUrl || `https://jobs.ashbyhq.com/${slug}/${j.id}`,
                    // Fall back to "now" only when the board omits the publish timestamp.
                    postedAt: j.publishedAt || new Date().toISOString(),
                    type: j.employmentType === "Contract" ? "Contract" : j.employmentType === "PartTime" ? "Part-Time" : "Full-Time",
                    isRemote,
                    salary: salaryStr,
                    source: "ashby",
                };
            });
        }
        catch {
            clearTimeout(tid);
            return [];
        }
    }, 4);
}
253
/**
 * Collect matching jobs from Workday career sites.
 *
 * The supplied `companies` are merged with the dynamic directory stored in the
 * database (deduplicated by lower-cased tenant). Each company is searched through
 * `batchPromises` (3 per batch) with a 12s timeout, using the first target role
 * (or "Software Engineer") as the search text. Matching postings are enriched with
 * the full description from the per-job endpoint (6s timeout); when that request
 * fails a placeholder description is used instead.
 *
 * The location filter depends only on the listing row, so it runs BEFORE the
 * detail request: postings that would be discarded no longer cost a network call.
 *
 * Per-company success/failure is reported to `applyDynamicCompanyFailures`.
 *
 * @param {Array<{name: string, tenant: string, site: string, host?: string}>} companies - Static directory.
 * @param {string[]} keywords - Passed to `matchesKeywords` with the posting title.
 * @param {string[]} targetRoles - Passed to `isBlocklistedRole`; the first entry is the search text.
 * @param {string} searchLocation - Passed to `matchesLocation`.
 * @param {boolean} prefersRemote - Passed to `matchesLocation`.
 * @param {number} [yearsOfExperience=0] - Passed to `isBlocklistedRole`.
 * @returns {Promise<object[]>} Normalised job records with `source: "workday"`.
 */
export async function fetchWorkdayJobs(companies, keywords, targetRoles, searchLocation, prefersRemote, yearsOfExperience = 0) {
    const BROWSER_UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";
    const initialDb = readDb();
    const dynamicCompanies = initialDb.workdayDirectory || [];
    const mergedCompanies = [...companies];
    const seenTenants = new Set(mergedCompanies.map((c) => c.tenant.toLowerCase()));
    for (const c of dynamicCompanies) {
        const tenantKey = c.tenant.toLowerCase();
        if (!seenTenants.has(tenantKey)) {
            mergedCompanies.push(c);
            seenTenants.add(tenantKey);
        }
    }
    const failuresQueue = [];
    const results = await batchPromises(mergedCompanies, async (company) => {
        const ctrl = new AbortController();
        const tid = setTimeout(() => ctrl.abort(), 12000);
        const host = company.host || `${company.tenant}.myworkdayjobs.com`;
        const siteHeaders = {
            Origin: `https://${host}`,
            Referer: `https://${host}/en-US/${company.site}/`,
        };
        try {
            const queryText = targetRoles.length > 0 ? targetRoles[0] : "Software Engineer";
            const searchUrl = `https://${host}/wday/cxs/${company.tenant}/${company.site}/jobs`;
            const response = await fetch(searchUrl, {
                method: "POST",
                headers: {
                    "Content-Type": "application/json",
                    Accept: "application/json",
                    "User-Agent": BROWSER_UA,
                    ...siteHeaders,
                },
                body: JSON.stringify({ searchText: queryText, limit: 20, offset: 0, appliedFacets: {} }),
                signal: ctrl.signal,
            });
            clearTimeout(tid);
            if (!response.ok) {
                console.error(`[Workday] Fetch failed for ${company.name} (${host}): HTTP ${response.status}`);
                failuresQueue.push({ company, failed: true });
                return [];
            }
            const data = await response.json();
            failuresQueue.push({ company, failed: false });
            const postings = data.jobPostings || [];
            const matchingPostings = postings.filter((p) => {
                const title = p.title || "";
                return matchesKeywords(title, keywords) && !isBlocklistedRole(title, targetRoles, yearsOfExperience);
            });
            const detailedJobs = await Promise.all(matchingPostings.map(async (p) => {
                const pathParts = (p.externalPath || "").split("/");
                const jobId = pathParts[pathParts.length - 1];
                if (!jobId)
                    return null;
                // Location comes from the listing row, so filter before paying for the detail request.
                const loc = p.locationsText || "Specified on site";
                if (!matchesLocation(loc, searchLocation, prefersRemote))
                    return null;
                const job = {
                    title: p.title,
                    company: company.name,
                    location: loc,
                    // Placeholder; replaced below when the detail request succeeds.
                    description: "Position details available on application site.",
                    url: `https://${host}/en-US/${company.site}${p.externalPath}`,
                    postedAt: p.postedOn || new Date().toISOString(),
                    type: "Full-Time",
                    isRemote: loc.toLowerCase().includes("remote"),
                    source: "workday",
                };
                const detailUrl = `https://${host}/wday/cxs/${company.tenant}/${company.site}/job/${jobId}`;
                const dCtrl = new AbortController();
                const dTid = setTimeout(() => dCtrl.abort(), 6000);
                try {
                    const dRes = await fetch(detailUrl, {
                        headers: { "User-Agent": BROWSER_UA, ...siteHeaders },
                        signal: dCtrl.signal,
                    });
                    if (dRes.ok) {
                        const dData = await dRes.json();
                        job.description = stripHtmlCommunity(dData.jobPostingInfo?.jobDescription || "").slice(0, 15000);
                    }
                }
                catch (err) {
                    console.error(`[Workday] Details failed for ${company.name} job ${jobId}:`, err?.message);
                }
                finally {
                    clearTimeout(dTid);
                }
                return job;
            }));
            return detailedJobs.filter(Boolean);
        }
        catch (err) {
            clearTimeout(tid);
            console.error(`[Workday] Failed fetching ${company.name} jobs:`, err?.message);
            failuresQueue.push({ company, failed: true });
            return [];
        }
    }, 3);
    applyDynamicCompanyFailures(failuresQueue);
    return results;
}
365
/**
 * Collect matching jobs from SmartRecruiters for every company.
 *
 * All companies are queried concurrently (8s timeout each) using the URL templates
 * held in `globalState.templates`. Matching postings are enriched from the detail
 * endpoint (5s timeout); when that fails, the listing's own location and a
 * placeholder description are used. Failures are logged and yield no jobs.
 *
 * @param {Array<{name: string, slug: string}>} companies - Companies to query.
 * @param {string[]} keywords - Passed to `matchesKeywords` with the posting name.
 * @param {string[]} targetRoles - Passed to `isBlocklistedRole`.
 * @param {string} searchLocation - Passed to `matchesLocation`.
 * @param {boolean} prefersRemote - Passed to `matchesLocation`.
 * @param {number} [yearsOfExperience=0] - Passed to `isBlocklistedRole`.
 * @returns {Promise<object[]>} Normalised job records with `source: "smartrecruiters"`.
 */
export async function fetchSmartRecruitersJobs(companies, keywords, targetRoles, searchLocation, prefersRemote, yearsOfExperience = 0) {
    const BROWSER_UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";
    // "City, Region, Country" from whichever parts exist; "Remote" when none do.
    const describePlace = (place) => [place?.city, place?.region, place?.country].filter(Boolean).join(", ") || "Remote";
    const titlePasses = (posting) => {
        const title = posting.name || "";
        return matchesKeywords(title, keywords) && !isBlocklistedRole(title, targetRoles, yearsOfExperience);
    };
    const loadPosting = async (company, posting) => {
        const detailUrl = globalState.templates.smartrecruitersDetails.replace(/{slug}/g, company.slug).replace(/{id}/g, posting.id);
        const detailAborter = new AbortController();
        const detailTimer = setTimeout(() => detailAborter.abort(), 5000);
        try {
            const detailRes = await fetch(detailUrl, {
                headers: { "User-Agent": BROWSER_UA },
                signal: detailAborter.signal,
            });
            clearTimeout(detailTimer);
            if (detailRes.ok) {
                const detail = await detailRes.json();
                const sections = [
                    detail.jobAd?.sections?.jobDescription?.text || "",
                    detail.jobAd?.sections?.qualifications?.text || "",
                    detail.jobAd?.sections?.additionalInformation?.text || "",
                ];
                const html = sections.filter(Boolean).join("\n\n");
                const description = stripHtmlCommunity(html).slice(0, 15000);
                const place = describePlace(detail.location);
                if (!matchesLocation(place, searchLocation, prefersRemote)) {
                    return null;
                }
                return {
                    title: posting.name,
                    company: company.name,
                    location: place,
                    description,
                    url: `https://careers.smartrecruiters.com/${company.slug}/${posting.id}`,
                    postedAt: posting.releasedDate || new Date().toISOString(),
                    type: "Full-Time",
                    isRemote: place.toLowerCase().includes("remote") || detail.location?.remote === true,
                    source: "smartrecruiters",
                };
            }
        }
        catch (err) {
            clearTimeout(detailTimer);
            console.error(`[SmartRecruiters] Details failed for ${company.name} job ${posting.id}:`, err?.message);
        }
        // Detail unavailable: fall back to what the listing row itself carries.
        const place = describePlace(posting.location);
        if (!matchesLocation(place, searchLocation, prefersRemote)) {
            return null;
        }
        return {
            title: posting.name,
            company: company.name,
            location: place,
            description: "Position details available on application site.",
            url: `https://careers.smartrecruiters.com/${company.slug}/${posting.id}`,
            postedAt: posting.releasedDate || new Date().toISOString(),
            type: "Full-Time",
            isRemote: place.toLowerCase().includes("remote"),
            source: "smartrecruiters",
        };
    };
    const loadCompany = async (company) => {
        const aborter = new AbortController();
        const timer = setTimeout(() => aborter.abort(), 8000);
        try {
            const searchUrl = globalState.templates.smartrecruitersPostings.replace(/{slug}/g, company.slug);
            const response = await fetch(searchUrl, {
                headers: { "User-Agent": BROWSER_UA },
                signal: aborter.signal,
            });
            clearTimeout(timer);
            if (!response.ok) {
                console.error(`[SmartRecruiters] Fetch failed for ${company.name}: HTTP ${response.status}`);
                return [];
            }
            const payload = await response.json();
            const candidates = (payload.content || []).filter(titlePasses);
            const enriched = await Promise.all(candidates.map((posting) => loadPosting(company, posting)));
            return enriched.filter(Boolean);
        }
        catch (err) {
            clearTimeout(timer);
            console.error(`[SmartRecruiters] Failed fetching ${company.name} jobs:`, err?.message);
            return [];
        }
    };
    const settled = await Promise.allSettled(companies.map(loadCompany));
    return settled.flatMap((outcome) => (outcome.status === "fulfilled" ? outcome.value : []));
}
451
/**
 * Collect matching jobs from the RemoteOK public feed.
 *
 * The first array element of the feed is skipped (it is not a job row). A job
 * matches when its title matches the keywords/skills, or one of its tags contains
 * one of them, and it passes the blocklist and location filters.
 *
 * Fixes over the previous revision:
 *  - salary bounds are read from the feed's `salary_min` / `salary_max` fields
 *    (the camelCase names are still accepted as a fallback);
 *  - a missing upper bound renders as "$60k+" instead of "$60k–$NaNk";
 *  - a non-array payload returns [] explicitly instead of relying on a thrown TypeError.
 *
 * @param {string[]} keywords - Search keywords, used as given.
 * @param {string[]} skills - Skills; lower-cased and added to the keywords.
 * @param {string[]} targetRoles - Passed to `isBlocklistedRole`.
 * @param {string} searchLocation - Passed to `matchesLocation`.
 * @param {boolean} prefersRemote - Passed to `matchesLocation`.
 * @param {number} [yearsOfExperience=0] - Passed to `isBlocklistedRole`.
 * @returns {Promise<object[]>} Normalised job records with `source: "remoteok"`; [] on any failure.
 */
export async function fetchRemoteOKJobs(keywords, skills, targetRoles, searchLocation, prefersRemote, yearsOfExperience = 0) {
    const toThousands = (amount) => Math.round(amount / 1000);
    const describeSalary = (j) => {
        if (j.salary)
            return j.salary;
        const low = j.salary_min ?? j.salaryMin;
        const high = j.salary_max ?? j.salaryMax;
        if (!low)
            return undefined; // the feed reports 0 when the salary is unknown
        return high ? `$${toThousands(low)}k–$${toThousands(high)}k` : `$${toThousands(low)}k+`;
    };
    const ctrl = new AbortController();
    const tid = setTimeout(() => ctrl.abort(), 10000);
    try {
        const res = await fetch("https://remoteok.com/api", {
            signal: ctrl.signal,
            headers: { "User-Agent": "Mozilla/5.0 (compatible; JobSearchAgent/1.0)" },
        });
        clearTimeout(tid);
        if (!res.ok)
            return [];
        const raw = await res.json();
        if (!Array.isArray(raw))
            return [];
        const allKw = [...keywords, ...skills.map((s) => s.toLowerCase())];
        return raw
            .slice(1)
            .filter(Boolean)
            .filter((j) => {
            if (!j.position || !j.company)
                return false;
            const title = j.position;
            const tags = (j.tags || []).map((t) => t.toLowerCase());
            const loc = j.location || "Remote";
            const titleMatches = matchesKeywords(title, allKw) || tags.some((t) => allKw.some((kw) => t.includes(kw)));
            return titleMatches && !isBlocklistedRole(title, targetRoles, yearsOfExperience) && matchesLocation(loc, searchLocation, prefersRemote);
        })
            .map((j) => ({
            title: j.position,
            company: j.company,
            location: j.location || "Remote",
            description: j.description ? stripHtmlCommunity(j.description).slice(0, 15000) : "",
            url: j.apply_url || j.url || "",
            applyUrl: j.apply_url,
            postedAt: j.date || new Date().toISOString(),
            type: "Full-Time",
            salary: describeSalary(j),
            isRemote: true,
            source: "remoteok",
        }));
    }
    catch {
        clearTimeout(tid);
        console.error("[RemoteOK] Fetch failed");
        return [];
    }
}
496
/**
 * Collect matching jobs from the Remotive API.
 *
 * Remotive categories are chosen by substring-matching the lower-cased target roles;
 * "software-development" is added when nothing else matched or when a role looks
 * like an engineering role. All categories are fetched concurrently under one shared
 * 10s abort timeout; a category that fails is logged and skipped. Results are
 * de-duplicated on lower-cased, trimmed `title|company`.
 *
 * @param {string[]} keywords - Search keywords, used as given.
 * @param {string[]} skills - Skills; lower-cased and added to the keywords.
 * @param {string[]} targetRoles - Drives category selection and `isBlocklistedRole`.
 * @param {string} searchLocation - Passed to `matchesLocation`.
 * @param {boolean} prefersRemote - Passed to `matchesLocation`.
 * @param {number} [yearsOfExperience=0] - Passed to `isBlocklistedRole`.
 * @returns {Promise<object[]>} Normalised job records with `source: "remotive"`; [] on failure.
 */
export async function fetchRemotiveJobs(keywords, skills, targetRoles, searchLocation, prefersRemote, yearsOfExperience = 0) {
    // Category -> role substrings that select it, evaluated in this order.
    const CATEGORY_HINTS = [
        ["design", ["design", "ui", "ux", "creative"]],
        ["product", ["product", "pm", "program manager"]],
        ["data", ["data", "analyst", "analytics", "science"]],
        ["devops", ["devops", "sre", "reliability", "infrastructure", "sysadmin", "platform"]],
    ];
    const ENGINEERING_HINTS = ["software", "engineer", "developer", "frontend", "backend", "fullstack", "web", "tech"];
    const aborter = new AbortController();
    const timer = setTimeout(() => aborter.abort(), 10000);
    try {
        const rolesLower = targetRoles.map((role) => role.toLowerCase());
        const anyRoleHas = (hints) => rolesLower.some((role) => hints.some((hint) => role.includes(hint)));
        const categories = [];
        for (const [category, hints] of CATEGORY_HINTS) {
            if (anyRoleHas(hints)) {
                categories.push(category);
            }
        }
        if (categories.length === 0 || anyRoleHas(ENGINEERING_HINTS)) {
            categories.push("software-development");
        }
        const allJobs = [];
        const allKw = [...keywords, ...skills.map((s) => s.toLowerCase())];
        const isWanted = (j) => {
            if (!j.title || !j.company_name) {
                return false;
            }
            const title = j.title;
            const tags = (j.tags || []).map((t) => t.toLowerCase());
            const place = j.candidate_required_location || "Remote";
            const titleMatches = matchesKeywords(title, allKw) || tags.some((t) => allKw.some((kw) => t.includes(kw)));
            return titleMatches && !isBlocklistedRole(title, targetRoles, yearsOfExperience) && matchesLocation(place, searchLocation, prefersRemote);
        };
        const toRecord = (j) => ({
            title: j.title,
            company: j.company_name,
            location: j.candidate_required_location || "Remote",
            description: j.description ? stripHtmlCommunity(j.description).slice(0, 15000) : "",
            url: j.url || "",
            postedAt: j.publication_date || new Date().toISOString(),
            type: j.job_type === "contract" ? "Contract" : "Full-Time",
            salary: j.salary || undefined,
            isRemote: true,
            source: "remotive",
        });
        const loadCategory = async (category) => {
            try {
                const res = await fetch(`https://remotive.com/api/remote-jobs?category=${category}`, {
                    signal: aborter.signal,
                    headers: { "User-Agent": "Mozilla/5.0 (compatible; JobSearchAgent/1.0)" },
                });
                if (!res.ok) {
                    return;
                }
                const payload = await res.json();
                const listed = payload.jobs || [];
                allJobs.push(...listed.filter(isWanted).map(toRecord));
            }
            catch (e) {
                console.error(`[Remotive] Category ${category} fetch failed:`, e?.message);
            }
        };
        await Promise.all(categories.map(loadCategory));
        clearTimeout(timer);
        // First occurrence wins; Map keeps insertion order.
        const unique = new Map();
        for (const job of allJobs) {
            const key = `${job.title.toLowerCase().trim()}|${job.company.toLowerCase().trim()}`;
            if (!unique.has(key)) {
                unique.set(key, job);
            }
        }
        return [...unique.values()];
    }
    catch (err) {
        clearTimeout(timer);
        console.error("[Remotive] Sourcing failed:", err?.message);
        return [];
    }
}
570
/**
 * Collect postings from the latest Hacker News "Who is hiring?" thread.
 *
 * The thread is located through the Algolia HN search API (stories by
 * `whoishiring`), then its top-level comments are kept when their text contains
 * any keyword or lower-cased skill. The first non-empty line (max 80 chars) becomes
 * the title. Both requests share one 12s abort timeout.
 *
 * The timeout is now cleared in a `finally` block: previously the early-return
 * paths (non-2xx answers, thread not found) left the timer pending, which kept the
 * process alive for up to 12 seconds.
 *
 * @param {string[]} keywords - Search keywords, used as given.
 * @param {string[]} skills - Skills; lower-cased and added to the keywords.
 * @param {string[]} _targetRoles - Unused.
 * @param {string} _searchLocation - Unused.
 * @param {boolean} _prefersRemote - Unused.
 * @param {number} [_yearsOfExperience=0] - Unused.
 * @returns {Promise<object[]>} Normalised job records with `source: "hackernews"`; [] on failure.
 */
export async function fetchHackerNewsJobs(keywords, skills, _targetRoles, _searchLocation, _prefersRemote, _yearsOfExperience = 0) {
    const ctrl = new AbortController();
    const tid = setTimeout(() => ctrl.abort(), 12000);
    try {
        const searchUrl = "https://hn.algolia.com/api/v1/search_by_date?tags=story,author_whoishiring&hitsPerPage=10";
        const searchRes = await fetch(searchUrl, { signal: ctrl.signal });
        if (!searchRes.ok)
            return [];
        const searchData = await searchRes.json();
        const hits = searchData.hits || [];
        const story = hits.find((h) => h.title && h.title.includes("Who is hiring?"));
        if (!story) {
            console.error("[HackerNews] Latest hiring story not found in hits");
            return [];
        }
        const itemRes = await fetch(`https://hn.algolia.com/api/v1/items/${story.objectID}`, { signal: ctrl.signal });
        if (!itemRes.ok)
            return [];
        const itemData = await itemRes.json();
        const comments = itemData.children || [];
        const allKw = [...keywords, ...skills.map((s) => s.toLowerCase())];
        const results = [];
        for (const comment of comments) {
            if (!comment.text)
                continue;
            const strippedText = stripHtmlCommunity(comment.text);
            const textLower = strippedText.toLowerCase();
            if (!allKw.some((kw) => textLower.includes(kw)))
                continue;
            const lines = strippedText.split("\n").map((l) => l.trim()).filter(Boolean);
            const firstLine = lines[0] ? lines[0].substring(0, 80) : "Hacker News Post";
            results.push({
                title: firstLine,
                company: "Hacker News Community",
                location: "Remote / On-site",
                description: strippedText.slice(0, 15000),
                url: `https://news.ycombinator.com/item?id=${comment.id}`,
                postedAt: comment.created_at || new Date().toISOString(),
                type: "Full-Time",
                isRemote: true,
                source: "hackernews",
            });
        }
        return results;
    }
    catch (err) {
        console.error("[HackerNews] Sourcing failed:", err?.message);
        return [];
    }
    finally {
        // Covers every exit: success, early return and failure.
        clearTimeout(tid);
    }
}
622
+ // Guest search filter-code mappings (LinkedIn's f_* params).
623
+ const LI_DATE = { "24hr": "r86400", "past week": "r604800", "past month": "r2592000" }; // lower-cased q.datePosted -> f_TPR value
624
+ const LI_REMOTE = { "on site": "1", remote: "2", hybrid: "3" }; // lower-cased q.remote -> f_WT value
625
+ const LI_JOBTYPE = { "full time": "F", "part time": "P", contract: "C", temporary: "T", internship: "I", volunteer: "V" }; // lower-cased q.jobType -> f_JT value
626
+ const LI_EXP = { internship: "1", "entry level": "2", entry: "2", associate: "3", senior: "4", mid: "4", director: "5", executive: "6" }; // lower-cased q.experienceLevel -> f_E value
627
/**
 * Translate a minimum salary into the bucket code used for the `f_SB2` search parameter.
 *
 * @param {number} [min] - Desired minimum salary.
 * @returns {string|undefined} "1".."5" for the highest threshold reached
 *   (40k, 60k, 80k, 100k, 120k), or undefined when `min` is falsy or below 40k.
 */
function liSalaryBucket(min) {
    if (!min) {
        return undefined;
    }
    // Ordered from the highest floor down so the first hit is the best bucket.
    const floors = [
        [120000, "5"],
        [100000, "4"],
        [80000, "3"],
        [60000, "2"],
        [40000, "1"],
    ];
    for (const [floor, code] of floors) {
        if (min >= floor) {
            return code;
        }
    }
    return undefined;
}
642
/**
 * Extract the numeric job id from a LinkedIn job URL or urn.
 *
 * The id is the LAST run of six or more digits in the part before any `?`/`#`,
 * because job view URLs end with the id and the title slug before it may itself
 * contain a long number. When that part holds no such run (e.g. the id only
 * appears as a query parameter) the first run anywhere in the string is used.
 *
 * @param {string} url - Job URL or urn; null/undefined is tolerated.
 * @returns {string|null} The id, or null when none is found.
 */
export function linkedInJobIdFromUrl(url) {
    if (url == null)
        return null;
    const text = String(url);
    const beforeQuery = text.split(/[?#]/)[0];
    const runsInPath = beforeQuery.match(/\d{6,}/g);
    if (runsInPath)
        return runsInPath[runsInPath.length - 1];
    const anywhere = text.match(/(\d{6,})/);
    return anywhere ? anywhere[1] : null;
}
647
/**
 * Parse the guest search HTML into listing rows using regular expressions only.
 *
 * The markup is split at every `data-entity-urn="urn:li:jobPosting:<id>"` marker;
 * each slice (up to the next marker) is searched for link, title, company, location
 * and `datetime`. Rows lacking a title or company are dropped. A missing link falls
 * back to the canonical `/jobs/view/<id>` URL and a missing date to the current time.
 *
 * @param {string} html - Raw HTML returned by the guest search endpoint.
 * @returns {Array<{jobId: string, jobUrl: string, title: string, company: string, location: string, postedAt: string}>}
 */
function parseLinkedInCards(html) {
    // Text of capture group 1, or "" when the pattern does not match.
    const grab = (text, pattern) => (text.match(pattern) || [])[1] || "";
    const markers = [...html.matchAll(/data-entity-urn="urn:li:jobPosting:(\d+)"/g)].map((hit) => ({
        id: hit[1],
        pos: hit.index,
    }));
    const rows = [];
    markers.forEach((marker, n) => {
        const following = markers[n + 1];
        const card = html.slice(marker.pos, following ? following.pos : undefined);
        const jobId = marker.id;
        const link = card.match(/base-card__full-link[^>]*href="([^"]+)"/);
        const rawUrl = link ? link[1].replace(/&amp;/g, "&") : `https://www.linkedin.com/jobs/view/${jobId}`;
        const jobUrl = rawUrl.split("?")[0];
        const title = stripHtmlCommunity(grab(card, /base-search-card__title[^>]*>([\s\S]*?)<\/h3>/));
        // Prefer the linked company name; fall back to the plain subtitle heading.
        const company = stripHtmlCommunity(grab(card, /base-search-card__subtitle[\s\S]*?<a[^>]*>([\s\S]*?)<\/a>/)) ||
            stripHtmlCommunity(grab(card, /base-search-card__subtitle[^>]*>([\s\S]*?)<\/h4>/));
        const location = stripHtmlCommunity(grab(card, /job-search-card__location[^>]*>([\s\S]*?)<\/span>/));
        const postedAt = grab(card, /datetime="([^"]+)"/) || new Date().toISOString();
        if (jobId && title && company) {
            rows.push({ jobId, jobUrl, title, company, location, postedAt });
        }
    });
    return rows;
}
670
/**
 * Search LinkedIn's guest job-search endpoint and return normalised listings.
 *
 * Up to three result pages (25 rows each) are fetched sequentially, each with a 10s
 * timeout, until roughly twice `q.limit` rows are collected. Rows are de-duplicated
 * by job id and dropped when `isBlocklistedRole` rejects the title. Descriptions are
 * left empty here.
 *
 * Fixes over the previous revision:
 *  - `isRemote` compares `q.remote` case-insensitively, matching the (already
 *    case-insensitive) `f_WT` filter lookup;
 *  - filter tables are read with an own-property check, so values such as
 *    "constructor" no longer leak an inherited member into the query string;
 *  - a missing/non-numeric `q.limit` defaults to 25 instead of yielding NaN pages
 *    and silently returning nothing;
 *  - the 300ms politeness pause is skipped when no further page will be requested.
 *
 * @param {object} q - Query: `keyword`, optional `location`, `datePosted`, `remote`,
 *   `jobType`, `experienceLevel`, `salaryMin`, `maxApplicants`, `sortBy`
 *   ("recent" | "relevant"), `limit`, plus `targetRoles` / `yearsOfExperience`
 *   for the blocklist check.
 * @returns {Promise<object[]>} Normalised job records with `source: "linkedin"`.
 */
export async function fetchLinkedInJobs(q) {
    const PAGE_SIZE = 25;
    const requestedLimit = Number(q.limit);
    const limit = Number.isFinite(requestedLimit) ? requestedLimit : PAGE_SIZE;
    // Case-insensitive lookup restricted to the table's own keys.
    const pick = (table, key) => {
        if (typeof key !== "string")
            return undefined;
        const normalized = key.toLowerCase();
        return Object.prototype.hasOwnProperty.call(table, normalized) ? table[normalized] : undefined;
    };
    const wantsRemote = typeof q.remote === "string" && q.remote.toLowerCase() === "remote";
    const buildParams = (start) => {
        const p = new URLSearchParams();
        p.set("keywords", q.keyword);
        if (q.location)
            p.set("location", q.location);
        const tpr = pick(LI_DATE, q.datePosted);
        if (tpr)
            p.set("f_TPR", tpr);
        const wt = pick(LI_REMOTE, q.remote);
        if (wt)
            p.set("f_WT", wt);
        const jt = pick(LI_JOBTYPE, q.jobType);
        if (jt)
            p.set("f_JT", jt);
        const e = pick(LI_EXP, q.experienceLevel);
        if (e)
            p.set("f_E", e);
        const sb = liSalaryBucket(q.salaryMin);
        if (sb)
            p.set("f_SB2", sb);
        // f_EA = LinkedIn's "early applicant" filter (verified live): the search returns
        // only low-applicant jobs ("be among the first ~25 applicants") instead of the
        // default 200+ ones. It's the ONLY guest-side applicant filter, so use it whenever a
        // cap is requested. Exact <=N filtering needs the cookie (Voyager exact counts).
        if (q.maxApplicants !== undefined)
            p.set("f_EA", "true");
        p.set("start", String(start));
        if (q.sortBy === "recent")
            p.set("sortBy", "DD");
        else if (q.sortBy === "relevant")
            p.set("sortBy", "R");
        return p.toString();
    };
    const collected = [];
    const seen = new Set();
    const maxPages = Math.min(3, Math.ceil(limit / PAGE_SIZE) + 1);
    for (let page = 0; page < maxPages && collected.length < limit * 2; page++) {
        const url = `https://www.linkedin.com/jobs-guest/jobs/api/seeMoreJobPostings/search?${buildParams(page * PAGE_SIZE)}`;
        const ctrl = new AbortController();
        const tid = setTimeout(() => ctrl.abort(), 10000);
        try {
            const res = await fetch(url, { headers: { "User-Agent": LINKEDIN_UA, "Accept-Language": "en-US,en;q=0.9" }, signal: ctrl.signal });
            clearTimeout(tid);
            if (!res.ok) {
                console.error(`[LinkedIn] search HTTP ${res.status}`);
                break;
            }
            const html = await res.text();
            const cards = parseLinkedInCards(html);
            if (cards.length === 0) {
                // Distinguish "no matches" from a likely markup change (HTTP 200 + real HTML, 0 parsed).
                if (html.length > 2000)
                    console.error("[LinkedIn] HTTP 200 but parsed 0 job cards — card markup may have changed.");
                break;
            }
            for (const c of cards) {
                if (seen.has(c.jobId))
                    continue;
                seen.add(c.jobId);
                // LinkedIn already searched by keyword+location; only drop blocklisted/over-level titles.
                if (isBlocklistedRole(c.title, q.targetRoles, q.yearsOfExperience))
                    continue;
                collected.push({
                    title: c.title,
                    company: c.company,
                    location: c.location || "Not specified",
                    description: "", // enriched per-job later
                    url: c.jobUrl,
                    postedAt: c.postedAt,
                    type: "Full-Time",
                    isRemote: wantsRemote || c.location.toLowerCase().includes("remote"),
                    source: "linkedin",
                });
            }
            // Pause only when another page is actually going to be requested.
            const morePagesWanted = page + 1 < maxPages && collected.length < limit * 2;
            if (morePagesWanted)
                await new Promise((r) => setTimeout(r, 300));
        }
        catch (err) {
            clearTimeout(tid);
            console.error("[LinkedIn] search failed:", err?.message);
            break;
        }
    }
    return collected;
}
755
/**
 * Build the request headers for authenticated Voyager calls.
 *
 * Reads `LINKEDIN_LI_AT` and `LINKEDIN_JSESSIONID` from the environment. The
 * JSESSIONID value doubles as the CSRF token once its double quotes are removed;
 * the cookie carries it re-wrapped in quotes.
 *
 * @returns {object|null} Header map, or null when either variable is missing/empty.
 */
function linkedInAuthHeaders() {
    const { LINKEDIN_LI_AT: sessionCookie, LINKEDIN_JSESSIONID: sessionId } = process.env;
    if (!sessionCookie || !sessionId) {
        return null;
    }
    const csrfToken = sessionId.replace(/"/g, "");
    const cookie = `li_at=${sessionCookie}; JSESSIONID="${csrfToken}"`;
    return {
        "User-Agent": LINKEDIN_UA,
        Cookie: cookie,
        "csrf-token": csrfToken,
        "x-restli-protocol-version": "2.0.0",
        "x-li-lang": "en_US",
        Accept: "application/json",
    };
}
771
/**
 * Fetch one job's description and applicant count from the authenticated Voyager API.
 *
 * The payload is read from `data.data` when present, otherwise from the top level.
 * The request is bounded by an 8s abort timeout.
 *
 * @param {string} jobId - LinkedIn job id.
 * @param {object} headers - Authenticated request headers.
 * @returns {Promise<{description: string, applicants: number|null, early: boolean}|null>}
 *   null on a non-2xx answer, a missing description, or any error.
 */
async function fetchVoyagerDetail(jobId, headers) {
    const aborter = new AbortController();
    const timer = setTimeout(() => aborter.abort(), 8000);
    try {
        const endpoint = `https://www.linkedin.com/voyager/api/jobs/jobPostings/${jobId}`;
        const res = await fetch(endpoint, { headers, signal: aborter.signal });
        clearTimeout(timer);
        if (!res.ok) {
            return null;
        }
        const payload = await res.json();
        const posting = payload?.data ?? payload;
        const rawText = posting?.description?.text || payload?.description?.text || "";
        if (!rawText) {
            return null;
        }
        const applyCount = posting?.applies ?? payload?.applies;
        const applicants = typeof applyCount === "number" ? applyCount : null;
        // Voyager returns an EXACT applicant count, so early=false is intentional: the cap can
        // filter precisely here (unlike the guest detail page's "be among the first N" upper bound).
        return {
            description: stripHtmlCommunity(rawText).slice(0, 15000),
            applicants,
            early: false,
        };
    }
    catch {
        clearTimeout(timer);
        return null;
    }
}
795
/**
 * Fetch the full description and applicant count for one LinkedIn job.
 *
 * When auth headers are available the Voyager API is tried first; if it yields
 * nothing, or no cookie is configured, the public guest detail page is scraped
 * (8s timeout). `early` is true when the applicant caption contains the word "first".
 *
 * @param {string} jobId - LinkedIn job id.
 * @returns {Promise<{description: string, applicants: number|null, early: boolean}>}
 *   An empty description with `applicants: null` on a non-2xx answer or any error.
 */
export async function fetchLinkedInJobDetail(jobId) {
    const nothing = () => ({ description: "", applicants: null, early: false });
    const authHeaders = linkedInAuthHeaders();
    if (authHeaders) {
        const viaVoyager = await fetchVoyagerDetail(jobId, authHeaders);
        if (viaVoyager) {
            return viaVoyager;
        }
    }
    const aborter = new AbortController();
    const timer = setTimeout(() => aborter.abort(), 8000);
    try {
        const res = await fetch(`https://www.linkedin.com/jobs-guest/jobs/api/jobPosting/${jobId}`, {
            headers: { "User-Agent": LINKEDIN_UA, "Accept-Language": "en-US,en;q=0.9" },
            signal: aborter.signal,
        });
        clearTimeout(timer);
        if (!res.ok) {
            return nothing();
        }
        const page = await res.text();
        const markup = page.match(/show-more-less-html__markup[^>]*>([\s\S]*?)<\/div>/);
        let description = "";
        if (markup) {
            description = stripHtmlCommunity(markup[1]).slice(0, 15000);
        }
        // Caption is a <span> ("115 applicants") for popular roles or a <figcaption>
        // ("Be among the first 25 applicants") for new/under-10 ones — match either by
        // capturing the text up to the next tag.
        const captionHit = page.match(/num-applicants__caption[^>]*>([^<]*)/) || [];
        const caption = captionHit[1] || "";
        const digits = caption.match(/([\d,]+)/);
        let applicants = null;
        if (digits) {
            applicants = parseInt(digits[1].replace(/,/g, ""), 10);
        }
        const early = /\bfirst\b/i.test(caption);
        return { description, applicants, early };
    }
    catch {
        clearTimeout(timer);
        return nothing();
    }
}
829
+ //# sourceMappingURL=sourcing.js.map