@bluessu/meal-scraper 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +102 -7
- package/dist/index.mjs +102 -7
- package/package.json +14 -11
package/dist/index.cjs
CHANGED
|
@@ -378,17 +378,14 @@ var SoongguriScraper = class {
|
|
|
378
378
|
constructor(settings, cafeteriaType) {
|
|
379
379
|
this.settings = settings;
|
|
380
380
|
this.cafeteriaType = cafeteriaType;
|
|
381
|
+
this.challengeRetryLimit = 2;
|
|
382
|
+
this.cookieJar = {};
|
|
381
383
|
}
|
|
382
384
|
async scrapeMenu(date) {
|
|
383
385
|
const normalizedDate = normalizeSgDate(date);
|
|
384
|
-
const url =
|
|
386
|
+
const url = this.buildMenuUrl(normalizedDate);
|
|
385
387
|
try {
|
|
386
|
-
const res = await import_axios.default.get(url, {
|
|
387
|
-
timeout: this.settings.timeoutMs,
|
|
388
|
-
responseType: "text",
|
|
389
|
-
validateStatus: (s) => s >= 200 && s < 300
|
|
390
|
-
});
|
|
391
|
-
const html = String(res.data);
|
|
388
|
+
const html = await this.fetchWithRetry(url, 0, normalizedDate);
|
|
392
389
|
const hasHoliday = html.includes("\uC624\uB298\uC740 \uC27D\uB2C8\uB2E4.") || html.includes("\uD734\uBB34");
|
|
393
390
|
if (hasHoliday) {
|
|
394
391
|
throw new HolidayException(
|
|
@@ -450,11 +447,109 @@ var SoongguriScraper = class {
|
|
|
450
447
|
);
|
|
451
448
|
}
|
|
452
449
|
}
|
|
450
|
+
async fetchWithRetry(url, attempt = 0, targetDate) {
|
|
451
|
+
const response = await import_axios.default.get(url, {
|
|
452
|
+
timeout: this.settings.timeoutMs,
|
|
453
|
+
responseType: "text",
|
|
454
|
+
validateStatus: (s) => s >= 200 && s < 300,
|
|
455
|
+
headers: this.buildBrowserLikeHeaders(attempt)
|
|
456
|
+
});
|
|
457
|
+
this.applySetCookies(response.headers);
|
|
458
|
+
const html = String(response.data);
|
|
459
|
+
if (this.isChallengeResponse(html) && attempt < this.challengeRetryLimit) {
|
|
460
|
+
const nextAttempt = attempt + 1;
|
|
461
|
+
return this.fetchWithRetry(
|
|
462
|
+
this.buildMenuUrl(url, nextAttempt),
|
|
463
|
+
nextAttempt,
|
|
464
|
+
targetDate
|
|
465
|
+
);
|
|
466
|
+
}
|
|
467
|
+
if (this.isChallengeResponse(html) && attempt >= this.challengeRetryLimit) {
|
|
468
|
+
throw new MenuFetchException(
|
|
469
|
+
targetDate,
|
|
470
|
+
this.cafeteriaType,
|
|
471
|
+
"\uC790\uB3D9\uB4F1\uB85D\uBC29\uC9C0 \uC6B0\uD68C \uC2E4\uD328",
|
|
472
|
+
html,
|
|
473
|
+
{
|
|
474
|
+
endpoint: url,
|
|
475
|
+
operation: "scrape",
|
|
476
|
+
cafeteria: this.cafeteriaType,
|
|
477
|
+
challengeBypass: true,
|
|
478
|
+
attempts: attempt,
|
|
479
|
+
ckattempt: nextChallengeAttempt(attempt)
|
|
480
|
+
}
|
|
481
|
+
);
|
|
482
|
+
}
|
|
483
|
+
return html;
|
|
484
|
+
}
|
|
485
|
+
buildMenuUrl(url, ckattempt) {
|
|
486
|
+
const parsed = new URL(url);
|
|
487
|
+
const nextUrl = new URL(`${parsed.origin}${parsed.pathname}`);
|
|
488
|
+
const params = new URLSearchParams(parsed.search);
|
|
489
|
+
const rcd = params.get("rcd") ?? String(getRcd(this.cafeteriaType, this.settings));
|
|
490
|
+
const sdt = params.get("sdt") ?? "";
|
|
491
|
+
nextUrl.searchParams.set("rcd", rcd);
|
|
492
|
+
nextUrl.searchParams.set("sdt", sdt);
|
|
493
|
+
if (typeof ckattempt === "number") {
|
|
494
|
+
nextUrl.searchParams.set("ckattempt", String(ckattempt));
|
|
495
|
+
}
|
|
496
|
+
return nextUrl.toString();
|
|
497
|
+
}
|
|
498
|
+
buildBrowserLikeHeaders(attempt) {
|
|
499
|
+
const headers = {
|
|
500
|
+
accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
|
501
|
+
"accept-language": "ko-KR,ko;q=0.9,en-US;q=0.7,en;q=0.6",
|
|
502
|
+
"cache-control": "no-cache",
|
|
503
|
+
pragma: "no-cache",
|
|
504
|
+
referer: this.settings.soongguriBaseUrl,
|
|
505
|
+
"sec-fetch-dest": "document",
|
|
506
|
+
"sec-fetch-mode": "navigate",
|
|
507
|
+
"sec-fetch-site": "same-origin",
|
|
508
|
+
"user-agent": "Mozilla/5.0 (Linux; Android 13; Pixel 6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Mobile Safari/537.36"
|
|
509
|
+
};
|
|
510
|
+
if (attempt > 0) {
|
|
511
|
+
headers["upgrade-insecure-requests"] = "1";
|
|
512
|
+
}
|
|
513
|
+
const cookie = this.getCookieHeader();
|
|
514
|
+
if (cookie) {
|
|
515
|
+
headers.cookie = cookie;
|
|
516
|
+
}
|
|
517
|
+
return headers;
|
|
518
|
+
}
|
|
519
|
+
getCookieHeader() {
|
|
520
|
+
return Object.entries(this.cookieJar).filter(([, value]) => value.length > 0).map(([name, value]) => `${name}=${value}`).join("; ");
|
|
521
|
+
}
|
|
522
|
+
applySetCookies(headers) {
|
|
523
|
+
const setCookie = headers["set-cookie"];
|
|
524
|
+
if (!setCookie) {
|
|
525
|
+
return;
|
|
526
|
+
}
|
|
527
|
+
const rawCookies = Array.isArray(setCookie) ? setCookie : [setCookie];
|
|
528
|
+
for (const raw of rawCookies) {
|
|
529
|
+
const tuple = raw.split(";")[0];
|
|
530
|
+
const separatorIdx = tuple.indexOf("=");
|
|
531
|
+
if (separatorIdx < 1) {
|
|
532
|
+
continue;
|
|
533
|
+
}
|
|
534
|
+
const name = tuple.slice(0, separatorIdx).trim();
|
|
535
|
+
const value = tuple.slice(separatorIdx + 1).trim();
|
|
536
|
+
if (!name) {
|
|
537
|
+
continue;
|
|
538
|
+
}
|
|
539
|
+
this.cookieJar[name] = value;
|
|
540
|
+
}
|
|
541
|
+
}
|
|
542
|
+
isChallengeResponse(html) {
|
|
543
|
+
return html.includes("\uC790\uB3D9\uB4F1\uB85D\uBC29\uC9C0\uB97C \uC704\uD574 \uBCF4\uC548\uC808\uCC28\uB97C \uAC70\uCE58\uACE0 \uC788\uC2B5\uB2C8\uB2E4.") || html.includes("/___verify") || html.includes("Please prove that you are human.");
|
|
544
|
+
}
|
|
453
545
|
};
|
|
454
546
|
var normalizeSgDate = (date) => {
|
|
455
547
|
const digits = date.replace(/\D/g, "").slice(0, 8);
|
|
456
548
|
return digits.length === 8 ? digits : date;
|
|
457
549
|
};
|
|
550
|
+
var nextChallengeAttempt = (attempt) => {
|
|
551
|
+
return attempt + 1;
|
|
552
|
+
};
|
|
458
553
|
|
|
459
554
|
// src/repositories/scrapers/haksikScraper.ts
|
|
460
555
|
var HaksikScraper = class extends SoongguriScraper {
|
package/dist/index.mjs
CHANGED
|
@@ -333,17 +333,14 @@ var SoongguriScraper = class {
|
|
|
333
333
|
constructor(settings, cafeteriaType) {
|
|
334
334
|
this.settings = settings;
|
|
335
335
|
this.cafeteriaType = cafeteriaType;
|
|
336
|
+
this.challengeRetryLimit = 2;
|
|
337
|
+
this.cookieJar = {};
|
|
336
338
|
}
|
|
337
339
|
async scrapeMenu(date) {
|
|
338
340
|
const normalizedDate = normalizeSgDate(date);
|
|
339
|
-
const url =
|
|
341
|
+
const url = this.buildMenuUrl(normalizedDate);
|
|
340
342
|
try {
|
|
341
|
-
const res = await axios.get(url, {
|
|
342
|
-
timeout: this.settings.timeoutMs,
|
|
343
|
-
responseType: "text",
|
|
344
|
-
validateStatus: (s) => s >= 200 && s < 300
|
|
345
|
-
});
|
|
346
|
-
const html = String(res.data);
|
|
343
|
+
const html = await this.fetchWithRetry(url, 0, normalizedDate);
|
|
347
344
|
const hasHoliday = html.includes("\uC624\uB298\uC740 \uC27D\uB2C8\uB2E4.") || html.includes("\uD734\uBB34");
|
|
348
345
|
if (hasHoliday) {
|
|
349
346
|
throw new HolidayException(
|
|
@@ -405,11 +402,109 @@ var SoongguriScraper = class {
|
|
|
405
402
|
);
|
|
406
403
|
}
|
|
407
404
|
}
|
|
405
|
+
async fetchWithRetry(url, attempt = 0, targetDate) {
|
|
406
|
+
const response = await axios.get(url, {
|
|
407
|
+
timeout: this.settings.timeoutMs,
|
|
408
|
+
responseType: "text",
|
|
409
|
+
validateStatus: (s) => s >= 200 && s < 300,
|
|
410
|
+
headers: this.buildBrowserLikeHeaders(attempt)
|
|
411
|
+
});
|
|
412
|
+
this.applySetCookies(response.headers);
|
|
413
|
+
const html = String(response.data);
|
|
414
|
+
if (this.isChallengeResponse(html) && attempt < this.challengeRetryLimit) {
|
|
415
|
+
const nextAttempt = attempt + 1;
|
|
416
|
+
return this.fetchWithRetry(
|
|
417
|
+
this.buildMenuUrl(url, nextAttempt),
|
|
418
|
+
nextAttempt,
|
|
419
|
+
targetDate
|
|
420
|
+
);
|
|
421
|
+
}
|
|
422
|
+
if (this.isChallengeResponse(html) && attempt >= this.challengeRetryLimit) {
|
|
423
|
+
throw new MenuFetchException(
|
|
424
|
+
targetDate,
|
|
425
|
+
this.cafeteriaType,
|
|
426
|
+
"\uC790\uB3D9\uB4F1\uB85D\uBC29\uC9C0 \uC6B0\uD68C \uC2E4\uD328",
|
|
427
|
+
html,
|
|
428
|
+
{
|
|
429
|
+
endpoint: url,
|
|
430
|
+
operation: "scrape",
|
|
431
|
+
cafeteria: this.cafeteriaType,
|
|
432
|
+
challengeBypass: true,
|
|
433
|
+
attempts: attempt,
|
|
434
|
+
ckattempt: nextChallengeAttempt(attempt)
|
|
435
|
+
}
|
|
436
|
+
);
|
|
437
|
+
}
|
|
438
|
+
return html;
|
|
439
|
+
}
|
|
440
|
+
buildMenuUrl(url, ckattempt) {
|
|
441
|
+
const parsed = new URL(url);
|
|
442
|
+
const nextUrl = new URL(`${parsed.origin}${parsed.pathname}`);
|
|
443
|
+
const params = new URLSearchParams(parsed.search);
|
|
444
|
+
const rcd = params.get("rcd") ?? String(getRcd(this.cafeteriaType, this.settings));
|
|
445
|
+
const sdt = params.get("sdt") ?? "";
|
|
446
|
+
nextUrl.searchParams.set("rcd", rcd);
|
|
447
|
+
nextUrl.searchParams.set("sdt", sdt);
|
|
448
|
+
if (typeof ckattempt === "number") {
|
|
449
|
+
nextUrl.searchParams.set("ckattempt", String(ckattempt));
|
|
450
|
+
}
|
|
451
|
+
return nextUrl.toString();
|
|
452
|
+
}
|
|
453
|
+
buildBrowserLikeHeaders(attempt) {
|
|
454
|
+
const headers = {
|
|
455
|
+
accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
|
456
|
+
"accept-language": "ko-KR,ko;q=0.9,en-US;q=0.7,en;q=0.6",
|
|
457
|
+
"cache-control": "no-cache",
|
|
458
|
+
pragma: "no-cache",
|
|
459
|
+
referer: this.settings.soongguriBaseUrl,
|
|
460
|
+
"sec-fetch-dest": "document",
|
|
461
|
+
"sec-fetch-mode": "navigate",
|
|
462
|
+
"sec-fetch-site": "same-origin",
|
|
463
|
+
"user-agent": "Mozilla/5.0 (Linux; Android 13; Pixel 6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Mobile Safari/537.36"
|
|
464
|
+
};
|
|
465
|
+
if (attempt > 0) {
|
|
466
|
+
headers["upgrade-insecure-requests"] = "1";
|
|
467
|
+
}
|
|
468
|
+
const cookie = this.getCookieHeader();
|
|
469
|
+
if (cookie) {
|
|
470
|
+
headers.cookie = cookie;
|
|
471
|
+
}
|
|
472
|
+
return headers;
|
|
473
|
+
}
|
|
474
|
+
getCookieHeader() {
|
|
475
|
+
return Object.entries(this.cookieJar).filter(([, value]) => value.length > 0).map(([name, value]) => `${name}=${value}`).join("; ");
|
|
476
|
+
}
|
|
477
|
+
applySetCookies(headers) {
|
|
478
|
+
const setCookie = headers["set-cookie"];
|
|
479
|
+
if (!setCookie) {
|
|
480
|
+
return;
|
|
481
|
+
}
|
|
482
|
+
const rawCookies = Array.isArray(setCookie) ? setCookie : [setCookie];
|
|
483
|
+
for (const raw of rawCookies) {
|
|
484
|
+
const tuple = raw.split(";")[0];
|
|
485
|
+
const separatorIdx = tuple.indexOf("=");
|
|
486
|
+
if (separatorIdx < 1) {
|
|
487
|
+
continue;
|
|
488
|
+
}
|
|
489
|
+
const name = tuple.slice(0, separatorIdx).trim();
|
|
490
|
+
const value = tuple.slice(separatorIdx + 1).trim();
|
|
491
|
+
if (!name) {
|
|
492
|
+
continue;
|
|
493
|
+
}
|
|
494
|
+
this.cookieJar[name] = value;
|
|
495
|
+
}
|
|
496
|
+
}
|
|
497
|
+
isChallengeResponse(html) {
|
|
498
|
+
return html.includes("\uC790\uB3D9\uB4F1\uB85D\uBC29\uC9C0\uB97C \uC704\uD574 \uBCF4\uC548\uC808\uCC28\uB97C \uAC70\uCE58\uACE0 \uC788\uC2B5\uB2C8\uB2E4.") || html.includes("/___verify") || html.includes("Please prove that you are human.");
|
|
499
|
+
}
|
|
408
500
|
};
|
|
409
501
|
var normalizeSgDate = (date) => {
|
|
410
502
|
const digits = date.replace(/\D/g, "").slice(0, 8);
|
|
411
503
|
return digits.length === 8 ? digits : date;
|
|
412
504
|
};
|
|
505
|
+
var nextChallengeAttempt = (attempt) => {
|
|
506
|
+
return attempt + 1;
|
|
507
|
+
};
|
|
413
508
|
|
|
414
509
|
// src/repositories/scrapers/haksikScraper.ts
|
|
415
510
|
var HaksikScraper = class extends SoongguriScraper {
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bluessu/meal-scraper",
|
|
3
3
|
"private": false,
|
|
4
|
-
"version": "0.1.0",
|
|
4
|
+
"version": "0.1.1",
|
|
5
5
|
"description": "A meal menu scraper for dormitory meal info.",
|
|
6
6
|
"type": "commonjs",
|
|
7
7
|
"author": "bluessu",
|
|
@@ -36,6 +36,18 @@
|
|
|
36
36
|
}
|
|
37
37
|
},
|
|
38
38
|
"funding": "https://github.com/blue-ssu/meal-scraper",
|
|
39
|
+
"scripts": {
|
|
40
|
+
"typecheck": "tsc --noEmit -p tsconfig.json",
|
|
41
|
+
"build": "tsup",
|
|
42
|
+
"clean": "rimraf dist",
|
|
43
|
+
"prepare": "pnpm run build",
|
|
44
|
+
"quality": "pnpm run clean && pnpm run typecheck && pnpm run build",
|
|
45
|
+
"prepublishOnly": "pnpm run quality",
|
|
46
|
+
"prepack": "pnpm run quality",
|
|
47
|
+
"release:patch": "pnpm version patch && pnpm publish --access public",
|
|
48
|
+
"release:minor": "pnpm version minor && pnpm publish --access public",
|
|
49
|
+
"release:major": "pnpm version major && pnpm publish --access public"
|
|
50
|
+
},
|
|
39
51
|
"files": [
|
|
40
52
|
"dist",
|
|
41
53
|
"README.md",
|
|
@@ -56,14 +68,5 @@
|
|
|
56
68
|
"rimraf": "^5.0.9",
|
|
57
69
|
"typescript": "^5.5.4",
|
|
58
70
|
"@types/node": "^22.7.1"
|
|
59
|
-
},
|
|
60
|
-
"scripts": {
|
|
61
|
-
"typecheck": "tsc --noEmit -p tsconfig.json",
|
|
62
|
-
"build": "tsup",
|
|
63
|
-
"clean": "rimraf dist",
|
|
64
|
-
"quality": "pnpm run clean && pnpm run typecheck && pnpm run build",
|
|
65
|
-
"release:patch": "pnpm version patch && pnpm publish --access public",
|
|
66
|
-
"release:minor": "pnpm version minor && pnpm publish --access public",
|
|
67
|
-
"release:major": "pnpm version major && pnpm publish --access public"
|
|
68
71
|
}
|
|
69
|
-
}
|
|
72
|
+
}
|