@bluessu/meal-scraper 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/dist/index.cjs +102 -7
  2. package/dist/index.mjs +102 -7
  3. package/package.json +14 -11
package/dist/index.cjs CHANGED
@@ -378,17 +378,14 @@ var SoongguriScraper = class {
378
378
  constructor(settings, cafeteriaType) {
379
379
  this.settings = settings;
380
380
  this.cafeteriaType = cafeteriaType;
381
+ this.challengeRetryLimit = 2;
382
+ this.cookieJar = {};
381
383
  }
382
384
  async scrapeMenu(date) {
383
385
  const normalizedDate = normalizeSgDate(date);
384
- const url = `${this.settings.soongguriBaseUrl}?rcd=${getRcd(this.cafeteriaType, this.settings)}&sdt=${normalizedDate}`;
386
+ const url = this.buildMenuUrl(normalizedDate);
385
387
  try {
386
- const res = await import_axios.default.get(url, {
387
- timeout: this.settings.timeoutMs,
388
- responseType: "text",
389
- validateStatus: (s) => s >= 200 && s < 300
390
- });
391
- const html = String(res.data);
388
+ const html = await this.fetchWithRetry(url, 0, normalizedDate);
392
389
  const hasHoliday = html.includes("\uC624\uB298\uC740 \uC27D\uB2C8\uB2E4.") || html.includes("\uD734\uBB34");
393
390
  if (hasHoliday) {
394
391
  throw new HolidayException(
@@ -450,11 +447,109 @@ var SoongguriScraper = class {
450
447
  );
451
448
  }
452
449
  }
450
+ async fetchWithRetry(url, attempt = 0, targetDate) {
451
+ const response = await import_axios.default.get(url, {
452
+ timeout: this.settings.timeoutMs,
453
+ responseType: "text",
454
+ validateStatus: (s) => s >= 200 && s < 300,
455
+ headers: this.buildBrowserLikeHeaders(attempt)
456
+ });
457
+ this.applySetCookies(response.headers);
458
+ const html = String(response.data);
459
+ if (this.isChallengeResponse(html) && attempt < this.challengeRetryLimit) {
460
+ const nextAttempt = attempt + 1;
461
+ return this.fetchWithRetry(
462
+ this.buildMenuUrl(url, nextAttempt),
463
+ nextAttempt,
464
+ targetDate
465
+ );
466
+ }
467
+ if (this.isChallengeResponse(html) && attempt >= this.challengeRetryLimit) {
468
+ throw new MenuFetchException(
469
+ targetDate,
470
+ this.cafeteriaType,
471
+ "\uC790\uB3D9\uB4F1\uB85D\uBC29\uC9C0 \uC6B0\uD68C \uC2E4\uD328",
472
+ html,
473
+ {
474
+ endpoint: url,
475
+ operation: "scrape",
476
+ cafeteria: this.cafeteriaType,
477
+ challengeBypass: true,
478
+ attempts: attempt,
479
+ ckattempt: nextChallengeAttempt(attempt)
480
+ }
481
+ );
482
+ }
483
+ return html;
484
+ }
485
+ buildMenuUrl(url, ckattempt) {
486
+ const parsed = new URL(url);
487
+ const nextUrl = new URL(`${parsed.origin}${parsed.pathname}`);
488
+ const params = new URLSearchParams(parsed.search);
489
+ const rcd = params.get("rcd") ?? String(getRcd(this.cafeteriaType, this.settings));
490
+ const sdt = params.get("sdt") ?? "";
491
+ nextUrl.searchParams.set("rcd", rcd);
492
+ nextUrl.searchParams.set("sdt", sdt);
493
+ if (typeof ckattempt === "number") {
494
+ nextUrl.searchParams.set("ckattempt", String(ckattempt));
495
+ }
496
+ return nextUrl.toString();
497
+ }
498
+ buildBrowserLikeHeaders(attempt) {
499
+ const headers = {
500
+ accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
501
+ "accept-language": "ko-KR,ko;q=0.9,en-US;q=0.7,en;q=0.6",
502
+ "cache-control": "no-cache",
503
+ pragma: "no-cache",
504
+ referer: this.settings.soongguriBaseUrl,
505
+ "sec-fetch-dest": "document",
506
+ "sec-fetch-mode": "navigate",
507
+ "sec-fetch-site": "same-origin",
508
+ "user-agent": "Mozilla/5.0 (Linux; Android 13; Pixel 6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Mobile Safari/537.36"
509
+ };
510
+ if (attempt > 0) {
511
+ headers["upgrade-insecure-requests"] = "1";
512
+ }
513
+ const cookie = this.getCookieHeader();
514
+ if (cookie) {
515
+ headers.cookie = cookie;
516
+ }
517
+ return headers;
518
+ }
519
+ getCookieHeader() {
520
+ return Object.entries(this.cookieJar).filter(([, value]) => value.length > 0).map(([name, value]) => `${name}=${value}`).join("; ");
521
+ }
522
+ applySetCookies(headers) {
523
+ const setCookie = headers["set-cookie"];
524
+ if (!setCookie) {
525
+ return;
526
+ }
527
+ const rawCookies = Array.isArray(setCookie) ? setCookie : [setCookie];
528
+ for (const raw of rawCookies) {
529
+ const tuple = raw.split(";")[0];
530
+ const separatorIdx = tuple.indexOf("=");
531
+ if (separatorIdx < 1) {
532
+ continue;
533
+ }
534
+ const name = tuple.slice(0, separatorIdx).trim();
535
+ const value = tuple.slice(separatorIdx + 1).trim();
536
+ if (!name) {
537
+ continue;
538
+ }
539
+ this.cookieJar[name] = value;
540
+ }
541
+ }
542
+ isChallengeResponse(html) {
543
+ return html.includes("\uC790\uB3D9\uB4F1\uB85D\uBC29\uC9C0\uB97C \uC704\uD574 \uBCF4\uC548\uC808\uCC28\uB97C \uAC70\uCE58\uACE0 \uC788\uC2B5\uB2C8\uB2E4.") || html.includes("/___verify") || html.includes("Please prove that you are human.");
544
+ }
453
545
  };
454
546
  var normalizeSgDate = (date) => {
455
547
  const digits = date.replace(/\D/g, "").slice(0, 8);
456
548
  return digits.length === 8 ? digits : date;
457
549
  };
550
+ var nextChallengeAttempt = (attempt) => {
551
+ return attempt + 1;
552
+ };
458
553
 
459
554
  // src/repositories/scrapers/haksikScraper.ts
460
555
  var HaksikScraper = class extends SoongguriScraper {
package/dist/index.mjs CHANGED
@@ -333,17 +333,14 @@ var SoongguriScraper = class {
333
333
  constructor(settings, cafeteriaType) {
334
334
  this.settings = settings;
335
335
  this.cafeteriaType = cafeteriaType;
336
+ this.challengeRetryLimit = 2;
337
+ this.cookieJar = {};
336
338
  }
337
339
  async scrapeMenu(date) {
338
340
  const normalizedDate = normalizeSgDate(date);
339
- const url = `${this.settings.soongguriBaseUrl}?rcd=${getRcd(this.cafeteriaType, this.settings)}&sdt=${normalizedDate}`;
341
+ const url = this.buildMenuUrl(normalizedDate);
340
342
  try {
341
- const res = await axios.get(url, {
342
- timeout: this.settings.timeoutMs,
343
- responseType: "text",
344
- validateStatus: (s) => s >= 200 && s < 300
345
- });
346
- const html = String(res.data);
343
+ const html = await this.fetchWithRetry(url, 0, normalizedDate);
347
344
  const hasHoliday = html.includes("\uC624\uB298\uC740 \uC27D\uB2C8\uB2E4.") || html.includes("\uD734\uBB34");
348
345
  if (hasHoliday) {
349
346
  throw new HolidayException(
@@ -405,11 +402,109 @@ var SoongguriScraper = class {
405
402
  );
406
403
  }
407
404
  }
405
+ async fetchWithRetry(url, attempt = 0, targetDate) {
406
+ const response = await axios.get(url, {
407
+ timeout: this.settings.timeoutMs,
408
+ responseType: "text",
409
+ validateStatus: (s) => s >= 200 && s < 300,
410
+ headers: this.buildBrowserLikeHeaders(attempt)
411
+ });
412
+ this.applySetCookies(response.headers);
413
+ const html = String(response.data);
414
+ if (this.isChallengeResponse(html) && attempt < this.challengeRetryLimit) {
415
+ const nextAttempt = attempt + 1;
416
+ return this.fetchWithRetry(
417
+ this.buildMenuUrl(url, nextAttempt),
418
+ nextAttempt,
419
+ targetDate
420
+ );
421
+ }
422
+ if (this.isChallengeResponse(html) && attempt >= this.challengeRetryLimit) {
423
+ throw new MenuFetchException(
424
+ targetDate,
425
+ this.cafeteriaType,
426
+ "\uC790\uB3D9\uB4F1\uB85D\uBC29\uC9C0 \uC6B0\uD68C \uC2E4\uD328",
427
+ html,
428
+ {
429
+ endpoint: url,
430
+ operation: "scrape",
431
+ cafeteria: this.cafeteriaType,
432
+ challengeBypass: true,
433
+ attempts: attempt,
434
+ ckattempt: nextChallengeAttempt(attempt)
435
+ }
436
+ );
437
+ }
438
+ return html;
439
+ }
440
+ buildMenuUrl(url, ckattempt) {
441
+ const parsed = new URL(url);
442
+ const nextUrl = new URL(`${parsed.origin}${parsed.pathname}`);
443
+ const params = new URLSearchParams(parsed.search);
444
+ const rcd = params.get("rcd") ?? String(getRcd(this.cafeteriaType, this.settings));
445
+ const sdt = params.get("sdt") ?? "";
446
+ nextUrl.searchParams.set("rcd", rcd);
447
+ nextUrl.searchParams.set("sdt", sdt);
448
+ if (typeof ckattempt === "number") {
449
+ nextUrl.searchParams.set("ckattempt", String(ckattempt));
450
+ }
451
+ return nextUrl.toString();
452
+ }
453
+ buildBrowserLikeHeaders(attempt) {
454
+ const headers = {
455
+ accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
456
+ "accept-language": "ko-KR,ko;q=0.9,en-US;q=0.7,en;q=0.6",
457
+ "cache-control": "no-cache",
458
+ pragma: "no-cache",
459
+ referer: this.settings.soongguriBaseUrl,
460
+ "sec-fetch-dest": "document",
461
+ "sec-fetch-mode": "navigate",
462
+ "sec-fetch-site": "same-origin",
463
+ "user-agent": "Mozilla/5.0 (Linux; Android 13; Pixel 6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Mobile Safari/537.36"
464
+ };
465
+ if (attempt > 0) {
466
+ headers["upgrade-insecure-requests"] = "1";
467
+ }
468
+ const cookie = this.getCookieHeader();
469
+ if (cookie) {
470
+ headers.cookie = cookie;
471
+ }
472
+ return headers;
473
+ }
474
+ getCookieHeader() {
475
+ return Object.entries(this.cookieJar).filter(([, value]) => value.length > 0).map(([name, value]) => `${name}=${value}`).join("; ");
476
+ }
477
+ applySetCookies(headers) {
478
+ const setCookie = headers["set-cookie"];
479
+ if (!setCookie) {
480
+ return;
481
+ }
482
+ const rawCookies = Array.isArray(setCookie) ? setCookie : [setCookie];
483
+ for (const raw of rawCookies) {
484
+ const tuple = raw.split(";")[0];
485
+ const separatorIdx = tuple.indexOf("=");
486
+ if (separatorIdx < 1) {
487
+ continue;
488
+ }
489
+ const name = tuple.slice(0, separatorIdx).trim();
490
+ const value = tuple.slice(separatorIdx + 1).trim();
491
+ if (!name) {
492
+ continue;
493
+ }
494
+ this.cookieJar[name] = value;
495
+ }
496
+ }
497
+ isChallengeResponse(html) {
498
+ return html.includes("\uC790\uB3D9\uB4F1\uB85D\uBC29\uC9C0\uB97C \uC704\uD574 \uBCF4\uC548\uC808\uCC28\uB97C \uAC70\uCE58\uACE0 \uC788\uC2B5\uB2C8\uB2E4.") || html.includes("/___verify") || html.includes("Please prove that you are human.");
499
+ }
408
500
  };
409
501
  var normalizeSgDate = (date) => {
410
502
  const digits = date.replace(/\D/g, "").slice(0, 8);
411
503
  return digits.length === 8 ? digits : date;
412
504
  };
505
+ var nextChallengeAttempt = (attempt) => {
506
+ return attempt + 1;
507
+ };
413
508
 
414
509
  // src/repositories/scrapers/haksikScraper.ts
415
510
  var HaksikScraper = class extends SoongguriScraper {
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@bluessu/meal-scraper",
3
3
  "private": false,
4
- "version": "0.1.0",
4
+ "version": "0.1.1",
5
5
  "description": "A meal menu scraper for dormitory meal info.",
6
6
  "type": "commonjs",
7
7
  "author": "bluessu",
@@ -36,6 +36,18 @@
36
36
  }
37
37
  },
38
38
  "funding": "https://github.com/blue-ssu/meal-scraper",
39
+ "scripts": {
40
+ "typecheck": "tsc --noEmit -p tsconfig.json",
41
+ "build": "tsup",
42
+ "clean": "rimraf dist",
43
+ "prepare": "pnpm run build",
44
+ "quality": "pnpm run clean && pnpm run typecheck && pnpm run build",
45
+ "prepublishOnly": "pnpm run quality",
46
+ "prepack": "pnpm run quality",
47
+ "release:patch": "pnpm version patch && pnpm publish --access public",
48
+ "release:minor": "pnpm version minor && pnpm publish --access public",
49
+ "release:major": "pnpm version major && pnpm publish --access public"
50
+ },
39
51
  "files": [
40
52
  "dist",
41
53
  "README.md",
@@ -56,14 +68,5 @@
56
68
  "rimraf": "^5.0.9",
57
69
  "typescript": "^5.5.4",
58
70
  "@types/node": "^22.7.1"
59
- },
60
- "scripts": {
61
- "typecheck": "tsc --noEmit -p tsconfig.json",
62
- "build": "tsup",
63
- "clean": "rimraf dist",
64
- "quality": "pnpm run clean && pnpm run typecheck && pnpm run build",
65
- "release:patch": "pnpm version patch && pnpm publish --access public",
66
- "release:minor": "pnpm version minor && pnpm publish --access public",
67
- "release:major": "pnpm version major && pnpm publish --access public"
68
71
  }
69
- }
72
+ }