@bluessu/meal-scraper 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +7 -102
- package/dist/index.mjs +7 -102
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -378,14 +378,17 @@ var SoongguriScraper = class {
|
|
|
378
378
|
constructor(settings, cafeteriaType) {
|
|
379
379
|
this.settings = settings;
|
|
380
380
|
this.cafeteriaType = cafeteriaType;
|
|
381
|
-
this.challengeRetryLimit = 2;
|
|
382
|
-
this.cookieJar = {};
|
|
383
381
|
}
|
|
384
382
|
async scrapeMenu(date) {
|
|
385
383
|
const normalizedDate = normalizeSgDate(date);
|
|
386
|
-
const url = this.
|
|
384
|
+
const url = `${this.settings.soongguriBaseUrl}?rcd=${getRcd(this.cafeteriaType, this.settings)}&sdt=${normalizedDate}`;
|
|
387
385
|
try {
|
|
388
|
-
const
|
|
386
|
+
const res = await import_axios.default.get(url, {
|
|
387
|
+
timeout: this.settings.timeoutMs,
|
|
388
|
+
responseType: "text",
|
|
389
|
+
validateStatus: (s) => s >= 200 && s < 300
|
|
390
|
+
});
|
|
391
|
+
const html = String(res.data);
|
|
389
392
|
const hasHoliday = html.includes("\uC624\uB298\uC740 \uC27D\uB2C8\uB2E4.") || html.includes("\uD734\uBB34");
|
|
390
393
|
if (hasHoliday) {
|
|
391
394
|
throw new HolidayException(
|
|
@@ -447,109 +450,11 @@ var SoongguriScraper = class {
|
|
|
447
450
|
);
|
|
448
451
|
}
|
|
449
452
|
}
|
|
450
|
-
async fetchWithRetry(url, attempt = 0, targetDate) {
|
|
451
|
-
const response = await import_axios.default.get(url, {
|
|
452
|
-
timeout: this.settings.timeoutMs,
|
|
453
|
-
responseType: "text",
|
|
454
|
-
validateStatus: (s) => s >= 200 && s < 300,
|
|
455
|
-
headers: this.buildBrowserLikeHeaders(attempt)
|
|
456
|
-
});
|
|
457
|
-
this.applySetCookies(response.headers);
|
|
458
|
-
const html = String(response.data);
|
|
459
|
-
if (this.isChallengeResponse(html) && attempt < this.challengeRetryLimit) {
|
|
460
|
-
const nextAttempt = attempt + 1;
|
|
461
|
-
return this.fetchWithRetry(
|
|
462
|
-
this.buildMenuUrl(url, nextAttempt),
|
|
463
|
-
nextAttempt,
|
|
464
|
-
targetDate
|
|
465
|
-
);
|
|
466
|
-
}
|
|
467
|
-
if (this.isChallengeResponse(html) && attempt >= this.challengeRetryLimit) {
|
|
468
|
-
throw new MenuFetchException(
|
|
469
|
-
targetDate,
|
|
470
|
-
this.cafeteriaType,
|
|
471
|
-
"\uC790\uB3D9\uB4F1\uB85D\uBC29\uC9C0 \uC6B0\uD68C \uC2E4\uD328",
|
|
472
|
-
html,
|
|
473
|
-
{
|
|
474
|
-
endpoint: url,
|
|
475
|
-
operation: "scrape",
|
|
476
|
-
cafeteria: this.cafeteriaType,
|
|
477
|
-
challengeBypass: true,
|
|
478
|
-
attempts: attempt,
|
|
479
|
-
ckattempt: nextChallengeAttempt(attempt)
|
|
480
|
-
}
|
|
481
|
-
);
|
|
482
|
-
}
|
|
483
|
-
return html;
|
|
484
|
-
}
|
|
485
|
-
buildMenuUrl(url, ckattempt) {
|
|
486
|
-
const parsed = new URL(url);
|
|
487
|
-
const nextUrl = new URL(`${parsed.origin}${parsed.pathname}`);
|
|
488
|
-
const params = new URLSearchParams(parsed.search);
|
|
489
|
-
const rcd = params.get("rcd") ?? String(getRcd(this.cafeteriaType, this.settings));
|
|
490
|
-
const sdt = params.get("sdt") ?? "";
|
|
491
|
-
nextUrl.searchParams.set("rcd", rcd);
|
|
492
|
-
nextUrl.searchParams.set("sdt", sdt);
|
|
493
|
-
if (typeof ckattempt === "number") {
|
|
494
|
-
nextUrl.searchParams.set("ckattempt", String(ckattempt));
|
|
495
|
-
}
|
|
496
|
-
return nextUrl.toString();
|
|
497
|
-
}
|
|
498
|
-
buildBrowserLikeHeaders(attempt) {
|
|
499
|
-
const headers = {
|
|
500
|
-
accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
|
501
|
-
"accept-language": "ko-KR,ko;q=0.9,en-US;q=0.7,en;q=0.6",
|
|
502
|
-
"cache-control": "no-cache",
|
|
503
|
-
pragma: "no-cache",
|
|
504
|
-
referer: this.settings.soongguriBaseUrl,
|
|
505
|
-
"sec-fetch-dest": "document",
|
|
506
|
-
"sec-fetch-mode": "navigate",
|
|
507
|
-
"sec-fetch-site": "same-origin",
|
|
508
|
-
"user-agent": "Mozilla/5.0 (Linux; Android 13; Pixel 6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Mobile Safari/537.36"
|
|
509
|
-
};
|
|
510
|
-
if (attempt > 0) {
|
|
511
|
-
headers["upgrade-insecure-requests"] = "1";
|
|
512
|
-
}
|
|
513
|
-
const cookie = this.getCookieHeader();
|
|
514
|
-
if (cookie) {
|
|
515
|
-
headers.cookie = cookie;
|
|
516
|
-
}
|
|
517
|
-
return headers;
|
|
518
|
-
}
|
|
519
|
-
getCookieHeader() {
|
|
520
|
-
return Object.entries(this.cookieJar).filter(([, value]) => value.length > 0).map(([name, value]) => `${name}=${value}`).join("; ");
|
|
521
|
-
}
|
|
522
|
-
applySetCookies(headers) {
|
|
523
|
-
const setCookie = headers["set-cookie"];
|
|
524
|
-
if (!setCookie) {
|
|
525
|
-
return;
|
|
526
|
-
}
|
|
527
|
-
const rawCookies = Array.isArray(setCookie) ? setCookie : [setCookie];
|
|
528
|
-
for (const raw of rawCookies) {
|
|
529
|
-
const tuple = raw.split(";")[0];
|
|
530
|
-
const separatorIdx = tuple.indexOf("=");
|
|
531
|
-
if (separatorIdx < 1) {
|
|
532
|
-
continue;
|
|
533
|
-
}
|
|
534
|
-
const name = tuple.slice(0, separatorIdx).trim();
|
|
535
|
-
const value = tuple.slice(separatorIdx + 1).trim();
|
|
536
|
-
if (!name) {
|
|
537
|
-
continue;
|
|
538
|
-
}
|
|
539
|
-
this.cookieJar[name] = value;
|
|
540
|
-
}
|
|
541
|
-
}
|
|
542
|
-
isChallengeResponse(html) {
|
|
543
|
-
return html.includes("\uC790\uB3D9\uB4F1\uB85D\uBC29\uC9C0\uB97C \uC704\uD574 \uBCF4\uC548\uC808\uCC28\uB97C \uAC70\uCE58\uACE0 \uC788\uC2B5\uB2C8\uB2E4.") || html.includes("/___verify") || html.includes("Please prove that you are human.");
|
|
544
|
-
}
|
|
545
453
|
};
|
|
546
454
|
var normalizeSgDate = (date) => {
|
|
547
455
|
const digits = date.replace(/\D/g, "").slice(0, 8);
|
|
548
456
|
return digits.length === 8 ? digits : date;
|
|
549
457
|
};
|
|
550
|
-
var nextChallengeAttempt = (attempt) => {
|
|
551
|
-
return attempt + 1;
|
|
552
|
-
};
|
|
553
458
|
|
|
554
459
|
// src/repositories/scrapers/haksikScraper.ts
|
|
555
460
|
var HaksikScraper = class extends SoongguriScraper {
|
package/dist/index.mjs
CHANGED
|
@@ -333,14 +333,17 @@ var SoongguriScraper = class {
|
|
|
333
333
|
constructor(settings, cafeteriaType) {
|
|
334
334
|
this.settings = settings;
|
|
335
335
|
this.cafeteriaType = cafeteriaType;
|
|
336
|
-
this.challengeRetryLimit = 2;
|
|
337
|
-
this.cookieJar = {};
|
|
338
336
|
}
|
|
339
337
|
async scrapeMenu(date) {
|
|
340
338
|
const normalizedDate = normalizeSgDate(date);
|
|
341
|
-
const url = this.
|
|
339
|
+
const url = `${this.settings.soongguriBaseUrl}?rcd=${getRcd(this.cafeteriaType, this.settings)}&sdt=${normalizedDate}`;
|
|
342
340
|
try {
|
|
343
|
-
const
|
|
341
|
+
const res = await axios.get(url, {
|
|
342
|
+
timeout: this.settings.timeoutMs,
|
|
343
|
+
responseType: "text",
|
|
344
|
+
validateStatus: (s) => s >= 200 && s < 300
|
|
345
|
+
});
|
|
346
|
+
const html = String(res.data);
|
|
344
347
|
const hasHoliday = html.includes("\uC624\uB298\uC740 \uC27D\uB2C8\uB2E4.") || html.includes("\uD734\uBB34");
|
|
345
348
|
if (hasHoliday) {
|
|
346
349
|
throw new HolidayException(
|
|
@@ -402,109 +405,11 @@ var SoongguriScraper = class {
|
|
|
402
405
|
);
|
|
403
406
|
}
|
|
404
407
|
}
|
|
405
|
-
async fetchWithRetry(url, attempt = 0, targetDate) {
|
|
406
|
-
const response = await axios.get(url, {
|
|
407
|
-
timeout: this.settings.timeoutMs,
|
|
408
|
-
responseType: "text",
|
|
409
|
-
validateStatus: (s) => s >= 200 && s < 300,
|
|
410
|
-
headers: this.buildBrowserLikeHeaders(attempt)
|
|
411
|
-
});
|
|
412
|
-
this.applySetCookies(response.headers);
|
|
413
|
-
const html = String(response.data);
|
|
414
|
-
if (this.isChallengeResponse(html) && attempt < this.challengeRetryLimit) {
|
|
415
|
-
const nextAttempt = attempt + 1;
|
|
416
|
-
return this.fetchWithRetry(
|
|
417
|
-
this.buildMenuUrl(url, nextAttempt),
|
|
418
|
-
nextAttempt,
|
|
419
|
-
targetDate
|
|
420
|
-
);
|
|
421
|
-
}
|
|
422
|
-
if (this.isChallengeResponse(html) && attempt >= this.challengeRetryLimit) {
|
|
423
|
-
throw new MenuFetchException(
|
|
424
|
-
targetDate,
|
|
425
|
-
this.cafeteriaType,
|
|
426
|
-
"\uC790\uB3D9\uB4F1\uB85D\uBC29\uC9C0 \uC6B0\uD68C \uC2E4\uD328",
|
|
427
|
-
html,
|
|
428
|
-
{
|
|
429
|
-
endpoint: url,
|
|
430
|
-
operation: "scrape",
|
|
431
|
-
cafeteria: this.cafeteriaType,
|
|
432
|
-
challengeBypass: true,
|
|
433
|
-
attempts: attempt,
|
|
434
|
-
ckattempt: nextChallengeAttempt(attempt)
|
|
435
|
-
}
|
|
436
|
-
);
|
|
437
|
-
}
|
|
438
|
-
return html;
|
|
439
|
-
}
|
|
440
|
-
buildMenuUrl(url, ckattempt) {
|
|
441
|
-
const parsed = new URL(url);
|
|
442
|
-
const nextUrl = new URL(`${parsed.origin}${parsed.pathname}`);
|
|
443
|
-
const params = new URLSearchParams(parsed.search);
|
|
444
|
-
const rcd = params.get("rcd") ?? String(getRcd(this.cafeteriaType, this.settings));
|
|
445
|
-
const sdt = params.get("sdt") ?? "";
|
|
446
|
-
nextUrl.searchParams.set("rcd", rcd);
|
|
447
|
-
nextUrl.searchParams.set("sdt", sdt);
|
|
448
|
-
if (typeof ckattempt === "number") {
|
|
449
|
-
nextUrl.searchParams.set("ckattempt", String(ckattempt));
|
|
450
|
-
}
|
|
451
|
-
return nextUrl.toString();
|
|
452
|
-
}
|
|
453
|
-
buildBrowserLikeHeaders(attempt) {
|
|
454
|
-
const headers = {
|
|
455
|
-
accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
|
456
|
-
"accept-language": "ko-KR,ko;q=0.9,en-US;q=0.7,en;q=0.6",
|
|
457
|
-
"cache-control": "no-cache",
|
|
458
|
-
pragma: "no-cache",
|
|
459
|
-
referer: this.settings.soongguriBaseUrl,
|
|
460
|
-
"sec-fetch-dest": "document",
|
|
461
|
-
"sec-fetch-mode": "navigate",
|
|
462
|
-
"sec-fetch-site": "same-origin",
|
|
463
|
-
"user-agent": "Mozilla/5.0 (Linux; Android 13; Pixel 6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Mobile Safari/537.36"
|
|
464
|
-
};
|
|
465
|
-
if (attempt > 0) {
|
|
466
|
-
headers["upgrade-insecure-requests"] = "1";
|
|
467
|
-
}
|
|
468
|
-
const cookie = this.getCookieHeader();
|
|
469
|
-
if (cookie) {
|
|
470
|
-
headers.cookie = cookie;
|
|
471
|
-
}
|
|
472
|
-
return headers;
|
|
473
|
-
}
|
|
474
|
-
getCookieHeader() {
|
|
475
|
-
return Object.entries(this.cookieJar).filter(([, value]) => value.length > 0).map(([name, value]) => `${name}=${value}`).join("; ");
|
|
476
|
-
}
|
|
477
|
-
applySetCookies(headers) {
|
|
478
|
-
const setCookie = headers["set-cookie"];
|
|
479
|
-
if (!setCookie) {
|
|
480
|
-
return;
|
|
481
|
-
}
|
|
482
|
-
const rawCookies = Array.isArray(setCookie) ? setCookie : [setCookie];
|
|
483
|
-
for (const raw of rawCookies) {
|
|
484
|
-
const tuple = raw.split(";")[0];
|
|
485
|
-
const separatorIdx = tuple.indexOf("=");
|
|
486
|
-
if (separatorIdx < 1) {
|
|
487
|
-
continue;
|
|
488
|
-
}
|
|
489
|
-
const name = tuple.slice(0, separatorIdx).trim();
|
|
490
|
-
const value = tuple.slice(separatorIdx + 1).trim();
|
|
491
|
-
if (!name) {
|
|
492
|
-
continue;
|
|
493
|
-
}
|
|
494
|
-
this.cookieJar[name] = value;
|
|
495
|
-
}
|
|
496
|
-
}
|
|
497
|
-
isChallengeResponse(html) {
|
|
498
|
-
return html.includes("\uC790\uB3D9\uB4F1\uB85D\uBC29\uC9C0\uB97C \uC704\uD574 \uBCF4\uC548\uC808\uCC28\uB97C \uAC70\uCE58\uACE0 \uC788\uC2B5\uB2C8\uB2E4.") || html.includes("/___verify") || html.includes("Please prove that you are human.");
|
|
499
|
-
}
|
|
500
408
|
};
|
|
501
409
|
var normalizeSgDate = (date) => {
|
|
502
410
|
const digits = date.replace(/\D/g, "").slice(0, 8);
|
|
503
411
|
return digits.length === 8 ? digits : date;
|
|
504
412
|
};
|
|
505
|
-
var nextChallengeAttempt = (attempt) => {
|
|
506
|
-
return attempt + 1;
|
|
507
|
-
};
|
|
508
413
|
|
|
509
414
|
// src/repositories/scrapers/haksikScraper.ts
|
|
510
415
|
var HaksikScraper = class extends SoongguriScraper {
|