@bluessu/meal-scraper 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs ADDED
@@ -0,0 +1,907 @@
1
+ // src/errors.ts
2
/**
 * Serializes an exception context for embedding in an error message.
 * Returns a fixed placeholder instead of throwing when the context cannot be
 * serialized (e.g. it contains a BigInt or a circular reference).
 */
var stringifyExceptionContext = (context) => {
  try {
    return JSON.stringify(context);
  } catch {
    return "[unserializable context]";
  }
};

/**
 * Base class for every error raised by the scraper.
 *
 * The message has the form `<cafeteria>(<targetDate>) <message>` and, when the
 * context has at least one key, ` | context=<json>` is appended.
 *
 * @param targetDate Date the failing operation was about.
 * @param cafeteria Cafeteria the failing operation was about.
 * @param message Human-readable description of the failure.
 * @param rawData Arbitrary payload kept for debugging (HTML, underlying error, ...).
 * @param context Extra key/value details; a missing or null context is treated as `{}`.
 */
var BaseCafeteriaException = class extends Error {
  constructor(targetDate, cafeteria, message, rawData, context = {}) {
    // The default parameter only covers `undefined`; guard against `null` too so
    // building the error never throws and hides the original failure.
    const safeContext = context ?? {};
    const withContext = Object.keys(safeContext).length > 0
      ? `${message} | context=${stringifyExceptionContext(safeContext)}`
      : message;
    super(`${cafeteria}(${targetDate}) ${withContext}`);
    this.targetDate = targetDate;
    this.cafeteria = cafeteria;
    this.rawData = rawData;
    // Use the concrete subclass name so logs distinguish the error kinds.
    this.name = this.constructor.name;
    this.context = safeContext;
  }
};
13
/** Error signalling that the cafeteria is closed on the requested date. */
var HolidayException = class extends BaseCafeteriaException {};
15
/** Error raised when a menu could not be fetched or located for a date. */
var MenuFetchException = class extends BaseCafeteriaException {};
17
/** Error raised when raw menu text could not be turned into a daily menu. */
var MenuParseException = class extends BaseCafeteriaException {};
19
+
20
+ // src/domain.ts
21
/**
 * Identifiers of the supported cafeterias.
 * Each key maps to a string equal to the key itself.
 */
var CafeteriaType = {
  HAKSIK: "HAKSIK",
  DODAM: "DODAM",
  FACULTY: "FACULTY",
  DORMITORY: "DORMITORY"
};
28
// Operating status stored on a daily menu. `Open` is the default used by
// createDailyMenu; `Closed` is what the scraping service reports for a holiday.
var CafeteriaStatus = {
  Open: "open",
  Closed: "closed"
};
32
/**
 * Maps a free-form slot label to one of "breakfast", "lunch" or "dinner".
 *
 * All whitespace is removed and the label is lower-cased before matching, so
 * labels such as "중 식 1", "Lunch" or "DINNER" are recognized. Korean and
 * English keywords are matched as substrings; breakfast is checked first, then
 * lunch, then dinner.
 *
 * @param slot Slot label; null/undefined is treated as an empty label.
 * @returns The normalized slot key, or undefined when no keyword matches.
 */
var normalizeMenuSlot = (slot) => {
  // Lower-casing makes the English keywords case-insensitive; it does not
  // affect the Hangul keywords.
  const normalized = String(slot ?? "").replace(/\s+/g, "").toLowerCase();
  const mentionsAny = (...keywords) => keywords.some((keyword) => normalized.includes(keyword));
  if (mentionsAny("\uC870\uC2DD", "\uC544\uCE68", "breakfast")) {
    return "breakfast";
  }
  if (mentionsAny("\uC911\uC2DD", "\uC810\uC2EC", "lunch")) {
    return "lunch";
  }
  if (mentionsAny("\uC11D\uC2DD", "\uC800\uB141", "dinner")) {
    return "dinner";
  }
  return void 0;
};
45
/**
 * Builds a daily-menu record.
 *
 * @param date Date of the menu.
 * @param cafeteria Cafeteria the menu belongs to.
 * @param menus Object whose own enumerable properties are copied onto the result.
 * @param status Operating status; defaults to CafeteriaStatus.Open.
 * @returns A new object holding date, cafeteria and status followed by the copied menu properties.
 */
var createDailyMenu = (date, cafeteria, menus, status = CafeteriaStatus.Open) => {
  const record = { date, cafeteria, status };
  // Menu properties are copied last, so they win on a key collision.
  return Object.assign(record, menus);
};
48
+
49
+ // src/parsers/noopMenuParser.ts
50
/**
 * Splits raw menu text into individual item strings.
 * Text is cut on newlines, "/", ",", "&" and single spaces; every "*" is
 * removed, pieces are trimmed and empty pieces are discarded.
 */
var splitItems = (text) => {
  const items = [];
  for (const segment of text.split(/[\n\/,]/)) {
    for (const part of segment.split("&")) {
      for (const word of part.split(" ")) {
        const cleaned = word.replace(/\*/g, "").trim();
        if (cleaned) {
          items.push(cleaned);
        }
      }
    }
  }
  return items;
};
51
/**
 * Parser that needs no external service: it splits each slot's text with
 * splitItems and keeps only the items containing at least one Hangul syllable.
 */
var NoopMenuParser = class {
  /**
   * @param raw Raw menu with `date`, `cafeteria` and a `menuTexts` map of slot label to text.
   * @returns A daily menu whose breakfast/lunch/dinner objects map each
   *          original slot label to its de-duplicated item list.
   * @throws MenuParseException wrapping any error raised while parsing.
   */
  async parseMenu(raw) {
    try {
      const emptySlots = { breakfast: {}, lunch: {}, dinner: {} };
      const dailyMenu = createDailyMenu(raw.date, raw.cafeteria, emptySlots);
      const slotLabels = Object.keys(raw.menuTexts);
      for (const slot of slotLabels) {
        const koreanItems = splitItems(raw.menuTexts[slot]).filter((item) => /[가-힣]/.test(item));
        const slotKey = normalizeMenuSlot(slot);
        if (slotKey) {
          // Several labels may normalize to the same key; keep earlier entries.
          const merged = { ...dailyMenu[slotKey] };
          merged[slot] = Array.from(new Set(koreanItems));
          dailyMenu[slotKey] = merged;
        }
      }
      return dailyMenu;
    } catch (err) {
      throw new MenuParseException(
        raw.date,
        raw.cafeteria,
        "\uAE30\uBCF8 \uD30C\uC2F1 \uC2E4\uD328",
        err
      );
    }
  }
};
79
+
80
+ // src/parsers/gptMenuParser.ts
81
// System message sent with every GPT parsing request (Korean). It instructs
// the model to act as a Korean university cafeteria menu parser and to reply
// with JSON of the form { "menus": [...] } only: Hangul menu names, no
// decorative modifiers, no duplicates, an empty array when nothing is found,
// and the main dish listed first.
var SYSTEM_PROMPT = `\uB2F9\uC2E0\uC740 \uD55C\uAD6D \uB300\uD559 \uC2DD\uB2F9 \uBA54\uB274 \uB370\uC774\uD130\uB97C \uC815\uD655\uD558\uAC8C \uD30C\uC2F1\uD558\uB294 \uC804\uBB38\uAC00\uC785\uB2C8\uB2E4.
- \uBA54\uB274\uBA85\uB9CC \uCD94\uCD9C\uD574\uC11C \uC544\uB798 JSON \uD615\uC2DD\uC73C\uB85C \uBC18\uD658:
{ "menus": ["\uBA54\uB274\uBA851", "\uBA54\uB274\uBA852"] }
- \uBA54\uB274\uBA85\uC740 \uD55C\uAE00\uB9CC \uC0AC\uC6A9
- \uBD88\uD544\uC694\uD55C \uC218\uC2DD\uC5B4 (\uC608: "\uB9DB\uC788\uB294", "\uC2E0\uC120\uD55C")\uB294 \uC81C\uAC70
- \uC911\uBCF5\uB41C \uBA54\uB274\uBA85\uC740 \uD55C \uBC88\uB9CC \uC791\uC131
- \uBA54\uB274\uBA85\uC774 \uC5C6\uB294 \uACBD\uC6B0 menus\uB294 \uBE48 \uBC30\uC5F4\uB85C \uBC18\uD658
- JSON \uC678 \uD14D\uC2A4\uD2B8\uB97C \uC808\uB300 \uD3EC\uD568\uD558\uC9C0 \uC54A\uC74C
- \uBA54\uC778 \uBA54\uB274\uB97C \uAC00\uC7A5 \uC55E\uC5D0 \uBC30\uCE58
`;
91
/**
 * Menu parser that asks an OpenAI chat model to extract menu names from the
 * raw text of each slot. The `openai` package is loaded lazily with a dynamic
 * import, so it only has to be installed when this parser is used.
 */
var GPTMenuParser = class _GPTMenuParser {
  static {
    // Model identifier sent with every chat-completion request.
    this.model = "gpt-5-nano";
  }
  /**
   * @param apiKey API key passed to the OpenAI client constructor.
   */
  constructor(apiKey) {
    // resolveOpenAI is async, so `client` is a promise; parseMenuText awaits it.
    this.client = this.resolveOpenAI(apiKey);
    // Mark the promise as handled so a missing `openai` package does not
    // surface as an unhandled rejection when the parser is constructed but
    // never used. Awaiting `this.client` later still rejects with the error.
    this.client.catch(() => {
    });
  }
  /**
   * Dynamically imports `openai` and instantiates its client.
   *
   * @throws Error with an installation hint when the module cannot be found,
   *         or when the module exposes neither `default` nor `OpenAI`.
   */
  async resolveOpenAI(apiKey) {
    try {
      const mod = await import("openai");
      const OpenAIConstructor = mod.default ?? mod.OpenAI;
      if (!OpenAIConstructor) {
        throw new Error(
          "openai \uBAA8\uB4C8\uC5D0\uC11C OpenAI \uD074\uB798\uC2A4 \uCD08\uAE30\uD654\uB97C \uCC3E\uC9C0 \uBABB\uD588\uC2B5\uB2C8\uB2E4."
        );
      }
      return new OpenAIConstructor({ apiKey });
    } catch (err) {
      const isModuleNotFound = err instanceof Error && (err.message.includes("Cannot find module") || err.code === "ERR_MODULE_NOT_FOUND");
      if (isModuleNotFound) {
        throw new Error(
          'openai \uD328\uD0A4\uC9C0\uAC00 \uC124\uCE58\uB418\uC5B4 \uC788\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4. parser="gpt" \uC0AC\uC6A9 \uC2DC `pnpm add openai`\uAC00 \uD544\uC694\uD569\uB2C8\uB2E4.'
        );
      }
      throw err;
    }
  }
  /**
   * Strips leading list decoration (whitespace, dashes, asterisks, bullets,
   * digits, dots, closing parentheses) and collapses inner whitespace.
   */
  sanitizeMenuName(menu) {
    return String(menu).replace(/^[\s\-\*\u2022·•\d\.\)]+/g, "").replace(/\s+/g, " ").trim();
  }
  /**
   * Collapses immediately repeated phrases in whitespace-separated text.
   *
   * At each position the longest phrase (up to 12 tokens) that is directly
   * followed by an identical copy is looked for. When one is found it is kept
   * once and every consecutive copy is skipped; otherwise the single token is
   * kept and scanning continues with the next token.
   *
   * @returns The normalized text, or "" when the input has no tokens.
   */
  normalizeTextForParsing(text) {
    const tokens = text.replace(/\s+/g, " ").trim().split(" ").filter((token) => token.length > 0);
    if (tokens.length === 0) {
      return "";
    }
    const maxPhraseLength = 12;
    // True when the `len` tokens starting at `a` equal the `len` tokens starting at `b`.
    const samePhrase = (a, b, len) => {
      for (let j = 0; j < len; j++) {
        if (tokens[a + j] !== tokens[b + j]) {
          return false;
        }
      }
      return true;
    };
    const normalized = [];
    let i = 0;
    while (i < tokens.length) {
      const remaining = tokens.length - i;
      const maxLen = Math.min(maxPhraseLength, Math.floor(remaining / 2));
      let repeatedLen = 0;
      for (let len = maxLen; len >= 1; len--) {
        if (samePhrase(i, i + len, len)) {
          repeatedLen = len;
          break;
        }
      }
      if (repeatedLen === 0) {
        // No repetition here: keep this token and advance by exactly one so
        // the following token is not skipped.
        normalized.push(tokens[i]);
        i += 1;
        continue;
      }
      normalized.push(...tokens.slice(i, i + repeatedLen));
      // Skip every directly following copy of the phrase, not just the first.
      let next = i + repeatedLen;
      while (next + repeatedLen <= tokens.length && samePhrase(i, next, repeatedLen)) {
        next += repeatedLen;
      }
      i = next;
    }
    return normalized.join(" ");
  }
  /**
   * Sends one slot's text to the model and returns the sanitized menu names.
   *
   * @returns [] for blank input without calling the model.
   * @throws Error when the response has no choices, has empty content, is not
   *         valid JSON, or lacks a `menus` array.
   */
  async parseMenuText(text) {
    if (!text || text.trim().length === 0) {
      return [];
    }
    const normalizedText = this.normalizeTextForParsing(text);
    if (!normalizedText) {
      return [];
    }
    const client = await this.client;
    const result = await client.chat.completions.create({
      model: _GPTMenuParser.model,
      messages: [
        { role: "system", content: SYSTEM_PROMPT },
        {
          role: "user",
          content: `\uB2E4\uC74C \uD14D\uC2A4\uD2B8\uC5D0\uC11C \uBA54\uB274\uBA85\uB9CC \uCD94\uCD9C\uD558\uC138\uC694.

${normalizedText}`
        }
      ],
      response_format: { type: "json_object" }
    });
    if (!result?.choices || !result.choices.length) {
      throw new Error("\uBAA8\uB378 \uC751\uB2F5 \uD615\uC2DD\uC774 \uC62C\uBC14\uB974\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4.");
    }
    const content = result.choices[0]?.message?.content;
    if (!content) {
      throw new Error("\uBAA8\uB378 \uC751\uB2F5\uC774 \uBE44\uC5B4 \uC788\uC2B5\uB2C8\uB2E4.");
    }
    const parsed = JSON.parse(content);
    const menus = parsed?.menus;
    if (!Array.isArray(menus)) {
      throw new Error("menus \uBC30\uC5F4\uC774 \uC5C6\uC2B5\uB2C8\uB2E4.");
    }
    return menus.map((item) => this.sanitizeMenuName(String(item))).filter((item) => item.length > 0);
  }
  /**
   * Parses every slot of a raw menu concurrently.
   *
   * @param raw Raw menu with `date`, `cafeteria` and a `menuTexts` map.
   * @returns A daily menu whose breakfast/lunch/dinner objects map each
   *          original slot label to its de-duplicated menu names.
   * @throws MenuParseException when at least one slot failed; its rawData is a
   *         JSON string mapping the failing slot labels to error text.
   */
  async parseMenu(raw) {
    const menus = {
      breakfast: {},
      lunch: {},
      dinner: {}
    };
    const errors = {};
    await Promise.all(
      Object.entries(raw.menuTexts).map(async ([slot, text]) => {
        // Resolve the slot before calling the model so no request is spent on
        // a label whose result would be thrown away.
        const slotKey = normalizeMenuSlot(slot);
        if (!slotKey) return;
        try {
          const parsed = await this.parseMenuText(text);
          menus[slotKey][slot] = Array.from(new Set(parsed));
        } catch (err) {
          errors[slot] = String(err);
        }
      })
    );
    if (Object.keys(errors).length > 0) {
      throw new MenuParseException(
        raw.date,
        raw.cafeteria,
        "\uC77C\uBD80 \uC2AC\uB86F \uD30C\uC2F1 \uC2E4\uD328",
        JSON.stringify(errors)
      );
    }
    return createDailyMenu(raw.date, raw.cafeteria, menus);
  }
};
219
+
220
+ // src/repositories/scrapers/soongguriScraper.ts
221
+ import axios from "axios";
222
+
223
+ // src/config.ts
224
// Default scraper configuration.
// - soongguriBaseUrl / dormitoryBaseUrl: endpoints the scrapers request.
// - haksikRcd / dodamRcd / facultyRcd: value sent as the `rcd` query parameter
//   for the matching cafeteria type (looked up by getRcd).
// - timeoutMs: request timeout handed to axios, in milliseconds (15e3 = 15000).
var defaultSettings = {
  soongguriBaseUrl: "http://m.soongguri.com/m_req/m_menu.php",
  dormitoryBaseUrl: "https://ssudorm.ssu.ac.kr:444/SShostel/mall_main.php",
  haksikRcd: 1,
  dodamRcd: 2,
  facultyRcd: 7,
  timeoutMs: 15e3
};
232
/**
 * Looks up the `rcd` code configured for a cafeteria type.
 *
 * @param type Cafeteria type string.
 * @param settings Settings object holding haksikRcd, dodamRcd and facultyRcd.
 * @returns The configured code, or 0 for any other type.
 */
var getRcd = (type, settings) => {
  if (type === "HAKSIK" /* HAKSIK */) {
    return settings.haksikRcd;
  }
  if (type === "DODAM" /* DODAM */) {
    return settings.dodamRcd;
  }
  if (type === "FACULTY" /* FACULTY */) {
    return settings.facultyRcd;
  }
  return 0;
};
244
+
245
+ // src/utils/parsing.ts
246
+ import * as cheerio from "cheerio";
247
/**
 * Flattens text onto one line: carriage returns are deleted, newline runs and
 * then any whitespace runs become a single space, and the ends are trimmed.
 */
var normalizeText = (v) => {
  const withoutCarriageReturns = v.replace(/\r/g, "");
  const singleLine = withoutCarriageReturns.replace(/\n+/g, " ");
  const collapsed = singleLine.replace(/\s+/g, " ");
  return collapsed.trim();
};
248
/**
 * Extracts a slot-label to text map from menu HTML.
 *
 * First pass: every <tr> containing a non-empty `td.menu_nm` cell contributes
 * that cell's trimmed text as the key and the normalized, space-joined text of
 * all nodes below the row's descendants as the value.
 * Second pass (only when the first found nothing): rows with at least two
 * cells whose first cell contains a meal keyword are used, the remaining
 * non-empty cells being joined with spaces.
 */
var parseTableToDict = (html) => {
  const $ = cheerio.load(html);
  const result = {};
  // Pass 1: rows marked with the `menu_nm` class.
  for (const tr of $("tr").toArray()) {
    const $row = $(tr);
    const slotName = $row.find("td.menu_nm").first().text().trim();
    if (!slotName) continue;
    const fragments = [];
    $row.find("*").contents().each((_, node) => {
      fragments.push($(node).text());
    });
    result[slotName] = normalizeText(fragments.join(" "));
  }
  if (Object.keys(result).length > 0) {
    return result;
  }
  // Pass 2: generic rows keyed by a meal keyword in the first cell.
  const slotKeywords = /조식|중식|석식|점심|저녁|아침/;
  for (const tr of $("tr").toArray()) {
    const cells = $(tr).find("td, th").toArray();
    if (cells.length < 2) continue;
    const key = normalizeText($(cells[0]).text());
    if (!key || !slotKeywords.test(key)) continue;
    const cellTexts = [];
    for (const cell of cells.slice(1)) {
      const cellText = normalizeText($(cell).text());
      if (cellText.length > 0) {
        cellTexts.push(cellText);
      }
    }
    const values = cellTexts.join(" ");
    if (!values) continue;
    result[key] = values;
  }
  return result;
};
278
/**
 * Returns a new object with the same keys as `menuDict` whose values have
 * been passed through normalizeText.
 */
var stripStringFromDict = (menuDict) =>
  Object.entries(menuDict).reduce((cleaned, [key, value]) => {
    cleaned[key] = normalizeText(value);
    return cleaned;
  }, {});
285
/**
 * Expands an HTML table into a rectangular-ish matrix of cell texts,
 * duplicating a cell's text across the positions covered by its
 * colspan/rowspan.
 *
 * @param tableHtml HTML containing <tr> rows. If no row is found the markup
 *        is wrapped in <table>...</table> and parsed again.
 * @returns One array of normalized strings per row; [] for empty input.
 *          Positions never written by any cell are returned as "".
 */
var make2d = (tableHtml) => {
  if (!tableHtml) return [];
  let $ = cheerio.load(tableHtml);
  let rows = $("tr");
  if (!rows.length) {
    $ = cheerio.load(`<table>${tableHtml}</table>`);
    rows = $("tr");
  }
  const matrix = [];
  // Missing, non-numeric or non-positive span attributes count as 1.
  const toSpan = (value) => {
    const v = Number.parseInt(value ?? "1", 10);
    return Number.isNaN(v) || v < 1 ? 1 : v;
  };
  rows.each((rIdx, tr) => {
    const rowCells = $(tr).children("th,td");
    if (!matrix[rIdx]) matrix[rIdx] = [];
    let cIdx = 0;
    rowCells.each((_, cell) => {
      // Skip positions already filled by a rowspan from an earlier row.
      while (matrix[rIdx][cIdx] !== void 0) cIdx += 1;
      const txt = normalizeText($(cell).text());
      const colspan = toSpan($(cell).attr("colspan"));
      const rowspan = toSpan($(cell).attr("rowspan"));
      for (let cc = 0; cc < colspan; cc++) {
        matrix[rIdx][cIdx + cc] = txt;
      }
      for (let rr = 1; rr < rowspan; rr++) {
        const targetRow = rIdx + rr;
        if (!matrix[targetRow]) matrix[targetRow] = [];
        for (let cc = 0; cc < colspan; cc++) {
          matrix[targetRow][cIdx + cc] = txt;
        }
      }
      cIdx += colspan;
    });
  });
  // Array.from visits holes (Array.prototype.map skips them), so positions
  // that were never assigned become "" instead of staying empty slots.
  return Array.from(matrix, (row) => Array.from(row ?? [], (v) => normalizeText(v ?? "")));
};
322
/**
 * Finds the first `table.boxstyle02` in the document and converts its inner
 * HTML to a text matrix with make2d.
 *
 * @returns [] when the table is absent or has no inner HTML.
 */
var make2dFromHtml = (html) => {
  const $ = cheerio.load(html);
  const table = $("table.boxstyle02").first();
  const innerHtml = table.length ? table.html() : null;
  return innerHtml ? make2d(innerHtml) : [];
};
330
+
331
+ // src/repositories/scrapers/soongguriScraper.ts
332
/**
 * Scraper for the Soongguri menu endpoint, parameterized by cafeteria type.
 */
var SoongguriScraper = class {
  /**
   * @param settings Settings providing soongguriBaseUrl, the rcd codes and timeoutMs.
   * @param cafeteriaType Cafeteria whose menu is requested.
   */
  constructor(settings, cafeteriaType) {
    this.settings = settings;
    this.cafeteriaType = cafeteriaType;
  }
  /**
   * Downloads and extracts the raw menu texts for one date.
   *
   * @param date Requested date; reduced to eight digits for the `sdt` query
   *        parameter when possible (see normalizeSgDate).
   * @returns `{ date, cafeteria, menuTexts }` with normalized slot texts.
   * @throws HolidayException when the page contains a closed-day marker.
   * @throws MenuFetchException when no menu rows are found or the request fails.
   */
  async scrapeMenu(date) {
    const cafeteria = this.cafeteriaType;
    const sdt = normalizeSgDate(date);
    const url = `${this.settings.soongguriBaseUrl}?rcd=${getRcd(cafeteria, this.settings)}&sdt=${sdt}`;
    try {
      const response = await axios.get(url, {
        timeout: this.settings.timeoutMs,
        responseType: "text",
        validateStatus: (s) => s >= 200 && s < 300
      });
      const html = String(response.data);
      const closedMarkers = ["\uC624\uB298\uC740 \uC27D\uB2C8\uB2E4.", "\uD734\uBB34"];
      if (closedMarkers.some((marker) => html.includes(marker))) {
        throw new HolidayException(
          date,
          cafeteria,
          "\uD574\uB2F9\uC77C\uC740 \uD734\uBB34\uC77C\uC785\uB2C8\uB2E4.",
          html,
          {
            endpoint: url,
            operation: "scrape",
            cafeteria,
            timeoutMs: this.settings.timeoutMs
          }
        );
      }
      const menus = stripStringFromDict(parseTableToDict(html));
      if (Object.keys(menus).length === 0) {
        throw new MenuFetchException(
          date,
          cafeteria,
          "\uBA54\uB274\uB97C \uCC3E\uC9C0 \uBABB\uD588\uC2B5\uB2C8\uB2E4.",
          { menus, html },
          {
            endpoint: url,
            operation: "parse",
            cafeteria,
            timeoutMs: this.settings.timeoutMs
          }
        );
      }
      return { date, cafeteria, menuTexts: menus };
    } catch (err) {
      // Library errors (HolidayException included, as a subclass) pass through.
      if (err instanceof BaseCafeteriaException) {
        throw err;
      }
      // Anything else is wrapped, with HTTP status details when present.
      const failure = err;
      throw new MenuFetchException(
        date,
        cafeteria,
        "\uBA54\uB274 \uC218\uC9D1 \uC2E4\uD328",
        err,
        {
          endpoint: url,
          cafeteria,
          statusCode: failure?.status ?? failure?.response?.status,
          statusText: failure?.response?.statusText,
          timeoutMs: this.settings.timeoutMs
        }
      );
    }
  }
};
409
/**
 * Reduces a date string to its first eight digits (e.g. "2024-05-01" becomes
 * "20240501"). The input is returned unchanged when it holds fewer than
 * eight digits.
 */
var normalizeSgDate = (date) => {
  const digitsOnly = date.replace(/\D/g, "");
  const firstEight = digitsOnly.slice(0, 8);
  if (firstEight.length !== 8) {
    return date;
  }
  return firstEight;
};
413
+
414
+ // src/repositories/scrapers/haksikScraper.ts
415
/** SoongguriScraper bound to the HAKSIK cafeteria type. */
var HaksikScraper = class extends SoongguriScraper {
  constructor(settings) {
    const cafeteriaType = "HAKSIK" /* HAKSIK */;
    super(settings, cafeteriaType);
  }
};
420
+
421
+ // src/repositories/scrapers/dodamScraper.ts
422
/** SoongguriScraper bound to the DODAM cafeteria type. */
var DodamScraper = class extends SoongguriScraper {
  constructor(settings) {
    const cafeteriaType = "DODAM" /* DODAM */;
    super(settings, cafeteriaType);
  }
};
427
+
428
+ // src/repositories/scrapers/facultyScraper.ts
429
/** SoongguriScraper bound to the FACULTY cafeteria type. */
var FacultyScraper = class extends SoongguriScraper {
  constructor(settings) {
    const cafeteriaType = "FACULTY" /* FACULTY */;
    super(settings, cafeteriaType);
  }
};
434
+
435
+ // src/repositories/scrapers/dormitoryScraper.ts
436
+ import axios2 from "axios";
437
/**
 * Scraper for the dormitory food board. The board page lists several dates;
 * the row matching the requested date is selected.
 */
var DormitoryScraper = class {
  /**
   * @param baseUrl Endpoint of the dormitory board.
   * @param timeoutMs Request timeout in milliseconds (default 15000).
   */
  constructor(baseUrl, timeoutMs = 15e3) {
    this.baseUrl = baseUrl;
    this.timeoutMs = timeoutMs;
  }
  /**
   * Downloads the board for the requested date and returns the matching row.
   *
   * @param date Requested date string (parsed with parseDate).
   * @returns `{ date, cafeteria, menuTexts }` where `date` is the matched row's date key.
   * @throws MenuFetchException when no row with menu text matches the date,
   *         or when the request/decoding/parsing fails.
   */
  async scrapeMenu(date) {
    const requested = parseDate(date);
    const targetDate = formatDateKey(requested);
    const endpoint = this.baseUrl;
    const cafeteria = "DORMITORY" /* DORMITORY */;
    try {
      const response = await axios2.get(endpoint, {
        params: {
          viewform: "B0001_foodboard_list",
          gyear: requested.getFullYear(),
          gmonth: requested.getMonth() + 1,
          gday: requested.getDate()
        },
        timeout: this.timeoutMs,
        responseType: "arraybuffer",
        validateStatus: (s) => s >= 200 && s < 300
      });
      // The body arrives as raw bytes; pick a decoder from the response itself.
      const bytes = new Uint8Array(response.data);
      const encoding = detectEncoding({
        data: response.data,
        headers: response.headers
      });
      const html = new TextDecoder(encoding).decode(bytes);
      const rows = structureRows(make2dFromHtml(html));
      let matched;
      for (const row of rows) {
        const rowDate = parseDateTokenKey(row["\uB0A0\uC9DC"]);
        if (!rowDate || rowDate !== targetDate) continue;
        const menuTexts = extractMenuTexts(row);
        if (Object.keys(menuTexts).length === 0) continue;
        matched = { date: rowDate, cafeteria, menuTexts };
        break;
      }
      if (matched) {
        return matched;
      }
      throw new MenuFetchException(
        date,
        cafeteria,
        "\uC694\uCCAD\uD55C \uB0A0\uC9DC\uC758 \uBA54\uB274\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4",
        void 0,
        {
          endpoint,
          operation: "match",
          cafeteria
        }
      );
    } catch (err) {
      // Library errors (HolidayException included, as a subclass) pass through.
      if (err instanceof BaseCafeteriaException) {
        throw err;
      }
      const failure = err;
      throw new MenuFetchException(
        date,
        cafeteria,
        "\uAE30\uC219\uC0AC \uBA54\uB274 \uD30C\uC2F1 \uC2E4\uD328",
        err,
        {
          endpoint,
          operation: "parse",
          cafeteria,
          timeoutMs: this.timeoutMs,
          targetDate,
          statusCode: failure?.status ?? failure?.response?.status,
          statusText: failure?.response?.statusText
        }
      );
    }
  }
};
522
/**
 * Chooses the text encoding used to decode a response body.
 *
 * The Content-Type header's charset is consulted first, then the first
 * `<meta ... charset=...>` declaration in the body. Charset values may be
 * bare or wrapped in single/double quotes. Labels containing "utf-8"/"utf8"
 * map to "utf-8"; labels containing "euc-kr"/"cp949" map to "euc-kr".
 *
 * @param res Object with `headers` (content-type is read) and `data` (body bytes).
 * @returns "utf-8" or "euc-kr"; "euc-kr" when nothing recognizable is declared.
 */
var detectEncoding = (res) => {
  // Maps a declared charset label to a supported decoder name, if any.
  const toKnownEncoding = (label) => {
    if (!label) return void 0;
    const lower = label.toLowerCase();
    if (lower.includes("utf-8") || lower.includes("utf8")) return "utf-8";
    if (lower.includes("euc-kr") || lower.includes("cp949")) return "euc-kr";
    return void 0;
  };
  const header = String(res.headers["content-type"] ?? "").toLowerCase();
  // `["']?` accepts quoted values such as charset="utf-8".
  const fromHeader = toKnownEncoding(header.match(/charset\s*=\s*["']?([a-z0-9-]+)/i)?.[1]);
  if (fromHeader) {
    return fromHeader;
  }
  // latin1 maps every byte to one character, so the ASCII meta tag can be
  // searched before the real encoding is known.
  const latin1Text = new TextDecoder("latin1").decode(new Uint8Array(res.data));
  const fromMeta = toKnownEncoding(
    latin1Text.match(/<meta[^>]*charset\s*=\s*["']?([a-z0-9-]+)/i)?.[1]
  );
  if (fromMeta) {
    return fromMeta;
  }
  return "euc-kr";
};
541
/**
 * Converts a date string to a Date.
 * "YYYYMMDD" and "YYYY-MM-DD" are built from their numeric parts in local
 * time; any other string is handed to the Date constructor as-is.
 */
var parseDate = (date) => {
  const fromParts = (year, month, day) => new Date(Number(year), Number(month) - 1, Number(day));
  if (/^\d{8}$/.test(date)) {
    return fromParts(date.slice(0, 4), date.slice(4, 6), date.slice(6, 8));
  }
  if (/^\d{4}-\d{2}-\d{2}$/.test(date)) {
    const pieces = date.split("-");
    return fromParts(pieces[0], pieces[1], pieces[2]);
  }
  return new Date(date);
};
558
/**
 * Formats a Date as "YYYY-MM-DD" using its local-time fields, zero-padding
 * month and day to two digits.
 */
var formatDateKey = (date) => {
  const twoDigits = (n) => String(n).padStart(2, "0");
  const parts = [
    date.getFullYear(),
    twoDigits(date.getMonth() + 1),
    twoDigits(date.getDate())
  ];
  return parts.join("-");
};
564
/**
 * Builds a "YYYY-MM-DD" key from numeric parts, or returns "" when the parts
 * do not form a real calendar date (the Date constructor would roll such
 * values over, e.g. Feb 30 to Mar 1, so the round-trip is compared).
 */
var toValidDateKey = (year, month, day) => {
  const parsed = new Date(year, month - 1, day);
  if (parsed.getFullYear() !== year || parsed.getMonth() !== month - 1 || parsed.getDate() !== day) {
    return "";
  }
  return `${String(year).padStart(4, "0")}-${String(month).padStart(2, "0")}-${String(day).padStart(2, "0")}`;
};

/**
 * Extracts a "YYYY-MM-DD" key from the first whitespace-delimited token of a
 * table cell.
 *
 * Dots are treated like dashes, and dashes are ignored when counting digits:
 * eight digits are read as year/month/day, four digits as month/day of the
 * current year. Surrounding whitespace is ignored.
 *
 * @param value Cell text such as "2024.05.01 수", "20240501" or "05-01".
 * @returns The date key, or "" when the token is not a valid calendar date.
 */
var parseDateTokenKey = (value) => {
  if (!value) return "";
  // Trim first so leading whitespace does not yield an empty first token.
  const token = String(value).trim().split(/\s+/)[0];
  const compact = token.replace(/[.]/g, "-").replace(/-/g, "");
  if (/^\d{8}$/.test(compact)) {
    return toValidDateKey(
      Number(compact.slice(0, 4)),
      Number(compact.slice(4, 6)),
      Number(compact.slice(6, 8))
    );
  }
  if (/^\d{4}$/.test(compact)) {
    // Year-less dates are assumed to belong to the current year.
    return toValidDateKey(
      (/* @__PURE__ */ new Date()).getFullYear(),
      Number(compact.slice(0, 2)),
      Number(compact.slice(2, 4))
    );
  }
  return "";
};
605
/**
 * Turns a text matrix whose first row is a header into row objects.
 *
 * The header must contain a cell equal to "날짜" (date); otherwise [] is
 * returned. Each data row with a non-empty date cell yields an object holding
 * the date plus the cells under the "조식", "중식" and "석식" headers
 * (missing cells become "").
 */
var structureRows = (matrix) => {
  if (matrix.length === 0) return [];
  const [headers, ...dataRows] = matrix;
  const dateCol = headers.indexOf("\uB0A0\uC9DC");
  if (dateCol < 0) return [];
  const mealHeaders = ["\uC870\uC2DD", "\uC911\uC2DD", "\uC11D\uC2DD"];
  // [columnIndex, headerText] pairs for the meal columns, in column order.
  const mealColumns = [];
  headers.forEach((header, idx) => {
    if (mealHeaders.includes(header)) {
      mealColumns.push([idx, header]);
    }
  });
  const out = [];
  for (const row of dataRows) {
    if (!row || !row[dateCol]) continue;
    const dict = { "\uB0A0\uC9DC": row[dateCol] || "" };
    for (const [idx, key] of mealColumns) {
      dict[key] = row[idx] || "";
    }
    out.push(dict);
  }
  return out;
};
628
/**
 * Collects the lunch ("중식") and dinner ("석식") texts of a board row.
 *
 * Each cell is split into lines on CRLF or LF; lines are trimmed, and empty
 * lines and lines containing "운영" are dropped. The remaining lines are
 * joined with single spaces. Slots with nothing left are omitted.
 *
 * @param row Row object keyed by header text.
 * @returns Object with at most the keys "중식" and "석식".
 */
var extractMenuTexts = (row) => {
  const out = {};
  for (const slot of ["\uC911\uC2DD", "\uC11D\uC2DD"]) {
    const value = row[slot];
    if (!value) continue;
    // Accept both "\r\n" and bare "\n": with only "\r\n" an LF-separated cell
    // counts as one line and is discarded entirely if it mentions "운영".
    const items = value.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0 && !line.includes("\uC6B4\uC601"));
    if (items.length > 0) {
      out[slot] = items.join(" ");
    }
  }
  return out;
};
640
+
641
+ // src/services/scrapingService.ts
642
/**
 * Orchestrates scraping a cafeteria's raw menu and parsing it into a daily menu.
 */
var FoodScrapingService = class {
  /**
   * @param settings Scraper settings (defaults to defaultSettings).
   * @param parser Object exposing `parseMenu(raw)`.
   * @param parserMode Label of the parser in use, recorded in error contexts.
   */
  constructor(settings = defaultSettings, parser, parserMode = "noop") {
    this.settings = settings;
    this.parser = parser;
    this.parserMode = parserMode;
  }
  /**
   * Instantiates the scraper for a cafeteria type.
   * @throws Error for a cafeteria type without a scraper.
   */
  createScraper(cafeteriaType) {
    switch (cafeteriaType) {
      case "HAKSIK" /* HAKSIK */:
        return new HaksikScraper(this.settings);
      case "DODAM" /* DODAM */:
        return new DodamScraper(this.settings);
      case "FACULTY" /* FACULTY */:
        return new FacultyScraper(this.settings);
      case "DORMITORY" /* DORMITORY */:
        return new DormitoryScraper(
          this.settings.dormitoryBaseUrl,
          this.settings.timeoutMs
        );
      default:
        throw new Error(`Unsupported cafeteria: ${cafeteriaType}`);
    }
  }
  /**
   * Scrapes the raw menu for one cafeteria and date.
   * @throws BaseCafeteriaException subclasses unchanged; any other error is
   *         wrapped in a MenuFetchException.
   */
  async scrapeRawMenu(cafeteriaType, date) {
    try {
      return await this.createScraper(cafeteriaType).scrapeMenu(date);
    } catch (err) {
      if (!(err instanceof BaseCafeteriaException)) {
        throw new MenuFetchException(
          date,
          cafeteriaType,
          "scrape \uC2E4\uD328",
          err,
          {
            targetDate: date,
            cafeteria: cafeteriaType,
            operation: "scrape"
          }
        );
      }
      throw err;
    }
  }
  /**
   * Scrapes and parses a menu. A HolidayException is converted into an empty
   * daily menu with status Closed; every other error propagates.
   */
  async scrapeAndParseMenu(cafeteriaType, date) {
    try {
      const raw = await this.scrapeRawMenu(cafeteriaType, date);
      return await this.parse(raw);
    } catch (err) {
      if (!(err instanceof HolidayException)) {
        throw err;
      }
      const noMenus = { breakfast: {}, lunch: {}, dinner: {} };
      return createDailyMenu(date, cafeteriaType, noMenus, CafeteriaStatus.Closed);
    }
  }
  /**
   * Runs the configured parser on a raw menu.
   * Library errors get parse details merged into their `context` before being
   * rethrown; other errors are wrapped in a MenuParseException.
   */
  async parse(raw) {
    // Built lazily so `raw` is only dereferenced on the failure path.
    const parseContext = () => ({
      parserMode: this.parserMode,
      operation: "parse",
      targetDate: raw.date,
      cafeteria: raw.cafeteria
    });
    try {
      return await this.parser.parseMenu(raw);
    } catch (err) {
      if (err instanceof BaseCafeteriaException) {
        err.context = { ...err.context, ...parseContext() };
        throw err;
      }
      throw new MenuParseException(
        raw.date,
        raw.cafeteria,
        "parse \uC2E4\uD328",
        err,
        parseContext()
      );
    }
  }
};
734
+
735
+ // src/client/dateUtils.ts
736
// Matches a compact eight-digit date string ("YYYYMMDD" shape; digits only, no range check).
var MENU_DATE_COMPACT_RE = /^\d{8}$/;
// Matches a dashed date string ("YYYY-MM-DD" shape; digits only, no range check).
var MENU_DATE_ISO_RE = /^\d{4}-\d{2}-\d{2}$/;
738
/**
 * Parses "YYYYMMDD" or "YYYY-MM-DD" into a local-time Date.
 *
 * @throws RangeError when the string has neither shape or does not name a
 *         real calendar date (e.g. February 30).
 */
var toDate = (value) => {
  let pieces;
  if (MENU_DATE_COMPACT_RE.test(value)) {
    pieces = [value.slice(0, 4), value.slice(4, 6), value.slice(6, 8)];
  } else if (MENU_DATE_ISO_RE.test(value)) {
    pieces = value.split("-");
  } else {
    throw new RangeError(`invalid menu date: ${value}`);
  }
  const year = Number(pieces[0]);
  const month = Number(pieces[1]);
  const day = Number(pieces[2]);
  const date = new Date(year, month - 1, day);
  // Date rolls impossible values over, so compare the round-tripped fields.
  const roundTrips = date.getFullYear() === year && date.getMonth() === month - 1 && date.getDate() === day;
  if (roundTrips) {
    return date;
  }
  throw new RangeError(`invalid menu date: ${value}`);
};
764
/**
 * Normalizes a Date or date string to "YYYY-MM-DD" (local-time fields).
 *
 * Strings shaped "YYYYMMDD" or "YYYY-MM-DD" go through toDate; any other
 * string is handed to the Date constructor.
 *
 * @throws TypeError when the input is neither a Date nor a string.
 * @throws RangeError when the resulting date is invalid.
 */
var normalizeMenuDate = (input) => {
  const isDateObject = input instanceof Date;
  if (!isDateObject && typeof input !== "string") {
    throw new TypeError("menu date must be a Date or string");
  }
  let date;
  if (isDateObject) {
    date = input;
  } else {
    const hasKnownShape = MENU_DATE_COMPACT_RE.test(input) || MENU_DATE_ISO_RE.test(input);
    date = hasKnownShape ? toDate(input) : new Date(input);
  }
  if (Number.isNaN(date.getTime())) {
    throw new RangeError(`invalid menu date: ${String(input)}`);
  }
  const twoDigits = (n) => String(n).padStart(2, "0");
  return `${date.getFullYear()}-${twoDigits(date.getMonth() + 1)}-${twoDigits(date.getDate())}`;
};
785
/**
 * Lists every day from `start` to `end`, both inclusive, as "YYYY-MM-DD".
 *
 * @param start First day, as "YYYYMMDD" or "YYYY-MM-DD".
 * @param end Last day, in the same formats.
 * @throws RangeError when either bound is invalid or start is after end.
 */
var buildDateRange = (start, end) => {
  // toDate itself rejects strings that match neither accepted shape.
  const startDate = toDate(start);
  const endDate = toDate(end);
  if (startDate > endDate) {
    throw new RangeError(`start date must not be after end date: ${start} ~ ${end}`);
  }
  const days = [];
  for (const cursor = new Date(startDate); cursor <= endDate; cursor.setDate(cursor.getDate() + 1)) {
    days.push(normalizeMenuDate(cursor));
  }
  return days;
};
805
+
806
+ // src/client/concurrency.ts
807
/**
 * Runs async task factories with at most `concurrency` in flight at once.
 *
 * @param tasks Array of zero-argument functions returning a value or promise.
 * @param concurrency Desired parallelism; floored, and anything below 1 or
 *        not a number is treated as 1.
 * @returns Results in the same order as `tasks`.
 * @throws The first task error. Once a task has failed no further tasks are
 *         started; tasks already in flight are left to finish.
 */
var runWithConcurrency = async (tasks, concurrency) => {
  const limit = Math.max(1, Math.floor(concurrency) || 1);
  const results = new Array(tasks.length);
  let cursor = 0;
  let failed = false;
  const worker = async () => {
    // Stop claiming work after a failure: the overall call has already
    // rejected, so results of further tasks would be thrown away.
    while (!failed) {
      const current = cursor;
      cursor += 1;
      if (current >= tasks.length) {
        return;
      }
      try {
        results[current] = await tasks[current]();
      } catch (err) {
        failed = true;
        throw err;
      }
    }
  };
  await Promise.all(
    Array.from({ length: Math.min(limit, tasks.length) }, () => worker())
  );
  return results;
};
826
+
827
+ // src/client/mealClient.ts
828
// Concurrency limit used by MealClient's batch methods when options.concurrency is not given.
var DEFAULT_CONCURRENCY = 3;
829
/**
 * Expands a start/end pair into the list of dates to request.
 * When `options.startInclusive` is exactly false the first day is dropped.
 */
var resolveRangeDates = (start, end, options) => {
  const range = buildDateRange(
    normalizeMenuDate(start),
    normalizeMenuDate(end)
  );
  return options.startInclusive === false ? range.slice(1) : range.slice(0);
};

/**
 * Public entry point of the package: fetches raw or parsed menus for one
 * date, a list of dates, or a date range.
 */
var MealClient = class {
  /**
   * @param options Optional configuration:
   *   - settings: overrides merged over defaultSettings;
   *   - parser: "noop" (default), "gpt" or "custom";
   *   - gptApiKey: required when parser is "gpt";
   *   - parserImpl: required when parser is "custom".
   * @throws Error when the option required by the chosen parser is missing.
   */
  constructor(options = {}) {
    const settings = Object.assign({}, defaultSettings, options.settings ?? {});
    const parserMode = options.parser ?? "noop";
    const buildParser = () => {
      switch (parserMode) {
        case "gpt":
          if (!options.gptApiKey) {
            throw new Error("gpt parser requires gptApiKey");
          }
          return new GPTMenuParser(options.gptApiKey);
        case "custom":
          if (!options.parserImpl) {
            throw new Error("custom parser requires parserImpl");
          }
          return options.parserImpl;
        default:
          return new NoopMenuParser();
      }
    };
    this.service = new FoodScrapingService(settings, buildParser(), parserMode);
  }
  /** Fetches the unparsed menu texts for one date. */
  getRawMenu(cafeteria, date) {
    const day = normalizeMenuDate(date);
    return this.service.scrapeRawMenu(cafeteria, day);
  }
  /** Fetches and parses the menu for one date. */
  getDailyMenu(cafeteria, date) {
    const day = normalizeMenuDate(date);
    return this.service.scrapeAndParseMenu(cafeteria, day);
  }
  /**
   * Fetches raw menus for several dates, results in input order.
   * @param options `concurrency` caps parallel requests (default DEFAULT_CONCURRENCY).
   */
  async getRawMenus(cafeteria, dates, options = {}) {
    const jobs = dates.map(normalizeMenuDate).map(
      (day) => () => this.service.scrapeRawMenu(cafeteria, day)
    );
    return runWithConcurrency(jobs, options.concurrency ?? DEFAULT_CONCURRENCY);
  }
  /**
   * Fetches and parses menus for several dates, results in input order.
   * @param options `concurrency` caps parallel requests (default DEFAULT_CONCURRENCY).
   */
  async getDailyMenus(cafeteria, dates, options = {}) {
    const jobs = dates.map(normalizeMenuDate).map(
      (day) => () => this.service.scrapeAndParseMenu(cafeteria, day)
    );
    return runWithConcurrency(jobs, options.concurrency ?? DEFAULT_CONCURRENCY);
  }
  /** Raw menus for every day from `start` to `end` (see `options.startInclusive`). */
  async getRawMenusByRange(cafeteria, start, end, options = {}) {
    const targetDates = resolveRangeDates(start, end, options);
    return this.getRawMenus(cafeteria, targetDates, options);
  }
  /** Parsed menus for every day from `start` to `end` (see `options.startInclusive`). */
  async getDailyMenusByRange(cafeteria, start, end, options = {}) {
    const targetDates = resolveRangeDates(start, end, options);
    return this.getDailyMenus(cafeteria, targetDates, options);
  }
};
893
/** Factory equivalent to `new MealClient(options)`. */
var createMealClient = (options) => new MealClient(options);
896
+ export {
897
+ BaseCafeteriaException,
898
+ CafeteriaType,
899
+ HolidayException,
900
+ MealClient,
901
+ MenuFetchException,
902
+ MenuParseException,
903
+ buildDateRange,
904
+ createMealClient,
905
+ defaultSettings,
906
+ normalizeMenuDate
907
+ };