@heripo/research-radar 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,2281 @@
1
+ import { createOpenAI } from '@ai-sdk/openai';
2
+ import { DateType, GenerateNewsletter } from '@llm-newsletter-kit/core';
3
+ import * as cheerio from 'cheerio';
4
+ import TurndownService from 'turndown';
5
+
/**
 * Formats a date string by replacing dots with dashes.
 *
 * A string containing a line break is treated as a date range: it is split
 * into lines, each line is trimmed, blank lines are dropped, and the first two
 * remaining lines are joined as "start ~ end". If only one non-blank line
 * remains, that line alone is returned.
 *
 * Examples:
 * - "2024.01.15" → "2024-01-15"
 * - "2024.01.15\n2024.01.20" → "2024-01-15 ~ 2024-01-20"
 * - "2024.01.15 \r\n\n  2024.01.20" → "2024-01-15 ~ 2024-01-20"
 * - "2024.01.15\n" → "2024-01-15"
 *
 * @param str The date string to format
 * @returns The formatted date string with dashes instead of dots
 */
function getDate(str) {
  if (!str.includes('\n')) {
    return str.replaceAll('.', '-');
  }
  // Scraped cells often carry CRLF line endings, padding, or empty lines
  // between the two dates, so normalise before picking start and end.
  const segments = str
    .split(/\r?\n/)
    .map((segment) => segment.trim())
    .filter((segment) => segment !== '');
  const [start = '', end] = segments;
  const formatted = end === undefined ? start : `${start} ~ ${end}`;
  return formatted.replaceAll('.', '-');
}
/**
 * Removes unnecessary parts from a URL such as session IDs and pagination parameters.
 * The following items are removed:
 * - Everything from the first semicolon in the path up to the query string or
 *   fragment (e.g. ;jsessionid=xxx)
 * - Query parameters: pageIndex, pageUnit, strWhere, searchWrd, sdate, edate
 *
 * Semicolons that appear inside the query string or the fragment are left
 * alone, so values such as `?title=a;b` are not truncated.
 *
 * Example: https://example.com/path;jsessionid=123456?id=123&pageIndex=1 → https://example.com/path?id=123
 *
 * @param url The URL string to clean
 * @returns The cleaned URL, or the input unchanged when it is empty or cannot be parsed
 */
function cleanUrl(url) {
  if (!url) {
    return url;
  }
  try {
    // Split at the first '?' or '#' so the session-id stripping below only
    // ever touches the part before the query string / fragment.
    const boundary = url.search(/[?#]/);
    const beforeQuery = boundary === -1 ? url : url.slice(0, boundary);
    const queryAndHash = boundary === -1 ? '' : url.slice(boundary);
    const semicolonAt = beforeQuery.indexOf(';');
    const withoutSession =
      semicolonAt === -1 ? beforeQuery : beforeQuery.slice(0, semicolonAt);
    const urlObj = new URL(`${withoutSession}${queryAndHash}`);
    const paramsToRemove = [
      'pageIndex',
      'pageUnit',
      'strWhere',
      'searchWrd',
      'sdate',
      'edate',
    ];
    for (const param of paramsToRemove) {
      urlObj.searchParams.delete(param);
    }
    return urlObj.toString();
  } catch {
    // Best effort: relative or otherwise unparsable input is returned untouched.
    return url;
  }
}
66
+
/**
 * Extracts post summaries from a buyeo.museum.go.kr board list page.
 *
 * The post id is obtained by passing the second cell of each row to
 * getUniqIdFromBuyeoMuseum; the detail URL is built from that id and listId.
 *
 * @param html Raw HTML of the list page
 * @param listId Value placed in the `key` query parameter of every detail URL
 * @returns One entry per `table.tableType1` body row that contains at least one cell
 */
const parseBuyeoMuseumList = (html, listId) => {
  const $ = cheerio.load(html);
  const origin = 'https://buyeo.museum.go.kr';
  const results = [];
  for (const row of $('table.tableType1 tbody tr').toArray()) {
    const cells = $(row).find('td');
    if (cells.length === 0) {
      continue;
    }
    const titleCell = cells.eq(1);
    const link = titleCell.find('a');
    const uniqId = getUniqIdFromBuyeoMuseum(titleCell);
    // Strip span/i decorations on a copy so the parsed document is not modified.
    const linkCopy = link.clone();
    linkCopy.find('span, i').remove();
    // Fall back to the title attribute when the link has no visible text.
    const title = linkCopy.text().trim() || link.attr('title')?.trim() || '';
    results.push({
      uniqId,
      title,
      date: getDate(cells.eq(4).text().trim()),
      detailUrl: cleanUrl(`${origin}/bbs/view.do?pstSn=${uniqId}&key=${listId}`),
      dateType: DateType.REGISTERED,
    });
  }
  return results;
};
/**
 * Converts a buyeo.museum.go.kr detail page into Markdown plus attachment flags.
 *
 * @param html Raw HTML of the detail page
 * @returns detailContent (Markdown of `div.cont`), hasAttachedFile (true when no
 *   `div.attachmentWrap ul.noList` element exists) and hasAttachedImage
 */
const parseBuyeoMuseumDetail = (html) => {
  const $ = cheerio.load(html);
  const body = $('div.cont');
  const markdown = new TurndownService().turndown(body.html() ?? '');
  const emptyAttachmentMarkers = $('div.attachmentWrap ul.noList').length;
  const imageCount = body.find('img').length;
  return {
    detailContent: markdown,
    hasAttachedFile: emptyAttachmentMarkers === 0,
    hasAttachedImage: imageCount > 0,
  };
};
/**
 * Reads the post id from an element's `onclick` handler of the form
 * `goView('<id>')`.
 *
 * Only the first quoted argument is captured, so trailing statements or extra
 * arguments in the handler do not leak into the id.
 *
 * @param element Object exposing `attr(name)` (a cheerio selection in this file)
 * @returns The captured id, or an empty string when there is no matching handler
 */
function getUniqIdFromBuyeoMuseum(element) {
  const handler = element.attr('onclick') ?? '';
  return handler.match(/goView\(\s*'([^']*)'/)?.[1] ?? '';
}
111
+
/**
 * Extracts post summaries from an e-minwon.go.kr excavation report list page.
 *
 * Rows without cells, or whose title link (third cell) has no href, are
 * skipped. The id comes from the link's onclick handler via
 * getUniqIdFromExcavationItem.
 *
 * @param html Raw HTML of the list page
 * @returns Array of { uniqId, title, date, detailUrl, dateType } entries
 */
const parseExcavationReportList = (html) => {
  const $ = cheerio.load(html);
  const posts = [];
  const baseUrl = 'https://www.e-minwon.go.kr';
  $('table.td_left tbody tr.list_tr').each((index, element) => {
    const columns = $(element).find('td');
    if (columns.length === 0) {
      return;
    }
    const titleElement = columns.eq(2).find('a');
    if (!titleElement.attr('href')) {
      return;
    }
    const uniqId = getUniqIdFromExcavationItem(titleElement);
    // `||` (not `??`) so an empty or whitespace-only title attribute falls
    // back to the link text instead of producing an empty title.
    const title = titleElement.attr('title')?.trim() || titleElement.text().trim();
    const date = getDate(columns.eq(6).text().trim());
    const detailUrl = `${baseUrl}/ge/ee/getEcexmRptp.do?ecexmRcno=${uniqId}`;
    posts.push({
      uniqId,
      title,
      date,
      detailUrl,
      dateType: DateType.REGISTERED,
    });
  });
  return posts;
};
/**
 * Extracts post summaries from an e-minwon.go.kr excavation site list page.
 *
 * Rows without cells, or whose title link (second cell) has no href, are
 * skipped. The id comes from the link's onclick handler via
 * getUniqIdFromExcavationItem. Entries are tagged DateType.DURATION.
 *
 * @param html Raw HTML of the list page
 * @returns Array of { uniqId, title, date, detailUrl, dateType } entries
 */
const parseExcavationSiteList = (html) => {
  const $ = cheerio.load(html);
  const posts = [];
  const baseUrl = 'https://www.e-minwon.go.kr';
  $('table.td_left tbody tr.list_tr').each((index, element) => {
    const columns = $(element).find('td');
    if (columns.length === 0) {
      return;
    }
    const titleElement = columns.eq(1).find('a');
    if (!titleElement.attr('href')) {
      return;
    }
    const uniqId = getUniqIdFromExcavationItem(titleElement);
    // `||` (not `??`) so an empty or whitespace-only title attribute falls
    // back to the link text instead of producing an empty title.
    const title = titleElement.attr('title')?.trim() || titleElement.text().trim();
    const date = getDate(columns.eq(4).text().trim());
    const detailUrl = `${baseUrl}/ge/ee/getLinkGrndsRls.do?grndsRlsSeqc=${uniqId}`;
    posts.push({
      uniqId,
      title,
      date,
      detailUrl,
      dateType: DateType.DURATION,
    });
  });
  return posts;
};
/**
 * Converts an excavation report detail page into Markdown.
 *
 * The parent of `table.td_left` is used as the content root; the last
 * `table tbody tr` found under it is removed before conversion.
 *
 * @param html Raw HTML of the detail page
 * @returns detailContent plus fixed flags (hasAttachedFile: true, hasAttachedImage: false)
 */
const parseExcavationReportDetail = (html) => {
  const $ = cheerio.load(html);
  const wrapper = $('table.td_left').parent();
  // Drop only the final row of the matched set.
  wrapper.find('table tbody tr').last().remove();
  const markdown = new TurndownService().turndown(wrapper.html() ?? '');
  return {
    detailContent: markdown,
    hasAttachedFile: true,
    hasAttachedImage: false,
  };
};
/**
 * Converts an excavation site detail page into Markdown.
 *
 * The parent of `div.board_view` is used as the content root; the last
 * `div dl` found under it is removed before conversion.
 *
 * @param html Raw HTML of the detail page
 * @returns detailContent plus fixed flags (hasAttachedFile: true, hasAttachedImage: false)
 */
const parseExcavationSiteDetail = (html) => {
  const $ = cheerio.load(html);
  const wrapper = $('div.board_view').parent();
  // Drop only the final definition list of the matched set.
  wrapper.find('div dl').last().remove();
  const markdown = new TurndownService().turndown(wrapper.html() ?? '');
  return {
    detailContent: markdown,
    hasAttachedFile: true,
    hasAttachedImage: false,
  };
};
/**
 * Reads the item id from an element's `onclick` handler of the form
 * `dataSelected('<id>')`.
 *
 * Only the first quoted argument is captured, so trailing statements or extra
 * arguments in the handler do not leak into the id.
 *
 * @param element Object exposing `attr(name)` (a cheerio selection in this file)
 * @returns The captured id, or an empty string when there is no matching handler
 */
function getUniqIdFromExcavationItem(element) {
  const handler = element.attr('onclick') ?? '';
  return handler.match(/dataSelected\(\s*'([^']*)'/)?.[1] ?? '';
}
201
+
/**
 * Extracts post summaries from a www.gogung.go.kr board list page.
 *
 * Each link href is resolved under `/gogung/bbs/BMSR00022/`; the id is the
 * `boardId` query parameter of the resolved URL.
 *
 * @param html Raw HTML of the list page
 * @returns Array of { uniqId, title, date, detailUrl, dateType } entries
 */
const parseGogungList = (html) => {
  const $ = cheerio.load(html);
  const origin = 'https://www.gogung.go.kr';
  const results = [];
  for (const row of $('table.board-list tbody tr').toArray()) {
    const cells = $(row).find('td');
    if (cells.length === 0) {
      continue;
    }
    const link = cells.eq(1).find('a');
    const href = link.attr('href');
    if (!href) {
      continue;
    }
    const resolved = new URL(`/gogung/bbs/BMSR00022/${href}`, origin);
    results.push({
      uniqId: resolved.searchParams.get('boardId') ?? undefined,
      title: link.text().trim(),
      date: getDate(cells.eq(3).text().trim()),
      detailUrl: cleanUrl(resolved.href),
      dateType: DateType.REGISTERED,
    });
  }
  return results;
};
/**
 * Converts a www.gogung.go.kr detail page into Markdown plus attachment flags.
 *
 * @param html Raw HTML of the detail page
 * @returns detailContent (Markdown of `div.content-wrap`), hasAttachedFile
 *   (a `div.file-list` exists) and hasAttachedImage (content contains an img)
 */
const parseGogungDetail = (html) => {
  const $ = cheerio.load(html);
  const body = $('div.content-wrap');
  const markdown = new TurndownService().turndown(body.html() ?? '');
  const fileBlocks = $('div.file-list').length;
  const imageCount = body.find('img').length;
  return {
    detailContent: markdown,
    hasAttachedFile: fileBlocks > 0,
    hasAttachedImage: imageCount > 0,
  };
};
240
+
/**
 * Extracts post summaries from a www.kh.or.kr board list page.
 *
 * Rows carrying the `top` class are excluded, as are rows without cells or
 * without a link href. The id is the `bbIdx` query parameter of the resolved
 * link.
 *
 * @param html Raw HTML of the list page
 * @returns Array of { uniqId, title, date, detailUrl, dateType } entries
 */
const parseHeritageAgencyList = (html) => {
  const $ = cheerio.load(html);
  const posts = [];
  const baseUrl = 'https://www.kh.or.kr';
  $('table.board_list tbody tr').each((index, element) => {
    const row = $(element);
    // Skip `top` rows up front instead of parsing them and discarding the result.
    if (row.hasClass('top')) {
      return;
    }
    const columns = row.find('td');
    if (columns.length === 0) {
      return;
    }
    const titleElement = columns.eq(1).find('a');
    const relativeHref = titleElement.attr('href');
    if (!relativeHref) {
      return;
    }
    const fullUrl = new URL(relativeHref, baseUrl);
    const uniqId = fullUrl.searchParams.get('bbIdx') ?? undefined;
    // `||` (not `??`) so an empty or whitespace-only title attribute falls
    // back to the link text instead of producing an empty title.
    const title = titleElement.attr('title')?.trim() || titleElement.text().trim();
    const date = getDate(columns.eq(4).text().trim());
    posts.push({
      uniqId,
      title,
      date,
      detailUrl: cleanUrl(fullUrl.href),
      dateType: DateType.REGISTERED,
    });
  });
  return posts;
};
/**
 * Converts a www.kh.or.kr detail page into Markdown plus attachment flags.
 *
 * @param html Raw HTML of the detail page
 * @returns detailContent (Markdown of `div.view_con`), hasAttachedFile
 *   (a `div.tbl_file p` exists) and hasAttachedImage (content contains an img)
 */
const parseHeritageAgencyDetail = (html) => {
  const $ = cheerio.load(html);
  const body = $('div.view_con');
  const markdown = new TurndownService().turndown(body.html() ?? '');
  const fileEntries = $('div.tbl_file p').length;
  const imageCount = body.find('img').length;
  return {
    detailContent: markdown,
    hasAttachedFile: fileEntries > 0,
    hasAttachedImage: imageCount > 0,
  };
};
281
+
/**
 * Extracts post summaries from a www.hsas.or.kr board list page.
 *
 * Hrefs starting with `./?ref` are rewritten to `/flow/?ref` before being
 * resolved; the id is the `eb_idx` query parameter. The date cell text is
 * prefixed with "20" before formatting.
 *
 * @param html Raw HTML of the list page
 * @returns Array of { uniqId, title, date, detailUrl, dateType } entries
 */
const parseHsasList = (html) => {
  const $ = cheerio.load(html);
  const origin = 'http://www.hsas.or.kr';
  const results = [];
  for (const row of $('table#bbs_list tbody tr').toArray()) {
    const cells = $(row).find('td');
    if (cells.length === 0) {
      continue;
    }
    const link = cells.eq(1).find('a');
    const href = link.attr('href');
    if (!href) {
      continue;
    }
    const resolved = new URL(href.replace('./?ref', '/flow/?ref'), origin);
    const shortDate = cells.eq(3).text().trim();
    results.push({
      uniqId: resolved.searchParams.get('eb_idx') ?? undefined,
      title: link.text().trim(),
      date: getDate(`20${shortDate}`),
      detailUrl: cleanUrl(resolved.href),
      dateType: DateType.REGISTERED,
    });
  }
  return results;
};
/**
 * Converts a www.hsas.or.kr detail page into Markdown plus attachment flags.
 *
 * @param html Raw HTML of the detail page
 * @returns detailContent (Markdown of `div#view_content`), hasAttachedFile
 *   (always false) and hasAttachedImage (content contains an img)
 */
const parseHsasDetail = (html) => {
  const $ = cheerio.load(html);
  const body = $('div#view_content');
  const markdown = new TurndownService().turndown(body.html() ?? '');
  const imageCount = body.find('img').length;
  return {
    detailContent: markdown,
    hasAttachedFile: false,
    hasAttachedImage: imageCount > 0,
  };
};
320
+
/**
 * Extracts post summaries from a www.jbgogo.or.kr board list page.
 *
 * The id is the last path segment of the resolved link URL.
 *
 * @param html Raw HTML of the list page
 * @returns Array of { uniqId, title, date, detailUrl, dateType } entries
 */
const parseJbgogoList = (html) => {
  const $ = cheerio.load(html);
  const origin = 'https://www.jbgogo.or.kr';
  const results = [];
  for (const row of $('table.table_list tbody tr').toArray()) {
    const cells = $(row).find('td');
    if (cells.length === 0) {
      continue;
    }
    const link = cells.eq(1).find('div a');
    const href = link.attr('href');
    if (!href) {
      continue;
    }
    const resolved = new URL(href, origin);
    const pathSegments = resolved.pathname.split('/');
    results.push({
      uniqId: pathSegments.pop(),
      title: link.text().trim(),
      date: getDate(cells.eq(3).text().trim()),
      detailUrl: cleanUrl(resolved.href),
      dateType: DateType.REGISTERED,
    });
  }
  return results;
};
/**
 * Converts a www.jbgogo.or.kr detail page into Markdown plus attachment flags.
 *
 * Any `div.snsbox` inside the content is removed before conversion.
 *
 * @param html Raw HTML of the detail page
 * @returns detailContent (Markdown of `div.upload_contents`), hasAttachedFile
 *   (a `div.item_box a.btn_file` exists) and hasAttachedImage
 */
const parseJbgogoDetail = (html) => {
  const $ = cheerio.load(html);
  const body = $('div.upload_contents');
  body.find('div.snsbox').remove();
  const markdown = new TurndownService().turndown(body.html() ?? '');
  const fileLinks = $('div.item_box a.btn_file').length;
  const imageCount = body.find('img').length;
  return {
    detailContent: markdown,
    hasAttachedFile: fileLinks > 0,
    hasAttachedImage: imageCount > 0,
  };
};
360
+
/**
 * Extracts post summaries from a jeonju.museum.go.kr board list page.
 *
 * The id is the `list_no` query parameter of the resolved link; the date is
 * read from the fourth cell and tagged DateType.REGISTERED.
 *
 * @param html Raw HTML of the list page
 * @returns Array of { uniqId, title, date, detailUrl, dateType } entries
 */
const parseJeonjuMuseumList = (html) => {
  const $ = cheerio.load(html);
  const origin = 'https://jeonju.museum.go.kr';
  const results = [];
  for (const row of $('table.tstyle_list tbody tr').toArray()) {
    const cells = $(row).find('td');
    if (cells.length === 0) {
      continue;
    }
    const link = cells.eq(1).find('a');
    const href = link.attr('href');
    if (!href) {
      continue;
    }
    const resolved = new URL(href, origin);
    results.push({
      uniqId: resolved.searchParams.get('list_no') ?? undefined,
      title: link.text().trim(),
      date: getDate(cells.eq(3).text().trim()),
      detailUrl: cleanUrl(resolved.href),
      dateType: DateType.REGISTERED,
    });
  }
  return results;
};
/**
 * Extracts post summaries from a jeonju.museum.go.kr recruitment list page.
 *
 * The id is the `list_no` query parameter of the resolved link; the date is
 * read from the third cell and tagged DateType.DURATION.
 *
 * @param html Raw HTML of the list page
 * @returns Array of { uniqId, title, date, detailUrl, dateType } entries
 */
const parseJeonjuMuseumRecruitList = (html) => {
  const $ = cheerio.load(html);
  const origin = 'https://jeonju.museum.go.kr';
  const results = [];
  for (const row of $('table.tstyle_list tbody tr').toArray()) {
    const cells = $(row).find('td');
    if (cells.length === 0) {
      continue;
    }
    const link = cells.eq(1).find('a');
    const href = link.attr('href');
    if (!href) {
      continue;
    }
    const resolved = new URL(href, origin);
    results.push({
      uniqId: resolved.searchParams.get('list_no') ?? undefined,
      title: link.text().trim(),
      date: getDate(cells.eq(2).text().trim()),
      detailUrl: cleanUrl(resolved.href),
      dateType: DateType.DURATION,
    });
  }
  return results;
};
/**
 * Converts a jeonju.museum.go.kr detail page into Markdown plus attachment flags.
 *
 * @param html Raw HTML of the detail page
 * @returns detailContent (Markdown of `div.contents`), hasAttachedFile
 *   (a `div.file ul.list` exists) and hasAttachedImage (content contains an img)
 */
const parseJeonjuMuseumDetail = (html) => {
  const $ = cheerio.load(html);
  const body = $('div.contents');
  const markdown = new TurndownService().turndown(body.html() ?? '');
  const fileLists = $('div.file ul.list').length;
  const imageCount = body.find('img').length;
  return {
    detailContent: markdown,
    hasAttachedFile: fileLists > 0,
    hasAttachedImage: imageCount > 0,
  };
};
428
+
/**
 * Extracts post summaries from a jinju.museum.go.kr board list page.
 *
 * Each link href is appended to `/kor/html/sub06/0601.html` before being
 * resolved; the id is the `no` query parameter of the resolved URL.
 *
 * @param html Raw HTML of the list page
 * @returns Array of { uniqId, title, date, detailUrl, dateType } entries
 */
const parseJinjuMuseumList = (html) => {
  const $ = cheerio.load(html);
  const origin = 'https://jinju.museum.go.kr';
  const results = [];
  for (const row of $('table tbody tr').toArray()) {
    const cells = $(row).find('td');
    if (cells.length === 0) {
      continue;
    }
    const link = cells.eq(1).find('a');
    const href = link.attr('href');
    if (!href) {
      continue;
    }
    const resolved = new URL(`/kor/html/sub06/0601.html${href}`, origin);
    results.push({
      uniqId: resolved.searchParams.get('no') ?? undefined,
      title: link.text().trim(),
      date: getDate(cells.eq(4).text().trim()),
      detailUrl: cleanUrl(resolved.href),
      dateType: DateType.REGISTERED,
    });
  }
  return results;
};
/**
 * Converts a jinju.museum.go.kr detail page into Markdown plus attachment flags.
 *
 * @param html Raw HTML of the detail page
 * @returns detailContent (Markdown of `div.bbs--view--content`), hasAttachedFile
 *   (a `div.bbs--view--file` exists) and hasAttachedImage
 */
const parseJinjuMuseumDetail = (html) => {
  const $ = cheerio.load(html);
  const body = $('div.bbs--view--content');
  const markdown = new TurndownService().turndown(body.html() ?? '');
  const fileBlocks = $('div.bbs--view--file').length;
  const imageCount = body.find('img').length;
  return {
    detailContent: markdown,
    hasAttachedFile: fileBlocks > 0,
    hasAttachedImage: imageCount > 0,
  };
};
467
+
/**
 * Extracts post summaries from a `table.board-table-list` list page.
 *
 * Link hrefs are parsed with `new URL(href)` and no base, so a non-absolute
 * href makes the URL constructor throw. The id is the last path segment.
 *
 * @param html Raw HTML of the list page
 * @returns Array of { uniqId, title, date, detailUrl, dateType } entries
 */
const parseKaahList = (html) => {
  const $ = cheerio.load(html);
  const results = [];
  for (const row of $('table.board-table-list tbody tr').toArray()) {
    const cells = $(row).find('td');
    if (cells.length === 0) {
      continue;
    }
    const link = cells.eq(1).find('a');
    const href = link.attr('href');
    if (!href) {
      continue;
    }
    const resolved = new URL(href);
    const pathSegments = resolved.pathname.split('/');
    results.push({
      uniqId: pathSegments.pop(),
      title: link.text().trim(),
      date: getDate(cells.eq(3).text().trim()),
      detailUrl: cleanUrl(resolved.href),
      dateType: DateType.REGISTERED,
    });
  }
  return results;
};
/**
 * Extracts post summaries from a `div.page-con-box div.data-list` list page.
 *
 * The title link lives under `div.con`; the date is assembled as
 * "<div.year text>-<div.date text>" from `div.title`. Link hrefs are parsed
 * with `new URL(href)` and no base, so a non-absolute href makes the URL
 * constructor throw. The id is the last path segment.
 *
 * @param html Raw HTML of the list page
 * @returns Array of { uniqId, title, date, detailUrl, dateType } entries
 */
const parseKaahPlaceList = (html) => {
  const $ = cheerio.load(html);
  const results = [];
  for (const item of $('div.page-con-box div.data-list').toArray()) {
    const entry = $(item);
    const link = entry.find('div.con').find('div a');
    const href = link.attr('href');
    if (!href) {
      continue;
    }
    const resolved = new URL(href);
    const dateBox = entry.find('div.title');
    const yearText = dateBox.find('div.year').text().trim();
    const dayText = dateBox.find('div.date').text().trim();
    results.push({
      uniqId: resolved.pathname.split('/').pop(),
      title: link.text().trim(),
      date: getDate(`${yearText}-${dayText}`),
      detailUrl: cleanUrl(resolved.href),
      dateType: DateType.REGISTERED,
    });
  }
  return results;
};
/**
 * Converts a detail page whose body is `div.content` into Markdown plus
 * attachment flags.
 *
 * @param html Raw HTML of the detail page
 * @returns detailContent, hasAttachedFile (a `div.board-view-file div.file-list`
 *   exists) and hasAttachedImage (content contains an img)
 */
const parseKaahDetail = (html) => {
  const $ = cheerio.load(html);
  const body = $('div.content');
  const markdown = new TurndownService().turndown(body.html() ?? '');
  const fileLists = $('div.board-view-file div.file-list').length;
  const imageCount = body.find('img').length;
  return {
    detailContent: markdown,
    hasAttachedFile: fileLists > 0,
    hasAttachedImage: imageCount > 0,
  };
};
/**
 * Converts a detail page whose body is `div.board-view-top` into Markdown plus
 * attachment flags.
 *
 * @param html Raw HTML of the detail page
 * @returns detailContent, hasAttachedFile (a `div.board-view-file div.file-list`
 *   exists) and hasAttachedImage (content contains an img)
 */
const parseKaahPlaceDetail = (html) => {
  const $ = cheerio.load(html);
  const body = $('div.board-view-top');
  const markdown = new TurndownService().turndown(body.html() ?? '');
  const fileLists = $('div.board-view-file div.file-list').length;
  const imageCount = body.find('img').length;
  return {
    detailContent: markdown,
    hasAttachedFile: fileLists > 0,
    hasAttachedImage: imageCount > 0,
  };
};
539
+
/**
 * Extracts post summaries from a www.khs.go.kr `table.list_t01` list page.
 *
 * The fourth cell is used as the post date and the fifth as an end date. When
 * the end date parses to a moment later than "now", the entry is reported as a
 * range ("date ~ endDate", DateType.DURATION); otherwise only the post date is
 * reported (DateType.REGISTERED). The result therefore depends on the current
 * time.
 *
 * @param html Raw HTML of the list page
 * @returns Array of { uniqId, title, date, detailUrl, dateType } entries
 */
const parseKhsList = (html) => {
  const $ = cheerio.load(html);
  const posts = [];
  const baseUrl = 'https://www.khs.go.kr';
  $('table.list_t01 tbody tr').each((index, element) => {
    const columns = $(element).find('td');
    if (columns.length === 0) {
      return;
    }
    const titleElement = columns.eq(1).find('a');
    const relativeHref = titleElement.attr('href');
    if (!relativeHref) {
      return;
    }
    const fullUrl = new URL(relativeHref, baseUrl);
    const uniqId = fullUrl.searchParams.get('id') ?? undefined;
    // `||` (not `??`) so an empty or whitespace-only title attribute falls
    // back to the link text instead of producing an empty title.
    const title = titleElement.attr('title')?.trim() || titleElement.text().trim();
    const date = getDate(columns.eq(3).text().trim());
    const endDate = getDate(columns.eq(4).text().trim());
    // An unparsable end date yields an Invalid Date, which compares false here.
    const hasEndDate = new Date(endDate) > new Date();
    posts.push({
      uniqId,
      title,
      date: hasEndDate ? `${date} ~ ${endDate}` : date,
      detailUrl: cleanUrl(fullUrl.href),
      dateType: hasEndDate ? DateType.DURATION : DateType.REGISTERED,
    });
  });
  return posts;
};
/**
 * Extracts post summaries from a www.khs.go.kr `ul.photo_board` gallery page.
 *
 * Within each item link, the first nested div supplies the image, the second
 * the `strong` title and the third the `span` date. The title prefers the
 * image alt text and falls back to the `strong` text; both are trimmed so
 * surrounding markup whitespace does not end up in the title.
 *
 * @param html Raw HTML of the gallery page
 * @returns Array of { uniqId, title, date, detailUrl, dateType } entries
 */
const parseKhsGalleryList = (html) => {
  const $ = cheerio.load(html);
  const posts = [];
  const baseUrl = 'https://www.khs.go.kr';
  $('ul.photo_board li a').each((index, element) => {
    const anchor = $(element);
    const relativeHref = anchor.attr('href');
    if (!relativeHref) {
      return;
    }
    const fullUrl = new URL(relativeHref, baseUrl);
    const uniqId = fullUrl.searchParams.get('nttId') ?? undefined;
    const children = anchor.find('div');
    const imgElement = children.eq(0).find('img');
    const titleElement = children.eq(1).find('strong');
    const dateElement = children.eq(2).find('span');
    // Trim both candidates: a whitespace-only alt must not block the fallback.
    const title = imgElement.attr('alt')?.trim() || titleElement.text().trim();
    const date = getDate(dateElement.text().trim());
    posts.push({
      uniqId,
      title,
      date,
      detailUrl: cleanUrl(fullUrl.href),
      dateType: DateType.REGISTERED,
    });
  });
  return posts;
};
/**
 * Extracts post summaries from a www.khs.go.kr `table.b_list` list page.
 *
 * The fifth cell is formatted with getDate; when the formatted value contains
 * "~" the entry is tagged DateType.DURATION, otherwise DateType.REGISTERED.
 *
 * @param html Raw HTML of the list page
 * @returns Array of { uniqId, title, date, detailUrl, dateType } entries
 */
const parseKhsLawList = (html) => {
  const $ = cheerio.load(html);
  const posts = [];
  const baseUrl = 'https://www.khs.go.kr';
  $('table.b_list tbody tr').each((index, element) => {
    const columns = $(element).find('td');
    if (columns.length === 0) {
      return;
    }
    const titleElement = columns.eq(1).find('a');
    const relativeHref = titleElement.attr('href');
    if (!relativeHref) {
      return;
    }
    const fullUrl = new URL(relativeHref, baseUrl);
    const uniqId = fullUrl.searchParams.get('id') ?? undefined;
    // `||` (not `??`) so an empty or whitespace-only title attribute falls
    // back to the link text instead of producing an empty title.
    const title = titleElement.attr('title')?.trim() || titleElement.text().trim();
    const date = getDate(columns.eq(4).text().trim());
    const hasEndDate = date.includes('~');
    posts.push({
      uniqId,
      title,
      date,
      detailUrl: cleanUrl(fullUrl.href),
      dateType: hasEndDate ? DateType.DURATION : DateType.REGISTERED,
    });
  });
  return posts;
};
/**
 * Converts a www.khs.go.kr detail page into Markdown plus attachment flags.
 *
 * Declared async, so callers receive a Promise even though nothing is awaited.
 *
 * @param html Raw HTML of the detail page
 * @returns Promise of detailContent (Markdown of `div.b_content`),
 *   hasAttachedFile (a `dl.b_file dd ul li` exists) and hasAttachedImage
 */
const parseKhsDetail = async (html) => {
  const $ = cheerio.load(html);
  const body = $('div.b_content');
  const attachedFiles = $('dl.b_file dd ul li');
  const markdown = new TurndownService().turndown(body.html() ?? '');
  const imageCount = body.find('img').length;
  return {
    detailContent: markdown,
    hasAttachedFile: attachedFiles.length > 0,
    hasAttachedImage: imageCount > 0,
  };
};
639
+
/**
 * Extracts post summaries from a www.kras.or.kr board list page.
 *
 * The id is the `uid` query parameter of the resolved link; the date is read
 * from the fifth cell.
 *
 * @param html Raw HTML of the list page
 * @returns Array of { uniqId, title, date, detailUrl, dateType } entries
 */
const parseKrasList = (html) => {
  const $ = cheerio.load(html);
  const origin = 'https://www.kras.or.kr';
  const results = [];
  for (const row of $('table tbody tr').toArray()) {
    const cells = $(row).find('td');
    if (cells.length === 0) {
      continue;
    }
    const link = cells.eq(1).find('a');
    const href = link.attr('href');
    if (!href) {
      continue;
    }
    const resolved = new URL(href, origin);
    results.push({
      uniqId: resolved.searchParams.get('uid') ?? undefined,
      title: link.text().trim(),
      date: getDate(cells.eq(4).text().trim()),
      detailUrl: cleanUrl(resolved.href),
      dateType: DateType.REGISTERED,
    });
  }
  return results;
};
/**
 * Converts a www.kras.or.kr detail page into Markdown plus attachment flags.
 *
 * Any `div.snsbox` inside the content is removed before conversion.
 *
 * @param html Raw HTML of the detail page
 * @returns detailContent (Markdown of `#vContent`), hasAttachedFile
 *   (a `div.attach ul li` exists) and hasAttachedImage
 */
const parseKrasDetail = (html) => {
  const $ = cheerio.load(html);
  const body = $('#vContent');
  body.find('div.snsbox').remove();
  const markdown = new TurndownService().turndown(body.html() ?? '');
  const fileItems = $('div.attach ul li').length;
  const imageCount = body.find('img').length;
  return {
    detailContent: markdown,
    hasAttachedFile: fileItems > 0,
    hasAttachedImage: imageCount > 0,
  };
};
679
+
/**
 * Extracts post summaries from a www.museum.go.kr `div.board-list-tbody` page.
 *
 * Each `ul` is a row and its `li` children are the cells: the fourth holds the
 * title link and the sixth the date. The id is the `arcId` query parameter.
 *
 * @param html Raw HTML of the list page
 * @param hrefPrefix Path prepended to every link href before it is resolved
 * @returns Array of { uniqId, title, date, detailUrl, dateType } entries
 */
const parseMuseumList = (html, hrefPrefix) => {
  const $ = cheerio.load(html);
  const origin = 'https://www.museum.go.kr';
  const results = [];
  for (const row of $('div.board-list-tbody ul').toArray()) {
    const cells = $(row).find('li');
    if (cells.length === 0) {
      continue;
    }
    const link = cells.eq(3).find('a');
    const href = link.attr('href');
    if (!href) {
      continue;
    }
    const resolved = new URL(`${hrefPrefix}${href}`, origin);
    results.push({
      uniqId: resolved.searchParams.get('arcId') ?? undefined,
      title: link.text().trim(),
      date: getDate(cells.eq(5).text().trim()),
      detailUrl: cleanUrl(resolved.href),
      dateType: DateType.REGISTERED,
    });
  }
  return results;
};
/**
 * Extracts post summaries from the www.museum.go.kr list whose detail pages
 * live under `/MUSEUM/contents/M0701040000.do`.
 *
 * Each `ul` is a row and its `li` children are the cells: the second holds the
 * title link and the fourth the date. The id is the `arcId` query parameter.
 *
 * @param html Raw HTML of the list page
 * @returns Array of { uniqId, title, date, detailUrl, dateType } entries
 */
const parseMuseumPressList = (html) => {
  const $ = cheerio.load(html);
  const origin = 'https://www.museum.go.kr';
  const results = [];
  for (const row of $('div.board-list-tbody ul').toArray()) {
    const cells = $(row).find('li');
    if (cells.length === 0) {
      continue;
    }
    const link = cells.eq(1).find('a');
    const href = link.attr('href');
    if (!href) {
      continue;
    }
    const resolved = new URL(`/MUSEUM/contents/M0701040000.do${href}`, origin);
    results.push({
      uniqId: resolved.searchParams.get('arcId') ?? undefined,
      title: link.text().trim(),
      date: getDate(cells.eq(3).text().trim()),
      detailUrl: cleanUrl(resolved.href),
      dateType: DateType.REGISTERED,
    });
  }
  return results;
};
/**
 * Extracts post summaries from the www.museum.go.kr list whose detail pages
 * live under `/MUSEUM/contents/M0701030000.do`.
 *
 * Each `ul` is a row and its `li` children are the cells: the third holds the
 * title link and the fourth a period. Whitespace around the first "~" in the
 * period is normalised to exactly " ~ ", whether the page renders it as
 * "a~b", "a~ b" or "a ~ b". Entries are tagged DateType.DURATION.
 *
 * @param html Raw HTML of the list page
 * @returns Array of { uniqId, title, date, detailUrl, dateType } entries
 */
const parseMuseumRecruitList = (html) => {
  const $ = cheerio.load(html);
  const posts = [];
  const baseUrl = 'https://www.museum.go.kr';
  $('div.board-list-tbody ul').each((index, element) => {
    const columns = $(element).find('li');
    if (columns.length === 0) {
      return;
    }
    const titleElement = columns.eq(2).find('a');
    const relativeHref = titleElement.attr('href');
    if (!relativeHref) {
      return;
    }
    const fullUrl = new URL(`/MUSEUM/contents/M0701030000.do${relativeHref}`, baseUrl);
    const uniqId = fullUrl.searchParams.get('arcId') ?? undefined;
    const title = titleElement.text().trim();
    // A literal '~ ' → ' ~ ' replacement doubled the space for input that was
    // already "a ~ b" and left "a~b" untouched; collapse any spacing instead.
    const period = columns.eq(3).text().trim().replace(/\s*~\s*/, ' ~ ');
    const date = getDate(period);
    posts.push({
      uniqId,
      title,
      date,
      detailUrl: cleanUrl(fullUrl.href),
      dateType: DateType.DURATION,
    });
  });
  return posts;
};
/**
 * Converts a www.museum.go.kr detail page into Markdown plus attachment flags.
 *
 * @param html Raw HTML of the detail page
 * @returns detailContent (Markdown of `div.viewStyle1`), hasAttachedFile
 *   (a `div.flie-down-area ul li` exists) and hasAttachedImage
 */
const parseMuseumDetail = (html) => {
  const $ = cheerio.load(html);
  const body = $('div.viewStyle1');
  const markdown = new TurndownService().turndown(body.html() ?? '');
  // Selector spelling ("flie") is reproduced as-is from the original.
  const fileItems = $('div.flie-down-area ul li').length;
  const imageCount = body.find('img').length;
  return {
    detailContent: markdown,
    hasAttachedFile: fileItems > 0,
    hasAttachedImage: imageCount > 0,
  };
};
776
+
/**
 * Extracts post summaries from a www.nrich.go.kr `table.table-list` notice page.
 *
 * Link hrefs are resolved under `/kor/`; the id is the `bbs_idx` query
 * parameter and the date is read from the fourth cell.
 *
 * @param html Raw HTML of the list page
 * @returns Array of { uniqId, title, date, detailUrl, dateType } entries
 */
const parseNrichNoticeList = (html) => {
  const $ = cheerio.load(html);
  const posts = [];
  const baseUrl = 'https://www.nrich.go.kr';
  $('table.table-list tbody tr').each((index, element) => {
    const columns = $(element).find('td');
    if (columns.length === 0) {
      return;
    }
    const titleElement = columns.eq(1).find('a');
    const relativeHref = titleElement.attr('href');
    if (!relativeHref) {
      return;
    }
    const fullUrl = new URL(`/kor/${relativeHref}`, baseUrl);
    const uniqId = fullUrl.searchParams.get('bbs_idx') ?? undefined;
    // `||` (not `??`) so an empty or whitespace-only title attribute falls
    // back to the link text instead of producing an empty title.
    const title = titleElement.attr('title')?.trim() || titleElement.text().trim();
    const date = getDate(columns.eq(3).text().trim());
    posts.push({
      uniqId,
      title,
      date,
      detailUrl: cleanUrl(fullUrl.href),
      dateType: DateType.REGISTERED,
    });
  });
  return posts;
};
/**
 * Extracts event summaries from a www.nrich.go.kr `ul.event-list` page.
 *
 * The id comes from each link's onclick handler via
 * getUniqIdFromNrichMajorEvent; links without an id are skipped. The period is
 * read from `span.date` with the "행사기간 : " label removed. An event is
 * reported as a range (DateType.DURATION) only when an end date is present and
 * differs from the start date; otherwise only the start date is reported
 * (DateType.REGISTERED).
 *
 * @param html Raw HTML of the list page
 * @returns Array of { uniqId, title, date, detailUrl, dateType } entries
 */
const parseNrichMajorEventList = (html) => {
  const $ = cheerio.load(html);
  const posts = [];
  $('ul.event-list li a').each((index, element) => {
    const anchor = $(element);
    const uniqId = getUniqIdFromNrichMajorEvent(anchor);
    if (!uniqId) {
      return;
    }
    const detailUrl = `https://www.nrich.go.kr/kor/majorView.do?menuIdx=286&bbs_idx=${uniqId}`;
    // Remove nested spans on a copy so only the bare title text remains.
    const titleNode = anchor.find('strong').clone();
    titleNode.children('span').remove();
    const title = titleNode.text().trim();
    const period = getDate(anchor.find('span.date').text().replaceAll('행사기간 : ', '').trim());
    // Tolerate any spacing around "~". A single-date period has no end part,
    // which previously produced "start ~ undefined" tagged as a duration.
    const [startDate = '', endDate] = period.split(/\s*~\s*/);
    const hasEndDate = Boolean(endDate) && endDate !== startDate;
    posts.push({
      uniqId,
      title,
      date: hasEndDate ? `${startDate} ~ ${endDate}` : startDate,
      detailUrl: cleanUrl(detailUrl),
      dateType: hasEndDate ? DateType.DURATION : DateType.REGISTERED,
    });
  });
  return posts;
};
/**
 * Extracts post summaries from a www.nrich.go.kr `table.table-list` journal page.
 *
 * Link hrefs are resolved directly against the site origin; the id is the
 * `bbs_idx` query parameter and the date is read from the third cell.
 *
 * @param html Raw HTML of the list page
 * @returns Array of { uniqId, title, date, detailUrl, dateType } entries
 */
const parseNrichJournalList = (html) => {
  const $ = cheerio.load(html);
  const posts = [];
  const baseUrl = 'https://www.nrich.go.kr';
  $('table.table-list tbody tr').each((index, element) => {
    const columns = $(element).find('td');
    if (columns.length === 0) {
      return;
    }
    const titleElement = columns.eq(1).find('a');
    const relativeHref = titleElement.attr('href');
    if (!relativeHref) {
      return;
    }
    const fullUrl = new URL(relativeHref, baseUrl);
    const uniqId = fullUrl.searchParams.get('bbs_idx') ?? undefined;
    // `||` (not `??`) so an empty or whitespace-only title attribute falls
    // back to the link text instead of producing an empty title.
    const title = titleElement.attr('title')?.trim() || titleElement.text().trim();
    const date = getDate(columns.eq(2).text().trim());
    posts.push({
      uniqId,
      title,
      date,
      detailUrl: cleanUrl(fullUrl.href),
      dateType: DateType.REGISTERED,
    });
  });
  return posts;
};
/**
 * Extracts post summaries from a portal.nrich.go.kr `table.tbl02` list page.
 *
 * Link hrefs are resolved under `/kor/`; the id is the `bbs_idx` query
 * parameter and the date is read from the third cell.
 *
 * @param html Raw HTML of the list page
 * @returns Array of { uniqId, title, date, detailUrl, dateType } entries
 */
const parseNrichPortalList = (html) => {
  const $ = cheerio.load(html);
  const posts = [];
  const baseUrl = 'https://portal.nrich.go.kr';
  $('table.tbl02 tbody tr').each((index, element) => {
    const columns = $(element).find('td');
    if (columns.length === 0) {
      return;
    }
    const titleElement = columns.eq(1).find('a');
    const relativeHref = titleElement.attr('href');
    if (!relativeHref) {
      return;
    }
    const fullUrl = new URL(`/kor/${relativeHref}`, baseUrl);
    const uniqId = fullUrl.searchParams.get('bbs_idx') ?? undefined;
    // `||` (not `??`) so an empty or whitespace-only title attribute falls
    // back to the link text instead of producing an empty title.
    const title = titleElement.attr('title')?.trim() || titleElement.text().trim();
    const date = getDate(columns.eq(2).text().trim());
    posts.push({
      uniqId,
      title,
      date,
      detailUrl: cleanUrl(fullUrl.href),
      dateType: DateType.REGISTERED,
    });
  });
  return posts;
};
/**
 * Converts a www.nrich.go.kr notice detail page into Markdown plus attachment flags.
 *
 * The content is the cell of the fourth `table.table-view` body row; the page
 * is considered to have an attachment when the table has more than four rows.
 *
 * @param html Raw HTML of the detail page
 * @returns detailContent, hasAttachedFile and hasAttachedImage
 */
const parseNrichNoticeDetail = (html) => {
  const $ = cheerio.load(html);
  const rows = $('table.table-view tbody tr');
  const body = rows.eq(3).find('td');
  const markdown = new TurndownService().turndown(body.html() ?? '');
  const imageCount = body.find('img').length;
  return {
    detailContent: markdown,
    hasAttachedFile: rows.length > 4,
    hasAttachedImage: imageCount > 0,
  };
};
/**
 * Converts a www.nrich.go.kr major-event detail page into Markdown plus
 * attachment flags.
 *
 * The content is the cell of the fifth `table.table-view` body row.
 *
 * @param html Raw HTML of the detail page
 * @returns detailContent, hasAttachedFile (always false) and hasAttachedImage
 */
const parseNrichMajorEventDetail = (html) => {
  const $ = cheerio.load(html);
  const rows = $('table.table-view tbody tr');
  const body = rows.eq(4).find('td');
  const markdown = new TurndownService().turndown(body.html() ?? '');
  const imageCount = body.find('img').length;
  return {
    detailContent: markdown,
    hasAttachedFile: false,
    hasAttachedImage: imageCount > 0,
  };
};
/**
 * Builds a Markdown article list from a www.nrich.go.kr journal detail page.
 *
 * Script and style elements are removed first. Each `table.table-list` body
 * row contributes one entry (number, bold title, author line) when both the
 * title and the author cell are non-empty. An empty string is returned as
 * content when no entry qualifies.
 *
 * @param html Raw HTML of the detail page
 * @returns detailContent plus fixed flags (hasAttachedFile: true, hasAttachedImage: false)
 */
const parseNrichJournalDetail = (html) => {
  const $ = cheerio.load(html);
  $('script, style').remove();
  const entries = [];
  // Walk every table row and collect the article information it holds.
  for (const row of $('table.table-list tbody tr').toArray()) {
    const cells = $(row).find('td');
    if (cells.length === 0) {
      continue;
    }
    const number = cells.eq(0).text().trim();
    const title = cells.eq(1).find('a').text().trim();
    const author = cells.eq(2).text().trim();
    if (!title || !author) {
      continue;
    }
    entries.push(`${number}. **${title}**\n 저자: ${author}`);
  }
  let detailContent = '';
  if (entries.length > 0) {
    detailContent = `## 논문 목록\n\n${entries.join('\n\n')}`;
  }
  return {
    detailContent,
    hasAttachedFile: true,
    hasAttachedImage: false,
  };
};
939
/**
 * Parses the detail page of a portal notice.
 *
 * The post body is the `div.detail_Area2` element, converted from HTML to
 * Markdown with Turndown. Attachments are never reported for this page type.
 *
 * @param {string} html - Raw HTML of the detail page
 * @returns {{detailContent: string, hasAttachedFile: boolean, hasAttachedImage: boolean}}
 *   Markdown body plus attachment flags
 */
const parseNrichPortalDetail = (html) => {
  const $ = cheerio.load(html);
  const detailArea = $('div.detail_Area2');
  const detailHtml = detailArea.html() ?? '';
  const containsImage = detailArea.find('img').length > 0;

  return {
    detailContent: new TurndownService().turndown(detailHtml),
    hasAttachedFile: false,
    hasAttachedImage: containsImage,
  };
};
948
/**
 * Extracts the post id from an element's inline `onclick` handler.
 *
 * The handler is expected to contain a call of the form `fnViewPage('<id>')`;
 * the text between the quotes is returned.
 *
 * @param element - Object exposing `attr(name)` (e.g. a cheerio selection)
 * @returns {string} The captured id, or an empty string when the attribute is
 *   missing or does not contain a `fnViewPage('…')` call
 */
function getUniqIdFromNrichMajorEvent(element) {
  const onclick = element.attr('onclick') ?? '';
  // Lazy capture: stop at the first `')` so that further statements in the
  // same handler (e.g. `fnViewPage('1'); other('2')`) are not swallowed into the id.
  const match = onclick.match(/fnViewPage\('(.*?)'\)/);
  return match?.[1] ?? '';
}
951
+
952
/**
 * Crawling targets, grouped into `news`, `business` and `employment`.
 *
 * Every target is `{ id, name, url, parseList, parseDetail }`. Parser pairs
 * that are shared by several boards of the same site are declared once and
 * spread into each target, so the key order of every target stays
 * id → name → url → parseList → parseDetail.
 *
 * Targets kept as comments are intentionally disabled; the reason is stated
 * above each disabled run.
 */
const crawlingTargetGroups = (() => {
  // Parser pairs reused by more than one target.
  const khs = { parseList: parseKhsList, parseDetail: parseKhsDetail };
  const nrichJournal = {
    parseList: parseNrichJournalList,
    parseDetail: parseNrichJournalDetail,
  };
  const heritageAgency = {
    parseList: parseHeritageAgencyList,
    parseDetail: parseHeritageAgencyDetail,
  };
  const kaah = { parseList: parseKaahList, parseDetail: parseKaahDetail };
  const kras = { parseList: parseKrasList, parseDetail: parseKrasDetail };
  const jbgogo = { parseList: parseJbgogoList, parseDetail: parseJbgogoDetail };
  const hsas = { parseList: parseHsasList, parseDetail: parseHsasDetail };
  const jeonjuMuseum = {
    parseList: parseJeonjuMuseumList,
    parseDetail: parseJeonjuMuseumDetail,
  };

  const newsTargets = [
    {
      id: '국가유산청_공지사항',
      name: '국가유산청 공지사항',
      url: 'https://www.khs.go.kr/multiBbz/selectMultiBbzList.do?bbzId=newpublic&mn=NS_01_01',
      ...khs,
    },
    {
      id: '국가유산청_보도설명',
      name: '국가유산청 보도/설명',
      url: 'https://www.khs.go.kr/newsBbz/selectNewsBbzList.do?sectionId=all_sec_1&mn=NS_01_02',
      ...khs,
    },
    {
      id: '국가유산청_사진뉴스',
      name: '국가유산청 사진뉴스',
      url: 'https://www.khs.go.kr/cop/bbs/selectBoardList.do?bbsId=BBSMSTR_1002&mn=NS_01_03',
      parseList: parseKhsGalleryList,
      parseDetail: parseKhsDetail,
    },
    {
      id: '국가유산청_입법예고',
      name: '국가유산청 입법예고',
      url: 'https://www.khs.go.kr/lawBbz/selectLawBbzList.do?mn=NS_03_01_01',
      parseList: parseKhsLawList,
      parseDetail: parseKhsDetail,
    },
    // NOTE: Parsing logic is implemented, but the data is too fragmented to be of value for the newsletter.
    // {
    //   id: '국가유산청_발굴조사_현황공개',
    //   name: '국가유산청 발굴조사 현황공개',
    //   url: 'https://www.e-minwon.go.kr/ge/ee/getListEcexmPrmsnAply.do',
    //   parseList: parseExcavationStatusList,
    //   parseDetail: parseExcavationStatusDetail,
    // },
    {
      id: '국가유산청_발굴조사_보고서',
      name: '국가유산청 발굴조사 보고서',
      url: 'https://www.e-minwon.go.kr/ge/ee/getListEcexmRptp.do',
      parseList: parseExcavationReportList,
      parseDetail: parseExcavationReportDetail,
    },
    {
      id: '국가유산청_발굴조사_현장공개',
      name: '국가유산청 발굴조사 현장공개',
      url: 'https://www.e-minwon.go.kr/ge/ee/getListLinkGrndsRls.do',
      parseList: parseExcavationSiteList,
      parseDetail: parseExcavationSiteDetail,
    },
    {
      id: '국립문화유산연구원_공지사항',
      name: '국립문화유산연구원 공지사항',
      url: 'https://www.nrich.go.kr/kor/boardList.do?menuIdx=282&bbscd=32',
      parseList: parseNrichNoticeList,
      parseDetail: parseNrichNoticeDetail,
    },
    {
      id: '국립문화유산연구원_주요행사',
      name: '국립문화유산연구원 주요행사',
      url: 'https://www.nrich.go.kr/kor/majorList.do?menuIdx=286',
      parseList: parseNrichMajorEventList,
      parseDetail: parseNrichMajorEventDetail,
    },
    {
      id: '국립문화유산연구원_학술지_헤리티지',
      name: '국립문화유산연구원 헤리티지:역사와 과학 학술지',
      url: 'https://www.nrich.go.kr/kor/subscriptionDataUsrList.do?menuIdx=1651&gubun=J',
      ...nrichJournal,
    },
    {
      id: '국립문화유산연구원_학술지_보존과학연구',
      name: '국립문화유산연구원 보존과학연구 학술지',
      url: 'https://www.nrich.go.kr/kor/subscriptionDataUsrList.do?menuIdx=2065&gubun=K',
      ...nrichJournal,
    },
    {
      id: '국가유산지식이음_공지사항',
      name: '국가유산 지식이음 공지사항',
      url: 'https://portal.nrich.go.kr/kor/boardList.do?menuIdx=1058&bbscd=9',
      parseList: parseNrichPortalList,
      parseDetail: parseNrichPortalDetail,
    },
    {
      id: '국립고궁박물관_공지사항',
      name: '국립고궁박물관 공지사항',
      url: 'https://www.gogung.go.kr/gogung/bbs/BMSR00022/list.do?gubunCd=B22_001&menuNo=800088',
      parseList: parseGogungList,
      parseDetail: parseGogungDetail,
    },
    {
      id: '국가유산진흥원_공지사항',
      name: '국가유산진흥원 공지사항',
      url: 'https://www.kh.or.kr/brd/board/644/L/SITES/100/menu/371',
      ...heritageAgency,
    },
    {
      id: '국가유산진흥원_보도자료',
      name: '국가유산진흥원 보도자료',
      url: 'https://www.kh.or.kr/brd/board/715/L/menu/373',
      ...heritageAgency,
    },
    {
      id: '국가유산진흥원_매장유산국비발굴단_공지사항',
      name: '국가유산진흥원 매장유산국비발굴단 공지사항',
      url: 'https://www.kh.or.kr/brd/board/644/L/SITES/201/menu/506',
      ...heritageAgency,
    },
    {
      id: '국가유산진흥원_매장유산국비발굴단_현장설명회',
      name: '국가유산진흥원 매장유산국비발굴단 현장설명회',
      url: 'https://www.kh.or.kr/brd/board/631/L/menu/504',
      ...heritageAgency,
    },
    {
      id: '한국문화유산협회_공지사항',
      name: '한국문화유산협회 공지사항',
      url: 'https://www.kaah.kr/notice',
      ...kaah,
    },
    {
      id: '한국문화유산협회_협회소식',
      name: '한국문화유산협회 협회소식',
      url: 'https://www.kaah.kr/news',
      ...kaah,
    },
    {
      id: '한국문화유산협회_보도자료',
      name: '한국문화유산협회 보도자료',
      url: 'https://www.kaah.kr/mass',
      ...kaah,
    },
    {
      id: '한국문화유산협회_회원기관소식',
      name: '한국문화유산협회 회원기관소식',
      url: 'https://www.kaah.kr/assnews',
      ...kaah,
    },
    {
      id: '한국문화유산협회_유관기관소식',
      name: '한국문화유산협회 유관기관소식',
      url: 'https://www.kaah.kr/ralnews',
      ...kaah,
    },
    {
      id: '한국문화유산협회_발굴현장공개',
      name: '한국문화유산협회 발굴현장공개',
      url: 'https://www.kaah.kr/placeopen',
      parseList: parseKaahPlaceList,
      parseDetail: parseKaahPlaceDetail,
    },
    {
      id: '한국고고학회_공지사항',
      name: '한국고고학회 공지사항',
      url: 'https://www.kras.or.kr/?r=kras&m=bbs&bid=notice',
      ...kras,
    },
    {
      id: '한국고고학회_학술대회및행사',
      name: '한국고고학회 학술대회 및 행사',
      url: 'https://www.kras.or.kr/?r=kras&m=bbs&bid=sympo',
      ...kras,
    },
    {
      id: '한국고고학회_신간안내_단행본',
      name: '한국고고학회 신간안내 - 단행본',
      url: 'https://www.kras.or.kr/?c=61/101/105',
      ...kras,
    },
    {
      id: '한국고고학회_현장소식',
      name: '한국고고학회 현장소식',
      url: 'https://www.kras.or.kr/?c=61/73',
      ...kras,
    },
    {
      id: '중부고고학회_공지사항',
      name: '중부고고학회 공지사항',
      url: 'https://www.jbgogo.or.kr/bbs/notice',
      ...jbgogo,
    },
    {
      id: '중부고고학회_학계소식',
      name: '중부고고학회 학계소식',
      url: 'https://www.jbgogo.or.kr/bbs/news',
      ...jbgogo,
    },
    {
      id: '중부고고학회_발굴현장소식',
      name: '중부고고학회 발굴현장소식',
      url: 'https://www.jbgogo.or.kr/bbs/spotnews',
      ...jbgogo,
    },
    {
      id: '호서고고학회_공지사항',
      name: '호서고고학회 공지사항',
      url: 'http://www.hsas.or.kr/flow/?ref=board/board.emt&menu_table=m2_00&bbs_table=notice&menu_idx=010000',
      ...hsas,
    },
    {
      id: '호서고고학회_학회소식',
      name: '호서고고학회 학회소식',
      url: 'http://www.hsas.or.kr/flow/?ref=board/board.emt&menu_table=m2_00&bbs_table=m2_01&menu_idx=020000',
      ...hsas,
    },
    {
      id: '국립중앙박물관_알림',
      name: '국립중앙박물관 알림',
      url: 'https://www.museum.go.kr/MUSEUM/contents/M0701010000.do?catCustomType=united&catId=128',
      parseList: (html) => parseMuseumList(html, '/MUSEUM/contents/M0701010000.do'),
      parseDetail: parseMuseumDetail,
    },
    {
      id: '국립중앙박물관_고시공고',
      name: '국립중앙박물관 고시/공고',
      url: 'https://www.museum.go.kr/MUSEUM/contents/M0701020000.do',
      parseList: (html) => parseMuseumList(html, '/MUSEUM/contents/M0701020000.do'),
      parseDetail: parseMuseumDetail,
    },
    {
      id: '국립중앙박물관_보도자료',
      name: '국립중앙박물관 보도 자료',
      url: 'https://www.museum.go.kr/MUSEUM/contents/M0701040000.do?catCustomType=post&catId=93',
      parseList: parseMuseumPressList,
      parseDetail: parseMuseumDetail,
    },
    {
      id: '국립전주박물관_새소식',
      name: '국립전주박물관 새소식',
      url: 'https://jeonju.museum.go.kr/board.es?mid=a10105010000&bid=0001',
      ...jeonjuMuseum,
    },
    {
      id: '국립전주박물관_보도자료',
      name: '국립전주박물관 보도자료',
      url: 'https://jeonju.museum.go.kr/board.es?mid=a10105050000&bid=0004',
      ...jeonjuMuseum,
    },
    {
      id: '국립부여박물관_공지사항',
      name: '국립부여박물관 공지사항',
      url: 'https://buyeo.museum.go.kr/bbs/list.do?key=2301250005',
      parseList: (html) => parseBuyeoMuseumList(html, '2301250005'),
      parseDetail: parseBuyeoMuseumDetail,
    },
    {
      id: '국립부여박물관_보도자료',
      name: '국립부여박물관 보도자료',
      url: 'https://buyeo.museum.go.kr/bbs/list.do?key=2302150024',
      parseList: (html) => parseBuyeoMuseumList(html, '2302150024'),
      parseDetail: parseBuyeoMuseumDetail,
    },
    {
      id: '국립진주박물관_새소식',
      name: '국립진주박물관 새소식',
      url: 'https://jinju.museum.go.kr/kor/html/sub06/0601.html',
      parseList: parseJinjuMuseumList,
      parseDetail: parseJinjuMuseumDetail,
    },
    // NOTE: Parsing logic is implemented, but crawling is restricted by robots.txt policy.
    // {
    //   id: '국립경주박물관_새소식',
    //   name: '국립경주박물관 새소식',
    //   url: 'https://gyeongju.museum.go.kr/kor/html/sub07/0701.html',
    //   parseList: (html) =>
    //     parseGyeongjuMuseumList(html, '/kor/html/sub07/0701.html'),
    //   parseDetail: parseGyeongjuMuseumDetail,
    // },
    // {
    //   id: '국립경주박물관_고시공고',
    //   name: '국립경주박물관 고시/공고',
    //   url: 'https://gyeongju.museum.go.kr/kor/html/sub07/0703.html',
    //   parseList: parseGyeongjuMuseumNoticeList,
    //   parseDetail: parseGyeongjuMuseumDetail,
    // },
    // {
    //   id: '국립경주박물관_보도자료',
    //   name: '국립경주박물관 보도자료',
    //   url: 'https://gyeongju.museum.go.kr/kor/html/sub07/0705.html',
    //   parseList: (html) =>
    //     parseGyeongjuMuseumList(html, '/kor/html/sub07/0705.html'),
    //   parseDetail: parseGyeongjuMuseumDetail,
    // },
    // {
    //   id: '국립청주박물관_새소식',
    //   name: '국립청주박물관 새소식',
    //   url: 'https://cheongju.museum.go.kr/www/selectBbsNttList.do?bbsNo=1&key=482&nbar=s',
    //   parseList: parseCheongjuMuseumList,
    //   parseDetail: parseCheongjuMuseumDetail,
    // },
    // {
    //   id: '국립청주박물관_언론보도자료',
    //   name: '국립청주박물관 언론보도자료',
    //   url: 'https://cheongju.museum.go.kr/www/selectBbsNttList.do?bbsNo=20&key=31&nbar=s',
    //   parseList: parseCheongjuMuseumList,
    //   parseDetail: parseCheongjuMuseumDetail,
    // },
    // {
    //   id: '국립김해박물관_새소식',
    //   name: '국립김해박물관 새소식',
    //   url: 'https://gimhae.museum.go.kr/kr/html/sub04/0401.html',
    //   parseList: (html) =>
    //     parseGimhaeMuseumList(html, '/kr/html/sub04/0401.html'),
    //   parseDetail: parseGimhaeMuseumDetail,
    // },
    // {
    //   id: '국립김해박물관_보도자료',
    //   name: '국립김해박물관 언론보도자료',
    //   url: 'https://gimhae.museum.go.kr/kr/html/sub04/0402.html',
    //   parseList: (html) =>
    //     parseGimhaeMuseumList(html, '/kr/html/sub04/0402.html'),
    //   parseDetail: parseGimhaeMuseumDetail,
    // },
    // {
    //   id: '국립제주박물관_새소식',
    //   name: '국립제주박물관 새소식',
    //   url: 'https://jeju.museum.go.kr/_prog/_board/?code=sub02_0201&site_dvs_cd=kr&menu_dvs_cd=050101&ntt_tag=1',
    //   parseList: parseJejuMuseumList,
    //   parseDetail: parseJejuMuseumDetail,
    // },
    // {
    //   id: '국립익산박물관_공지사항',
    //   name: '국립익산박물관 공지사항',
    //   url: 'https://iksan.museum.go.kr/kor/html/sub05/0501.html',
    //   parseList: parseIksanMuseumList,
    //   parseDetail: parseIksanMuseumDetail,
    // },
  ];

  const businessTargets = [
    {
      id: '국가유산청_입찰정보',
      name: '국가유산청 입찰정보',
      url: 'https://www.khs.go.kr/tenderBbz/selectTenderBbzList.do?mn=NS_01_05',
      ...khs,
    },
    {
      id: '국가유산진흥원_입찰정보',
      name: '국가유산진흥원 입찰정보',
      url: 'https://www.kh.or.kr/brd/board/717/L/menu/375',
      ...heritageAgency,
    },
    {
      id: '한국문화유산협회_사업공고',
      name: '한국문화유산협회 사업공고',
      url: 'https://www.kaah.kr/bussopen',
      ...kaah,
    },
    {
      id: '한국문화유산협회_입찰공고',
      name: '한국문화유산협회 입찰공고',
      url: 'https://www.kaah.kr/ipcopen',
      ...kaah,
    },
  ];

  const employmentTargets = [
    {
      id: '국가유산청_시험채용',
      name: '국가유산청 시험/채용',
      url: 'https://www.khs.go.kr/multiBbz/selectMultiBbzList.do?bbzId=newexam&mn=NS_01_06',
      ...khs,
    },
    {
      id: '국가유산진흥원_인재채용',
      name: '국가유산진흥원 인재채용',
      url: 'https://www.kh.or.kr/brd/board/721/L/CATEGORY/719/menu/377',
      ...heritageAgency,
    },
    {
      id: '한국문화유산협회_채용공고',
      name: '한국문화유산협회 채용공고',
      url: 'https://www.kaah.kr/reqopen',
      ...kaah,
    },
    {
      id: '국립중앙박물관_채용안내',
      name: '국립중앙박물관 채용 안내',
      url: 'https://www.museum.go.kr/MUSEUM/contents/M0701030000.do?catCustomType=post&catId=54&recruitYn=Y',
      parseList: parseMuseumRecruitList,
      parseDetail: parseMuseumDetail,
    },
    {
      id: '국립전주박물관_채용',
      name: '국립전주박물관 채용',
      url: 'https://jeonju.museum.go.kr/board.es?mid=a10105020000&bid=0002',
      parseList: parseJeonjuMuseumRecruitList,
      parseDetail: parseJeonjuMuseumDetail,
    },
    {
      id: '국립부여박물관_채용공고',
      name: '국립부여박물관 채용공고',
      url: 'https://buyeo.museum.go.kr/bbs/list.do?key=2301270001',
      parseList: (html) => parseBuyeoMuseumList(html, '2301270001'),
      parseDetail: parseBuyeoMuseumDetail,
    },
    // NOTE: Parsing logic is implemented, but crawling is restricted by robots.txt policy.
    // {
    //   id: '국립경주박물관_채용안내',
    //   name: '국립경주박물관 채용안내',
    //   url: 'https://gyeongju.museum.go.kr/kor/html/sub07/0704.html',
    //   parseList: (html) =>
    //     parseGyeongjuMuseumList(html, '/kor/html/sub07/0704.html'),
    //   parseDetail: parseGyeongjuMuseumDetail,
    // },
    // {
    //   id: '국립청주박물관_채용및공고',
    //   name: '국립청주박물관 채용 및 공고',
    //   url: 'https://cheongju.museum.go.kr/www/selectBbsNttList.do?bbsNo=29&key=476&nbar=s',
    //   parseList: parseCheongjuMuseumList,
    //   parseDetail: parseCheongjuMuseumDetail,
    // },
    // {
    //   id: '국립제주박물관_채용정보',
    //   name: '국립제주박물관 채용정보',
    //   url: 'https://jeju.museum.go.kr/_prog/_board/?code=sub02_0201&site_dvs_cd=kr&menu_dvs_cd=050102&ntt_tag=2',
    //   parseList: parseJejuMuseumList,
    //   parseDetail: parseJejuMuseumDetail,
    // },
  ];

  return [
    { id: 'news', name: 'News', targets: newsTargets },
    { id: 'business', name: 'Business', targets: businessTargets },
    { id: 'employment', name: 'Employment', targets: employmentTargets },
  ];
})();
1414
+
1415
/**
 * Content settings for the generated newsletter.
 *
 * - `outputLanguage`: language name the newsletter is written in ('한국어' = Korean).
 * - `expertField`: list of subject areas ('문화유산' = cultural heritage).
 *
 * NOTE(review): how these values are consumed is not visible in this part of
 * the file — presumably they are passed to the newsletter generator; confirm.
 */
const contentOptions = {
  outputLanguage: '한국어',
  expertField: ['문화유산'],
};
1422
/**
 * Brand and publication settings for the newsletter.
 *
 * - `brandName`: display name of the newsletter.
 * - `subscribePageUrl`: URL of the subscription page.
 * - `publicationCriteria.minimumArticleCountForIssue` and
 *   `publicationCriteria.priorityArticleScoreThreshold`: both are interpolated
 *   into the publication-policy text of the email template, which describes a
 *   normal issue as "more than the minimum count of new items, or fewer but
 *   including an item scored at or above the threshold".
 *   NOTE(review): the code that enforces these criteria is not visible here — confirm.
 */
const newsletterConfig = {
  brandName: '문화유산 리서치 레이더',
  subscribePageUrl: 'https://heripo.com/research-radar/subscribe',
  publicationCriteria: {
    minimumArticleCountForIssue: 5,
    priorityArticleScoreThreshold: 8,
  },
};
1433
/**
 * Settings for LLM calls.
 *
 * - `maxRetries`: 5
 * - `chainStopAfterAttempt`: 3
 * - `generation.temperature`: 0.3
 *
 * NOTE(review): the exact retry/stop semantics are defined by the consumer of
 * this object, which is not visible in this part of the file — confirm before
 * changing these numbers.
 */
const llmConfig = {
  maxRetries: 5,
  chainStopAfterAttempt: 3,
  generation: {
    temperature: 0.3,
  },
};
1443
+
1444
/**
 * Analysis provider implementation
 * - LLM-based article analysis
 * - Tag classification, image analysis, importance scoring
 *
 * Holds the model handles and scoring rules for the three analysis steps and
 * delegates persistence to the injected repositories.
 */
class AnalysisProvider {
  openai;
  articleRepository;
  tagRepository;
  classifyTagOptions;
  analyzeImagesOptions;
  determineScoreOptions;
  /**
   * @param openai - Callable that maps a model id to a model handle
   * @param articleRepository - Must provide `findUnscoredArticles()` and `updateAnalysis(article)`
   * @param tagRepository - Must provide `findAllTags()`
   */
  constructor(openai, articleRepository, tagRepository) {
    this.openai = openai;
    this.articleRepository = articleRepository;
    this.tagRepository = tagRepository;

    // Builds one `{ targetUrl, minScore }` rule per URL, in list order.
    const rulesWithMinScore = (minScore, targetUrls) =>
      targetUrls.map((targetUrl) => ({ targetUrl, minScore }));

    this.classifyTagOptions = {
      model: this.openai('gpt-5-mini'),
    };
    this.analyzeImagesOptions = {
      model: this.openai('gpt-5.1'),
    };
    this.determineScoreOptions = {
      model: this.openai('gpt-5.1'),
      minimumImportanceScoreRules: [
        // Society and association boards (kras.or.kr, kaah.kr): minimum score 6.
        // Each URL appears exactly once; the list previously contained
        // 'https://www.kaah.kr/notice' twice.
        // NOTE(review): 'https://www.kaah.kr/reqopen' is a crawl target but has
        // no rule here — confirm whether the removed duplicate was meant for it.
        ...rulesWithMinScore(6, [
          'https://www.kras.or.kr/?r=kras&m=bbs&bid=notice',
          'https://www.kras.or.kr/?r=kras&m=bbs&bid=sympo',
          'https://www.kras.or.kr/?c=61/101/105',
          'https://www.kaah.kr/notice',
          'https://www.kaah.kr/news',
          'https://www.kaah.kr/mass',
          'https://www.kaah.kr/assnews',
          'https://www.kaah.kr/ralnews',
          'https://www.kaah.kr/placeopen',
          'https://www.kaah.kr/bussopen',
          'https://www.kaah.kr/ipcopen',
        ]),
        // Excavation boards on e-minwon.go.kr: minimum score 2.
        ...rulesWithMinScore(2, [
          'https://www.e-minwon.go.kr/ge/ee/getListEcexmPrmsnAply.do',
          'https://www.e-minwon.go.kr/ge/ee/getListEcexmRptp.do',
          'https://www.e-minwon.go.kr/ge/ee/getListLinkGrndsRls.do',
        ]),
      ],
    };
  }
  /**
   * Fetch articles that haven't been scored yet
   * @returns Unscored articles awaiting analysis
   */
  async fetchUnscoredArticles() {
    return this.articleRepository.findUnscoredArticles();
  }
  /**
   * Fetch all existing tags for classification
   * @returns List of tag names
   */
  async fetchTags() {
    return this.tagRepository.findAllTags();
  }
  /**
   * Update article with analysis results (tags, image analysis, importance score)
   * @param article - Article with analysis data
   */
  async update(article) {
    await this.articleRepository.updateAnalysis(article);
  }
}
1556
+
1557
+ /**
1558
+ * Creates an HTML template for the newsletter email
1559
+ *
1560
+ * This function generates a responsive email template with:
1561
+ * - Light/dark mode support
1562
+ * - Mobile-friendly design
1563
+ * - Brand-specific styling
1564
+ * - List of crawling sources
1565
+ * - Publication policy information
1566
+ * - Platform introduction
1567
+ *
1568
+ * @param targets - Array of crawling targets to be listed in the newsletter footer
1569
+ * @returns Complete HTML string for the newsletter email
1570
+ *
1571
+ * @example
1572
+ * ```typescript
1573
+ * const html = createNewsletterHtmlTemplate([
1574
+ * { id: '1', name: 'Source 1', url: 'https://example.com', ... }
1575
+ * ]);
1576
+ * ```
1577
+ */
1578
+ const createNewsletterHtmlTemplate = (targets) => `<!DOCTYPE html>
1579
+ <html lang="ko" style="color-scheme: light dark; supported-color-schemes: light dark;">
1580
+ <head>
1581
+ <meta charset="UTF-8">
1582
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
1583
+ <meta http-equiv="X-UA-Compatible" content="IE=edge">
1584
+ <meta name="color-scheme" content="light dark">
1585
+ <meta name="supported-color-schemes" content="light dark">
1586
+ <style type="text/css">
1587
+ a:hover {
1588
+ color: #D2691E !important;
1589
+ }
1590
+ .button-link {
1591
+ color: #fff !important;
1592
+ background: #D2691E;
1593
+ padding: 10px 28px;
1594
+ border-radius: 5px;
1595
+ text-decoration: none !important;
1596
+ font-weight: bold;
1597
+ font-size: 16px;
1598
+ display: inline-block;
1599
+ letter-spacing: 0.5px;
1600
+ }
1601
+ .button-link:hover {
1602
+ background: #b85a1a !important;
1603
+ }
1604
+
1605
+ html {
1606
+ color-scheme: light dark;
1607
+ supported-color-schemes: light dark;
1608
+ }
1609
+
1610
+ body {
1611
+ -webkit-text-size-adjust: 100%;
1612
+ -ms-text-size-adjust: 100%;
1613
+ font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;
1614
+ background-color: #f4f4f4;
1615
+ font-size: 16px;
1616
+ line-height: 1.7;
1617
+ letter-spacing: 0.01em;
1618
+ height: 100%;
1619
+ width: 100%;
1620
+ margin: 0;
1621
+ padding: 0;
1622
+ }
1623
+
1624
+ .main-table {
1625
+ -webkit-text-size-adjust: 100%;
1626
+ -ms-text-size-adjust: 100%;
1627
+ mso-table-lspace: 0pt;
1628
+ mso-table-rspace: 0pt;
1629
+ }
1630
+
1631
+ .outer-cell {
1632
+ -webkit-text-size-adjust: 100%;
1633
+ -ms-text-size-adjust: 100%;
1634
+ mso-table-lspace: 0pt;
1635
+ mso-table-rspace: 0pt;
1636
+ padding: 20px 0;
1637
+ }
1638
+
1639
+ .container {
1640
+ -webkit-text-size-adjust: 100%;
1641
+ -ms-text-size-adjust: 100%;
1642
+ mso-table-lspace: 0pt;
1643
+ mso-table-rspace: 0pt;
1644
+ max-width: 800px;
1645
+ }
1646
+
1647
+ .content-cell {
1648
+ -webkit-text-size-adjust: 100%;
1649
+ -ms-text-size-adjust: 100%;
1650
+ mso-table-lspace: 0pt;
1651
+ mso-table-rspace: 0pt;
1652
+ padding: 44px 44px 36px 44px;
1653
+ border-radius: 12px;
1654
+ box-shadow: 0 4px 18px rgba(0,0,0,0.07);
1655
+ }
1656
+
1657
+ .logo-container {
1658
+ margin-bottom: 32px;
1659
+ }
1660
+
1661
+ .light-logo {
1662
+ text-align: left;
1663
+ display: block;
1664
+ }
1665
+
1666
+ .dark-logo {
1667
+ text-align: left;
1668
+ display: none;
1669
+ }
1670
+
1671
+ .logo-img {
1672
+ -ms-interpolation-mode: bicubic;
1673
+ border: 0;
1674
+ height: auto;
1675
+ line-height: 100%;
1676
+ outline: none;
1677
+ text-decoration: none;
1678
+ display: block;
1679
+ margin-bottom: 12px;
1680
+ }
1681
+
1682
+ h1 {
1683
+ font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;
1684
+ line-height: 1.2;
1685
+ margin: 0 0 18px 0;
1686
+ letter-spacing: -0.5px;
1687
+ margin-top: 0;
1688
+ font-size: 28px;
1689
+ font-weight: bold;
1690
+ color: #111111;
1691
+ border-bottom: 3px solid #D2691E;
1692
+ padding-bottom: 8px;
1693
+ margin-bottom: 24px;
1694
+ }
1695
+
1696
+ p {
1697
+ font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;
1698
+ font-size: 16px;
1699
+ line-height: 1.7;
1700
+ color: #444444;
1701
+ margin: 0 0 18px 0;
1702
+ }
1703
+
1704
+ .strong-text {
1705
+ color: #D2691E;
1706
+ font-weight: bold;
1707
+ }
1708
+
1709
+ hr {
1710
+ border: 0;
1711
+ border-top: 2px solid #D2691E;
1712
+ margin: 32px 0;
1713
+ }
1714
+
1715
+ h2 {
1716
+ font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;
1717
+ font-size: 24px;
1718
+ font-weight: bold;
1719
+ line-height: 1.3;
1720
+ color: #D2691E;
1721
+ margin: 0 0 16px 0;
1722
+ letter-spacing: -0.2px;
1723
+ border-left: 5px solid #D2691E;
1724
+ padding-left: 12px;
1725
+ background: none;
1726
+ }
1727
+
1728
+ ul {
1729
+ padding-left: 24px;
1730
+ margin: 0 0 18px 0;
1731
+ }
1732
+
1733
+ ol {
1734
+ padding-left: 24px;
1735
+ margin: 0 0 18px 0;
1736
+ }
1737
+
1738
+ li {
1739
+ font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;
1740
+ font-size: 16px;
1741
+ line-height: 1.7;
1742
+ color: #444444;
1743
+ margin: 0 0 18px 0;
1744
+ margin-bottom: 8px;
1745
+ }
1746
+
1747
+ h3 {
1748
+ font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;
1749
+ font-size: 18px;
1750
+ font-weight: bold;
1751
+ line-height: 1.3;
1752
+ color: #D2691E;
1753
+ margin: 0 0 12px 0;
1754
+ letter-spacing: -0.1px;
1755
+ }
1756
+
1757
+ .link-style {
1758
+ -webkit-text-size-adjust: 100%;
1759
+ -ms-text-size-adjust: 100%;
1760
+ color: #0056b3;
1761
+ text-decoration: underline;
1762
+ font-weight: bold;
1763
+ transition: color 0.2s;
1764
+ }
1765
+
1766
+ .data-table {
1767
+ -webkit-text-size-adjust: 100%;
1768
+ -ms-text-size-adjust: 100%;
1769
+ mso-table-lspace: 0pt;
1770
+ mso-table-rspace: 0pt;
1771
+ width: 100%;
1772
+ border-collapse: collapse;
1773
+ margin: 0 0 18px 0;
1774
+ }
1775
+
1776
+ .table-th {
1777
+ border: 1px solid #e5e5e5;
1778
+ padding: 12px 8px;
1779
+ text-align: left;
1780
+ font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;
1781
+ font-size: 15px;
1782
+ background-color: #f2e6dd;
1783
+ font-weight: bold;
1784
+ color: #D2691E;
1785
+ }
1786
+
1787
+ .content-cell table {
1788
+ width: 100%;
1789
+ border-collapse: collapse;
1790
+ margin: 0 0 18px 0;
1791
+ }
1792
+ .content-cell th,
1793
+ .content-cell td {
1794
+ border: 1px solid #e5e5e5;
1795
+ padding: 12px 8px;
1796
+ text-align: left;
1797
+ font-size: 15px;
1798
+ font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;
1799
+ }
1800
+ .content-cell thead th {
1801
+ background-color: #f2e6dd;
1802
+ color: #D2691E;
1803
+ font-weight: bold;
1804
+ }
1805
+ .content-cell tbody tr:nth-child(even) td {
1806
+ background-color: #faf7f3;
1807
+ }
1808
+
1809
+ @media screen and (max-width: 800px) {
1810
+ .container {
1811
+ width: 100% !important;
1812
+ max-width: 100% !important;
1813
+ padding: 0 !important;
1814
+ }
1815
+
1816
+ .content-cell {
1817
+ padding: 20px !important;
1818
+ }
1819
+ }
1820
+ @media screen and (max-width: 600px) {
1821
+ h1 {
1822
+ font-size: 22px !important;
1823
+ }
1824
+
1825
+ h2 {
1826
+ font-size: 18px !important;
1827
+ }
1828
+
1829
+ .content-cell {
1830
+ padding: 12px !important;
1831
+ }
1832
+ }
1833
+ @media (prefers-color-scheme: dark) {
1834
+ body,
1835
+ .dark-mode-bg {
1836
+ background-color: #121212 !important;
1837
+ }
1838
+
1839
+ .dark-mode-content-bg {
1840
+ background-color: #1e1e1e !important;
1841
+ box-shadow: 0 4px 10px rgba(0,0,0,0.25) !important;
1842
+ }
1843
+
1844
+ h1 {
1845
+ color: #ffffff !important;
1846
+ border-bottom: 3px solid #E59866 !important;
1847
+ }
1848
+
1849
+ h2,
1850
+ h3 {
1851
+ color: #E59866 !important;
1852
+ border-left-color: #E59866 !important;
1853
+ }
1854
+
1855
+ p,
1856
+ li {
1857
+ color: #eeeeee !important;
1858
+ }
1859
+
1860
+ a:not(.button-link) {
1861
+ color: #4da6ff !important;
1862
+ text-decoration: underline !important;
1863
+ }
1864
+
1865
+ a.button-link {
1866
+ background: #E59866 !important;
1867
+ color: #222 !important;
1868
+ }
1869
+
1870
+ strong {
1871
+ color: #E59866 !important;
1872
+ }
1873
+
1874
+ hr,
1875
+ .section-divider {
1876
+ border-top-color: #E59866 !important;
1877
+ background: linear-gradient(90deg, #E59866 0%, #121212 100%) !important;
1878
+ }
1879
+
1880
+ blockquote {
1881
+ background-color: #2b2b2b !important;
1882
+ border-left-color: #E59866 !important;
1883
+ }
1884
+
1885
+ blockquote p {
1886
+ color: #E59866 !important;
1887
+ }
1888
+
1889
+ code {
1890
+ background-color: #333333 !important;
1891
+ color: #E59866 !important;
1892
+ }
1893
+
1894
+ pre {
1895
+ background-color: #2d2d2d !important;
1896
+ color: #f2f2f2 !important;
1897
+ border: 1px solid #444 !important;
1898
+ }
1899
+
1900
+ .container table th,
1901
+ .container table td {
1902
+ border-color: #444444 !important;
1903
+ }
1904
+
1905
+ .container table th {
1906
+ background-color: #333333 !important;
1907
+ color: #E59866 !important;
1908
+ }
1909
+
1910
+ .container table td strong {
1911
+ color: #E59866 !important;
1912
+ }
1913
+
1914
+ .container table tr:nth-child(even) td {
1915
+ background-color: #23201c !important;
1916
+ }
1917
+
1918
+ .button-cell {
1919
+ background-color: #E59866 !important;
1920
+ }
1921
+
1922
+ .button-link {
1923
+ color: #222 !important;
1924
+ background: #E59866 !important;
1925
+ }
1926
+
1927
+ .footer-text {
1928
+ color: #999999 !important;
1929
+ }
1930
+
1931
+ .footer-link {
1932
+ color: #999999 !important;
1933
+ text-decoration: underline !important;
1934
+ }
1935
+
1936
+ .dark-logo {
1937
+ display: block !important;
1938
+ }
1939
+
1940
+ .light-logo {
1941
+ display: none !important;
1942
+ }
1943
+
1944
+ .content-cell th,
1945
+ .content-cell td {
1946
+ border-color: #444444 !important;
1947
+ }
1948
+ .content-cell thead th {
1949
+ background-color: #333333 !important;
1950
+ color: #E59866 !important;
1951
+ }
1952
+ .content-cell tbody tr:nth-child(even) td {
1953
+ background-color: #23201c !important;
1954
+ }
1955
+
1956
+ }
1957
+ </style>
1958
+ </head>
1959
+ <body style="-webkit-text-size-adjust: 100%; -ms-text-size-adjust: 100%; font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; background-color: #f4f4f4; font-size: 16px; line-height: 1.7; letter-spacing: 0.01em; height: 100%; width: 100%; margin: 0; padding: 0;">
1960
+ <table border="0" cellpadding="0" cellspacing="0" width="100%" role="presentation" style="-webkit-text-size-adjust: 100%; -ms-text-size-adjust: 100%; mso-table-lspace: 0pt; mso-table-rspace: 0pt;">
1961
+ <tr>
1962
+ <td bgcolor="#f4f4f4" align="center" style="-webkit-text-size-adjust: 100%; -ms-text-size-adjust: 100%; mso-table-lspace: 0pt; mso-table-rspace: 0pt; padding: 20px 0;" class="dark-mode-bg">
1963
+ <!--[if (gte mso 9)|(IE)]>
1964
+ <table align="center" border="0" cellspacing="0" cellpadding="0" width="800">
1965
+ <tr>
1966
+ <td align="center" valign="top" width="800">
1967
+ <![endif]-->
1968
+ <table border="0" cellpadding="0" cellspacing="0" width="100%" style="-webkit-text-size-adjust: 100%; -ms-text-size-adjust: 100%; mso-table-lspace: 0pt; mso-table-rspace: 0pt; max-width: 800px;" class="container" role="presentation">
1969
+ <tr>
1970
+ <td bgcolor="#ffffff" align="left" class="content-cell dark-mode-content-bg" style="-webkit-text-size-adjust: 100%; -ms-text-size-adjust: 100%; mso-table-lspace: 0pt; mso-table-rspace: 0pt; padding: 44px 44px 36px 44px; border-radius: 12px; box-shadow: 0 4px 18px rgba(0,0,0,0.07);">
1971
+ <div style="margin-bottom: 32px;">
1972
+ <div style="text-align: left; display: block;" class="light-logo">
1973
+ <img src="https://heripo.com/heripo-logo.png" width="150" alt="로고" style="-ms-interpolation-mode: bicubic; border: 0; height: auto; line-height: 100%; outline: none; text-decoration: none; display: block; margin-bottom: 12px;" height="auto">
1974
+ </div>
1975
+ <!--[if !mso]><!-->
1976
+ <div style="text-align: left; display: none;" class="dark-logo">
1977
+ <img src="https://heripo.com/heripo-logo-dark.png" width="150" alt="다크모드 로고" style="-ms-interpolation-mode: bicubic; border: 0; height: auto; line-height: 100%; outline: none; text-decoration: none; display: block; margin-bottom: 12px;" height="auto">
1978
+ </div>
1979
+ <!--<![endif]-->
1980
+ </div>
1981
+
1982
+ {{NEWSLETTER_CONTENT}}
1983
+
1984
+ <hr style="border: 0; border-top: 2px solid #D2691E; margin: 32px 0;">
1985
+ <h2 style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 24px; font-weight: bold; line-height: 1.3; color: #D2691E; margin: 0 0 16px 0; letter-spacing: -0.2px; border-left: 5px solid #D2691E; padding-left: 12px; background: none;">🔍 뉴스레터 출처</h2>
1986
+ <p style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 16px; line-height: 1.7; color: #444444; margin: 0 0 18px 0;">모든 소식은 다음 출처에서 수집됩니다:</p>
1987
+ <ul style="padding-left: 24px; margin: 0 0 18px 0;">
1988
+ ${targets
1989
+ .map((target) => `
1990
+ <li style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 16px; line-height: 1.7; color: #444444; margin: 0 0 18px 0; margin-bottom: 8px;"><a href="${target.url}" target="_blank">${target.name}</a></li>
1991
+ `)
1992
+ .join('\n')}
1993
+ </ul>
1994
+ <hr style="border: 0; border-top: 2px solid #D2691E; margin: 32px 0;">
1995
+ <h2 style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 24px; font-weight: bold; line-height: 1.3; color: #D2691E; margin: 0 0 16px 0; letter-spacing: -0.2px; border-left: 5px solid #D2691E; padding-left: 12px; background: none;">📅 발행 정책</h2>
1996
+ <p style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 16px; line-height: 1.7; color: #444444; margin: 0 0 18px 0;"><strong>heripo 리서치 레이더</strong>는 매일 발행을 원칙으로 하되, 독자분들께 의미 있는 정보를 제공하기 위해 다음과 같은 발행 기준을 적용합니다:</p>
1997
+ <ul style="padding-left: 24px; margin: 0 0 18px 0;">
1998
+ <li style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 16px; line-height: 1.7; color: #444444; margin: 0 0 18px 0; margin-bottom: 8px;"><strong>정상 발행</strong>: 새로운 소식이 ${newsletterConfig.publicationCriteria.minimumArticleCountForIssue + 1}개 이상이거나, ${newsletterConfig.publicationCriteria.minimumArticleCountForIssue}개 이하여도 중요도 ${newsletterConfig.publicationCriteria.priorityArticleScoreThreshold}점 이상의 핵심 소식이 포함된 경우</li>
1999
+ <li style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 16px; line-height: 1.7; color: #444444; margin: 0 0 18px 0; margin-bottom: 8px;"><strong>이월 발행</strong>: 새로운 소식이 ${newsletterConfig.publicationCriteria.minimumArticleCountForIssue}개 이하이면서 중요한 내용(${newsletterConfig.publicationCriteria.priorityArticleScoreThreshold}점 이상)이 없을 경우, 다음 호로 이월하여 더 풍성한 내용으로 제공</li>
2000
+ <li style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 16px; line-height: 1.7; color: #444444; margin: 0 0 18px 0; margin-bottom: 8px;"><strong>통합 발행</strong>: 이월된 소식과 새로운 소식을 함께 발행하여 보다 종합적인 업계 동향을 전달</li>
2001
+ </ul>
2002
+ <p style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 16px; line-height: 1.7; color: #444444; margin: 0 0 18px 0;">이러한 정책을 통해 매일 의미 없는 소식으로 독자분들의 시간을 낭비하지 않고, 정말 중요한 정보를 적절한 타이밍에 제공하고자 합니다.</p>
2003
+ <hr style="border: 0; border-top: 2px solid #D2691E; margin: 32px 0;">
2004
+ <h2 style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 24px; font-weight: bold; line-height: 1.3; color: #D2691E; margin: 0 0 16px 0; letter-spacing: -0.2px; border-left: 5px solid #D2691E; padding-left: 12px; background: none;">🔍 heripo(헤리포) 플랫폼 소개</h2>
2005
+ <p style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 16px; line-height: 1.7; color: #444444; margin: 0 0 18px 0;">지능형 고고학 연구 플랫폼 heripo는 <a href="https://poc.heripo.com" target="_blank">「대형 언어 모델(LLM)을 활용한 고고학 정보화 연구」 논문</a>에 기반한 후속 연구 프로젝트로, 김홍연(고고학 정보화 연구자, 소프트웨어 엔지니어) 개인이 개발·운영하고 있습니다.</p>
2006
+ <p style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 16px; line-height: 1.7; color: #444444; margin: 0 0 18px 0;">이 뉴스레터는 heripo의 선공개 버전으로 플랫폼의 일부 기능 중 하나입니다.</p>
2007
+ <p style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 16px; line-height: 1.7; color: #444444; margin: 0 0 18px 0;">2026년 중 플랫폼 출시를 목표로 개발 중이며, 핵심 로직 또한 오픈소스 프로젝트로 공개할 예정입니다.</p>
2008
+ <p style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 16px; line-height: 1.7; color: #444444; margin: 0 0 18px 0;">플랫폼 프로토타입 출시 시 구독자분들께 우선 안내해 드리겠습니다.</p>
2009
+ <p style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 16px; line-height: 1.7; color: #444444; margin: 0 0 18px 0;">뉴스레터 소스 추가를 원하시거나 기타 궁금한 점이 있다면 <a href="mailto:kimhongyeon89@gmail.com" target="_blank">kimhongyeon89@gmail.com</a> 으로 문의 바랍니다.</p>
2010
+ <hr style="border: 0; border-top: 2px solid #D2691E; margin: 32px 0;">
2011
+ <h2 style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 24px; font-weight: bold; line-height: 1.3; color: #D2691E; margin: 0 0 16px 0; letter-spacing: -0.2px; border-left: 5px solid #D2691E; padding-left: 12px; background: none;">⚠️ 중요 안내</h2>
2012
+ <p style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 16px; line-height: 1.7; color: #444444; margin: 0 0 18px 0;">본 뉴스레터는 국가유산청 공지사항, 관련 기관 입찰 정보 등 특정 웹 게시판의 모든 신규 소식을 빠짐없이 수집하여 제공합니다. 수집된 모든 정보는 정확한 크롤링 로직에 기반하므로 원본과 일치하여 신뢰할 수 있습니다.</p>
2013
+ <p style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 16px; line-height: 1.7; color: #444444; margin: 0 0 18px 0;">다만, 수집된 정보를 바탕으로 한 <strong>분류, 요약, 분석, 중요도 판단</strong>은 LLM에 의해 수행되었습니다. LLM은 고도로 발전된 기술이지만, 정보를 해석하고 판단하는 과정에서 오류가 발생할 수 있습니다.</p>
2014
+ <p style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 16px; line-height: 1.7; color: #444444; margin: 0 0 18px 0;">따라서 중요한 의사결정 시에는 <strong>반드시 원문 또는 원본 정보 출처를 직접 확인</strong>하시기를 권고합니다.</p>
2015
+ </td>
2016
+ </tr>
2017
+ <tr>
2018
+ <td align="center" style="-webkit-text-size-adjust: 100%; -ms-text-size-adjust: 100%; mso-table-lspace: 0pt; mso-table-rspace: 0pt; padding: 30px 20px; font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 12px; line-height: 1.5; color: #888888;">
2019
+ <p style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 16px; line-height: 1.7; color: #444444; margin: 0 0 10px 0;" class="footer-text">김홍연(고고학 정보화 연구자) | kimhongyeon89@gmail.com</p>
2020
+ <p style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 16px; line-height: 1.7; color: #444444; margin: 0;" class="footer-text">이 메일은 heripo.com에서 리서치 레이더를 구독하신 분들에게 발송됩니다.<br>
2021
+ 더 이상 이메일을 받고 싶지 않으시면 <a href="{{{RESEND_UNSUBSCRIBE_URL}}}" target="_blank" style="-webkit-text-size-adjust: 100%; -ms-text-size-adjust: 100%; font-weight: bold; transition: color 0.2s; color: #888888; text-decoration: underline;" class="footer-link">여기에서 수신 거부</a>하세요.</p>
2022
+ </td>
2023
+ </tr>
2024
+ </table>
2025
+ <!--[if (gte mso 9)|(IE)]>
2026
+ </td>
2027
+ </tr>
2028
+ </table>
2029
+ <![endif]-->
2030
+ </td>
2031
+ </tr>
2032
+ </table>
2033
+ </body>
2034
+ </html>`;
2035
+
2036
/**
 * Provider handed to the newsletter generator for the content-generation stage.
 *
 * Responsibilities visible in this class:
 * - holds the LLM model handle and generation settings,
 * - exposes the HTML template together with its title/content markers,
 * - fetches candidate articles and persists the generated newsletter through
 *   the injected repositories.
 */
class ContentGenerateProvider {
    // Field declarations are grouped here in their original relative order;
    // class fields are initialized in declaration order before the constructor
    // body runs, so the resulting instance layout is unchanged.
    openai;
    articleRepository;
    newsletterRepository;
    /** Backing store for `issueOrder`; stays `null` until `initializeIssueOrder()` resolves. */
    _issueOrder = null;
    model;
    /** LLM temperature setting for content generation */
    temperature = llmConfig.generation.temperature;
    /** Newsletter brand name */
    newsletterBrandName = newsletterConfig.brandName;
    /** Subscribe page URL */
    subscribePageUrl = newsletterConfig.subscribePageUrl;
    /** Publication criteria (minimum article count, priority score threshold) */
    publicationCriteria = newsletterConfig.publicationCriteria;
    /** HTML template plus the marker names used for title and content injection */
    htmlTemplate = {
        html: createNewsletterHtmlTemplate(crawlingTargetGroups.flatMap(({ targets }) => targets)),
        markers: {
            title: 'NEWSLETTER_TITLE',
            content: 'NEWSLETTER_CONTENT',
        },
    };

    /**
     * @param openai - Provider function; called with a model id to obtain the model handle
     * @param articleRepository - Repository used to look up candidate articles
     * @param newsletterRepository - Repository used for issue numbering and persistence
     */
    constructor(openai, articleRepository, newsletterRepository) {
        this.openai = openai;
        this.articleRepository = articleRepository;
        this.newsletterRepository = newsletterRepository;
        this.model = this.openai('gpt-5.1');
    }

    /**
     * Current issue order.
     * @throws Error if `initializeIssueOrder()` has not been called yet
     */
    get issueOrder() {
        if (this._issueOrder !== null) {
            return this._issueOrder;
        }
        throw new Error('issueOrder not initialized. Call initializeIssueOrder() first.');
    }

    /**
     * Load the next issue order from the newsletter repository and cache it
     * on this instance. Must run before `issueOrder` is read.
     */
    async initializeIssueOrder() {
        const nextIssueOrder = await this.newsletterRepository.getNextIssueOrder();
        this._issueOrder = nextIssueOrder;
    }

    /**
     * Fetch candidate articles for newsletter generation.
     * @returns Whatever the article repository reports as newsletter candidates
     */
    async fetchArticleCandidates() {
        return this.articleRepository.findCandidatesForNewsletter();
    }

    /**
     * Persist a generated newsletter via the newsletter repository.
     * @param input - Newsletter data and used articles
     * @returns The repository's result (the saved newsletter ID)
     */
    async saveNewsletter(input) {
        return this.newsletterRepository.saveNewsletter(input);
    }
}
2102
+
2103
/**
 * Provider handed to the newsletter generator for the crawling stage.
 *
 * It exposes the crawling target configuration and a concurrency limit, and
 * forwards article lookups and saves to the injected article repository.
 */
class CrawlingProvider {
    // Fields keep their original declaration order (and therefore their
    // initialization order); they are only grouped ahead of the constructor.
    articleRepository;
    /** Maximum number of concurrent crawling operations */
    maxConcurrency = 5;
    /** Crawling target groups configuration (module-level `crawlingTargetGroups`) */
    crawlingTargetGroups = crawlingTargetGroups;

    /**
     * @param articleRepository - Repository used to query and store crawled articles
     */
    constructor(articleRepository) {
        this.articleRepository = articleRepository;
    }

    /**
     * Look up already-stored articles so they are not crawled again.
     * @param articleUrls - URLs to check
     * @returns Existing articles as reported by the repository
     */
    async fetchExistingArticlesByUrls(articleUrls) {
        const { articleRepository } = this;
        return articleRepository.findByUrls(articleUrls);
    }

    /**
     * Store crawled articles through the repository.
     * @param articles - Articles to save
     * @param context - Task context (task ID, target group, target)
     * @returns The repository's result (number of saved articles)
     */
    async saveCrawledArticles(articles, context) {
        const { articleRepository } = this;
        return articleRepository.saveCrawledArticles(articles, context);
    }
}
2136
+
2137
/**
 * Date service implementation.
 *
 * Produces two string renderings of "today", both derived from the local-time
 * getters of a freshly created `Date` (`getFullYear`, `getMonth`, `getDate`).
 */
class DateService {
    /**
     * Get the current date as `YYYY-MM-DD`, with month and day zero-padded
     * to two digits.
     * @returns Date string (e.g., "2024-10-15")
     */
    getCurrentISODateString() {
        const today = new Date();
        const twoDigits = (value) => String(value).padStart(2, '0');
        return [
            today.getFullYear(),
            // getMonth() is zero-based, so shift to the calendar month.
            twoDigits(today.getMonth() + 1),
            twoDigits(today.getDate()),
        ].join('-');
    }

    /**
     * Get the current date formatted for display, without zero padding.
     * @returns Korean formatted date (e.g., "2024년 10월 15일")
     */
    getDisplayDateString() {
        const today = new Date();
        // getMonth() is zero-based, so shift to the calendar month.
        return `${today.getFullYear()}년 ${today.getMonth() + 1}월 ${today.getDate()}일`;
    }
}
2165
+
2166
/**
 * Task service implementation
 * - Manages newsletter generation task lifecycle (start/end)
 * - Prevents duplicate execution, including overlapping `start()` calls that
 *   are issued while a previous `start()` is still awaiting the repository
 */
class TaskService {
    taskRepository;
    /** ID of the running task, or `null` when no task is running */
    currentTaskId = null;
    /**
     * True while `start()` is awaiting `createTask()`. `currentTaskId` is only
     * assigned after that await, so without this flag a second `start()` issued
     * in the meantime would pass the "already running" check and create a
     * duplicate task.
     */
    _isStarting = false;

    /**
     * @param taskRepository - Repository providing `createTask()` and `completeTask(id)`
     */
    constructor(taskRepository) {
        this.taskRepository = taskRepository;
    }

    /**
     * Start a new task
     * @throws Error if a task is already running or is currently being started
     * @returns Task ID
     */
    async start() {
        if (this.currentTaskId !== null || this._isStarting) {
            throw new Error('Task is already running');
        }
        // Claim the slot synchronously, before the first await.
        this._isStarting = true;
        try {
            const taskId = await this.taskRepository.createTask();
            this.currentTaskId = taskId;
            return taskId;
        }
        finally {
            // Release the guard on success and on failure, so that a failed
            // createTask() does not block later start() attempts.
            this._isStarting = false;
        }
    }

    /**
     * End the current task
     *
     * `currentTaskId` is cleared only after `completeTask()` resolves; if the
     * repository call rejects, the task is still considered running.
     * @throws Error if no task is running
     */
    async end() {
        if (this.currentTaskId === null) {
            throw new Error('No task is running');
        }
        await this.taskRepository.completeTask(this.currentTaskId);
        this.currentTaskId = null;
    }
}
2202
+
2203
/**
 * Newsletter generator factory function.
 *
 * Builds an OpenAI provider from the supplied API key, instantiates the date
 * service, task service and the crawling / analysis / content-generation
 * providers, and passes them to `GenerateNewsletter` together with the retry
 * settings from `llmConfig`.
 *
 * @param dependencies - Repository implementations and options
 * @returns Configured newsletter generator instance
 *
 * @example
 * ```typescript
 * const generator = createNewsletterGenerator({
 *   openAIApiKey: process.env.OPENAI_API_KEY,
 *   taskRepository: new PrismaTaskRepository(prisma),
 *   articleRepository: new PrismaArticleRepository(prisma),
 *   tagRepository: new PrismaTagRepository(prisma),
 *   newsletterRepository: new PrismaNewsletterRepository(prisma),
 *   logger: customLogger, // optional
 *   previewNewsletter: { // optional
 *     fetchNewsletterForPreview: async () => { ... },
 *     emailService: emailService,
 *     emailMessage: { from: '...', to: '...' },
 *   },
 * });
 *
 * const newsletterId = await generator.generate();
 * ```
 */
function createNewsletterGenerator(dependencies) {
    const {
        openAIApiKey,
        taskRepository,
        articleRepository,
        tagRepository,
        newsletterRepository,
        logger,
        previewNewsletter,
    } = dependencies;
    const openai = createOpenAI({ apiKey: openAIApiKey });
    const { maxRetries, chainStopAfterAttempt } = llmConfig;
    // Object-literal properties are evaluated top to bottom, so the services
    // and providers are still constructed in the same order as before.
    return new GenerateNewsletter({
        contentOptions,
        dateService: new DateService(),
        taskService: new TaskService(taskRepository),
        crawlingProvider: new CrawlingProvider(articleRepository),
        analysisProvider: new AnalysisProvider(openai, articleRepository, tagRepository),
        contentGenerateProvider: new ContentGenerateProvider(openai, articleRepository, newsletterRepository),
        options: {
            llm: { maxRetries },
            chain: { stopAfterAttempt: chainStopAfterAttempt },
            logger,
            previewNewsletter,
        },
    });
}
2256
/**
 * Newsletter generation execution function.
 *
 * Creates a generator via `createNewsletterGenerator`, initializes the issue
 * order on its content-generation provider, and then runs the generation.
 *
 * @param dependencies - Repository implementations and options
 * @returns Generated newsletter ID
 *
 * @example
 * ```typescript
 * const newsletterId = await generateNewsletter({
 *   openAIApiKey: process.env.OPENAI_API_KEY,
 *   taskRepository: new PrismaTaskRepository(prisma),
 *   articleRepository: new PrismaArticleRepository(prisma),
 *   tagRepository: new PrismaTagRepository(prisma),
 *   newsletterRepository: new PrismaNewsletterRepository(prisma),
 * });
 * ```
 */
async function generateNewsletter(dependencies) {
    const newsletterGenerator = createNewsletterGenerator(dependencies);
    // The provider's `issueOrder` getter throws until initializeIssueOrder()
    // has completed, so it is initialized right before generate() is called.
    const { contentGenerateProvider } = newsletterGenerator;
    await contentGenerateProvider.initializeIssueOrder();
    return newsletterGenerator.generate();
}
2279
+
2280
+ export { AnalysisProvider, ContentGenerateProvider, CrawlingProvider, DateService, TaskService, contentOptions, crawlingTargetGroups, generateNewsletter, llmConfig, newsletterConfig };
2281
+ //# sourceMappingURL=index.js.map