@heripo/research-radar 2.2.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -652,6 +652,33 @@ const parseKhsLawList = (html) => {
652
652
  });
653
653
  return posts;
654
654
  };
655
/**
 * Parse the tender (입찰정보) board list page served from www.khs.go.kr.
 *
 * Every row of `table.b_list` is inspected; rows without `<td>` cells and
 * rows whose title link (`a.b_tit` in the second cell) has no `href` are
 * skipped. The link is resolved against the site origin and its `id` query
 * parameter, when present, becomes the post's unique id.
 *
 * @param {string} html - Markup of the list page, handed to cheerio as-is.
 * @returns {Array<object>} One entry per accepted row with the keys
 *   `uniqId`, `title`, `date`, `detailUrl` and `dateType`.
 */
const parseKhsTenderList = (html) => {
  const $ = cheerio__namespace.load(html);
  const origin = 'https://www.khs.go.kr';
  const results = [];

  for (const row of $('table.b_list tbody tr').toArray()) {
    const cells = $(row).find('td');
    if (cells.length === 0) {
      continue;
    }

    // The title anchor lives in the second cell of the row.
    const link = cells.eq(1).find('a.b_tit');
    const href = link.attr('href');
    if (!href) {
      continue;
    }

    const resolved = new URL(href, origin);
    const id = resolved.searchParams.get('id');
    const title = link.text().trim();
    // The fifth cell's text is handed to getDate and tagged as REGISTERED below.
    const date = getDate(cells.eq(4).text().trim());

    results.push({
      // searchParams.get yields null when the parameter is absent; expose undefined instead.
      uniqId: id ?? undefined,
      title,
      date,
      detailUrl: cleanUrl(resolved.href),
      dateType: core.DateType.REGISTERED,
    });
  }

  return results;
};
655
682
  const parseKhsDetail = async (html) => {
656
683
  const $ = cheerio__namespace.load(html);
657
684
  const content = $('div.b_content');
@@ -996,9 +1023,9 @@ const getRandomUserAgent = () => USER_AGENTS[Math.floor(Math.random() * USER_AGE
996
1023
  * Uses internal API since the table is rendered via CSR
997
1024
  * @see https://www.yngogo.or.kr
998
1025
  */
999
- const parseYngogoList = async (_html, menuSeq, bbsSeq, sitecntntsSeq) => {
1026
+ const parseYngogoList = async (_html, menuSeq, bbsSeq, sitecntntsSeq, customFetch) => {
1000
1027
  // Fetch from internal API (CSR workaround)
1001
- const response = await fetch(LIST_API_URL, {
1028
+ const response = await (customFetch ?? fetch)(LIST_API_URL, {
1002
1029
  method: 'POST',
1003
1030
  headers: {
1004
1031
  'Content-Type': 'application/x-www-form-urlencoded',
@@ -1045,9 +1072,9 @@ const parseYngogoList = async (_html, menuSeq, bbsSeq, sitecntntsSeq) => {
1045
1072
  * Parse detail page from 영남고고학회 (Yeongnam Archaeological Society)
1046
1073
  * Uses internal API since the detail page is rendered via CSR
1047
1074
  */
1048
- const parseYngogoDetail = async (_html, menuSeq, bbsSeq, nttSeq, sitecntntsSeq) => {
1075
+ const parseYngogoDetail = async (_html, menuSeq, bbsSeq, nttSeq, sitecntntsSeq, customFetch) => {
1049
1076
  // Fetch from internal API (CSR workaround)
1050
- const response = await fetch(DETAIL_API_URL, {
1077
+ const response = await (customFetch ?? fetch)(DETAIL_API_URL, {
1051
1078
  method: 'POST',
1052
1079
  headers: {
1053
1080
  'Content-Type': 'application/x-www-form-urlencoded',
@@ -1098,496 +1125,498 @@ function extractNttSeq(html) {
1098
1125
  return match?.[1] ?? '';
1099
1126
  }
1100
1127
 
1101
- const crawlingTargetGroups = [
1102
- {
1103
- id: 'news',
1104
- name: 'News',
1105
- targets: [
1106
- {
1107
- id: '국가유산청_공지사항',
1108
- name: '국가유산청 공지사항',
1109
- url: 'https://www.khs.go.kr/multiBbz/selectMultiBbzList.do?bbzId=newpublic&mn=NS_01_01',
1110
- parseList: parseKhsList,
1111
- parseDetail: parseKhsDetail,
1112
- },
1113
- {
1114
- id: '국가유산청_보도설명',
1115
- name: '국가유산청 보도/설명',
1116
- url: 'https://www.khs.go.kr/newsBbz/selectNewsBbzList.do?sectionId=all_sec_1&mn=NS_01_02',
1117
- parseList: parseKhsList,
1118
- parseDetail: parseKhsDetail,
1119
- },
1120
- {
1121
- id: '국가유산청_사진뉴스',
1122
- name: '국가유산청 사진뉴스',
1123
- url: 'https://www.khs.go.kr/cop/bbs/selectBoardList.do?bbsId=BBSMSTR_1002&mn=NS_01_03',
1124
- parseList: parseKhsGalleryList,
1125
- parseDetail: parseKhsDetail,
1126
- },
1127
- {
1128
- id: '국가유산청_입법예고',
1129
- name: '국가유산청 입법예고',
1130
- url: 'https://www.khs.go.kr/lawBbz/selectLawBbzList.do?mn=NS_03_01_01',
1131
- parseList: parseKhsLawList,
1132
- parseDetail: parseKhsDetail,
1133
- },
1134
- // NOTE: Parsing logic is implemented, but too much fragmented data with little value for newsletter
1135
- // {
1136
- // id: '국가유산청_발굴조사_현황공개',
1137
- // name: '국가유산청 발굴조사 현황공개',
1138
- // url: 'https://www.e-minwon.go.kr/ge/ee/getListEcexmPrmsnAply.do',
1139
- // parseList: parseExcavationStatusList,
1140
- // parseDetail: parseExcavationStatusDetail,
1141
- // },
1142
- {
1143
- id: '국가유산청_발굴조사_보고서',
1144
- name: '국가유산청 발굴조사 보고서',
1145
- url: 'https://www.e-minwon.go.kr/ge/ee/getListEcexmRptp.do',
1146
- parseList: parseExcavationReportList,
1147
- parseDetail: parseExcavationReportDetail,
1148
- },
1149
- {
1150
- id: '국가유산청_발굴조사_현장공개',
1151
- name: '국가유산청 발굴조사 현장공개',
1152
- url: 'https://www.e-minwon.go.kr/ge/ee/getListLinkGrndsRls.do',
1153
- parseList: parseExcavationSiteList,
1154
- parseDetail: parseExcavationSiteDetail,
1155
- },
1156
- {
1157
- id: '국립문화유산연구원_공지사항',
1158
- name: '국립문화유산연구원 공지사항',
1159
- url: 'https://www.nrich.go.kr/kor/boardList.do?menuIdx=282&bbscd=32',
1160
- parseList: parseNrichNoticeList,
1161
- parseDetail: parseNrichNoticeDetail,
1162
- },
1163
- {
1164
- id: '국립문화유산연구원_주요행사',
1165
- name: '국립문화유산연구원 주요행사',
1166
- url: 'https://www.nrich.go.kr/kor/majorList.do?menuIdx=286',
1167
- parseList: parseNrichMajorEventList,
1168
- parseDetail: parseNrichMajorEventDetail,
1169
- },
1170
- {
1171
- id: '국립문화유산연구원_학술지_헤리티지',
1172
- name: '국립문화유산연구원 헤리티지:역사와 과학 학술지',
1173
- url: 'https://www.nrich.go.kr/kor/subscriptionDataUsrList.do?menuIdx=1651&gubun=J',
1174
- parseList: parseNrichJournalList,
1175
- parseDetail: parseNrichJournalDetail,
1176
- },
1177
- {
1178
- id: '국립문화유산연구원_학술지_보존과학연구',
1179
- name: '국립문화유산연구원 보존과학연구 학술지',
1180
- url: 'https://www.nrich.go.kr/kor/subscriptionDataUsrList.do?menuIdx=2065&gubun=K',
1181
- parseList: parseNrichJournalList,
1182
- parseDetail: parseNrichJournalDetail,
1183
- },
1184
- {
1185
- id: '국가유산지식이음_공지사항',
1186
- name: '국가유산 지식이음 공지사항',
1187
- url: 'https://portal.nrich.go.kr/kor/boardList.do?menuIdx=1058&bbscd=9',
1188
- parseList: parseNrichPortalList,
1189
- parseDetail: parseNrichPortalDetail,
1190
- },
1191
- {
1192
- id: '국립고궁박물관_공지사항',
1193
- name: '국립고궁박물관 공지사항',
1194
- url: 'https://www.gogung.go.kr/gogung/bbs/BMSR00022/list.do?gubunCd=B22_001&menuNo=800088',
1195
- parseList: parseGogungList,
1196
- parseDetail: parseGogungDetail,
1197
- },
1198
- {
1199
- id: '국가유산진흥원_공지사항',
1200
- name: '국가유산진흥원 공지사항',
1201
- url: 'https://www.kh.or.kr/brd/board/644/L/SITES/100/menu/371',
1202
- parseList: parseHeritageAgencyList,
1203
- parseDetail: parseHeritageAgencyDetail,
1204
- },
1205
- {
1206
- id: '국가유산진흥원_보도자료',
1207
- name: '국가유산진흥원 보도자료',
1208
- url: 'https://www.kh.or.kr/brd/board/715/L/menu/373',
1209
- parseList: parseHeritageAgencyList,
1210
- parseDetail: parseHeritageAgencyDetail,
1211
- },
1212
- {
1213
- id: '국가유산진흥원_매장유산국비발굴단_공지사항',
1214
- name: '국가유산진흥원 매장유산국비발굴단 공지사항',
1215
- url: 'https://www.kh.or.kr/brd/board/644/L/SITES/201/menu/506',
1216
- parseList: parseHeritageAgencyList,
1217
- parseDetail: parseHeritageAgencyDetail,
1218
- },
1219
- {
1220
- id: '국가유산진흥원_매장유산국비발굴단_현장설명회',
1221
- name: '국가유산진흥원 매장유산국비발굴단 현장설명회',
1222
- url: 'https://www.kh.or.kr/brd/board/631/L/menu/504',
1223
- parseList: parseHeritageAgencyList,
1224
- parseDetail: parseHeritageAgencyDetail,
1225
- },
1226
- {
1227
- id: '한국문화유산협회_공지사항',
1228
- name: '한국문화유산협회 공지사항',
1229
- url: 'https://www.kaah.kr/notice',
1230
- parseList: parseKaahList,
1231
- parseDetail: parseKaahDetail,
1232
- },
1233
- {
1234
- id: '한국문화유산협회_협회소식',
1235
- name: '한국문화유산협회 협회소식',
1236
- url: 'https://www.kaah.kr/news',
1237
- parseList: parseKaahList,
1238
- parseDetail: parseKaahDetail,
1239
- },
1240
- {
1241
- id: '한국문화유산협회_보도자료',
1242
- name: '한국문화유산협회 보도자료',
1243
- url: 'https://www.kaah.kr/mass',
1244
- parseList: parseKaahList,
1245
- parseDetail: parseKaahDetail,
1246
- },
1247
- {
1248
- id: '한국문화유산협회_회원기관소식',
1249
- name: '한국문화유산협회 회원기관소식',
1250
- url: 'https://www.kaah.kr/assnews',
1251
- parseList: parseKaahList,
1252
- parseDetail: parseKaahDetail,
1253
- },
1254
- {
1255
- id: '한국문화유산협회_유관기관소식',
1256
- name: '한국문화유산협회 유관기관소식',
1257
- url: 'https://www.kaah.kr/ralnews',
1258
- parseList: parseKaahList,
1259
- parseDetail: parseKaahDetail,
1260
- },
1261
- {
1262
- id: '한국문화유산협회_발굴현장공개',
1263
- name: '한국문화유산협회 발굴현장공개',
1264
- url: 'https://www.kaah.kr/placeopen',
1265
- parseList: parseKaahPlaceList,
1266
- parseDetail: parseKaahPlaceDetail,
1267
- },
1268
- {
1269
- id: '한국고고학회_공지사항',
1270
- name: '한국고고학회 공지사항',
1271
- url: 'https://www.kras.or.kr/?r=kras&m=bbs&bid=notice',
1272
- parseList: parseKrasList,
1273
- parseDetail: parseKrasDetail,
1274
- },
1275
- {
1276
- id: '한국고고학회_학술대회및행사',
1277
- name: '한국고고학회 학술대회 및 행사',
1278
- url: 'https://www.kras.or.kr/?r=kras&m=bbs&bid=sympo',
1279
- parseList: parseKrasList,
1280
- parseDetail: parseKrasDetail,
1281
- },
1282
- {
1283
- id: '한국고고학회_신간안내_단행본',
1284
- name: '한국고고학회 신간안내 - 단행본',
1285
- url: 'https://www.kras.or.kr/?c=61/101/105',
1286
- parseList: parseKrasList,
1287
- parseDetail: parseKrasDetail,
1288
- },
1289
- {
1290
- id: '한국고고학회_현장소식',
1291
- name: '한국고고학회 현장소식',
1292
- url: 'https://www.kras.or.kr/?c=61/73',
1293
- parseList: parseKrasList,
1294
- parseDetail: parseKrasDetail,
1295
- },
1296
- {
1297
- id: '중부고고학회_공지사항',
1298
- name: '중부고고학회 공지사항',
1299
- url: 'https://www.jbgogo.or.kr/bbs/notice',
1300
- parseList: parseJbgogoList,
1301
- parseDetail: parseJbgogoDetail,
1302
- },
1303
- {
1304
- id: '중부고고학회_학계소식',
1305
- name: '중부고고학회 학계소식',
1306
- url: 'https://www.jbgogo.or.kr/bbs/news',
1307
- parseList: parseJbgogoList,
1308
- parseDetail: parseJbgogoDetail,
1309
- },
1310
- {
1311
- id: '중부고고학회_발굴현장소식',
1312
- name: '중부고고학회 발굴현장소식',
1313
- url: 'https://www.jbgogo.or.kr/bbs/spotnews',
1314
- parseList: parseJbgogoList,
1315
- parseDetail: parseJbgogoDetail,
1316
- },
1317
- {
1318
- id: '호서고고학회_공지사항',
1319
- name: '호서고고학회 공지사항',
1320
- url: 'http://www.hsas.or.kr/flow/?ref=board/board.emt&menu_table=m2_00&bbs_table=notice&menu_idx=010000',
1321
- parseList: parseHsasList,
1322
- parseDetail: parseHsasDetail,
1323
- },
1324
- {
1325
- id: '호서고고학회_학회소식',
1326
- name: '호서고고학회 학회소식',
1327
- url: 'http://www.hsas.or.kr/flow/?ref=board/board.emt&menu_table=m2_00&bbs_table=m2_01&menu_idx=020000',
1328
- parseList: parseHsasList,
1329
- parseDetail: parseHsasDetail,
1330
- },
1331
- {
1332
- id: '영남고고학회_공지사항',
1333
- name: '영남고고학회 공지사항',
1334
- url: 'http://www.yngogo.or.kr/subList/32000001120',
1335
- parseList: (html) => parseYngogoList(html, '32000001120', '32000001157', '32000001711'),
1336
- parseDetail: (html) => parseYngogoDetail(html, '32000001120', '32000001157', extractNttSeq(html), '32000001711'),
1337
- },
1338
- {
1339
- id: '영남고고학회_학계소식',
1340
- name: '영남고고학회 학계소식',
1341
- url: 'http://www.yngogo.or.kr/subList/32000001133',
1342
- parseList: (html) => parseYngogoList(html, '32000001133', '32000001161', '32000001715'),
1343
- parseDetail: (html) => parseYngogoDetail(html, '32000001133', '32000001161', extractNttSeq(html), '32000001715'),
1344
- },
1345
- {
1346
- id: '영남고고학회_현장소식',
1347
- name: '영남고고학회 현장소식',
1348
- url: 'http://www.yngogo.or.kr/subList/32000001135',
1349
- parseList: (html) => parseYngogoList(html, '32000001135', '32000001163', '32000001717'),
1350
- parseDetail: (html) => parseYngogoDetail(html, '32000001135', '32000001163', extractNttSeq(html), '32000001717'),
1351
- },
1352
- {
1353
- id: '국립중앙박물관_알림',
1354
- name: '국립중앙박물관 알림',
1355
- url: 'https://www.museum.go.kr/MUSEUM/contents/M0701010000.do?catCustomType=united&catId=128',
1356
- parseList: (html) => parseMuseumList(html, '/MUSEUM/contents/M0701010000.do'),
1357
- parseDetail: parseMuseumDetail,
1358
- },
1359
- {
1360
- id: '국립중앙박물관_고시공고',
1361
- name: '국립중앙박물관 고시/공고',
1362
- url: 'https://www.museum.go.kr/MUSEUM/contents/M0701020000.do',
1363
- parseList: (html) => parseMuseumList(html, '/MUSEUM/contents/M0701020000.do'),
1364
- parseDetail: parseMuseumDetail,
1365
- },
1366
- {
1367
- id: '국립중앙박물관_보도자료',
1368
- name: '국립중앙박물관 보도 자료',
1369
- url: 'https://www.museum.go.kr/MUSEUM/contents/M0701040000.do?catCustomType=post&catId=93',
1370
- parseList: parseMuseumPressList,
1371
- parseDetail: parseMuseumDetail,
1372
- },
1373
- {
1374
- id: '국립전주박물관_새소식',
1375
- name: '국립전주박물관 새소식',
1376
- url: 'https://jeonju.museum.go.kr/board.es?mid=a10105010000&bid=0001',
1377
- parseList: parseJeonjuMuseumList,
1378
- parseDetail: parseJeonjuMuseumDetail,
1379
- },
1380
- {
1381
- id: '국립전주박물관_보도자료',
1382
- name: '국립전주박물관 보도자료',
1383
- url: 'https://jeonju.museum.go.kr/board.es?mid=a10105050000&bid=0004',
1384
- parseList: parseJeonjuMuseumList,
1385
- parseDetail: parseJeonjuMuseumDetail,
1386
- },
1387
- {
1388
- id: '국립부여박물관_공지사항',
1389
- name: '국립부여박물관 공지사항',
1390
- url: 'https://buyeo.museum.go.kr/bbs/list.do?key=2301250005',
1391
- parseList: (html) => parseBuyeoMuseumList(html, '2301250005'),
1392
- parseDetail: parseBuyeoMuseumDetail,
1393
- },
1394
- {
1395
- id: '국립부여박물관_보도자료',
1396
- name: '국립부여박물관 보도자료',
1397
- url: 'https://buyeo.museum.go.kr/bbs/list.do?key=2302150024',
1398
- parseList: (html) => parseBuyeoMuseumList(html, '2302150024'),
1399
- parseDetail: parseBuyeoMuseumDetail,
1400
- },
1401
- {
1402
- id: '국립진주박물관_새소식',
1403
- name: '국립진주박물관 새소식',
1404
- url: 'https://jinju.museum.go.kr/kor/html/sub06/0601.html',
1405
- parseList: parseJinjuMuseumList,
1406
- parseDetail: parseJinjuMuseumDetail,
1407
- },
1408
- // NOTE: Parsing logic is implemented, but crawling is restricted by robots.txt policy
1409
- // {
1410
- // id: '국립경주박물관_새소식',
1411
- // name: '국립경주박물관 새소식',
1412
- // url: 'https://gyeongju.museum.go.kr/kor/html/sub07/0701.html',
1413
- // parseList: (html) =>
1414
- // parseGyeongjuMuseumList(html, '/kor/html/sub07/0701.html'),
1415
- // parseDetail: parseGyeongjuMuseumDetail,
1416
- // },
1417
- // {
1418
- // id: '국립경주박물관_고시공고',
1419
- // name: '국립경주박물관 고시/공고',
1420
- // url: 'https://gyeongju.museum.go.kr/kor/html/sub07/0703.html',
1421
- // parseList: parseGyeongjuMuseumNoticeList,
1422
- // parseDetail: parseGyeongjuMuseumDetail,
1423
- // },
1424
- // {
1425
- // id: '국립경주박물관_보도자료',
1426
- // name: '국립경주박물관 보도자료',
1427
- // url: 'https://gyeongju.museum.go.kr/kor/html/sub07/0705.html',
1428
- // parseList: (html) =>
1429
- // parseGyeongjuMuseumList(html, '/kor/html/sub07/0705.html'),
1430
- // parseDetail: parseGyeongjuMuseumDetail,
1431
- // },
1432
- // {
1433
- // id: '국립청주박물관_새소식',
1434
- // name: '국립청주박물관 새소식',
1435
- // url: 'https://cheongju.museum.go.kr/www/selectBbsNttList.do?bbsNo=1&key=482&nbar=s',
1436
- // parseList: parseCheongjuMuseumList,
1437
- // parseDetail: parseCheongjuMuseumDetail,
1438
- // },
1439
- // {
1440
- // id: '국립청주박물관_언론보도자료',
1441
- // name: '국립청주박물관 언론보도자료',
1442
- // url: 'https://cheongju.museum.go.kr/www/selectBbsNttList.do?bbsNo=20&key=31&nbar=s',
1443
- // parseList: parseCheongjuMuseumList,
1444
- // parseDetail: parseCheongjuMuseumDetail,
1445
- // },
1446
- // {
1447
- // id: '국립김해박물관_새소식',
1448
- // name: '국립김해박물관 새소식',
1449
- // url: 'https://gimhae.museum.go.kr/kr/html/sub04/0401.html',
1450
- // parseList: (html) =>
1451
- // parseGimhaeMuseumList(html, '/kr/html/sub04/0401.html'),
1452
- // parseDetail: parseGimhaeMuseumDetail,
1453
- // },
1454
- // {
1455
- // id: '국립김해박물관_보도자료',
1456
- // name: '국립김해박물관 언론보도자료',
1457
- // url: 'https://gimhae.museum.go.kr/kr/html/sub04/0402.html',
1458
- // parseList: (html) =>
1459
- // parseGimhaeMuseumList(html, '/kr/html/sub04/0402.html'),
1460
- // parseDetail: parseGimhaeMuseumDetail,
1461
- // },
1462
- // {
1463
- // id: '국립제주박물관_새소식',
1464
- // name: '국립제주박물관 새소식',
1465
- // url: 'https://jeju.museum.go.kr/_prog/_board/?code=sub02_0201&site_dvs_cd=kr&menu_dvs_cd=050101&ntt_tag=1',
1466
- // parseList: parseJejuMuseumList,
1467
- // parseDetail: parseJejuMuseumDetail,
1468
- // },
1469
- // {
1470
- // id: '국립익산박물관_공지사항',
1471
- // name: '국립익산박물관 공지사항',
1472
- // url: 'https://iksan.museum.go.kr/kor/html/sub05/0501.html',
1473
- // parseList: parseIksanMuseumList,
1474
- // parseDetail: parseIksanMuseumDetail,
1475
- // },
1476
- ],
1477
- },
1478
- {
1479
- id: 'business',
1480
- name: 'Business',
1481
- targets: [
1482
- {
1483
- id: '국가유산청_입찰정보',
1484
- name: '국가유산청 입찰정보',
1485
- url: 'https://www.khs.go.kr/tenderBbz/selectTenderBbzList.do?mn=NS_01_05',
1486
- parseList: parseKhsList,
1487
- parseDetail: parseKhsDetail,
1488
- },
1489
- {
1490
- id: '국가유산진흥원_입찰정보',
1491
- name: '국가유산진흥원 입찰정보',
1492
- url: 'https://www.kh.or.kr/brd/board/717/L/menu/375',
1493
- parseList: parseHeritageAgencyList,
1494
- parseDetail: parseHeritageAgencyDetail,
1495
- },
1496
- {
1497
- id: '한국문화유산협회_사업공고',
1498
- name: '한국문화유산협회 사업공고',
1499
- url: 'https://www.kaah.kr/bussopen',
1500
- parseList: parseKaahList,
1501
- parseDetail: parseKaahDetail,
1502
- },
1503
- {
1504
- id: '한국문화유산협회_입찰공고',
1505
- name: '한국문화유산협회 입찰공고',
1506
- url: 'https://www.kaah.kr/ipcopen',
1507
- parseList: parseKaahList,
1508
- parseDetail: parseKaahDetail,
1509
- },
1510
- ],
1511
- },
1512
- {
1513
- id: 'employment',
1514
- name: 'Employment',
1515
- targets: [
1516
- {
1517
- id: '국가유산청_시험채용',
1518
- name: '국가유산청 시험/채용',
1519
- url: 'https://www.khs.go.kr/multiBbz/selectMultiBbzList.do?bbzId=newexam&mn=NS_01_06',
1520
- parseList: parseKhsList,
1521
- parseDetail: parseKhsDetail,
1522
- },
1523
- {
1524
- id: '국가유산진흥원_인재채용',
1525
- name: '국가유산진흥원 인재채용',
1526
- url: 'https://www.kh.or.kr/brd/board/721/L/CATEGORY/719/menu/377',
1527
- parseList: parseHeritageAgencyList,
1528
- parseDetail: parseHeritageAgencyDetail,
1529
- },
1530
- {
1531
- id: '한국문화유산협회_채용공고',
1532
- name: '한국문화유산협회 채용공고',
1533
- url: 'https://www.kaah.kr/reqopen',
1534
- parseList: parseKaahList,
1535
- parseDetail: parseKaahDetail,
1536
- },
1537
- {
1538
- id: '영남고고학회_채용공고',
1539
- name: '영남고고학회 채용공고',
1540
- url: 'http://www.yngogo.or.kr/subList/32000001136',
1541
- parseList: (html) => parseYngogoList(html, '32000001136', '32000001164', '32000001718'),
1542
- parseDetail: (html) => parseYngogoDetail(html, '32000001136', '32000001164', extractNttSeq(html), '32000001718'),
1543
- },
1544
- {
1545
- id: '국립중앙박물관_채용안내',
1546
- name: '국립중앙박물관 채용 안내',
1547
- url: 'https://www.museum.go.kr/MUSEUM/contents/M0701030000.do?catCustomType=post&catId=54&recruitYn=Y',
1548
- parseList: parseMuseumRecruitList,
1549
- parseDetail: parseMuseumDetail,
1550
- },
1551
- {
1552
- id: '국립전주박물관_채용',
1553
- name: '국립전주박물관 채용',
1554
- url: 'https://jeonju.museum.go.kr/board.es?mid=a10105020000&bid=0002',
1555
- parseList: parseJeonjuMuseumRecruitList,
1556
- parseDetail: parseJeonjuMuseumDetail,
1557
- },
1558
- {
1559
- id: '국립부여박물관_채용공고',
1560
- name: '국립부여박물관 채용공고',
1561
- url: 'https://buyeo.museum.go.kr/bbs/list.do?key=2301270001',
1562
- parseList: (html) => parseBuyeoMuseumList(html, '2301270001'),
1563
- parseDetail: parseBuyeoMuseumDetail,
1564
- },
1565
- // NOTE: Parsing logic is implemented, but crawling is restricted by robots.txt policy
1566
- // {
1567
- // id: '국립경주박물관_채용안내',
1568
- // name: '국립경주박물관 채용안내',
1569
- // url: 'https://gyeongju.museum.go.kr/kor/html/sub07/0704.html',
1570
- // parseList: (html) =>
1571
- // parseGyeongjuMuseumList(html, '/kor/html/sub07/0704.html'),
1572
- // parseDetail: parseGyeongjuMuseumDetail,
1573
- // },
1574
- // {
1575
- // id: '국립청주박물관_채용및공고',
1576
- // name: '국립청주박물관 채용 및 공고',
1577
- // url: 'https://cheongju.museum.go.kr/www/selectBbsNttList.do?bbsNo=29&key=476&nbar=s',
1578
- // parseList: parseCheongjuMuseumList,
1579
- // parseDetail: parseCheongjuMuseumDetail,
1580
- // },
1581
- // {
1582
- // id: '국립제주박물관_채용정보',
1583
- // name: '국립제주박물관 채용정보',
1584
- // url: 'https://jeju.museum.go.kr/_prog/_board/?code=sub02_0201&site_dvs_cd=kr&menu_dvs_cd=050102&ntt_tag=2',
1585
- // parseList: parseJejuMuseumList,
1586
- // parseDetail: parseJejuMuseumDetail,
1587
- // },
1588
- ],
1589
- },
1590
- ];
1128
+ function createCrawlingTargetGroups(customFetch) {
1129
+ return [
1130
+ {
1131
+ id: 'news',
1132
+ name: 'News',
1133
+ targets: [
1134
+ {
1135
+ id: '국가유산청_공지사항',
1136
+ name: '국가유산청 공지사항',
1137
+ url: 'https://www.khs.go.kr/multiBbz/selectMultiBbzList.do?bbzId=newpublic&mn=NS_01_01',
1138
+ parseList: parseKhsList,
1139
+ parseDetail: parseKhsDetail,
1140
+ },
1141
+ {
1142
+ id: '국가유산청_보도설명',
1143
+ name: '국가유산청 보도/설명',
1144
+ url: 'https://www.khs.go.kr/newsBbz/selectNewsBbzList.do?sectionId=all_sec_1&mn=NS_01_02',
1145
+ parseList: parseKhsList,
1146
+ parseDetail: parseKhsDetail,
1147
+ },
1148
+ {
1149
+ id: '국가유산청_사진뉴스',
1150
+ name: '국가유산청 사진뉴스',
1151
+ url: 'https://www.khs.go.kr/cop/bbs/selectBoardList.do?bbsId=BBSMSTR_1002&mn=NS_01_03',
1152
+ parseList: parseKhsGalleryList,
1153
+ parseDetail: parseKhsDetail,
1154
+ },
1155
+ {
1156
+ id: '국가유산청_입법예고',
1157
+ name: '국가유산청 입법예고',
1158
+ url: 'https://www.khs.go.kr/lawBbz/selectLawBbzList.do?mn=NS_03_01_01',
1159
+ parseList: parseKhsLawList,
1160
+ parseDetail: parseKhsDetail,
1161
+ },
1162
+ // NOTE: Parsing logic is implemented, but too much fragmented data with little value for newsletter
1163
+ // {
1164
+ // id: '국가유산청_발굴조사_현황공개',
1165
+ // name: '국가유산청 발굴조사 현황공개',
1166
+ // url: 'https://www.e-minwon.go.kr/ge/ee/getListEcexmPrmsnAply.do',
1167
+ // parseList: parseExcavationStatusList,
1168
+ // parseDetail: parseExcavationStatusDetail,
1169
+ // },
1170
+ {
1171
+ id: '국가유산청_발굴조사_보고서',
1172
+ name: '국가유산청 발굴조사 보고서',
1173
+ url: 'https://www.e-minwon.go.kr/ge/ee/getListEcexmRptp.do',
1174
+ parseList: parseExcavationReportList,
1175
+ parseDetail: parseExcavationReportDetail,
1176
+ },
1177
+ {
1178
+ id: '국가유산청_발굴조사_현장공개',
1179
+ name: '국가유산청 발굴조사 현장공개',
1180
+ url: 'https://www.e-minwon.go.kr/ge/ee/getListLinkGrndsRls.do',
1181
+ parseList: parseExcavationSiteList,
1182
+ parseDetail: parseExcavationSiteDetail,
1183
+ },
1184
+ {
1185
+ id: '국립문화유산연구원_공지사항',
1186
+ name: '국립문화유산연구원 공지사항',
1187
+ url: 'https://www.nrich.go.kr/kor/boardList.do?menuIdx=282&bbscd=32',
1188
+ parseList: parseNrichNoticeList,
1189
+ parseDetail: parseNrichNoticeDetail,
1190
+ },
1191
+ {
1192
+ id: '국립문화유산연구원_주요행사',
1193
+ name: '국립문화유산연구원 주요행사',
1194
+ url: 'https://www.nrich.go.kr/kor/majorList.do?menuIdx=286',
1195
+ parseList: parseNrichMajorEventList,
1196
+ parseDetail: parseNrichMajorEventDetail,
1197
+ },
1198
+ {
1199
+ id: '국립문화유산연구원_학술지_헤리티지',
1200
+ name: '국립문화유산연구원 헤리티지:역사와 과학 학술지',
1201
+ url: 'https://www.nrich.go.kr/kor/subscriptionDataUsrList.do?menuIdx=1651&gubun=J',
1202
+ parseList: parseNrichJournalList,
1203
+ parseDetail: parseNrichJournalDetail,
1204
+ },
1205
+ {
1206
+ id: '국립문화유산연구원_학술지_보존과학연구',
1207
+ name: '국립문화유산연구원 보존과학연구 학술지',
1208
+ url: 'https://www.nrich.go.kr/kor/subscriptionDataUsrList.do?menuIdx=2065&gubun=K',
1209
+ parseList: parseNrichJournalList,
1210
+ parseDetail: parseNrichJournalDetail,
1211
+ },
1212
+ {
1213
+ id: '국가유산지식이음_공지사항',
1214
+ name: '국가유산 지식이음 공지사항',
1215
+ url: 'https://portal.nrich.go.kr/kor/boardList.do?menuIdx=1058&bbscd=9',
1216
+ parseList: parseNrichPortalList,
1217
+ parseDetail: parseNrichPortalDetail,
1218
+ },
1219
+ {
1220
+ id: '국립고궁박물관_공지사항',
1221
+ name: '국립고궁박물관 공지사항',
1222
+ url: 'https://www.gogung.go.kr/gogung/bbs/BMSR00022/list.do?gubunCd=B22_001&menuNo=800088',
1223
+ parseList: parseGogungList,
1224
+ parseDetail: parseGogungDetail,
1225
+ },
1226
+ {
1227
+ id: '국가유산진흥원_공지사항',
1228
+ name: '국가유산진흥원 공지사항',
1229
+ url: 'https://www.kh.or.kr/brd/board/644/L/SITES/100/menu/371',
1230
+ parseList: parseHeritageAgencyList,
1231
+ parseDetail: parseHeritageAgencyDetail,
1232
+ },
1233
+ {
1234
+ id: '국가유산진흥원_보도자료',
1235
+ name: '국가유산진흥원 보도자료',
1236
+ url: 'https://www.kh.or.kr/brd/board/715/L/menu/373',
1237
+ parseList: parseHeritageAgencyList,
1238
+ parseDetail: parseHeritageAgencyDetail,
1239
+ },
1240
+ {
1241
+ id: '국가유산진흥원_매장유산국비발굴단_공지사항',
1242
+ name: '국가유산진흥원 매장유산국비발굴단 공지사항',
1243
+ url: 'https://www.kh.or.kr/brd/board/644/L/SITES/201/menu/506',
1244
+ parseList: parseHeritageAgencyList,
1245
+ parseDetail: parseHeritageAgencyDetail,
1246
+ },
1247
+ {
1248
+ id: '국가유산진흥원_매장유산국비발굴단_현장설명회',
1249
+ name: '국가유산진흥원 매장유산국비발굴단 현장설명회',
1250
+ url: 'https://www.kh.or.kr/brd/board/631/L/menu/504',
1251
+ parseList: parseHeritageAgencyList,
1252
+ parseDetail: parseHeritageAgencyDetail,
1253
+ },
1254
+ {
1255
+ id: '한국문화유산협회_공지사항',
1256
+ name: '한국문화유산협회 공지사항',
1257
+ url: 'https://www.kaah.kr/notice',
1258
+ parseList: parseKaahList,
1259
+ parseDetail: parseKaahDetail,
1260
+ },
1261
+ {
1262
+ id: '한국문화유산협회_협회소식',
1263
+ name: '한국문화유산협회 협회소식',
1264
+ url: 'https://www.kaah.kr/news',
1265
+ parseList: parseKaahList,
1266
+ parseDetail: parseKaahDetail,
1267
+ },
1268
+ {
1269
+ id: '한국문화유산협회_보도자료',
1270
+ name: '한국문화유산협회 보도자료',
1271
+ url: 'https://www.kaah.kr/mass',
1272
+ parseList: parseKaahList,
1273
+ parseDetail: parseKaahDetail,
1274
+ },
1275
+ {
1276
+ id: '한국문화유산협회_회원기관소식',
1277
+ name: '한국문화유산협회 회원기관소식',
1278
+ url: 'https://www.kaah.kr/assnews',
1279
+ parseList: parseKaahList,
1280
+ parseDetail: parseKaahDetail,
1281
+ },
1282
+ {
1283
+ id: '한국문화유산협회_유관기관소식',
1284
+ name: '한국문화유산협회 유관기관소식',
1285
+ url: 'https://www.kaah.kr/ralnews',
1286
+ parseList: parseKaahList,
1287
+ parseDetail: parseKaahDetail,
1288
+ },
1289
+ {
1290
+ id: '한국문화유산협회_발굴현장공개',
1291
+ name: '한국문화유산협회 발굴현장공개',
1292
+ url: 'https://www.kaah.kr/placeopen',
1293
+ parseList: parseKaahPlaceList,
1294
+ parseDetail: parseKaahPlaceDetail,
1295
+ },
1296
+ {
1297
+ id: '한국고고학회_공지사항',
1298
+ name: '한국고고학회 공지사항',
1299
+ url: 'https://www.kras.or.kr/?r=kras&m=bbs&bid=notice',
1300
+ parseList: parseKrasList,
1301
+ parseDetail: parseKrasDetail,
1302
+ },
1303
+ {
1304
+ id: '한국고고학회_학술대회및행사',
1305
+ name: '한국고고학회 학술대회 및 행사',
1306
+ url: 'https://www.kras.or.kr/?r=kras&m=bbs&bid=sympo',
1307
+ parseList: parseKrasList,
1308
+ parseDetail: parseKrasDetail,
1309
+ },
1310
+ {
1311
+ id: '한국고고학회_신간안내_단행본',
1312
+ name: '한국고고학회 신간안내 - 단행본',
1313
+ url: 'https://www.kras.or.kr/?c=61/101/105',
1314
+ parseList: parseKrasList,
1315
+ parseDetail: parseKrasDetail,
1316
+ },
1317
+ {
1318
+ id: '한국고고학회_현장소식',
1319
+ name: '한국고고학회 현장소식',
1320
+ url: 'https://www.kras.or.kr/?c=61/73',
1321
+ parseList: parseKrasList,
1322
+ parseDetail: parseKrasDetail,
1323
+ },
1324
+ {
1325
+ id: '중부고고학회_공지사항',
1326
+ name: '중부고고학회 공지사항',
1327
+ url: 'https://www.jbgogo.or.kr/bbs/notice',
1328
+ parseList: parseJbgogoList,
1329
+ parseDetail: parseJbgogoDetail,
1330
+ },
1331
+ {
1332
+ id: '중부고고학회_학계소식',
1333
+ name: '중부고고학회 학계소식',
1334
+ url: 'https://www.jbgogo.or.kr/bbs/news',
1335
+ parseList: parseJbgogoList,
1336
+ parseDetail: parseJbgogoDetail,
1337
+ },
1338
+ {
1339
+ id: '중부고고학회_발굴현장소식',
1340
+ name: '중부고고학회 발굴현장소식',
1341
+ url: 'https://www.jbgogo.or.kr/bbs/spotnews',
1342
+ parseList: parseJbgogoList,
1343
+ parseDetail: parseJbgogoDetail,
1344
+ },
1345
+ {
1346
+ id: '호서고고학회_공지사항',
1347
+ name: '호서고고학회 공지사항',
1348
+ url: 'http://www.hsas.or.kr/flow/?ref=board/board.emt&menu_table=m2_00&bbs_table=notice&menu_idx=010000',
1349
+ parseList: parseHsasList,
1350
+ parseDetail: parseHsasDetail,
1351
+ },
1352
+ {
1353
+ id: '호서고고학회_학회소식',
1354
+ name: '호서고고학회 학회소식',
1355
+ url: 'http://www.hsas.or.kr/flow/?ref=board/board.emt&menu_table=m2_00&bbs_table=m2_01&menu_idx=020000',
1356
+ parseList: parseHsasList,
1357
+ parseDetail: parseHsasDetail,
1358
+ },
1359
+ {
1360
+ id: '영남고고학회_공지사항',
1361
+ name: '영남고고학회 공지사항',
1362
+ url: 'http://www.yngogo.or.kr/subList/32000001120',
1363
+ parseList: (html) => parseYngogoList(html, '32000001120', '32000001157', '32000001711', customFetch),
1364
+ parseDetail: (html) => parseYngogoDetail(html, '32000001120', '32000001157', extractNttSeq(html), '32000001711', customFetch),
1365
+ },
1366
+ {
1367
+ id: '영남고고학회_학계소식',
1368
+ name: '영남고고학회 학계소식',
1369
+ url: 'http://www.yngogo.or.kr/subList/32000001133',
1370
+ parseList: (html) => parseYngogoList(html, '32000001133', '32000001161', '32000001715', customFetch),
1371
+ parseDetail: (html) => parseYngogoDetail(html, '32000001133', '32000001161', extractNttSeq(html), '32000001715', customFetch),
1372
+ },
1373
+ {
1374
+ id: '영남고고학회_현장소식',
1375
+ name: '영남고고학회 현장소식',
1376
+ url: 'http://www.yngogo.or.kr/subList/32000001135',
1377
+ parseList: (html) => parseYngogoList(html, '32000001135', '32000001163', '32000001717', customFetch),
1378
+ parseDetail: (html) => parseYngogoDetail(html, '32000001135', '32000001163', extractNttSeq(html), '32000001717', customFetch),
1379
+ },
1380
+ {
1381
+ id: '국립중앙박물관_알림',
1382
+ name: '국립중앙박물관 알림',
1383
+ url: 'https://www.museum.go.kr/MUSEUM/contents/M0701010000.do?catCustomType=united&catId=128',
1384
+ parseList: (html) => parseMuseumList(html, '/MUSEUM/contents/M0701010000.do'),
1385
+ parseDetail: parseMuseumDetail,
1386
+ },
1387
+ {
1388
+ id: '국립중앙박물관_고시공고',
1389
+ name: '국립중앙박물관 고시/공고',
1390
+ url: 'https://www.museum.go.kr/MUSEUM/contents/M0701020000.do',
1391
+ parseList: (html) => parseMuseumList(html, '/MUSEUM/contents/M0701020000.do'),
1392
+ parseDetail: parseMuseumDetail,
1393
+ },
1394
+ {
1395
+ id: '국립중앙박물관_보도자료',
1396
+ name: '국립중앙박물관 보도 자료',
1397
+ url: 'https://www.museum.go.kr/MUSEUM/contents/M0701040000.do?catCustomType=post&catId=93',
1398
+ parseList: parseMuseumPressList,
1399
+ parseDetail: parseMuseumDetail,
1400
+ },
1401
+ {
1402
+ id: '국립전주박물관_새소식',
1403
+ name: '국립전주박물관 새소식',
1404
+ url: 'https://jeonju.museum.go.kr/board.es?mid=a10105010000&bid=0001',
1405
+ parseList: parseJeonjuMuseumList,
1406
+ parseDetail: parseJeonjuMuseumDetail,
1407
+ },
1408
+ {
1409
+ id: '국립전주박물관_보도자료',
1410
+ name: '국립전주박물관 보도자료',
1411
+ url: 'https://jeonju.museum.go.kr/board.es?mid=a10105050000&bid=0004',
1412
+ parseList: parseJeonjuMuseumList,
1413
+ parseDetail: parseJeonjuMuseumDetail,
1414
+ },
1415
+ {
1416
+ id: '국립부여박물관_공지사항',
1417
+ name: '국립부여박물관 공지사항',
1418
+ url: 'https://buyeo.museum.go.kr/bbs/list.do?key=2301250005',
1419
+ parseList: (html) => parseBuyeoMuseumList(html, '2301250005'),
1420
+ parseDetail: parseBuyeoMuseumDetail,
1421
+ },
1422
+ {
1423
+ id: '국립부여박물관_보도자료',
1424
+ name: '국립부여박물관 보도자료',
1425
+ url: 'https://buyeo.museum.go.kr/bbs/list.do?key=2302150024',
1426
+ parseList: (html) => parseBuyeoMuseumList(html, '2302150024'),
1427
+ parseDetail: parseBuyeoMuseumDetail,
1428
+ },
1429
+ {
1430
+ id: '국립진주박물관_새소식',
1431
+ name: '국립진주박물관 새소식',
1432
+ url: 'https://jinju.museum.go.kr/kor/html/sub06/0601.html',
1433
+ parseList: parseJinjuMuseumList,
1434
+ parseDetail: parseJinjuMuseumDetail,
1435
+ },
1436
+ // NOTE: Parsing logic is implemented, but crawling is restricted by robots.txt policy
1437
+ // {
1438
+ // id: '국립경주박물관_새소식',
1439
+ // name: '국립경주박물관 새소식',
1440
+ // url: 'https://gyeongju.museum.go.kr/kor/html/sub07/0701.html',
1441
+ // parseList: (html) =>
1442
+ // parseGyeongjuMuseumList(html, '/kor/html/sub07/0701.html'),
1443
+ // parseDetail: parseGyeongjuMuseumDetail,
1444
+ // },
1445
+ // {
1446
+ // id: '국립경주박물관_고시공고',
1447
+ // name: '국립경주박물관 고시/공고',
1448
+ // url: 'https://gyeongju.museum.go.kr/kor/html/sub07/0703.html',
1449
+ // parseList: parseGyeongjuMuseumNoticeList,
1450
+ // parseDetail: parseGyeongjuMuseumDetail,
1451
+ // },
1452
+ // {
1453
+ // id: '국립경주박물관_보도자료',
1454
+ // name: '국립경주박물관 보도자료',
1455
+ // url: 'https://gyeongju.museum.go.kr/kor/html/sub07/0705.html',
1456
+ // parseList: (html) =>
1457
+ // parseGyeongjuMuseumList(html, '/kor/html/sub07/0705.html'),
1458
+ // parseDetail: parseGyeongjuMuseumDetail,
1459
+ // },
1460
+ // {
1461
+ // id: '국립청주박물관_새소식',
1462
+ // name: '국립청주박물관 새소식',
1463
+ // url: 'https://cheongju.museum.go.kr/www/selectBbsNttList.do?bbsNo=1&key=482&nbar=s',
1464
+ // parseList: parseCheongjuMuseumList,
1465
+ // parseDetail: parseCheongjuMuseumDetail,
1466
+ // },
1467
+ // {
1468
+ // id: '국립청주박물관_언론보도자료',
1469
+ // name: '국립청주박물관 언론보도자료',
1470
+ // url: 'https://cheongju.museum.go.kr/www/selectBbsNttList.do?bbsNo=20&key=31&nbar=s',
1471
+ // parseList: parseCheongjuMuseumList,
1472
+ // parseDetail: parseCheongjuMuseumDetail,
1473
+ // },
1474
+ // {
1475
+ // id: '국립김해박물관_새소식',
1476
+ // name: '국립김해박물관 새소식',
1477
+ // url: 'https://gimhae.museum.go.kr/kr/html/sub04/0401.html',
1478
+ // parseList: (html) =>
1479
+ // parseGimhaeMuseumList(html, '/kr/html/sub04/0401.html'),
1480
+ // parseDetail: parseGimhaeMuseumDetail,
1481
+ // },
1482
+ // {
1483
+ // id: '국립김해박물관_보도자료',
1484
+ // name: '국립김해박물관 언론보도자료',
1485
+ // url: 'https://gimhae.museum.go.kr/kr/html/sub04/0402.html',
1486
+ // parseList: (html) =>
1487
+ // parseGimhaeMuseumList(html, '/kr/html/sub04/0402.html'),
1488
+ // parseDetail: parseGimhaeMuseumDetail,
1489
+ // },
1490
+ // {
1491
+ // id: '국립제주박물관_새소식',
1492
+ // name: '국립제주박물관 새소식',
1493
+ // url: 'https://jeju.museum.go.kr/_prog/_board/?code=sub02_0201&site_dvs_cd=kr&menu_dvs_cd=050101&ntt_tag=1',
1494
+ // parseList: parseJejuMuseumList,
1495
+ // parseDetail: parseJejuMuseumDetail,
1496
+ // },
1497
+ // {
1498
+ // id: '국립익산박물관_공지사항',
1499
+ // name: '국립익산박물관 공지사항',
1500
+ // url: 'https://iksan.museum.go.kr/kor/html/sub05/0501.html',
1501
+ // parseList: parseIksanMuseumList,
1502
+ // parseDetail: parseIksanMuseumDetail,
1503
+ // },
1504
+ ],
1505
+ },
1506
+ {
1507
+ id: 'business',
1508
+ name: 'Business',
1509
+ targets: [
1510
+ {
1511
+ id: '국가유산청_입찰정보',
1512
+ name: '국가유산청 입찰정보',
1513
+ url: 'https://www.khs.go.kr/tenderBbz/selectTenderBbzList.do?mn=NS_01_05',
1514
+ parseList: parseKhsTenderList,
1515
+ parseDetail: parseKhsDetail,
1516
+ },
1517
+ {
1518
+ id: '국가유산진흥원_입찰정보',
1519
+ name: '국가유산진흥원 입찰정보',
1520
+ url: 'https://www.kh.or.kr/brd/board/717/L/menu/375',
1521
+ parseList: parseHeritageAgencyList,
1522
+ parseDetail: parseHeritageAgencyDetail,
1523
+ },
1524
+ {
1525
+ id: '한국문화유산협회_사업공고',
1526
+ name: '한국문화유산협회 사업공고',
1527
+ url: 'https://www.kaah.kr/bussopen',
1528
+ parseList: parseKaahList,
1529
+ parseDetail: parseKaahDetail,
1530
+ },
1531
+ {
1532
+ id: '한국문화유산협회_입찰공고',
1533
+ name: '한국문화유산협회 입찰공고',
1534
+ url: 'https://www.kaah.kr/ipcopen',
1535
+ parseList: parseKaahList,
1536
+ parseDetail: parseKaahDetail,
1537
+ },
1538
+ ],
1539
+ },
1540
+ {
1541
+ id: 'employment',
1542
+ name: 'Employment',
1543
+ targets: [
1544
+ {
1545
+ id: '국가유산청_시험채용',
1546
+ name: '국가유산청 시험/채용',
1547
+ url: 'https://www.khs.go.kr/multiBbz/selectMultiBbzList.do?bbzId=newexam&mn=NS_01_06',
1548
+ parseList: parseKhsList,
1549
+ parseDetail: parseKhsDetail,
1550
+ },
1551
+ {
1552
+ id: '국가유산진흥원_인재채용',
1553
+ name: '국가유산진흥원 인재채용',
1554
+ url: 'https://www.kh.or.kr/brd/board/721/L/CATEGORY/719/menu/377',
1555
+ parseList: parseHeritageAgencyList,
1556
+ parseDetail: parseHeritageAgencyDetail,
1557
+ },
1558
+ {
1559
+ id: '한국문화유산협회_채용공고',
1560
+ name: '한국문화유산협회 채용공고',
1561
+ url: 'https://www.kaah.kr/reqopen',
1562
+ parseList: parseKaahList,
1563
+ parseDetail: parseKaahDetail,
1564
+ },
1565
+ {
1566
+ id: '영남고고학회_채용공고',
1567
+ name: '영남고고학회 채용공고',
1568
+ url: 'http://www.yngogo.or.kr/subList/32000001136',
1569
+ parseList: (html) => parseYngogoList(html, '32000001136', '32000001164', '32000001718', customFetch),
1570
+ parseDetail: (html) => parseYngogoDetail(html, '32000001136', '32000001164', extractNttSeq(html), '32000001718', customFetch),
1571
+ },
1572
+ {
1573
+ id: '국립중앙박물관_채용안내',
1574
+ name: '국립중앙박물관 채용 안내',
1575
+ url: 'https://www.museum.go.kr/MUSEUM/contents/M0701030000.do?catCustomType=post&catId=54&recruitYn=Y',
1576
+ parseList: parseMuseumRecruitList,
1577
+ parseDetail: parseMuseumDetail,
1578
+ },
1579
+ {
1580
+ id: '국립전주박물관_채용',
1581
+ name: '국립전주박물관 채용',
1582
+ url: 'https://jeonju.museum.go.kr/board.es?mid=a10105020000&bid=0002',
1583
+ parseList: parseJeonjuMuseumRecruitList,
1584
+ parseDetail: parseJeonjuMuseumDetail,
1585
+ },
1586
+ {
1587
+ id: '국립부여박물관_채용공고',
1588
+ name: '국립부여박물관 채용공고',
1589
+ url: 'https://buyeo.museum.go.kr/bbs/list.do?key=2301270001',
1590
+ parseList: (html) => parseBuyeoMuseumList(html, '2301270001'),
1591
+ parseDetail: parseBuyeoMuseumDetail,
1592
+ },
1593
+ // NOTE: Parsing logic is implemented, but crawling is restricted by robots.txt policy
1594
+ // {
1595
+ // id: '국립경주박물관_채용안내',
1596
+ // name: '국립경주박물관 채용안내',
1597
+ // url: 'https://gyeongju.museum.go.kr/kor/html/sub07/0704.html',
1598
+ // parseList: (html) =>
1599
+ // parseGyeongjuMuseumList(html, '/kor/html/sub07/0704.html'),
1600
+ // parseDetail: parseGyeongjuMuseumDetail,
1601
+ // },
1602
+ // {
1603
+ // id: '국립청주박물관_채용및공고',
1604
+ // name: '국립청주박물관 채용 및 공고',
1605
+ // url: 'https://cheongju.museum.go.kr/www/selectBbsNttList.do?bbsNo=29&key=476&nbar=s',
1606
+ // parseList: parseCheongjuMuseumList,
1607
+ // parseDetail: parseCheongjuMuseumDetail,
1608
+ // },
1609
+ // {
1610
+ // id: '국립제주박물관_채용정보',
1611
+ // name: '국립제주박물관 채용정보',
1612
+ // url: 'https://jeju.museum.go.kr/_prog/_board/?code=sub02_0201&site_dvs_cd=kr&menu_dvs_cd=050102&ntt_tag=2',
1613
+ // parseList: parseJejuMuseumList,
1614
+ // parseDetail: parseJejuMuseumDetail,
1615
+ // },
1616
+ ],
1617
+ },
1618
+ ];
1619
+ }
1591
1620
 
1592
1621
  /**
1593
1622
  * Newsletter content configuration
@@ -2348,7 +2377,7 @@ class ContentGenerateProvider {
2348
2377
  this.model = this.google('gemini-3-pro-preview');
2349
2378
  this.newsletterBrandName = brandName ?? newsletterConfig.brandName;
2350
2379
  this.htmlTemplate = {
2351
- html: createNewsletterHtmlTemplate(crawlingTargetGroups.flatMap((group) => group.targets), templateOptions),
2380
+ html: createNewsletterHtmlTemplate(createCrawlingTargetGroups().flatMap((group) => group.targets), templateOptions),
2352
2381
  markers: {
2353
2382
  title: 'NEWSLETTER_TITLE',
2354
2383
  content: 'NEWSLETTER_CONTENT',
@@ -2404,11 +2433,15 @@ class CrawlingProvider {
2404
2433
  articleRepository;
2405
2434
  /** Maximum number of concurrent crawling operations */
2406
2435
  maxConcurrency = 5;
2407
- constructor(articleRepository) {
2436
+ /** Optional custom fetch function (e.g., proxy-based fetch) */
2437
+ customFetch;
2438
+ /** Crawling target groups configuration */
2439
+ crawlingTargetGroups;
2440
+ constructor(articleRepository, customFetch) {
2408
2441
  this.articleRepository = articleRepository;
2442
+ this.customFetch = customFetch;
2443
+ this.crawlingTargetGroups = createCrawlingTargetGroups(customFetch);
2409
2444
  }
2410
- /** Crawling target groups configuration */
2411
- crawlingTargetGroups = crawlingTargetGroups;
2412
2445
  /**
2413
2446
  * Fetch existing articles by URLs to avoid duplicate crawling
2414
2447
  * @param articleUrls - URLs to check
@@ -2562,7 +2595,7 @@ function createNewsletterGenerator(dependencies) {
2562
2595
  });
2563
2596
  const dateService = new DateService(dependencies.publishDate);
2564
2597
  const taskService = new TaskService(dependencies.taskRepository);
2565
- const crawlingProvider = new CrawlingProvider(dependencies.articleRepository);
2598
+ const crawlingProvider = new CrawlingProvider(dependencies.articleRepository, dependencies.customFetch);
2566
2599
  const analysisProvider = new AnalysisProvider(openai$1, dependencies.articleRepository, dependencies.tagRepository);
2567
2600
  // Inject display date from DateService into template options
2568
2601
  const templateOptions = dependencies.templateOptions
@@ -2886,7 +2919,7 @@ exports.CrawlingProvider = CrawlingProvider;
2886
2919
  exports.DateService = DateService;
2887
2920
  exports.TaskService = TaskService;
2888
2921
  exports.contentOptions = contentOptions;
2889
- exports.crawlingTargetGroups = crawlingTargetGroups;
2922
+ exports.createCrawlingTargetGroups = createCrawlingTargetGroups;
2890
2923
  exports.generateNewsletter = generateNewsletter;
2891
2924
  exports.generateWelcomeHTML = generateWelcomeHTML;
2892
2925
  exports.llmConfig = llmConfig;