sh-notice-search 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,41 @@
1
+ # sh-notice-search
2
+
3
+ Public SH (서울주택도시개발공사) notice lookup client for the `sh-notice-search` k-skill.
4
+
5
+ ## Source
6
+
7
+ - List/detail pages: `https://www.i-sh.co.kr/app/lay2/program/.../www/brd/.../{list,view}.do`
8
+ - Default category: `주택임대` (`multi_itm_seq=2`)
9
+ - Keyword search: SH requires both `srchWord` and `srchTp`; this client defaults keyword searches to title scope (`srchTp=0`).
10
+
11
+ This is an unauthenticated public HTML surface. No proxy or API key is required. The client does not automate application, login, document submission, payment, or My Page flows.
12
+
13
+ ## Usage
14
+
15
+ ```js
16
+ const { searchNotices, getNoticeDetail } = require("sh-notice-search")
17
+
18
+ const list = await searchNotices({ keyword: "행복주택", category: "임대", page: 1 })
19
+ const detail = await getNoticeDetail({ seq: list.items[0].seq, category: "임대" })
20
+ ```
21
+
22
+ CLI:
23
+
24
+ ```bash
25
+ sh-notice-search 행복주택 --category 임대 --limit 5
26
+ sh-notice-search 매입임대 --category 주거복지 --status 진행
27
+ sh-notice-search --seq 304371 --category 임대
28
+ ```
29
+
30
+ ## Returned fields
31
+
32
+ List rows include `seq`, `title`, `department`, `registered_date`, `views`, `category`, `status`, and the official `detail_url`.
33
+
34
+ Detail rows include `content_text` plus attachment metadata: `filename`, `file_seq`, `file_size`, `file_type`, and official SH `preview_url`. Direct download URLs are intentionally not exposed because SH file-download behavior can be session/policy dependent; hand off official preview/detail URLs to the user's browser.
35
+
36
+ ## Boundaries
37
+
38
+ - `pageSize`/`limit` is capped at 10 because the SH board returns a fixed 10 rows per page.
39
+ - Status filtering uses a conservative title-text classifier because the public board list has no first-class status field.
40
+ - Category aliases map to official board tabs (`주택임대`, `주택분양`, `주택매입`, `토지`, etc.). The `주거복지` alias maps to SH's public `주택매입` tab.
41
+ - Public HTML structure, NetFunnel/rate limits, and attachment preview policy can change.
package/package.json ADDED
@@ -0,0 +1,36 @@
1
+ {
2
+ "name": "sh-notice-search",
3
+ "version": "0.2.0",
4
+ "description": "Public SH Seoul Housing notice lookup client for k-skill",
5
+ "license": "MIT",
6
+ "main": "src/index.js",
7
+ "bin": {
8
+ "sh-notice-search": "src/cli.js"
9
+ },
10
+ "files": [
11
+ "src",
12
+ "README.md"
13
+ ],
14
+ "engines": {
15
+ "node": ">=18"
16
+ },
17
+ "publishConfig": {
18
+ "access": "public"
19
+ },
20
+ "repository": {
21
+ "type": "git",
22
+ "url": "git+https://github.com/NomaDamas/k-skill.git"
23
+ },
24
+ "keywords": [
25
+ "k-skill",
26
+ "sh",
27
+ "seoul",
28
+ "housing",
29
+ "notices",
30
+ "korea"
31
+ ],
32
+ "scripts": {
33
+ "lint": "node --check src/index.js && node --check src/cli.js && node --check test/index.test.js",
34
+ "test": "node --test"
35
+ }
36
+ }
package/src/cli.js ADDED
@@ -0,0 +1,72 @@
1
+ #!/usr/bin/env node
2
+ const { getNoticeDetail, searchNotices } = require("./index")
3
+
4
/**
 * CLI entry point: fetches a single notice when a seq-like option is present,
 * otherwise runs a list search, then prints the result as indented JSON.
 *
 * @param {object} [options] Parsed CLI options; defaults to parsing process.argv.
 * @param {{ log: Function }} [io] Output sink; defaults to console.
 * @returns {Promise<void>}
 */
async function main(options = parseArgs(process.argv.slice(2)), io = console) {
  const wantsDetail = Boolean(options.seq || options.id || options.noticeSeq)
  let result
  if (wantsDetail) {
    result = await getNoticeDetail(options)
  } else {
    result = await searchNotices(options)
  }
  io.log(JSON.stringify(result, null, 2))
}
10
+
11
/**
 * Parses CLI arguments into an options object for searchNotices/getNoticeDetail.
 *
 * Value options accept both `--name value` and `--name=value`. Previously the
 * `=` form was not recognised and the whole token became the search keyword.
 * A value option given as the last argument (no value) yields "".
 *
 * `--help`/`-h` prints usage and exits the process with code 0.
 * The first unrecognised argument becomes the keyword; later ones are ignored.
 *
 * @param {string[]} argv Arguments after the node binary and script path.
 * @returns {object} Options with any of: keyword, category, status, page,
 *   limit, searchType, seq, includeHtml.
 */
function parseArgs(argv) {
  // Flag spelling -> option property. A Map avoids inherited-key lookups.
  const valueOptions = new Map([
    ["--query", "keyword"],
    ["-q", "keyword"],
    ["--keyword", "keyword"],
    ["--category", "category"],
    ["--kind", "category"],
    ["--status", "status"],
    ["--page", "page"],
    ["--limit", "limit"],
    ["--page-size", "limit"],
    ["--srch-tp", "searchType"],
    ["--search-type", "searchType"],
    ["--seq", "seq"],
    ["--id", "seq"]
  ])
  const options = {}
  for (let i = 0; i < argv.length; i += 1) {
    const arg = argv[i]
    // Only long options may carry an inline "=value".
    const eq = arg.startsWith("--") ? arg.indexOf("=") : -1
    const flag = eq === -1 ? arg : arg.slice(0, eq)
    const target = valueOptions.get(flag)
    if (target !== undefined) {
      options[target] = eq === -1 ? argv[++i] || "" : arg.slice(eq + 1)
    } else if (arg === "--include-html") {
      options.includeHtml = true
    } else if (arg === "--help" || arg === "-h") {
      printHelp()
      process.exit(0)
    } else if (/^\d{4,}$/.test(arg) && !options.seq && (argv[i - 1] === "detail" || argv[i - 1] === "--detail")) {
      // A 4+ digit argument directly after the "detail" marker is the notice seq.
      options.seq = arg
    } else if (arg === "detail" || arg === "--detail") {
      // marker only; following numeric argument can be seq
    } else if (!options.keyword) {
      options.keyword = arg
    }
  }
  return options
}
36
+
37
/**
 * Prints the CLI usage text (examples and supported options) via console.log.
 *
 * NOTE(review): the help text below is reproduced as it appears in this view;
 * any leading indentation or column alignment inside the template literal
 * should be confirmed against the published file.
 */
function printHelp() {
  console.log(`Usage: sh-notice-search [keyword] [options]

Search public SH notices:
sh-notice-search 행복주택 --category 임대 --limit 5
sh-notice-search 매입임대 --category 주거복지 --status 진행

Fetch one detail:
sh-notice-search --seq 304371 --category 임대

Options:
-q, --query <text> Keyword. Defaults to title search when present.
--search-type <type> title/제목 or content/내용.
--category <category> all, rent/임대, sale/분양, welfare/주거복지, land/토지, etc.
--status <status> open/진행, closed/마감, announced/당첨자 (title classifier).
--page <number> Page number (default: 1).
--limit <number> Returned rows; capped at SH fixed page size 10.
--seq <number> Fetch detail by SH notice seq.
--include-html Include raw HTML in output for diagnostics.
`)
}
58
+
59
/**
 * Renders an error for console output.
 *
 * @param {*} error Any thrown value.
 * @returns {string} The stack when the value has a non-empty one, otherwise String(error).
 */
function formatError(error) {
  if (error && error.stack) return error.stack
  return String(error)
}
62
+
63
/**
 * Parses argv, runs the CLI, and reports any failure on io.error while setting
 * process.exitCode to 1.
 *
 * Argument parsing happens inside the promise chain, so a synchronous throw
 * while parsing is reported the same way as an async failure instead of
 * escaping as an uncaught exception.
 *
 * @param {string[]} [argv] Arguments after the node binary and script path.
 * @param {{ log: Function, error: Function }} [io] Output sink; defaults to console.
 * @returns {Promise<void>} Settles after the command finishes or the error is reported.
 */
function run(argv = process.argv.slice(2), io = console) {
  return Promise.resolve()
    .then(() => main(parseArgs(argv), io))
    .catch((error) => {
      io.error(formatError(error))
      process.exitCode = 1
    })
}
69
+
70
// Run the CLI only when this file is executed directly, not when it is required.
if (require.main === module) run()

// Expose the argument parser, help printer, error formatter, and entry points.
module.exports = { parseArgs, printHelp, formatError, main, run }
package/src/index.js ADDED
@@ -0,0 +1,545 @@
1
// Origin of the SH site; board paths and attachment preview URLs are resolved against it.
const SH_BASE_URL = "https://www.i-sh.co.kr"
// Category key used when the caller supplies none.
const DEFAULT_CATEGORY = "rent"
// Default and maximum number of rows returned by one search.
const DEFAULT_PAGE_SIZE = 10
const MAX_PAGE_SIZE = 10
// Default request timeout in milliseconds.
const DEFAULT_TIMEOUT_MS = 20000
6
+
7
// Board configuration per category key.
//   key          - canonical category key (same as the property name)
//   name         - Korean board name reported as category_name
//   path         - board path under SH_BASE_URL; "/list.do" or "/view.do" is appended
//   multiItmSeq  - value sent as the multi_itm_seq query parameter
//   multiItmSeqs - ("all" only) value sent as the multi_itm_seqs query parameter
//   aliases      - user-facing names accepted for this category
const CATEGORY_CONFIGS = {
  all: {
    key: "all",
    name: "전체",
    path: "/app/lay2/program/S1T294C295/www/brd/m_241",
    multiItmSeqs: "1,2,4,8,16,32,64,128,256,512",
    aliases: ["all", "전체"]
  },
  sale: {
    key: "sale",
    name: "주택분양",
    path: "/app/lay2/program/S1T294C296/www/brd/m_244",
    multiItmSeq: "1",
    aliases: ["sale", "분양", "주택분양", "분양주택"]
  },
  rent: {
    key: "rent",
    name: "주택임대",
    path: "/app/lay2/program/S1T294C297/www/brd/m_247",
    multiItmSeq: "2",
    aliases: ["rent", "임대", "주택임대", "임대주택"]
  },
  // The "welfare"/"주거복지" aliases are served by this purchase board.
  purchase: {
    key: "purchase",
    name: "주택매입",
    path: "/app/lay2/program/S1T294C3379/www/brd/m_247",
    multiItmSeq: "512",
    aliases: ["purchase", "매입", "주택매입", "매입임대", "welfare", "주거복지"]
  },
  movein: {
    key: "movein",
    name: "입주안내",
    path: "/app/lay2/program/S1T294C298/www/brd/m_248",
    multiItmSeq: "4",
    aliases: ["movein", "입주", "입주안내"]
  },
  land: {
    key: "land",
    name: "토지",
    path: "/app/lay2/program/S1T294C299/www/brd/m_255",
    multiItmSeq: "8",
    aliases: ["land", "토지"]
  },
  commercial: {
    key: "commercial",
    name: "상가/공장",
    path: "/app/lay2/program/S1T294C300/www/brd/m_256",
    multiItmSeq: "16",
    aliases: ["commercial", "상가", "공장", "상가/공장"]
  },
  compensation: {
    key: "compensation",
    name: "보상/이주",
    path: "/app/lay2/program/S1T294C301/www/brd/m_257",
    multiItmSeq: "32",
    aliases: ["compensation", "보상", "이주", "보상/이주"]
  },
  design: {
    key: "design",
    name: "현상설계",
    path: "/app/lay2/program/S1T294C302/www/brd/m_258",
    multiItmSeq: "64",
    aliases: ["design", "현상설계", "설계"]
  },
  etc: {
    key: "etc",
    name: "기타",
    path: "/app/lay2/program/S1T294C304/www/brd/m_260",
    multiItmSeq: "256",
    aliases: ["etc", "기타"]
  }
}
79
+
80
// Lookup from normalized alias token (see normalizeToken) to canonical category key,
// built from every alias listed in CATEGORY_CONFIGS.
const CATEGORY_ALIAS = Object.fromEntries(
  Object.values(CATEGORY_CONFIGS).flatMap((config) => config.aliases.map((alias) => [normalizeToken(alias), config.key]))
)
83
+
84
// Accepted status filter spellings (English and Korean) mapped to the three
// canonical statuses: "open", "closed", and "announced".
const STATUS_ALIASES = {
  open: "open",
  ongoing: "open",
  active: "open",
  "진행": "open",
  "공고중": "open",
  "모집중": "open",
  closed: "closed",
  close: "closed",
  ended: "closed",
  "마감": "closed",
  "종료": "closed",
  "결과": "closed",
  announced: "announced",
  "발표": "announced",
  "당첨": "announced",
  "당첨자": "announced"
}
102
+
103
/**
 * Canonicalizes a lookup token: stringifies it, removes all whitespace,
 * and lower-cases it. null/undefined become "".
 *
 * @param {*} value Raw user-supplied token.
 * @returns {string} Normalized token.
 */
function normalizeToken(value) {
  const text = value == null ? "" : String(value)
  const withoutSpaces = text.replace(/\s+/g, "").trim()
  return withoutSpaces.toLowerCase()
}
106
+
107
/**
 * Produces single-line display text: decodes HTML entities, collapses every
 * whitespace run to one space, and trims the ends.
 *
 * Entities are decoded before whitespace is normalized so that spaces produced
 * by entities such as `&nbsp;` are collapsed and trimmed too. Decoding last
 * left stray leading, trailing, or doubled spaces in the result.
 *
 * @param {*} value Raw text; null/undefined yield "".
 * @returns {string} Cleaned text.
 */
function cleanText(value) {
  if (value == null) return ""
  return decodeHtml(String(value)).replace(/\s+/g, " ").trim()
}
110
+
111
/**
 * Cleans a value with cleanText and maps an empty result to null.
 *
 * @param {*} value Raw text.
 * @returns {string|null} Cleaned text, or null when nothing remains.
 */
function trimOrNull(value) {
  const cleaned = cleanText(value)
  if (!cleaned) return null
  return cleaned
}
115
+
116
/**
 * Decodes the HTML entities this client expects: decimal and hexadecimal
 * numeric references plus &amp; &lt; &gt; &quot; &apos; and &nbsp; (the last
 * becomes a regular space).
 *
 * All entities are decoded in a single pass. The earlier chain of sequential
 * replacements decoded twice: `&amp;lt;` became `&lt;` and then `<`.
 * Numeric references outside the valid code point range are left untouched.
 *
 * @param {*} value Text that may contain entities; null/undefined yield "".
 * @returns {string} Decoded text.
 */
function decodeHtml(value) {
  if (value === undefined || value === null) return ""
  const named = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'", nbsp: " " }
  return String(value).replace(
    /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(amp|lt|gt|quot|apos|nbsp));/g,
    (match, dec, hex, name) => {
      if (dec !== undefined) return decodeNumericEntity(Number.parseInt(dec, 10), match)
      if (hex !== undefined) return decodeNumericEntity(Number.parseInt(hex, 16), match)
      // The alternation above guarantees `name` is one of the keys in `named`.
      return named[name]
    }
  )
}
129
+
130
/**
 * Converts a numeric character reference's code point to its character.
 *
 * @param {number} codePoint Parsed code point (may be NaN).
 * @param {string} fallback Text to return when the code point cannot be converted.
 * @returns {string} The character, or `fallback` for non-finite, out-of-range,
 *   or otherwise invalid code points.
 */
function decodeNumericEntity(codePoint, fallback) {
  const inRange = Number.isFinite(codePoint) && codePoint >= 0 && codePoint <= 0x10ffff
  if (!inRange) return fallback
  try {
    return String.fromCodePoint(codePoint)
  } catch {
    // fromCodePoint rejects values such as non-integers.
    return fallback
  }
}
138
+
139
/**
 * Reduces an HTML fragment to plain text: drops <script> and <style> elements,
 * replaces every remaining tag with a space, decodes entities, then collapses
 * whitespace and trims.
 *
 * @param {*} html HTML fragment; falsy values are treated as "".
 * @returns {string} Plain text.
 */
function stripTags(html) {
  const withoutMarkup = String(html || "")
    .replace(/<script[\s\S]*?<\/script>/gi, " ")
    .replace(/<style[\s\S]*?<\/style>/gi, " ")
    .replace(/<[^>]+>/g, " ")
  const decoded = decodeHtml(withoutMarkup)
  return decoded.replace(/\s+/g, " ").trim()
}
147
+
148
/**
 * Reads one quoted attribute value from a tag's raw attribute text.
 *
 * The name must start the text or follow whitespace, a quote, or a slash.
 * The previous `\b` boundary also matched after a hyphen, so `href` could
 * pick up the value of `data-href`. The name is regex-escaped before use.
 * Matching is case-insensitive; unquoted values are not supported.
 *
 * @param {*} attrs Raw attribute text from inside a start tag.
 * @param {string} name Attribute name to read.
 * @returns {string} Entity-decoded value, or "" when the attribute is absent.
 */
function getHtmlAttr(attrs, name) {
  const escapedName = String(name).replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
  const pattern = new RegExp(`(?:^|[\\s"'/])${escapedName}\\s*=\\s*(["'])([\\s\\S]*?)\\1`, "i")
  const match = String(attrs || "").match(pattern)
  return match ? decodeHtml(match[2]) : ""
}
152
+
153
/**
 * Returns a shallow copy of an object without its empty entries.
 * An entry is empty when it is null, undefined, "", or an empty array.
 * Other falsy values (0, false) are kept.
 *
 * @param {object} value Source object.
 * @returns {object} New object with only the non-empty entries.
 */
function compactObject(value) {
  const isEmptyEntry = (entry) => {
    if (entry === null || entry === undefined || entry === "") return true
    return Array.isArray(entry) && entry.length === 0
  }
  const kept = Object.entries(value).filter((pair) => !isEmptyEntry(pair[1]))
  return Object.fromEntries(kept)
}
160
+
161
/**
 * Parses a digits-only value and clamps it into [min, max].
 *
 * @param {*} value Raw value; undefined, null, and blank strings select the default.
 * @param {object} settings
 * @param {*} settings.defaultValue Returned when `value` is absent or blank.
 * @param {number} [settings.min=1] Lower bound; smaller inputs return `min`.
 * @param {number} [settings.max] Upper bound, applied only when finite.
 * @param {string} settings.label Name used in the error message.
 * @returns {*} The clamped integer, or `defaultValue`.
 * @throws {Error} When the trimmed value contains anything but digits.
 */
function parsePositiveInteger(value, { defaultValue, min = 1, max, label }) {
  const text = value === undefined || value === null ? "" : String(value).trim()
  if (text === "") return defaultValue
  if (!/^\d+$/.test(text)) throw new Error(`Provide valid ${label}.`)
  const parsed = Number.parseInt(text, 10)
  if (parsed < min) return min
  const hasUpperBound = Number.isFinite(max)
  return hasUpperBound && parsed > max ? max : parsed
}
170
+
171
/**
 * Resolves a user-supplied category (key or alias, English or Korean) to a
 * canonical CATEGORY_CONFIGS key. A falsy value selects DEFAULT_CATEGORY.
 *
 * Only own properties of the lookup tables are consulted. A plain property
 * read also finds inherited members, so a token such as "constructor" used to
 * pass validation and fail later when the config was dereferenced.
 *
 * @param {*} value Category key or alias.
 * @returns {string} Canonical category key.
 * @throws {Error} When the category is not supported.
 */
function normalizeCategory(value) {
  const token = normalizeToken(value || DEFAULT_CATEGORY)
  let key
  if (Object.hasOwn(CATEGORY_ALIAS, token)) {
    key = CATEGORY_ALIAS[token]
  } else if (Object.hasOwn(CATEGORY_CONFIGS, token)) {
    key = CATEGORY_CONFIGS[token].key
  }
  if (!key) throw new Error(`Unsupported SH category: ${value}`)
  return key
}
177
+
178
/**
 * Maps a search-scope name to the `srchTp` value sent to the board:
 * "0" for title scope and "1" for content scope.
 *
 * @param {*} value Scope name (title/제목/0 or content/contents/본문/내용/1).
 * @param {boolean} hasKeyword Whether a keyword accompanies the request.
 * @returns {string|null} "0" or "1"; with no scope given, "0" when a keyword
 *   is present and null otherwise.
 * @throws {Error} When the scope name is not recognised.
 */
function normalizeSearchType(value, hasKeyword) {
  const token = normalizeToken(value)
  if (!token) {
    if (hasKeyword) return "0"
    return null
  }
  switch (token) {
    case "title":
    case "제목":
    case "0":
      return "0"
    case "content":
    case "contents":
    case "본문":
    case "내용":
    case "1":
      return "1"
    default:
      throw new Error("srchTp must be title/content or 제목/내용.")
  }
}
185
+
186
/**
 * Resolves a status filter spelling to one of the canonical statuses listed
 * in STATUS_ALIASES.
 *
 * Only own properties of STATUS_ALIASES are consulted. A plain property read
 * also finds inherited members, so a token such as "constructor" used to be
 * accepted and returned a function that matched no row.
 *
 * @param {*} value Status name or alias; blank means "no filter".
 * @returns {string|null} Canonical status, or null when no filter was given.
 * @throws {Error} When the status is not supported.
 */
function normalizeStatus(value) {
  const token = normalizeToken(value)
  if (!token) return null
  const status = Object.hasOwn(STATUS_ALIASES, token) ? STATUS_ALIASES[token] : undefined
  if (!status) throw new Error(`Unsupported SH status: ${value}`)
  return status
}
193
+
194
/**
 * Validates and canonicalizes list-search options.
 *
 * Accepted input spellings:
 *   keyword  - keyword | q | query | srchWord (at most 100 characters)
 *   category - category | kind | noticeType
 *   scope    - srchTp | searchType | type
 *   page     - page | pageNo (clamped to 1..1000)
 *   pageSize - pageSize | limit (clamped to 1..MAX_PAGE_SIZE)
 *   timeoutMs is clamped to 1..120000.
 *
 * @param {object} [options] Raw options.
 * @returns {object} Normalized options: keyword, srchTp, page, pageSize,
 *   category, status, timeoutMs, fetcher, signal, includeHtml.
 * @throws {Error} When any option fails validation.
 */
function normalizeSearchOptions(options = {}) {
  const rawKeyword = options.keyword ?? options.q ?? options.query ?? options.srchWord
  const keyword = trimOrNull(rawKeyword)
  if (keyword && keyword.length > 100) throw new Error("srchWord must be 100 characters or fewer.")

  const category = normalizeCategory(options.category ?? options.kind ?? options.noticeType)
  const srchTp = normalizeSearchType(options.srchTp ?? options.searchType ?? options.type, Boolean(keyword))
  const page = parsePositiveInteger(options.page ?? options.pageNo, { defaultValue: 1, min: 1, max: 1000, label: "page" })
  const pageSize = parsePositiveInteger(options.pageSize ?? options.limit, { defaultValue: DEFAULT_PAGE_SIZE, min: 1, max: MAX_PAGE_SIZE, label: "pageSize" })
  const status = normalizeStatus(options.status)
  const timeoutMs = parsePositiveInteger(options.timeoutMs, { defaultValue: DEFAULT_TIMEOUT_MS, min: 1, max: 120000, label: "timeoutMs" })

  return {
    keyword,
    srchTp,
    page,
    pageSize,
    category,
    status,
    timeoutMs,
    fetcher: options.fetcher,
    signal: options.signal,
    includeHtml: Boolean(options.includeHtml)
  }
}
211
+
212
/**
 * Validates and canonicalizes detail-lookup options.
 *
 * The notice seq may be given as `seq`, `noticeSeq`, or `id` and must be
 * 1 to 20 digits. The category may be given as `category`, `kind`, or
 * `noticeType`. timeoutMs is clamped to 1..120000.
 *
 * @param {object} [options] Raw options.
 * @returns {object} Normalized options: seq, category, timeoutMs, fetcher,
 *   signal, includeHtml.
 * @throws {Error} When seq is missing or malformed, or another option is invalid.
 */
function normalizeDetailOptions(options = {}) {
  const seq = trimOrNull(options.seq ?? options.noticeSeq ?? options.id)
  if (!seq) throw new Error("seq is required")
  const isDigitsOnly = /^\d{1,20}$/.test(seq)
  if (!isDigitsOnly) throw new Error("seq must be digits only.")

  const category = normalizeCategory(options.category ?? options.kind ?? options.noticeType)
  const timeoutMs = parsePositiveInteger(options.timeoutMs, { defaultValue: DEFAULT_TIMEOUT_MS, min: 1, max: 120000, label: "timeoutMs" })

  return {
    seq,
    category,
    timeoutMs,
    fetcher: options.fetcher,
    signal: options.signal,
    includeHtml: Boolean(options.includeHtml)
  }
}
226
+
227
/**
 * Builds the list.do URL for a search on the selected category's board.
 *
 * Query parameters are added in this order: multi_itm_seqs and/or
 * multi_itm_seq (whichever the category config defines), page, then srchWord
 * and srchTp when present.
 *
 * @param {object} [options] Raw or already-normalized search options.
 * @returns {URL} The list URL.
 * @throws {Error} When the options fail validation.
 */
function buildSearchUrl(options = {}) {
  const { category, page, keyword, srchTp } = normalizeSearchOptions(options)
  const board = CATEGORY_CONFIGS[category]
  const url = new URL(`${SH_BASE_URL}${board.path}/list.do`)
  const params = url.searchParams
  if (board.multiItmSeqs) params.set("multi_itm_seqs", board.multiItmSeqs)
  if (board.multiItmSeq) params.set("multi_itm_seq", board.multiItmSeq)
  params.set("page", String(page || 1))
  if (keyword) params.set("srchWord", keyword)
  if (srchTp) params.set("srchTp", srchTp)
  return url
}
238
+
239
/**
 * Builds the view.do URL for one notice on the selected category's board.
 * multi_itm_seq is included only when the category config defines it.
 *
 * @param {object} [options] Raw or already-normalized detail options.
 * @returns {URL} The detail URL.
 * @throws {Error} When the options fail validation.
 */
function buildDetailUrl(options = {}) {
  const { category, seq } = normalizeDetailOptions(options)
  const board = CATEGORY_CONFIGS[category]
  const url = new URL(`${SH_BASE_URL}${board.path}/view.do`)
  const params = url.searchParams
  if (board.multiItmSeq) params.set("multi_itm_seq", board.multiItmSeq)
  params.set("seq", seq)
  return url
}
247
+
248
/**
 * Extracts the total result count from list HTML.
 *
 * Looks for "총 <strong>N</strong> 건" first, then falls back to "총 N 건" in
 * the tag-stripped text. Thousands separators are removed before parsing.
 * A capture with no digits (for example a lone comma) yields null rather
 * than NaN.
 *
 * @param {*} html List page HTML.
 * @returns {number|null} The count, or null when it cannot be found.
 */
function extractTotalCount(html) {
  const source = String(html || "")
  const match =
    source.match(/총\s*<strong[^>]*>\s*([0-9,]+)\s*<\/strong>\s*건/i) ||
    stripTags(html).match(/총\s*([0-9,]+)\s*건/)
  if (!match) return null
  const count = Number.parseInt(match[1].replace(/,/g, ""), 10)
  return Number.isNaN(count) ? null : count
}
252
+
253
/**
 * Guesses a notice's status from its title text.
 *
 * Rules are checked in order and the first match wins: winner/announcement
 * wording gives "announced", closing/result wording gives "closed", and
 * recruitment/application wording gives "open".
 *
 * @param {*} title Notice title.
 * @returns {"announced"|"closed"|"open"|"unknown"} Classified status.
 */
function classifyNoticeStatus(title) {
  const text = cleanText(title)
  const rules = [
    ["announced", /당첨|발표/],
    ["closed", /마감|계약결과|결과|완료|종료/],
    ["open", /모집공고|입주자\s*모집|신청|접수|공고/]
  ]
  const hit = rules.find(([, pattern]) => pattern.test(text))
  return hit ? hit[0] : "unknown"
}
260
+
261
/**
 * Tells whether a row passes the requested status filter.
 *
 * @param {string} itemStatus Status classified for the row.
 * @param {string|null} requestedStatus Requested filter; falsy means "no filter".
 * @returns {boolean} True when no filter is set or the statuses are identical.
 */
function statusMatches(itemStatus, requestedStatus) {
  return !requestedStatus || itemStatus === requestedStatus
}
267
+
268
/**
 * Scans the tag-stripped text of a page for words that suggest the upstream
 * returned something other than the board (queueing, CAPTCHA, login,
 * maintenance, blocking).
 *
 * @param {*} html Page HTML.
 * @returns {string[]} Labels of the markers found, in table order.
 */
function findUpstreamBlockMarkers(html) {
  const text = stripTags(html)
  const markers = [
    ["NetFunnel", /NetFunnel/i],
    ["CAPTCHA", /captcha|보안문자/i],
    ["로그인", /로그인|login/i],
    ["점검", /점검|maintenance/i],
    ["대기열", /대기열|queue/i],
    ["차단", /차단|block/i]
  ]
  const found = []
  for (const [label, pattern] of markers) {
    if (pattern.test(text)) found.push(label)
  }
  return found
}
280
+
281
/**
 * Produces the warning list for a page whose expected markup was not found.
 *
 * @param {*} html Page HTML, scanned for block/maintenance markers.
 * @param {boolean} expectedMarkupFound Whether the expected markup was present.
 * @param {string} label Page kind used in the message (e.g. "list", "detail").
 * @returns {string[]} Empty when the markup was found, otherwise one warning.
 */
function buildUnexpectedHtmlWarnings(html, expectedMarkupFound, label) {
  if (expectedMarkupFound) return []
  const markers = findUpstreamBlockMarkers(html)
  const message = markers.length > 0
    ? `unexpected SH ${label} HTML; possible block/maintenance markers: ${markers.join(", ")}`
    : `unexpected SH ${label} HTML; expected public SH ${label} markup was not found.`
  return [message]
}
289
+
290
/**
 * Parses notice rows out of a board list page.
 *
 * @param {*} html List page HTML.
 * @param {object} [options] Raw or normalized search options; `category`
 *   labels the rows and `status` filters them.
 * @returns {object[]} One compacted object per matching row (empty fields removed).
 * @throws {Error} When the options fail validation.
 */
function parseListRows(html, options = {}) {
  const normalized = normalizeSearchOptions(options)
  const config = CATEGORY_CONFIGS[normalized.category]
  // Prefer the <tbody> inside the #listTb container, then the first <tbody>,
  // and finally fall back to scanning the whole document.
  const listAreaMatch = String(html || "").match(/<div\b[^>]*id=["']listTb["'][^>]*>[\s\S]*?<tbody[^>]*>([\s\S]*?)<\/tbody>[\s\S]*?<\/div>/i)
  const tbodyMatch = listAreaMatch || String(html || "").match(/<tbody[^>]*>([\s\S]*?)<\/tbody>/i)
  const tbody = tbodyMatch ? tbodyMatch[1] : String(html || "")
  const rows = []
  let rowMatch
  const rowRegex = /<tr\b[^>]*>([\s\S]*?)<\/tr>/gi
  while ((rowMatch = rowRegex.exec(tbody))) {
    const row = rowMatch[1]
    // The notice seq is taken from the row's getDetailView(<digits>) call; rows without one are skipped.
    const seqMatch = row.match(/getDetailView\(\s*['"]?(\d+)['"]?\s*\)/i)
    if (!seqMatch) continue
    const cells = [...row.matchAll(/<td\b[^>]*>([\s\S]*?)<\/td>/gi)].map((match) => match[1])
    // Cells are read positionally: number, title, department, registered date, views.
    if (cells.length < 5) continue
    const titleAnchor = cells[1].match(/<a\b[^>]*>([\s\S]*?)<\/a>/i)
    // Remove the "new" badge span (and a leading NEW label) so it does not leak into the title.
    const rawTitle = (titleAnchor ? titleAnchor[1] : cells[1]).replace(/<span\b[^>]*class=["'][^"']*icoNew[^"']*["'][^>]*>[\s\S]*?<\/span>/gi, " ")
    const title = trimOrNull(stripTags(rawTitle).replace(/^NEW\s*/i, ""))
    const seq = seqMatch[1]
    const status = classifyNoticeStatus(title)
    const item = {
      seq,
      number: trimOrNull(stripTags(cells[0])),
      title,
      department: trimOrNull(stripTags(cells[2])),
      registered_date: trimOrNull(stripTags(cells[3])),
      views: parseNumberOrNull(stripTags(cells[4])),
      is_new: /icoNew|>\s*NEW\s*</i.test(cells[1]),
      category: config.key,
      category_name: config.name,
      status,
      status_basis: "title_text_classifier",
      detail_url: buildDetailUrl({ seq, category: config.key }).toString()
    }
    // Status filtering happens here, after classification from the title text.
    if (statusMatches(item.status, normalized.status)) rows.push(compactObject(item))
  }
  return rows
}
328
+
329
/**
 * Parses a non-negative integer that may contain thousands separators.
 *
 * Commas are removed before validation, so a value made only of commas
 * yields null. It used to pass the character check and return NaN.
 *
 * @param {*} value Raw value, e.g. "1,234".
 * @returns {number|null} The integer, or null when the cleaned value is not
 *   made solely of digits and commas with at least one digit.
 */
function parseNumberOrNull(value) {
  const digits = cleanText(value).replace(/,/g, "")
  return /^\d+$/.test(digits) ? Number.parseInt(digits, 10) : null
}
333
+
334
/**
 * Turns a board list page into the search result object.
 *
 * @param {*} html List page HTML.
 * @param {object} [options] Raw or normalized search options.
 * @returns {object} `{ query, summary, source, warnings, items }`, plus `html`
 *   when includeHtml is set. `items` is truncated to the requested page size.
 * @throws {Error} When the options fail validation.
 */
function parseListHtml(html, options = {}) {
  const normalized = normalizeSearchOptions(options)
  const items = parseListRows(html, normalized).slice(0, normalized.pageSize)

  // The page counts as a list page when it has the #listTb container or a
  // <tbody> followed by a getDetailView( call.
  const source = String(html || "")
  const hasListContainer = /<div\b[^>]*id=["']listTb["']/i.test(source)
  const hasExpectedListMarkup = hasListContainer || /<tbody[^>]*>[\s\S]*getDetailView\(/i.test(source)

  const query = {
    keyword: normalized.keyword || null,
    srch_tp: normalized.srchTp || null,
    category: normalized.category,
    category_name: CATEGORY_CONFIGS[normalized.category].name,
    status: normalized.status || null
  }
  const summary = {
    page: normalized.page,
    page_size: normalized.pageSize,
    returned_count: items.length,
    total_count: extractTotalCount(html)
  }
  const sourceInfo = {
    name: "sh-public-html",
    url: buildSearchUrl(normalized).toString(),
    proxy: false
  }
  const warnings = buildUnexpectedHtmlWarnings(html, hasExpectedListMarkup, "list")
  if (normalized.status) {
    warnings.push("SH public board has no first-class status field; status filtering uses a conservative title-text classifier.")
  }

  const result = { query, summary, source: sourceInfo, warnings, items }
  if (normalized.includeHtml) result.html = html
  return result
}
366
+
367
/**
 * Extracts the `downList` array literal embedded in a detail page and parses
 * it as JSON.
 *
 * @param {*} html Detail page HTML.
 * @returns {Array} The parsed array, or [] when the literal is missing or is
 *   not valid JSON.
 */
function parseAttachmentDownList(html) {
  const found = /downList["\']?\s*[:=]\s*(\[[\s\S]*?\])\s*[;,}]/.exec(String(html || ""))
  if (found === null) return []
  let parsed
  try {
    parsed = JSON.parse(found[1])
  } catch {
    // Best effort: the literal may be JavaScript rather than strict JSON.
    return []
  }
  return Array.isArray(parsed) ? parsed : []
}
377
+
378
/**
 * Tells whether an anchor's text is only an extension badge (such as ".pdf")
 * or empty, rather than a real file name.
 *
 * @param {*} value Anchor text.
 * @returns {boolean} True for empty text or a bare known extension label.
 */
function isAttachmentIconLabel(value) {
  const label = trimOrNull(value)
  if (!label) return true
  return /^\.(?:pdf|hwp|hwpx|docx?|xlsx?|pptx?|txt|zip|jpg|jpeg|png|gif|mp[34]|etc)$/i.test(label)
}
382
+
383
/**
 * Collects attachment metadata from a detail page.
 *
 * File anchors found in the attachment row(s) are paired by position with the
 * preview links in the same cell, and enriched with the matching `downList`
 * entry when one shares the preview URL's file_seq.
 *
 * @param {*} html Detail page HTML.
 * @returns {object[]} Compacted objects with any of: filename, file_seq,
 *   file_size, file_type, preview_url.
 */
function parseAttachments(html) {
  const downList = parseAttachmentDownList(html)
  // NOTE(review): a null entry in downList would throw on `file.fileSeq` here — confirm upstream never emits one.
  // Entries without a fileSeq are all stored under the "" key.
  const byFileSeq = new Map(downList.map((file) => [String(file.fileSeq || ""), file]))
  const attachments = []
  // Drop HTML comments first so commented-out rows are not parsed.
  const source = String(html || "").replace(/<!--[\s\S]*?-->/g, " ")
  // Rows whose header cell reads 첨부 or 첨부파일; the capture is the first data cell.
  const rowRegex = /<tr\b[^>]*>[\s\S]*?<th\b[^>]*>\s*첨부(?:파일)?\s*<\/th>[\s\S]*?<td\b[^>]*>([\s\S]*?)<\/td>[\s\S]*?<\/tr>/gi
  let match
  while ((match = rowRegex.exec(source))) {
    const cell = match[1]
    const anchors = [...cell.matchAll(/<a\b([^>]*)>([\s\S]*?)<\/a>/gi)].map((anchorMatch) => {
      const attrs = anchorMatch[1]
      return {
        className: getHtmlAttr(attrs, "class"),
        href: getHtmlAttr(attrs, "href"),
        onclick: getHtmlAttr(attrs, "onclick"),
        text: trimOrNull(stripTags(anchorMatch[2]))
      }
    })
    // Preview links point at htmlConverter.do; entries rejected by
    // normalizeAttachmentPreviewUrl stay in the list as null, keeping positions stable.
    const previewUrls = anchors
      .map((anchor) => anchor.href)
      .filter((href) => /htmlConverter\.do/i.test(href))
      .map(normalizeAttachmentPreviewUrl)
    // File anchors: class btnAttach, an existFile(<digits>) onclick, and text that is a real name.
    const fileAnchors = anchors.filter((anchor) => /\bbtnAttach\b/i.test(anchor.className) && /existFile\(\s*['"]?\d+['"]?\s*\)/i.test(anchor.onclick) && !isAttachmentIconLabel(anchor.text))
    fileAnchors.forEach((anchor, index) => {
      // The n-th file anchor is paired with the n-th preview link.
      // NOTE(review): this assumes each file has exactly one preview link, in the same order — confirm.
      const previewUrl = previewUrls[index] || null
      const fileSeq = previewUrl && new URL(previewUrl).searchParams.get("file_seq")
      const meta = byFileSeq.get(String(fileSeq || "")) || {}
      attachments.push(compactObject({
        filename: cleanText(meta.oriFileNm || anchor.text),
        file_seq: fileSeq || (meta.fileSeq ? String(meta.fileSeq) : null),
        file_size: parseNumberOrNull(meta.fileSize),
        file_type: trimOrNull(meta.fileTp),
        preview_url: previewUrl
      }))
    })
  }
  return attachments
}
421
+
422
/**
 * Resolves an attachment preview href against the SH origin and accepts it
 * only when it points at the SH HTML-converter endpoint.
 *
 * @param {string} href Raw href from the page (relative or absolute).
 * @returns {string|null} The absolute URL, or null when the href is invalid,
 *   off-origin, or targets a different path.
 */
function normalizeAttachmentPreviewUrl(href) {
  let resolved
  try {
    resolved = new URL(href, SH_BASE_URL)
  } catch {
    return null
  }
  const isSameOrigin = resolved.origin === SH_BASE_URL
  const isConverterPath = resolved.pathname === "/app/com/util/htmlConverter.do"
  return isSameOrigin && isConverterPath ? resolved.toString() : null
}
432
+
433
/**
 * Reads the responsible department (담당부서) from the page's personInfo list.
 *
 * Tries the "<span>담당부서</span> : value" markup first, then falls back to
 * the tag-stripped text, stopping at 담당자, 연락처, or the end.
 *
 * @param {*} html Detail page HTML.
 * @returns {string|null} Department name, or null when it cannot be found.
 */
function extractDepartment(html) {
  const listMatch = String(html || "").match(/<ul\b[^>]*class=["'][^"']*personInfo[^"']*["'][^>]*>([\s\S]*?)<\/ul>/i)
  if (!listMatch) return null
  const listHtml = listMatch[1]
  let found = listHtml.match(/담당부서\s*<\/span>\s*:\s*([^<]+)/i)
  if (!found) {
    found = stripTags(listHtml).match(/담당부서\s*:\s*([^:]+?)(?:담당자|연락처|$)/)
  }
  if (!found) return null
  return trimOrNull(found[1])
}
439
+
440
/**
 * Turns a notice detail page into a notice object.
 *
 * @param {*} html Detail page HTML.
 * @param {object} [options] Raw or normalized detail options (seq, category, includeHtml).
 * @returns {object} Compacted notice fields (empty ones removed, including an
 *   empty warnings list), plus `attachments` (always present) and `html`
 *   when includeHtml is set.
 * @throws {Error} When the options fail validation.
 */
function parseDetailHtml(html, options = {}) {
  const normalized = normalizeDetailOptions(options)
  const config = CATEGORY_CONFIGS[normalized.category]
  const source = String(html || "")

  // Title: the detail table caption, else the two-column header cell.
  const titleMatch =
    source.match(/<div\b[^>]*class=["'][^"']*detailTable[^"']*firgs0401Table[^"']*["'][^>]*>[\s\S]*?<caption>([\s\S]*?)<\/caption>/i) ||
    source.match(/<thead>[\s\S]*?<th\b[^>]*colspan=["']2["'][^>]*>([\s\S]*?)<\/th>/i)
  const registeredMatch = source.match(/<strong>\s*등록일\s*:\s*<\/strong>\s*([0-9]{4}[-.][0-9]{2}[-.][0-9]{2})/i)
  const viewsMatch = source.match(/<strong>\s*조회수\s*:\s*<\/strong>\s*([0-9,]+)/i)
  const contentMatch = source.match(/<td\b[^>]*class=["']cont["'][^>]*>([\s\S]*?)<\/td>/i)

  const title = trimOrNull(stripTags(titleMatch ? titleMatch[1] : ""))
  const attachments = parseAttachments(html)
  // Dates written with dots are reported with dashes.
  const registeredDate = registeredMatch ? registeredMatch[1].replace(/\./g, "-") : null
  const views = viewsMatch ? Number.parseInt(viewsMatch[1].replace(/,/g, ""), 10) : null
  const hasDetailMarkup = /detailTable|class=["']cont["']|firgs0401Table/i.test(source)

  const detail = compactObject({
    seq: normalized.seq,
    title,
    registered_date: registeredDate,
    views,
    department: extractDepartment(html),
    category: config.key,
    category_name: config.name,
    status: classifyNoticeStatus(title),
    status_basis: "title_text_classifier",
    content_text: trimOrNull(stripTags(contentMatch ? contentMatch[1] : "")),
    detail_url: buildDetailUrl(normalized).toString(),
    warnings: buildUnexpectedHtmlWarnings(html, hasDetailMarkup, "detail")
  })
  // Assigned after compaction so the key is present even when the list is empty.
  detail.attachments = attachments
  if (normalized.includeHtml) detail.html = html
  return detail
}
469
+
470
/**
 * Creates an AbortSignal that aborts after the given delay.
 *
 * @param {*} timeoutMs Delay in milliseconds (coerced with Number).
 * @returns {AbortSignal|null} The signal, or null when AbortSignal.timeout is
 *   unavailable or the delay is not a finite positive number.
 */
function createTimeoutSignal(timeoutMs) {
  const isSupported = typeof AbortSignal !== "undefined" && typeof AbortSignal.timeout === "function"
  if (!isSupported) return null
  const ms = Number(timeoutMs)
  if (!Number.isFinite(ms) || ms <= 0) return null
  return AbortSignal.timeout(ms)
}
475
+
476
/**
 * Fetches a URL and returns the response body as text.
 *
 * Both the request and the body read are wrapped, so a failure in either
 * surfaces as "SH upstream request failed: …" with the original error kept
 * as `cause`. Values thrown that are not Error instances are stringified
 * rather than rendered as "undefined".
 *
 * A caller-supplied `signal` is used as-is and replaces the timeout signal.
 *
 * @param {URL|string} url Target URL.
 * @param {object} [options]
 * @param {Function} [options.fetcher] fetch-compatible function; defaults to the global fetch.
 * @param {AbortSignal} [options.signal] Cancellation signal.
 * @param {number} [options.timeoutMs] Timeout used when no signal is given.
 * @returns {Promise<string>} Response body.
 * @throws {Error} When no fetch implementation exists, the request or body
 *   read fails, or the response status is not OK.
 */
async function fetchText(url, options = {}) {
  const fetcher = options.fetcher || globalThis.fetch
  if (!fetcher) throw new Error("fetch is required")
  const signal = options.signal || createTimeoutSignal(options.timeoutMs || DEFAULT_TIMEOUT_MS)
  let response
  let text
  try {
    response = await fetcher(url.toString(), {
      headers: {
        "user-agent": "Mozilla/5.0 (compatible; k-skill/sh-notice-search)",
        accept: "text/html,application/xhtml+xml"
      },
      signal
    })
    // Read the body inside the try so stream errors and aborts are wrapped too.
    text = await response.text()
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error)
    throw new Error(`SH upstream request failed: ${reason}`, { cause: error })
  }
  if (!response.ok) {
    // Include a short body excerpt to help diagnose upstream error pages.
    throw new Error(`SH upstream responded with HTTP ${response.status}: ${text.slice(0, 200)}`)
  }
  return text
}
498
+
499
/**
 * Searches the public SH notice board and returns the parsed list result.
 *
 * @param {object} [options] Search options (see normalizeSearchOptions).
 * @returns {Promise<object>} The result produced by parseListHtml.
 * @throws {Error} When the options are invalid or the request fails.
 */
async function searchNotices(options = {}) {
  const normalized = normalizeSearchOptions(options)
  const listUrl = buildSearchUrl(normalized)
  const html = await fetchText(listUrl, normalized)
  return parseListHtml(html, normalized)
}
504
+
505
/**
 * Fetches one notice detail page and returns the parsed notice together with
 * the effective query and source information.
 *
 * @param {object} [options] Detail options (see normalizeDetailOptions).
 * @returns {Promise<object>} `{ notice, query, source }`.
 * @throws {Error} When the options are invalid or the request fails.
 */
async function getNoticeDetail(options = {}) {
  const normalized = normalizeDetailOptions(options)
  const html = await fetchText(buildDetailUrl(normalized), normalized)
  const notice = parseDetailHtml(html, normalized)
  const query = {
    seq: normalized.seq,
    category: normalized.category,
    category_name: CATEGORY_CONFIGS[normalized.category].name
  }
  const source = {
    name: "sh-public-html",
    url: buildDetailUrl(normalized).toString(),
    proxy: false
  }
  return { notice, query, source }
}
522
+
523
// Public API: the two network entry points (searchNotices, getNoticeDetail)
// plus the constants, normalizers, URL builders, and HTML parsers they use.
module.exports = {
  SH_BASE_URL,
  DEFAULT_CATEGORY,
  CATEGORY_CONFIGS,
  STATUS_ALIASES,
  cleanText,
  stripTags,
  normalizeCategory,
  normalizeSearchOptions,
  normalizeDetailOptions,
  buildSearchUrl,
  buildDetailUrl,
  extractTotalCount,
  classifyNoticeStatus,
  parseListRows,
  parseListHtml,
  parseAttachmentDownList,
  parseAttachments,
  parseDetailHtml,
  createTimeoutSignal,
  searchNotices,
  getNoticeDetail
}