sh-notice-search 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -0
- package/package.json +36 -0
- package/src/cli.js +72 -0
- package/src/index.js +545 -0
package/README.md
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# sh-notice-search
|
|
2
|
+
|
|
3
|
+
Public SH(서울주택도시개발공사) notice lookup client for the `sh-notice-search` k-skill.
|
|
4
|
+
|
|
5
|
+
## Source
|
|
6
|
+
|
|
7
|
+
- List/detail pages: `https://www.i-sh.co.kr/app/lay2/program/.../www/brd/.../{list,view}.do`
|
|
8
|
+
- Default category: `주택임대` (`multi_itm_seq=2`)
|
|
9
|
+
- Keyword search: SH requires both `srchWord` and `srchTp`; this client defaults keyword searches to title scope (`srchTp=0`).
|
|
10
|
+
|
|
11
|
+
This is an unauthenticated public HTML surface. No proxy or API key is required. The client does not automate application, login, document submission, payment, or My Page flows.
|
|
12
|
+
|
|
13
|
+
## Usage
|
|
14
|
+
|
|
15
|
+
```js
|
|
16
|
+
const { searchNotices, getNoticeDetail } = require("sh-notice-search")
|
|
17
|
+
|
|
18
|
+
const list = await searchNotices({ keyword: "행복주택", category: "임대", page: 1 })
|
|
19
|
+
const detail = await getNoticeDetail({ seq: list.items[0].seq, category: "임대" })
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
CLI:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
sh-notice-search 행복주택 --category 임대 --limit 5
|
|
26
|
+
sh-notice-search 매입임대 --category 주거복지 --status 진행
|
|
27
|
+
sh-notice-search --seq 304371 --category 임대
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Returned fields
|
|
31
|
+
|
|
32
|
+
List rows include `seq`, `title`, `department`, `registered_date`, `views`, `category`, `status`, and the official `detail_url`.
|
|
33
|
+
|
|
34
|
+
Detail rows include `content_text` plus attachment metadata: `filename`, `file_seq`, `file_size`, `file_type`, and official SH `preview_url`. Direct download URLs are intentionally not exposed because SH file-download behavior can be session/policy dependent; hand off official preview/detail URLs to the user's browser.
|
|
35
|
+
|
|
36
|
+
## Boundaries
|
|
37
|
+
|
|
38
|
+
- `pageSize`/`limit` is capped at 10 because the SH board returns a fixed 10 rows per page.
|
|
39
|
+
- Status filtering uses a conservative title-text classifier because the public board list has no first-class status field.
|
|
40
|
+
- Category aliases map to official board tabs (`주택임대`, `주택분양`, `주택매입`, `토지`, etc.). The `주거복지` alias maps to SH's public `주택매입` tab.
|
|
41
|
+
- Public HTML structure, NetFunnel/rate limits, and attachment preview policy can change.
|
package/package.json
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "sh-notice-search",
|
|
3
|
+
"version": "0.2.0",
|
|
4
|
+
"description": "Public SH Seoul Housing notice lookup client for k-skill",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"main": "src/index.js",
|
|
7
|
+
"bin": {
|
|
8
|
+
"sh-notice-search": "src/cli.js"
|
|
9
|
+
},
|
|
10
|
+
"files": [
|
|
11
|
+
"src",
|
|
12
|
+
"README.md"
|
|
13
|
+
],
|
|
14
|
+
"engines": {
|
|
15
|
+
"node": ">=18"
|
|
16
|
+
},
|
|
17
|
+
"publishConfig": {
|
|
18
|
+
"access": "public"
|
|
19
|
+
},
|
|
20
|
+
"repository": {
|
|
21
|
+
"type": "git",
|
|
22
|
+
"url": "git+https://github.com/NomaDamas/k-skill.git"
|
|
23
|
+
},
|
|
24
|
+
"keywords": [
|
|
25
|
+
"k-skill",
|
|
26
|
+
"sh",
|
|
27
|
+
"seoul",
|
|
28
|
+
"housing",
|
|
29
|
+
"notices",
|
|
30
|
+
"korea"
|
|
31
|
+
],
|
|
32
|
+
"scripts": {
|
|
33
|
+
"lint": "node --check src/index.js && node --check src/cli.js && node --check test/index.test.js",
|
|
34
|
+
"test": "node --test"
|
|
35
|
+
}
|
|
36
|
+
}
|
package/src/cli.js
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
const { getNoticeDetail, searchNotices } = require("./index")
|
|
3
|
+
|
|
4
|
+
/**
 * Execute one CLI request and print the result as pretty-printed JSON.
 *
 * A request carrying `seq`, `id`, or `noticeSeq` is routed to the detail
 * lookup; every other request is routed to the list search.
 *
 * @param {object} [options] Parsed CLI options; defaults to parsing `process.argv`.
 * @param {{ log: Function }} [io] Output sink; defaults to `console`.
 * @returns {Promise<void>}
 */
async function main(options = parseArgs(process.argv.slice(2)), io = console) {
  const wantsDetail = Boolean(options.seq || options.id || options.noticeSeq)
  const handler = wantsDetail ? getNoticeDetail : searchNotices
  const payload = await handler(options)
  io.log(JSON.stringify(payload, null, 2))
}
|
|
10
|
+
|
|
11
|
+
/**
 * Convert raw command-line arguments into the options object used by `main`.
 *
 * Value flags consume the following argument (an absent value becomes "").
 * `--help`/`-h` prints usage and exits the process. A 4+ digit argument that
 * directly follows `detail`/`--detail` becomes `seq`. The first remaining
 * positional argument becomes `keyword`; later ones are ignored.
 *
 * @param {string[]} argv Arguments without the node binary and script path.
 * @returns {object} Parsed options (all values are strings except `includeHtml`).
 */
function parseArgs(argv) {
  // Flags that take a value, mapped to the option name they fill in.
  const valueFlags = new Map([
    ["--query", "keyword"],
    ["-q", "keyword"],
    ["--keyword", "keyword"],
    ["--category", "category"],
    ["--kind", "category"],
    ["--status", "status"],
    ["--page", "page"],
    ["--limit", "limit"],
    ["--page-size", "limit"],
    ["--srch-tp", "searchType"],
    ["--search-type", "searchType"],
    ["--seq", "seq"],
    ["--id", "seq"]
  ])
  const isDetailMarker = (token) => token === "detail" || token === "--detail"

  const options = {}
  let index = 0
  while (index < argv.length) {
    const arg = argv[index]
    if (valueFlags.has(arg)) {
      index += 1
      options[valueFlags.get(arg)] = argv[index] || ""
    } else if (arg === "--include-html") {
      options.includeHtml = true
    } else if (arg === "--help" || arg === "-h") {
      printHelp()
      process.exit(0)
    } else if (/^\d{4,}$/.test(arg) && !options.seq && isDetailMarker(argv[index - 1])) {
      options.seq = arg
    } else if (isDetailMarker(arg)) {
      // Marker only: a numeric argument right after it is read as the seq.
    } else if (!options.keyword) {
      options.keyword = arg
    }
    index += 1
  }
  return options
}
|
|
36
|
+
|
|
37
|
+
function printHelp() {
|
|
38
|
+
console.log(`Usage: sh-notice-search [keyword] [options]
|
|
39
|
+
|
|
40
|
+
Search public SH notices:
|
|
41
|
+
sh-notice-search 행복주택 --category 임대 --limit 5
|
|
42
|
+
sh-notice-search 매입임대 --category 주거복지 --status 진행
|
|
43
|
+
|
|
44
|
+
Fetch one detail:
|
|
45
|
+
sh-notice-search --seq 304371 --category 임대
|
|
46
|
+
|
|
47
|
+
Options:
|
|
48
|
+
-q, --query <text> Keyword. Defaults to title search when present.
|
|
49
|
+
--search-type <type> title/제목 or content/내용.
|
|
50
|
+
--category <category> all, rent/임대, sale/분양, welfare/주거복지, land/토지, etc.
|
|
51
|
+
--status <status> open/진행, closed/마감, announced/당첨자 (title classifier).
|
|
52
|
+
--page <number> Page number (default: 1).
|
|
53
|
+
--limit <number> Returned rows; capped at SH fixed page size 10.
|
|
54
|
+
--seq <number> Fetch detail by SH notice seq.
|
|
55
|
+
--include-html Include raw HTML in output for diagnostics.
|
|
56
|
+
`)
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
/**
 * Render an error for terminal output.
 *
 * @param {*} error Anything that was thrown or rejected.
 * @returns {string} The `stack` when the value has a truthy one, otherwise `String(error)`.
 */
function formatError(error) {
  if (error && error.stack) return error.stack
  return String(error)
}
|
|
62
|
+
|
|
63
|
+
/**
 * CLI entry point: parse arguments, run the request, and report failures.
 *
 * A rejected request is written to `io.error` and sets `process.exitCode` to 1
 * instead of rejecting the returned promise.
 *
 * @param {string[]} [argv] Arguments; defaults to `process.argv.slice(2)`.
 * @param {{ log: Function, error: Function }} [io] Output sink; defaults to `console`.
 * @returns {Promise<void>}
 */
function run(argv = process.argv.slice(2), io = console) {
  const reportFailure = (error) => {
    io.error(formatError(error))
    process.exitCode = 1
  }
  return main(parseArgs(argv), io).catch(reportFailure)
}
|
|
69
|
+
|
|
70
|
+
if (require.main === module) run()
|
|
71
|
+
|
|
72
|
+
module.exports = { parseArgs, printHelp, formatError, main, run }
|
package/src/index.js
ADDED
|
@@ -0,0 +1,545 @@
|
|
|
1
|
+
const SH_BASE_URL = "https://www.i-sh.co.kr"
|
|
2
|
+
const DEFAULT_CATEGORY = "rent"
|
|
3
|
+
const DEFAULT_PAGE_SIZE = 10
|
|
4
|
+
const MAX_PAGE_SIZE = 10
|
|
5
|
+
const DEFAULT_TIMEOUT_MS = 20000
|
|
6
|
+
|
|
7
|
+
const CATEGORY_CONFIGS = {
|
|
8
|
+
all: {
|
|
9
|
+
key: "all",
|
|
10
|
+
name: "전체",
|
|
11
|
+
path: "/app/lay2/program/S1T294C295/www/brd/m_241",
|
|
12
|
+
multiItmSeqs: "1,2,4,8,16,32,64,128,256,512",
|
|
13
|
+
aliases: ["all", "전체"]
|
|
14
|
+
},
|
|
15
|
+
sale: {
|
|
16
|
+
key: "sale",
|
|
17
|
+
name: "주택분양",
|
|
18
|
+
path: "/app/lay2/program/S1T294C296/www/brd/m_244",
|
|
19
|
+
multiItmSeq: "1",
|
|
20
|
+
aliases: ["sale", "분양", "주택분양", "분양주택"]
|
|
21
|
+
},
|
|
22
|
+
rent: {
|
|
23
|
+
key: "rent",
|
|
24
|
+
name: "주택임대",
|
|
25
|
+
path: "/app/lay2/program/S1T294C297/www/brd/m_247",
|
|
26
|
+
multiItmSeq: "2",
|
|
27
|
+
aliases: ["rent", "임대", "주택임대", "임대주택"]
|
|
28
|
+
},
|
|
29
|
+
purchase: {
|
|
30
|
+
key: "purchase",
|
|
31
|
+
name: "주택매입",
|
|
32
|
+
path: "/app/lay2/program/S1T294C3379/www/brd/m_247",
|
|
33
|
+
multiItmSeq: "512",
|
|
34
|
+
aliases: ["purchase", "매입", "주택매입", "매입임대", "welfare", "주거복지"]
|
|
35
|
+
},
|
|
36
|
+
movein: {
|
|
37
|
+
key: "movein",
|
|
38
|
+
name: "입주안내",
|
|
39
|
+
path: "/app/lay2/program/S1T294C298/www/brd/m_248",
|
|
40
|
+
multiItmSeq: "4",
|
|
41
|
+
aliases: ["movein", "입주", "입주안내"]
|
|
42
|
+
},
|
|
43
|
+
land: {
|
|
44
|
+
key: "land",
|
|
45
|
+
name: "토지",
|
|
46
|
+
path: "/app/lay2/program/S1T294C299/www/brd/m_255",
|
|
47
|
+
multiItmSeq: "8",
|
|
48
|
+
aliases: ["land", "토지"]
|
|
49
|
+
},
|
|
50
|
+
commercial: {
|
|
51
|
+
key: "commercial",
|
|
52
|
+
name: "상가/공장",
|
|
53
|
+
path: "/app/lay2/program/S1T294C300/www/brd/m_256",
|
|
54
|
+
multiItmSeq: "16",
|
|
55
|
+
aliases: ["commercial", "상가", "공장", "상가/공장"]
|
|
56
|
+
},
|
|
57
|
+
compensation: {
|
|
58
|
+
key: "compensation",
|
|
59
|
+
name: "보상/이주",
|
|
60
|
+
path: "/app/lay2/program/S1T294C301/www/brd/m_257",
|
|
61
|
+
multiItmSeq: "32",
|
|
62
|
+
aliases: ["compensation", "보상", "이주", "보상/이주"]
|
|
63
|
+
},
|
|
64
|
+
design: {
|
|
65
|
+
key: "design",
|
|
66
|
+
name: "현상설계",
|
|
67
|
+
path: "/app/lay2/program/S1T294C302/www/brd/m_258",
|
|
68
|
+
multiItmSeq: "64",
|
|
69
|
+
aliases: ["design", "현상설계", "설계"]
|
|
70
|
+
},
|
|
71
|
+
etc: {
|
|
72
|
+
key: "etc",
|
|
73
|
+
name: "기타",
|
|
74
|
+
path: "/app/lay2/program/S1T294C304/www/brd/m_260",
|
|
75
|
+
multiItmSeq: "256",
|
|
76
|
+
aliases: ["etc", "기타"]
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
const CATEGORY_ALIAS = Object.fromEntries(
|
|
81
|
+
Object.values(CATEGORY_CONFIGS).flatMap((config) => config.aliases.map((alias) => [normalizeToken(alias), config.key]))
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
const STATUS_ALIASES = {
|
|
85
|
+
open: "open",
|
|
86
|
+
ongoing: "open",
|
|
87
|
+
active: "open",
|
|
88
|
+
"진행": "open",
|
|
89
|
+
"공고중": "open",
|
|
90
|
+
"모집중": "open",
|
|
91
|
+
closed: "closed",
|
|
92
|
+
close: "closed",
|
|
93
|
+
ended: "closed",
|
|
94
|
+
"마감": "closed",
|
|
95
|
+
"종료": "closed",
|
|
96
|
+
"결과": "closed",
|
|
97
|
+
announced: "announced",
|
|
98
|
+
"발표": "announced",
|
|
99
|
+
"당첨": "announced",
|
|
100
|
+
"당첨자": "announced"
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
/**
 * Canonicalize a user-supplied token for alias lookups.
 *
 * @param {*} value Any value; `null`/`undefined` are treated as "".
 * @returns {string} The value as a string with all whitespace removed, lower-cased.
 */
function normalizeToken(value) {
  const text = value == null ? "" : String(value)
  const withoutWhitespace = text.replace(/\s+/g, "")
  return withoutWhitespace.trim().toLowerCase()
}
|
|
106
|
+
|
|
107
|
+
/**
 * Collapse whitespace runs to single spaces, trim, then decode HTML entities.
 *
 * @param {*} value Any value; `null`/`undefined` are treated as "".
 * @returns {string} The cleaned text.
 */
function cleanText(value) {
  const raw = value == null ? "" : String(value)
  const singleSpaced = raw.replace(/\s+/g, " ").trim()
  return decodeHtml(singleSpaced)
}
|
|
110
|
+
|
|
111
|
+
/**
 * Clean a value with `cleanText` and map an empty result to `null`.
 *
 * @param {*} value Any value.
 * @returns {string|null} The cleaned text, or `null` when nothing is left.
 */
function trimOrNull(value) {
  const cleaned = cleanText(value)
  if (!cleaned) return null
  return cleaned
}
|
|
115
|
+
|
|
116
|
+
// The only named entities this client decodes; any other `&name;` is left as-is.
const HTML_NAMED_ENTITIES = Object.freeze({
  amp: "&",
  lt: "<",
  gt: ">",
  quot: '"',
  apos: "'",
  nbsp: " "
})

/**
 * Decode numeric (`&#123;`, `&#x1F;`) and a small set of named HTML entities.
 *
 * All entities are decoded in a single pass, so already-escaped text is
 * unescaped exactly one level (`&amp;lt;` becomes `&lt;`, not `<`).
 *
 * @param {*} value Any value; `null`/`undefined` yield "".
 * @returns {string} The decoded string.
 */
function decodeHtml(value) {
  if (value === undefined || value === null) return ""
  const entityPattern = /&(?:#(\d+)|#x([0-9a-f]+)|(amp|lt|gt|quot|apos|nbsp));/gi
  return String(value).replace(entityPattern, (match, dec, hex, name) => {
    if (dec !== undefined) return decodeNumericEntity(Number.parseInt(dec, 10), match)
    if (hex !== undefined) return decodeNumericEntity(Number.parseInt(hex, 16), match)
    return HTML_NAMED_ENTITIES[name.toLowerCase()]
  })
}

/**
 * Turn a numeric character reference into its character.
 *
 * @param {number} codePoint Parsed code point.
 * @param {string} fallback Text to return when the code point is not usable
 *   (the original entity text, so invalid references stay visible).
 * @returns {string} The character, or `fallback` for out-of-range values.
 */
function decodeNumericEntity(codePoint, fallback) {
  try {
    if (!Number.isFinite(codePoint) || codePoint < 0 || codePoint > 0x10ffff) return fallback
    return String.fromCodePoint(codePoint)
  } catch {
    return fallback
  }
}
|
|
138
|
+
|
|
139
|
+
/**
 * Reduce an HTML fragment to plain text.
 *
 * `<script>` and `<style>` elements are dropped with their contents, remaining
 * tags become spaces, entities are decoded, and whitespace is collapsed.
 *
 * @param {*} html HTML fragment; falsy values are treated as "".
 * @returns {string} Single-spaced, trimmed text.
 */
function stripTags(html) {
  const markup = String(html || "")
  const withoutTags = markup
    .replace(/<script[\s\S]*?<\/script>/gi, " ")
    .replace(/<style[\s\S]*?<\/style>/gi, " ")
    .replace(/<[^>]+>/g, " ")
  const decoded = decodeHtml(withoutTags)
  return decoded.replace(/\s+/g, " ").trim()
}
|
|
147
|
+
|
|
148
|
+
/**
 * Read one quoted attribute value out of a tag's attribute string.
 *
 * @param {*} attrs Raw attribute text (the part between `<a` and `>`).
 * @param {string} name Attribute name; inserted into a regular expression as-is.
 * @returns {string} The entity-decoded value, or "" when the attribute is absent.
 */
function getHtmlAttr(attrs, name) {
  const pattern = new RegExp(`\\b${name}\\s*=\\s*(["'])([\\s\\S]*?)\\1`, "i")
  const found = pattern.exec(String(attrs || ""))
  if (!found) return ""
  return decodeHtml(found[2])
}
|
|
152
|
+
|
|
153
|
+
/**
 * Copy an object, leaving out entries that carry no information.
 *
 * Dropped values: `null`, `undefined`, "" and empty arrays. Other falsy
 * values such as `0` and `false` are kept.
 *
 * @param {object} value Source object.
 * @returns {object} A new object with only the meaningful entries.
 */
function compactObject(value) {
  const isMeaningful = (entry) => {
    if (entry == null || entry === "") return false
    return !(Array.isArray(entry) && entry.length === 0)
  }
  const kept = Object.entries(value).filter((pair) => isMeaningful(pair[1]))
  return Object.fromEntries(kept)
}
|
|
160
|
+
|
|
161
|
+
/**
 * Parse a digits-only value and clamp it into `[min, max]`.
 *
 * @param {*} value Raw input; `null`, `undefined`, and blank strings use the default.
 * @param {object} config
 * @param {*} config.defaultValue Returned when `value` is missing or blank.
 * @param {number} [config.min=1] Lower bound; smaller inputs return `min`.
 * @param {number} [config.max] Upper bound; applied only when finite.
 * @param {string} config.label Name used in the error message.
 * @returns {*} The clamped integer, or `defaultValue`.
 * @throws {Error} When the trimmed input contains anything but digits.
 */
function parsePositiveInteger(value, { defaultValue, min = 1, max, label }) {
  const text = value == null ? "" : String(value).trim()
  if (text === "") return defaultValue
  if (!/^\d+$/.test(text)) throw new Error(`Provide valid ${label}.`)
  const parsed = Number.parseInt(text, 10)
  if (parsed < min) return min
  return Number.isFinite(max) && parsed > max ? max : parsed
}
|
|
170
|
+
|
|
171
|
+
/**
 * Resolve a user-supplied category name or alias to a `CATEGORY_CONFIGS` key.
 *
 * Only own properties of the lookup tables are consulted, so tokens that
 * happen to name inherited object members (for example `constructor` or
 * `__proto__`) are rejected as unsupported instead of resolving to a
 * non-string value.
 *
 * @param {*} value Category name or alias; falsy values use `DEFAULT_CATEGORY`.
 * @returns {string} The canonical category key.
 * @throws {Error} When the token matches no alias and no category key.
 */
function normalizeCategory(value) {
  const token = normalizeToken(value || DEFAULT_CATEGORY)
  let key
  if (Object.hasOwn(CATEGORY_ALIAS, token)) {
    key = CATEGORY_ALIAS[token]
  } else if (Object.hasOwn(CATEGORY_CONFIGS, token)) {
    key = CATEGORY_CONFIGS[token].key
  }
  if (!key) throw new Error(`Unsupported SH category: ${value}`)
  return key
}
|
|
177
|
+
|
|
178
|
+
/**
 * Map a search-scope name to the upstream `srchTp` value.
 *
 * @param {*} value Scope name (`title`/`제목`/`0` or `content`/`contents`/`본문`/`내용`/`1`).
 * @param {boolean} hasKeyword Whether a keyword accompanies the request.
 * @returns {string|null} "0" for title, "1" for content; with no scope given,
 *   "0" when a keyword is present and `null` otherwise.
 * @throws {Error} When the scope name is not recognized.
 */
function normalizeSearchType(value, hasKeyword) {
  switch (normalizeToken(value)) {
    case "":
      return hasKeyword ? "0" : null
    case "title":
    case "제목":
    case "0":
      return "0"
    case "content":
    case "contents":
    case "본문":
    case "내용":
    case "1":
      return "1"
    default:
      throw new Error("srchTp must be title/content or 제목/내용.")
  }
}
|
|
185
|
+
|
|
186
|
+
/**
 * Resolve a user-supplied status name to a canonical status.
 *
 * Only own entries of `STATUS_ALIASES` count as matches, so tokens that name
 * inherited object members (for example `constructor`) are rejected instead of
 * being returned as a non-string "status".
 *
 * @param {*} value Status name or alias; blank values mean "no status filter".
 * @returns {string|null} The canonical status, or `null` when no status was given.
 * @throws {Error} When the token is not a known status alias.
 */
function normalizeStatus(value) {
  const token = normalizeToken(value)
  if (!token) return null
  if (!Object.hasOwn(STATUS_ALIASES, token)) throw new Error(`Unsupported SH status: ${value}`)
  return STATUS_ALIASES[token]
}
|
|
193
|
+
|
|
194
|
+
/**
 * Validate and canonicalize list-search options.
 *
 * Accepts several spellings per field (`keyword`/`q`/`query`/`srchWord`,
 * `category`/`kind`/`noticeType`, `srchTp`/`searchType`/`type`,
 * `page`/`pageNo`, `pageSize`/`limit`); the first non-nullish one wins.
 *
 * @param {object} [options] Raw caller options.
 * @returns {object} Normalized options: `keyword`, `srchTp`, `page`, `pageSize`,
 *   `category`, `status`, `timeoutMs`, `fetcher`, `signal`, `includeHtml`.
 * @throws {Error} When the keyword exceeds 100 characters or a field is invalid.
 */
function normalizeSearchOptions(options = {}) {
  const keyword = trimOrNull(options.keyword ?? options.q ?? options.query ?? options.srchWord)
  if (keyword?.length > 100) throw new Error("srchWord must be 100 characters or fewer.")

  const category = normalizeCategory(options.category ?? options.kind ?? options.noticeType)
  const srchTp = normalizeSearchType(options.srchTp ?? options.searchType ?? options.type, Boolean(keyword))
  const page = parsePositiveInteger(options.page ?? options.pageNo, { defaultValue: 1, min: 1, max: 1000, label: "page" })
  const pageSize = parsePositiveInteger(options.pageSize ?? options.limit, {
    defaultValue: DEFAULT_PAGE_SIZE,
    min: 1,
    max: MAX_PAGE_SIZE,
    label: "pageSize"
  })
  const status = normalizeStatus(options.status)
  const timeoutMs = parsePositiveInteger(options.timeoutMs, {
    defaultValue: DEFAULT_TIMEOUT_MS,
    min: 1,
    max: 120000,
    label: "timeoutMs"
  })

  return {
    keyword,
    srchTp,
    page,
    pageSize,
    category,
    status,
    timeoutMs,
    fetcher: options.fetcher,
    signal: options.signal,
    includeHtml: Boolean(options.includeHtml)
  }
}
|
|
211
|
+
|
|
212
|
+
/**
 * Validate and canonicalize detail-lookup options.
 *
 * The notice id is read from `seq`, `noticeSeq`, or `id` (first non-nullish)
 * and must be 1 to 20 digits.
 *
 * @param {object} [options] Raw caller options.
 * @returns {object} Normalized options: `seq`, `category`, `timeoutMs`,
 *   `fetcher`, `signal`, `includeHtml`.
 * @throws {Error} When the id is missing or not digits-only, or a field is invalid.
 */
function normalizeDetailOptions(options = {}) {
  const seq = trimOrNull(options.seq ?? options.noticeSeq ?? options.id)
  if (!seq) throw new Error("seq is required")
  if (!/^\d{1,20}$/.test(seq)) throw new Error("seq must be digits only.")

  const category = normalizeCategory(options.category ?? options.kind ?? options.noticeType)
  const timeoutMs = parsePositiveInteger(options.timeoutMs, {
    defaultValue: DEFAULT_TIMEOUT_MS,
    min: 1,
    max: 120000,
    label: "timeoutMs"
  })

  return {
    seq,
    category,
    timeoutMs,
    fetcher: options.fetcher,
    signal: options.signal,
    includeHtml: Boolean(options.includeHtml)
  }
}
|
|
226
|
+
|
|
227
|
+
/**
 * Build the list-page URL for a search request.
 *
 * @param {object} [options] Search options (normalized internally).
 * @returns {URL} `list.do` URL for the category, carrying the category filter,
 *   `page`, and, when present, `srchWord` and `srchTp`.
 */
function buildSearchUrl(options = {}) {
  const { category, page, keyword, srchTp } = normalizeSearchOptions(options)
  const { path, multiItmSeqs, multiItmSeq } = CATEGORY_CONFIGS[category]
  const url = new URL(`${SH_BASE_URL}${path}/list.do`)
  const query = url.searchParams
  if (multiItmSeqs) query.set("multi_itm_seqs", multiItmSeqs)
  if (multiItmSeq) query.set("multi_itm_seq", multiItmSeq)
  query.set("page", String(page || 1))
  if (keyword) query.set("srchWord", keyword)
  if (srchTp) query.set("srchTp", srchTp)
  return url
}
|
|
238
|
+
|
|
239
|
+
/**
 * Build the detail-page URL for one notice.
 *
 * @param {object} [options] Detail options (normalized internally).
 * @returns {URL} `view.do` URL for the category, carrying `multi_itm_seq`
 *   when the category defines one, followed by `seq`.
 */
function buildDetailUrl(options = {}) {
  const { category, seq } = normalizeDetailOptions(options)
  const { path, multiItmSeq } = CATEGORY_CONFIGS[category]
  const url = new URL(`${SH_BASE_URL}${path}/view.do`)
  const query = url.searchParams
  if (multiItmSeq) query.set("multi_itm_seq", multiItmSeq)
  query.set("seq", seq)
  return url
}
|
|
247
|
+
|
|
248
|
+
/**
 * Read the total result count ("총 N 건") from a list page.
 *
 * The `<strong>`-wrapped form is tried on the raw HTML first; the tag-stripped
 * text is the fallback.
 *
 * @param {*} html List page HTML.
 * @returns {number|null} The count with thousands separators removed, or `null`.
 */
function extractTotalCount(html) {
  const markupHit = /총\s*<strong[^>]*>\s*([0-9,]+)\s*<\/strong>\s*건/i.exec(String(html || ""))
  const hit = markupHit || /총\s*([0-9,]+)\s*건/.exec(stripTags(html))
  if (!hit) return null
  return Number.parseInt(hit[1].replace(/,/g, ""), 10)
}
|
|
252
|
+
|
|
253
|
+
/**
 * Guess a notice's status from keywords in its title.
 *
 * Rules are checked in order and the first match wins: winner/announcement
 * wording, then closing/result wording, then recruiting/application wording.
 *
 * @param {*} title Notice title.
 * @returns {"announced"|"closed"|"open"|"unknown"} The classified status.
 */
function classifyNoticeStatus(title) {
  const text = cleanText(title)
  const rules = [
    ["announced", /당첨|발표/],
    ["closed", /마감|계약결과|결과|완료|종료/],
    ["open", /모집공고|입주자\s*모집|신청|접수|공고/]
  ]
  const hit = rules.find(([, pattern]) => pattern.test(text))
  return hit ? hit[0] : "unknown"
}
|
|
260
|
+
|
|
261
|
+
/**
 * Decide whether a row passes the requested status filter.
 *
 * @param {string} itemStatus Status classified for the row.
 * @param {string|null} requestedStatus Requested status; falsy means "no filter".
 * @returns {boolean} `true` when no filter is set or the statuses are identical.
 */
function statusMatches(itemStatus, requestedStatus) {
  return !requestedStatus || itemStatus === requestedStatus
}
|
|
267
|
+
|
|
268
|
+
/**
 * List the block/maintenance hints that appear in a page's visible text.
 *
 * @param {*} html Page HTML.
 * @returns {string[]} Labels of the matching markers, in the fixed order
 *   NetFunnel, CAPTCHA, 로그인, 점검, 대기열, 차단.
 */
function findUpstreamBlockMarkers(html) {
  const text = stripTags(html)
  const found = []
  const check = (label, pattern) => {
    if (pattern.test(text)) found.push(label)
  }
  check("NetFunnel", /NetFunnel/i)
  check("CAPTCHA", /captcha|보안문자/i)
  check("로그인", /로그인|login/i)
  check("점검", /점검|maintenance/i)
  check("대기열", /대기열|queue/i)
  check("차단", /차단|block/i)
  return found
}
|
|
280
|
+
|
|
281
|
+
/**
 * Produce a warning when a page lacks the markup the parser expects.
 *
 * @param {*} html Page HTML.
 * @param {boolean} expectedMarkupFound Whether the expected markup was seen.
 * @param {string} label Page kind used in the message (e.g. "list", "detail").
 * @returns {string[]} Empty when the markup was found; otherwise one message,
 *   naming any block/maintenance markers detected in the page.
 */
function buildUnexpectedHtmlWarnings(html, expectedMarkupFound, label) {
  if (expectedMarkupFound) return []
  const markers = findUpstreamBlockMarkers(html)
  const message = markers.length > 0
    ? `unexpected SH ${label} HTML; possible block/maintenance markers: ${markers.join(", ")}`
    : `unexpected SH ${label} HTML; expected public SH ${label} markup was not found.`
  return [message]
}
|
|
289
|
+
|
|
290
|
+
/**
 * Extract notice rows from a list page.
 *
 * Rows are read from the `<tbody>` inside the `listTb` container when present,
 * otherwise from the first `<tbody>`, otherwise from the whole document. A row
 * is kept only if it contains a `getDetailView(<digits>)` call and at least
 * five `<td>` cells, and if its classified status passes the requested filter.
 *
 * @param {*} html List page HTML.
 * @param {object} [options] Search options (normalized internally); supplies
 *   the category and the optional status filter.
 * @returns {object[]} Row objects with empty fields removed.
 */
function parseListRows(html, options = {}) {
  const normalized = normalizeSearchOptions(options)
  const config = CATEGORY_CONFIGS[normalized.category]
  const markup = String(html || "")
  const scoped = markup.match(/<div\b[^>]*id=["']listTb["'][^>]*>[\s\S]*?<tbody[^>]*>([\s\S]*?)<\/tbody>[\s\S]*?<\/div>/i) ||
    markup.match(/<tbody[^>]*>([\s\S]*?)<\/tbody>/i)
  const tableBody = scoped ? scoped[1] : markup

  const items = []
  for (const [, rowHtml] of tableBody.matchAll(/<tr\b[^>]*>([\s\S]*?)<\/tr>/gi)) {
    const seqHit = /getDetailView\(\s*['"]?(\d+)['"]?\s*\)/i.exec(rowHtml)
    if (!seqHit) continue
    const cells = Array.from(rowHtml.matchAll(/<td\b[^>]*>([\s\S]*?)<\/td>/gi), (cellHit) => cellHit[1])
    if (cells.length < 5) continue

    const [numberCell, titleCell, departmentCell, dateCell, viewsCell] = cells
    const anchorHit = /<a\b[^>]*>([\s\S]*?)<\/a>/i.exec(titleCell)
    const titleMarkup = anchorHit ? anchorHit[1] : titleCell
    // Drop the "new" badge element before reading the title text.
    const withoutBadge = titleMarkup.replace(/<span\b[^>]*class=["'][^"']*icoNew[^"']*["'][^>]*>[\s\S]*?<\/span>/gi, " ")
    const title = trimOrNull(stripTags(withoutBadge).replace(/^NEW\s*/i, ""))
    const seq = seqHit[1]

    const entry = {
      seq,
      number: trimOrNull(stripTags(numberCell)),
      title,
      department: trimOrNull(stripTags(departmentCell)),
      registered_date: trimOrNull(stripTags(dateCell)),
      views: parseNumberOrNull(stripTags(viewsCell)),
      is_new: /icoNew|>\s*NEW\s*</i.test(titleCell),
      category: config.key,
      category_name: config.name,
      status: classifyNoticeStatus(title),
      status_basis: "title_text_classifier",
      detail_url: buildDetailUrl({ seq, category: config.key }).toString()
    }
    if (statusMatches(entry.status, normalized.status)) items.push(compactObject(entry))
  }
  return items
}
|
|
328
|
+
|
|
329
|
+
/**
 * Parse a non-negative integer that may contain thousands separators.
 *
 * @param {*} value Raw value (string or number).
 * @returns {number|null} The integer, or `null` when the cleaned text is not
 *   made of digits and commas or contains no digit at all (e.g. "," alone,
 *   which would otherwise parse to `NaN`).
 */
function parseNumberOrNull(value) {
  const text = cleanText(value)
  if (!/^[0-9,]+$/.test(text)) return null
  const digits = text.replace(/,/g, "")
  // Comma-only input passes the character check but carries no number.
  if (digits === "") return null
  return Number.parseInt(digits, 10)
}
|
|
333
|
+
|
|
334
|
+
/**
 * Parse a list page into the structured search result.
 *
 * @param {*} html List page HTML.
 * @param {object} [options] Search options (normalized internally).
 * @returns {object} Result with `query`, `summary`, `source`, `warnings`, and
 *   `items` (at most `pageSize` rows); `html` is added when `includeHtml` is set.
 */
function parseListHtml(html, options = {}) {
  const normalized = normalizeSearchOptions(options)
  const markup = String(html || "")
  const items = parseListRows(html, normalized).slice(0, normalized.pageSize)
  const looksLikeList = /<div\b[^>]*id=["']listTb["']/i.test(markup) ||
    /<tbody[^>]*>[\s\S]*getDetailView\(/i.test(markup)

  const warnings = buildUnexpectedHtmlWarnings(html, looksLikeList, "list")
  if (normalized.status) {
    warnings.push("SH public board has no first-class status field; status filtering uses a conservative title-text classifier.")
  }

  const result = {
    query: {
      keyword: normalized.keyword || null,
      srch_tp: normalized.srchTp || null,
      category: normalized.category,
      category_name: CATEGORY_CONFIGS[normalized.category].name,
      status: normalized.status || null
    },
    summary: {
      page: normalized.page,
      page_size: normalized.pageSize,
      returned_count: items.length,
      total_count: extractTotalCount(html)
    },
    source: {
      name: "sh-public-html",
      url: buildSearchUrl(normalized).toString(),
      proxy: false
    },
    warnings,
    items
  }
  if (normalized.includeHtml) result.html = html
  return result
}
|
|
366
|
+
|
|
367
|
+
/**
 * Pull the `downList` JSON array embedded in a detail page's script.
 *
 * @param {*} html Detail page HTML.
 * @returns {Array} The parsed array, or `[]` when the assignment is absent,
 *   is not valid JSON, or does not parse to an array.
 */
function parseAttachmentDownList(html) {
  const found = /downList["\']?\s*[:=]\s*(\[[\s\S]*?\])\s*[;,}]/.exec(String(html || ""))
  if (found === null) return []
  let parsed
  try {
    parsed = JSON.parse(found[1])
  } catch {
    // The embedded literal is not strict JSON: treat it as "no metadata".
    return []
  }
  return Array.isArray(parsed) ? parsed : []
}
|
|
377
|
+
|
|
378
|
+
/**
 * Tell whether an anchor's text is only a file-type icon label.
 *
 * @param {*} value Anchor text.
 * @returns {boolean} `true` for empty text or a bare extension such as ".pdf"
 *   (case-insensitive); `false` for anything else, e.g. a real file name.
 */
function isAttachmentIconLabel(value) {
  const label = trimOrNull(value)
  if (!label) return true
  return /^\.(?:pdf|hwp|hwpx|docx?|xlsx?|pptx?|txt|zip|jpg|jpeg|png|gif|mp[34]|etc)$/i.test(label)
}
|
|
382
|
+
|
|
383
|
+
/**
 * Collect attachment metadata from a detail page.
 *
 * HTML comments are removed first. Within each table row headed 첨부/첨부파일,
 * file anchors (class `btnAttach`, `existFile(<digits>)` onclick, text that is
 * not just an icon label) are paired by position with the row's
 * `htmlConverter.do` preview links. The `file_seq` of the preview link is used
 * to look up extra metadata in the page's `downList`.
 *
 * @param {*} html Detail page HTML.
 * @returns {object[]} Attachments with `filename`, `file_seq`, `file_size`,
 *   `file_type`, and `preview_url` where available.
 */
function parseAttachments(html) {
  const metaByFileSeq = new Map(parseAttachmentDownList(html).map((file) => [String(file.fileSeq || ""), file]))
  const markup = String(html || "").replace(/<!--[\s\S]*?-->/g, " ")
  const attachmentRow = /<tr\b[^>]*>[\s\S]*?<th\b[^>]*>\s*첨부(?:파일)?\s*<\/th>[\s\S]*?<td\b[^>]*>([\s\S]*?)<\/td>[\s\S]*?<\/tr>/gi
  const attachments = []

  for (const [, cell] of markup.matchAll(attachmentRow)) {
    const anchors = Array.from(cell.matchAll(/<a\b([^>]*)>([\s\S]*?)<\/a>/gi), ([, attrs, inner]) => ({
      className: getHtmlAttr(attrs, "class"),
      href: getHtmlAttr(attrs, "href"),
      onclick: getHtmlAttr(attrs, "onclick"),
      text: trimOrNull(stripTags(inner))
    }))

    const previewUrls = []
    for (const { href } of anchors) {
      if (/htmlConverter\.do/i.test(href)) previewUrls.push(normalizeAttachmentPreviewUrl(href))
    }

    const fileAnchors = anchors.filter((anchor) => {
      if (!/\bbtnAttach\b/i.test(anchor.className)) return false
      if (!/existFile\(\s*['"]?\d+['"]?\s*\)/i.test(anchor.onclick)) return false
      return !isAttachmentIconLabel(anchor.text)
    })

    for (const [index, anchor] of fileAnchors.entries()) {
      const previewUrl = previewUrls[index] || null
      const fileSeq = previewUrl && new URL(previewUrl).searchParams.get("file_seq")
      const meta = metaByFileSeq.get(String(fileSeq || "")) || {}
      attachments.push(compactObject({
        filename: cleanText(meta.oriFileNm || anchor.text),
        file_seq: fileSeq || (meta.fileSeq ? String(meta.fileSeq) : null),
        file_size: parseNumberOrNull(meta.fileSize),
        file_type: trimOrNull(meta.fileTp),
        preview_url: previewUrl
      }))
    }
  }
  return attachments
}
|
|
421
|
+
|
|
422
|
+
/**
 * Resolve an attachment preview link and accept it only if it is the official
 * converter endpoint.
 *
 * @param {string} href Link target, absolute or relative to `SH_BASE_URL`.
 * @returns {string|null} The absolute URL when its origin is `SH_BASE_URL` and
 *   its path is `/app/com/util/htmlConverter.do`; otherwise `null` (including
 *   when the link cannot be parsed).
 */
function normalizeAttachmentPreviewUrl(href) {
  let url
  try {
    url = new URL(href, SH_BASE_URL)
  } catch {
    return null
  }
  const isOfficialPreview = url.origin === SH_BASE_URL && url.pathname === "/app/com/util/htmlConverter.do"
  return isOfficialPreview ? url.toString() : null
}
|
|
432
|
+
|
|
433
|
+
/**
 * Read the responsible department (담당부서) from a detail page.
 *
 * Only the `<ul>` whose class contains `personInfo` is searched: first for the
 * `담당부서</span> : value` markup form, then in its tag-stripped text.
 *
 * @param {*} html Detail page HTML.
 * @returns {string|null} The department name, or `null` when not found.
 */
function extractDepartment(html) {
  const listHit = /<ul\b[^>]*class=["'][^"']*personInfo[^"']*["'][^>]*>([\s\S]*?)<\/ul>/i.exec(String(html || ""))
  if (!listHit) return null
  const personInfo = listHit[1]
  const departmentHit = /담당부서\s*<\/span>\s*:\s*([^<]+)/i.exec(personInfo) ||
    /담당부서\s*:\s*([^:]+?)(?:담당자|연락처|$)/.exec(stripTags(personInfo))
  if (!departmentHit) return null
  return trimOrNull(departmentHit[1])
}
|
|
439
|
+
|
|
440
|
+
/**
 * Parse a notice detail page into a structured record.
 *
 * @param {*} html Detail page HTML.
 * @param {object} [options] Detail options (normalized internally); supplies
 *   `seq`, the category, and `includeHtml`.
 * @returns {object} Notice fields with empty values removed, plus an
 *   `attachments` array that is always present; `html` is added when
 *   `includeHtml` is set.
 */
function parseDetailHtml(html, options = {}) {
  const normalized = normalizeDetailOptions(options)
  const config = CATEGORY_CONFIGS[normalized.category]
  const markup = String(html || "")

  // Title: the detail table's <caption>, else a two-column header cell.
  const titleHit = markup.match(/<div\b[^>]*class=["'][^"']*detailTable[^"']*firgs0401Table[^"']*["'][^>]*>[\s\S]*?<caption>([\s\S]*?)<\/caption>/i) ||
    markup.match(/<thead>[\s\S]*?<th\b[^>]*colspan=["']2["'][^>]*>([\s\S]*?)<\/th>/i)
  const registeredHit = markup.match(/<strong>\s*등록일\s*:\s*<\/strong>\s*([0-9]{4}[-.][0-9]{2}[-.][0-9]{2})/i)
  const viewsHit = markup.match(/<strong>\s*조회수\s*:\s*<\/strong>\s*([0-9,]+)/i)
  const contentHit = markup.match(/<td\b[^>]*class=["']cont["'][^>]*>([\s\S]*?)<\/td>/i)

  const title = trimOrNull(stripTags(titleHit ? titleHit[1] : ""))
  const attachments = parseAttachments(html)
  const hasDetailMarkup = /detailTable|class=["']cont["']|firgs0401Table/i.test(markup)

  const detail = compactObject({
    seq: normalized.seq,
    title,
    registered_date: registeredHit ? registeredHit[1].replace(/\./g, "-") : null,
    views: viewsHit ? Number.parseInt(viewsHit[1].replace(/,/g, ""), 10) : null,
    department: extractDepartment(html),
    category: config.key,
    category_name: config.name,
    status: classifyNoticeStatus(title),
    status_basis: "title_text_classifier",
    content_text: trimOrNull(stripTags(contentHit ? contentHit[1] : "")),
    detail_url: buildDetailUrl(normalized).toString(),
    warnings: buildUnexpectedHtmlWarnings(html, hasDetailMarkup, "detail")
  })
  // Assigned after compaction so an empty list is still reported.
  detail.attachments = attachments
  if (normalized.includeHtml) detail.html = html
  return detail
}
|
|
469
|
+
|
|
470
|
+
/**
 * Create an abort signal that fires after a timeout, when the runtime can.
 *
 * @param {*} timeoutMs Timeout in milliseconds (coerced with `Number`).
 * @returns {AbortSignal|null} A timeout signal, or `null` when
 *   `AbortSignal.timeout` is unavailable or the timeout is not a finite
 *   positive number.
 */
function createTimeoutSignal(timeoutMs) {
  const supported = typeof AbortSignal !== "undefined" && typeof AbortSignal.timeout === "function"
  if (!supported) return null
  const ms = Number(timeoutMs)
  if (!Number.isFinite(ms) || ms <= 0) return null
  return AbortSignal.timeout(ms)
}
|
|
475
|
+
|
|
476
|
+
/**
 * Fetch a page and return its body text.
 *
 * @param {URL|string} url Target URL; converted with `toString()`.
 * @param {object} [options]
 * @param {Function} [options.fetcher] `fetch`-compatible function; defaults to the global `fetch`.
 * @param {AbortSignal} [options.signal] Caller-supplied signal; when given, no
 *   timeout signal is created.
 * @param {number} [options.timeoutMs] Timeout used when no signal is supplied.
 * @returns {Promise<string>} The response body.
 * @throws {Error} "fetch is required" when no fetch implementation exists;
 *   "SH upstream request failed: …" when the request or body read fails (the
 *   underlying error is kept as `cause`); "SH upstream responded with HTTP …"
 *   for non-OK statuses, including the first 200 characters of the body.
 */
async function fetchText(url, options = {}) {
  const fetcher = options.fetcher || global.fetch
  if (!fetcher) throw new Error("fetch is required")
  const signal = options.signal || createTimeoutSignal(options.timeoutMs || DEFAULT_TIMEOUT_MS)
  let response
  let text
  try {
    response = await fetcher(url.toString(), {
      headers: {
        "user-agent": "Mozilla/5.0 (compatible; k-skill/sh-notice-search)",
        accept: "text/html,application/xhtml+xml"
      },
      signal
    })
    // Read the body inside the same guard: a timeout or abort can also hit here.
    text = await response.text()
  } catch (error) {
    // Rejections are not guaranteed to be Error instances.
    const reason = error?.message ?? String(error)
    throw new Error(`SH upstream request failed: ${reason}`, { cause: error })
  }
  if (!response.ok) {
    throw new Error(`SH upstream responded with HTTP ${response.status}: ${text.slice(0, 200)}`)
  }
  return text
}
|
|
498
|
+
|
|
499
|
+
/**
 * Search the public notice board and return the parsed list.
 *
 * @param {object} [options] Search options (keyword, category, page, status, ...).
 * @returns {Promise<object>} The structured list result.
 */
async function searchNotices(options = {}) {
  const request = normalizeSearchOptions(options)
  const listUrl = buildSearchUrl(request)
  const body = await fetchText(listUrl, request)
  return parseListHtml(body, request)
}
|
|
504
|
+
|
|
505
|
+
/**
 * Fetch one notice's detail page and return the parsed record.
 *
 * @param {object} [options] Detail options; a digits-only `seq` is required.
 * @returns {Promise<object>} `{ notice, query, source }`, where `source.url`
 *   is the detail URL that was requested.
 */
async function getNoticeDetail(options = {}) {
  const request = normalizeDetailOptions(options)
  const detailUrl = buildDetailUrl(request)
  const body = await fetchText(detailUrl, request)
  const notice = parseDetailHtml(body, request)
  const { seq, category } = request
  return {
    notice,
    query: {
      seq,
      category,
      category_name: CATEGORY_CONFIGS[category].name
    },
    source: {
      name: "sh-public-html",
      url: detailUrl.toString(),
      proxy: false
    }
  }
}
|
|
522
|
+
|
|
523
|
+
module.exports = {
|
|
524
|
+
SH_BASE_URL,
|
|
525
|
+
DEFAULT_CATEGORY,
|
|
526
|
+
CATEGORY_CONFIGS,
|
|
527
|
+
STATUS_ALIASES,
|
|
528
|
+
cleanText,
|
|
529
|
+
stripTags,
|
|
530
|
+
normalizeCategory,
|
|
531
|
+
normalizeSearchOptions,
|
|
532
|
+
normalizeDetailOptions,
|
|
533
|
+
buildSearchUrl,
|
|
534
|
+
buildDetailUrl,
|
|
535
|
+
extractTotalCount,
|
|
536
|
+
classifyNoticeStatus,
|
|
537
|
+
parseListRows,
|
|
538
|
+
parseListHtml,
|
|
539
|
+
parseAttachmentDownList,
|
|
540
|
+
parseAttachments,
|
|
541
|
+
parseDetailHtml,
|
|
542
|
+
createTimeoutSignal,
|
|
543
|
+
searchNotices,
|
|
544
|
+
getNoticeDetail
|
|
545
|
+
}
|