local-election-candidate-search 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +47 -0
- package/package.json +35 -0
- package/src/cli.js +69 -0
- package/src/index.js +407 -0
package/README.md
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# local-election-candidate-search
|
|
2
|
+
|
|
3
|
+
Public Korean local election candidate lookup client for the `local-election-candidate-search` k-skill.
|
|
4
|
+
|
|
5
|
+
## Source
|
|
6
|
+
|
|
7
|
+
- Official public surface: 중앙선거관리위원회 선거통계시스템 통합검색 `https://info.nec.go.kr/search/searchCandidate.xhtml`
|
|
8
|
+
- Request method: unauthenticated `POST` with `searchKeyword=<exact candidate name>`.
|
|
9
|
+
- The NEC page states that integrated search looks up historical/recent preliminary candidates, candidates, and elected persons by exact name.
|
|
10
|
+
|
|
11
|
+
This client calls the public NEC HTML surface directly from the user's machine. No proxy, API key, login, CAPTCHA bypass, registration, or filing automation is used.
|
|
12
|
+
|
|
13
|
+
## Usage
|
|
14
|
+
|
|
15
|
+
```js
|
|
16
|
+
const { searchCandidates } = require("local-election-candidate-search")
|
|
17
|
+
|
|
18
|
+
const result = await searchCandidates({
|
|
19
|
+
name: "오세훈",
|
|
20
|
+
election: "시도지사",
|
|
21
|
+
region: "서울",
|
|
22
|
+
limit: 5
|
|
23
|
+
})
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
CLI:
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
local-election-candidate-search 오세훈 --election 시도지사 --region 서울 --limit 5
|
|
30
|
+
local-election-candidate-search 김동연 --date 2014 --election 기초의원
|
|
31
|
+
local-election-candidate-search 이재명 --all
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Returned fields
|
|
35
|
+
|
|
36
|
+
Each item includes parsed candidate/profile and election fields when present: `name`, `hanja`, `birth_date`, `gender`, `election_date`, `election_name`, `election_name_code`, `election_code`, `election_type`, `is_local_election`, `party`, `district`, `votes`, `vote_share`, `elected`, `city_code`, `sgg_city_code`, `town_code`, `job`, `education`, and `career`.
|
|
37
|
+
|
|
38
|
+
By default, the client filters to local-election-related NEC election codes: 시·도지사(3), 구·시·군의 장(4), 시·도의회의원(5), 구·시·군의회의원(6), 광역비례(8), 기초비례(9), 교육감(11). Use `--all` / `localOnly:false` to include non-local races from NEC integrated search.
|
|
39
|
+
|
|
40
|
+
`summary.upstream_result_limit` records how many NEC rows were requested before local client-side filters were applied. When election/date/region/local filters are active, the client fetches up to 100 upstream rows first and then applies the user-facing `limit` after exact-name matching, filtering, and deduplication.
|
|
41
|
+
|
|
42
|
+
## Boundaries and failure modes
|
|
43
|
+
|
|
44
|
+
- NEC integrated search works best with exact Korean candidate names and may return homonyms; use `--election`, `--date`, and `--region` to narrow results.
|
|
45
|
+
- The upstream is HTML, so parser warnings are returned for empty results, maintenance pages, NetFunnel queues, login prompts, or unexpected markup changes.
|
|
46
|
+
- If the fetched upstream page reaches the 100-row cap while client-side filters are active, the result includes a warning that additional matches may require pagination.
|
|
47
|
+
- This package does not automate NEC detail popups, file downloads, account login, CAPTCHA, political filing, or any privileged workflow.
|
package/package.json
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "local-election-candidate-search",
|
|
3
|
+
"version": "0.2.0",
|
|
4
|
+
"description": "Public NEC Korean local election candidate lookup client for k-skill",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"main": "src/index.js",
|
|
7
|
+
"bin": {
|
|
8
|
+
"local-election-candidate-search": "src/cli.js"
|
|
9
|
+
},
|
|
10
|
+
"files": [
|
|
11
|
+
"src",
|
|
12
|
+
"README.md"
|
|
13
|
+
],
|
|
14
|
+
"engines": {
|
|
15
|
+
"node": ">=18"
|
|
16
|
+
},
|
|
17
|
+
"publishConfig": {
|
|
18
|
+
"access": "public"
|
|
19
|
+
},
|
|
20
|
+
"repository": {
|
|
21
|
+
"type": "git",
|
|
22
|
+
"url": "git+https://github.com/NomaDamas/k-skill.git"
|
|
23
|
+
},
|
|
24
|
+
"keywords": [
|
|
25
|
+
"k-skill",
|
|
26
|
+
"nec",
|
|
27
|
+
"korea",
|
|
28
|
+
"local-election",
|
|
29
|
+
"candidate"
|
|
30
|
+
],
|
|
31
|
+
"scripts": {
|
|
32
|
+
"lint": "node --check src/index.js && node --check src/cli.js && node --check test/index.test.js",
|
|
33
|
+
"test": "node --test"
|
|
34
|
+
}
|
|
35
|
+
}
|
package/src/cli.js
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
const { searchCandidates } = require("./index")
|
|
3
|
+
|
|
4
|
+
/**
 * CLI entry point: prints the help text or runs a candidate search and
 * writes the result as pretty-printed JSON.
 *
 * @param {object} [options] Parsed CLI options (defaults to parsing process.argv).
 * @param {{log: Function}} [io] Output sink; defaults to the global console.
 * @returns {Promise<void>}
 */
async function main(options = parseArgs(process.argv.slice(2)), io = console) {
  if (!options.help) {
    const searchResult = await searchCandidates(options)
    const rendered = JSON.stringify(searchResult, null, 2)
    io.log(rendered)
    return
  }
  printHelp(io)
}
|
|
12
|
+
|
|
13
|
+
/**
 * Parse CLI arguments into a search options object.
 *
 * Flags that take a value consume the following token (an absent value
 * becomes ""). The first bare token is used as the candidate name when no
 * name has been set yet. Unrecognized tokens that start with "-" are ignored
 * rather than being mistaken for the candidate name, so a typo such as
 * `--verbose 오세훈` still searches for "오세훈".
 *
 * @param {string[]} argv Arguments without the node/script prefix.
 * @returns {object} Options understood by searchCandidates plus `help`.
 */
function parseArgs(argv) {
  // Flag spelling -> option key for flags followed by a value.
  const valueFlags = new Map([
    ["--name", "name"], ["--query", "name"], ["-q", "name"], ["--keyword", "name"],
    ["--election", "election"], ["--type", "election"], ["--election-code", "election"],
    ["--date", "electionDate"], ["--year", "electionDate"], ["--election-date", "electionDate"],
    ["--region", "region"], ["--city", "region"], ["--district", "region"],
    ["--limit", "limit"],
    ["--fixture", "fixture"]
  ])
  // Flag spelling -> [option key, value] for flags that stand alone.
  const switchFlags = new Map([
    ["--all", ["localOnly", false]], ["--include-all", ["localOnly", false]],
    ["--local-only", ["localOnly", true]],
    ["--include-html", ["includeHtml", true]],
    ["--help", ["help", true]], ["-h", ["help", true]]
  ])

  const options = {}
  for (let i = 0; i < argv.length; i += 1) {
    const arg = argv[i]
    if (valueFlags.has(arg)) {
      i += 1
      options[valueFlags.get(arg)] = argv[i] || ""
    } else if (switchFlags.has(arg)) {
      const [key, flagValue] = switchFlags.get(arg)
      options[key] = flagValue
    } else if (!options.name && !String(arg).startsWith("-")) {
      // Only a bare token may act as the positional candidate name.
      options.name = arg
    }
  }
  return options
}
|
|
31
|
+
|
|
32
|
+
/**
 * Write the CLI usage text to the given output sink in a single log call.
 *
 * @param {{log: Function}} [io] Output sink; defaults to the global console.
 */
function printHelp(io = console) {
  const helpLines = [
    "Usage: local-election-candidate-search <candidate-name> [options]",
    "",
    "Search the official NEC integrated candidate search and return Korean local election candidate entries.",
    "",
    "Examples:",
    "local-election-candidate-search 오세훈 --election 시도지사 --region 서울 --limit 5",
    "local-election-candidate-search 김동연 --date 2014 --election 기초의원",
    "local-election-candidate-search 이재명 --all",
    "",
    "Options:",
    "--name, -q <name> Exact candidate name (required; NEC search works best with exact names).",
    "--election <type> 시도지사, 기초단체장, 광역의원, 기초의원, 광역비례, 기초비례, 교육감.",
    "--date, --year <date> Election year or date (YYYY, YYYYMMDD, YYYY.MM.DD).",
    "--region <text> Filter district/region text, e.g. 서울 or 동작.",
    "--limit <number> Max returned entries (default 20; max 100).",
    "--all Include non-local election results too.",
    "--include-html Include raw upstream HTML for diagnostics.",
    "--fixture <path> Parse a saved NEC HTML fixture instead of fetching.",
    ""
  ]
  io.log(helpLines.join("\n"))
}
|
|
53
|
+
|
|
54
|
+
/**
 * Render an error for CLI output.
 *
 * When LOCAL_ELECTION_CANDIDATE_SEARCH_DEBUG is set and a stack is available
 * the full stack is returned; otherwise "Error: <message>" when a message
 * exists, falling back to the string form of the value.
 *
 * @param {*} error Thrown value.
 * @returns {string}
 */
function formatError(error) {
  const debugEnabled = Boolean(process.env.LOCAL_ELECTION_CANDIDATE_SEARCH_DEBUG)
  if (error) {
    if (debugEnabled && error.stack) return error.stack
    if (error.message) return `Error: ${error.message}`
  }
  return String(error)
}
|
|
59
|
+
|
|
60
|
+
/**
 * Parse the arguments, run the CLI, and report any failure on io.error while
 * setting a non-zero process exit code.
 *
 * @param {string[]} [argv] Arguments without the node/script prefix.
 * @param {{log: Function, error: Function}} [io] Output sink.
 * @returns {Promise<void>}
 */
function run(argv = process.argv.slice(2), io = console) {
  const parsedOptions = parseArgs(argv)
  const reportFailure = (error) => {
    io.error(formatError(error))
    process.exitCode = 1
  }
  return main(parsedOptions, io).catch(reportFailure)
}
|
|
66
|
+
|
|
67
|
+
if (require.main === module) run()
|
|
68
|
+
|
|
69
|
+
module.exports = { parseArgs, printHelp, formatError, main, run }
|
package/src/index.js
ADDED
|
@@ -0,0 +1,407 @@
|
|
|
1
|
+
const fs = require("node:fs/promises")
|
|
2
|
+
|
|
3
|
+
// NEC integrated candidate search endpoint used for the form POST.
const NEC_SEARCH_URL = "https://info.nec.go.kr/search/searchCandidate.xhtml"
// Upstream request timeout in milliseconds.
const DEFAULT_TIMEOUT_MS = 20000
// Default and maximum number of entries returned to the caller.
const DEFAULT_LIMIT = 20
const MAX_LIMIT = 100
// NEC election codes treated as local-election races.
const LOCAL_ELECTION_CODES = new Set(["3", "4", "5", "6", "8", "9", "11"])

// Alias -> NEC election code. Lookups are performed on tokens normalized by
// normalizeToken, which removes whitespace, dots, commas, underscores and
// hyphens, so each hyphenated English alias also needs its collapsed spelling
// ("metrocouncil", "localcouncil") to be reachable.
const ELECTION_CODE_ALIASES = new Map([
  ["3", "3"], ["시도지사", "3"], ["시·도지사", "3"], ["시도지사선거", "3"], ["광역단체장", "3"], ["governor", "3"],
  ["4", "4"], ["구시군의장", "4"], ["구시군장", "4"], ["구·시·군의장", "4"], ["구·시·군의 장", "4"], ["기초단체장", "4"], ["mayor", "4"],
  ["5", "5"], ["시도의원", "5"], ["시도의회의원", "5"], ["광역의원", "5"], ["metro-council", "5"], ["metrocouncil", "5"],
  ["6", "6"], ["구시군의원", "6"], ["구시군의회의원", "6"], ["기초의원", "6"], ["local-council", "6"], ["localcouncil", "6"],
  ["8", "8"], ["광역비례", "8"], ["광역의원비례", "8"], ["광역의원비례대표", "8"],
  ["9", "9"], ["기초비례", "9"], ["기초의원비례", "9"], ["기초의원비례대표", "9"],
  ["11", "11"], ["교육감", "11"], ["superintendent", "11"]
])
|
|
18
|
+
|
|
19
|
+
/**
 * Produce a comparison token: separators (whitespace, middle dots, commas,
 * periods, underscores, hyphens) are removed and the rest is lower-cased.
 *
 * @param {*} value Any value; null/undefined become "".
 * @returns {string}
 */
function normalizeToken(value) {
  const text = value == null ? "" : String(value)
  const withoutSeparators = text.replace(/[\s·ㆍ,._-]+/g, "")
  return withoutSeparators.trim().toLowerCase()
}
|
|
22
|
+
|
|
23
|
+
// Named entities supported by decodeHtml. &nbsp; maps to a plain space.
const NAMED_HTML_ENTITIES = new Map([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["nbsp", " "]
])

/**
 * Decode the HTML entities used in NEC markup: decimal (&#39;) and
 * hexadecimal (&#x27;) numeric references plus &amp;, &lt;, &gt;, &quot; and
 * &nbsp;.
 *
 * Decoding happens in one pass over the input, so text produced by one
 * replacement is never re-interpreted as another entity ("&amp;lt;" becomes
 * "&lt;", not "<"). Unknown or out-of-range references are left unchanged.
 *
 * @param {*} value Any value; null/undefined become "".
 * @returns {string}
 */
function decodeHtml(value) {
  const text = String(value == null ? "" : value)
  return text.replace(/&(?:#(\d+)|#x([0-9a-f]+)|([a-z]+));/gi, (match, dec, hex, named) => {
    if (dec !== undefined) return decodeNumericEntity(Number.parseInt(dec, 10), match)
    if (hex !== undefined) return decodeNumericEntity(Number.parseInt(hex, 16), match)
    // Named entities are matched case-sensitively against the table.
    return NAMED_HTML_ENTITIES.has(named) ? NAMED_HTML_ENTITIES.get(named) : match
  })
}

/**
 * Convert a numeric character reference to its character.
 *
 * @param {number} codePoint Parsed code point.
 * @param {string} fallback Text returned when the code point is not usable.
 * @returns {string}
 */
function decodeNumericEntity(codePoint, fallback) {
  try {
    if (!Number.isFinite(codePoint) || codePoint < 0 || codePoint > 0x10ffff) return fallback
    return String.fromCodePoint(codePoint)
  } catch {
    // String.fromCodePoint throws RangeError for non-integer input.
    return fallback
  }
}
|
|
44
|
+
|
|
45
|
+
/**
 * Reduce an HTML fragment to plain text: script/style elements, comments and
 * tags are replaced by spaces, entities are decoded, and whitespace runs are
 * collapsed to single spaces.
 *
 * @param {*} html HTML fragment; falsy values become "".
 * @returns {string}
 */
function stripTags(html) {
  const removablePatterns = [
    /<script[\s\S]*?<\/script>/gi,
    /<style[\s\S]*?<\/style>/gi,
    /<!--[\s\S]*?-->/g,
    /<[^>]+>/g
  ]
  let markup = String(html || "")
  for (const pattern of removablePatterns) {
    markup = markup.replace(pattern, " ")
  }
  const decoded = decodeHtml(markup)
  return decoded.replace(/\s+/g, " ").trim()
}
|
|
54
|
+
|
|
55
|
+
/**
 * Decode HTML entities in a value and collapse its whitespace.
 *
 * @param {*} value Any value; null/undefined become "".
 * @returns {string}
 */
function cleanText(value) {
  const decoded = decodeHtml(value == null ? "" : String(value))
  return decoded.replace(/\s+/g, " ").trim()
}
|
|
58
|
+
|
|
59
|
+
/**
 * Read a quoted attribute value out of a raw attribute string.
 *
 * @param {string} attrs Raw attribute text of a tag.
 * @param {string} name Attribute name (matched case-insensitively).
 * @returns {string} Entity-decoded value, or "" when the attribute is absent.
 */
function getHtmlAttr(attrs, name) {
  const pattern = new RegExp(`\\b${name}\\s*=\\s*(["'])([\\s\\S]*?)\\1`, "i")
  const found = pattern.exec(String(attrs || ""))
  if (!found) return ""
  return decodeHtml(found[2])
}
|
|
63
|
+
|
|
64
|
+
/**
 * Parse a non-negative integer option and clamp it into [min, max].
 *
 * @param {*} value Raw option value; blank/null/undefined yields defaultValue.
 * @param {object} settings
 * @param {*} settings.defaultValue Returned when value is blank.
 * @param {number} [settings.min=1] Lower clamp.
 * @param {number} [settings.max=Number.MAX_SAFE_INTEGER] Upper clamp.
 * @param {string} settings.label Name used in the validation error.
 * @returns {*} The clamped integer, or defaultValue.
 * @throws {Error} When value is not a plain digit string.
 */
function parsePositiveInteger(value, { defaultValue, min = 1, max = Number.MAX_SAFE_INTEGER, label }) {
  const text = value == null ? "" : String(value).trim()
  if (text === "") return defaultValue
  const isDigitsOnly = /^\d+$/.test(text)
  if (!isDigitsOnly) throw new Error(`Provide valid ${label}.`)
  const parsed = Number.parseInt(text, 10)
  if (parsed < min) return min
  return parsed > max ? max : parsed
}
|
|
73
|
+
|
|
74
|
+
/**
 * Coerce a flag-like option to a boolean.
 *
 * Blank values yield defaultValue, booleans pass through, and known
 * yes/no words (English and Korean) are mapped explicitly; anything else
 * falls back to JavaScript truthiness.
 *
 * @param {*} value Raw option value.
 * @param {boolean} defaultValue Result for undefined, null, or "".
 * @returns {boolean}
 */
function normalizeBoolean(value, defaultValue) {
  if (value == null || value === "") return defaultValue
  if (value === true || value === false) return value
  const enablingTokens = new Set(["1", "true", "yes", "y", "local", "지방", "지방선거"])
  const disablingTokens = new Set(["0", "false", "no", "n", "all", "전체", "includeall"])
  const token = normalizeToken(value)
  if (enablingTokens.has(token)) return true
  if (disablingTokens.has(token)) return false
  return Boolean(value)
}
|
|
82
|
+
|
|
83
|
+
/**
 * Resolve an election type alias to its NEC election code.
 *
 * @param {*} value Alias or code; blank values mean "no filter".
 * @returns {string|null} NEC election code, or null when value is blank.
 * @throws {Error} When the alias is not in ELECTION_CODE_ALIASES.
 */
function normalizeElectionCode(value) {
  const isBlank = value == null || String(value).trim() === ""
  if (isBlank) return null
  const code = ELECTION_CODE_ALIASES.get(normalizeToken(value))
  if (code) return code
  throw new Error(`Unsupported local election type: ${value}`)
}
|
|
90
|
+
|
|
91
|
+
/**
 * Normalize an election year or date to its digits.
 *
 * @param {*} value Year or date text such as "2014", "20220601" or "2022.06.01".
 * @returns {string|null} Four or eight digits, or null when value is blank.
 * @throws {Error} When the digit count is neither 4 nor 8.
 */
function normalizeElectionDate(value) {
  const raw = value == null ? "" : String(value)
  if (raw.trim() === "") return null
  const digits = raw.replace(/\D/g, "")
  if (digits.length === 4 || digits.length === 8) return digits
  throw new Error("electionDate must be YYYY or YYYYMMDD/ YYYY.MM.DD.")
}
|
|
98
|
+
|
|
99
|
+
/**
 * Validate caller options and resolve their aliases into one canonical shape.
 *
 * @param {object} [options] Raw options; several alias keys are accepted per field.
 * @returns {{name: string, localOnly: boolean, electionCode: (string|null),
 *   electionDate: (string|null), region: (string|null), limit: number,
 *   includeHtml: boolean, upstreamLimit: number}}
 * @throws {Error} When the name is missing or too long, or a field is invalid.
 */
function normalizeSearchOptions(options = {}) {
  const rawName = options.name ?? options.keyword ?? options.q ?? options.query ?? options.searchKeyword
  const name = cleanText(rawName)
  if (!name) throw new Error("Provide a candidate name to search.")
  if (name.length > 30) throw new Error("Candidate name must be 30 characters or fewer.")

  const localOnly = normalizeBoolean(options.localOnly ?? options.local ?? options.onlyLocal, true)
  const electionCode = normalizeElectionCode(options.electionCode ?? options.election ?? options.electionType ?? options.type)
  const electionDate = normalizeElectionDate(options.electionDate ?? options.date ?? options.year ?? options.electionName)
  const region = cleanText(options.region ?? options.city ?? options.district) || null
  const limit = parsePositiveInteger(options.limit ?? options.pageSize, {
    defaultValue: DEFAULT_LIMIT,
    min: 1,
    max: MAX_LIMIT,
    label: "limit"
  })
  const includeHtml = Boolean(options.includeHtml)

  const normalized = { name, localOnly, electionCode, electionDate, region, limit, includeHtml }
  // With client-side filters active, request the maximum page so filtering
  // has more upstream rows to work with; never request fewer than `limit`.
  const defaultUpstreamLimit = hasClientSideFilters(normalized) ? MAX_LIMIT : limit
  normalized.upstreamLimit = parsePositiveInteger(options.upstreamLimit ?? options.recordCountPerPage, {
    defaultValue: defaultUpstreamLimit,
    min: limit,
    max: MAX_LIMIT,
    label: "upstream limit"
  })
  return normalized
}
|
|
120
|
+
|
|
121
|
+
/**
 * Report whether any filter applied after fetching is active.
 *
 * @param {object} options Normalized search options.
 * @returns {boolean}
 */
function hasClientSideFilters(options) {
  const { localOnly, electionCode, electionDate, region } = options
  return [localOnly, electionCode, electionDate, region].some(Boolean)
}
|
|
124
|
+
|
|
125
|
+
/**
 * Build the form POST description for the NEC integrated candidate search.
 *
 * @param {object} [options] Raw search options.
 * @returns {{url: string, method: string, headers: object, body: string, options: object}}
 *   Request parts plus the normalized options used to build them.
 * @throws {Error} When the options fail validation.
 */
function buildSearchRequest(options = {}) {
  const normalized = normalizeSearchOptions(options)

  const form = new URLSearchParams()
  form.set("searchKeyword", normalized.name)
  form.set("pageIndex", "1")
  form.set("firstIndex", "0")
  form.set("recordCountPerPage", String(normalized.upstreamLimit))

  const headers = {
    "content-type": "application/x-www-form-urlencoded;charset=UTF-8",
    "user-agent": "Mozilla/5.0 (compatible; k-skill-local-election-candidate-search/0.1)",
    referer: NEC_SEARCH_URL
  }

  return {
    url: NEC_SEARCH_URL,
    method: "POST",
    headers,
    body: form.toString(),
    options: normalized
  }
}
|
|
145
|
+
|
|
146
|
+
/**
 * Extract the birth date and gender of a candidate.
 *
 * The visible text "YYYY년 M월 D일 (gender)" is preferred; when it is absent
 * an eight-digit data-birthday attribute supplies the date only.
 *
 * @param {string} text Plain text of the name/date area.
 * @param {string} [attrs] Raw attribute text of the result card.
 * @returns {{birthDate: (string|null), gender: (string|null)}}
 */
function parseBirthDateAndGender(text, attrs = "") {
  const attrBirthday = getHtmlAttr(attrs, "data-birthday")
  const dateMatch = String(text || "").match(/(\d{4})년\s*(\d{1,2})월\s*(\d{1,2})일\s*\(([^)]+)\)/)
  if (dateMatch) {
    const [, year, month, day, genderText] = dateMatch
    return {
      birthDate: `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}`,
      gender: cleanText(genderText)
    }
  }
  if (/^\d{8}$/.test(attrBirthday)) {
    const birthDate = `${attrBirthday.slice(0, 4)}-${attrBirthday.slice(4, 6)}-${attrBirthday.slice(6, 8)}`
    return { birthDate, gender: null }
  }
  return { birthDate: null, gender: null }
}
|
|
155
|
+
|
|
156
|
+
/**
 * Read the label/value table cells of one election entry and pick out the
 * job (직업), education (학력) and career (경력) fields.
 *
 * @param {string} listHtml Markup of a single election entry.
 * @returns {{job: (string|null), education: (string|null), career: string[]}}
 */
function parseProfileFields(listHtml) {
  const cellRegex = /<td\b[^>]*class=(['"])th\1[^>]*>[\s\S]*?<p[^>]*>([\s\S]*?)<\/p>[\s\S]*?<\/td>\s*<td\b[^>]*>([\s\S]*?)<\/td>/gi
  const fields = {}
  for (const [, , labelHtml, valueHtml] of listHtml.matchAll(cellRegex)) {
    const label = cleanText(stripTags(labelHtml))
    // A value cell may hold several <p> lines; keep them as a list when present.
    const paragraphs = []
    for (const [, innerHtml] of valueHtml.matchAll(/<p\b[^>]*>([\s\S]*?)<\/p>/gi)) {
      const paragraphText = stripTags(innerHtml)
      if (paragraphText) paragraphs.push(paragraphText)
    }
    const cellValue = paragraphs.length ? paragraphs : stripTags(valueHtml)
    if (label) fields[label] = cellValue
  }
  const job = asText(fields["직업"])
  const education = asText(fields["학력"])
  const career = asList(fields["경력"])
  return { job, education, career }
}
|
|
172
|
+
|
|
173
|
+
/**
 * Flatten a field value to text; lists are joined with "; ".
 *
 * @param {*} value String, list of strings, or empty value.
 * @returns {*} The text, or null when the result is empty.
 */
function asText(value) {
  const text = Array.isArray(value) ? value.join("; ") : value
  return text || null
}
|
|
177
|
+
|
|
178
|
+
/**
 * Wrap a field value in a list; arrays are returned as-is.
 *
 * @param {*} value String, list of strings, or empty value.
 * @returns {Array} The list, empty when value is falsy.
 */
function asList(value) {
  if (Array.isArray(value)) return value
  if (!value) return []
  return [value]
}
|
|
182
|
+
|
|
183
|
+
/**
 * Parse the title line of one election entry.
 *
 * The title is expected to hold a <mark> with "[date] election name" followed
 * by "/"-separated segments (party / election type / district / votes).
 * When the first segment ends with "선거" there is no party segment and the
 * remaining fields shift left by one.
 *
 * The bracketed date comes from upstream markup, so an unexpected format
 * yields a null electionDate instead of raising the input-validation error
 * intended for caller-supplied dates.
 *
 * @param {string} titleHtml Markup of the title area.
 * @returns {{electionDate: (string|null), electionName: (string|null),
 *   party: (string|null), electionType: (string|null), district: (string|null),
 *   votes: (number|null), voteShare: (string|null), elected: boolean,
 *   rawTitleText: string}}
 */
function parseTitle(titleHtml) {
  const mark = titleHtml.match(/<mark[^>]*>\s*\[([0-9.]+)\]\s*([\s\S]*?)<\/mark>/i)
  const markDigits = mark ? mark[1].replace(/\D/g, "") : ""
  // Accept only YYYY or YYYYMMDD; anything else is treated as unknown.
  const electionDate = /^(?:\d{4}|\d{8})$/.test(markDigits) ? markDigits : null
  const electionName = mark ? stripTags(mark[2]) : null
  const text = stripTags(titleHtml)
  const afterMark = mark ? stripTags(titleHtml.slice(mark.index + mark[0].length)) : text
  const segments = afterMark.split("/").map((part) => cleanText(part)).filter(Boolean)
  const elected = /당선/.test(afterMark)

  const startsWithElectionType = Boolean(segments[0]) && /선거$/.test(segments[0])
  const party = startsWithElectionType ? null : (segments[0] || null)
  const electionType = startsWithElectionType ? segments[0] : (segments[1] || null)
  let district = (startsWithElectionType ? segments[1] : segments[2]) || null

  const voteSegment = segments.find((segment) => /표/.test(segment)) || ""
  const voteMatch = voteSegment.match(/([0-9,]+)\s*표/)
  const votes = voteMatch ? Number.parseInt(voteMatch[1].replace(/,/g, ""), 10) : null
  const shareMatch = voteSegment.match(/\(([0-9.]+%)\)/)
  const voteShare = shareMatch ? shareMatch[1] : null
  // A segment containing a vote count is not a district name.
  if (district && /표/.test(district)) district = null

  return { electionDate, electionName, party, electionType, district, votes, voteShare, elected, rawTitleText: text }
}
|
|
210
|
+
|
|
211
|
+
/**
 * Copy an object without its empty entries (null, undefined, "" and empty
 * arrays). Other falsy values such as 0 and false are kept.
 *
 * @param {object} value Source object.
 * @returns {object} New object with the remaining entries in original order.
 */
function compactObject(value) {
  const hasContent = ([, entry]) => {
    const isBlank = entry === null || entry === undefined || entry === ""
    const isEmptyList = Array.isArray(entry) && entry.length === 0
    return !isBlank && !isEmptyList
  }
  const keptEntries = Object.entries(value).filter(hasContent)
  return Object.fromEntries(keptEntries)
}
|
|
218
|
+
|
|
219
|
+
/**
 * Detect a page that is not a search result page: it has none of the known
 * result markers while its text mentions queueing, login, maintenance,
 * access or service wording.
 *
 * @param {string} html Upstream page markup.
 * @returns {boolean}
 */
function isUnexpectedHtml(html) {
  const hasResultMarkers = /resultDiv|class=["']result|검색결과|fn_firstView/.test(html)
  if (hasResultMarkers) return false
  const pageText = stripTags(html)
  return /NetFunnel|로그인|점검|대기열|접근|차단|서비스/.test(pageText)
}
|
|
223
|
+
|
|
224
|
+
/**
 * Detect likely parser drift: result markers are present and the resultDiv
 * container has text, yet no "result" card element exists.
 *
 * @param {string} html Upstream page markup.
 * @returns {boolean}
 */
function hasUnparsedCandidateResults(html) {
  const hasMarkers = /resultDiv|검색결과|fn_firstView/.test(html)
  if (!hasMarkers) return false
  const hasResultCard = /<div\b[^>]*class=(['"])[^'"]*\bresult\b[^'"]*\1/i.test(html)
  if (hasResultCard) return false
  const container = String(html || "").match(/<div\b[^>]*class=(['"])[^'"]*\bresultDiv\b[^'"]*\1[^>]*>([\s\S]*?)<\/div>/i)
  return container ? stripTags(container[2]).length > 0 : false
}
|
|
231
|
+
|
|
232
|
+
/**
 * Decide whether a parsed entry passes the active client-side filters.
 *
 * @param {object} item Parsed election entry.
 * @param {object} options Normalized search options.
 * @returns {boolean} True when the entry should be kept.
 */
function filterItem(item, options) {
  const { localOnly, electionCode, electionDate, region } = options
  if (localOnly && !item.is_local_election) return false
  if (electionCode && item.election_code !== electionCode) return false
  if (electionDate) {
    // The date filter compares against the digits of the election name code.
    const codeDigits = (item.election_name_code || "").replace(/\D/g, "")
    const dateMatches = electionDate.length === 4
      ? codeDigits.startsWith(electionDate)
      : codeDigits === electionDate
    if (!dateMatches) return false
  }
  if (region) {
    const searchable = normalizeToken(`${item.district || ""} ${item.city_code || ""}`)
    if (!searchable.includes(normalizeToken(region))) return false
  }
  return true
}
|
|
247
|
+
|
|
248
|
+
/**
 * Build the deduplication key of an entry from its identifying fields.
 *
 * @param {object} item Parsed election entry.
 * @returns {string} Cleaned field values joined with "|".
 */
function getCandidateElectionKey(item) {
  const keyFields = [
    "name",
    "birth_date",
    "election_name_code",
    "election_code",
    "party",
    "district",
    "votes",
    "vote_share"
  ]
  return keyFields.map((field) => cleanText(item[field])).join("|")
}
|
|
260
|
+
|
|
261
|
+
/**
 * Parse an NEC integrated search result page into structured entries.
 *
 * Each result card describes one person and holds one list element per
 * election. Cards whose name is missing or differs from the queried name are
 * skipped with a warning. Remaining entries are filtered, de-duplicated and
 * finally truncated to the requested limit.
 *
 * @param {string} html Upstream page markup; non-string input is coerced to text.
 * @param {object} [options] Raw or already-normalized search options.
 * @returns {{query: object, summary: object, items: object[], warnings: string[],
 *   source: object, html: (string|undefined)}} `html` is set only when includeHtml is true.
 * @throws {Error} When the options fail validation.
 */
function parseSearchHtml(html, options = {}) {
  const normalized = normalizeSearchOptions(options)
  // Guard against undefined/Buffer input so the regex scans below cannot throw.
  const markup = typeof html === "string" ? html : String(html ?? "")
  const warnings = []
  const items = []
  const itemKeys = new Set()
  const source = { url: NEC_SEARCH_URL, method: "POST", surface: "NEC election statistics integrated candidate search" }
  if (isUnexpectedHtml(markup)) {
    warnings.push(`unexpected NEC search HTML; possible NetFunnel 로그인 점검 block page: ${stripTags(markup).slice(0, 160)}`)
  }

  // A card runs until the next card, the pager, the end of body, or the end of input.
  const resultRegex = /<div\b([^>]*)class=(['"])[^'"]*\bresult\b[^'"]*\2([^>]*)>([\s\S]*?)(?=<div\b[^>]*class=(['"])[^'"]*\bresult\b|<div\b[^>]*class=(['"])[^'"]*\bpage\b|<\/body>|$)/gi
  // matchAll works on a copy of the regex, so one instance can serve every card.
  const listRegex = /<div\b([^>]*)class=(['"])[^'"]*\blist\b[^'"]*\2([^>]*)>([\s\S]*?)(?=<div\b[^>]*class=(['"])[^'"]*\blist\b|<\/div>\s*<\/div>\s*(?:<div\b[^>]*class=(['"])[^'"]*\bresult\b|<\/div>|$))/gi
  let parsedResultCards = 0
  let parsedElectionEntries = 0
  for (const resultMatch of markup.matchAll(resultRegex)) {
    parsedResultCards += 1
    const resultAttrs = `${resultMatch[1] || ""} ${resultMatch[3] || ""}`
    const resultHtml = resultMatch[4]
    const listMatches = [...resultHtml.matchAll(listRegex)]
    parsedElectionEntries += listMatches.length
    const nameMatch = resultHtml.match(/<p\b[^>]*class=(['"])[^'"]*\bname\b[^'"]*\1[^>]*>([\s\S]*?)<\/p>/i)
    const nameHtml = nameMatch ? nameMatch[2] : ""
    const strongMatch = nameHtml.match(/<strong[^>]*>([\s\S]*?)<\/strong>/i)
    const hanjaMatch = nameHtml.match(/<span\b[^>]*class=(['"])[^'"]*\bhanja\b[^'"]*\1[^>]*>\s*\((.*?)\)\s*<\/span>/i)
    const dateMatch = nameHtml.match(/<span\b[^>]*class=(['"])[^'"]*\bdate\b[^'"]*\1[^>]*>([\s\S]*?)<\/span>/i)
    const personName = strongMatch ? stripTags(strongMatch[1]) : null
    if (!personName) {
      warnings.push("missing candidate name in NEC result card; skipped result because exact-name matching could not be verified")
      continue
    }
    if (normalizeToken(personName) !== normalizeToken(normalized.name)) {
      warnings.push(`candidate name mismatch in NEC result card; expected ${normalized.name} but found ${personName}; skipped result`)
      continue
    }
    const hanja = hanjaMatch ? stripTags(hanjaMatch[2]) : null
    const { birthDate, gender } = parseBirthDateAndGender(dateMatch ? stripTags(dateMatch[2]) : stripTags(nameHtml), resultAttrs)

    for (const listMatch of listMatches) {
      const listAttrs = `${listMatch[1] || ""} ${listMatch[3] || ""}`
      const listHtml = listMatch[4]
      const titleMatch = listHtml.match(/<div\b[^>]*class=(['"])[^'"]*\bt\b[^'"]*\1[^>]*>([\s\S]*?)(?:<button\b[^>]*class=(['"])[^'"]*\bmore\b|<div\b[^>]*class=(['"])[^'"]*\bbox\b|$)/i)
      const title = parseTitle(titleMatch ? titleMatch[2] : listHtml)
      const electionNameCode = getHtmlAttr(listAttrs, "data-election-name")
      const electionCode = getHtmlAttr(listAttrs, "data-election-code")
      const profile = parseProfileFields(listHtml)

      // A title date may be year-only; only a full YYYYMMDD value is hyphenated.
      const titleDate = title.electionDate
      let formattedElectionDate
      if (titleDate && titleDate.length === 8) {
        formattedElectionDate = `${titleDate.slice(0, 4)}-${titleDate.slice(4, 6)}-${titleDate.slice(6, 8)}`
      } else if (titleDate) {
        formattedElectionDate = titleDate
      }

      const item = compactObject({
        name: personName,
        hanja,
        birth_date: birthDate,
        gender,
        election_date: formattedElectionDate,
        election_name: title.electionName,
        election_name_code: electionNameCode,
        election_code: electionCode,
        election_type: title.electionType,
        is_local_election: LOCAL_ELECTION_CODES.has(electionCode) || /지방선거|시·도지사|구·시·군|의회의원|교육감/.test(`${title.electionName || ""} ${title.electionType || ""}`),
        party: title.party,
        district: title.district,
        votes: title.votes,
        vote_share: title.voteShare,
        elected: title.elected || undefined,
        city_code: getHtmlAttr(listAttrs, "data-city-code"),
        sgg_city_code: getHtmlAttr(listAttrs, "data-sgg-city-code"),
        town_code: getHtmlAttr(listAttrs, "data-town-code"),
        ...profile
      })
      if (filterItem(item, normalized)) {
        const itemKey = getCandidateElectionKey(item)
        if (!itemKeys.has(itemKey)) {
          itemKeys.add(itemKey)
          items.push(item)
        }
      }
    }
  }

  if (parsedResultCards === 0 && hasUnparsedCandidateResults(markup)) {
    warnings.push("parser drift suspected: NEC search result markers were present but no supported result cards could be parsed")
  }
  if (hasClientSideFilters(normalized) && parsedElectionEntries >= normalized.upstreamLimit) {
    warnings.push(`NEC search page was capped at ${normalized.upstreamLimit} upstream rows before client-side filters; additional matches may require pagination`)
  }

  const limitedItems = items.slice(0, normalized.limit)
  if (limitedItems.length === 0 && warnings.length === 0) warnings.push("no candidate results matched the provided name/filters on the NEC search page")
  const result = {
    query: compactObject({
      name: normalized.name,
      local_only: normalized.localOnly,
      election_code: normalized.electionCode,
      election_date: normalized.electionDate,
      region: normalized.region,
      limit: normalized.limit
    }),
    summary: {
      returned_count: limitedItems.length,
      matched_before_limit: items.length,
      upstream_result_limit: normalized.upstreamLimit,
      local_only: normalized.localOnly
    },
    items: limitedItems,
    warnings,
    source
  }
  if (normalized.includeHtml) result.html = html
  return result
}
|
|
368
|
+
|
|
369
|
+
/**
 * Search NEC for a candidate by name and return the parsed result.
 *
 * When `options.fixture` (or `fixturePath`) is given, the HTML is read from
 * that file instead of being fetched. Otherwise the search form is POSTed
 * with a timeout enforced through an AbortController.
 *
 * @param {object} [options] Search options; see normalizeSearchOptions.
 * @param {object} [deps] Injectable dependencies.
 * @param {Function} [deps.fetchImpl] fetch-compatible function; defaults to globalThis.fetch.
 * @param {number} [deps.timeoutMs] Request timeout; defaults to DEFAULT_TIMEOUT_MS.
 * @returns {Promise<object>} Result produced by parseSearchHtml.
 * @throws {Error} On invalid options, a missing fetch implementation, or a non-OK HTTP status.
 */
async function searchCandidates(options = {}, deps = {}) {
  const fixturePath = options.fixture || options.fixturePath
  const request = buildSearchRequest(options)
  if (fixturePath) {
    const fixtureHtml = await fs.readFile(fixturePath, "utf8")
    return parseSearchHtml(fixtureHtml, request.options)
  }

  const fetchImpl = deps.fetchImpl || globalThis.fetch
  if (typeof fetchImpl !== "function") throw new Error("No fetch implementation is available. Use Node.js 18+ or provide fetchImpl.")

  const controller = new AbortController()
  const timeoutMs = deps.timeoutMs || DEFAULT_TIMEOUT_MS
  const timeout = setTimeout(() => controller.abort(), timeoutMs)
  try {
    const { url, method, headers, body } = request
    const response = await fetchImpl(url, { method, headers, body, signal: controller.signal })
    const html = await response.text()
    if (response.ok) return parseSearchHtml(html, request.options)
    throw new Error(`NEC candidate search failed with HTTP ${response.status}: ${html.slice(0, 160)}`)
  } finally {
    // Always release the timer, whether the request succeeded or failed.
    clearTimeout(timeout)
  }
}
|
|
394
|
+
|
|
395
|
+
module.exports = {
|
|
396
|
+
NEC_SEARCH_URL,
|
|
397
|
+
DEFAULT_TIMEOUT_MS,
|
|
398
|
+
LOCAL_ELECTION_CODES,
|
|
399
|
+
ELECTION_CODE_ALIASES,
|
|
400
|
+
buildSearchRequest,
|
|
401
|
+
cleanText,
|
|
402
|
+
decodeHtml,
|
|
403
|
+
normalizeSearchOptions,
|
|
404
|
+
parseSearchHtml,
|
|
405
|
+
searchCandidates,
|
|
406
|
+
stripTags
|
|
407
|
+
}
|