scraper2-hj3415 2.4.1__py3-none-any.whl → 2.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scraper2_hj3415/app/adapters/out/playwright/browser.py +26 -0
- {scraper2 → scraper2_hj3415/app}/adapters/out/playwright/browser_factory.py +7 -7
- scraper2_hj3415/app/adapters/out/playwright/capabilities/__init__.py +18 -0
- scraper2_hj3415/app/adapters/out/playwright/capabilities/_base.py +19 -0
- scraper2_hj3415/app/adapters/out/playwright/capabilities/interaction.py +37 -0
- scraper2_hj3415/app/adapters/out/playwright/capabilities/navigation.py +24 -0
- scraper2_hj3415/app/adapters/out/playwright/capabilities/scope.py +84 -0
- scraper2_hj3415/app/adapters/out/playwright/capabilities/table.py +90 -0
- scraper2_hj3415/app/adapters/out/playwright/capabilities/text.py +25 -0
- scraper2_hj3415/app/adapters/out/playwright/capabilities/wait.py +96 -0
- {scraper2 → scraper2_hj3415/app}/adapters/out/playwright/session.py +1 -1
- scraper2_hj3415/app/adapters/out/sinks/memory_sink.py +25 -0
- scraper2_hj3415/app/adapters/out/sinks/mongo_sink.py +63 -0
- {scraper2/adapters/out/sinks/memory → scraper2_hj3415/app/adapters/out/sinks}/store.py +14 -5
- scraper2_hj3415/app/adapters/site/wisereport_playwright.py +379 -0
- scraper2_hj3415/app/composition.py +225 -0
- scraper2_hj3415/app/domain/blocks.py +61 -0
- scraper2_hj3415/app/domain/constants.py +33 -0
- scraper2_hj3415/app/domain/doc.py +16 -0
- scraper2_hj3415/app/domain/endpoint.py +11 -0
- scraper2_hj3415/app/domain/series.py +11 -0
- scraper2_hj3415/app/domain/types.py +19 -0
- scraper2_hj3415/app/parsing/_normalize/label.py +92 -0
- scraper2_hj3415/app/parsing/_normalize/table.py +53 -0
- scraper2_hj3415/app/parsing/_normalize/text.py +31 -0
- scraper2_hj3415/app/parsing/_normalize/values.py +70 -0
- scraper2_hj3415/app/parsing/_tables/html_table.py +89 -0
- scraper2_hj3415/app/parsing/c101/__init__.py +0 -0
- scraper2_hj3415/app/parsing/c101/_sise_normalizer.py +103 -0
- scraper2_hj3415/app/parsing/c101/company_overview.py +47 -0
- scraper2_hj3415/app/parsing/c101/earning_surprise.py +217 -0
- scraper2_hj3415/app/parsing/c101/fundamentals.py +95 -0
- scraper2_hj3415/app/parsing/c101/major_shareholders.py +57 -0
- scraper2_hj3415/app/parsing/c101/sise.py +47 -0
- scraper2_hj3415/app/parsing/c101/summary_cmp.py +87 -0
- scraper2_hj3415/app/parsing/c101/yearly_consensus.py +197 -0
- scraper2_hj3415/app/parsing/c101_parser.py +45 -0
- scraper2_hj3415/app/parsing/c103_parser.py +22 -0
- scraper2_hj3415/app/parsing/c104_parser.py +26 -0
- scraper2_hj3415/app/parsing/c106_parser.py +137 -0
- scraper2_hj3415/app/parsing/c108_parser.py +254 -0
- scraper2_hj3415/app/ports/__init__.py +0 -0
- scraper2_hj3415/app/ports/browser/__init__.py +0 -0
- scraper2_hj3415/app/ports/browser/browser_factory_port.py +9 -0
- scraper2_hj3415/app/ports/browser/browser_port.py +32 -0
- scraper2_hj3415/app/ports/browser/capabilities/__init__.py +15 -0
- scraper2_hj3415/app/ports/browser/capabilities/interaction.py +27 -0
- scraper2_hj3415/app/ports/browser/capabilities/navigation.py +18 -0
- scraper2_hj3415/app/ports/browser/capabilities/scope.py +66 -0
- scraper2_hj3415/app/ports/browser/capabilities/table.py +28 -0
- scraper2_hj3415/app/ports/browser/capabilities/text.py +16 -0
- scraper2_hj3415/app/ports/browser/capabilities/wait.py +51 -0
- scraper2_hj3415/app/ports/ingest/__init__.py +0 -0
- scraper2_hj3415/app/ports/ingest/nfs_ingest_port.py +28 -0
- scraper2_hj3415/app/ports/sinks/__init__.py +0 -0
- scraper2_hj3415/app/ports/sinks/nfs_sink_port.py +20 -0
- scraper2_hj3415/app/ports/site/__init__.py +0 -0
- scraper2_hj3415/app/ports/site/wisereport_port.py +30 -0
- scraper2_hj3415/app/services/__init__.py +0 -0
- scraper2_hj3415/app/services/fetch/__init__.py +0 -0
- scraper2_hj3415/app/services/fetch/fetch_c101.py +59 -0
- scraper2_hj3415/app/services/fetch/fetch_c103.py +121 -0
- scraper2_hj3415/app/services/fetch/fetch_c104.py +160 -0
- scraper2_hj3415/app/services/fetch/fetch_c106.py +90 -0
- scraper2_hj3415/app/services/fetch/fetch_c108.py +59 -0
- scraper2_hj3415/app/services/nfs_doc_builders.py +304 -0
- scraper2_hj3415/app/usecases/__init__.py +0 -0
- scraper2_hj3415/app/usecases/ingest/__init__.py +0 -0
- scraper2_hj3415/app/usecases/ingest/ingest_c101.py +111 -0
- scraper2_hj3415/app/usecases/ingest/ingest_c103.py +162 -0
- scraper2_hj3415/app/usecases/ingest/ingest_c104.py +182 -0
- scraper2_hj3415/app/usecases/ingest/ingest_c106.py +136 -0
- scraper2_hj3415/app/usecases/ingest/ingest_c108.py +122 -0
- scraper2/main.py → scraper2_hj3415/cli.py +45 -72
- {scraper2_hj3415-2.4.1.dist-info → scraper2_hj3415-2.7.0.dist-info}/METADATA +3 -1
- scraper2_hj3415-2.7.0.dist-info/RECORD +93 -0
- scraper2_hj3415-2.7.0.dist-info/entry_points.txt +3 -0
- scraper2/adapters/out/playwright/browser.py +0 -102
- scraper2/adapters/out/sinks/memory/__init__.py +0 -15
- scraper2/adapters/out/sinks/memory/c101_memory_sink.py +0 -26
- scraper2/adapters/out/sinks/memory/c103_memory_sink.py +0 -26
- scraper2/adapters/out/sinks/memory/c104_memory_sink.py +0 -26
- scraper2/adapters/out/sinks/memory/c106_memory_sink.py +0 -26
- scraper2/adapters/out/sinks/memory/c108_memory_sink.py +0 -26
- scraper2/adapters/out/sinks/mongo/__init__.py +0 -14
- scraper2/adapters/out/sinks/mongo/c101_mongo_sink.py +0 -43
- scraper2/adapters/out/sinks/mongo/c103_mongo_sink.py +0 -41
- scraper2/adapters/out/sinks/mongo/c104_mongo_sink.py +0 -41
- scraper2/adapters/out/sinks/mongo/c106_mongo_sink.py +0 -41
- scraper2/adapters/out/sinks/mongo/c108_mongo_sink.py +0 -41
- scraper2/app/composition.py +0 -204
- scraper2/app/parsing/_converters.py +0 -85
- scraper2/app/parsing/_normalize.py +0 -134
- scraper2/app/parsing/c101_parser.py +0 -143
- scraper2/app/parsing/c103_parser.py +0 -128
- scraper2/app/parsing/c104_parser.py +0 -143
- scraper2/app/parsing/c106_parser.py +0 -153
- scraper2/app/parsing/c108_parser.py +0 -65
- scraper2/app/ports/browser/browser_factory_port.py +0 -11
- scraper2/app/ports/browser/browser_port.py +0 -22
- scraper2/app/ports/ingest_port.py +0 -14
- scraper2/app/ports/sinks/base_sink_port.py +0 -14
- scraper2/app/ports/sinks/c101_sink_port.py +0 -9
- scraper2/app/ports/sinks/c103_sink_port.py +0 -9
- scraper2/app/ports/sinks/c104_sink_port.py +0 -9
- scraper2/app/ports/sinks/c106_sink_port.py +0 -9
- scraper2/app/ports/sinks/c108_sink_port.py +0 -9
- scraper2/app/usecases/fetch/fetch_c101.py +0 -43
- scraper2/app/usecases/fetch/fetch_c103.py +0 -103
- scraper2/app/usecases/fetch/fetch_c104.py +0 -76
- scraper2/app/usecases/fetch/fetch_c106.py +0 -90
- scraper2/app/usecases/fetch/fetch_c108.py +0 -49
- scraper2/app/usecases/ingest/ingest_c101.py +0 -36
- scraper2/app/usecases/ingest/ingest_c103.py +0 -37
- scraper2/app/usecases/ingest/ingest_c104.py +0 -37
- scraper2/app/usecases/ingest/ingest_c106.py +0 -38
- scraper2/app/usecases/ingest/ingest_c108.py +0 -39
- scraper2_hj3415-2.4.1.dist-info/RECORD +0 -63
- scraper2_hj3415-2.4.1.dist-info/entry_points.txt +0 -3
- {scraper2 → scraper2_hj3415}/.DS_Store +0 -0
- {scraper2 → scraper2_hj3415}/__init__.py +0 -0
- {scraper2/adapters/out → scraper2_hj3415/app}/__init__.py +0 -0
- {scraper2/adapters/out/playwright → scraper2_hj3415/app/adapters}/__init__.py +0 -0
- {scraper2 → scraper2_hj3415/app}/adapters/out/.DS_Store +0 -0
- {scraper2/app → scraper2_hj3415/app/adapters/out}/__init__.py +0 -0
- {scraper2/app/parsing → scraper2_hj3415/app/adapters/out/playwright}/__init__.py +0 -0
- {scraper2 → scraper2_hj3415/app}/adapters/out/sinks/.DS_Store +0 -0
- {scraper2/app/ports → scraper2_hj3415/app/adapters/out/sinks}/__init__.py +0 -0
- {scraper2/app/ports/browser → scraper2_hj3415/app/adapters/site}/__init__.py +0 -0
- {scraper2/app/ports/sinks → scraper2_hj3415/app/domain}/__init__.py +0 -0
- {scraper2/app/usecases → scraper2_hj3415/app/parsing}/__init__.py +0 -0
- {scraper2/app/usecases/fetch → scraper2_hj3415/app/parsing/_normalize}/__init__.py +0 -0
- {scraper2/app/usecases/ingest → scraper2_hj3415/app/parsing/_tables}/__init__.py +0 -0
- {scraper2_hj3415-2.4.1.dist-info → scraper2_hj3415-2.7.0.dist-info}/WHEEL +0 -0
- {scraper2_hj3415-2.4.1.dist-info → scraper2_hj3415-2.7.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,379 @@
|
|
|
1
|
+
# scraper2_hj3415/app/adapters/site/wisereport_playwright.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import re
|
|
5
|
+
from scraper2_hj3415.app.ports.browser.browser_port import BrowserPort
|
|
6
|
+
from scraper2_hj3415.app.ports.site.wisereport_port import WiseReportPort
|
|
7
|
+
from logging_hj3415 import logger
|
|
8
|
+
|
|
9
|
+
# Matches a quarter-month suffix ("/03", "/06", "/09") in a header period
# string such as "2024/06" — used to decide whether a table shows quarterly data.
_Q_SIGNAL_RE = re.compile(r"/(03|06|09)\b")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class WiseReportPlaywright(WiseReportPort):
    """Playwright-backed adapter for WiseReportPort.

    Drives WiseReport financial pages through a BrowserPort: expands the
    yearly-consensus toggles inside a specific table and forces the c103/c104
    pages into the requested yearly or quarterly view, confirming the result
    from the table header instead of trusting the click alone.
    """

    def __init__(self, browser: BrowserPort):
        # All DOM interaction is delegated to this port implementation.
        self.browser = browser

    async def _ensure_yearly_consensus_open_in_table_nth(
        self,
        *,
        table_selector: str,  # e.g. TABLE_XPATH ("xpath=//div[@id='wrapper']//div//table")
        table_index: int,  # e.g. TABLE_INDEX (2)
        after_click_sleep_ms: int = 150,
        max_rounds: int = 6,
        wait_timeout_sec: float = 12.0,
    ) -> bool:
        """Make sure the yearly-consensus columns end up expanded.

        Strategy:
        - scoped to the nth table,
        - find <a> toggles classed btn_moreY or btn_moreQQ whose text is the
          Korean label for "show yearly consensus",
        - click every one whose computed display style is not 'none',
        - after each click, wait for the table text to change.

        Returns True when the view is considered open, False when the
        "show" toggles still remain visible after max_rounds.
        """

        table_scoped = f"{table_selector} >> nth={table_index}"

        # Only toggle anchors inside the table (both btn_moreY and btn_moreQQ).
        VIEW_ALL = (
            f"{table_scoped} >> xpath=.//a["
            "("
            "contains(@class,'btn_moreY') or contains(@class,'btn_moreQQ')"
            ")"
            " and .//span[contains(normalize-space(.),'연간컨센서스보기')]"
            "]"
        )

        CLOSE_ALL = (
            f"{table_scoped} >> xpath=.//a["
            "("
            "contains(@class,'btn_moreY') or contains(@class,'btn_moreQQ')"
            ")"
            " and .//span[contains(normalize-space(.),'연간컨센서스닫기')]"
            "]"
        )

        # Seed "prev_text" so later calls can detect a table-text change.
        prev_text = await self.browser.wait_table_text_changed(
            table_selector,
            index=table_index,
            prev_text=None,
            timeout_sec=wait_timeout_sec,
            min_lines=10,
        )

        logger.debug("ensure_yearly_consensus_open_in_table_nth: start")

        # Rounds exist because:
        # - there may be several "show" toggles, and clicking can reshuffle the DOM
        # - anything missed in one pass is re-scanned on the next round
        for round_no in range(1, max_rounds + 1):
            view_cnt = await self.browser.count(VIEW_ALL)
            close_cnt = await self.browser.count(CLOSE_ALL)
            logger.debug(
                f"round={round_no} toggle exists: view={view_cnt}, close={close_cnt}"
            )

            # No "show" toggles at all -> either everything is already expanded
            # (only "close" toggles remain) or the page structure differs and
            # nothing was found. Treated as success here.
            if view_cnt == 0:
                logger.debug("no VIEW toggles found in-table -> treat as OPEN")
                return True

            clicked_any = False

            # Walk i = 0..view_cnt-1 and click only toggles with display != none.
            # (The DOM may change mid-loop, shifting count/order, so keep going
            # even when an individual click fails.)
            for i in range(view_cnt):
                try:
                    # Skip if the DOM changed and this index is gone.
                    if not await self.browser.is_attached(VIEW_ALL, index=i):
                        continue

                    disp = await self.browser.computed_style(
                        VIEW_ALL, index=i, prop="display"
                    )
                    if disp.strip().lower() == "none":
                        continue

                    # Off-screen elements can fail the click, so scroll first.
                    await self.browser.scroll_into_view(VIEW_ALL, index=i)

                    # Trial click (allowed to fail silently).
                    # NOTE(review): if try_click actually performs the click on
                    # success, the click() below clicks the toggle a second
                    # time — confirm try_click semantics against BrowserPort.
                    _ = await self.browser.try_click(
                        VIEW_ALL, index=i, timeout_ms=1500, force=False
                    )

                    # Real click; fall back to force=True on failure.
                    try:
                        await self.browser.click(
                            VIEW_ALL, index=i, timeout_ms=4000, force=False
                        )
                    except Exception:
                        await self.browser.click(
                            VIEW_ALL, index=i, timeout_ms=4000, force=True
                        )

                    await self.browser.sleep_ms(after_click_sleep_ms)

                    # After the click, wait for the table text to change.
                    prev_text = await self.browser.wait_table_text_changed(
                        table_selector,
                        index=table_index,
                        prev_text=prev_text,
                        timeout_sec=wait_timeout_sec,
                        min_lines=10,
                    )

                    clicked_any = True
                    logger.debug(f"clicked VIEW toggle: idx={i}, display={disp}")

                except Exception as e:
                    logger.debug(
                        f"click VIEW toggle failed: idx={i}, err={type(e).__name__}: {e}"
                    )
                    continue

            # Nothing clicked this round means either:
            # - every VIEW toggle had display:none, or
            # - the clicks were blocked
            # => re-check whether any visible VIEW toggle remains.
            if not clicked_any:
                remain = await self.browser.count(VIEW_ALL)
                logger.debug(f"no clicks in round; remain VIEW count={remain}")
                # VIEW toggles that are all display:none effectively mean the
                # view is open (only "close" toggles visible) -> treat as success.
                return True

            # Next round re-scans; once everything is clicked there is nothing
            # visible left to click and the loop exits via the branches above.

        # Reaching here after all rounds means "show" toggles keep staying
        # visible — a structure/permission/overlay issue preventing expansion.
        logger.warning("ensure_yearly_consensus_open_in_table_nth: exceeded max_rounds")
        return False

    async def _click_steps(
        self,
        steps: list[tuple[str, str]],
        *,
        jitter_sec: float = 0.6,
    ) -> None:
        """Click each (name, selector) step in order, with a short pause between steps.

        Each step: wait for attachment, scroll into view, attempt a soft click,
        then a real click with a force=True fallback.
        """
        for _name, selector in steps:
            await self.browser.wait_attached(selector)
            await self.browser.scroll_into_view(selector)

            logger.info(f"click step: {_name}")

            ok = await self.browser.try_click(selector, timeout_ms=1500, force=False)
            try:
                if ok:
                    # NOTE(review): when try_click succeeded this issues a
                    # second click on the same selector — verify this double
                    # click is intended for these controls.
                    await self.browser.click(selector, timeout_ms=4000, force=False)
                else:
                    await self.browser.click(selector, timeout_ms=4000, force=True)
            except Exception:
                await self.browser.click(selector, timeout_ms=4000, force=True)

            # NOTE(review): despite the name, this is deterministic — the wait
            # is a fixed 0.5 + jitter_sec/2 seconds, no randomness involved.
            wait = int((0.5 + (jitter_sec * 0.5)) * 1000)
            await self.browser.sleep_ms(wait)

    async def _is_quarter_view_by_header(
        self, *, table_selector: str, table_index: int
    ) -> tuple[bool, list[str]]:
        """Decide whether the nth table currently shows quarterly data.

        - any YYYY/03, YYYY/06 or YYYY/09 in the header confirms a quarter view
        - a header with only YYYY/12 is NOT treated as quarterly (the toggle
          most likely failed to switch)

        Returns (is_quarter_view, header_period_strings).
        """
        periods = await self.browser.table_header_periods_mm_nth(
            table_selector, index=table_index
        )
        is_q = any(_Q_SIGNAL_RE.search(p) for p in periods)
        return is_q, periods

    async def set_view_c103(
        self,
        *,
        key: str,
        steps: list[tuple[str, str]],
        table_selector: str,
        table_index: int,
        max_attempts: int = 5,
        stabilize_timeout_sec: float = 10.0,
    ) -> None:
        """Force the c103 page into the state that *key* demands, and confirm it.

        - key ending in "q" => the quarterly view is required
        - key ending in "y" => the yearly view is required (no quarter signal)

        Flow: click -> stabilize (wait_table_text_changed) -> header check ->
        recover and retry on mismatch. Raises RuntimeError after max_attempts.
        """
        want_q = key.endswith("q")
        last_periods: list[str] = []
        last_is_q: bool | None = None

        for attempt in range(1, max_attempts + 1):
            logger.info(f"set_view_c103: key={key} attempt={attempt} want_q={want_q}")

            # 1) Click through the steps (with a light inter-step pause).
            await self._click_steps(steps, jitter_sec=0.6)

            # 2) Expand the consensus toggles (best effort).
            try:
                await self._ensure_yearly_consensus_open_in_table_nth(
                    table_selector=table_selector,
                    table_index=table_index,
                )
            except Exception as e:
                logger.debug(f"ensure open failed (ignored): {type(e).__name__}: {e}")

            # 3) Render stabilization: cannot guarantee a change, but it damps
            # the churn while the table is still loading.
            try:
                _ = await self.browser.wait_table_text_changed(
                    table_selector,
                    index=table_index,
                    prev_text=None,
                    min_rows=5,
                    min_lines=30,
                    timeout_sec=stabilize_timeout_sec,
                )
            except Exception as e:
                logger.debug(
                    f"stabilize wait failed (ignored): {type(e).__name__}: {e}"
                )

            # 4) Confirm the state from the header.
            is_q, periods = await self._is_quarter_view_by_header(
                table_selector=table_selector,
                table_index=table_index,
            )
            last_is_q, last_periods = is_q, periods

            logger.info(
                f"set_view_c103: key={key} header periods(head)={periods[:8]} is_q={is_q}"
            )

            if want_q == is_q:
                return  # success confirmed

            # 5) Recovery on mismatch.
            # Later attempts should escalate; strengthening scroll/force-click
            # belongs in _click_steps, so here a page reload resets the state.
            if attempt in (2, 4):
                logger.warning(f"set_view_c103 mismatch -> reload | key={key}")
                await self.browser.reload(timeout_ms=12_000)
                await self.browser.sleep_ms(250)

        raise RuntimeError(
            f"set_view_c103 failed: key={key} want_q={want_q} last_is_q={last_is_q} "
            f"last_periods={last_periods[:12]}"
        )

    async def set_view_c104(
        self,
        *,
        key: str,
        steps: list[tuple[str, str]],
        table_selector: str,
        table_index: int,
        prev_text_by_idx: dict[int, str | None],
        max_attempts: int = 5,
        stabilize_timeout_sec: float = 10.0,
        min_rows: int = 5,
        min_lines: int = 30,
        open_consensus: bool = True,
    ) -> None:
        """Force the c104 page into the state that *key* demands, and confirm it.

        - key ending in "q" => quarterly view required (header contains 03/06/09)
        - key ending in "y" => yearly view required (no 03/06/09; 12 is expected)

        Procedure:
        1) click the steps
        2) (optional) expand the yearly-consensus toggles
        3) wait_table_text_changed, tracking prev_text per table index
        4) decide q/y from the header periods
        5) on mismatch, reload and retry; raises RuntimeError after max_attempts
        """
        want_q = key.endswith("q")

        # Defensive default for the per-index prev_text tracker.
        if table_index not in prev_text_by_idx:
            prev_text_by_idx[table_index] = None

        last_periods: list[str] = []
        last_is_q: bool | None = None

        for attempt in range(1, max_attempts + 1):
            logger.info(
                f"set_view_c104: key={key} idx={table_index} attempt={attempt} want_q={want_q}"
            )

            # 1) Click (the action itself).
            await self._click_steps(steps, jitter_sec=0.6)

            # 2) Expand the consensus columns (optional, best effort).
            if open_consensus:
                try:
                    await self._ensure_yearly_consensus_open_in_table_nth(
                        table_selector=table_selector,
                        table_index=table_index,
                        wait_timeout_sec=stabilize_timeout_sec,
                    )
                except Exception as e:
                    logger.debug(
                        f"ensure open failed (ignored): {type(e).__name__}: {e}"
                    )

            # 3) Stabilize (wait for changed/valid text), tracking prev per index.
            try:
                prev_text_by_idx[table_index] = await self.browser.wait_table_text_changed(
                    table_selector,
                    index=table_index,
                    prev_text=prev_text_by_idx[table_index],
                    min_rows=min_rows,
                    min_lines=min_lines,
                    timeout_sec=stabilize_timeout_sec,
                )
            except Exception as e:
                logger.debug(
                    f"stabilize wait failed (ignored): {type(e).__name__}: {e}"
                )

            # 4) Confirm the state from the header.
            is_q, periods = await self._is_quarter_view_by_header(
                table_selector=table_selector,
                table_index=table_index,
            )
            last_is_q, last_periods = is_q, periods

            logger.info(
                f"set_view_c104: key={key} idx={table_index} periods(head)={periods[:8]} is_q={is_q}"
            )

            # An empty periods list is "indeterminate" — safer to retry than decide.
            if not periods:
                logger.warning(
                    f"set_view_c104: header periods empty -> retry | key={key} idx={table_index}"
                )
            else:
                if want_q == is_q:
                    return  # success confirmed

            # 5) Recovery: reload on attempts 2 and 4 to reset page state.
            if attempt in (2, 4):
                logger.warning(
                    f"set_view_c104 mismatch/uncertain -> reload | key={key}"
                )
                await self.browser.reload(timeout_ms=12_000)

            await self.browser.sleep_ms(250)

        raise RuntimeError(
            f"set_view_c104 failed: key={key} idx={table_index} want_q={want_q} "
            f"last_is_q={last_is_q} last_periods={last_periods[:8]}"
        )
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
# scraper2_hj3415/app/composition.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import os
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
from pymongo.asynchronous.database import AsyncDatabase
|
|
9
|
+
|
|
10
|
+
from scraper2_hj3415.app.ports.browser.browser_factory_port import BrowserFactoryPort
|
|
11
|
+
from scraper2_hj3415.app.adapters.out.playwright.browser_factory import (
|
|
12
|
+
PlaywrightBrowserFactory,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
from scraper2_hj3415.app.services.fetch.fetch_c101 import FetchC101
|
|
16
|
+
from scraper2_hj3415.app.services.fetch.fetch_c103 import FetchC103
|
|
17
|
+
from scraper2_hj3415.app.services.fetch.fetch_c104 import FetchC104
|
|
18
|
+
from scraper2_hj3415.app.services.fetch.fetch_c106 import FetchC106
|
|
19
|
+
from scraper2_hj3415.app.services.fetch.fetch_c108 import FetchC108
|
|
20
|
+
|
|
21
|
+
from scraper2_hj3415.app.usecases.ingest.ingest_c101 import IngestC101
|
|
22
|
+
from scraper2_hj3415.app.usecases.ingest.ingest_c103 import IngestC103
|
|
23
|
+
from scraper2_hj3415.app.usecases.ingest.ingest_c104 import IngestC104
|
|
24
|
+
from scraper2_hj3415.app.usecases.ingest.ingest_c106 import IngestC106
|
|
25
|
+
from scraper2_hj3415.app.usecases.ingest.ingest_c108 import IngestC108
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
from scraper2_hj3415.app.ports.sinks.nfs_sink_port import NfsSinkPort
|
|
29
|
+
from contracts_hj3415.nfs.c101_dto import C101DTO
|
|
30
|
+
from contracts_hj3415.nfs.c103_dto import C103DTO
|
|
31
|
+
from contracts_hj3415.nfs.c104_dto import C104DTO
|
|
32
|
+
from contracts_hj3415.nfs.c106_dto import C106DTO
|
|
33
|
+
from contracts_hj3415.nfs.c108_dto import C108DTO
|
|
34
|
+
|
|
35
|
+
from scraper2_hj3415.app.adapters.out.sinks.mongo_sink import MongoSink
|
|
36
|
+
from scraper2_hj3415.app.adapters.out.sinks.memory_sink import MemorySink
|
|
37
|
+
|
|
38
|
+
from scraper2_hj3415.app.adapters.out.sinks.store import InMemoryStore
|
|
39
|
+
|
|
40
|
+
from db2_hj3415.mongo import Mongo
|
|
41
|
+
|
|
42
|
+
from scraper2_hj3415.app.domain.types import Sink
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _env_bool(key: str, default: bool) -> bool:
    """Read a boolean from the environment variable *key*.

    Returns *default* when the variable is unset; otherwise True iff the
    trimmed, lower-cased value is one of "1", "true", "yes", "y", "on".
    """
    raw = os.getenv(key)
    if raw is None:
        return default
    return raw.strip().lower() in {"1", "true", "yes", "y", "on"}
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _env_int(key: str, default: int) -> int:
    """Read an integer from the environment variable *key*.

    Returns *default* when the variable is unset or not parseable as an int.
    """
    try:
        return int(os.environ[key])
    except (KeyError, ValueError):
        return default
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def build_browser_factory() -> BrowserFactoryPort:
    """Build the Playwright browser factory, configured from environment variables.

    Environment knobs (with defaults):
    - SCRAPER_HEADLESS (True)
    - SCRAPER_TIMEOUT_MS (20000)
    - SCRAPER_MAX_CONCURRENCY (2)
    """
    return PlaywrightBrowserFactory(
        headless=_env_bool("SCRAPER_HEADLESS", True),
        timeout_ms=_env_int("SCRAPER_TIMEOUT_MS", 20_000),
        max_concurrency=_env_int("SCRAPER_MAX_CONCURRENCY", 2),
    )
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
# -------------------------
|
|
71
|
+
# Bundles
|
|
72
|
+
# -------------------------
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@dataclass(frozen=True)
class FetchUsecases:
    """Bundle of per-endpoint fetch services (one per WiseReport page type)."""

    c101: FetchC101
    c103: FetchC103
    c104: FetchC104
    c106: FetchC106
    c108: FetchC108
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@dataclass(frozen=True)
class Sinks:
    """Bundle of per-endpoint sink ports, each typed to its endpoint DTO."""

    c101: NfsSinkPort[C101DTO]
    c103: NfsSinkPort[C103DTO]
    c104: NfsSinkPort[C104DTO]
    c106: NfsSinkPort[C106DTO]
    c108: NfsSinkPort[C108DTO]
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
@dataclass(frozen=True)
class IngestUsecases:
    """Bundle of per-endpoint ingest use-cases (fetch + sink pairs)."""

    c101: IngestC101
    c103: IngestC103
    c104: IngestC104
    c106: IngestC106
    c108: IngestC108
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
@dataclass(frozen=True)
class Usecases:
    """Top-level application bundle: fetch/ingest use-cases plus backing resources.

    The optional fields are populated depending on the chosen sink backend;
    call aclose() to release long-lived resources when done.
    """

    fetch: FetchUsecases
    ingest: IngestUsecases
    sinks: Sinks
    store: InMemoryStore | None = None  # set only for the "memory" sink
    mongo: Mongo | None = None  # set only for the "mongo" sink
    db: AsyncDatabase | None = None  # set only for the "mongo" sink
    browser_factory: Optional[BrowserFactoryPort] = None

    async def aclose(self) -> None:
        """Close the browser factory first, then the Mongo client (if present)."""
        if self.browser_factory is not None:
            await self.browser_factory.aclose()

        if self.mongo is not None:
            await self.mongo.close()
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
# -------------------------
|
|
121
|
+
# builders
|
|
122
|
+
# -------------------------
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def build_fetch_usecases(*, factory: BrowserFactoryPort) -> FetchUsecases:
    """Wire every per-endpoint fetch service to the shared browser factory."""
    c101 = FetchC101(factory=factory)
    c103 = FetchC103(factory=factory)
    c104 = FetchC104(factory=factory)
    c106 = FetchC106(factory=factory)
    c108 = FetchC108(factory=factory)
    return FetchUsecases(c101=c101, c103=c103, c104=c104, c106=c106, c108=c108)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
@dataclass(frozen=True)
class MemoryBundle:
    """In-memory backend bundle: the shared store and the sinks writing into it."""

    store: InMemoryStore
    sinks: Sinks
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def build_memory_bundle() -> MemoryBundle:
    """Create one shared InMemoryStore and a MemorySink per endpoint around it."""
    store = InMemoryStore()
    sinks = Sinks(
        c101=MemorySink(store),
        c103=MemorySink(store),
        c104=MemorySink(store),
        c106=MemorySink(store),
        c108=MemorySink(store),
    )
    return MemoryBundle(store=store, sinks=sinks)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
# ---- mongo bundle ----
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
@dataclass(frozen=True)
class MongoBundle:
    """Mongo backend bundle: the client, its async database handle, and the sinks."""

    mongo: Mongo
    db: AsyncDatabase
    sinks: Sinks
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def build_mongo_bundle() -> MongoBundle:
    """Open a Mongo client and build a MongoSink per endpoint on its database."""
    # db2 reads its settings from the environment (DB2_MONGO_URI etc.)
    mongo = Mongo()
    db = mongo.get_db()
    sinks = Sinks(
        c101=MongoSink(db),
        c103=MongoSink(db),
        c104=MongoSink(db),
        c106=MongoSink(db),
        c108=MongoSink(db),
    )
    return MongoBundle(mongo=mongo, db=db, sinks=sinks)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def build_ingest_usecases(*, fetch: FetchUsecases, sinks: Sinks) -> IngestUsecases:
    """Pair each endpoint's fetch service with its sink to form the ingest use-cases."""
    wired = {
        "c101": IngestC101(fetch=fetch.c101, sink=sinks.c101),
        "c103": IngestC103(fetch=fetch.c103, sink=sinks.c103),
        "c104": IngestC104(fetch=fetch.c104, sink=sinks.c104),
        "c106": IngestC106(fetch=fetch.c106, sink=sinks.c106),
        "c108": IngestC108(fetch=fetch.c108, sink=sinks.c108),
    }
    return IngestUsecases(**wired)
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def build_usecases(
    *, factory: BrowserFactoryPort | None = None, sink: Sink = "memory"
) -> Usecases:
    """Compose the full object graph for the chosen sink backend.

    Args:
        factory: browser factory to reuse; a default one is built when None.
        sink: backend selector, "memory" or "mongo".

    Returns:
        A Usecases bundle; memory-only fields (store) or mongo-only fields
        (mongo, db) are populated according to *sink*.

    Raises:
        ValueError: when *sink* names an unknown backend.
    """
    factory = factory or build_browser_factory()
    fetch = build_fetch_usecases(factory=factory)

    if sink == "memory":
        bundle = build_memory_bundle()
        ingest = build_ingest_usecases(fetch=fetch, sinks=bundle.sinks)
        return Usecases(
            fetch=fetch,
            ingest=ingest,
            sinks=bundle.sinks,
            store=bundle.store,
            browser_factory=factory,
        )

    if sink == "mongo":
        bundle = build_mongo_bundle()
        ingest = build_ingest_usecases(fetch=fetch, sinks=bundle.sinks)
        return Usecases(
            fetch=fetch,
            ingest=ingest,
            sinks=bundle.sinks,
            mongo=bundle.mongo,
            db=bundle.db,
            browser_factory=factory,
        )

    # Fix: the old message referenced "sink_kind", a parameter that does not
    # exist; report the actual argument name so callers can find the bug.
    raise ValueError(f"Unknown sink: {sink!r}")
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# scraper2_hj3415/app/domain/blocks.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Any, Mapping, Sequence
|
|
6
|
+
from scraper2_hj3415.app.domain.constants import BLOCK_KEYS_BY_ENDPOINT
|
|
7
|
+
from scraper2_hj3415.app.domain.endpoint import EndpointKind
|
|
8
|
+
from scraper2_hj3415.app.domain.types import BlockKey, MetricKey, Record
|
|
9
|
+
from scraper2_hj3415.app.domain.series import MetricSeries
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _validate_block_key(endpoint_kind: EndpointKind, block_key: str) -> None:
    """Raise ValueError when *block_key* is not an official key for *endpoint_kind*.

    Endpoints without a registered key list accept any key.
    """
    known = BLOCK_KEYS_BY_ENDPOINT.get(endpoint_kind)
    if known is None:
        return
    if block_key in known:
        return
    raise ValueError(f"Invalid block key for {endpoint_kind}: {block_key!r}")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass(frozen=True)
class MetricsBlock:
    """A block of metric time-series for one endpoint, keyed by metric name.

    Invariants enforced at construction:
    - block_key must belong to the endpoint's official key list (when one exists)
    - every mapping key must equal the contained series' own key
    """

    endpoint_kind: EndpointKind
    block_key: BlockKey
    metrics: Mapping[MetricKey, MetricSeries]

    def __post_init__(self) -> None:
        _validate_block_key(self.endpoint_kind, self.block_key)

        # Guard against container-key vs entity-key mismatch (optional check).
        for k, m in self.metrics.items():
            if m.key != k:
                raise ValueError(
                    f"Metric key mismatch: map key={k!r} != series key={m.key!r}"
                )

    def get(self, key: MetricKey) -> MetricSeries | None:
        """Return the series for *key*, or None when absent."""
        m = self.metrics.get(key)
        # NOTE(review): __post_init__ already enforces this invariant, so this
        # re-check can only fire if the mapping was mutated after construction.
        if m and m.key != key:
            raise ValueError("Metric key mismatch")
        return m
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# TODO: support more block shapes; structure below needs revisiting.
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass(frozen=True)
class RecordsBlock:
    """A block of row-shaped records for one endpoint.

    block_key is validated against the endpoint's official key list.
    """

    endpoint_kind: EndpointKind
    block_key: BlockKey
    rows: Sequence[Record]

    def __post_init__(self) -> None:
        _validate_block_key(self.endpoint_kind, self.block_key)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@dataclass(frozen=True)
class KvBlock:
    """A block of loosely-typed key/value pairs for one endpoint.

    NOTE(review): unlike the sibling blocks, block_key is not validated here —
    confirm whether that is intentional.
    """

    endpoint_kind: EndpointKind
    block_key: BlockKey
    values: Mapping[str, Any]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# Union of the concrete block payload shapes an endpoint can produce.
BlockData = MetricsBlock | RecordsBlock | KvBlock
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# scraper2_hj3415/app/domain/constants.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from typing import Mapping
|
|
5
|
+
|
|
6
|
+
from contracts_hj3415.nfs.types import BlockKey
|
|
7
|
+
from contracts_hj3415.nfs.constants import C101_BLOCK_KEYS, C103_BLOCK_KEYS, C104_BLOCK_KEYS, C106_BLOCK_KEYS, C108_BLOCK_KEYS
|
|
8
|
+
from scraper2_hj3415.app.domain.endpoint import EndpointKind
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Official block keys per WiseReport endpoint, sourced from the shared
# contracts package so scraper and downstream consumers stay in sync.
BLOCK_KEYS_BY_ENDPOINT: Mapping[EndpointKind, tuple[str, ...]] = {
    EndpointKind.C101: C101_BLOCK_KEYS,
    EndpointKind.C103: C103_BLOCK_KEYS,
    EndpointKind.C104: C104_BLOCK_KEYS,
    EndpointKind.C106: C106_BLOCK_KEYS,
    EndpointKind.C108: C108_BLOCK_KEYS,
}
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def get_block_keys(endpoint: EndpointKind) -> tuple[str, ...]:
    """Return the endpoint's "official" block-key list (empty when unregistered).

    Lives in the domain layer on purpose: selectors / table indexes and other
    implementation details do not belong here.
    """
    try:
        return BLOCK_KEYS_BY_ENDPOINT[endpoint]
    except KeyError:
        return ()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def is_known_block(endpoint: EndpointKind, key: BlockKey) -> bool:
    """Return whether *key* appears in the endpoint's official block-key list.

    Used for validation, filtering, and merging dynamic payloads.
    """
    official = BLOCK_KEYS_BY_ENDPOINT.get(endpoint, ())
    return key in official
|