scraper2-hj3415 2.6.0__py3-none-any.whl → 2.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scraper2_hj3415/.DS_Store +0 -0
- scraper2_hj3415/app/adapters/out/.DS_Store +0 -0
- scraper2_hj3415/app/adapters/out/playwright/browser.py +22 -369
- scraper2_hj3415/app/adapters/out/playwright/browser_factory.py +2 -2
- scraper2_hj3415/app/adapters/out/playwright/capabilities/__init__.py +18 -0
- scraper2_hj3415/app/adapters/out/playwright/capabilities/_base.py +19 -0
- scraper2_hj3415/app/adapters/out/playwright/capabilities/interaction.py +37 -0
- scraper2_hj3415/app/adapters/out/playwright/capabilities/navigation.py +24 -0
- scraper2_hj3415/app/adapters/out/playwright/capabilities/scope.py +84 -0
- scraper2_hj3415/app/adapters/out/playwright/capabilities/table.py +90 -0
- scraper2_hj3415/app/adapters/out/playwright/capabilities/text.py +25 -0
- scraper2_hj3415/app/adapters/out/playwright/capabilities/wait.py +96 -0
- scraper2_hj3415/app/adapters/out/sinks/.DS_Store +0 -0
- scraper2_hj3415/app/adapters/out/sinks/memory_sink.py +3 -3
- scraper2_hj3415/app/adapters/out/sinks/mongo_sink.py +11 -11
- scraper2_hj3415/app/adapters/site/wisereport_playwright.py +220 -9
- scraper2_hj3415/app/domain/constants.py +2 -2
- scraper2_hj3415/app/parsing/_tables/html_table.py +3 -2
- scraper2_hj3415/app/parsing/c103_parser.py +4 -1
- scraper2_hj3415/app/parsing/c104_parser.py +4 -1
- scraper2_hj3415/app/ports/browser/browser_port.py +25 -108
- scraper2_hj3415/app/ports/browser/capabilities/__init__.py +15 -0
- scraper2_hj3415/app/ports/browser/capabilities/interaction.py +27 -0
- scraper2_hj3415/app/ports/browser/capabilities/navigation.py +18 -0
- scraper2_hj3415/app/ports/browser/capabilities/scope.py +66 -0
- scraper2_hj3415/app/ports/browser/capabilities/table.py +28 -0
- scraper2_hj3415/app/ports/browser/capabilities/text.py +16 -0
- scraper2_hj3415/app/ports/browser/capabilities/wait.py +51 -0
- scraper2_hj3415/app/ports/sinks/nfs_sink_port.py +3 -3
- scraper2_hj3415/app/ports/site/wisereport_port.py +20 -10
- scraper2_hj3415/app/services/fetch/fetch_c103.py +18 -32
- scraper2_hj3415/app/services/fetch/fetch_c104.py +28 -51
- scraper2_hj3415/app/services/nfs_doc_builders.py +21 -7
- scraper2_hj3415/app/usecases/ingest/ingest_c101.py +2 -2
- scraper2_hj3415/app/usecases/ingest/ingest_c103.py +2 -2
- scraper2_hj3415/app/usecases/ingest/ingest_c104.py +2 -2
- scraper2_hj3415/app/usecases/ingest/ingest_c106.py +2 -2
- scraper2_hj3415/app/usecases/ingest/ingest_c108.py +2 -2
- scraper2_hj3415/cli.py +10 -7
- {scraper2_hj3415-2.6.0.dist-info → scraper2_hj3415-2.7.0.dist-info}/METADATA +1 -1
- {scraper2_hj3415-2.6.0.dist-info → scraper2_hj3415-2.7.0.dist-info}/RECORD +44 -26
- {scraper2_hj3415-2.6.0.dist-info → scraper2_hj3415-2.7.0.dist-info}/WHEEL +0 -0
- {scraper2_hj3415-2.6.0.dist-info → scraper2_hj3415-2.7.0.dist-info}/entry_points.txt +0 -0
- {scraper2_hj3415-2.6.0.dist-info → scraper2_hj3415-2.7.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from .navigation import BrowserNavigationPort
|
|
2
|
+
from .wait import BrowserWaitPort
|
|
3
|
+
from .interaction import BrowserInteractionPort
|
|
4
|
+
from .text import BrowserTextPort
|
|
5
|
+
from .scope import BrowserScopePort
|
|
6
|
+
from .table import BrowserTablePort
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"BrowserNavigationPort",
|
|
10
|
+
"BrowserWaitPort",
|
|
11
|
+
"BrowserInteractionPort",
|
|
12
|
+
"BrowserTextPort",
|
|
13
|
+
"BrowserScopePort",
|
|
14
|
+
"BrowserTablePort",
|
|
15
|
+
]
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Protocol
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class BrowserInteractionPort(Protocol):
|
|
7
|
+
"""클릭/스크롤 등 상호작용"""
|
|
8
|
+
|
|
9
|
+
async def click(
|
|
10
|
+
self,
|
|
11
|
+
selector: str,
|
|
12
|
+
*,
|
|
13
|
+
index: int = 0,
|
|
14
|
+
timeout_ms: int = 4_000,
|
|
15
|
+
force: bool = False,
|
|
16
|
+
) -> None: ...
|
|
17
|
+
|
|
18
|
+
async def try_click(
|
|
19
|
+
self,
|
|
20
|
+
selector: str,
|
|
21
|
+
*,
|
|
22
|
+
index: int = 0,
|
|
23
|
+
timeout_ms: int = 1_500,
|
|
24
|
+
force: bool = False,
|
|
25
|
+
) -> bool: ...
|
|
26
|
+
|
|
27
|
+
async def scroll_into_view(self, selector: str, *, index: int = 0) -> None: ...
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Protocol
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class BrowserNavigationPort(Protocol):
|
|
7
|
+
"""페이지 이동/기본 정보"""
|
|
8
|
+
|
|
9
|
+
async def title(self) -> str: ...
|
|
10
|
+
async def current_url(self) -> str: ...
|
|
11
|
+
|
|
12
|
+
async def goto_and_wait_for_stable(
|
|
13
|
+
self,
|
|
14
|
+
url: str,
|
|
15
|
+
timeout_ms: int = 10_000,
|
|
16
|
+
) -> None: ...
|
|
17
|
+
|
|
18
|
+
async def reload(self, *, timeout_ms: int = 10_000) -> None: ...
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, Protocol
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class BrowserScopePort(Protocol):
|
|
7
|
+
"""scope/nth 컨텍스트 기반 조회 (현재는 기존 API 유지)"""
|
|
8
|
+
|
|
9
|
+
async def is_attached(self, selector: str, *, index: int = 0) -> bool: ...
|
|
10
|
+
|
|
11
|
+
async def computed_style(
|
|
12
|
+
self,
|
|
13
|
+
selector: str,
|
|
14
|
+
*,
|
|
15
|
+
index: int = 0,
|
|
16
|
+
prop: str,
|
|
17
|
+
) -> str: ...
|
|
18
|
+
|
|
19
|
+
async def count_in_nth(
|
|
20
|
+
self,
|
|
21
|
+
scope_selector: str,
|
|
22
|
+
*,
|
|
23
|
+
scope_index: int,
|
|
24
|
+
inner_selector: str,
|
|
25
|
+
) -> int: ...
|
|
26
|
+
|
|
27
|
+
async def eval_in_nth_first(
|
|
28
|
+
self,
|
|
29
|
+
scope_selector: str,
|
|
30
|
+
*,
|
|
31
|
+
scope_index: int,
|
|
32
|
+
inner_selector: str,
|
|
33
|
+
expression: str,
|
|
34
|
+
) -> Any: ...
|
|
35
|
+
|
|
36
|
+
async def inner_text_in_nth(
|
|
37
|
+
self,
|
|
38
|
+
scope_selector: str,
|
|
39
|
+
*,
|
|
40
|
+
scope_index: int,
|
|
41
|
+
inner_selector: str,
|
|
42
|
+
inner_index: int = 0,
|
|
43
|
+
timeout_ms: int = 10_000,
|
|
44
|
+
) -> str:
|
|
45
|
+
"""
|
|
46
|
+
scope_selector의 nth(scope_index) 요소 안에서
|
|
47
|
+
inner_selector의 nth(inner_index) 요소의 innerText를 반환.
|
|
48
|
+
(렌더링 기준 텍스트: 줄바꿈/스타일 영향 반영)
|
|
49
|
+
"""
|
|
50
|
+
...
|
|
51
|
+
|
|
52
|
+
async def text_content_in_nth(
|
|
53
|
+
self,
|
|
54
|
+
scope_selector: str,
|
|
55
|
+
*,
|
|
56
|
+
scope_index: int,
|
|
57
|
+
inner_selector: str,
|
|
58
|
+
inner_index: int = 0,
|
|
59
|
+
timeout_ms: int = 10_000,
|
|
60
|
+
) -> str:
|
|
61
|
+
"""
|
|
62
|
+
scope_selector의 nth(scope_index) 요소 안에서
|
|
63
|
+
inner_selector의 nth(inner_index) 요소의 textContent를 반환.
|
|
64
|
+
(DOM 기준 텍스트: 숨김 텍스트도 포함될 수 있음)
|
|
65
|
+
"""
|
|
66
|
+
...
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, Protocol
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class BrowserTablePort(Protocol):
|
|
7
|
+
"""테이블 파싱/헤더 추출"""
|
|
8
|
+
|
|
9
|
+
async def table_records(
|
|
10
|
+
self,
|
|
11
|
+
table_selector: str,
|
|
12
|
+
*,
|
|
13
|
+
header: int | list[int] | None = 0,
|
|
14
|
+
) -> list[dict[str, Any]]: ...
|
|
15
|
+
|
|
16
|
+
async def table_header_texts_nth(
|
|
17
|
+
self,
|
|
18
|
+
table_selector: str,
|
|
19
|
+
*,
|
|
20
|
+
index: int,
|
|
21
|
+
) -> list[str]: ...
|
|
22
|
+
|
|
23
|
+
async def table_header_periods_mm_nth(
|
|
24
|
+
self,
|
|
25
|
+
table_selector: str,
|
|
26
|
+
*,
|
|
27
|
+
index: int,
|
|
28
|
+
) -> list[str]: ...
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Protocol
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class BrowserTextPort(Protocol):
|
|
7
|
+
"""DOM 조회/텍스트 추출"""
|
|
8
|
+
|
|
9
|
+
async def count(self, selector: str) -> int: ...
|
|
10
|
+
|
|
11
|
+
async def text_content_first(self, selector: str) -> str: ...
|
|
12
|
+
async def all_texts(self, selector: str) -> list[str]: ...
|
|
13
|
+
async def get_text_by_text(self, needle: str) -> str: ...
|
|
14
|
+
|
|
15
|
+
async def inner_text(self, selector: str) -> str: ...
|
|
16
|
+
async def outer_html_nth(self, selector: str, index: int) -> str: ...
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
from typing import Protocol
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class BrowserWaitPort(Protocol):
|
|
8
|
+
"""대기/동기화 유틸"""
|
|
9
|
+
|
|
10
|
+
async def sleep_ms(self, ms: int) -> None:
|
|
11
|
+
"""
|
|
12
|
+
ms에 1000을 넣으면 1초 쉼.
|
|
13
|
+
구현체가 없을 때를 대비해 기본 구현 제공.
|
|
14
|
+
"""
|
|
15
|
+
await asyncio.sleep(ms / 1000)
|
|
16
|
+
|
|
17
|
+
async def wait_attached(
|
|
18
|
+
self,
|
|
19
|
+
selector: str,
|
|
20
|
+
*,
|
|
21
|
+
timeout_ms: int = 10_000,
|
|
22
|
+
) -> None: ...
|
|
23
|
+
|
|
24
|
+
async def wait_visible(
|
|
25
|
+
self,
|
|
26
|
+
selector: str,
|
|
27
|
+
*,
|
|
28
|
+
timeout_ms: int = 10_000,
|
|
29
|
+
) -> None: ...
|
|
30
|
+
|
|
31
|
+
async def wait_table_nth_ready(
|
|
32
|
+
self,
|
|
33
|
+
table_selector: str,
|
|
34
|
+
*,
|
|
35
|
+
index: int,
|
|
36
|
+
min_rows: int = 1,
|
|
37
|
+
timeout_ms: int = 20_000,
|
|
38
|
+
poll_ms: int = 200,
|
|
39
|
+
) -> None: ...
|
|
40
|
+
|
|
41
|
+
async def wait_table_text_changed(
|
|
42
|
+
self,
|
|
43
|
+
table_selector: str,
|
|
44
|
+
*,
|
|
45
|
+
index: int,
|
|
46
|
+
prev_text: str | None,
|
|
47
|
+
min_rows: int = 1,
|
|
48
|
+
min_lines: int = 50,
|
|
49
|
+
timeout_sec: float = 12.0,
|
|
50
|
+
poll_sec: float = 0.2,
|
|
51
|
+
) -> str: ...
|
|
@@ -2,19 +2,19 @@
|
|
|
2
2
|
from __future__ import annotations
|
|
3
3
|
|
|
4
4
|
from typing import Protocol, Iterable, TypeVar
|
|
5
|
-
from contracts_hj3415.nfs.types import
|
|
5
|
+
from contracts_hj3415.nfs.types import Endpoint
|
|
6
6
|
from contracts_hj3415.nfs.nfs_dto import NfsDTO
|
|
7
7
|
|
|
8
8
|
TDto = TypeVar("TDto", bound=NfsDTO)
|
|
9
9
|
|
|
10
10
|
class NfsSinkPort(Protocol[TDto]):
|
|
11
11
|
async def write(
|
|
12
|
-
self, dto: TDto, *, endpoint:
|
|
12
|
+
self, dto: TDto, *, endpoint: Endpoint
|
|
13
13
|
) -> None: ...
|
|
14
14
|
|
|
15
15
|
async def write_many(
|
|
16
16
|
self,
|
|
17
17
|
dtos: Iterable[TDto],
|
|
18
18
|
*,
|
|
19
|
-
endpoint:
|
|
19
|
+
endpoint: Endpoint
|
|
20
20
|
) -> None: ...
|
|
@@ -3,18 +3,28 @@ from __future__ import annotations
|
|
|
3
3
|
from typing import Protocol
|
|
4
4
|
|
|
5
5
|
class WiseReportPort(Protocol):
|
|
6
|
-
async def
|
|
6
|
+
async def set_view_c103(
|
|
7
7
|
self,
|
|
8
8
|
*,
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
9
|
+
key: str,
|
|
10
|
+
steps: list[tuple[str, str]],
|
|
11
|
+
table_selector: str,
|
|
12
|
+
table_index: int,
|
|
13
|
+
max_attempts: int = 5,
|
|
14
|
+
stabilize_timeout_sec: float = 10.0,
|
|
15
|
+
) -> None: ...
|
|
16
|
+
|
|
17
|
+
async def set_view_c104(
|
|
16
18
|
self,
|
|
17
|
-
steps: list[tuple[str, str]],
|
|
18
19
|
*,
|
|
19
|
-
|
|
20
|
+
key: str,
|
|
21
|
+
steps: list[tuple[str, str]],
|
|
22
|
+
table_selector: str,
|
|
23
|
+
table_index: int,
|
|
24
|
+
prev_text_by_idx: dict[int, str | None],
|
|
25
|
+
max_attempts: int = 5,
|
|
26
|
+
stabilize_timeout_sec: float = 10.0,
|
|
27
|
+
min_rows: int = 5,
|
|
28
|
+
min_lines: int = 30,
|
|
29
|
+
open_consensus: bool = True,
|
|
20
30
|
) -> None: ...
|
|
@@ -8,9 +8,9 @@ from typing import Iterable, Any
|
|
|
8
8
|
from logging_hj3415 import logger
|
|
9
9
|
from scraper2_hj3415.app.ports.browser.browser_factory_port import BrowserFactoryPort
|
|
10
10
|
from scraper2_hj3415.app.ports.site.wisereport_port import WiseReportPort
|
|
11
|
-
|
|
12
11
|
from scraper2_hj3415.app.adapters.site.wisereport_playwright import WiseReportPlaywright
|
|
13
|
-
|
|
12
|
+
|
|
13
|
+
from scraper2_hj3415.app.parsing.c103_parser import parse_c103_current_table, TABLE_XPATH
|
|
14
14
|
from scraper2_hj3415.app.services.nfs_doc_builders import build_metrics_doc_from_parsed
|
|
15
15
|
|
|
16
16
|
from scraper2_hj3415.app.domain.endpoint import EndpointKind
|
|
@@ -50,6 +50,8 @@ BTN_SETS: dict[str, list[tuple[str, str]]] = {
|
|
|
50
50
|
],
|
|
51
51
|
}
|
|
52
52
|
|
|
53
|
+
TABLE_INDEX = 2
|
|
54
|
+
|
|
53
55
|
|
|
54
56
|
class FetchC103:
|
|
55
57
|
def __init__(self, factory: BrowserFactoryPort):
|
|
@@ -69,40 +71,24 @@ class FetchC103:
|
|
|
69
71
|
await asyncio.sleep(sleep_sec + random.uniform(0, 1.0))
|
|
70
72
|
|
|
71
73
|
parsed: dict[str, list[dict[str, Any]]] = {}
|
|
72
|
-
prev_text: str | None = None
|
|
73
|
-
|
|
74
|
-
# 최초 기준 텍스트 확보(없어도 동작하게)
|
|
75
|
-
prev_text = await browser.wait_table_text_changed(
|
|
76
|
-
"xpath=//div[@id='wrapper']//div//table",
|
|
77
|
-
index=2,
|
|
78
|
-
prev_text=None,
|
|
79
|
-
min_rows=5,
|
|
80
|
-
min_lines=50,
|
|
81
|
-
timeout_sec=10.0,
|
|
82
|
-
)
|
|
83
74
|
|
|
84
75
|
for key, steps in BTN_SETS.items():
|
|
85
|
-
# ✅ 상태 전환 (행동)
|
|
86
|
-
await wr.click_steps(steps, jitter_sec=0.6) # 포트/어댑터로 이동 권장
|
|
87
|
-
await wr.ensure_yearly_consensus_open_in_table_nth(
|
|
88
|
-
table_selector="xpath=//div[@id='wrapper']//div//table",
|
|
89
|
-
table_index=2,
|
|
90
|
-
)
|
|
91
|
-
|
|
92
|
-
# ✅ 데이터 변경 대기 (행동)
|
|
93
|
-
prev_text = await browser.wait_table_text_changed(
|
|
94
|
-
"xpath=//div[@id='wrapper']//div//table",
|
|
95
|
-
index=2,
|
|
96
|
-
prev_text=prev_text,
|
|
97
|
-
min_rows=5,
|
|
98
|
-
min_lines=50,
|
|
99
|
-
timeout_sec=12.0,
|
|
100
|
-
)
|
|
101
|
-
|
|
102
|
-
# ✅ 파싱은 “현재 화면 테이블”만
|
|
103
76
|
try:
|
|
77
|
+
# 1) 상태 확정 (분기/연간이 맞는지 헤더로 검증)
|
|
78
|
+
await wr.set_view_c103(
|
|
79
|
+
key=key,
|
|
80
|
+
steps=steps,
|
|
81
|
+
table_selector=TABLE_XPATH,
|
|
82
|
+
table_index=TABLE_INDEX,
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
# 2) 파싱은 “현재 화면 테이블”만
|
|
104
86
|
parsed[key] = await parse_c103_current_table(browser)
|
|
105
|
-
|
|
87
|
+
|
|
88
|
+
except Exception as e:
|
|
89
|
+
logger.warning(
|
|
90
|
+
f"c103 view/parse failed: key={key} err={type(e).__name__}: {e}"
|
|
91
|
+
)
|
|
106
92
|
parsed[key] = []
|
|
107
93
|
|
|
108
94
|
block_keys = BLOCK_KEYS_BY_ENDPOINT[EndpointKind.C103]
|
|
@@ -3,22 +3,19 @@ from __future__ import annotations
|
|
|
3
3
|
|
|
4
4
|
import asyncio
|
|
5
5
|
import random
|
|
6
|
-
from typing import
|
|
6
|
+
from typing import Any, Iterable
|
|
7
7
|
|
|
8
8
|
from logging_hj3415 import logger
|
|
9
9
|
from scraper2_hj3415.app.ports.browser.browser_factory_port import BrowserFactoryPort
|
|
10
10
|
from scraper2_hj3415.app.ports.site.wisereport_port import WiseReportPort
|
|
11
11
|
from scraper2_hj3415.app.adapters.site.wisereport_playwright import WiseReportPlaywright
|
|
12
12
|
|
|
13
|
-
from scraper2_hj3415.app.parsing.c104_parser import
|
|
14
|
-
parse_c104_current_table,
|
|
15
|
-
TABLE_XPATH,
|
|
16
|
-
)
|
|
13
|
+
from scraper2_hj3415.app.parsing.c104_parser import parse_c104_current_table, TABLE_XPATH
|
|
17
14
|
from scraper2_hj3415.app.services.nfs_doc_builders import build_metrics_doc_from_parsed
|
|
15
|
+
|
|
18
16
|
from scraper2_hj3415.app.domain.endpoint import EndpointKind
|
|
19
|
-
from scraper2_hj3415.app.domain.doc import NfsDoc
|
|
20
17
|
from scraper2_hj3415.app.domain.blocks import BLOCK_KEYS_BY_ENDPOINT
|
|
21
|
-
|
|
18
|
+
from scraper2_hj3415.app.domain.doc import NfsDoc
|
|
22
19
|
|
|
23
20
|
BTN_SETS: dict[str, list[tuple[str, str]]] = {
|
|
24
21
|
"수익성y": [
|
|
@@ -72,13 +69,9 @@ BTN_SETS: dict[str, list[tuple[str, str]]] = {
|
|
|
72
69
|
}
|
|
73
70
|
|
|
74
71
|
|
|
75
|
-
def _is_value_analysis(key: str) -> bool:
|
|
76
|
-
return key.startswith("가치분석")
|
|
77
|
-
|
|
78
|
-
|
|
79
72
|
def _table_index_for_key(key: str) -> int:
|
|
80
|
-
#
|
|
81
|
-
return 1 if
|
|
73
|
+
# 가치분석은 별도 테이블(보통 index=1)
|
|
74
|
+
return 1 if key.startswith("가치분석") else 0
|
|
82
75
|
|
|
83
76
|
|
|
84
77
|
class FetchC104:
|
|
@@ -100,53 +93,38 @@ class FetchC104:
|
|
|
100
93
|
|
|
101
94
|
parsed: dict[str, list[dict[str, Any]]] = {}
|
|
102
95
|
|
|
103
|
-
#
|
|
96
|
+
# idx별 안정화 상태 추적
|
|
104
97
|
prev_text_by_idx: dict[int, str | None] = {0: None, 1: None}
|
|
105
98
|
|
|
106
|
-
# ✅ 최초 baseline 확보(둘 다 시도)
|
|
107
|
-
for idx in (0, 1):
|
|
108
|
-
try:
|
|
109
|
-
prev_text_by_idx[idx] = await browser.wait_table_text_changed(
|
|
110
|
-
TABLE_XPATH,
|
|
111
|
-
index=idx,
|
|
112
|
-
prev_text=None,
|
|
113
|
-
min_rows=5,
|
|
114
|
-
min_lines=50,
|
|
115
|
-
timeout_sec=10.0,
|
|
116
|
-
)
|
|
117
|
-
except Exception:
|
|
118
|
-
prev_text_by_idx[idx] = None
|
|
119
|
-
|
|
120
99
|
for key, steps in BTN_SETS.items():
|
|
121
100
|
idx = _table_index_for_key(key)
|
|
122
101
|
|
|
123
|
-
# ✅ 상태 전환(행동)
|
|
124
|
-
await wr.click_steps(steps, jitter_sec=0.6)
|
|
125
|
-
await wr.ensure_yearly_consensus_open_in_table_nth(
|
|
126
|
-
table_selector=TABLE_XPATH,
|
|
127
|
-
table_index=idx,
|
|
128
|
-
)
|
|
129
|
-
|
|
130
|
-
# ✅ 데이터 변경 대기(행동) - idx별로 추적
|
|
131
|
-
prev_text_by_idx[idx] = await browser.wait_table_text_changed(
|
|
132
|
-
TABLE_XPATH,
|
|
133
|
-
index=idx,
|
|
134
|
-
prev_text=prev_text_by_idx[idx],
|
|
135
|
-
min_rows=5,
|
|
136
|
-
min_lines=50,
|
|
137
|
-
timeout_sec=12.0,
|
|
138
|
-
)
|
|
139
|
-
|
|
140
|
-
# ✅ 파싱은 “현재 화면의 idx 테이블 1개”만
|
|
141
102
|
try:
|
|
103
|
+
# 1) 상태 확정 (분기/연간이 맞는지 헤더로 검증)
|
|
104
|
+
await wr.set_view_c104(
|
|
105
|
+
key=key,
|
|
106
|
+
steps=steps,
|
|
107
|
+
table_selector=TABLE_XPATH,
|
|
108
|
+
table_index=idx,
|
|
109
|
+
prev_text_by_idx=prev_text_by_idx,
|
|
110
|
+
max_attempts=5,
|
|
111
|
+
stabilize_timeout_sec=10.0,
|
|
112
|
+
min_rows=5,
|
|
113
|
+
min_lines=30,
|
|
114
|
+
open_consensus=True,
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
# 2) 파싱은 “현재 화면의 idx 테이블 1개”만
|
|
142
118
|
parsed[key] = await parse_c104_current_table(
|
|
143
119
|
browser,
|
|
144
120
|
table_index=idx,
|
|
145
121
|
)
|
|
146
|
-
except Exception:
|
|
147
|
-
parsed[key] = []
|
|
148
122
|
|
|
149
|
-
|
|
123
|
+
except Exception as e:
|
|
124
|
+
logger.warning(
|
|
125
|
+
f"c104 view/parse failed: key={key} idx={idx} err={type(e).__name__}: {e}"
|
|
126
|
+
)
|
|
127
|
+
parsed[key] = []
|
|
150
128
|
|
|
151
129
|
block_keys = BLOCK_KEYS_BY_ENDPOINT[EndpointKind.C104]
|
|
152
130
|
if not parsed or all(not (parsed.get(str(bk)) or []) for bk in block_keys):
|
|
@@ -164,7 +142,6 @@ class FetchC104:
|
|
|
164
142
|
raw_label_key="항목_raw",
|
|
165
143
|
keep_empty_blocks=True,
|
|
166
144
|
)
|
|
167
|
-
logger.debug(f"c104 doc: {doc}")
|
|
168
145
|
return doc
|
|
169
146
|
|
|
170
147
|
async def execute(self, code: str, *, sleep_sec: float = 2.0) -> NfsDoc | None:
|
|
@@ -180,4 +157,4 @@ class FetchC104:
|
|
|
180
157
|
*(self._fetch_one(c, sleep_sec=sleep_sec) for c in codes),
|
|
181
158
|
return_exceptions=False,
|
|
182
159
|
)
|
|
183
|
-
return [r for r in results if r is not None]
|
|
160
|
+
return [r for r in results if r is not None]
|
|
@@ -7,9 +7,22 @@ from typing import Mapping, Iterable, Any
|
|
|
7
7
|
from scraper2_hj3415.app.domain.endpoint import EndpointKind
|
|
8
8
|
from scraper2_hj3415.app.domain.constants import BLOCK_KEYS_BY_ENDPOINT
|
|
9
9
|
from scraper2_hj3415.app.domain.doc import NfsDoc
|
|
10
|
-
from scraper2_hj3415.app.domain.blocks import
|
|
10
|
+
from scraper2_hj3415.app.domain.blocks import (
|
|
11
|
+
MetricsBlock,
|
|
12
|
+
RecordsBlock,
|
|
13
|
+
KvBlock,
|
|
14
|
+
BlockData,
|
|
15
|
+
)
|
|
11
16
|
from scraper2_hj3415.app.domain.series import MetricSeries
|
|
12
|
-
from scraper2_hj3415.app.domain.types import
|
|
17
|
+
from scraper2_hj3415.app.domain.types import (
|
|
18
|
+
LabelsMap,
|
|
19
|
+
MetricKey,
|
|
20
|
+
Period,
|
|
21
|
+
Num,
|
|
22
|
+
BlockKey,
|
|
23
|
+
Records,
|
|
24
|
+
Record,
|
|
25
|
+
)
|
|
13
26
|
|
|
14
27
|
from common_hj3415.utils import nan_to_none
|
|
15
28
|
|
|
@@ -18,9 +31,7 @@ def is_all_none(row: dict[str, Any]) -> bool:
|
|
|
18
31
|
return all(v is None for v in row.values())
|
|
19
32
|
|
|
20
33
|
|
|
21
|
-
ParsedBlocks = Mapping[
|
|
22
|
-
str, Any
|
|
23
|
-
] # parser가 반환한 "block_key(str) -> rows(list[dict])"
|
|
34
|
+
ParsedBlocks = Mapping[str, Any] # parser가 반환한 "block_key(str) -> rows(list[dict])"
|
|
24
35
|
|
|
25
36
|
|
|
26
37
|
def build_metrics_block_and_labels_from_rows(
|
|
@@ -155,7 +166,9 @@ def build_records_block_from_rows(
|
|
|
155
166
|
- c108 같은 레코드성 블록(리포트 목록 등)에 사용
|
|
156
167
|
"""
|
|
157
168
|
# RecordsBlock 쪽에서도 __post_init__로 block_key 검증이 수행된다는 전제(네가 정돈한 도메인)
|
|
158
|
-
return RecordsBlock(
|
|
169
|
+
return RecordsBlock(
|
|
170
|
+
endpoint_kind=endpoint_kind, block_key=block_key, rows=list(rows)
|
|
171
|
+
)
|
|
159
172
|
|
|
160
173
|
|
|
161
174
|
def build_c108_doc_from_parsed(
|
|
@@ -206,6 +219,7 @@ def build_c108_doc_from_parsed(
|
|
|
206
219
|
labels=labels,
|
|
207
220
|
)
|
|
208
221
|
|
|
222
|
+
|
|
209
223
|
def build_kv_block_from_mapping(
|
|
210
224
|
*,
|
|
211
225
|
endpoint_kind: EndpointKind,
|
|
@@ -259,7 +273,7 @@ def build_c101_doc_from_parsed(
|
|
|
259
273
|
rb = build_records_block_from_rows(
|
|
260
274
|
endpoint_kind=endpoint_kind,
|
|
261
275
|
block_key=bk,
|
|
262
|
-
rows=v,
|
|
276
|
+
rows=v, # type: ignore[arg-type] (rows 타입 맞추면 제거 가능)
|
|
263
277
|
)
|
|
264
278
|
if rb is not None:
|
|
265
279
|
blocks[bk] = rb
|
|
@@ -13,13 +13,13 @@ from scraper2_hj3415.app.domain.constants import get_block_keys
|
|
|
13
13
|
from scraper2_hj3415.app.domain.doc import NfsDoc
|
|
14
14
|
from scraper2_hj3415.app.domain.blocks import KvBlock, RecordsBlock, MetricsBlock
|
|
15
15
|
|
|
16
|
-
from contracts_hj3415.nfs.types import
|
|
16
|
+
from contracts_hj3415.nfs.types import Endpoint
|
|
17
17
|
from contracts_hj3415.nfs.c101_dto import C101DTO, C101Payload, C101Blocks
|
|
18
18
|
|
|
19
19
|
from logging_hj3415 import logger
|
|
20
20
|
|
|
21
21
|
endpoint_kind = EndpointKind.C101
|
|
22
|
-
endpoint:
|
|
22
|
+
endpoint: Endpoint = cast(Endpoint, endpoint_kind.value)
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
def _unwrap_c101_block(block: Any) -> Any:
|
|
@@ -13,7 +13,7 @@ from scraper2_hj3415.app.domain.constants import get_block_keys
|
|
|
13
13
|
from scraper2_hj3415.app.domain.doc import NfsDoc
|
|
14
14
|
from scraper2_hj3415.app.domain.blocks import MetricsBlock
|
|
15
15
|
|
|
16
|
-
from contracts_hj3415.nfs.types import
|
|
16
|
+
from contracts_hj3415.nfs.types import Endpoint
|
|
17
17
|
|
|
18
18
|
from contracts_hj3415.nfs.c103_dto import (
|
|
19
19
|
C103DTO,
|
|
@@ -27,7 +27,7 @@ from logging_hj3415 import logger
|
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
endpoint_kind = EndpointKind.C103
|
|
30
|
-
endpoint:
|
|
30
|
+
endpoint: Endpoint = cast(Endpoint, endpoint_kind.value)
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
def _metricsblock_to_c103_metric_map(block: MetricsBlock) -> dict[str, C103ValuesMap]:
|
|
@@ -13,7 +13,7 @@ from scraper2_hj3415.app.domain.constants import get_block_keys
|
|
|
13
13
|
from scraper2_hj3415.app.domain.doc import NfsDoc
|
|
14
14
|
from scraper2_hj3415.app.domain.blocks import MetricsBlock
|
|
15
15
|
|
|
16
|
-
from contracts_hj3415.nfs.types import
|
|
16
|
+
from contracts_hj3415.nfs.types import Endpoint
|
|
17
17
|
|
|
18
18
|
from contracts_hj3415.nfs.c104_dto import (
|
|
19
19
|
C104DTO,
|
|
@@ -27,7 +27,7 @@ from logging_hj3415 import logger
|
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
endpoint_kind = EndpointKind.C104
|
|
30
|
-
endpoint:
|
|
30
|
+
endpoint: Endpoint = cast(Endpoint, endpoint_kind.value)
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
def _metricsblock_to_c104_metric_map(block: MetricsBlock) -> dict[str, C104ValuesMap]:
|
|
@@ -13,7 +13,7 @@ from scraper2_hj3415.app.domain.constants import get_block_keys
|
|
|
13
13
|
from scraper2_hj3415.app.domain.doc import NfsDoc
|
|
14
14
|
from scraper2_hj3415.app.domain.blocks import MetricsBlock
|
|
15
15
|
|
|
16
|
-
from contracts_hj3415.nfs.types import
|
|
16
|
+
from contracts_hj3415.nfs.types import Endpoint
|
|
17
17
|
|
|
18
18
|
from contracts_hj3415.nfs.c106_dto import (
|
|
19
19
|
C106DTO,
|
|
@@ -27,7 +27,7 @@ from logging_hj3415 import logger
|
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
endpoint_kind = EndpointKind.C106
|
|
30
|
-
endpoint:
|
|
30
|
+
endpoint: Endpoint = cast(Endpoint, endpoint_kind.value)
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
def _metricsblock_to_c106_metric_map(block: MetricsBlock) -> dict[str, C106ValuesMap]:
|
|
@@ -13,14 +13,14 @@ from scraper2_hj3415.app.domain.constants import get_block_keys
|
|
|
13
13
|
from scraper2_hj3415.app.domain.doc import NfsDoc
|
|
14
14
|
from scraper2_hj3415.app.domain.blocks import RecordsBlock
|
|
15
15
|
|
|
16
|
-
from contracts_hj3415.nfs.types import
|
|
16
|
+
from contracts_hj3415.nfs.types import Endpoint
|
|
17
17
|
|
|
18
18
|
from contracts_hj3415.nfs.c108_dto import C108DTO, C108Payload, C108Blocks
|
|
19
19
|
|
|
20
20
|
from logging_hj3415 import logger
|
|
21
21
|
|
|
22
22
|
endpoint_kind = EndpointKind.C108
|
|
23
|
-
endpoint:
|
|
23
|
+
endpoint: Endpoint = cast(Endpoint, endpoint_kind.value)
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
def _to_list_of_dict(rows: object) -> list[dict]:
|