scraper2-hj3415 2.4.1__py3-none-any.whl → 2.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. scraper2_hj3415/app/adapters/out/playwright/browser.py +26 -0
  2. {scraper2 → scraper2_hj3415/app}/adapters/out/playwright/browser_factory.py +7 -7
  3. scraper2_hj3415/app/adapters/out/playwright/capabilities/__init__.py +18 -0
  4. scraper2_hj3415/app/adapters/out/playwright/capabilities/_base.py +19 -0
  5. scraper2_hj3415/app/adapters/out/playwright/capabilities/interaction.py +37 -0
  6. scraper2_hj3415/app/adapters/out/playwright/capabilities/navigation.py +24 -0
  7. scraper2_hj3415/app/adapters/out/playwright/capabilities/scope.py +84 -0
  8. scraper2_hj3415/app/adapters/out/playwright/capabilities/table.py +90 -0
  9. scraper2_hj3415/app/adapters/out/playwright/capabilities/text.py +25 -0
  10. scraper2_hj3415/app/adapters/out/playwright/capabilities/wait.py +96 -0
  11. {scraper2 → scraper2_hj3415/app}/adapters/out/playwright/session.py +1 -1
  12. scraper2_hj3415/app/adapters/out/sinks/memory_sink.py +25 -0
  13. scraper2_hj3415/app/adapters/out/sinks/mongo_sink.py +63 -0
  14. {scraper2/adapters/out/sinks/memory → scraper2_hj3415/app/adapters/out/sinks}/store.py +14 -5
  15. scraper2_hj3415/app/adapters/site/wisereport_playwright.py +379 -0
  16. scraper2_hj3415/app/composition.py +225 -0
  17. scraper2_hj3415/app/domain/blocks.py +61 -0
  18. scraper2_hj3415/app/domain/constants.py +33 -0
  19. scraper2_hj3415/app/domain/doc.py +16 -0
  20. scraper2_hj3415/app/domain/endpoint.py +11 -0
  21. scraper2_hj3415/app/domain/series.py +11 -0
  22. scraper2_hj3415/app/domain/types.py +19 -0
  23. scraper2_hj3415/app/parsing/_normalize/label.py +92 -0
  24. scraper2_hj3415/app/parsing/_normalize/table.py +53 -0
  25. scraper2_hj3415/app/parsing/_normalize/text.py +31 -0
  26. scraper2_hj3415/app/parsing/_normalize/values.py +70 -0
  27. scraper2_hj3415/app/parsing/_tables/html_table.py +89 -0
  28. scraper2_hj3415/app/parsing/c101/__init__.py +0 -0
  29. scraper2_hj3415/app/parsing/c101/_sise_normalizer.py +103 -0
  30. scraper2_hj3415/app/parsing/c101/company_overview.py +47 -0
  31. scraper2_hj3415/app/parsing/c101/earning_surprise.py +217 -0
  32. scraper2_hj3415/app/parsing/c101/fundamentals.py +95 -0
  33. scraper2_hj3415/app/parsing/c101/major_shareholders.py +57 -0
  34. scraper2_hj3415/app/parsing/c101/sise.py +47 -0
  35. scraper2_hj3415/app/parsing/c101/summary_cmp.py +87 -0
  36. scraper2_hj3415/app/parsing/c101/yearly_consensus.py +197 -0
  37. scraper2_hj3415/app/parsing/c101_parser.py +45 -0
  38. scraper2_hj3415/app/parsing/c103_parser.py +22 -0
  39. scraper2_hj3415/app/parsing/c104_parser.py +26 -0
  40. scraper2_hj3415/app/parsing/c106_parser.py +137 -0
  41. scraper2_hj3415/app/parsing/c108_parser.py +254 -0
  42. scraper2_hj3415/app/ports/__init__.py +0 -0
  43. scraper2_hj3415/app/ports/browser/__init__.py +0 -0
  44. scraper2_hj3415/app/ports/browser/browser_factory_port.py +9 -0
  45. scraper2_hj3415/app/ports/browser/browser_port.py +32 -0
  46. scraper2_hj3415/app/ports/browser/capabilities/__init__.py +15 -0
  47. scraper2_hj3415/app/ports/browser/capabilities/interaction.py +27 -0
  48. scraper2_hj3415/app/ports/browser/capabilities/navigation.py +18 -0
  49. scraper2_hj3415/app/ports/browser/capabilities/scope.py +66 -0
  50. scraper2_hj3415/app/ports/browser/capabilities/table.py +28 -0
  51. scraper2_hj3415/app/ports/browser/capabilities/text.py +16 -0
  52. scraper2_hj3415/app/ports/browser/capabilities/wait.py +51 -0
  53. scraper2_hj3415/app/ports/ingest/__init__.py +0 -0
  54. scraper2_hj3415/app/ports/ingest/nfs_ingest_port.py +28 -0
  55. scraper2_hj3415/app/ports/sinks/__init__.py +0 -0
  56. scraper2_hj3415/app/ports/sinks/nfs_sink_port.py +20 -0
  57. scraper2_hj3415/app/ports/site/__init__.py +0 -0
  58. scraper2_hj3415/app/ports/site/wisereport_port.py +30 -0
  59. scraper2_hj3415/app/services/__init__.py +0 -0
  60. scraper2_hj3415/app/services/fetch/__init__.py +0 -0
  61. scraper2_hj3415/app/services/fetch/fetch_c101.py +59 -0
  62. scraper2_hj3415/app/services/fetch/fetch_c103.py +121 -0
  63. scraper2_hj3415/app/services/fetch/fetch_c104.py +160 -0
  64. scraper2_hj3415/app/services/fetch/fetch_c106.py +90 -0
  65. scraper2_hj3415/app/services/fetch/fetch_c108.py +59 -0
  66. scraper2_hj3415/app/services/nfs_doc_builders.py +304 -0
  67. scraper2_hj3415/app/usecases/__init__.py +0 -0
  68. scraper2_hj3415/app/usecases/ingest/__init__.py +0 -0
  69. scraper2_hj3415/app/usecases/ingest/ingest_c101.py +111 -0
  70. scraper2_hj3415/app/usecases/ingest/ingest_c103.py +162 -0
  71. scraper2_hj3415/app/usecases/ingest/ingest_c104.py +182 -0
  72. scraper2_hj3415/app/usecases/ingest/ingest_c106.py +136 -0
  73. scraper2_hj3415/app/usecases/ingest/ingest_c108.py +122 -0
  74. scraper2/main.py → scraper2_hj3415/cli.py +45 -72
  75. {scraper2_hj3415-2.4.1.dist-info → scraper2_hj3415-2.7.0.dist-info}/METADATA +3 -1
  76. scraper2_hj3415-2.7.0.dist-info/RECORD +93 -0
  77. scraper2_hj3415-2.7.0.dist-info/entry_points.txt +3 -0
  78. scraper2/adapters/out/playwright/browser.py +0 -102
  79. scraper2/adapters/out/sinks/memory/__init__.py +0 -15
  80. scraper2/adapters/out/sinks/memory/c101_memory_sink.py +0 -26
  81. scraper2/adapters/out/sinks/memory/c103_memory_sink.py +0 -26
  82. scraper2/adapters/out/sinks/memory/c104_memory_sink.py +0 -26
  83. scraper2/adapters/out/sinks/memory/c106_memory_sink.py +0 -26
  84. scraper2/adapters/out/sinks/memory/c108_memory_sink.py +0 -26
  85. scraper2/adapters/out/sinks/mongo/__init__.py +0 -14
  86. scraper2/adapters/out/sinks/mongo/c101_mongo_sink.py +0 -43
  87. scraper2/adapters/out/sinks/mongo/c103_mongo_sink.py +0 -41
  88. scraper2/adapters/out/sinks/mongo/c104_mongo_sink.py +0 -41
  89. scraper2/adapters/out/sinks/mongo/c106_mongo_sink.py +0 -41
  90. scraper2/adapters/out/sinks/mongo/c108_mongo_sink.py +0 -41
  91. scraper2/app/composition.py +0 -204
  92. scraper2/app/parsing/_converters.py +0 -85
  93. scraper2/app/parsing/_normalize.py +0 -134
  94. scraper2/app/parsing/c101_parser.py +0 -143
  95. scraper2/app/parsing/c103_parser.py +0 -128
  96. scraper2/app/parsing/c104_parser.py +0 -143
  97. scraper2/app/parsing/c106_parser.py +0 -153
  98. scraper2/app/parsing/c108_parser.py +0 -65
  99. scraper2/app/ports/browser/browser_factory_port.py +0 -11
  100. scraper2/app/ports/browser/browser_port.py +0 -22
  101. scraper2/app/ports/ingest_port.py +0 -14
  102. scraper2/app/ports/sinks/base_sink_port.py +0 -14
  103. scraper2/app/ports/sinks/c101_sink_port.py +0 -9
  104. scraper2/app/ports/sinks/c103_sink_port.py +0 -9
  105. scraper2/app/ports/sinks/c104_sink_port.py +0 -9
  106. scraper2/app/ports/sinks/c106_sink_port.py +0 -9
  107. scraper2/app/ports/sinks/c108_sink_port.py +0 -9
  108. scraper2/app/usecases/fetch/fetch_c101.py +0 -43
  109. scraper2/app/usecases/fetch/fetch_c103.py +0 -103
  110. scraper2/app/usecases/fetch/fetch_c104.py +0 -76
  111. scraper2/app/usecases/fetch/fetch_c106.py +0 -90
  112. scraper2/app/usecases/fetch/fetch_c108.py +0 -49
  113. scraper2/app/usecases/ingest/ingest_c101.py +0 -36
  114. scraper2/app/usecases/ingest/ingest_c103.py +0 -37
  115. scraper2/app/usecases/ingest/ingest_c104.py +0 -37
  116. scraper2/app/usecases/ingest/ingest_c106.py +0 -38
  117. scraper2/app/usecases/ingest/ingest_c108.py +0 -39
  118. scraper2_hj3415-2.4.1.dist-info/RECORD +0 -63
  119. scraper2_hj3415-2.4.1.dist-info/entry_points.txt +0 -3
  120. {scraper2 → scraper2_hj3415}/.DS_Store +0 -0
  121. {scraper2 → scraper2_hj3415}/__init__.py +0 -0
  122. {scraper2/adapters/out → scraper2_hj3415/app}/__init__.py +0 -0
  123. {scraper2/adapters/out/playwright → scraper2_hj3415/app/adapters}/__init__.py +0 -0
  124. {scraper2 → scraper2_hj3415/app}/adapters/out/.DS_Store +0 -0
  125. {scraper2/app → scraper2_hj3415/app/adapters/out}/__init__.py +0 -0
  126. {scraper2/app/parsing → scraper2_hj3415/app/adapters/out/playwright}/__init__.py +0 -0
  127. {scraper2 → scraper2_hj3415/app}/adapters/out/sinks/.DS_Store +0 -0
  128. {scraper2/app/ports → scraper2_hj3415/app/adapters/out/sinks}/__init__.py +0 -0
  129. {scraper2/app/ports/browser → scraper2_hj3415/app/adapters/site}/__init__.py +0 -0
  130. {scraper2/app/ports/sinks → scraper2_hj3415/app/domain}/__init__.py +0 -0
  131. {scraper2/app/usecases → scraper2_hj3415/app/parsing}/__init__.py +0 -0
  132. {scraper2/app/usecases/fetch → scraper2_hj3415/app/parsing/_normalize}/__init__.py +0 -0
  133. {scraper2/app/usecases/ingest → scraper2_hj3415/app/parsing/_tables}/__init__.py +0 -0
  134. {scraper2_hj3415-2.4.1.dist-info → scraper2_hj3415-2.7.0.dist-info}/WHEEL +0 -0
  135. {scraper2_hj3415-2.4.1.dist-info → scraper2_hj3415-2.7.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,16 @@
+ from __future__ import annotations
+
+ from typing import Protocol
+
+
+ class BrowserTextPort(Protocol):
+     """DOM lookup and text extraction."""
+
+     async def count(self, selector: str) -> int: ...
+
+     async def text_content_first(self, selector: str) -> str: ...
+     async def all_texts(self, selector: str) -> list[str]: ...
+     async def get_text_by_text(self, needle: str) -> str: ...
+
+     async def inner_text(self, selector: str) -> str: ...
+     async def outer_html_nth(self, selector: str, index: int) -> str: ...
@@ -0,0 +1,51 @@
+ from __future__ import annotations
+
+ import asyncio
+ from typing import Protocol
+
+
+ class BrowserWaitPort(Protocol):
+     """Waiting/synchronization utilities."""
+
+     async def sleep_ms(self, ms: int) -> None:
+         """
+         Pass ms=1000 to sleep for one second.
+         A default implementation is provided in case an adapter does not override it.
+         """
+         await asyncio.sleep(ms / 1000)
+
+     async def wait_attached(
+         self,
+         selector: str,
+         *,
+         timeout_ms: int = 10_000,
+     ) -> None: ...
+
+     async def wait_visible(
+         self,
+         selector: str,
+         *,
+         timeout_ms: int = 10_000,
+     ) -> None: ...
+
+     async def wait_table_nth_ready(
+         self,
+         table_selector: str,
+         *,
+         index: int,
+         min_rows: int = 1,
+         timeout_ms: int = 20_000,
+         poll_ms: int = 200,
+     ) -> None: ...
+
+     async def wait_table_text_changed(
+         self,
+         table_selector: str,
+         *,
+         index: int,
+         prev_text: str | None,
+         min_rows: int = 1,
+         min_lines: int = 50,
+         timeout_sec: float = 12.0,
+         poll_sec: float = 0.2,
+     ) -> str: ...
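
A concrete adapter (capabilities/wait.py above) fills in these stubs. As a rough sketch of what a Playwright-backed implementation of wait_table_nth_ready might look like (the polling loop and row check are assumptions, not the package's actual code):

    import asyncio
    import time

    from playwright.async_api import Page

    class PlaywrightWait:
        """Hypothetical BrowserWaitPort adapter over a Playwright Page."""

        def __init__(self, page: Page) -> None:
            self._page = page

        async def wait_table_nth_ready(
            self,
            table_selector: str,
            *,
            index: int,
            min_rows: int = 1,
            timeout_ms: int = 20_000,
            poll_ms: int = 200,
        ) -> None:
            table = self._page.locator(table_selector).nth(index)
            deadline = time.monotonic() + timeout_ms / 1000
            while time.monotonic() < deadline:
                # Ready: the nth table exists and its body has at least min_rows rows.
                if await table.count() == 1 and await table.locator("tbody tr").count() >= min_rows:
                    return
                await asyncio.sleep(poll_ms / 1000)
            raise TimeoutError(f"table[{index}] {table_selector!r} not ready after {timeout_ms}ms")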
File without changes
@@ -0,0 +1,28 @@
+ # scraper2_hj3415/app/ports/ingest/nfs_ingest_port.py
+ from __future__ import annotations
+ from typing import Protocol, Iterable, Optional, TypeVar
+ from datetime import datetime
+
+ from contracts_hj3415.nfs.nfs_dto import NfsDTO
+
+ TDto = TypeVar("TDto", bound=NfsDTO)
+
+
+ class NfsIngestPort(Protocol[TDto]):
+     async def execute(
+         self,
+         code: str,
+         *,
+         sleep_sec: float = ...,
+         asof: Optional[datetime] = None,
+     ) -> TDto:
+         ...
+
+     async def execute_many(
+         self,
+         codes: Iterable[str],
+         *,
+         sleep_sec: float = ...,
+         asof: Optional[datetime] = None,
+     ) -> list[TDto]:
+         ...
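
Because NfsIngestPort is a generic structural Protocol, the concrete ingest usecases above satisfy it without subclassing; matching the method signatures is enough for a static type checker. A hypothetical illustration (DummyIngest is not part of the package):

    from datetime import datetime
    from typing import Iterable, Optional

    class DummyIngest:
        async def execute(
            self, code: str, *, sleep_sec: float = 2.0, asof: Optional[datetime] = None
        ) -> NfsDTO: ...

        async def execute_many(
            self, codes: Iterable[str], *, sleep_sec: float = 2.0, asof: Optional[datetime] = None
        ) -> list[NfsDTO]: ...

    ingest: NfsIngestPort[NfsDTO] = DummyIngest()  # accepted structurally by mypy/pyright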
File without changes
@@ -0,0 +1,20 @@
+ # scraper2_hj3415/app/ports/sinks/nfs_sink_port.py
+ from __future__ import annotations
+
+ from typing import Protocol, Iterable, TypeVar
+ from contracts_hj3415.nfs.types import Endpoint
+ from contracts_hj3415.nfs.nfs_dto import NfsDTO
+
+ TDto = TypeVar("TDto", bound=NfsDTO)
+
+ class NfsSinkPort(Protocol[TDto]):
+     async def write(
+         self, dto: TDto, *, endpoint: Endpoint
+     ) -> None: ...
+
+     async def write_many(
+         self,
+         dtos: Iterable[TDto],
+         *,
+         endpoint: Endpoint
+     ) -> None: ...
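
Any object with matching write/write_many coroutines satisfies this port; an in-memory sink could look roughly like the following sketch (illustrative only; the package's real adapter is adapters/out/sinks/memory_sink.py above):

    from collections import defaultdict
    from typing import Iterable

    class InMemorySink:
        """Hypothetical sink that buckets DTOs per endpoint."""

        def __init__(self) -> None:
            self.store: dict[Endpoint, list[NfsDTO]] = defaultdict(list)

        async def write(self, dto: NfsDTO, *, endpoint: Endpoint) -> None:
            self.store[endpoint].append(dto)

        async def write_many(self, dtos: Iterable[NfsDTO], *, endpoint: Endpoint) -> None:
            for dto in dtos:
                await self.write(dto, endpoint=endpoint)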
File without changes
@@ -0,0 +1,30 @@
+ # scraper2_hj3415/app/ports/site/wisereport_port.py
+ from __future__ import annotations
+ from typing import Protocol
+
+ class WiseReportPort(Protocol):
+     async def set_view_c103(
+         self,
+         *,
+         key: str,
+         steps: list[tuple[str, str]],
+         table_selector: str,
+         table_index: int,
+         max_attempts: int = 5,
+         stabilize_timeout_sec: float = 10.0,
+     ) -> None: ...
+
+     async def set_view_c104(
+         self,
+         *,
+         key: str,
+         steps: list[tuple[str, str]],
+         table_selector: str,
+         table_index: int,
+         prev_text_by_idx: dict[int, str | None],
+         max_attempts: int = 5,
+         stabilize_timeout_sec: float = 10.0,
+         min_rows: int = 5,
+         min_lines: int = 30,
+         open_consensus: bool = True,
+     ) -> None: ...
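
An implementation of set_view_* essentially clicks through the (label, selector) steps and then waits for the target table to stabilize, retrying up to max_attempts. A condensed sketch under assumed capability names (the package's real adapter is adapters/site/wisereport_playwright.py above; click and wait_table_nth_ready stand in for the interaction/wait ports):

    async def set_view(browser, *, steps, table_selector, table_index, max_attempts=5):
        for attempt in range(1, max_attempts + 1):
            for label, selector in steps:
                await browser.click(selector)  # assumed BrowserInteractionPort method
            try:
                await browser.wait_table_nth_ready(table_selector, index=table_index)
                return  # table rendered with the requested view
            except TimeoutError:
                continue  # re-click the step sequence and try again
        raise RuntimeError(f"view did not stabilize after {max_attempts} attempts")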
File without changes
File without changes
@@ -0,0 +1,59 @@
+ # scraper2_hj3415/app/usecases/fetch/fetch_c101.py
+ from __future__ import annotations
+
+ import asyncio
+ import random
+ from typing import Iterable
+
+ from logging_hj3415 import logger
+ from scraper2_hj3415.app.ports.browser.browser_factory_port import BrowserFactoryPort
+ from scraper2_hj3415.app.parsing.c101_parser import parse_c101_to_dict
+
+ from scraper2_hj3415.app.services.nfs_doc_builders import build_c101_doc_from_parsed
+ from scraper2_hj3415.app.domain.endpoint import EndpointKind
+ from scraper2_hj3415.app.domain.doc import NfsDoc
+ from scraper2_hj3415.app.domain.blocks import BLOCK_KEYS_BY_ENDPOINT
+
+
+ class FetchC101:
+     def __init__(self, factory: BrowserFactoryPort):
+         self.factory = factory
+
+     async def _fetch_one(self, code: str, *, sleep_sec: float) -> NfsDoc | None:
+         async with self.factory.lease() as browser:
+             url = f"https://navercomp.wisereport.co.kr/v2/company/c1010001.aspx?cmp_cd={code}"
+             await browser.goto_and_wait_for_stable(url, timeout_ms=10_000)
+
+             if sleep_sec > 0:
+                 await asyncio.sleep(sleep_sec + random.uniform(0, 1.0))
+
+             parsed = await parse_c101_to_dict(browser)
+
+             logger.debug(f"parsed data: {parsed}")
+             block_keys = BLOCK_KEYS_BY_ENDPOINT[EndpointKind.C101]
+             if not parsed or all(not (parsed.get(str(bk)) or []) for bk in block_keys):
+                 logger.warning(
+                     f"c101 fetch: parsed result empty; return None | code={code}"
+                 )
+                 return None
+
+             doc = build_c101_doc_from_parsed(
+                 code=code, parsed=parsed, keep_empty_blocks=True
+             )
+             logger.debug(f"c101 doc: {doc}")
+             return doc
+
+     async def execute(self, code: str, *, sleep_sec: float = 2.0) -> NfsDoc | None:
+         return await self._fetch_one(code, sleep_sec=sleep_sec)
+
+     async def execute_many(
+         self,
+         codes: Iterable[str],
+         *,
+         sleep_sec: float = 2.0,
+     ) -> list[NfsDoc]:
+         results = await asyncio.gather(
+             *(self._fetch_one(c, sleep_sec=sleep_sec) for c in codes),
+             return_exceptions=False,
+         )
+         return [r for r in results if r is not None]
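
Given a browser factory (wired in app/composition.py), the usecase can be driven end to end like this (a sketch; build_browser_factory is a hypothetical stand-in for the composition wiring):

    import asyncio

    from scraper2_hj3415.app.usecases.fetch.fetch_c101 import FetchC101

    async def main() -> None:
        factory = build_browser_factory()  # hypothetical helper; see app/composition.py
        fetch = FetchC101(factory)
        docs = await fetch.execute_many(["005930", "000660"], sleep_sec=1.0)
        print(f"fetched {len(docs)} c101 documents")

    asyncio.run(main())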
@@ -0,0 +1,121 @@
+ # scraper2_hj3415/app/usecases/fetch/fetch_c103.py
+ from __future__ import annotations
+
+ import asyncio
+ import random
+ from typing import Iterable, Any
+
+ from logging_hj3415 import logger
+ from scraper2_hj3415.app.ports.browser.browser_factory_port import BrowserFactoryPort
+ from scraper2_hj3415.app.ports.site.wisereport_port import WiseReportPort
+ from scraper2_hj3415.app.adapters.site.wisereport_playwright import WiseReportPlaywright
+
+ from scraper2_hj3415.app.parsing.c103_parser import parse_c103_current_table, TABLE_XPATH
+ from scraper2_hj3415.app.services.nfs_doc_builders import build_metrics_doc_from_parsed
+
+ from scraper2_hj3415.app.domain.endpoint import EndpointKind
+ from scraper2_hj3415.app.domain.blocks import BLOCK_KEYS_BY_ENDPOINT
+ from scraper2_hj3415.app.domain.doc import NfsDoc
+
+ BTN_SETS: dict[str, list[tuple[str, str]]] = {
+     "손익계산서y": [
+         ("손익계산서", 'xpath=//*[@id="rpt_tab1"]'),
+         ("연간", 'xpath=//*[@id="frqTyp0"]'),
+         ("검색", 'xpath=//*[@id="hfinGubun"]'),
+     ],
+     "재무상태표y": [
+         ("재무상태표", 'xpath=//*[@id="rpt_tab2"]'),
+         ("연간", 'xpath=//*[@id="frqTyp0"]'),
+         ("검색", 'xpath=//*[@id="hfinGubun"]'),
+     ],
+     "현금흐름표y": [
+         ("현금흐름표", 'xpath=//*[@id="rpt_tab3"]'),
+         ("연간", 'xpath=//*[@id="frqTyp0"]'),
+         ("검색", 'xpath=//*[@id="hfinGubun"]'),
+     ],
+     "손익계산서q": [
+         ("손익계산서", 'xpath=//*[@id="rpt_tab1"]'),
+         ("분기", 'xpath=//*[@id="frqTyp1"]'),
+         ("검색", 'xpath=//*[@id="hfinGubun"]'),
+     ],
+     "재무상태표q": [
+         ("재무상태표", 'xpath=//*[@id="rpt_tab2"]'),
+         ("분기", 'xpath=//*[@id="frqTyp1"]'),
+         ("검색", 'xpath=//*[@id="hfinGubun"]'),
+     ],
+     "현금흐름표q": [
+         ("현금흐름표", 'xpath=//*[@id="rpt_tab3"]'),
+         ("분기", 'xpath=//*[@id="frqTyp1"]'),
+         ("검색", 'xpath=//*[@id="hfinGubun"]'),
+     ],
+ }
+
+ TABLE_INDEX = 2
+
+
+ class FetchC103:
+     def __init__(self, factory: BrowserFactoryPort):
+         self.factory = factory
+
+     async def _fetch_one(self, code: str, *, sleep_sec: float) -> NfsDoc | None:
+         async with self.factory.lease() as browser:
+             wr: WiseReportPort = WiseReportPlaywright(browser)
+
+             url = (
+                 "https://navercomp.wisereport.co.kr/v2/company/c1030001.aspx"
+                 f"?cn=&cmp_cd={code}"
+             )
+             await browser.goto_and_wait_for_stable(url, timeout_ms=10_000)
+
+             if sleep_sec > 0:
+                 await asyncio.sleep(sleep_sec + random.uniform(0, 1.0))
+
+             parsed: dict[str, list[dict[str, Any]]] = {}
+
+             for key, steps in BTN_SETS.items():
+                 try:
+                     # 1) Settle the view state (verify via the header that annual/quarterly took effect)
+                     await wr.set_view_c103(
+                         key=key,
+                         steps=steps,
+                         table_selector=TABLE_XPATH,
+                         table_index=TABLE_INDEX,
+                     )
+
+                     # 2) Parse only the table currently on screen
+                     parsed[key] = await parse_c103_current_table(browser)
+
+                 except Exception as e:
+                     logger.warning(
+                         f"c103 view/parse failed: key={key} err={type(e).__name__}: {e}"
+                     )
+                     parsed[key] = []
+
+             block_keys = BLOCK_KEYS_BY_ENDPOINT[EndpointKind.C103]
+             if not parsed or all(not (parsed.get(str(bk)) or []) for bk in block_keys):
+                 logger.warning(
+                     f"c103 fetch: parsed result empty; return None | code={code}"
+                 )
+                 return None
+
+             doc = build_metrics_doc_from_parsed(
+                 code=code,
+                 endpoint_kind=EndpointKind.C103,
+                 parsed=parsed,
+                 block_keys=block_keys,
+                 item_key="항목",
+                 raw_label_key="항목_raw",
+                 keep_empty_blocks=True,
+             )
+             return doc
+
+     async def execute(self, code: str, *, sleep_sec: float = 2.0) -> NfsDoc | None:
+         return await self._fetch_one(code, sleep_sec=sleep_sec)
+
+     async def execute_many(
+         self, codes: Iterable[str], *, sleep_sec: float = 2.0
+     ) -> list[NfsDoc]:
+         results = await asyncio.gather(
+             *(self._fetch_one(c, sleep_sec=sleep_sec) for c in codes)
+         )
+         return [r for r in results if r is not None]
@@ -0,0 +1,160 @@
+ # scraper2_hj3415/app/usecases/fetch/fetch_c104.py
+ from __future__ import annotations
+
+ import asyncio
+ import random
+ from typing import Any, Iterable
+
+ from logging_hj3415 import logger
+ from scraper2_hj3415.app.ports.browser.browser_factory_port import BrowserFactoryPort
+ from scraper2_hj3415.app.ports.site.wisereport_port import WiseReportPort
+ from scraper2_hj3415.app.adapters.site.wisereport_playwright import WiseReportPlaywright
+
+ from scraper2_hj3415.app.parsing.c104_parser import parse_c104_current_table, TABLE_XPATH
+ from scraper2_hj3415.app.services.nfs_doc_builders import build_metrics_doc_from_parsed
+
+ from scraper2_hj3415.app.domain.endpoint import EndpointKind
+ from scraper2_hj3415.app.domain.blocks import BLOCK_KEYS_BY_ENDPOINT
+ from scraper2_hj3415.app.domain.doc import NfsDoc
+
+ BTN_SETS: dict[str, list[tuple[str, str]]] = {
+     "수익성y": [
+         ("수익성", 'xpath=//*[@id="val_tab1"]'),
+         ("연간", 'xpath=//*[@id="frqTyp0"]'),
+         ("검색", 'xpath=//*[@id="hfinGubun"]'),
+     ],
+     "성장성y": [
+         ("성장성", 'xpath=//*[@id="val_tab2"]'),
+         ("연간", 'xpath=//*[@id="frqTyp0"]'),
+         ("검색", 'xpath=//*[@id="hfinGubun"]'),
+     ],
+     "안정성y": [
+         ("안정성", 'xpath=//*[@id="val_tab3"]'),
+         ("연간", 'xpath=//*[@id="frqTyp0"]'),
+         ("검색", 'xpath=//*[@id="hfinGubun"]'),
+     ],
+     "활동성y": [
+         ("활동성", 'xpath=//*[@id="val_tab4"]'),
+         ("연간", 'xpath=//*[@id="frqTyp0"]'),
+         ("검색", 'xpath=//*[@id="hfinGubun"]'),
+     ],
+     "가치분석y": [
+         ("가치분석연간", 'xpath=//*[@id="frqTyp0_2"]'),
+         ("가치분석검색", 'xpath=//*[@id="hfinGubun2"]'),
+     ],
+     "수익성q": [
+         ("수익성", 'xpath=//*[@id="val_tab1"]'),
+         ("분기", 'xpath=//*[@id="frqTyp1"]'),
+         ("검색", 'xpath=//*[@id="hfinGubun"]'),
+     ],
+     "성장성q": [
+         ("성장성", 'xpath=//*[@id="val_tab2"]'),
+         ("분기", 'xpath=//*[@id="frqTyp1"]'),
+         ("검색", 'xpath=//*[@id="hfinGubun"]'),
+     ],
+     "안정성q": [
+         ("안정성", 'xpath=//*[@id="val_tab3"]'),
+         ("분기", 'xpath=//*[@id="frqTyp1"]'),
+         ("검색", 'xpath=//*[@id="hfinGubun"]'),
+     ],
+     "활동성q": [
+         ("활동성", 'xpath=//*[@id="val_tab4"]'),
+         ("분기", 'xpath=//*[@id="frqTyp1"]'),
+         ("검색", 'xpath=//*[@id="hfinGubun"]'),
+     ],
+     "가치분석q": [
+         ("가치분석분기", 'xpath=//*[@id="frqTyp1_2"]'),
+         ("가치분석검색", 'xpath=//*[@id="hfinGubun2"]'),
+     ],
+ }
+
+
+ def _table_index_for_key(key: str) -> int:
+     # The valuation tabs ("가치분석") use a separate table (usually index=1)
+     return 1 if key.startswith("가치분석") else 0
+
+
+ class FetchC104:
+     def __init__(self, factory: BrowserFactoryPort):
+         self.factory = factory
+
+     async def _fetch_one(self, code: str, *, sleep_sec: float) -> NfsDoc | None:
+         async with self.factory.lease() as browser:
+             wr: WiseReportPort = WiseReportPlaywright(browser)
+
+             url = (
+                 "https://navercomp.wisereport.co.kr/v2/company/c1040001.aspx"
+                 f"?cn=&cmp_cd={code}"
+             )
+             await browser.goto_and_wait_for_stable(url, timeout_ms=10_000)
+
+             if sleep_sec > 0:
+                 await asyncio.sleep(sleep_sec + random.uniform(0, 1.0))
+
+             parsed: dict[str, list[dict[str, Any]]] = {}
+
+             # Track stabilization state per table index
+             prev_text_by_idx: dict[int, str | None] = {0: None, 1: None}
+
+             for key, steps in BTN_SETS.items():
+                 idx = _table_index_for_key(key)
+
+                 try:
+                     # 1) Settle the view state (verify via the header that annual/quarterly took effect)
+                     await wr.set_view_c104(
+                         key=key,
+                         steps=steps,
+                         table_selector=TABLE_XPATH,
+                         table_index=idx,
+                         prev_text_by_idx=prev_text_by_idx,
+                         max_attempts=5,
+                         stabilize_timeout_sec=10.0,
+                         min_rows=5,
+                         min_lines=30,
+                         open_consensus=True,
+                     )
+
+                     # 2) Parse only the single table at idx on the current screen
+                     parsed[key] = await parse_c104_current_table(
+                         browser,
+                         table_index=idx,
+                     )
+
+                 except Exception as e:
+                     logger.warning(
+                         f"c104 view/parse failed: key={key} idx={idx} err={type(e).__name__}: {e}"
+                     )
+                     parsed[key] = []
+
+             block_keys = BLOCK_KEYS_BY_ENDPOINT[EndpointKind.C104]
+             if not parsed or all(not (parsed.get(str(bk)) or []) for bk in block_keys):
+                 logger.warning(
+                     f"c104 fetch: parsed result empty; return None | code={code}"
+                 )
+                 return None
+
+             doc = build_metrics_doc_from_parsed(
+                 code=code,
+                 endpoint_kind=EndpointKind.C104,
+                 parsed=parsed,
+                 block_keys=block_keys,
+                 item_key="항목",
+                 raw_label_key="항목_raw",
+                 keep_empty_blocks=True,
+             )
+             return doc
+
+     async def execute(self, code: str, *, sleep_sec: float = 2.0) -> NfsDoc | None:
+         return await self._fetch_one(code, sleep_sec=sleep_sec)
+
+     async def execute_many(
+         self,
+         codes: Iterable[str],
+         *,
+         sleep_sec: float = 2.0,
+     ) -> list[NfsDoc]:
+         results = await asyncio.gather(
+             *(self._fetch_one(c, sleep_sec=sleep_sec) for c in codes),
+             return_exceptions=False,
+         )
+         return [r for r in results if r is not None]
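
The prev_text_by_idx snapshots feed BrowserWaitPort.wait_table_text_changed, which distinguishes a genuinely re-rendered table from a stale one after a tab click. A polling sketch of that check (assumed implementation; the package's real one lives in adapters/out/playwright/capabilities/wait.py):

    import asyncio
    import time

    async def wait_table_text_changed(
        page, table_selector, *, index, prev_text,
        min_rows=1, min_lines=50, timeout_sec=12.0, poll_sec=0.2,
    ) -> str:
        table = page.locator(table_selector).nth(index)
        deadline = time.monotonic() + timeout_sec
        while time.monotonic() < deadline:
            text = await table.inner_text()
            rows = await table.locator("tbody tr").count()
            # Changed: enough rows/lines rendered AND different from the pre-click snapshot.
            if rows >= min_rows and len(text.splitlines()) >= min_lines and text != prev_text:
                return text
            await asyncio.sleep(poll_sec)
        raise TimeoutError(f"table[{index}] text did not change within {timeout_sec}s")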
@@ -0,0 +1,90 @@
+ # scraper2_hj3415/app/usecases/fetch/fetch_c106.py
+ from __future__ import annotations
+
+ import asyncio
+ import random
+ from typing import Iterable, Any
+
+ from logging_hj3415 import logger
+ from scraper2_hj3415.app.ports.browser.browser_factory_port import BrowserFactoryPort
+ from scraper2_hj3415.app.parsing.c106_parser import (
+     parse_c106_header_codes,
+     parse_c106_current_table,
+ )
+ from scraper2_hj3415.app.services.nfs_doc_builders import build_metrics_doc_from_parsed
+ from scraper2_hj3415.app.domain.endpoint import EndpointKind
+ from scraper2_hj3415.app.domain.doc import NfsDoc
+ from scraper2_hj3415.app.domain.blocks import BLOCK_KEYS_BY_ENDPOINT
+
+
+ class FetchC106:
+     def __init__(self, factory: BrowserFactoryPort):
+         self.factory = factory
+
+     async def _fetch_one(self, code: str, *, sleep_sec: float) -> NfsDoc | None:
+         async with self.factory.lease() as browser:
+             # 1) Base page, used to extract the peer-company header codes
+             url0 = (
+                 "https://navercomp.wisereport.co.kr/v2/company/c1060001.aspx"
+                 f"?cn=&cmp_cd={code}"
+             )
+             await browser.goto_and_wait_for_stable(url0, timeout_ms=10_000)
+
+             if sleep_sec > 0:
+                 await asyncio.sleep(sleep_sec + random.uniform(0, 1.0))
+
+             header_codes = await parse_c106_header_codes(browser)
+             if not header_codes:
+                 logger.warning(f"c106 fetch: header codes empty; code={code}")
+                 return None
+
+             base_url = (
+                 "https://navercomp.wisereport.co.kr/v2/company/cF6002.aspx"
+                 f"?cmp_cd={code}&finGubun=MAIN&sec_cd=FG000&frq="
+             )
+
+             parsed: dict[str, list[dict[str, Any]]] = {}
+
+             for frq in ("q", "y"):
+                 url = base_url + frq
+                 await browser.goto_and_wait_for_stable(url, timeout_ms=10_000)
+
+                 # Keep the existing jitter (could be promoted to a policy if needed)
+                 await asyncio.sleep(0.5 + random.uniform(0, 0.3))
+
+                 parsed[frq] = await parse_c106_current_table(
+                     browser,
+                     columns=header_codes,
+                     table_selector="#cTB611",
+                     table_index=0,
+                     timeout_ms=10_000,
+                 )
+
+             logger.debug(f"parsed:\n{parsed}")
+
+             block_keys = BLOCK_KEYS_BY_ENDPOINT[EndpointKind.C106]
+             if not parsed or all(not (parsed.get(str(bk)) or []) for bk in block_keys):
+                 logger.warning(f"c106 fetch: parsed result empty; return None | code={code}")
+                 return None
+
+             doc = build_metrics_doc_from_parsed(
+                 code=code,
+                 endpoint_kind=EndpointKind.C106,
+                 parsed=parsed,
+                 block_keys=block_keys,
+                 item_key="항목",
+                 raw_label_key="항목_raw",
+                 keep_empty_blocks=True,
+             )
+             logger.debug(f"c106 doc: {doc}")
+             return doc
+
+     async def execute(self, code: str, *, sleep_sec: float = 2.0) -> NfsDoc | None:
+         return await self._fetch_one(code, sleep_sec=sleep_sec)
+
+     async def execute_many(self, codes: Iterable[str], *, sleep_sec: float = 2.0) -> list[NfsDoc]:
+         results = await asyncio.gather(
+             *(self._fetch_one(c, sleep_sec=sleep_sec) for c in codes),
+             return_exceptions=False,
+         )
+         return [r for r in results if r is not None]
@@ -0,0 +1,59 @@
+ # scraper2_hj3415/app/usecases/fetch/fetch_c108.py
+ from __future__ import annotations
+
+ import asyncio
+ import random
+ from typing import Iterable
+
+ from logging_hj3415 import logger
+ from scraper2_hj3415.app.ports.browser.browser_factory_port import BrowserFactoryPort
+ from scraper2_hj3415.app.parsing.c108_parser import parse_c108_to_dict
+ from scraper2_hj3415.app.services.nfs_doc_builders import build_c108_doc_from_parsed
+
+ from scraper2_hj3415.app.domain.endpoint import EndpointKind
+ from scraper2_hj3415.app.domain.doc import NfsDoc
+ from scraper2_hj3415.app.domain.blocks import BLOCK_KEYS_BY_ENDPOINT
+
+
+ class FetchC108:
+     def __init__(self, factory: BrowserFactoryPort):
+         self.factory = factory
+
+     async def _fetch_one(self, code: str, *, sleep_sec: float) -> NfsDoc | None:
+         async with self.factory.lease() as browser:
+             url = f"https://navercomp.wisereport.co.kr/v2/company/c1080001.aspx?cn=&cmp_cd={code}"
+             await browser.goto_and_wait_for_stable(url, timeout_ms=10_000)
+
+             if sleep_sec > 0:
+                 await asyncio.sleep(sleep_sec + random.uniform(0, 1.0))
+
+             parsed = await parse_c108_to_dict(browser=browser)
+
+             logger.debug(f"parsed:\n{parsed}")
+             block_keys = BLOCK_KEYS_BY_ENDPOINT[EndpointKind.C108]
+             if not parsed or all(not (parsed.get(str(bk)) or []) for bk in block_keys):
+                 logger.warning(
+                     f"c108 fetch: parsed result empty; return None | code={code}"
+                 )
+                 return None
+
+             doc = build_c108_doc_from_parsed(
+                 code=code, parsed=parsed, keep_empty_blocks=True
+             )
+             logger.debug(f"c108 doc: {doc}")
+             return doc
+
+     async def execute(self, code: str, *, sleep_sec: float = 2.0) -> NfsDoc | None:
+         return await self._fetch_one(code, sleep_sec=sleep_sec)
+
+     async def execute_many(
+         self,
+         codes: Iterable[str],
+         *,
+         sleep_sec: float = 2.0,
+     ) -> list[NfsDoc]:
+         results = await asyncio.gather(
+             *(self._fetch_one(c, sleep_sec=sleep_sec) for c in codes),
+             return_exceptions=False,
+         )
+         return [r for r in results if r is not None]
+ return [r for r in results if r is not None]