scraper2-hj3415 2.4.1__py3-none-any.whl → 2.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scraper2_hj3415/app/adapters/out/playwright/browser.py +373 -0
- {scraper2 → scraper2_hj3415/app}/adapters/out/playwright/browser_factory.py +5 -5
- {scraper2 → scraper2_hj3415/app}/adapters/out/playwright/session.py +1 -1
- scraper2_hj3415/app/adapters/out/sinks/memory_sink.py +25 -0
- scraper2_hj3415/app/adapters/out/sinks/mongo_sink.py +63 -0
- {scraper2/adapters/out/sinks/memory → scraper2_hj3415/app/adapters/out/sinks}/store.py +14 -5
- scraper2_hj3415/app/adapters/site/wisereport_playwright.py +168 -0
- scraper2_hj3415/app/composition.py +225 -0
- scraper2_hj3415/app/domain/blocks.py +61 -0
- scraper2_hj3415/app/domain/constants.py +33 -0
- scraper2_hj3415/app/domain/doc.py +16 -0
- scraper2_hj3415/app/domain/endpoint.py +11 -0
- scraper2_hj3415/app/domain/series.py +11 -0
- scraper2_hj3415/app/domain/types.py +19 -0
- scraper2_hj3415/app/parsing/_normalize/label.py +92 -0
- scraper2_hj3415/app/parsing/_normalize/table.py +53 -0
- scraper2_hj3415/app/parsing/_normalize/text.py +31 -0
- scraper2_hj3415/app/parsing/_normalize/values.py +70 -0
- scraper2_hj3415/app/parsing/_tables/html_table.py +88 -0
- scraper2_hj3415/app/parsing/c101/__init__.py +0 -0
- scraper2_hj3415/app/parsing/c101/_sise_normalizer.py +103 -0
- scraper2_hj3415/app/parsing/c101/company_overview.py +47 -0
- scraper2_hj3415/app/parsing/c101/earning_surprise.py +217 -0
- scraper2_hj3415/app/parsing/c101/fundamentals.py +95 -0
- scraper2_hj3415/app/parsing/c101/major_shareholders.py +57 -0
- scraper2_hj3415/app/parsing/c101/sise.py +47 -0
- scraper2_hj3415/app/parsing/c101/summary_cmp.py +87 -0
- scraper2_hj3415/app/parsing/c101/yearly_consensus.py +197 -0
- scraper2_hj3415/app/parsing/c101_parser.py +45 -0
- scraper2_hj3415/app/parsing/c103_parser.py +19 -0
- scraper2_hj3415/app/parsing/c104_parser.py +23 -0
- scraper2_hj3415/app/parsing/c106_parser.py +137 -0
- scraper2_hj3415/app/parsing/c108_parser.py +254 -0
- scraper2_hj3415/app/ports/__init__.py +0 -0
- scraper2_hj3415/app/ports/browser/__init__.py +0 -0
- scraper2_hj3415/app/ports/browser/browser_factory_port.py +9 -0
- scraper2_hj3415/app/ports/browser/browser_port.py +115 -0
- scraper2_hj3415/app/ports/ingest/__init__.py +0 -0
- scraper2_hj3415/app/ports/ingest/nfs_ingest_port.py +28 -0
- scraper2_hj3415/app/ports/sinks/__init__.py +0 -0
- scraper2_hj3415/app/ports/sinks/nfs_sink_port.py +20 -0
- scraper2_hj3415/app/ports/site/__init__.py +0 -0
- scraper2_hj3415/app/ports/site/wisereport_port.py +20 -0
- scraper2_hj3415/app/services/__init__.py +0 -0
- scraper2_hj3415/app/services/fetch/__init__.py +0 -0
- scraper2_hj3415/app/services/fetch/fetch_c101.py +59 -0
- scraper2_hj3415/app/services/fetch/fetch_c103.py +135 -0
- scraper2_hj3415/app/services/fetch/fetch_c104.py +183 -0
- scraper2_hj3415/app/services/fetch/fetch_c106.py +90 -0
- scraper2_hj3415/app/services/fetch/fetch_c108.py +59 -0
- scraper2_hj3415/app/services/nfs_doc_builders.py +290 -0
- scraper2_hj3415/app/usecases/__init__.py +0 -0
- scraper2_hj3415/app/usecases/ingest/__init__.py +0 -0
- scraper2_hj3415/app/usecases/ingest/ingest_c101.py +111 -0
- scraper2_hj3415/app/usecases/ingest/ingest_c103.py +162 -0
- scraper2_hj3415/app/usecases/ingest/ingest_c104.py +182 -0
- scraper2_hj3415/app/usecases/ingest/ingest_c106.py +136 -0
- scraper2_hj3415/app/usecases/ingest/ingest_c108.py +122 -0
- scraper2/main.py → scraper2_hj3415/cli.py +40 -70
- {scraper2_hj3415-2.4.1.dist-info → scraper2_hj3415-2.6.0.dist-info}/METADATA +3 -1
- scraper2_hj3415-2.6.0.dist-info/RECORD +75 -0
- scraper2_hj3415-2.6.0.dist-info/entry_points.txt +3 -0
- scraper2/.DS_Store +0 -0
- scraper2/adapters/out/.DS_Store +0 -0
- scraper2/adapters/out/playwright/browser.py +0 -102
- scraper2/adapters/out/sinks/.DS_Store +0 -0
- scraper2/adapters/out/sinks/memory/__init__.py +0 -15
- scraper2/adapters/out/sinks/memory/c101_memory_sink.py +0 -26
- scraper2/adapters/out/sinks/memory/c103_memory_sink.py +0 -26
- scraper2/adapters/out/sinks/memory/c104_memory_sink.py +0 -26
- scraper2/adapters/out/sinks/memory/c106_memory_sink.py +0 -26
- scraper2/adapters/out/sinks/memory/c108_memory_sink.py +0 -26
- scraper2/adapters/out/sinks/mongo/__init__.py +0 -14
- scraper2/adapters/out/sinks/mongo/c101_mongo_sink.py +0 -43
- scraper2/adapters/out/sinks/mongo/c103_mongo_sink.py +0 -41
- scraper2/adapters/out/sinks/mongo/c104_mongo_sink.py +0 -41
- scraper2/adapters/out/sinks/mongo/c106_mongo_sink.py +0 -41
- scraper2/adapters/out/sinks/mongo/c108_mongo_sink.py +0 -41
- scraper2/app/composition.py +0 -204
- scraper2/app/parsing/_converters.py +0 -85
- scraper2/app/parsing/_normalize.py +0 -134
- scraper2/app/parsing/c101_parser.py +0 -143
- scraper2/app/parsing/c103_parser.py +0 -128
- scraper2/app/parsing/c104_parser.py +0 -143
- scraper2/app/parsing/c106_parser.py +0 -153
- scraper2/app/parsing/c108_parser.py +0 -65
- scraper2/app/ports/browser/browser_factory_port.py +0 -11
- scraper2/app/ports/browser/browser_port.py +0 -22
- scraper2/app/ports/ingest_port.py +0 -14
- scraper2/app/ports/sinks/base_sink_port.py +0 -14
- scraper2/app/ports/sinks/c101_sink_port.py +0 -9
- scraper2/app/ports/sinks/c103_sink_port.py +0 -9
- scraper2/app/ports/sinks/c104_sink_port.py +0 -9
- scraper2/app/ports/sinks/c106_sink_port.py +0 -9
- scraper2/app/ports/sinks/c108_sink_port.py +0 -9
- scraper2/app/usecases/fetch/fetch_c101.py +0 -43
- scraper2/app/usecases/fetch/fetch_c103.py +0 -103
- scraper2/app/usecases/fetch/fetch_c104.py +0 -76
- scraper2/app/usecases/fetch/fetch_c106.py +0 -90
- scraper2/app/usecases/fetch/fetch_c108.py +0 -49
- scraper2/app/usecases/ingest/ingest_c101.py +0 -36
- scraper2/app/usecases/ingest/ingest_c103.py +0 -37
- scraper2/app/usecases/ingest/ingest_c104.py +0 -37
- scraper2/app/usecases/ingest/ingest_c106.py +0 -38
- scraper2/app/usecases/ingest/ingest_c108.py +0 -39
- scraper2_hj3415-2.4.1.dist-info/RECORD +0 -63
- scraper2_hj3415-2.4.1.dist-info/entry_points.txt +0 -3
- {scraper2 → scraper2_hj3415}/__init__.py +0 -0
- {scraper2/adapters/out → scraper2_hj3415/app}/__init__.py +0 -0
- {scraper2/adapters/out/playwright → scraper2_hj3415/app/adapters}/__init__.py +0 -0
- {scraper2/app → scraper2_hj3415/app/adapters/out}/__init__.py +0 -0
- {scraper2/app/parsing → scraper2_hj3415/app/adapters/out/playwright}/__init__.py +0 -0
- {scraper2/app/ports → scraper2_hj3415/app/adapters/out/sinks}/__init__.py +0 -0
- {scraper2/app/ports/browser → scraper2_hj3415/app/adapters/site}/__init__.py +0 -0
- {scraper2/app/ports/sinks → scraper2_hj3415/app/domain}/__init__.py +0 -0
- {scraper2/app/usecases → scraper2_hj3415/app/parsing}/__init__.py +0 -0
- {scraper2/app/usecases/fetch → scraper2_hj3415/app/parsing/_normalize}/__init__.py +0 -0
- {scraper2/app/usecases/ingest → scraper2_hj3415/app/parsing/_tables}/__init__.py +0 -0
- {scraper2_hj3415-2.4.1.dist-info → scraper2_hj3415-2.6.0.dist-info}/WHEEL +0 -0
- {scraper2_hj3415-2.4.1.dist-info → scraper2_hj3415-2.6.0.dist-info}/licenses/LICENSE +0 -0

scraper2_hj3415/app/usecases/ingest/ingest_c108.py (added)

@@ -0,0 +1,122 @@
+# scraper2_hj3415/app/usecases/ingest/ingest_c108.py
+from __future__ import annotations
+
+from datetime import datetime
+from typing import Iterable, Optional, cast
+
+from scraper2_hj3415.app.services.fetch.fetch_c108 import FetchC108
+from scraper2_hj3415.app.ports.sinks.nfs_sink_port import NfsSinkPort
+from common_hj3415.utils.time import utcnow
+
+from scraper2_hj3415.app.domain.endpoint import EndpointKind
+from scraper2_hj3415.app.domain.constants import get_block_keys
+from scraper2_hj3415.app.domain.doc import NfsDoc
+from scraper2_hj3415.app.domain.blocks import RecordsBlock
+
+from contracts_hj3415.nfs.types import Endpoints
+
+from contracts_hj3415.nfs.c108_dto import C108DTO, C108Payload, C108Blocks
+
+from logging_hj3415 import logger
+
+endpoint_kind = EndpointKind.C108
+endpoint: Endpoints = cast(Endpoints, endpoint_kind.value)
+
+
+def _to_list_of_dict(rows: object) -> list[dict]:
+    """
+    RecordsBlock.rows (Sequence[Mapping]) -> list[dict]
+    - coerce rows into dicts so sinks/serialization stay safe
+    """
+    if not rows:
+        return []
+    out: list[dict] = []
+    if isinstance(rows, list):
+        for r in rows:
+            if isinstance(r, dict):
+                out.append(r)
+            else:
+                out.append(dict(r))  # a Mapping converts cleanly via dict()
+        return out
+
+    # general Sequence[Mapping] case
+    try:
+        for r in rows:  # type: ignore[assignment]
+            out.append(dict(r))  # assumed to be a Mapping
+    except Exception:
+        return []
+    return out
+
+
+def c108_doc_to_dto(*, doc: NfsDoc, asof: datetime) -> C108DTO:
+    """
+    NfsDoc (domain) -> C108DTO (contracts envelope)
+
+    Rules:
+    - labels always exists (even if it is an empty dict)
+    - for c108 an empty labels is the normal case
+    - payload.blocks['리포트'] = list[dict]
+    """
+    if doc.endpoint_kind != EndpointKind.C108:
+        raise ValueError(f"c108_doc_to_dto expects C108 doc, got: {doc.endpoint_kind}")
+
+    # always build blocks/labels to match the contracts payload structure
+    blocks: C108Blocks = {"리포트": []}
+
+    # follow block_keys, although in practice the single '리포트' key is enough
+    for bk in get_block_keys(EndpointKind.C108):
+        if bk != "리포트":
+            continue
+
+        block = doc.blocks.get(bk)
+        if isinstance(block, RecordsBlock):
+            blocks["리포트"] = _to_list_of_dict(block.rows)
+        else:
+            # if the structure is unexpected, fall back to an empty value as safely as possible
+            blocks["리포트"] = []
+
+    payload: C108Payload = {"blocks": blocks}
+
+    return C108DTO(
+        code=doc.code,
+        asof=asof,
+        endpoint=endpoint,
+        payload=payload,
+    )
+
+
+class IngestC108:
+    def __init__(self, fetch: FetchC108, sink: NfsSinkPort[C108DTO]):
+        self.fetch = fetch
+        self.sink = sink
+
+    async def execute(
+        self, code: str, *, sleep_sec: float = 2.0, asof: datetime | None = None
+    ) -> C108DTO:
+        asof = asof or utcnow()
+
+        doc = await self.fetch.execute(code, sleep_sec=sleep_sec)
+        logger.debug(f"doc:\n{doc}")
+        if doc is None:
+            raise RuntimeError(f"c108 fetch returned None: code={code}")
+
+        dto = c108_doc_to_dto(doc=doc, asof=asof)
+        logger.debug(f"dto:\n{dto}")
+
+        await self.sink.write(dto, endpoint=endpoint)
+        return dto
+
+    async def execute_many(
+        self,
+        codes: Iterable[str],
+        *,
+        sleep_sec: float = 2.0,
+        asof: Optional[datetime] = None,
+    ) -> list[C108DTO]:
+        batch_asof = asof or utcnow()
+
+        docs = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
+        dtos = [c108_doc_to_dto(doc=d, asof=batch_asof) for d in docs]
+        logger.debug(f"dtos:\n{dtos}")
+        await self.sink.write_many(dtos, endpoint=endpoint)
+        return dtos
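The use case above touches its sink through exactly two awaited calls, write(dto, endpoint=...) and write_many(dtos, endpoint=...), generic over the DTO type. As orientation only, here is a minimal sketch of the port shape those calls imply; it is inferred from usage, and the actual definition in scraper2_hj3415/app/ports/sinks/nfs_sink_port.py (not shown in this diff) may differ.

# Illustrative sketch, not the shipped nfs_sink_port.py.
from typing import Iterable, Protocol, TypeVar

from contracts_hj3415.nfs.types import Endpoints  # same import the use case relies on

DTO_contra = TypeVar("DTO_contra", contravariant=True)

class NfsSinkPort(Protocol[DTO_contra]):
    """Write-side port: one generic interface instead of one sink port class per endpoint."""

    async def write(self, dto: DTO_contra, *, endpoint: Endpoints) -> None: ...

    async def write_many(self, dtos: Iterable[DTO_contra], *, endpoint: Endpoints) -> None: ...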
scraper2/main.py → scraper2_hj3415/cli.py

@@ -1,21 +1,32 @@
-#
+# scraper2_hj3415/cli.py
 from __future__ import annotations

 import asyncio
-from typing import Any,
+from typing import Any, cast, get_args

+
+import time
 import typer
 from datetime import datetime, timezone

-from
-from
+from db2_hj3415.nfs.repo import ensure_indexes
+from db2_hj3415.settings import get_settings
+from db2_hj3415.universe.repo import list_universe_codes
+
+from scraper2_hj3415.app.composition import build_usecases
+from scraper2_hj3415.app.ports.ingest.nfs_ingest_port import NfsIngestPort
+from scraper2_hj3415.app.domain.types import Sink

-from
+from contracts_hj3415.nfs.types import Endpoints
+from contracts_hj3415.universe.types import UniverseNames
+
+from logging_hj3415 import setup_logging, current_log_level, reset_logging, to_pretty_json

 setup_logging()
+# In production, comment out the lines below and control the log level via environment variables
+reset_logging("DEBUG")
+print(f"Current log level - {current_log_level()}")

-Endpoint = Literal["c101", "c103", "c104", "c106", "c108", "all"]
-Sink = Literal["memory", "mongo"]

 app = typer.Typer(no_args_is_help=True)

@@ -30,54 +41,19 @@ app.add_typer(mi_app, name="mi")
 # small helpers
 # -------------------------

-def _endpoint_list(
-
+def _endpoint_list(endpoint: str) -> list[str]:
+    if endpoint == "all":
+        return list(get_args(Endpoints))  # -> ["c101", "c103", "c104", "c106", "c108"]
+    return [endpoint]

 async def _mongo_bootstrap(db) -> None:
-    from db2.nfs import ensure_indexes
-    from db2.settings import get_settings
-
     s = get_settings()
     await ensure_indexes(db, snapshot_ttl_days=s.SNAPSHOT_TTL_DAYS)

-
-async def _load_codes_from_universe(db, *, universe: str) -> list[str]:
-    """
-    Load codes from the universe_latest document stored in db2.universe.
-    (Adjust to match your actual db2 API names.)
-    """
-    from db2.universe import get_universe_latest  # the API you already have
-
-    doc = await get_universe_latest(db, universe=universe)
-    if not doc:
-        return []
-
-    # the doc may be {"items":[{code,name,...}, ...]} or {"payload":{"items":[...]}}, so be defensive
-    data = doc
-    if isinstance(data, dict) and "payload" in data and isinstance(data["payload"], dict):
-        data = data["payload"]
-    if isinstance(data, dict) and "items" in data:
-        data = data["items"]
-
-    if not isinstance(data, list):
-        return []
-
-    codes: list[str] = []
-    for row in data:
-        if not isinstance(row, dict):
-            continue
-        code = str(row.get("code") or "").strip()
-        if code:
-            codes.append(code)
-    return codes
-
-
-import time
-
 async def _run_ingest_with_progress(
     *,
     ucs: Any,
-    endpoint:
+    endpoint: str,
     codes: list[str],
     sleep_sec: float,
     show: bool,

@@ -99,7 +75,7 @@ async def _run_ingest_with_progress(
             yield xs[i:i + n]

     async def _run_one_endpoint(ep: str) -> None:
-        ingest_uc = cast(
+        ingest_uc = cast(NfsIngestPort, getattr(ucs.ingest, ep))

         ok = 0
         fail = 0

@@ -128,18 +104,6 @@ async def _run_ingest_with_progress(
     elapsed = time.perf_counter() - t0  # ✅ end time
     typer.echo(f"\n⏱ elapsed time: {_format_elapsed(elapsed)}")

-def _dto_to_pretty(obj: Any) -> str:
-    # prefer pydantic v2
-    if hasattr(obj, "model_dump_json"):
-        return obj.model_dump_json(indent=2, by_alias=False)
-    if hasattr(obj, "model_dump"):
-        import json
-        return json.dumps(obj.model_dump(), ensure_ascii=False, indent=2)
-    # dict fallback
-    if isinstance(obj, dict):
-        import json
-        return json.dumps(obj, ensure_ascii=False, indent=2, default=str)
-    return str(obj)

 def _format_elapsed(sec: float) -> str:
     if sec < 60:

@@ -189,11 +153,11 @@ def _parse_asof(asof: str | None) -> datetime:

 @nfs_app.command("one")
 def nfs_one(
-    endpoint:
+    endpoint: str = typer.Argument(..., help="c101|c103|c104|c106|c108|all"),
     code: str = typer.Argument(..., help="종목코드 (예: 005930)"),
     sleep_sec: float = typer.Option(2.0, "--sleep"),
     sink: Sink = typer.Option("memory", "--sink"),
-    show: bool = typer.Option(
+    show: bool = typer.Option(False, "--show/--no-show", help="결과 DTO 출력"),
     asof: str | None = typer.Option(None, "--asof", help="배치 기준시각(ISO8601, UTC 권장). 예: 2026-01-09T05:00:00Z"),
 ):
     code = code.strip()

@@ -201,7 +165,7 @@ def nfs_one(
         raise typer.BadParameter("code는 비어있을 수 없습니다.")

     async def _run():
-        ucs = build_usecases(
+        ucs = build_usecases(sink=sink)

         if sink == "mongo":
             if ucs.db is None:

@@ -211,17 +175,23 @@ def nfs_one(
         try:
             run_asof = _parse_asof(asof)
             for ep in _endpoint_list(endpoint):
-                ingest_uc = cast(
+                ingest_uc = cast(NfsIngestPort, getattr(ucs.ingest, ep))
                 results = await ingest_uc.execute_many([code], sleep_sec=sleep_sec, asof=run_asof)
                 dto = results[0] if results else None

                 typer.echo(f"\n=== ONE DONE: {ep} {code} ===")
-
+                is_memory_sink = sink == "memory"
+                should_show = show or is_memory_sink
+
+                if not should_show:
                     continue
+
                 if dto is None:
                     typer.echo("(no result)")
                 else:
-
+                    if is_memory_sink:
+                        typer.echo("memory result:")
+                    typer.echo(to_pretty_json(dto))
         finally:
             await ucs.aclose()

@@ -230,7 +200,7 @@ def nfs_one(

 @nfs_app.command("all")
 def nfs_all(
-    endpoint:
+    endpoint: str = typer.Argument(..., help="c101|c103|c104|c106|c108|all"),
     universe: str = typer.Option("krx300", "--universe"),
     limit: int = typer.Option(0, "--limit", help="0=전체"),
     sleep_sec: float = typer.Option(2.0, "--sleep"),

@@ -241,12 +211,12 @@ def nfs_all(
     asof: str | None = typer.Option(None, "--asof", help="배치 기준시각(ISO8601). 예: 2026-01-09T05:00:00Z"),
 ):
     async def _run():
-        ucs = build_usecases(
+        ucs = build_usecases(sink=sink)
         if ucs.db is None:
             raise RuntimeError("all 모드는 DB가 필요합니다. mongo sink로 ucs.db를 노출하세요.")
         await _mongo_bootstrap(ucs.db)

-        codes = await
+        codes = await list_universe_codes(ucs.db, universe=cast(UniverseNames, universe))
         if not codes:
             raise RuntimeError(f"universe='{universe}' codes가 비었습니다. 먼저 krx sync로 universe를 채우세요.")

@@ -281,4 +251,4 @@ def mi():


 if __name__ == "__main__":
-    app()
+    app()
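The rewritten _endpoint_list leans on typing.get_args to expand the "all" pseudo-endpoint into every member of the Endpoints literal. A self-contained sketch of that behavior follows; the Endpoints shape shown here is assumed from the inline comment in the diff, the real type lives in contracts_hj3415.nfs.types.

from typing import Literal, get_args

# Assumed shape of contracts_hj3415.nfs.types.Endpoints, per the comment in the diff above.
Endpoints = Literal["c101", "c103", "c104", "c106", "c108"]

def _endpoint_list(endpoint: str) -> list[str]:
    # "all" fans out to every literal member; anything else passes through as a single item
    if endpoint == "all":
        return list(get_args(Endpoints))
    return [endpoint]

print(_endpoint_list("all"))   # ['c101', 'c103', 'c104', 'c106', 'c108']
print(_endpoint_list("c106"))  # ['c106']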
scraper2_hj3415-2.4.1.dist-info/METADATA → scraper2_hj3415-2.6.0.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: scraper2-hj3415
-Version: 2.4.1
+Version: 2.6.0
 Summary: Naver WiseReport scraper
 Keywords: example,demo
 Author-email: Hyungjin Kim <hj3415@gmail.com>

@@ -17,6 +17,8 @@ Requires-Dist: lxml>=6.0.2
 Requires-Dist: typer>=0.21.0
 Requires-Dist: db2-hj3415
 Requires-Dist: contracts-hj3415
+Requires-Dist: common-hj3415
+Requires-Dist: logging-hj3415

 # scraper2

scraper2_hj3415-2.6.0.dist-info/RECORD (added)

@@ -0,0 +1,75 @@
+scraper2_hj3415/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+scraper2_hj3415/cli.py,sha256=idwTPbCBcqof2YVRsBzyBMnl92o2b5JRyMbzOxOXKZA,8068
+scraper2_hj3415/app/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+scraper2_hj3415/app/composition.py,sha256=t9NnNDLL6-VT28dT_kBtc_4Sd6qdR_cGKcY2_wmQHMI,6573
+scraper2_hj3415/app/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+scraper2_hj3415/app/adapters/out/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+scraper2_hj3415/app/adapters/out/playwright/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+scraper2_hj3415/app/adapters/out/playwright/browser.py,sha256=mbPNLynkTHkNHLqOG7UxVo7UclfNkHbNyN4M9xrDGOk,13152
+scraper2_hj3415/app/adapters/out/playwright/browser_factory.py,sha256=Tp30xdE4Z7cWHBCCgvaqD3tqY4qyg2ij_YmuLmj2WUg,3744
+scraper2_hj3415/app/adapters/out/playwright/session.py,sha256=GLlpO0rLQwXnxX1GyaRxy7P2UsYrceNczfhUvx3pepE,3734
+scraper2_hj3415/app/adapters/out/sinks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+scraper2_hj3415/app/adapters/out/sinks/memory_sink.py,sha256=VlywXRmWjfh33yuE7AuwKU4aPi_UJAZQMSWELlHJ-l8,722
+scraper2_hj3415/app/adapters/out/sinks/mongo_sink.py,sha256=GS61lV1gaxdxC9o7Z9APAPs9WnWL6-qa_V5Ls245yNk,1749
+scraper2_hj3415/app/adapters/out/sinks/store.py,sha256=yerl6NvaacVHDGnQ1Obc31aQFxxXUCWfGq2DKKV8aTc,2826
+scraper2_hj3415/app/adapters/site/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+scraper2_hj3415/app/adapters/site/wisereport_playwright.py,sha256=lpvhy6Rdy9j762KE0udhxgIVXi0n2pvDbmbs2y0WPHs,6970
+scraper2_hj3415/app/domain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+scraper2_hj3415/app/domain/blocks.py,sha256=ddrGYo12hRI3h3mLsGzrM9RUZuEabqdfBWsx5l8jVGs,1871
+scraper2_hj3415/app/domain/constants.py,sha256=-BxAKH6smxE5jEHJsXmf32erk0UlisRz8BkYHXYTSgA,1181
+scraper2_hj3415/app/domain/doc.py,sha256=G9Ik6f-HiNZjiT26Obtrx6HTyYsTbMg1vKaBQ536hOI,483
+scraper2_hj3415/app/domain/endpoint.py,sha256=8nwV0ybBYDKS8ULngNvc_dh-Pl4BqpMJrdKHDuThSTA,222
+scraper2_hj3415/app/domain/series.py,sha256=KZqHqavPkL8p14F58wuUrOS-N9k0pKfKBdhDL6kTYU4,300
+scraper2_hj3415/app/domain/types.py,sha256=_fPII4xFc_zTWuVm-V_SdaB092XR2OeS0sNdJVwE5t8,374
+scraper2_hj3415/app/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+scraper2_hj3415/app/parsing/c101_parser.py,sha256=fA7-EUlG3GhLlX8l7m9p-Bn5O2WQY0H9h7IAIDvlmGQ,2049
+scraper2_hj3415/app/parsing/c103_parser.py,sha256=Tn5tfpUh4lf6IMsKW9JAp2b12W32vKcF8FPfzEJYvtY,770
+scraper2_hj3415/app/parsing/c104_parser.py,sha256=NfwFcgNZb6EqCcDYRoWiMq281CzATELJsDO_Np64Clk,814
+scraper2_hj3415/app/parsing/c106_parser.py,sha256=DvXv_OndsWed4LOyv8D9bsCxbj8_6rYrfR6ICR-VBnM,4346
+scraper2_hj3415/app/parsing/c108_parser.py,sha256=Kopf3CAV4W66YR6at7isoNV-C8A7-eCOQcqPs85FgEE,7572
+scraper2_hj3415/app/parsing/_normalize/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+scraper2_hj3415/app/parsing/_normalize/label.py,sha256=yUtUalOlXuckzsQ7RXqdQ6F4Q2UmVBGr-zoEpZ6ryX0,2752
+scraper2_hj3415/app/parsing/_normalize/table.py,sha256=V6I79gOHeRsyFiuIXyEy0Whg-pxZeDdTaGh0cdR5mrE,1464
+scraper2_hj3415/app/parsing/_normalize/text.py,sha256=BnBZyaQiuydsQVUSDgIEn8JYYbxrM-3BZTmvNqiFK3g,683
+scraper2_hj3415/app/parsing/_normalize/values.py,sha256=X5H7xprg5y8pkXilXCg_br7UIPjErcLHGDkOrxjctbk,1824
+scraper2_hj3415/app/parsing/_tables/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+scraper2_hj3415/app/parsing/_tables/html_table.py,sha256=m44eA0rVhhk1QL74fgR2dbldKuF1_K5mJr2xiyb-55U,2393
+scraper2_hj3415/app/parsing/c101/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+scraper2_hj3415/app/parsing/c101/_sise_normalizer.py,sha256=0wG9AZ6MYvWMf_UA9QK3_TbyDctdi3ldyKNKwmLJJic,3150
+scraper2_hj3415/app/parsing/c101/company_overview.py,sha256=R6K44Rlw9Iv740SR2XZDxBkZxsLi_SNB6n3xruO3xqk,1391
+scraper2_hj3415/app/parsing/c101/earning_surprise.py,sha256=QqiVVrdJuQXm25mvm0AsQB7gi461IiUbAoD8iCamUjg,7028
+scraper2_hj3415/app/parsing/c101/fundamentals.py,sha256=3Fy6ya53NF-VoXa16GDpqUTdoFo2PIEjt5rjlXNa8sI,2875
+scraper2_hj3415/app/parsing/c101/major_shareholders.py,sha256=sF1j1VNZSoIkQYHmuhMWSx52l00WDf6El2NkiRoXW0o,2047
+scraper2_hj3415/app/parsing/c101/sise.py,sha256=Mky6pLWZ_LZkeUMHIPcZfrv0icTNxWEc7uTYKU2uJ0M,1314
+scraper2_hj3415/app/parsing/c101/summary_cmp.py,sha256=hhBCtH7hgAKFUh4gr7J-mz-6c9NLT9KZODFY8LTG-Fc,2776
+scraper2_hj3415/app/parsing/c101/yearly_consensus.py,sha256=FCLA-pYCMbQffNYOV6YbZ8GnPJjyZHmCSIKdw9-EPuI,5572
+scraper2_hj3415/app/ports/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+scraper2_hj3415/app/ports/browser/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+scraper2_hj3415/app/ports/browser/browser_factory_port.py,sha256=exG5XM3yen84lqsY0ggpZ58kcyCyoaMusDMooVxCGG0,353
+scraper2_hj3415/app/ports/browser/browser_port.py,sha256=VUxMDsrKWBAi1TVD8b-PbsHkCSjZ9ZMgsr3eVmhb_1I,3628
+scraper2_hj3415/app/ports/ingest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+scraper2_hj3415/app/ports/ingest/nfs_ingest_port.py,sha256=Ia8GByRLV9SPEU89I8A4V6tmUwsO8f7xM7-yVxnsA0o,658
+scraper2_hj3415/app/ports/sinks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+scraper2_hj3415/app/ports/sinks/nfs_sink_port.py,sha256=8EOdFH5-703yc8XP47PZ0mmdizg4d_kAzuR1-G5b4MY,522
+scraper2_hj3415/app/ports/site/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+scraper2_hj3415/app/ports/site/wisereport_port.py,sha256=ufbYJ1jyNkSmlIbV1CqI8BekxjgGgWvxj8yb73ZRUU0,663
+scraper2_hj3415/app/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+scraper2_hj3415/app/services/nfs_doc_builders.py,sha256=bz2Is3xXlM98gtd1QQUqgeoqWk2EwbuWNCJw5oJXkg8,8874
+scraper2_hj3415/app/services/fetch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+scraper2_hj3415/app/services/fetch/fetch_c101.py,sha256=nl96RKvahNS9YtusE4e9AFMb0Wf7UHaW2X6qAAzYuCY,2228
+scraper2_hj3415/app/services/fetch/fetch_c103.py,sha256=PKJwtZLJDfVNjj5l8nzx9EJOtqH1zR8OF-pOiUlMrRc,5148
+scraper2_hj3415/app/services/fetch/fetch_c104.py,sha256=djl7Z_CW58gpl8naMezumc_dbjc3l-EX6OQJeWU_ZAw,6549
+scraper2_hj3415/app/services/fetch/fetch_c106.py,sha256=UIAMaQB-FHXsvQ0ONI3fTO22M3npXrHILxO_4ayY2Lk,3462
+scraper2_hj3415/app/services/fetch/fetch_c108.py,sha256=o9GesH66jqCygZIEbrHlVwHGGK1_2Ilc2_1b24fSC54,2236
+scraper2_hj3415/app/usecases/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+scraper2_hj3415/app/usecases/ingest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+scraper2_hj3415/app/usecases/ingest/ingest_c101.py,sha256=nLhdG27NsuZqxqQI6bWRcFI2T7BZV-QXmUldql6qCo8,3760
+scraper2_hj3415/app/usecases/ingest/ingest_c103.py,sha256=g36Kc1DK8v0AsyN_FCJq_A1Lv1EwrZ1vaS7qMCZTSEQ,5405
+scraper2_hj3415/app/usecases/ingest/ingest_c104.py,sha256=CfS1HsnnHt4N-N3YhtiZjahCE-SryHpWXoTT4-AyCac,5959
+scraper2_hj3415/app/usecases/ingest/ingest_c106.py,sha256=pmGTO-Obp5Lw-a27BVII0eyxyBAIiFVok7xiBqLEJXk,4301
+scraper2_hj3415/app/usecases/ingest/ingest_c108.py,sha256=6iDhJAjzLnqHiNBKDvNLsQxrnJcG5I68SdE-dojUMJY,3817
+scraper2_hj3415-2.6.0.dist-info/entry_points.txt,sha256=jNGmOvBmptIUr9_XUMQOH4s6jNKCU51jOhKd31gOe8c,52
+scraper2_hj3415-2.6.0.dist-info/licenses/LICENSE,sha256=QBiVGQuKAESeCfQE344Ik2ex6g2zfYdu9WqrRWydxIs,1068
+scraper2_hj3415-2.6.0.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
+scraper2_hj3415-2.6.0.dist-info/METADATA,sha256=Xw3KsE6SxvOs_CJvsXF2-x4ebhkAssA7qVD0uApNeBw,3516
+scraper2_hj3415-2.6.0.dist-info/RECORD,,
scraper2/.DS_Store DELETED (binary file)
scraper2/adapters/out/.DS_Store DELETED (binary file)
scraper2/adapters/out/playwright/browser.py (deleted)

@@ -1,102 +0,0 @@
-# src/scraper2/adapters/out/playwright/session.py
-from __future__ import annotations
-from typing import Any
-from io import StringIO
-import pandas as pd
-from playwright.async_api import Page
-
-class PlaywrightBrowser:
-    def __init__(self, page: Page):
-        self.page = page
-
-    async def goto(self, url: str, timeout_ms: int = 10_000) -> None:
-        await self.page.goto(url, timeout=timeout_ms)
-
-    async def title(self) -> str:
-        return await self.page.title()
-
-    async def current_url(self) -> str:
-        return self.page.url
-
-    async def wait(self, selector: str, timeout_ms: int = 10_000) -> None:
-        await self.page.wait_for_selector(selector, timeout=timeout_ms, state="attached")
-
-    async def text(self, selector: str) -> str:
-        await self.wait(selector)
-        return (await self.page.locator(selector).first.text_content()) or ""
-
-    async def texts(self, selector: str) -> list[str]:
-        await self.wait(selector)
-        loc = self.page.locator(selector)
-        items = await loc.all()
-        out: list[str] = []
-        for it in items:
-            out.append((await it.text_content()) or "")
-        return out
-
-    async def text_first_by_text(self, needle: str) -> str:
-        return (await self.page.get_by_text(needle).first.text_content()) or ""
-
-    async def inner_text(self, selector: str) -> str:
-        await self.wait(selector)
-        return await self.page.locator(selector).first.inner_text()
-
-    async def click(self, selector: str) -> None:
-        await self.wait(selector)
-        await self.page.locator(selector).click()
-
-    async def table_records(
-        self,
-        table_selector: str,
-        *,
-        header: int | list[int] = 0
-    ) -> list[dict[str, Any]]:
-        await self.wait(table_selector)
-
-        table = self.page.locator(table_selector).first
-        html = await table.evaluate("el => el.outerHTML")  # includes the <table> tag
-        #print(html)
-
-        try:
-            df = pd.read_html(StringIO(html), header=header)[0]
-            #print(df.head(3))
-        except Exception as e:
-            # catch ImportError (missing lxml), ValueError, etc. here and surface the cause
-            raise RuntimeError(f"pd.read_html failed: {type(e).__name__}: {e}") from e
-
-        if header == 0:
-            if "항목" in df.columns:
-                df["항목"] = df["항목"].astype(str).str.replace("펼치기", "").str.strip()
-
-            df.columns = (
-                df.columns.astype(str)
-                .str.replace("연간컨센서스보기", "", regex=False)
-                .str.replace("연간컨센서스닫기", "", regex=False)
-                .str.replace("(IFRS연결)", "", regex=False)
-                .str.replace("(IFRS별도)", "", regex=False)
-                .str.replace("(GAAP개별)", "", regex=False)
-                .str.replace("(YoY)", "", regex=False)
-                .str.replace("(QoQ)", "", regex=False)
-                .str.replace("(E)", "", regex=False)
-                .str.replace(".", "", regex=False)
-                .str.strip()
-            )
-
-        # NaN -> None
-        records: list[dict[str, Any]] = df.where(pd.notnull(df), None).to_dict(orient="records")
-        return records
-
-    async def outer_html(self, selector: str) -> str:
-        loc = self.page.locator(selector).first
-        return await loc.evaluate("el => el.outerHTML")
-
-    async def all_texts(self, selector: str) -> list[str]:
-        # selector can be CSS or "xpath=..."
-        loc = self.page.locator(selector)
-        return await loc.all_text_contents()
-
-    async def outer_html_nth(self, selector: str, index: int) -> str:
-        loc = self.page.locator(selector).nth(index)
-        # Playwright raises an error if index is out of range;
-        # wrap it in a friendlier error here if needed.
-        return await loc.evaluate("el => el.outerHTML")
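The deleted table_records helper above is essentially a thin wrapper around pandas' HTML table parsing (the 373-line browser.py that replaces it is not shown in this section). A standalone sketch of the same pattern, kept only as an illustration of the technique:

from io import StringIO
from typing import Any

import pandas as pd  # pd.read_html needs lxml or html5lib installed

def table_records(html: str, header: int | list[int] = 0) -> list[dict[str, Any]]:
    # Parse the first <table> in the HTML; wrap failures so the root cause stays visible.
    try:
        df = pd.read_html(StringIO(html), header=header)[0]
    except Exception as e:
        raise RuntimeError(f"pd.read_html failed: {type(e).__name__}: {e}") from e
    # NaN -> None so the rows serialize cleanly (JSON, Mongo, ...).
    return df.where(pd.notnull(df), None).to_dict(orient="records")

rows = table_records(
    "<table><tr><th>항목</th><th>2024</th></tr><tr><td>매출액</td><td>100</td></tr></table>"
)
# -> [{'항목': '매출액', '2024': 100}]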
scraper2/adapters/out/sinks/.DS_Store DELETED (binary file)
scraper2/adapters/out/sinks/memory/__init__.py (deleted)

@@ -1,15 +0,0 @@
-# scraper2/adapters/out/sinks/memory/__init__.py
-from .c101_memory_sink import MemoryC101Sink
-from .c103_memory_sink import MemoryC103Sink
-from .c104_memory_sink import MemoryC104Sink
-from .c106_memory_sink import MemoryC106Sink
-from .c108_memory_sink import MemoryC108Sink
-
-__all__ = [
-    "MemoryC101Sink",
-    "MemoryC103Sink",
-    "MemoryC104Sink",
-    "MemoryC106Sink",
-    "MemoryC108Sink",
-]
-
scraper2/adapters/out/sinks/memory/c101_memory_sink.py (deleted)

@@ -1,26 +0,0 @@
-#scraper2/adapters/out/sinks/memory/c101_memory_sink.py
-from __future__ import annotations
-
-from datetime import datetime
-from typing import Iterable, Optional
-
-from contracts.nfs.c101 import C101DTO
-from scraper2.adapters.out.sinks.memory.store import InMemoryStore
-from scraper2.app.ports.sinks.c101_sink_port import C101SinkPort
-
-_ENDPOINT = "c101"
-
-class MemoryC101Sink(C101SinkPort):
-    def __init__(self, store: InMemoryStore[C101DTO]):
-        self._store = store
-
-    async def write(self, dto: C101DTO, *, asof: Optional[datetime] = None) -> None:
-        await self._store.put(_ENDPOINT, dto.코드, dto)
-
-    async def write_many(
-        self,
-        dtos: Iterable[C101DTO],
-        *,
-        asof: Optional[datetime] = None,
-    ) -> None:
-        await self._store.put_many(_ENDPOINT, ((d.코드, d) for d in dtos))
scraper2/adapters/out/sinks/memory/c103_memory_sink.py (deleted)

@@ -1,26 +0,0 @@
-# scraper2/adapters/out/sinks/memory/c103_memory_sink.py
-from __future__ import annotations
-
-from datetime import datetime
-from typing import Iterable, Optional
-
-from contracts.nfs.c103 import C103DTO
-from scraper2.adapters.out.sinks.memory.store import InMemoryStore
-from scraper2.app.ports.sinks.c103_sink_port import C103SinkPort
-
-_ENDPOINT = "c103"
-
-class MemoryC103Sink(C103SinkPort):
-    def __init__(self, store: InMemoryStore[C103DTO]):
-        self._store = store
-
-    async def write(self, dto: C103DTO, *, asof: Optional[datetime] = None) -> None:
-        await self._store.put(_ENDPOINT, dto.코드, dto)
-
-    async def write_many(
-        self,
-        dtos: Iterable[C103DTO],
-        *,
-        asof: Optional[datetime] = None,
-    ) -> None:
-        await self._store.put_many(_ENDPOINT, ((d.코드, d) for d in dtos))
scraper2/adapters/out/sinks/memory/c104_memory_sink.py (deleted)

@@ -1,26 +0,0 @@
-# scraper2/adapters/out/sinks/memory/c104_memory_sink.py
-from __future__ import annotations
-
-from datetime import datetime
-from typing import Iterable, Optional
-
-from contracts.nfs.c104 import C104DTO
-from scraper2.adapters.out.sinks.memory.store import InMemoryStore
-from scraper2.app.ports.sinks.c104_sink_port import C104SinkPort
-
-_ENDPOINT = "c104"
-
-class MemoryC104Sink(C104SinkPort):
-    def __init__(self, store: InMemoryStore[C104DTO]):
-        self._store = store
-
-    async def write(self, dto: C104DTO, *, asof: Optional[datetime] = None) -> None:
-        await self._store.put(_ENDPOINT, dto.코드, dto)
-
-    async def write_many(
-        self,
-        dtos: Iterable[C104DTO],
-        *,
-        asof: Optional[datetime] = None,
-    ) -> None:
-        await self._store.put_many(_ENDPOINT, ((d.코드, d) for d in dtos))
scraper2/adapters/out/sinks/memory/c106_memory_sink.py (deleted)

@@ -1,26 +0,0 @@
-#scraper2/adapters/out/sinks/memory/c106_memory_sink.py
-from __future__ import annotations
-
-from datetime import datetime
-from typing import Iterable, Optional
-
-from contracts.nfs.c106 import C106DTO
-from scraper2.adapters.out.sinks.memory.store import InMemoryStore
-from scraper2.app.ports.sinks.c106_sink_port import C106SinkPort
-
-_ENDPOINT = "c106"
-
-class MemoryC106Sink(C106SinkPort):
-    def __init__(self, store: InMemoryStore[C106DTO]):
-        self._store = store
-
-    async def write(self, dto: C106DTO, *, asof: Optional[datetime] = None) -> None:
-        await self._store.put(_ENDPOINT, dto.코드, dto)
-
-    async def write_many(
-        self,
-        dtos: Iterable[C106DTO],
-        *,
-        asof: Optional[datetime] = None,
-    ) -> None:
-        await self._store.put_many(_ENDPOINT, ((d.코드, d) for d in dtos))
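The per-endpoint memory sinks removed in this release (c101, c103, c104, c106, c108, each a near-copy of the one above) are replaced by the single memory_sink.py adapter listed at the top of the diff. Its contents are not shown here, so the following is only a hedged sketch of how one generic sink keyed by endpoint can cover all five cases; the field name code follows the new contracts DTOs (the old ones used 코드).

# Illustrative only; the shipped scraper2_hj3415/app/adapters/out/sinks/memory_sink.py may differ.
from collections import defaultdict
from typing import Any, Iterable

class MemorySink:
    """One in-memory sink for every endpoint: rows are keyed by (endpoint, code)."""

    def __init__(self) -> None:
        self._data: dict[str, dict[str, Any]] = defaultdict(dict)

    async def write(self, dto: Any, *, endpoint: str) -> None:
        self._data[endpoint][dto.code] = dto

    async def write_many(self, dtos: Iterable[Any], *, endpoint: str) -> None:
        for dto in dtos:
            self._data[endpoint][dto.code] = dto

    def get(self, endpoint: str, code: str) -> Any | None:
        # Convenience accessor for tests/CLI inspection.
        return self._data[endpoint].get(code)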