scraper2-hj3415 2.4.0__py3-none-any.whl → 2.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scraper2_hj3415/app/adapters/out/playwright/browser.py +373 -0
- {scraper2 → scraper2_hj3415/app}/adapters/out/playwright/browser_factory.py +5 -5
- {scraper2 → scraper2_hj3415/app}/adapters/out/playwright/session.py +1 -1
- scraper2_hj3415/app/adapters/out/sinks/memory_sink.py +25 -0
- scraper2_hj3415/app/adapters/out/sinks/mongo_sink.py +63 -0
- {scraper2/adapters/out/sinks/memory → scraper2_hj3415/app/adapters/out/sinks}/store.py +14 -5
- scraper2_hj3415/app/adapters/site/wisereport_playwright.py +168 -0
- scraper2_hj3415/app/composition.py +225 -0
- scraper2_hj3415/app/domain/blocks.py +61 -0
- scraper2_hj3415/app/domain/constants.py +33 -0
- scraper2_hj3415/app/domain/doc.py +16 -0
- scraper2_hj3415/app/domain/endpoint.py +11 -0
- scraper2_hj3415/app/domain/series.py +11 -0
- scraper2_hj3415/app/domain/types.py +19 -0
- scraper2_hj3415/app/parsing/_normalize/label.py +92 -0
- scraper2_hj3415/app/parsing/_normalize/table.py +53 -0
- scraper2_hj3415/app/parsing/_normalize/text.py +31 -0
- scraper2_hj3415/app/parsing/_normalize/values.py +70 -0
- scraper2_hj3415/app/parsing/_tables/html_table.py +88 -0
- scraper2_hj3415/app/parsing/c101/__init__.py +0 -0
- scraper2_hj3415/app/parsing/c101/_sise_normalizer.py +103 -0
- scraper2_hj3415/app/parsing/c101/company_overview.py +47 -0
- scraper2_hj3415/app/parsing/c101/earning_surprise.py +217 -0
- scraper2_hj3415/app/parsing/c101/fundamentals.py +95 -0
- scraper2_hj3415/app/parsing/c101/major_shareholders.py +57 -0
- scraper2_hj3415/app/parsing/c101/sise.py +47 -0
- scraper2_hj3415/app/parsing/c101/summary_cmp.py +87 -0
- scraper2_hj3415/app/parsing/c101/yearly_consensus.py +197 -0
- scraper2_hj3415/app/parsing/c101_parser.py +45 -0
- scraper2_hj3415/app/parsing/c103_parser.py +19 -0
- scraper2_hj3415/app/parsing/c104_parser.py +23 -0
- scraper2_hj3415/app/parsing/c106_parser.py +137 -0
- scraper2_hj3415/app/parsing/c108_parser.py +254 -0
- scraper2_hj3415/app/ports/__init__.py +0 -0
- scraper2_hj3415/app/ports/browser/__init__.py +0 -0
- scraper2_hj3415/app/ports/browser/browser_factory_port.py +9 -0
- scraper2_hj3415/app/ports/browser/browser_port.py +115 -0
- scraper2_hj3415/app/ports/ingest/__init__.py +0 -0
- scraper2_hj3415/app/ports/ingest/nfs_ingest_port.py +28 -0
- scraper2_hj3415/app/ports/sinks/__init__.py +0 -0
- scraper2_hj3415/app/ports/sinks/nfs_sink_port.py +20 -0
- scraper2_hj3415/app/ports/site/__init__.py +0 -0
- scraper2_hj3415/app/ports/site/wisereport_port.py +20 -0
- scraper2_hj3415/app/services/__init__.py +0 -0
- scraper2_hj3415/app/services/fetch/__init__.py +0 -0
- scraper2_hj3415/app/services/fetch/fetch_c101.py +59 -0
- scraper2_hj3415/app/services/fetch/fetch_c103.py +135 -0
- scraper2_hj3415/app/services/fetch/fetch_c104.py +183 -0
- scraper2_hj3415/app/services/fetch/fetch_c106.py +90 -0
- scraper2_hj3415/app/services/fetch/fetch_c108.py +59 -0
- scraper2_hj3415/app/services/nfs_doc_builders.py +290 -0
- scraper2_hj3415/app/usecases/__init__.py +0 -0
- scraper2_hj3415/app/usecases/ingest/__init__.py +0 -0
- scraper2_hj3415/app/usecases/ingest/ingest_c101.py +111 -0
- scraper2_hj3415/app/usecases/ingest/ingest_c103.py +162 -0
- scraper2_hj3415/app/usecases/ingest/ingest_c104.py +182 -0
- scraper2_hj3415/app/usecases/ingest/ingest_c106.py +136 -0
- scraper2_hj3415/app/usecases/ingest/ingest_c108.py +122 -0
- scraper2/main.py → scraper2_hj3415/cli.py +40 -80
- {scraper2_hj3415-2.4.0.dist-info → scraper2_hj3415-2.6.0.dist-info}/METADATA +3 -1
- scraper2_hj3415-2.6.0.dist-info/RECORD +75 -0
- scraper2_hj3415-2.6.0.dist-info/entry_points.txt +3 -0
- scraper2/.DS_Store +0 -0
- scraper2/adapters/out/.DS_Store +0 -0
- scraper2/adapters/out/playwright/browser.py +0 -102
- scraper2/adapters/out/sinks/.DS_Store +0 -0
- scraper2/adapters/out/sinks/memory/__init__.py +0 -15
- scraper2/adapters/out/sinks/memory/c101_memory_sink.py +0 -26
- scraper2/adapters/out/sinks/memory/c103_memory_sink.py +0 -26
- scraper2/adapters/out/sinks/memory/c104_memory_sink.py +0 -26
- scraper2/adapters/out/sinks/memory/c106_memory_sink.py +0 -26
- scraper2/adapters/out/sinks/memory/c108_memory_sink.py +0 -26
- scraper2/adapters/out/sinks/mongo/__init__.py +0 -14
- scraper2/adapters/out/sinks/mongo/c101_mongo_sink.py +0 -43
- scraper2/adapters/out/sinks/mongo/c103_mongo_sink.py +0 -41
- scraper2/adapters/out/sinks/mongo/c104_mongo_sink.py +0 -41
- scraper2/adapters/out/sinks/mongo/c106_mongo_sink.py +0 -41
- scraper2/adapters/out/sinks/mongo/c108_mongo_sink.py +0 -41
- scraper2/app/composition.py +0 -204
- scraper2/app/parsing/_converters.py +0 -85
- scraper2/app/parsing/_normalize.py +0 -134
- scraper2/app/parsing/c101_parser.py +0 -143
- scraper2/app/parsing/c103_parser.py +0 -128
- scraper2/app/parsing/c104_parser.py +0 -143
- scraper2/app/parsing/c106_parser.py +0 -153
- scraper2/app/parsing/c108_parser.py +0 -65
- scraper2/app/ports/browser/browser_factory_port.py +0 -11
- scraper2/app/ports/browser/browser_port.py +0 -22
- scraper2/app/ports/ingest_port.py +0 -14
- scraper2/app/ports/sinks/base_sink_port.py +0 -14
- scraper2/app/ports/sinks/c101_sink_port.py +0 -9
- scraper2/app/ports/sinks/c103_sink_port.py +0 -9
- scraper2/app/ports/sinks/c104_sink_port.py +0 -9
- scraper2/app/ports/sinks/c106_sink_port.py +0 -9
- scraper2/app/ports/sinks/c108_sink_port.py +0 -9
- scraper2/app/usecases/fetch/fetch_c101.py +0 -43
- scraper2/app/usecases/fetch/fetch_c103.py +0 -103
- scraper2/app/usecases/fetch/fetch_c104.py +0 -76
- scraper2/app/usecases/fetch/fetch_c106.py +0 -90
- scraper2/app/usecases/fetch/fetch_c108.py +0 -49
- scraper2/app/usecases/ingest/ingest_c101.py +0 -36
- scraper2/app/usecases/ingest/ingest_c103.py +0 -37
- scraper2/app/usecases/ingest/ingest_c104.py +0 -37
- scraper2/app/usecases/ingest/ingest_c106.py +0 -38
- scraper2/app/usecases/ingest/ingest_c108.py +0 -39
- scraper2_hj3415-2.4.0.dist-info/RECORD +0 -63
- scraper2_hj3415-2.4.0.dist-info/entry_points.txt +0 -3
- {scraper2 → scraper2_hj3415}/__init__.py +0 -0
- {scraper2/adapters/out → scraper2_hj3415/app}/__init__.py +0 -0
- {scraper2/adapters/out/playwright → scraper2_hj3415/app/adapters}/__init__.py +0 -0
- {scraper2/app → scraper2_hj3415/app/adapters/out}/__init__.py +0 -0
- {scraper2/app/parsing → scraper2_hj3415/app/adapters/out/playwright}/__init__.py +0 -0
- {scraper2/app/ports → scraper2_hj3415/app/adapters/out/sinks}/__init__.py +0 -0
- {scraper2/app/ports/browser → scraper2_hj3415/app/adapters/site}/__init__.py +0 -0
- {scraper2/app/ports/sinks → scraper2_hj3415/app/domain}/__init__.py +0 -0
- {scraper2/app/usecases → scraper2_hj3415/app/parsing}/__init__.py +0 -0
- {scraper2/app/usecases/fetch → scraper2_hj3415/app/parsing/_normalize}/__init__.py +0 -0
- {scraper2/app/usecases/ingest → scraper2_hj3415/app/parsing/_tables}/__init__.py +0 -0
- {scraper2_hj3415-2.4.0.dist-info → scraper2_hj3415-2.6.0.dist-info}/WHEEL +0 -0
- {scraper2_hj3415-2.4.0.dist-info → scraper2_hj3415-2.6.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
# scraper2_hj3415/app/usecases/ingest/ingest_c101.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Iterable, Optional, Any, cast
|
|
6
|
+
|
|
7
|
+
from scraper2_hj3415.app.services.fetch.fetch_c101 import FetchC101
|
|
8
|
+
from scraper2_hj3415.app.ports.sinks.nfs_sink_port import NfsSinkPort
|
|
9
|
+
from common_hj3415.utils.time import utcnow
|
|
10
|
+
|
|
11
|
+
from scraper2_hj3415.app.domain.endpoint import EndpointKind
|
|
12
|
+
from scraper2_hj3415.app.domain.constants import get_block_keys
|
|
13
|
+
from scraper2_hj3415.app.domain.doc import NfsDoc
|
|
14
|
+
from scraper2_hj3415.app.domain.blocks import KvBlock, RecordsBlock, MetricsBlock
|
|
15
|
+
|
|
16
|
+
from contracts_hj3415.nfs.types import Endpoints
|
|
17
|
+
from contracts_hj3415.nfs.c101_dto import C101DTO, C101Payload, C101Blocks
|
|
18
|
+
|
|
19
|
+
from logging_hj3415 import logger
|
|
20
|
+
|
|
21
|
+
endpoint_kind = EndpointKind.C101
|
|
22
|
+
endpoint: Endpoints = cast(Endpoints, endpoint_kind.value)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _unwrap_c101_block(block: Any) -> Any:
|
|
26
|
+
"""
|
|
27
|
+
domain BlockData -> DTO로 들어갈 순수 python 구조(dict/list/...)
|
|
28
|
+
- C101은 KvBlock/RecordsBlock 위주
|
|
29
|
+
- (혹시 MetricsBlock이 섞여도 안전하게 처리)
|
|
30
|
+
"""
|
|
31
|
+
if isinstance(block, KvBlock):
|
|
32
|
+
return dict(block.values)
|
|
33
|
+
|
|
34
|
+
if isinstance(block, RecordsBlock):
|
|
35
|
+
# rows: Sequence[Record] -> list[dict]
|
|
36
|
+
return [dict(r) for r in block.rows]
|
|
37
|
+
|
|
38
|
+
if isinstance(block, MetricsBlock):
|
|
39
|
+
# C101에서 MetricsBlock 쓸 일은 거의 없겠지만, 안전망
|
|
40
|
+
# metrics: Mapping[MetricKey, MetricSeries(values: Mapping[Period, Num])]
|
|
41
|
+
out: dict[str, dict[str, Any]] = {}
|
|
42
|
+
for mk, series in block.metrics.items():
|
|
43
|
+
out[str(mk)] = dict(series.values)
|
|
44
|
+
return out
|
|
45
|
+
|
|
46
|
+
# 이미 dict/list 등으로 들어오는 케이스도 방어
|
|
47
|
+
return block
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def c101_doc_to_dto(*, doc: NfsDoc, asof: datetime) -> C101DTO:
    """Build the contracts-level ``C101DTO`` from a domain ``NfsDoc``.

    Rules:
    - ``payload["blocks"]`` gets one entry per key yielded by
      ``get_block_keys(endpoint_kind)``, in that order. A block present in
      ``doc.blocks`` is unwrapped with ``_unwrap_c101_block``; a missing one
      becomes an empty dict.
    - ``payload["labels"]`` always exists and maps every block key to an
      empty dict (C101 carries no labels).

    Args:
        doc: Domain document providing ``code`` and ``blocks``.
        asof: Timestamp stored on the DTO.

    Returns:
        A ``C101DTO`` for the module-level ``endpoint``.
    """
    unwrapped: dict[str, Any] = {}
    empty_labels: dict[str, dict[str, str]] = {}

    for key in get_block_keys(endpoint_kind):
        name = str(key)
        raw = doc.blocks.get(key)
        if raw is None:
            unwrapped[name] = {}
        else:
            unwrapped[name] = _unwrap_c101_block(raw)

        # C101 labels are always empty, but the key must still be present.
        empty_labels[name] = {}

    typed_blocks = cast(C101Blocks, unwrapped)
    payload: C101Payload = cast(
        C101Payload, {"blocks": typed_blocks, "labels": empty_labels}
    )

    return C101DTO(code=doc.code, asof=asof, endpoint=endpoint, payload=payload)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class IngestC101:
    """Use case that fetches C101 documents, converts them to DTOs and writes them to a sink."""

    def __init__(self, fetch: FetchC101, sink: NfsSinkPort[C101DTO]):
        """Store the fetch service and the sink the DTOs are written to."""
        self.fetch = fetch
        self.sink = sink

    async def execute(
        self, code: str, *, sleep_sec: float = 2.0, asof: datetime | None = None
    ) -> C101DTO:
        """Ingest a single code.

        Args:
            code: Code passed to the fetch service.
            sleep_sec: Forwarded unchanged to ``fetch.execute``.
            asof: Timestamp for the DTO; ``utcnow()`` is used when omitted.

        Returns:
            The DTO that was written to the sink.

        Raises:
            RuntimeError: If the fetch service returns ``None``.
        """
        stamp = asof if asof else utcnow()
        fetched = await self.fetch.execute(code, sleep_sec=sleep_sec)
        logger.debug(f"doc:\n{fetched}")
        if fetched is None:
            raise RuntimeError(f"c101 fetch returned None: code={code}")

        result = c101_doc_to_dto(doc=fetched, asof=stamp)
        logger.debug(f"dto:\n{result}")

        await self.sink.write(result, endpoint=endpoint)
        return result

    async def execute_many(
        self,
        codes: Iterable[str],
        *,
        sleep_sec: float = 2.0,
        asof: Optional[datetime] = None,
    ) -> list[C101DTO]:
        """Ingest several codes, stamping every DTO with the same timestamp.

        Args:
            codes: Codes passed to ``fetch.execute_many``.
            sleep_sec: Forwarded unchanged to ``fetch.execute_many``.
            asof: Shared timestamp for the batch; ``utcnow()`` when omitted.

        Returns:
            The DTOs that were written to the sink, in fetch order.
        """
        stamp = asof if asof else utcnow()

        fetched = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
        results: list[C101DTO] = []
        for item in fetched:
            results.append(c101_doc_to_dto(doc=item, asof=stamp))
        logger.debug(f"dtos:\n{results}")
        await self.sink.write_many(results, endpoint=endpoint)
        return results
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
# scraper2_hj3415/app/usecases/ingest/ingest_c103.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Iterable, Optional, cast
|
|
6
|
+
|
|
7
|
+
from scraper2_hj3415.app.services.fetch.fetch_c103 import FetchC103
|
|
8
|
+
from scraper2_hj3415.app.ports.sinks.nfs_sink_port import NfsSinkPort
|
|
9
|
+
from common_hj3415.utils.time import utcnow
|
|
10
|
+
|
|
11
|
+
from scraper2_hj3415.app.domain.endpoint import EndpointKind
|
|
12
|
+
from scraper2_hj3415.app.domain.constants import get_block_keys
|
|
13
|
+
from scraper2_hj3415.app.domain.doc import NfsDoc
|
|
14
|
+
from scraper2_hj3415.app.domain.blocks import MetricsBlock
|
|
15
|
+
|
|
16
|
+
from contracts_hj3415.nfs.types import Endpoints
|
|
17
|
+
|
|
18
|
+
from contracts_hj3415.nfs.c103_dto import (
|
|
19
|
+
C103DTO,
|
|
20
|
+
C103Payload,
|
|
21
|
+
C103Blocks,
|
|
22
|
+
C103Labels,
|
|
23
|
+
C103ValuesMap,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
from logging_hj3415 import logger
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
endpoint_kind = EndpointKind.C103
|
|
30
|
+
endpoint: Endpoints = cast(Endpoints, endpoint_kind.value)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _metricsblock_to_c103_metric_map(block: MetricsBlock) -> dict[str, C103ValuesMap]:
    """Flatten a domain ``MetricsBlock`` into ``{metric_key: values_dict}``.

    Every series' ``values`` mapping is copied into a fresh ``dict`` so the
    result is made of plain dicts only. The values are presumably already
    shaped like ``{code: number}``; they are copied as-is, not validated.
    """
    return {
        metric_key: dict(series.values)
        for metric_key, series in block.metrics.items()
    }
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def c103_doc_to_dto(*, doc: NfsDoc, asof: datetime) -> C103DTO:
    """Build the contracts-level ``C103DTO`` envelope from a domain ``NfsDoc``.

    Payload layout::

        {
            "blocks": {"손익계산서y": {metric: {code: num}}, "손익계산서q": {...}, ...},
            "labels": {"손익계산서y": {metric: raw_label}, "손익계산서q": {...}, ...},
        }

    Policy:
    - ``blocks`` and ``labels`` always contain all six statement keys; a key
      whose block is absent from ``doc.blocks`` keeps an empty dict.
    - Labels for a block default to an empty dict when ``doc.labels`` has no
      entry for that key.

    Raises:
        TypeError: If a present block is not a ``MetricsBlock``.
        ValueError: If ``get_block_keys`` yields a key outside the contract.
    """
    # 1) The skeleton is always fully populated so the contract shape is stable.
    contract_keys = (
        "손익계산서y",
        "손익계산서q",
        "재무상태표y",
        "재무상태표q",
        "현금흐름표y",
        "현금흐름표q",
    )
    block_maps: dict[str, dict[str, C103ValuesMap]] = {key: {} for key in contract_keys}
    label_maps: dict[str, dict] = {key: {} for key in contract_keys}

    # 2) Fill the skeleton following the domain's block-key list and order.
    for bk in get_block_keys(endpoint_kind):
        block_data = doc.blocks.get(bk)
        if block_data is None:
            continue

        if not isinstance(block_data, MetricsBlock):
            raise TypeError(
                f"c103 expects MetricsBlock, got {type(block_data).__name__} | block_key={bk!r}"
            )

        metric_map = _metricsblock_to_c103_metric_map(block_data)
        label_map = dict(doc.labels.get(bk, {}))  # {} when no labels exist

        if bk not in block_maps:
            raise ValueError(f"invalid c103 block key: {bk!r}")
        block_maps[bk] = metric_map
        label_maps[bk] = label_map

    # TypedDicts exist only for the type checker (they perform no runtime
    # validation), so the plain dicts are cast instead of being constructed.
    payload: C103Payload = cast(
        C103Payload,
        {
            "blocks": cast(C103Blocks, block_maps),
            "labels": cast(C103Labels, label_maps),
        },
    )

    return C103DTO(
        code=doc.code,
        asof=asof,
        endpoint=endpoint,
        payload=payload,  # injected as a plain dict
    )
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class IngestC103:
    """Use case that fetches C103 documents, converts them to DTOs and writes them to a sink."""

    def __init__(self, fetch: FetchC103, sink: NfsSinkPort[C103DTO]):
        """Store the fetch service and the sink the DTOs are written to."""
        self.fetch = fetch
        self.sink = sink

    async def execute(
        self,
        code: str,
        *,
        sleep_sec: float = 2.0,
        asof: datetime | None = None,
    ) -> C103DTO:
        """Ingest a single code.

        Args:
            code: Code passed to the fetch service.
            sleep_sec: Forwarded unchanged to ``fetch.execute``.
            asof: Timestamp for the DTO; ``utcnow()`` is used when omitted.

        Returns:
            The DTO that was written to the sink.

        Raises:
            RuntimeError: If the fetch service returns ``None``.
        """
        asof = asof or utcnow()

        doc = await self.fetch.execute(code, sleep_sec=sleep_sec)
        # Fail fast, and name the right endpoint: this message previously
        # said "c106" because it was copied from the C106 use case.
        if doc is None:
            raise RuntimeError(f"c103 fetch returned None: code={code}")
        logger.debug(f"doc:\n{doc}")

        dto = c103_doc_to_dto(doc=doc, asof=asof)
        logger.debug(f"dto:\n{dto}")

        await self.sink.write(dto, endpoint=endpoint)
        return dto

    async def execute_many(
        self,
        codes: Iterable[str],
        *,
        sleep_sec: float = 2.0,
        asof: Optional[datetime] = None,
    ) -> list[C103DTO]:
        """Ingest several codes, stamping every DTO with the same timestamp.

        Args:
            codes: Codes passed to ``fetch.execute_many``.
            sleep_sec: Forwarded unchanged to ``fetch.execute_many``.
            asof: Shared timestamp for the batch; ``utcnow()`` when omitted.

        Returns:
            The DTOs that were written to the sink, in fetch order.
        """
        batch_asof = asof or utcnow()

        docs = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
        dtos = [c103_doc_to_dto(doc=d, asof=batch_asof) for d in docs]
        logger.debug(f"dtos:\n{dtos}")
        await self.sink.write_many(dtos, endpoint=endpoint)
        return dtos
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
# scraper2_hj3415/app/usecases/ingest/ingest_c104.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Iterable, Optional, cast
|
|
6
|
+
|
|
7
|
+
from scraper2_hj3415.app.services.fetch.fetch_c104 import FetchC104
|
|
8
|
+
from scraper2_hj3415.app.ports.sinks.nfs_sink_port import NfsSinkPort
|
|
9
|
+
from common_hj3415.utils.time import utcnow
|
|
10
|
+
|
|
11
|
+
from scraper2_hj3415.app.domain.endpoint import EndpointKind
|
|
12
|
+
from scraper2_hj3415.app.domain.constants import get_block_keys
|
|
13
|
+
from scraper2_hj3415.app.domain.doc import NfsDoc
|
|
14
|
+
from scraper2_hj3415.app.domain.blocks import MetricsBlock
|
|
15
|
+
|
|
16
|
+
from contracts_hj3415.nfs.types import Endpoints
|
|
17
|
+
|
|
18
|
+
from contracts_hj3415.nfs.c104_dto import (
|
|
19
|
+
C104DTO,
|
|
20
|
+
C104Payload,
|
|
21
|
+
C104Blocks,
|
|
22
|
+
C104Labels,
|
|
23
|
+
C104ValuesMap,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
from logging_hj3415 import logger
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
endpoint_kind = EndpointKind.C104
|
|
30
|
+
endpoint: Endpoints = cast(Endpoints, endpoint_kind.value)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _metricsblock_to_c104_metric_map(block: MetricsBlock) -> dict[str, C104ValuesMap]:
    """Flatten a domain ``MetricsBlock`` into ``{metric_key: values_dict}``.

    Every series' ``values`` mapping is copied into a fresh ``dict`` so the
    result is made of plain dicts only. The values are presumably already
    shaped like ``{code: number}``; they are copied as-is, not validated.
    """
    return {
        metric_key: dict(series.values)
        for metric_key, series in block.metrics.items()
    }
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def c104_doc_to_dto(*, doc: NfsDoc, asof: datetime) -> C104DTO:
    """Build the contracts-level ``C104DTO`` envelope from a domain ``NfsDoc``.

    Payload layout::

        {
            "blocks": {"수익성y": {metric: {code: num}}, "성장성y": {...}, ...},
            "labels": {"수익성y": {metric: raw_label}, "성장성y": {...}, ...},
        }

    Policy:
    - ``blocks`` and ``labels`` always contain all ten y/q keys; a key whose
      block is absent from ``doc.blocks`` keeps an empty dict.
    - Labels for a block default to an empty dict when ``doc.labels`` has no
      entry for that key.

    Raises:
        TypeError: If a present block is not a ``MetricsBlock``.
        ValueError: If ``get_block_keys`` yields a key outside the contract.
    """
    # 1) The skeleton is always fully populated so the contract shape is stable.
    contract_keys = (
        "수익성y",
        "성장성y",
        "안정성y",
        "활동성y",
        "가치분석y",
        "수익성q",
        "성장성q",
        "안정성q",
        "활동성q",
        "가치분석q",
    )
    block_maps: dict[str, dict[str, C104ValuesMap]] = {key: {} for key in contract_keys}
    label_maps: dict[str, dict] = {key: {} for key in contract_keys}

    # 2) Fill the skeleton following the domain's block-key list and order.
    for bk in get_block_keys(endpoint_kind):
        block_data = doc.blocks.get(bk)
        if block_data is None:
            continue

        if not isinstance(block_data, MetricsBlock):
            raise TypeError(
                f"c104 expects MetricsBlock, got {type(block_data).__name__} | block_key={bk!r}"
            )

        metric_map = _metricsblock_to_c104_metric_map(block_data)
        label_map = dict(doc.labels.get(bk, {}))  # {} when no labels exist

        if bk not in block_maps:
            raise ValueError(f"invalid C104 block key: {bk!r}")
        block_maps[bk] = metric_map
        label_maps[bk] = label_map

    # TypedDicts exist only for the type checker (they perform no runtime
    # validation), so the plain dicts are cast instead of being constructed.
    payload: C104Payload = cast(
        C104Payload,
        {
            "blocks": cast(C104Blocks, block_maps),
            "labels": cast(C104Labels, label_maps),
        },
    )

    return C104DTO(
        code=doc.code,
        asof=asof,
        endpoint=endpoint,
        payload=payload,  # injected as a plain dict
    )
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
class IngestC104:
    """Use case that fetches C104 documents, converts them to DTOs and writes them to a sink."""

    def __init__(self, fetch: FetchC104, sink: NfsSinkPort[C104DTO]):
        """Store the fetch service and the sink the DTOs are written to."""
        self.fetch = fetch
        self.sink = sink

    async def execute(
        self,
        code: str,
        *,
        sleep_sec: float = 2.0,
        asof: datetime | None = None,
    ) -> C104DTO:
        """Ingest a single code.

        Args:
            code: Code passed to the fetch service.
            sleep_sec: Forwarded unchanged to ``fetch.execute``.
            asof: Timestamp for the DTO; ``utcnow()`` is used when omitted.

        Returns:
            The DTO that was written to the sink.

        Raises:
            RuntimeError: If the fetch service returns ``None``.
        """
        asof = asof or utcnow()

        doc = await self.fetch.execute(code, sleep_sec=sleep_sec)
        # Fail fast, and name the right endpoint: this message previously
        # said "c106" because it was copied from the C106 use case.
        if doc is None:
            raise RuntimeError(f"c104 fetch returned None: code={code}")
        logger.debug(f"doc:\n{doc}")

        dto = c104_doc_to_dto(doc=doc, asof=asof)
        logger.debug(f"dto:\n{dto}")

        await self.sink.write(dto, endpoint=endpoint)
        return dto

    async def execute_many(
        self,
        codes: Iterable[str],
        *,
        sleep_sec: float = 2.0,
        asof: Optional[datetime] = None,
    ) -> list[C104DTO]:
        """Ingest several codes, stamping every DTO with the same timestamp.

        Args:
            codes: Codes passed to ``fetch.execute_many``.
            sleep_sec: Forwarded unchanged to ``fetch.execute_many``.
            asof: Shared timestamp for the batch; ``utcnow()`` when omitted.

        Returns:
            The DTOs that were written to the sink, in fetch order.
        """
        batch_asof = asof or utcnow()

        docs = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
        dtos = [c104_doc_to_dto(doc=d, asof=batch_asof) for d in docs]
        logger.debug(f"dtos:\n{dtos}")
        await self.sink.write_many(dtos, endpoint=endpoint)
        return dtos
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
# scraper2_hj3415/app/usecases/ingest/ingest_c106.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Iterable, Optional, cast
|
|
6
|
+
|
|
7
|
+
from scraper2_hj3415.app.services.fetch.fetch_c106 import FetchC106
|
|
8
|
+
from scraper2_hj3415.app.ports.sinks.nfs_sink_port import NfsSinkPort
|
|
9
|
+
from common_hj3415.utils.time import utcnow
|
|
10
|
+
|
|
11
|
+
from scraper2_hj3415.app.domain.endpoint import EndpointKind
|
|
12
|
+
from scraper2_hj3415.app.domain.constants import get_block_keys
|
|
13
|
+
from scraper2_hj3415.app.domain.doc import NfsDoc
|
|
14
|
+
from scraper2_hj3415.app.domain.blocks import MetricsBlock
|
|
15
|
+
|
|
16
|
+
from contracts_hj3415.nfs.types import Endpoints
|
|
17
|
+
|
|
18
|
+
from contracts_hj3415.nfs.c106_dto import (
|
|
19
|
+
C106DTO,
|
|
20
|
+
C106Payload,
|
|
21
|
+
C106Blocks,
|
|
22
|
+
C106Labels,
|
|
23
|
+
C106ValuesMap,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
from logging_hj3415 import logger
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
endpoint_kind = EndpointKind.C106
|
|
30
|
+
endpoint: Endpoints = cast(Endpoints, endpoint_kind.value)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _metricsblock_to_c106_metric_map(block: MetricsBlock) -> dict[str, C106ValuesMap]:
    """Flatten a domain ``MetricsBlock`` into ``{metric_key: values_dict}``.

    Every series' ``values`` mapping is copied into a fresh ``dict`` so the
    result is made of plain dicts only. The values are presumably already
    shaped like ``{code: number}``; they are copied as-is, not validated.
    """
    return {
        metric_key: dict(series.values)
        for metric_key, series in block.metrics.items()
    }
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def c106_doc_to_dto(*, doc: NfsDoc, asof: datetime) -> C106DTO:
    """Build the contracts-level ``C106DTO`` envelope from a domain ``NfsDoc``.

    Payload layout::

        {
            "blocks": {"y": {metric: {code: num}}, "q": {...}},
            "labels": {"y": {metric: raw_label}, "q": {...}},
        }

    Policy:
    - ``blocks`` and ``labels`` always contain the ``"y"`` and ``"q"`` keys;
      a key whose block is absent from ``doc.blocks`` keeps an empty dict.
    - Labels for a block default to an empty dict when ``doc.labels`` has no
      entry for that key.

    Raises:
        TypeError: If a present block is not a ``MetricsBlock``.
        ValueError: If ``get_block_keys`` yields a key other than y/q.
    """
    # 1) The skeleton is always fully populated so the contract shape is stable.
    contract_keys = ("y", "q")
    block_maps: dict[str, dict[str, C106ValuesMap]] = {key: {} for key in contract_keys}
    label_maps: dict[str, dict] = {key: {} for key in contract_keys}

    # 2) Fill the skeleton following the domain's block-key list and order.
    for bk in get_block_keys(endpoint_kind):
        block_data = doc.blocks.get(bk)
        if block_data is None:
            continue

        if not isinstance(block_data, MetricsBlock):
            raise TypeError(
                f"c106 expects MetricsBlock, got {type(block_data).__name__} | block_key={bk!r}"
            )

        metric_map = _metricsblock_to_c106_metric_map(block_data)
        label_map = dict(doc.labels.get(bk, {}))  # {} when no labels exist

        if bk not in block_maps:
            raise ValueError(f"invalid c106 block key: {bk!r}")
        block_maps[bk] = metric_map
        label_maps[bk] = label_map

    # TypedDicts exist only for the type checker (they perform no runtime
    # validation), so the plain dicts are cast instead of being constructed.
    payload: C106Payload = cast(
        C106Payload,
        {
            "blocks": cast(C106Blocks, block_maps),
            "labels": cast(C106Labels, label_maps),
        },
    )

    return C106DTO(
        code=doc.code,
        asof=asof,
        endpoint=endpoint,
        payload=payload,  # injected as a plain dict
    )
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class IngestC106:
    """Use case that fetches C106 documents, converts them to DTOs and writes them to a sink."""

    def __init__(self, fetch: FetchC106, sink: NfsSinkPort[C106DTO]):
        """Store the fetch service and the sink the DTOs are written to."""
        self.fetch = fetch
        self.sink = sink

    async def execute(
        self,
        code: str,
        *,
        sleep_sec: float = 2.0,
        asof: datetime | None = None,
    ) -> C106DTO:
        """Ingest a single code.

        Args:
            code: Code passed to the fetch service.
            sleep_sec: Forwarded unchanged to ``fetch.execute``.
            asof: Timestamp for the DTO; ``utcnow()`` is used when omitted.

        Returns:
            The DTO that was written to the sink.

        Raises:
            RuntimeError: If the fetch service returns ``None``.
        """
        stamp = asof if asof else utcnow()

        fetched = await self.fetch.execute(code, sleep_sec=sleep_sec)
        logger.debug(f"doc:\n{fetched}")
        if fetched is None:
            raise RuntimeError(f"c106 fetch returned None: code={code}")

        result = c106_doc_to_dto(doc=fetched, asof=stamp)
        logger.debug(f"dto:\n{result}")

        await self.sink.write(result, endpoint=endpoint)
        return result

    async def execute_many(
        self,
        codes: Iterable[str],
        *,
        sleep_sec: float = 2.0,
        asof: Optional[datetime] = None,
    ) -> list[C106DTO]:
        """Ingest several codes, stamping every DTO with the same timestamp.

        Args:
            codes: Codes passed to ``fetch.execute_many``.
            sleep_sec: Forwarded unchanged to ``fetch.execute_many``.
            asof: Shared timestamp for the batch; ``utcnow()`` when omitted.

        Returns:
            The DTOs that were written to the sink, in fetch order.
        """
        stamp = asof if asof else utcnow()

        fetched = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
        results: list[C106DTO] = []
        for item in fetched:
            results.append(c106_doc_to_dto(doc=item, asof=stamp))
        logger.debug(f"dtos:\n{results}")
        await self.sink.write_many(results, endpoint=endpoint)
        return results
|