scraper2-hj3415 2.4.1__py3-none-any.whl → 2.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scraper2_hj3415/app/adapters/out/playwright/browser.py +26 -0
- {scraper2 → scraper2_hj3415/app}/adapters/out/playwright/browser_factory.py +7 -7
- scraper2_hj3415/app/adapters/out/playwright/capabilities/__init__.py +18 -0
- scraper2_hj3415/app/adapters/out/playwright/capabilities/_base.py +19 -0
- scraper2_hj3415/app/adapters/out/playwright/capabilities/interaction.py +37 -0
- scraper2_hj3415/app/adapters/out/playwright/capabilities/navigation.py +24 -0
- scraper2_hj3415/app/adapters/out/playwright/capabilities/scope.py +84 -0
- scraper2_hj3415/app/adapters/out/playwright/capabilities/table.py +90 -0
- scraper2_hj3415/app/adapters/out/playwright/capabilities/text.py +25 -0
- scraper2_hj3415/app/adapters/out/playwright/capabilities/wait.py +96 -0
- {scraper2 → scraper2_hj3415/app}/adapters/out/playwright/session.py +1 -1
- scraper2_hj3415/app/adapters/out/sinks/memory_sink.py +25 -0
- scraper2_hj3415/app/adapters/out/sinks/mongo_sink.py +63 -0
- {scraper2/adapters/out/sinks/memory → scraper2_hj3415/app/adapters/out/sinks}/store.py +14 -5
- scraper2_hj3415/app/adapters/site/wisereport_playwright.py +379 -0
- scraper2_hj3415/app/composition.py +225 -0
- scraper2_hj3415/app/domain/blocks.py +61 -0
- scraper2_hj3415/app/domain/constants.py +33 -0
- scraper2_hj3415/app/domain/doc.py +16 -0
- scraper2_hj3415/app/domain/endpoint.py +11 -0
- scraper2_hj3415/app/domain/series.py +11 -0
- scraper2_hj3415/app/domain/types.py +19 -0
- scraper2_hj3415/app/parsing/_normalize/label.py +92 -0
- scraper2_hj3415/app/parsing/_normalize/table.py +53 -0
- scraper2_hj3415/app/parsing/_normalize/text.py +31 -0
- scraper2_hj3415/app/parsing/_normalize/values.py +70 -0
- scraper2_hj3415/app/parsing/_tables/html_table.py +89 -0
- scraper2_hj3415/app/parsing/c101/__init__.py +0 -0
- scraper2_hj3415/app/parsing/c101/_sise_normalizer.py +103 -0
- scraper2_hj3415/app/parsing/c101/company_overview.py +47 -0
- scraper2_hj3415/app/parsing/c101/earning_surprise.py +217 -0
- scraper2_hj3415/app/parsing/c101/fundamentals.py +95 -0
- scraper2_hj3415/app/parsing/c101/major_shareholders.py +57 -0
- scraper2_hj3415/app/parsing/c101/sise.py +47 -0
- scraper2_hj3415/app/parsing/c101/summary_cmp.py +87 -0
- scraper2_hj3415/app/parsing/c101/yearly_consensus.py +197 -0
- scraper2_hj3415/app/parsing/c101_parser.py +45 -0
- scraper2_hj3415/app/parsing/c103_parser.py +22 -0
- scraper2_hj3415/app/parsing/c104_parser.py +26 -0
- scraper2_hj3415/app/parsing/c106_parser.py +137 -0
- scraper2_hj3415/app/parsing/c108_parser.py +254 -0
- scraper2_hj3415/app/ports/__init__.py +0 -0
- scraper2_hj3415/app/ports/browser/__init__.py +0 -0
- scraper2_hj3415/app/ports/browser/browser_factory_port.py +9 -0
- scraper2_hj3415/app/ports/browser/browser_port.py +32 -0
- scraper2_hj3415/app/ports/browser/capabilities/__init__.py +15 -0
- scraper2_hj3415/app/ports/browser/capabilities/interaction.py +27 -0
- scraper2_hj3415/app/ports/browser/capabilities/navigation.py +18 -0
- scraper2_hj3415/app/ports/browser/capabilities/scope.py +66 -0
- scraper2_hj3415/app/ports/browser/capabilities/table.py +28 -0
- scraper2_hj3415/app/ports/browser/capabilities/text.py +16 -0
- scraper2_hj3415/app/ports/browser/capabilities/wait.py +51 -0
- scraper2_hj3415/app/ports/ingest/__init__.py +0 -0
- scraper2_hj3415/app/ports/ingest/nfs_ingest_port.py +28 -0
- scraper2_hj3415/app/ports/sinks/__init__.py +0 -0
- scraper2_hj3415/app/ports/sinks/nfs_sink_port.py +20 -0
- scraper2_hj3415/app/ports/site/__init__.py +0 -0
- scraper2_hj3415/app/ports/site/wisereport_port.py +30 -0
- scraper2_hj3415/app/services/__init__.py +0 -0
- scraper2_hj3415/app/services/fetch/__init__.py +0 -0
- scraper2_hj3415/app/services/fetch/fetch_c101.py +59 -0
- scraper2_hj3415/app/services/fetch/fetch_c103.py +121 -0
- scraper2_hj3415/app/services/fetch/fetch_c104.py +160 -0
- scraper2_hj3415/app/services/fetch/fetch_c106.py +90 -0
- scraper2_hj3415/app/services/fetch/fetch_c108.py +59 -0
- scraper2_hj3415/app/services/nfs_doc_builders.py +304 -0
- scraper2_hj3415/app/usecases/__init__.py +0 -0
- scraper2_hj3415/app/usecases/ingest/__init__.py +0 -0
- scraper2_hj3415/app/usecases/ingest/ingest_c101.py +111 -0
- scraper2_hj3415/app/usecases/ingest/ingest_c103.py +162 -0
- scraper2_hj3415/app/usecases/ingest/ingest_c104.py +182 -0
- scraper2_hj3415/app/usecases/ingest/ingest_c106.py +136 -0
- scraper2_hj3415/app/usecases/ingest/ingest_c108.py +122 -0
- scraper2/main.py → scraper2_hj3415/cli.py +45 -72
- {scraper2_hj3415-2.4.1.dist-info → scraper2_hj3415-2.7.0.dist-info}/METADATA +3 -1
- scraper2_hj3415-2.7.0.dist-info/RECORD +93 -0
- scraper2_hj3415-2.7.0.dist-info/entry_points.txt +3 -0
- scraper2/adapters/out/playwright/browser.py +0 -102
- scraper2/adapters/out/sinks/memory/__init__.py +0 -15
- scraper2/adapters/out/sinks/memory/c101_memory_sink.py +0 -26
- scraper2/adapters/out/sinks/memory/c103_memory_sink.py +0 -26
- scraper2/adapters/out/sinks/memory/c104_memory_sink.py +0 -26
- scraper2/adapters/out/sinks/memory/c106_memory_sink.py +0 -26
- scraper2/adapters/out/sinks/memory/c108_memory_sink.py +0 -26
- scraper2/adapters/out/sinks/mongo/__init__.py +0 -14
- scraper2/adapters/out/sinks/mongo/c101_mongo_sink.py +0 -43
- scraper2/adapters/out/sinks/mongo/c103_mongo_sink.py +0 -41
- scraper2/adapters/out/sinks/mongo/c104_mongo_sink.py +0 -41
- scraper2/adapters/out/sinks/mongo/c106_mongo_sink.py +0 -41
- scraper2/adapters/out/sinks/mongo/c108_mongo_sink.py +0 -41
- scraper2/app/composition.py +0 -204
- scraper2/app/parsing/_converters.py +0 -85
- scraper2/app/parsing/_normalize.py +0 -134
- scraper2/app/parsing/c101_parser.py +0 -143
- scraper2/app/parsing/c103_parser.py +0 -128
- scraper2/app/parsing/c104_parser.py +0 -143
- scraper2/app/parsing/c106_parser.py +0 -153
- scraper2/app/parsing/c108_parser.py +0 -65
- scraper2/app/ports/browser/browser_factory_port.py +0 -11
- scraper2/app/ports/browser/browser_port.py +0 -22
- scraper2/app/ports/ingest_port.py +0 -14
- scraper2/app/ports/sinks/base_sink_port.py +0 -14
- scraper2/app/ports/sinks/c101_sink_port.py +0 -9
- scraper2/app/ports/sinks/c103_sink_port.py +0 -9
- scraper2/app/ports/sinks/c104_sink_port.py +0 -9
- scraper2/app/ports/sinks/c106_sink_port.py +0 -9
- scraper2/app/ports/sinks/c108_sink_port.py +0 -9
- scraper2/app/usecases/fetch/fetch_c101.py +0 -43
- scraper2/app/usecases/fetch/fetch_c103.py +0 -103
- scraper2/app/usecases/fetch/fetch_c104.py +0 -76
- scraper2/app/usecases/fetch/fetch_c106.py +0 -90
- scraper2/app/usecases/fetch/fetch_c108.py +0 -49
- scraper2/app/usecases/ingest/ingest_c101.py +0 -36
- scraper2/app/usecases/ingest/ingest_c103.py +0 -37
- scraper2/app/usecases/ingest/ingest_c104.py +0 -37
- scraper2/app/usecases/ingest/ingest_c106.py +0 -38
- scraper2/app/usecases/ingest/ingest_c108.py +0 -39
- scraper2_hj3415-2.4.1.dist-info/RECORD +0 -63
- scraper2_hj3415-2.4.1.dist-info/entry_points.txt +0 -3
- {scraper2 → scraper2_hj3415}/.DS_Store +0 -0
- {scraper2 → scraper2_hj3415}/__init__.py +0 -0
- {scraper2/adapters/out → scraper2_hj3415/app}/__init__.py +0 -0
- {scraper2/adapters/out/playwright → scraper2_hj3415/app/adapters}/__init__.py +0 -0
- {scraper2 → scraper2_hj3415/app}/adapters/out/.DS_Store +0 -0
- {scraper2/app → scraper2_hj3415/app/adapters/out}/__init__.py +0 -0
- {scraper2/app/parsing → scraper2_hj3415/app/adapters/out/playwright}/__init__.py +0 -0
- {scraper2 → scraper2_hj3415/app}/adapters/out/sinks/.DS_Store +0 -0
- {scraper2/app/ports → scraper2_hj3415/app/adapters/out/sinks}/__init__.py +0 -0
- {scraper2/app/ports/browser → scraper2_hj3415/app/adapters/site}/__init__.py +0 -0
- {scraper2/app/ports/sinks → scraper2_hj3415/app/domain}/__init__.py +0 -0
- {scraper2/app/usecases → scraper2_hj3415/app/parsing}/__init__.py +0 -0
- {scraper2/app/usecases/fetch → scraper2_hj3415/app/parsing/_normalize}/__init__.py +0 -0
- {scraper2/app/usecases/ingest → scraper2_hj3415/app/parsing/_tables}/__init__.py +0 -0
- {scraper2_hj3415-2.4.1.dist-info → scraper2_hj3415-2.7.0.dist-info}/WHEEL +0 -0
- {scraper2_hj3415-2.4.1.dist-info → scraper2_hj3415-2.7.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
# scraper2_hj3415/app/usecases/ingest/ingest_c104.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Iterable, Optional, cast
|
|
6
|
+
|
|
7
|
+
from scraper2_hj3415.app.services.fetch.fetch_c104 import FetchC104
|
|
8
|
+
from scraper2_hj3415.app.ports.sinks.nfs_sink_port import NfsSinkPort
|
|
9
|
+
from common_hj3415.utils.time import utcnow
|
|
10
|
+
|
|
11
|
+
from scraper2_hj3415.app.domain.endpoint import EndpointKind
|
|
12
|
+
from scraper2_hj3415.app.domain.constants import get_block_keys
|
|
13
|
+
from scraper2_hj3415.app.domain.doc import NfsDoc
|
|
14
|
+
from scraper2_hj3415.app.domain.blocks import MetricsBlock
|
|
15
|
+
|
|
16
|
+
from contracts_hj3415.nfs.types import Endpoint
|
|
17
|
+
|
|
18
|
+
from contracts_hj3415.nfs.c104_dto import (
|
|
19
|
+
C104DTO,
|
|
20
|
+
C104Payload,
|
|
21
|
+
C104Blocks,
|
|
22
|
+
C104Labels,
|
|
23
|
+
C104ValuesMap,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
from logging_hj3415 import logger
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
endpoint_kind = EndpointKind.C104
|
|
30
|
+
endpoint: Endpoint = cast(Endpoint, endpoint_kind.value)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _metricsblock_to_c104_metric_map(block: MetricsBlock) -> dict[str, C104ValuesMap]:
|
|
34
|
+
"""
|
|
35
|
+
MetricsBlock(domain) -> dict[MetricKey, dict[CodeKey, Num]]
|
|
36
|
+
- domain MetricSeries.values 가 이미 {code: num} 형태라면 그대로 dict로 고정시킴.
|
|
37
|
+
"""
|
|
38
|
+
out: dict[str, C104ValuesMap] = {}
|
|
39
|
+
for mk, series in block.metrics.items():
|
|
40
|
+
out[mk] = dict(series.values) # Mapping -> dict
|
|
41
|
+
return out
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# Every block key the c104 payload contract defines. blocks/labels always
# carry all of these keys so downstream consumers get a stable shape, even
# when the scrape produced nothing for a section.
_C104_BLOCK_KEYS: tuple[str, ...] = (
    "수익성y",
    "성장성y",
    "안정성y",
    "활동성y",
    "가치분석y",
    "수익성q",
    "성장성q",
    "안정성q",
    "활동성q",
    "가치분석q",
)


def c104_doc_to_dto(*, doc: NfsDoc, asof: datetime) -> C104DTO:
    """Convert an NfsDoc (domain, endpoint=c104) into a C104DTO envelope.

    C104Payload shape:
        {
            "blocks": {"수익성y": {metric: {code: num}}, "성장성y": {...}, ...},
            "labels": {"수익성y": {metric: raw_label}, "성장성y": {...}, ...},
        }

    Policy:
        - blocks/labels always contain every y/q key (empty dict when absent).
        - doc.labels entries may be missing, so they are copied with dict().

    Raises:
        TypeError: when a block is present but is not a MetricsBlock.
        ValueError: when the domain yields a block key outside the c104 contract.
    """
    # 1) Always start from the full skeleton (contract stability).
    blocks: C104Blocks = cast(C104Blocks, {key: {} for key in _C104_BLOCK_KEYS})
    labels: C104Labels = cast(C104Labels, {key: {} for key in _C104_BLOCK_KEYS})

    # 2) Fill in following the domain's block-key order/listing.
    #    (Replaces a 10-branch match that duplicated the same assignment pair
    #    per literal key — same behavior, single code path.)
    for bk in get_block_keys(endpoint_kind):
        bd = doc.blocks.get(bk)
        if bd is None:
            continue

        if not isinstance(bd, MetricsBlock):
            raise TypeError(
                f"c104 expects MetricsBlock, got {type(bd).__name__} | block_key={bk!r}"
            )

        if bk not in _C104_BLOCK_KEYS:
            raise ValueError(f"invalid C104 block key: {bk!r}")

        # TypedDicts cannot be indexed with a variable key under strict type
        # checking; at runtime they are plain dicts, so cast once and assign.
        cast(dict, blocks)[bk] = _metricsblock_to_c104_metric_map(bd)
        cast(dict, labels)[bk] = dict(doc.labels.get(bk, {}))  # {} when missing

    payload: C104Payload = cast(C104Payload, {"blocks": blocks, "labels": labels})

    # NOTE: TypedDict is a type-checker construct, not runtime validation,
    # so C104Payload(**payload)-style construction is impossible (not callable).
    return C104DTO(
        code=doc.code,
        asof=asof,
        endpoint=endpoint,
        payload=payload,  # inject the dict as-is
    )
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
class IngestC104:
    """Fetch the c104 endpoint, convert the domain doc to a C104DTO, persist it.

    Pipeline: FetchC104 -> c104_doc_to_dto -> NfsSinkPort.write / write_many.
    """

    def __init__(self, fetch: FetchC104, sink: NfsSinkPort[C104DTO]):
        self.fetch = fetch
        self.sink = sink

    async def execute(
        self,
        code: str,
        *,
        sleep_sec: float = 2.0,
        asof: datetime | None = None,
    ) -> C104DTO:
        """Ingest a single ticker code.

        Args:
            code: ticker code, e.g. "005930".
            sleep_sec: politeness delay forwarded to the fetcher.
            asof: batch reference time; defaults to utcnow().

        Raises:
            RuntimeError: when the fetch step returns None.
        """
        asof = asof or utcnow()

        doc = await self.fetch.execute(code, sleep_sec=sleep_sec)
        logger.debug(f"doc:\n{doc}")
        if doc is None:
            # Fixed: this message previously said "c106" inside the c104 use case.
            raise RuntimeError(f"c104 fetch returned None: code={code}")

        dto = c104_doc_to_dto(doc=doc, asof=asof)
        logger.debug(f"dto:\n{dto}")

        await self.sink.write(dto, endpoint=endpoint)
        return dto

    async def execute_many(
        self,
        codes: Iterable[str],
        *,
        sleep_sec: float = 2.0,
        asof: Optional[datetime] = None,
    ) -> list[C104DTO]:
        """Ingest multiple codes under a single shared batch asof timestamp."""
        batch_asof = asof or utcnow()

        docs = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
        dtos = [c104_doc_to_dto(doc=d, asof=batch_asof) for d in docs]
        logger.debug(f"dtos:\n{dtos}")
        await self.sink.write_many(dtos, endpoint=endpoint)
        return dtos
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
# scraper2_hj3415/app/usecases/ingest/ingest_c106.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Iterable, Optional, cast
|
|
6
|
+
|
|
7
|
+
from scraper2_hj3415.app.services.fetch.fetch_c106 import FetchC106
|
|
8
|
+
from scraper2_hj3415.app.ports.sinks.nfs_sink_port import NfsSinkPort
|
|
9
|
+
from common_hj3415.utils.time import utcnow
|
|
10
|
+
|
|
11
|
+
from scraper2_hj3415.app.domain.endpoint import EndpointKind
|
|
12
|
+
from scraper2_hj3415.app.domain.constants import get_block_keys
|
|
13
|
+
from scraper2_hj3415.app.domain.doc import NfsDoc
|
|
14
|
+
from scraper2_hj3415.app.domain.blocks import MetricsBlock
|
|
15
|
+
|
|
16
|
+
from contracts_hj3415.nfs.types import Endpoint
|
|
17
|
+
|
|
18
|
+
from contracts_hj3415.nfs.c106_dto import (
|
|
19
|
+
C106DTO,
|
|
20
|
+
C106Payload,
|
|
21
|
+
C106Blocks,
|
|
22
|
+
C106Labels,
|
|
23
|
+
C106ValuesMap,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
from logging_hj3415 import logger
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
endpoint_kind = EndpointKind.C106
|
|
30
|
+
endpoint: Endpoint = cast(Endpoint, endpoint_kind.value)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _metricsblock_to_c106_metric_map(block: MetricsBlock) -> dict[str, C106ValuesMap]:
|
|
34
|
+
"""
|
|
35
|
+
MetricsBlock(domain) -> dict[MetricKey, dict[CodeKey, Num]]
|
|
36
|
+
- domain MetricSeries.values 가 이미 {code: num} 형태라면 그대로 dict로 고정시킴.
|
|
37
|
+
"""
|
|
38
|
+
out: dict[str, C106ValuesMap] = {}
|
|
39
|
+
for mk, series in block.metrics.items():
|
|
40
|
+
out[mk] = dict(series.values) # Mapping -> dict
|
|
41
|
+
return out
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def c106_doc_to_dto(*, doc: NfsDoc, asof: datetime) -> C106DTO:
    """Build a C106DTO (contracts envelope) from a c106 domain NfsDoc.

    C106Payload shape:
        {
            "blocks": {"y": {metric: {code: num}}, "q": {...}},
            "labels": {"y": {metric: raw_label}, "q": {...}},
        }

    Policy:
        - blocks/labels always carry both "y" and "q" keys (empty dict when absent).
        - doc.labels entries may be missing, so they are defensively copied with dict().
    """
    # 1) Always start from the full skeleton (contract stability).
    blocks: C106Blocks = {"y": {}, "q": {}}
    labels: C106Labels = {"y": {}, "q": {}}

    # 2) Fill in following the domain's block-key order/listing.
    for block_key in get_block_keys(endpoint_kind):
        block_data = doc.blocks.get(block_key)
        if block_data is None:
            continue

        if not isinstance(block_data, MetricsBlock):
            raise TypeError(
                f"c106 expects MetricsBlock, got {type(block_data).__name__} | block_key={block_key!r}"
            )

        values = _metricsblock_to_c106_metric_map(block_data)
        raw_labels = dict(doc.labels.get(block_key, {}))  # {} when missing

        if block_key == "y":
            blocks["y"] = values
            labels["y"] = raw_labels
        elif block_key == "q":
            blocks["q"] = values
            labels["q"] = raw_labels
        else:
            raise ValueError(f"invalid c106 block key: {block_key!r}")

    payload: C106Payload = cast(C106Payload, {"blocks": blocks, "labels": labels})

    # NOTE: TypedDict is a type-checker construct, not runtime validation,
    # so C106Payload(**payload)-style construction is impossible (not callable).
    return C106DTO(
        code=doc.code,
        asof=asof,
        endpoint=endpoint,
        payload=payload,  # inject the dict as-is
    )
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class IngestC106:
    """Drive the c106 pipeline: fetch -> DTO conversion -> sink persistence."""

    def __init__(self, fetch: FetchC106, sink: NfsSinkPort[C106DTO]):
        self.fetch = fetch
        self.sink = sink

    async def execute(
        self,
        code: str,
        *,
        sleep_sec: float = 2.0,
        asof: datetime | None = None,
    ) -> C106DTO:
        """Ingest one ticker code.

        Raises:
            RuntimeError: when the fetch step returns None.
        """
        run_asof = asof or utcnow()

        doc = await self.fetch.execute(code, sleep_sec=sleep_sec)
        logger.debug(f"doc:\n{doc}")
        if doc is None:
            raise RuntimeError(f"c106 fetch returned None: code={code}")

        dto = c106_doc_to_dto(doc=doc, asof=run_asof)
        logger.debug(f"dto:\n{dto}")

        await self.sink.write(dto, endpoint=endpoint)
        return dto

    async def execute_many(
        self,
        codes: Iterable[str],
        *,
        sleep_sec: float = 2.0,
        asof: Optional[datetime] = None,
    ) -> list[C106DTO]:
        """Ingest a batch of codes sharing one asof timestamp."""
        shared_asof = asof or utcnow()

        fetched = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
        results = [c106_doc_to_dto(doc=item, asof=shared_asof) for item in fetched]
        logger.debug(f"dtos:\n{results}")
        await self.sink.write_many(results, endpoint=endpoint)
        return results
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# scraper2_hj3415/app/usecases/ingest/ingest_c108.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Iterable, Optional, cast
|
|
6
|
+
|
|
7
|
+
from scraper2_hj3415.app.services.fetch.fetch_c108 import FetchC108
|
|
8
|
+
from scraper2_hj3415.app.ports.sinks.nfs_sink_port import NfsSinkPort
|
|
9
|
+
from common_hj3415.utils.time import utcnow
|
|
10
|
+
|
|
11
|
+
from scraper2_hj3415.app.domain.endpoint import EndpointKind
|
|
12
|
+
from scraper2_hj3415.app.domain.constants import get_block_keys
|
|
13
|
+
from scraper2_hj3415.app.domain.doc import NfsDoc
|
|
14
|
+
from scraper2_hj3415.app.domain.blocks import RecordsBlock
|
|
15
|
+
|
|
16
|
+
from contracts_hj3415.nfs.types import Endpoint
|
|
17
|
+
|
|
18
|
+
from contracts_hj3415.nfs.c108_dto import C108DTO, C108Payload, C108Blocks
|
|
19
|
+
|
|
20
|
+
from logging_hj3415 import logger
|
|
21
|
+
|
|
22
|
+
endpoint_kind = EndpointKind.C108
|
|
23
|
+
endpoint: Endpoint = cast(Endpoint, endpoint_kind.value)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _to_list_of_dict(rows: object) -> list[dict]:
|
|
27
|
+
"""
|
|
28
|
+
RecordsBlock.rows(Sequence[Mapping]) -> list[dict]
|
|
29
|
+
- sink/serialization 안전하게 dict로 강제
|
|
30
|
+
"""
|
|
31
|
+
if not rows:
|
|
32
|
+
return []
|
|
33
|
+
out: list[dict] = []
|
|
34
|
+
if isinstance(rows, list):
|
|
35
|
+
for r in rows:
|
|
36
|
+
if isinstance(r, dict):
|
|
37
|
+
out.append(r)
|
|
38
|
+
else:
|
|
39
|
+
out.append(dict(r)) # Mapping이면 dict() 가능
|
|
40
|
+
return out
|
|
41
|
+
|
|
42
|
+
# Sequence[Mapping] 일반 케이스
|
|
43
|
+
try:
|
|
44
|
+
for r in rows: # type: ignore[assignment]
|
|
45
|
+
out.append(dict(r)) # Mapping 가정
|
|
46
|
+
except Exception:
|
|
47
|
+
return []
|
|
48
|
+
return out
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def c108_doc_to_dto(*, doc: NfsDoc, asof: datetime) -> C108DTO:
    """Build a C108DTO (contracts envelope) from a c108 domain NfsDoc.

    Rules:
        - payload.blocks["리포트"] is always present (empty list when no data)
        - c108 legitimately carries no labels, so none are emitted
        - rows are coerced to list[dict] for serialization safety

    Raises:
        ValueError: when the doc's endpoint kind is not C108.
    """
    if doc.endpoint_kind != EndpointKind.C108:
        raise ValueError(f"c108_doc_to_dto expects C108 doc, got: {doc.endpoint_kind}")

    # Follow block_keys, though in practice only '리포트' matters for c108.
    report_rows: list[dict] = []
    for block_key in get_block_keys(EndpointKind.C108):
        if block_key != "리포트":
            continue

        candidate = doc.blocks.get(block_key)
        # Anything other than a RecordsBlock is treated as "no data".
        report_rows = (
            _to_list_of_dict(candidate.rows)
            if isinstance(candidate, RecordsBlock)
            else []
        )

    blocks: C108Blocks = {"리포트": report_rows}
    payload: C108Payload = {"blocks": blocks}

    return C108DTO(
        code=doc.code,
        asof=asof,
        endpoint=endpoint,
        payload=payload,
    )
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class IngestC108:
    """Run the c108 pipeline: fetch reports, build the DTO, write to the sink."""

    def __init__(self, fetch: FetchC108, sink: NfsSinkPort[C108DTO]):
        self.fetch = fetch
        self.sink = sink

    async def execute(
        self, code: str, *, sleep_sec: float = 2.0, asof: datetime | None = None
    ) -> C108DTO:
        """Ingest a single code.

        Raises:
            RuntimeError: when the fetch step returns None.
        """
        effective_asof = asof or utcnow()

        doc = await self.fetch.execute(code, sleep_sec=sleep_sec)
        logger.debug(f"doc:\n{doc}")
        if doc is None:
            raise RuntimeError(f"c108 fetch returned None: code={code}")

        dto = c108_doc_to_dto(doc=doc, asof=effective_asof)
        logger.debug(f"dto:\n{dto}")

        await self.sink.write(dto, endpoint=endpoint)
        return dto

    async def execute_many(
        self,
        codes: Iterable[str],
        *,
        sleep_sec: float = 2.0,
        asof: Optional[datetime] = None,
    ) -> list[C108DTO]:
        """Ingest many codes under one shared asof timestamp."""
        shared_asof = asof or utcnow()

        fetched = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
        results = [c108_doc_to_dto(doc=item, asof=shared_asof) for item in fetched]
        logger.debug(f"dtos:\n{results}")
        await self.sink.write_many(results, endpoint=endpoint)
        return results
|
|
@@ -1,21 +1,32 @@
|
|
|
1
|
-
#
|
|
1
|
+
# scraper2_hj3415/cli.py
|
|
2
2
|
from __future__ import annotations
|
|
3
3
|
|
|
4
4
|
import asyncio
|
|
5
|
-
from typing import Any,
|
|
5
|
+
from typing import Any, cast, get_args
|
|
6
6
|
|
|
7
|
+
|
|
8
|
+
import time
|
|
7
9
|
import typer
|
|
8
10
|
from datetime import datetime, timezone
|
|
9
11
|
|
|
10
|
-
from
|
|
11
|
-
from
|
|
12
|
+
from db2_hj3415.nfs.repo import ensure_indexes
|
|
13
|
+
from db2_hj3415.settings import get_settings
|
|
14
|
+
from db2_hj3415.universe.repo import list_universe_codes
|
|
15
|
+
|
|
16
|
+
from scraper2_hj3415.app.composition import build_usecases
|
|
17
|
+
from scraper2_hj3415.app.ports.ingest.nfs_ingest_port import NfsIngestPort
|
|
18
|
+
from scraper2_hj3415.app.domain.types import Sink
|
|
12
19
|
|
|
13
|
-
from
|
|
20
|
+
from contracts_hj3415.nfs.types import Endpoint
|
|
21
|
+
from contracts_hj3415.universe.types import UniverseName
|
|
22
|
+
|
|
23
|
+
from logging_hj3415 import setup_logging, current_log_level, reset_logging, to_pretty_json
|
|
14
24
|
|
|
15
25
|
setup_logging()
|
|
26
|
+
# 운영시에는 아래 항목 주석처리하고 환경변수로 제어할것
|
|
27
|
+
reset_logging("DEBUG")
|
|
28
|
+
print(f"Current log level - {current_log_level()}")
|
|
16
29
|
|
|
17
|
-
Endpoint = Literal["c101", "c103", "c104", "c106", "c108", "all"]
|
|
18
|
-
Sink = Literal["memory", "mongo"]
|
|
19
30
|
|
|
20
31
|
app = typer.Typer(no_args_is_help=True)
|
|
21
32
|
|
|
@@ -30,54 +41,19 @@ app.add_typer(mi_app, name="mi")
|
|
|
30
41
|
# small helpers
|
|
31
42
|
# -------------------------
|
|
32
43
|
|
|
33
|
-
def _endpoint_list(
|
|
34
|
-
|
|
44
|
+
def _endpoint_list(endpoint: str) -> list[str]:
|
|
45
|
+
if endpoint == "all":
|
|
46
|
+
return list(get_args(Endpoint)) # -> ["c101", "c103", "c104", "c106", "c108"]
|
|
47
|
+
return [endpoint]
|
|
35
48
|
|
|
36
49
|
async def _mongo_bootstrap(db) -> None:
|
|
37
|
-
from db2.nfs import ensure_indexes
|
|
38
|
-
from db2.settings import get_settings
|
|
39
|
-
|
|
40
50
|
s = get_settings()
|
|
41
51
|
await ensure_indexes(db, snapshot_ttl_days=s.SNAPSHOT_TTL_DAYS)
|
|
42
52
|
|
|
43
|
-
|
|
44
|
-
async def _load_codes_from_universe(db, *, universe: str) -> list[str]:
|
|
45
|
-
"""
|
|
46
|
-
db2.universe에 저장된 universe_latest에서 codes 로드.
|
|
47
|
-
(네 db2 API 명에 맞춰 조정하면 됨)
|
|
48
|
-
"""
|
|
49
|
-
from db2.universe import get_universe_latest # 네가 가진 API
|
|
50
|
-
|
|
51
|
-
doc = await get_universe_latest(db, universe=universe)
|
|
52
|
-
if not doc:
|
|
53
|
-
return []
|
|
54
|
-
|
|
55
|
-
# doc 형태가 {"items":[{code,name,...}, ...]} 혹은 {"payload":{"items":[...]}} 일 수 있어서 방어
|
|
56
|
-
data = doc
|
|
57
|
-
if isinstance(data, dict) and "payload" in data and isinstance(data["payload"], dict):
|
|
58
|
-
data = data["payload"]
|
|
59
|
-
if isinstance(data, dict) and "items" in data:
|
|
60
|
-
data = data["items"]
|
|
61
|
-
|
|
62
|
-
if not isinstance(data, list):
|
|
63
|
-
return []
|
|
64
|
-
|
|
65
|
-
codes: list[str] = []
|
|
66
|
-
for row in data:
|
|
67
|
-
if not isinstance(row, dict):
|
|
68
|
-
continue
|
|
69
|
-
code = str(row.get("code") or "").strip()
|
|
70
|
-
if code:
|
|
71
|
-
codes.append(code)
|
|
72
|
-
return codes
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
import time
|
|
76
|
-
|
|
77
53
|
async def _run_ingest_with_progress(
|
|
78
54
|
*,
|
|
79
55
|
ucs: Any,
|
|
80
|
-
endpoint:
|
|
56
|
+
endpoint: str,
|
|
81
57
|
codes: list[str],
|
|
82
58
|
sleep_sec: float,
|
|
83
59
|
show: bool,
|
|
@@ -99,7 +75,7 @@ async def _run_ingest_with_progress(
|
|
|
99
75
|
yield xs[i:i + n]
|
|
100
76
|
|
|
101
77
|
async def _run_one_endpoint(ep: str) -> None:
|
|
102
|
-
ingest_uc = cast(
|
|
78
|
+
ingest_uc = cast(NfsIngestPort, getattr(ucs.ingest, ep))
|
|
103
79
|
|
|
104
80
|
ok = 0
|
|
105
81
|
fail = 0
|
|
@@ -128,18 +104,6 @@ async def _run_ingest_with_progress(
|
|
|
128
104
|
elapsed = time.perf_counter() - t0 # ✅ 종료 시각
|
|
129
105
|
typer.echo(f"\n⏱ elapsed time: {_format_elapsed(elapsed)}")
|
|
130
106
|
|
|
131
|
-
def _dto_to_pretty(obj: Any) -> str:
|
|
132
|
-
# pydantic v2 우선
|
|
133
|
-
if hasattr(obj, "model_dump_json"):
|
|
134
|
-
return obj.model_dump_json(indent=2, by_alias=False)
|
|
135
|
-
if hasattr(obj, "model_dump"):
|
|
136
|
-
import json
|
|
137
|
-
return json.dumps(obj.model_dump(), ensure_ascii=False, indent=2)
|
|
138
|
-
# dict fallback
|
|
139
|
-
if isinstance(obj, dict):
|
|
140
|
-
import json
|
|
141
|
-
return json.dumps(obj, ensure_ascii=False, indent=2, default=str)
|
|
142
|
-
return str(obj)
|
|
143
107
|
|
|
144
108
|
def _format_elapsed(sec: float) -> str:
|
|
145
109
|
if sec < 60:
|
|
@@ -189,11 +153,11 @@ def _parse_asof(asof: str | None) -> datetime:
|
|
|
189
153
|
|
|
190
154
|
@nfs_app.command("one")
|
|
191
155
|
def nfs_one(
|
|
192
|
-
endpoint: Endpoint = typer.Argument(..., help="c101|c103|c104|c106|c108|all"),
|
|
193
156
|
code: str = typer.Argument(..., help="종목코드 (예: 005930)"),
|
|
157
|
+
endpoint: str = typer.Argument(..., help="c101|c103|c104|c106|c108|all"),
|
|
194
158
|
sleep_sec: float = typer.Option(2.0, "--sleep"),
|
|
195
159
|
sink: Sink = typer.Option("memory", "--sink"),
|
|
196
|
-
show: bool = typer.Option(
|
|
160
|
+
show: bool = typer.Option(False, "--show/--no-show", help="결과 DTO 출력"),
|
|
197
161
|
asof: str | None = typer.Option(None, "--asof", help="배치 기준시각(ISO8601, UTC 권장). 예: 2026-01-09T05:00:00Z"),
|
|
198
162
|
):
|
|
199
163
|
code = code.strip()
|
|
@@ -201,7 +165,7 @@ def nfs_one(
|
|
|
201
165
|
raise typer.BadParameter("code는 비어있을 수 없습니다.")
|
|
202
166
|
|
|
203
167
|
async def _run():
|
|
204
|
-
ucs = build_usecases(
|
|
168
|
+
ucs = build_usecases(sink=sink)
|
|
205
169
|
|
|
206
170
|
if sink == "mongo":
|
|
207
171
|
if ucs.db is None:
|
|
@@ -211,17 +175,23 @@ def nfs_one(
|
|
|
211
175
|
try:
|
|
212
176
|
run_asof = _parse_asof(asof)
|
|
213
177
|
for ep in _endpoint_list(endpoint):
|
|
214
|
-
ingest_uc = cast(
|
|
178
|
+
ingest_uc = cast(NfsIngestPort, getattr(ucs.ingest, ep))
|
|
215
179
|
results = await ingest_uc.execute_many([code], sleep_sec=sleep_sec, asof=run_asof)
|
|
216
180
|
dto = results[0] if results else None
|
|
217
181
|
|
|
218
182
|
typer.echo(f"\n=== ONE DONE: {ep} {code} ===")
|
|
219
|
-
|
|
183
|
+
is_memory_sink = sink == "memory"
|
|
184
|
+
should_show = show or is_memory_sink
|
|
185
|
+
|
|
186
|
+
if not should_show:
|
|
220
187
|
continue
|
|
188
|
+
|
|
221
189
|
if dto is None:
|
|
222
190
|
typer.echo("(no result)")
|
|
223
191
|
else:
|
|
224
|
-
|
|
192
|
+
if is_memory_sink:
|
|
193
|
+
typer.echo("memory result:")
|
|
194
|
+
typer.echo(to_pretty_json(dto))
|
|
225
195
|
finally:
|
|
226
196
|
await ucs.aclose()
|
|
227
197
|
|
|
@@ -230,7 +200,7 @@ def nfs_one(
|
|
|
230
200
|
|
|
231
201
|
@nfs_app.command("all")
|
|
232
202
|
def nfs_all(
|
|
233
|
-
endpoint:
|
|
203
|
+
endpoint: str = typer.Argument(..., help="c101|c103|c104|c106|c108|all"),
|
|
234
204
|
universe: str = typer.Option("krx300", "--universe"),
|
|
235
205
|
limit: int = typer.Option(0, "--limit", help="0=전체"),
|
|
236
206
|
sleep_sec: float = typer.Option(2.0, "--sleep"),
|
|
@@ -241,15 +211,18 @@ def nfs_all(
|
|
|
241
211
|
asof: str | None = typer.Option(None, "--asof", help="배치 기준시각(ISO8601). 예: 2026-01-09T05:00:00Z"),
|
|
242
212
|
):
|
|
243
213
|
async def _run():
|
|
244
|
-
ucs = build_usecases(
|
|
214
|
+
ucs = build_usecases(sink=sink)
|
|
245
215
|
if ucs.db is None:
|
|
246
216
|
raise RuntimeError("all 모드는 DB가 필요합니다. mongo sink로 ucs.db를 노출하세요.")
|
|
247
217
|
await _mongo_bootstrap(ucs.db)
|
|
248
218
|
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
219
|
+
try:
|
|
220
|
+
u = UniverseName(universe)
|
|
221
|
+
except ValueError:
|
|
222
|
+
raise typer.BadParameter(f"unknown universe: {universe}")
|
|
223
|
+
codes = await list_universe_codes(ucs.db, universe=u)
|
|
252
224
|
|
|
225
|
+
codes = sorted(codes)
|
|
253
226
|
if limit and limit > 0:
|
|
254
227
|
codes = codes[:limit]
|
|
255
228
|
|
|
@@ -281,4 +254,4 @@ def mi():
|
|
|
281
254
|
|
|
282
255
|
|
|
283
256
|
if __name__ == "__main__":
|
|
284
|
-
app()
|
|
257
|
+
app()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: scraper2-hj3415
|
|
3
|
-
Version: 2.
|
|
3
|
+
Version: 2.7.0
|
|
4
4
|
Summary: Naver WiseReport scraper
|
|
5
5
|
Keywords: example,demo
|
|
6
6
|
Author-email: Hyungjin Kim <hj3415@gmail.com>
|
|
@@ -17,6 +17,8 @@ Requires-Dist: lxml>=6.0.2
|
|
|
17
17
|
Requires-Dist: typer>=0.21.0
|
|
18
18
|
Requires-Dist: db2-hj3415
|
|
19
19
|
Requires-Dist: contracts-hj3415
|
|
20
|
+
Requires-Dist: common-hj3415
|
|
21
|
+
Requires-Dist: logging-hj3415
|
|
20
22
|
|
|
21
23
|
# scraper2
|
|
22
24
|
|