scraper2-hj3415 2.4.1__py3-none-any.whl → 2.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. scraper2_hj3415/app/adapters/out/playwright/browser.py +26 -0
  2. {scraper2 → scraper2_hj3415/app}/adapters/out/playwright/browser_factory.py +7 -7
  3. scraper2_hj3415/app/adapters/out/playwright/capabilities/__init__.py +18 -0
  4. scraper2_hj3415/app/adapters/out/playwright/capabilities/_base.py +19 -0
  5. scraper2_hj3415/app/adapters/out/playwright/capabilities/interaction.py +37 -0
  6. scraper2_hj3415/app/adapters/out/playwright/capabilities/navigation.py +24 -0
  7. scraper2_hj3415/app/adapters/out/playwright/capabilities/scope.py +84 -0
  8. scraper2_hj3415/app/adapters/out/playwright/capabilities/table.py +90 -0
  9. scraper2_hj3415/app/adapters/out/playwright/capabilities/text.py +25 -0
  10. scraper2_hj3415/app/adapters/out/playwright/capabilities/wait.py +96 -0
  11. {scraper2 → scraper2_hj3415/app}/adapters/out/playwright/session.py +1 -1
  12. scraper2_hj3415/app/adapters/out/sinks/memory_sink.py +25 -0
  13. scraper2_hj3415/app/adapters/out/sinks/mongo_sink.py +63 -0
  14. {scraper2/adapters/out/sinks/memory → scraper2_hj3415/app/adapters/out/sinks}/store.py +14 -5
  15. scraper2_hj3415/app/adapters/site/wisereport_playwright.py +379 -0
  16. scraper2_hj3415/app/composition.py +225 -0
  17. scraper2_hj3415/app/domain/blocks.py +61 -0
  18. scraper2_hj3415/app/domain/constants.py +33 -0
  19. scraper2_hj3415/app/domain/doc.py +16 -0
  20. scraper2_hj3415/app/domain/endpoint.py +11 -0
  21. scraper2_hj3415/app/domain/series.py +11 -0
  22. scraper2_hj3415/app/domain/types.py +19 -0
  23. scraper2_hj3415/app/parsing/_normalize/label.py +92 -0
  24. scraper2_hj3415/app/parsing/_normalize/table.py +53 -0
  25. scraper2_hj3415/app/parsing/_normalize/text.py +31 -0
  26. scraper2_hj3415/app/parsing/_normalize/values.py +70 -0
  27. scraper2_hj3415/app/parsing/_tables/html_table.py +89 -0
  28. scraper2_hj3415/app/parsing/c101/__init__.py +0 -0
  29. scraper2_hj3415/app/parsing/c101/_sise_normalizer.py +103 -0
  30. scraper2_hj3415/app/parsing/c101/company_overview.py +47 -0
  31. scraper2_hj3415/app/parsing/c101/earning_surprise.py +217 -0
  32. scraper2_hj3415/app/parsing/c101/fundamentals.py +95 -0
  33. scraper2_hj3415/app/parsing/c101/major_shareholders.py +57 -0
  34. scraper2_hj3415/app/parsing/c101/sise.py +47 -0
  35. scraper2_hj3415/app/parsing/c101/summary_cmp.py +87 -0
  36. scraper2_hj3415/app/parsing/c101/yearly_consensus.py +197 -0
  37. scraper2_hj3415/app/parsing/c101_parser.py +45 -0
  38. scraper2_hj3415/app/parsing/c103_parser.py +22 -0
  39. scraper2_hj3415/app/parsing/c104_parser.py +26 -0
  40. scraper2_hj3415/app/parsing/c106_parser.py +137 -0
  41. scraper2_hj3415/app/parsing/c108_parser.py +254 -0
  42. scraper2_hj3415/app/ports/__init__.py +0 -0
  43. scraper2_hj3415/app/ports/browser/__init__.py +0 -0
  44. scraper2_hj3415/app/ports/browser/browser_factory_port.py +9 -0
  45. scraper2_hj3415/app/ports/browser/browser_port.py +32 -0
  46. scraper2_hj3415/app/ports/browser/capabilities/__init__.py +15 -0
  47. scraper2_hj3415/app/ports/browser/capabilities/interaction.py +27 -0
  48. scraper2_hj3415/app/ports/browser/capabilities/navigation.py +18 -0
  49. scraper2_hj3415/app/ports/browser/capabilities/scope.py +66 -0
  50. scraper2_hj3415/app/ports/browser/capabilities/table.py +28 -0
  51. scraper2_hj3415/app/ports/browser/capabilities/text.py +16 -0
  52. scraper2_hj3415/app/ports/browser/capabilities/wait.py +51 -0
  53. scraper2_hj3415/app/ports/ingest/__init__.py +0 -0
  54. scraper2_hj3415/app/ports/ingest/nfs_ingest_port.py +28 -0
  55. scraper2_hj3415/app/ports/sinks/__init__.py +0 -0
  56. scraper2_hj3415/app/ports/sinks/nfs_sink_port.py +20 -0
  57. scraper2_hj3415/app/ports/site/__init__.py +0 -0
  58. scraper2_hj3415/app/ports/site/wisereport_port.py +30 -0
  59. scraper2_hj3415/app/services/__init__.py +0 -0
  60. scraper2_hj3415/app/services/fetch/__init__.py +0 -0
  61. scraper2_hj3415/app/services/fetch/fetch_c101.py +59 -0
  62. scraper2_hj3415/app/services/fetch/fetch_c103.py +121 -0
  63. scraper2_hj3415/app/services/fetch/fetch_c104.py +160 -0
  64. scraper2_hj3415/app/services/fetch/fetch_c106.py +90 -0
  65. scraper2_hj3415/app/services/fetch/fetch_c108.py +59 -0
  66. scraper2_hj3415/app/services/nfs_doc_builders.py +304 -0
  67. scraper2_hj3415/app/usecases/__init__.py +0 -0
  68. scraper2_hj3415/app/usecases/ingest/__init__.py +0 -0
  69. scraper2_hj3415/app/usecases/ingest/ingest_c101.py +111 -0
  70. scraper2_hj3415/app/usecases/ingest/ingest_c103.py +162 -0
  71. scraper2_hj3415/app/usecases/ingest/ingest_c104.py +182 -0
  72. scraper2_hj3415/app/usecases/ingest/ingest_c106.py +136 -0
  73. scraper2_hj3415/app/usecases/ingest/ingest_c108.py +122 -0
  74. scraper2/main.py → scraper2_hj3415/cli.py +45 -72
  75. {scraper2_hj3415-2.4.1.dist-info → scraper2_hj3415-2.7.0.dist-info}/METADATA +3 -1
  76. scraper2_hj3415-2.7.0.dist-info/RECORD +93 -0
  77. scraper2_hj3415-2.7.0.dist-info/entry_points.txt +3 -0
  78. scraper2/adapters/out/playwright/browser.py +0 -102
  79. scraper2/adapters/out/sinks/memory/__init__.py +0 -15
  80. scraper2/adapters/out/sinks/memory/c101_memory_sink.py +0 -26
  81. scraper2/adapters/out/sinks/memory/c103_memory_sink.py +0 -26
  82. scraper2/adapters/out/sinks/memory/c104_memory_sink.py +0 -26
  83. scraper2/adapters/out/sinks/memory/c106_memory_sink.py +0 -26
  84. scraper2/adapters/out/sinks/memory/c108_memory_sink.py +0 -26
  85. scraper2/adapters/out/sinks/mongo/__init__.py +0 -14
  86. scraper2/adapters/out/sinks/mongo/c101_mongo_sink.py +0 -43
  87. scraper2/adapters/out/sinks/mongo/c103_mongo_sink.py +0 -41
  88. scraper2/adapters/out/sinks/mongo/c104_mongo_sink.py +0 -41
  89. scraper2/adapters/out/sinks/mongo/c106_mongo_sink.py +0 -41
  90. scraper2/adapters/out/sinks/mongo/c108_mongo_sink.py +0 -41
  91. scraper2/app/composition.py +0 -204
  92. scraper2/app/parsing/_converters.py +0 -85
  93. scraper2/app/parsing/_normalize.py +0 -134
  94. scraper2/app/parsing/c101_parser.py +0 -143
  95. scraper2/app/parsing/c103_parser.py +0 -128
  96. scraper2/app/parsing/c104_parser.py +0 -143
  97. scraper2/app/parsing/c106_parser.py +0 -153
  98. scraper2/app/parsing/c108_parser.py +0 -65
  99. scraper2/app/ports/browser/browser_factory_port.py +0 -11
  100. scraper2/app/ports/browser/browser_port.py +0 -22
  101. scraper2/app/ports/ingest_port.py +0 -14
  102. scraper2/app/ports/sinks/base_sink_port.py +0 -14
  103. scraper2/app/ports/sinks/c101_sink_port.py +0 -9
  104. scraper2/app/ports/sinks/c103_sink_port.py +0 -9
  105. scraper2/app/ports/sinks/c104_sink_port.py +0 -9
  106. scraper2/app/ports/sinks/c106_sink_port.py +0 -9
  107. scraper2/app/ports/sinks/c108_sink_port.py +0 -9
  108. scraper2/app/usecases/fetch/fetch_c101.py +0 -43
  109. scraper2/app/usecases/fetch/fetch_c103.py +0 -103
  110. scraper2/app/usecases/fetch/fetch_c104.py +0 -76
  111. scraper2/app/usecases/fetch/fetch_c106.py +0 -90
  112. scraper2/app/usecases/fetch/fetch_c108.py +0 -49
  113. scraper2/app/usecases/ingest/ingest_c101.py +0 -36
  114. scraper2/app/usecases/ingest/ingest_c103.py +0 -37
  115. scraper2/app/usecases/ingest/ingest_c104.py +0 -37
  116. scraper2/app/usecases/ingest/ingest_c106.py +0 -38
  117. scraper2/app/usecases/ingest/ingest_c108.py +0 -39
  118. scraper2_hj3415-2.4.1.dist-info/RECORD +0 -63
  119. scraper2_hj3415-2.4.1.dist-info/entry_points.txt +0 -3
  120. {scraper2 → scraper2_hj3415}/.DS_Store +0 -0
  121. {scraper2 → scraper2_hj3415}/__init__.py +0 -0
  122. {scraper2/adapters/out → scraper2_hj3415/app}/__init__.py +0 -0
  123. {scraper2/adapters/out/playwright → scraper2_hj3415/app/adapters}/__init__.py +0 -0
  124. {scraper2 → scraper2_hj3415/app}/adapters/out/.DS_Store +0 -0
  125. {scraper2/app → scraper2_hj3415/app/adapters/out}/__init__.py +0 -0
  126. {scraper2/app/parsing → scraper2_hj3415/app/adapters/out/playwright}/__init__.py +0 -0
  127. {scraper2 → scraper2_hj3415/app}/adapters/out/sinks/.DS_Store +0 -0
  128. {scraper2/app/ports → scraper2_hj3415/app/adapters/out/sinks}/__init__.py +0 -0
  129. {scraper2/app/ports/browser → scraper2_hj3415/app/adapters/site}/__init__.py +0 -0
  130. {scraper2/app/ports/sinks → scraper2_hj3415/app/domain}/__init__.py +0 -0
  131. {scraper2/app/usecases → scraper2_hj3415/app/parsing}/__init__.py +0 -0
  132. {scraper2/app/usecases/fetch → scraper2_hj3415/app/parsing/_normalize}/__init__.py +0 -0
  133. {scraper2/app/usecases/ingest → scraper2_hj3415/app/parsing/_tables}/__init__.py +0 -0
  134. {scraper2_hj3415-2.4.1.dist-info → scraper2_hj3415-2.7.0.dist-info}/WHEEL +0 -0
  135. {scraper2_hj3415-2.4.1.dist-info → scraper2_hj3415-2.7.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,304 @@
1
+ # scraper2_hj3415/app/services/nfs_doc_builders.py
2
+ from __future__ import annotations
3
+
4
+ from collections import defaultdict
5
+ from typing import Mapping, Iterable, Any
6
+
7
+ from scraper2_hj3415.app.domain.endpoint import EndpointKind
8
+ from scraper2_hj3415.app.domain.constants import BLOCK_KEYS_BY_ENDPOINT
9
+ from scraper2_hj3415.app.domain.doc import NfsDoc
10
+ from scraper2_hj3415.app.domain.blocks import (
11
+ MetricsBlock,
12
+ RecordsBlock,
13
+ KvBlock,
14
+ BlockData,
15
+ )
16
+ from scraper2_hj3415.app.domain.series import MetricSeries
17
+ from scraper2_hj3415.app.domain.types import (
18
+ LabelsMap,
19
+ MetricKey,
20
+ Period,
21
+ Num,
22
+ BlockKey,
23
+ Records,
24
+ Record,
25
+ )
26
+
27
+ from common_hj3415.utils import nan_to_none
28
+
29
+
30
def is_all_none(row: dict[str, Any]) -> bool:
    """Return True when every value of ``row`` is ``None``.

    An empty mapping has no non-None value, so it also yields True.
    """
    for value in row.values():
        if value is not None:
            return False
    return True
32
+
33
+
34
# What a parser returns: "block_key (str) -> rows (list[dict])".
ParsedBlocks = Mapping[str, Any]
35
+
36
+
37
def build_metrics_block_and_labels_from_rows(
    *,
    endpoint_kind: EndpointKind,
    block_key: BlockKey,
    rows: Records,
    item_key: str = "항목",
    raw_label_key: str = "항목_raw",
) -> tuple[MetricsBlock, LabelsMap]:
    """Convert parsed rows into a ``MetricsBlock`` plus its labels map.

    Shared builder for the c103/c104/c106 endpoints.

    Args:
        endpoint_kind: Endpoint the block belongs to.
        block_key: Key of the block being built.
        rows: Parsed rows; each row is a dict.
        item_key: Row key holding the metric name. Rows whose value for this
            key is missing or falsy are skipped.
        raw_label_key: Row key holding the original label. When the value is
            ``None`` the metric name is used as the label.

    Returns:
        ``(block, labels)`` where ``labels`` maps each metric key to its raw
        label. Every row column other than ``item_key`` / ``raw_label_key``
        becomes a ``str(column) -> nan_to_none(value)`` entry of the series.

    Duplicate metric names:
        - A name that appears once is kept as-is (even if all values are None).
        - A name that appears several times drops the rows whose values are
          all None; the first survivor keeps the plain name and later ones
          get a ``_2``, ``_3``, ... suffix. If nothing survives, the name is
          omitted entirely.
    """
    skipped_columns = (item_key, raw_label_key)
    buckets: dict[str, list[tuple[dict[Period, Num], str]]] = defaultdict(list)

    for row in rows:
        name = row.get(item_key)
        if not name:
            continue

        label = row.get(raw_label_key)
        if label is None:
            label = name

        values: dict[Period, Num] = {}
        for column, cell in row.items():
            if column in skipped_columns:
                continue
            values[str(column)] = nan_to_none(cell)

        buckets[name].append((values, label))

    metrics: dict[MetricKey, MetricSeries] = {}
    label_by_key: LabelsMap = {}

    for name, entries in buckets.items():
        if len(entries) == 1:
            # A unique name is never filtered, even when all values are None.
            survivors = entries
        else:
            survivors = [entry for entry in entries if not is_all_none(entry[0])]

        for position, (values, label) in enumerate(survivors, start=1):
            key = name if position == 1 else f"{name}_{position}"
            metrics[key] = MetricSeries(key=key, values=values)
            label_by_key[key] = label

    block = MetricsBlock(
        endpoint_kind=endpoint_kind, block_key=block_key, metrics=metrics
    )
    return block, label_by_key
95
+
96
+
97
def build_metrics_doc_from_parsed(
    *,
    code: str,
    endpoint_kind: EndpointKind,
    parsed: ParsedBlocks,
    block_keys: Iterable[BlockKey] | None = None,
    item_key: str = "항목",
    raw_label_key: str = "항목_raw",
    keep_empty_blocks: bool = True,
) -> NfsDoc:
    """Assemble an ``NfsDoc`` of ``MetricsBlock``s from a parser result.

    Usable for c103/c104/c106 alike.

    Args:
        code: Code stored on the resulting document.
        endpoint_kind: Endpoint the document belongs to.
        parsed: Mapping of ``str(block_key)`` to rows, as produced by a parser.
        block_keys: Block keys to build; defaults to
            ``BLOCK_KEYS_BY_ENDPOINT[endpoint_kind]``.
        item_key: Forwarded to the block builder (metric-name column).
        raw_label_key: Forwarded to the block builder (raw-label column).
        keep_empty_blocks: When True every block key yields a block, even with
            no metrics. When False a block whose metrics are empty is left out
            of both ``blocks`` and ``labels``.

    Returns:
        The assembled ``NfsDoc``.
    """
    keys = BLOCK_KEYS_BY_ENDPOINT[endpoint_kind] if block_keys is None else block_keys

    blocks: dict[BlockKey, MetricsBlock] = {}
    labels: dict[BlockKey, LabelsMap] = {}

    for key in keys:
        # A missing key or a falsy value (None, empty list) both mean "no rows".
        block_rows = parsed.get(str(key), []) or []
        metrics_block, label_map = build_metrics_block_and_labels_from_rows(
            endpoint_kind=endpoint_kind,
            block_key=key,
            rows=block_rows,
            item_key=item_key,
            raw_label_key=raw_label_key,
        )

        if keep_empty_blocks or metrics_block.metrics:
            blocks[key] = metrics_block
            labels[key] = label_map  # kept as {} even when empty

    return NfsDoc(code=code, endpoint_kind=endpoint_kind, blocks=blocks, labels=labels)
138
+
139
+
140
+ def _as_records(x: Any) -> Records:
141
+ """
142
+ 안전하게 rows를 Records(=Sequence[Record])로 캐스팅/정리.
143
+ - None/비정상 값이면 빈 리스트
144
+ - list[dict] 형태만 통과시키고 나머지는 필터
145
+ """
146
+ if not x:
147
+ return []
148
+ if not isinstance(x, list):
149
+ return []
150
+
151
+ out: list[Record] = []
152
+ for it in x:
153
+ if isinstance(it, dict):
154
+ out.append(it)
155
+ return out
156
+
157
+
158
def build_records_block_from_rows(
    *,
    endpoint_kind: EndpointKind,
    block_key: BlockKey,
    rows: Records,
) -> RecordsBlock:
    """Wrap rows in a ``RecordsBlock``.

    Used for record-style blocks such as the c108 report list. The rows are
    copied into a new list before being handed to the block.

    NOTE(review): the original author assumes ``RecordsBlock`` validates
    ``block_key`` in its ``__post_init__``; that is not visible here — confirm
    in the domain module.
    """
    copied_rows = list(rows)
    return RecordsBlock(
        endpoint_kind=endpoint_kind,
        block_key=block_key,
        rows=copied_rows,
    )
172
+
173
+
174
def build_c108_doc_from_parsed(
    *,
    code: str,
    parsed: ParsedBlocks,
    block_keys: Iterable[BlockKey] | None = None,
    keep_empty_blocks: bool = True,
) -> NfsDoc:
    """Assemble an ``NfsDoc`` of ``RecordsBlock``s from a c108 parser result.

    Rules:
        - Every block that is kept gets a labels entry, and for c108 that
          entry is always an empty dict (empty labels are the normal case).

    Args:
        code: Code stored on the resulting document.
        parsed: Mapping of ``str(block_key)`` to rows.
        block_keys: Block keys to build; defaults to
            ``BLOCK_KEYS_BY_ENDPOINT[EndpointKind.C108]``.
        keep_empty_blocks: When True a block is created even if it has no
            rows. When False blocks without rows are left out.

    Returns:
        The assembled ``NfsDoc``.
    """
    endpoint_kind = EndpointKind.C108
    keys = BLOCK_KEYS_BY_ENDPOINT[endpoint_kind] if block_keys is None else block_keys

    blocks: dict[BlockKey, RecordsBlock] = {}
    labels: dict[BlockKey, LabelsMap] = {}

    for key in keys:
        records_block = build_records_block_from_rows(
            endpoint_kind=endpoint_kind,
            block_key=key,
            rows=_as_records(parsed.get(str(key))),
        )

        if keep_empty_blocks or records_block.rows:
            blocks[key] = records_block
            labels[key] = {}  # empty labels are expected for c108

    return NfsDoc(
        code=code,
        endpoint_kind=endpoint_kind,
        blocks=blocks,
        labels=labels,
    )
221
+
222
+
223
def build_kv_block_from_mapping(
    *,
    endpoint_kind: EndpointKind,
    block_key: BlockKey,
    data: Mapping[str, Any] | None,
    keep_empty: bool = True,
) -> KvBlock | None:
    """Wrap a dict-shaped block in a ``KvBlock``.

    Intended for the structured-dict blocks of c101 (summary, quote, company
    overview, fundamentals, earning surprise, ...).

    Args:
        endpoint_kind: Endpoint the block belongs to.
        block_key: Key of the block being built.
        data: Mapping to wrap; ``None`` and empty mappings count as empty.
        keep_empty: For empty ``data``, return a ``KvBlock`` holding ``{}``
            when True, or ``None`` when False.

    Returns:
        The ``KvBlock``, or ``None`` when ``data`` is empty and
        ``keep_empty`` is False.
    """
    if data:
        values = data
    elif keep_empty:
        values = {}
    else:
        return None

    return KvBlock(endpoint_kind=endpoint_kind, block_key=block_key, values=values)
240
+
241
+
242
# c101 mixes dicts, lists and nested dicts, so Any is the realistic value type.
ParsedC101 = Mapping[str, Any]
243
+
244
+
245
def build_c101_doc_from_parsed(
    *,
    code: str,
    parsed: ParsedC101,
    block_keys: Iterable[BlockKey] | None = None,
    keep_empty_blocks: bool = True,
) -> NfsDoc:
    """Assemble an ``NfsDoc`` from a c101 parser result.

    c101 blocks come in different shapes, so each value is wrapped by type:
        - ``list``  -> ``RecordsBlock``
        - ``dict`` (possibly nested) -> ``KvBlock``
        - anything else (including a missing key) -> treated as empty

    Labels follow the c101 rule "empty is normal": every requested block key
    gets a ``{}`` labels entry, whether or not its block is kept.

    Args:
        code: Code stored on the resulting document.
        parsed: Mapping of ``str(block_key)`` to the parsed value.
        block_keys: Block keys to build; defaults to
            ``BLOCK_KEYS_BY_ENDPOINT[EndpointKind.C101]``.
        keep_empty_blocks: When True an empty value still produces a block
            (an empty ``RecordsBlock`` for ``[]``, an empty ``KvBlock``
            otherwise). When False empty blocks are left out of ``blocks``;
            this now applies to empty lists as well, matching the dict path
            and the c108 builder.

    Returns:
        The assembled ``NfsDoc``.
    """
    endpoint_kind = EndpointKind.C101

    if block_keys is None:
        block_keys = BLOCK_KEYS_BY_ENDPOINT[endpoint_kind]

    blocks: dict[BlockKey, BlockData] = {}
    labels: dict[BlockKey, LabelsMap] = {}

    for bk in block_keys:
        v = parsed.get(str(bk))

        # c101 rule: labels default to empty rather than being filled in.
        labels[bk] = {}

        if isinstance(v, list):
            # Rows are assumed to be list[dict[str, Any]] (that is what the
            # parser produces). An empty list is subject to the same
            # keep-empty policy as every other block shape.
            if v or keep_empty_blocks:
                blocks[bk] = build_records_block_from_rows(
                    endpoint_kind=endpoint_kind,
                    block_key=bk,
                    rows=v,
                )
            continue

        # dict -> KvBlock; None/other types are treated as an empty mapping,
        # and build_kv_block_from_mapping applies the keep-empty policy.
        kb = build_kv_block_from_mapping(
            endpoint_kind=endpoint_kind,
            block_key=bk,
            data=v if isinstance(v, dict) else None,
            keep_empty=keep_empty_blocks,
        )
        if kb is not None:
            blocks[bk] = kb

    return NfsDoc(code=code, endpoint_kind=endpoint_kind, blocks=blocks, labels=labels)
File without changes
File without changes
@@ -0,0 +1,111 @@
1
+ # scraper2_hj3415/app/usecases/ingest/ingest_c101.py
2
+ from __future__ import annotations
3
+
4
+ from datetime import datetime
5
+ from typing import Iterable, Optional, Any, cast
6
+
7
+ from scraper2_hj3415.app.services.fetch.fetch_c101 import FetchC101
8
+ from scraper2_hj3415.app.ports.sinks.nfs_sink_port import NfsSinkPort
9
+ from common_hj3415.utils.time import utcnow
10
+
11
+ from scraper2_hj3415.app.domain.endpoint import EndpointKind
12
+ from scraper2_hj3415.app.domain.constants import get_block_keys
13
+ from scraper2_hj3415.app.domain.doc import NfsDoc
14
+ from scraper2_hj3415.app.domain.blocks import KvBlock, RecordsBlock, MetricsBlock
15
+
16
+ from contracts_hj3415.nfs.types import Endpoint
17
+ from contracts_hj3415.nfs.c101_dto import C101DTO, C101Payload, C101Blocks
18
+
19
+ from logging_hj3415 import logger
20
+
21
# Endpoint handled by this module, in domain form (enum member) and in the
# contracts form passed to the sink. cast() only informs the type checker;
# it performs no runtime conversion.
endpoint_kind = EndpointKind.C101
endpoint: Endpoint = cast(Endpoint, endpoint_kind.value)
23
+
24
+
25
def _unwrap_c101_block(block: Any) -> Any:
    """Turn a domain block into the plain Python structure stored in the DTO.

    - ``KvBlock``      -> ``dict`` copy of its values
    - ``RecordsBlock`` -> ``list`` of ``dict`` copies of its rows
    - ``MetricsBlock`` -> ``{str(metric_key): dict(series.values)}``
      (not expected for c101; handled as a safety net)
    - anything else    -> returned unchanged (already plain data)
    """
    if isinstance(block, KvBlock):
        return dict(block.values)

    if isinstance(block, RecordsBlock):
        return [dict(row) for row in block.rows]

    if isinstance(block, MetricsBlock):
        return {
            str(metric_key): dict(series.values)
            for metric_key, series in block.metrics.items()
        }

    return block
48
+
49
+
50
def c101_doc_to_dto(*, doc: NfsDoc, asof: datetime) -> C101DTO:
    """Convert a domain ``NfsDoc`` into a contracts ``C101DTO``.

    Rules:
        - Labels are always present in the payload but always empty for c101.
        - Blocks are filled in the order of the endpoint's block keys; each
          block is unwrapped to plain data, and a block missing from the doc
          becomes ``{}``.

    Args:
        doc: Domain document to convert.
        asof: Timestamp stored on the DTO.

    Returns:
        The DTO carrying ``doc.code``, ``asof``, the module endpoint and the
        assembled payload.
    """
    blocks: dict[str, Any] = {}
    labels: dict[str, dict[str, str]] = {}

    for key in get_block_keys(endpoint_kind):
        name = str(key)
        found = doc.blocks.get(key)
        if found is None:
            blocks[name] = {}
        else:
            blocks[name] = _unwrap_c101_block(found)

        # c101 labels are always empty
        labels[name] = {}

    # cast() is for the type checker only; the payload stays a plain dict.
    typed_blocks = cast(C101Blocks, blocks)
    payload: C101Payload = cast(C101Payload, {"blocks": typed_blocks, "labels": labels})

    return C101DTO(
        code=doc.code,
        asof=asof,
        endpoint=endpoint,
        payload=payload,
    )
76
+
77
+
78
class IngestC101:
    """Use case: fetch c101 documents, convert them to DTOs and write them to a sink."""

    def __init__(self, fetch: FetchC101, sink: NfsSinkPort[C101DTO]):
        self.fetch = fetch
        self.sink = sink

    async def execute(
        self, code: str, *, sleep_sec: float = 2.0, asof: datetime | None = None
    ) -> C101DTO:
        """Fetch one code, write its DTO to the sink and return the DTO.

        Args:
            code: Code to fetch.
            sleep_sec: Forwarded to the fetch service.
            asof: Timestamp for the DTO; ``utcnow()`` is used when omitted.

        Raises:
            RuntimeError: If the fetch service returns ``None``.
        """
        stamp = asof or utcnow()
        doc = await self.fetch.execute(code, sleep_sec=sleep_sec)
        logger.debug(f"doc:\n{doc}")
        if doc is None:
            raise RuntimeError(f"c101 fetch returned None: code={code}")

        dto = c101_doc_to_dto(doc=doc, asof=stamp)
        logger.debug(f"dto:\n{dto}")

        await self.sink.write(dto, endpoint=endpoint)
        return dto

    async def execute_many(
        self,
        codes: Iterable[str],
        *,
        sleep_sec: float = 2.0,
        asof: Optional[datetime] = None,
    ) -> list[C101DTO]:
        """Fetch several codes, write all DTOs in one sink call and return them.

        All DTOs of the batch share one timestamp (``asof`` or ``utcnow()``).
        """
        batch_asof = asof or utcnow()

        docs = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
        dtos = []
        for fetched in docs:
            dtos.append(c101_doc_to_dto(doc=fetched, asof=batch_asof))
        logger.debug(f"dtos:\n{dtos}")

        await self.sink.write_many(dtos, endpoint=endpoint)
        return dtos
@@ -0,0 +1,162 @@
1
+ # scraper2_hj3415/app/usecases/ingest/ingest_c103.py
2
+ from __future__ import annotations
3
+
4
+ from datetime import datetime
5
+ from typing import Iterable, Optional, cast
6
+
7
+ from scraper2_hj3415.app.services.fetch.fetch_c103 import FetchC103
8
+ from scraper2_hj3415.app.ports.sinks.nfs_sink_port import NfsSinkPort
9
+ from common_hj3415.utils.time import utcnow
10
+
11
+ from scraper2_hj3415.app.domain.endpoint import EndpointKind
12
+ from scraper2_hj3415.app.domain.constants import get_block_keys
13
+ from scraper2_hj3415.app.domain.doc import NfsDoc
14
+ from scraper2_hj3415.app.domain.blocks import MetricsBlock
15
+
16
+ from contracts_hj3415.nfs.types import Endpoint
17
+
18
+ from contracts_hj3415.nfs.c103_dto import (
19
+ C103DTO,
20
+ C103Payload,
21
+ C103Blocks,
22
+ C103Labels,
23
+ C103ValuesMap,
24
+ )
25
+
26
+ from logging_hj3415 import logger
27
+
28
+
29
# Endpoint handled by this module, in domain form (enum member) and in the
# contracts form passed to the sink. cast() only informs the type checker;
# it performs no runtime conversion.
endpoint_kind = EndpointKind.C103
endpoint: Endpoint = cast(Endpoint, endpoint_kind.value)
31
+
32
+
33
+ def _metricsblock_to_c103_metric_map(block: MetricsBlock) -> dict[str, C103ValuesMap]:
34
+ """
35
+ MetricsBlock(domain) -> dict[MetricKey, dict[CodeKey, Num]]
36
+ - domain MetricSeries.values 가 이미 {code: num} 형태라면 그대로 dict로 고정시킴.
37
+ """
38
+ out: dict[str, C103ValuesMap] = {}
39
+ for mk, series in block.metrics.items():
40
+ out[mk] = dict(series.values) # Mapping -> dict
41
+ return out
42
+
43
+
44
def c103_doc_to_dto(*, doc: NfsDoc, asof: datetime) -> C103DTO:
    """Convert a domain ``NfsDoc`` (c103) into a contracts ``C103DTO``.

    Payload layout::

        {
            "blocks": {"손익계산서y": {metric: {...}}, "손익계산서q": {...}, ...},
            "labels": {"손익계산서y": {metric: raw_label}, "손익계산서q": {...}, ...},
        }

    Policy:
        - ``blocks`` and ``labels`` always contain all six y/q keys; a block
          missing from the doc stays an empty dict.
        - Labels missing for a block default to ``{}``.

    Raises:
        TypeError: If a block present in the doc is not a ``MetricsBlock``.
        ValueError: If the endpoint's block keys contain an unknown key.
    """
    # 1) Always start from the full skeleton so the contract shape is stable.
    blocks: C103Blocks = {
        "손익계산서y": {},
        "손익계산서q": {},
        "재무상태표y": {},
        "재무상태표q": {},
        "현금흐름표y": {},
        "현금흐름표q": {},
    }
    labels: C103Labels = {
        "손익계산서y": {},
        "손익계산서q": {},
        "재무상태표y": {},
        "재무상태표q": {},
        "현금흐름표y": {},
        "현금흐름표q": {},
    }

    # 2) Fill in following the domain's block-key list and order.
    for key in get_block_keys(endpoint_kind):
        block = doc.blocks.get(key)
        if block is None:
            continue

        if not isinstance(block, MetricsBlock):
            raise TypeError(
                f"c103 expects MetricsBlock, got {type(block).__name__} | block_key={key!r}"
            )

        metric_map = _metricsblock_to_c103_metric_map(block)
        label_map = dict(doc.labels.get(key, {}))  # {} when absent

        # Literal keys are written out so each assignment targets a concrete
        # payload field.
        if key == "손익계산서y":
            blocks["손익계산서y"], labels["손익계산서y"] = metric_map, label_map
        elif key == "손익계산서q":
            blocks["손익계산서q"], labels["손익계산서q"] = metric_map, label_map
        elif key == "재무상태표y":
            blocks["재무상태표y"], labels["재무상태표y"] = metric_map, label_map
        elif key == "재무상태표q":
            blocks["재무상태표q"], labels["재무상태표q"] = metric_map, label_map
        elif key == "현금흐름표y":
            blocks["현금흐름표y"], labels["현금흐름표y"] = metric_map, label_map
        elif key == "현금흐름표q":
            blocks["현금흐름표q"], labels["현금흐름표q"] = metric_map, label_map
        else:
            raise ValueError(f"invalid c103 block key: {key!r}")

    # The original author treats C103Payload as a TypedDict: it cannot be
    # called like a constructor, so the plain dict is cast instead.
    payload: C103Payload = cast(C103Payload, {"blocks": blocks, "labels": labels})

    return C103DTO(
        code=doc.code,
        asof=asof,
        endpoint=endpoint,
        payload=payload,  # injected as a plain dict
    )
122
+
123
+
124
class IngestC103:
    """Use case: fetch c103 documents, convert them to DTOs and write them to a sink."""

    def __init__(self, fetch: FetchC103, sink: NfsSinkPort[C103DTO]):
        self.fetch = fetch
        self.sink = sink

    async def execute(
        self,
        code: str,
        *,
        sleep_sec: float = 2.0,
        asof: datetime | None = None,
    ) -> C103DTO:
        """Fetch one code, write its DTO to the sink and return the DTO.

        Args:
            code: Code to fetch.
            sleep_sec: Forwarded to the fetch service.
            asof: Timestamp for the DTO; ``utcnow()`` is used when omitted.

        Raises:
            RuntimeError: If the fetch service returns ``None``.
        """
        asof = asof or utcnow()

        doc = await self.fetch.execute(code, sleep_sec=sleep_sec)
        # Fail fast before logging: the error message names this endpoint
        # (c103) so failures are not misattributed to another use case.
        if doc is None:
            raise RuntimeError(f"c103 fetch returned None: code={code}")
        logger.debug(f"doc:\n{doc}")

        dto = c103_doc_to_dto(doc=doc, asof=asof)
        logger.debug(f"dto:\n{dto}")

        await self.sink.write(dto, endpoint=endpoint)
        return dto

    async def execute_many(
        self,
        codes: Iterable[str],
        *,
        sleep_sec: float = 2.0,
        asof: Optional[datetime] = None,
    ) -> list[C103DTO]:
        """Fetch several codes, write all DTOs in one sink call and return them.

        All DTOs of the batch share one timestamp (``asof`` or ``utcnow()``).
        """
        batch_asof = asof or utcnow()

        docs = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
        dtos = [c103_doc_to_dto(doc=d, asof=batch_asof) for d in docs]
        logger.debug(f"dtos:\n{dtos}")
        await self.sink.write_many(dtos, endpoint=endpoint)
        return dtos