scraper2-hj3415 2.4.1__py3-none-any.whl → 2.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. scraper2_hj3415/app/adapters/out/playwright/browser.py +26 -0
  2. {scraper2 → scraper2_hj3415/app}/adapters/out/playwright/browser_factory.py +7 -7
  3. scraper2_hj3415/app/adapters/out/playwright/capabilities/__init__.py +18 -0
  4. scraper2_hj3415/app/adapters/out/playwright/capabilities/_base.py +19 -0
  5. scraper2_hj3415/app/adapters/out/playwright/capabilities/interaction.py +37 -0
  6. scraper2_hj3415/app/adapters/out/playwright/capabilities/navigation.py +24 -0
  7. scraper2_hj3415/app/adapters/out/playwright/capabilities/scope.py +84 -0
  8. scraper2_hj3415/app/adapters/out/playwright/capabilities/table.py +90 -0
  9. scraper2_hj3415/app/adapters/out/playwright/capabilities/text.py +25 -0
  10. scraper2_hj3415/app/adapters/out/playwright/capabilities/wait.py +96 -0
  11. {scraper2 → scraper2_hj3415/app}/adapters/out/playwright/session.py +1 -1
  12. scraper2_hj3415/app/adapters/out/sinks/memory_sink.py +25 -0
  13. scraper2_hj3415/app/adapters/out/sinks/mongo_sink.py +63 -0
  14. {scraper2/adapters/out/sinks/memory → scraper2_hj3415/app/adapters/out/sinks}/store.py +14 -5
  15. scraper2_hj3415/app/adapters/site/wisereport_playwright.py +379 -0
  16. scraper2_hj3415/app/composition.py +225 -0
  17. scraper2_hj3415/app/domain/blocks.py +61 -0
  18. scraper2_hj3415/app/domain/constants.py +33 -0
  19. scraper2_hj3415/app/domain/doc.py +16 -0
  20. scraper2_hj3415/app/domain/endpoint.py +11 -0
  21. scraper2_hj3415/app/domain/series.py +11 -0
  22. scraper2_hj3415/app/domain/types.py +19 -0
  23. scraper2_hj3415/app/parsing/_normalize/label.py +92 -0
  24. scraper2_hj3415/app/parsing/_normalize/table.py +53 -0
  25. scraper2_hj3415/app/parsing/_normalize/text.py +31 -0
  26. scraper2_hj3415/app/parsing/_normalize/values.py +70 -0
  27. scraper2_hj3415/app/parsing/_tables/html_table.py +89 -0
  28. scraper2_hj3415/app/parsing/c101/__init__.py +0 -0
  29. scraper2_hj3415/app/parsing/c101/_sise_normalizer.py +103 -0
  30. scraper2_hj3415/app/parsing/c101/company_overview.py +47 -0
  31. scraper2_hj3415/app/parsing/c101/earning_surprise.py +217 -0
  32. scraper2_hj3415/app/parsing/c101/fundamentals.py +95 -0
  33. scraper2_hj3415/app/parsing/c101/major_shareholders.py +57 -0
  34. scraper2_hj3415/app/parsing/c101/sise.py +47 -0
  35. scraper2_hj3415/app/parsing/c101/summary_cmp.py +87 -0
  36. scraper2_hj3415/app/parsing/c101/yearly_consensus.py +197 -0
  37. scraper2_hj3415/app/parsing/c101_parser.py +45 -0
  38. scraper2_hj3415/app/parsing/c103_parser.py +22 -0
  39. scraper2_hj3415/app/parsing/c104_parser.py +26 -0
  40. scraper2_hj3415/app/parsing/c106_parser.py +137 -0
  41. scraper2_hj3415/app/parsing/c108_parser.py +254 -0
  42. scraper2_hj3415/app/ports/__init__.py +0 -0
  43. scraper2_hj3415/app/ports/browser/__init__.py +0 -0
  44. scraper2_hj3415/app/ports/browser/browser_factory_port.py +9 -0
  45. scraper2_hj3415/app/ports/browser/browser_port.py +32 -0
  46. scraper2_hj3415/app/ports/browser/capabilities/__init__.py +15 -0
  47. scraper2_hj3415/app/ports/browser/capabilities/interaction.py +27 -0
  48. scraper2_hj3415/app/ports/browser/capabilities/navigation.py +18 -0
  49. scraper2_hj3415/app/ports/browser/capabilities/scope.py +66 -0
  50. scraper2_hj3415/app/ports/browser/capabilities/table.py +28 -0
  51. scraper2_hj3415/app/ports/browser/capabilities/text.py +16 -0
  52. scraper2_hj3415/app/ports/browser/capabilities/wait.py +51 -0
  53. scraper2_hj3415/app/ports/ingest/__init__.py +0 -0
  54. scraper2_hj3415/app/ports/ingest/nfs_ingest_port.py +28 -0
  55. scraper2_hj3415/app/ports/sinks/__init__.py +0 -0
  56. scraper2_hj3415/app/ports/sinks/nfs_sink_port.py +20 -0
  57. scraper2_hj3415/app/ports/site/__init__.py +0 -0
  58. scraper2_hj3415/app/ports/site/wisereport_port.py +30 -0
  59. scraper2_hj3415/app/services/__init__.py +0 -0
  60. scraper2_hj3415/app/services/fetch/__init__.py +0 -0
  61. scraper2_hj3415/app/services/fetch/fetch_c101.py +59 -0
  62. scraper2_hj3415/app/services/fetch/fetch_c103.py +121 -0
  63. scraper2_hj3415/app/services/fetch/fetch_c104.py +160 -0
  64. scraper2_hj3415/app/services/fetch/fetch_c106.py +90 -0
  65. scraper2_hj3415/app/services/fetch/fetch_c108.py +59 -0
  66. scraper2_hj3415/app/services/nfs_doc_builders.py +304 -0
  67. scraper2_hj3415/app/usecases/__init__.py +0 -0
  68. scraper2_hj3415/app/usecases/ingest/__init__.py +0 -0
  69. scraper2_hj3415/app/usecases/ingest/ingest_c101.py +111 -0
  70. scraper2_hj3415/app/usecases/ingest/ingest_c103.py +162 -0
  71. scraper2_hj3415/app/usecases/ingest/ingest_c104.py +182 -0
  72. scraper2_hj3415/app/usecases/ingest/ingest_c106.py +136 -0
  73. scraper2_hj3415/app/usecases/ingest/ingest_c108.py +122 -0
  74. scraper2/main.py → scraper2_hj3415/cli.py +45 -72
  75. {scraper2_hj3415-2.4.1.dist-info → scraper2_hj3415-2.7.0.dist-info}/METADATA +3 -1
  76. scraper2_hj3415-2.7.0.dist-info/RECORD +93 -0
  77. scraper2_hj3415-2.7.0.dist-info/entry_points.txt +3 -0
  78. scraper2/adapters/out/playwright/browser.py +0 -102
  79. scraper2/adapters/out/sinks/memory/__init__.py +0 -15
  80. scraper2/adapters/out/sinks/memory/c101_memory_sink.py +0 -26
  81. scraper2/adapters/out/sinks/memory/c103_memory_sink.py +0 -26
  82. scraper2/adapters/out/sinks/memory/c104_memory_sink.py +0 -26
  83. scraper2/adapters/out/sinks/memory/c106_memory_sink.py +0 -26
  84. scraper2/adapters/out/sinks/memory/c108_memory_sink.py +0 -26
  85. scraper2/adapters/out/sinks/mongo/__init__.py +0 -14
  86. scraper2/adapters/out/sinks/mongo/c101_mongo_sink.py +0 -43
  87. scraper2/adapters/out/sinks/mongo/c103_mongo_sink.py +0 -41
  88. scraper2/adapters/out/sinks/mongo/c104_mongo_sink.py +0 -41
  89. scraper2/adapters/out/sinks/mongo/c106_mongo_sink.py +0 -41
  90. scraper2/adapters/out/sinks/mongo/c108_mongo_sink.py +0 -41
  91. scraper2/app/composition.py +0 -204
  92. scraper2/app/parsing/_converters.py +0 -85
  93. scraper2/app/parsing/_normalize.py +0 -134
  94. scraper2/app/parsing/c101_parser.py +0 -143
  95. scraper2/app/parsing/c103_parser.py +0 -128
  96. scraper2/app/parsing/c104_parser.py +0 -143
  97. scraper2/app/parsing/c106_parser.py +0 -153
  98. scraper2/app/parsing/c108_parser.py +0 -65
  99. scraper2/app/ports/browser/browser_factory_port.py +0 -11
  100. scraper2/app/ports/browser/browser_port.py +0 -22
  101. scraper2/app/ports/ingest_port.py +0 -14
  102. scraper2/app/ports/sinks/base_sink_port.py +0 -14
  103. scraper2/app/ports/sinks/c101_sink_port.py +0 -9
  104. scraper2/app/ports/sinks/c103_sink_port.py +0 -9
  105. scraper2/app/ports/sinks/c104_sink_port.py +0 -9
  106. scraper2/app/ports/sinks/c106_sink_port.py +0 -9
  107. scraper2/app/ports/sinks/c108_sink_port.py +0 -9
  108. scraper2/app/usecases/fetch/fetch_c101.py +0 -43
  109. scraper2/app/usecases/fetch/fetch_c103.py +0 -103
  110. scraper2/app/usecases/fetch/fetch_c104.py +0 -76
  111. scraper2/app/usecases/fetch/fetch_c106.py +0 -90
  112. scraper2/app/usecases/fetch/fetch_c108.py +0 -49
  113. scraper2/app/usecases/ingest/ingest_c101.py +0 -36
  114. scraper2/app/usecases/ingest/ingest_c103.py +0 -37
  115. scraper2/app/usecases/ingest/ingest_c104.py +0 -37
  116. scraper2/app/usecases/ingest/ingest_c106.py +0 -38
  117. scraper2/app/usecases/ingest/ingest_c108.py +0 -39
  118. scraper2_hj3415-2.4.1.dist-info/RECORD +0 -63
  119. scraper2_hj3415-2.4.1.dist-info/entry_points.txt +0 -3
  120. {scraper2 → scraper2_hj3415}/.DS_Store +0 -0
  121. {scraper2 → scraper2_hj3415}/__init__.py +0 -0
  122. {scraper2/adapters/out → scraper2_hj3415/app}/__init__.py +0 -0
  123. {scraper2/adapters/out/playwright → scraper2_hj3415/app/adapters}/__init__.py +0 -0
  124. {scraper2 → scraper2_hj3415/app}/adapters/out/.DS_Store +0 -0
  125. {scraper2/app → scraper2_hj3415/app/adapters/out}/__init__.py +0 -0
  126. {scraper2/app/parsing → scraper2_hj3415/app/adapters/out/playwright}/__init__.py +0 -0
  127. {scraper2 → scraper2_hj3415/app}/adapters/out/sinks/.DS_Store +0 -0
  128. {scraper2/app/ports → scraper2_hj3415/app/adapters/out/sinks}/__init__.py +0 -0
  129. {scraper2/app/ports/browser → scraper2_hj3415/app/adapters/site}/__init__.py +0 -0
  130. {scraper2/app/ports/sinks → scraper2_hj3415/app/domain}/__init__.py +0 -0
  131. {scraper2/app/usecases → scraper2_hj3415/app/parsing}/__init__.py +0 -0
  132. {scraper2/app/usecases/fetch → scraper2_hj3415/app/parsing/_normalize}/__init__.py +0 -0
  133. {scraper2/app/usecases/ingest → scraper2_hj3415/app/parsing/_tables}/__init__.py +0 -0
  134. {scraper2_hj3415-2.4.1.dist-info → scraper2_hj3415-2.7.0.dist-info}/WHEEL +0 -0
  135. {scraper2_hj3415-2.4.1.dist-info → scraper2_hj3415-2.7.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,182 @@
1
+ # scraper2_hj3415/app/usecases/ingest/ingest_c104.py
2
+ from __future__ import annotations
3
+
4
+ from datetime import datetime
5
+ from typing import Iterable, Optional, cast
6
+
7
+ from scraper2_hj3415.app.services.fetch.fetch_c104 import FetchC104
8
+ from scraper2_hj3415.app.ports.sinks.nfs_sink_port import NfsSinkPort
9
+ from common_hj3415.utils.time import utcnow
10
+
11
+ from scraper2_hj3415.app.domain.endpoint import EndpointKind
12
+ from scraper2_hj3415.app.domain.constants import get_block_keys
13
+ from scraper2_hj3415.app.domain.doc import NfsDoc
14
+ from scraper2_hj3415.app.domain.blocks import MetricsBlock
15
+
16
+ from contracts_hj3415.nfs.types import Endpoint
17
+
18
+ from contracts_hj3415.nfs.c104_dto import (
19
+ C104DTO,
20
+ C104Payload,
21
+ C104Blocks,
22
+ C104Labels,
23
+ C104ValuesMap,
24
+ )
25
+
26
+ from logging_hj3415 import logger
27
+
28
+
29
+ endpoint_kind = EndpointKind.C104
30
+ endpoint: Endpoint = cast(Endpoint, endpoint_kind.value)
31
+
32
+
33
+ def _metricsblock_to_c104_metric_map(block: MetricsBlock) -> dict[str, C104ValuesMap]:
34
+ """
35
+ MetricsBlock(domain) -> dict[MetricKey, dict[CodeKey, Num]]
36
+ - domain MetricSeries.values 가 이미 {code: num} 형태라면 그대로 dict로 고정시킴.
37
+ """
38
+ out: dict[str, C104ValuesMap] = {}
39
+ for mk, series in block.metrics.items():
40
+ out[mk] = dict(series.values) # Mapping -> dict
41
+ return out
42
+
43
+
44
def c104_doc_to_dto(*, doc: NfsDoc, asof: datetime) -> C104DTO:
    """
    NfsDoc(domain, endpoint=c104) -> C104DTO (contracts envelope).

    C104Payload structure::

        {
            "blocks": {"수익성y": {metric: {code: num}}, "성장성y": {...}, ...},
            "labels": {"수익성y": {metric: raw_label}, "성장성y": {...}, ...}
        }

    Policy:
        - blocks/labels always carry every y/q key (empty dict when absent)
          so the contract shape stays stable for partially-filled docs.
        - doc.labels entries may be missing, so fall back to {} safely.

    Raises:
        TypeError: if a present block is not a ``MetricsBlock``.
        ValueError: if the domain yields a block key outside the C104 set.
    """
    # 1) Canonical C104 block keys; the skeleton is always fully populated.
    block_keys = (
        "수익성y", "성장성y", "안정성y", "활동성y", "가치분석y",
        "수익성q", "성장성q", "안정성q", "활동성q", "가치분석q",
    )
    blocks: dict[str, dict] = {k: {} for k in block_keys}
    labels: dict[str, dict] = {k: {} for k in block_keys}

    # 2) Fill in domain block-key order. A single keyed assignment replaces
    #    the previous 10-arm match statement whose arms were all identical
    #    except for the literal key (duplication-prone).
    for bk in get_block_keys(endpoint_kind):
        bd = doc.blocks.get(bk)
        if bd is None:
            continue

        if not isinstance(bd, MetricsBlock):
            raise TypeError(
                f"c104 expects MetricsBlock, got {type(bd).__name__} | block_key={bk!r}"
            )

        if bk not in blocks:
            raise ValueError(f"invalid C104 block key: {bk!r}")

        blocks[bk] = _metricsblock_to_c104_metric_map(bd)
        labels[bk] = dict(doc.labels.get(bk, {}))  # {} when absent

    payload: C104Payload = cast(C104Payload, {"blocks": blocks, "labels": labels})

    # NOTE: TypedDict is a type-checker construct, not runtime validation,
    # so constructing via C104Payload(**payload) is impossible (not callable);
    # the plain dict is injected as-is.
    return C104DTO(
        code=doc.code,
        asof=asof,
        endpoint=endpoint,
        payload=payload,
    )
+ )
142
+
143
+
144
class IngestC104:
    """Fetch c104 documents for stock codes, convert to DTOs, and persist
    them through the configured sink."""

    def __init__(self, fetch: FetchC104, sink: NfsSinkPort[C104DTO]):
        self.fetch = fetch
        self.sink = sink

    async def execute(
        self,
        code: str,
        *,
        sleep_sec: float = 2.0,
        asof: datetime | None = None,
    ) -> C104DTO:
        """Ingest a single code and return the written DTO.

        Raises:
            RuntimeError: if the fetch step yields no document.
        """
        asof = asof or utcnow()

        doc = await self.fetch.execute(code, sleep_sec=sleep_sec)
        logger.debug(f"doc:\n{doc}")
        if doc is None:
            # BUGFIX: this message previously said "c106" — a copy/paste
            # leftover from the sibling ingest module; this is the c104 path.
            raise RuntimeError(f"c104 fetch returned None: code={code}")

        dto = c104_doc_to_dto(doc=doc, asof=asof)
        logger.debug(f"dto:\n{dto}")

        await self.sink.write(dto, endpoint=endpoint)
        return dto

    async def execute_many(
        self,
        codes: Iterable[str],
        *,
        sleep_sec: float = 2.0,
        asof: Optional[datetime] = None,
    ) -> list[C104DTO]:
        """Ingest many codes under one shared ``asof`` timestamp and return
        the written DTOs (one per fetched document)."""
        batch_asof = asof or utcnow()

        docs = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
        dtos = [c104_doc_to_dto(doc=d, asof=batch_asof) for d in docs]
        logger.debug(f"dtos:\n{dtos}")
        await self.sink.write_many(dtos, endpoint=endpoint)
        return dtos
@@ -0,0 +1,136 @@
1
+ # scraper2_hj3415/app/usecases/ingest/ingest_c106.py
2
+ from __future__ import annotations
3
+
4
+ from datetime import datetime
5
+ from typing import Iterable, Optional, cast
6
+
7
+ from scraper2_hj3415.app.services.fetch.fetch_c106 import FetchC106
8
+ from scraper2_hj3415.app.ports.sinks.nfs_sink_port import NfsSinkPort
9
+ from common_hj3415.utils.time import utcnow
10
+
11
+ from scraper2_hj3415.app.domain.endpoint import EndpointKind
12
+ from scraper2_hj3415.app.domain.constants import get_block_keys
13
+ from scraper2_hj3415.app.domain.doc import NfsDoc
14
+ from scraper2_hj3415.app.domain.blocks import MetricsBlock
15
+
16
+ from contracts_hj3415.nfs.types import Endpoint
17
+
18
+ from contracts_hj3415.nfs.c106_dto import (
19
+ C106DTO,
20
+ C106Payload,
21
+ C106Blocks,
22
+ C106Labels,
23
+ C106ValuesMap,
24
+ )
25
+
26
+ from logging_hj3415 import logger
27
+
28
+
29
+ endpoint_kind = EndpointKind.C106
30
+ endpoint: Endpoint = cast(Endpoint, endpoint_kind.value)
31
+
32
+
33
+ def _metricsblock_to_c106_metric_map(block: MetricsBlock) -> dict[str, C106ValuesMap]:
34
+ """
35
+ MetricsBlock(domain) -> dict[MetricKey, dict[CodeKey, Num]]
36
+ - domain MetricSeries.values 가 이미 {code: num} 형태라면 그대로 dict로 고정시킴.
37
+ """
38
+ out: dict[str, C106ValuesMap] = {}
39
+ for mk, series in block.metrics.items():
40
+ out[mk] = dict(series.values) # Mapping -> dict
41
+ return out
42
+
43
+
44
def c106_doc_to_dto(*, doc: NfsDoc, asof: datetime) -> C106DTO:
    """
    NfsDoc(domain, endpoint=c106) -> C106DTO (contracts envelope).

    C106Payload structure::

        {
            "blocks": {"y": {metric: {code: num}}, "q": {...}},
            "labels": {"y": {metric: raw_label}, "q": {...}}
        }

    Policy:
        - blocks/labels always carry the y/q keys (empty dict when absent).
        - doc.labels entries may be missing, so fall back to {} safely.
    """
    # Skeleton is always present so the contract shape stays stable.
    blocks: C106Blocks = {"y": {}, "q": {}}
    labels: C106Labels = {"y": {}, "q": {}}

    # Fill in domain block-key order; only "y"/"q" are legal keys here.
    for bk in get_block_keys(endpoint_kind):
        bd = doc.blocks.get(bk)
        if bd is None:
            continue

        if not isinstance(bd, MetricsBlock):
            raise TypeError(
                f"c106 expects MetricsBlock, got {type(bd).__name__} | block_key={bk!r}"
            )

        metric_map = _metricsblock_to_c106_metric_map(bd)
        label_map = dict(doc.labels.get(bk, {}))  # {} when absent

        if bk == "y":
            blocks["y"] = metric_map
            labels["y"] = label_map
        elif bk == "q":
            blocks["q"] = metric_map
            labels["q"] = label_map
        else:
            raise ValueError(f"invalid c106 block key: {bk!r}")

    payload: C106Payload = cast(C106Payload, {"blocks": blocks, "labels": labels})

    # NOTE: TypedDict is a type-checker construct, not runtime validation,
    # so C106Payload(**payload)-style construction is impossible (not callable).
    return C106DTO(
        code=doc.code,
        asof=asof,
        endpoint=endpoint,
        payload=payload,  # plain dict injected as-is
    )
96
+
97
+
98
class IngestC106:
    """Fetch c106 documents for stock codes, convert to DTOs, and persist
    them through the configured sink."""

    def __init__(self, fetch: FetchC106, sink: NfsSinkPort[C106DTO]):
        self.fetch = fetch
        self.sink = sink

    async def execute(
        self,
        code: str,
        *,
        sleep_sec: float = 2.0,
        asof: datetime | None = None,
    ) -> C106DTO:
        """Ingest a single code and return the written DTO.

        Raises:
            RuntimeError: if the fetch step yields no document.
        """
        if asof is None:
            asof = utcnow()

        doc = await self.fetch.execute(code, sleep_sec=sleep_sec)
        logger.debug(f"doc:\n{doc}")
        if doc is None:
            raise RuntimeError(f"c106 fetch returned None: code={code}")

        dto = c106_doc_to_dto(doc=doc, asof=asof)
        logger.debug(f"dto:\n{dto}")

        await self.sink.write(dto, endpoint=endpoint)
        return dto

    async def execute_many(
        self,
        codes: Iterable[str],
        *,
        sleep_sec: float = 2.0,
        asof: Optional[datetime] = None,
    ) -> list[C106DTO]:
        """Ingest many codes under one shared ``asof`` timestamp and return
        the written DTOs (one per fetched document)."""
        shared_asof = asof if asof is not None else utcnow()

        docs = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
        dtos: list[C106DTO] = []
        for d in docs:
            dtos.append(c106_doc_to_dto(doc=d, asof=shared_asof))
        logger.debug(f"dtos:\n{dtos}")
        await self.sink.write_many(dtos, endpoint=endpoint)
        return dtos
@@ -0,0 +1,122 @@
1
+ # scraper2_hj3415/app/usecases/ingest/ingest_c108.py
2
+ from __future__ import annotations
3
+
4
+ from datetime import datetime
5
+ from typing import Iterable, Optional, cast
6
+
7
+ from scraper2_hj3415.app.services.fetch.fetch_c108 import FetchC108
8
+ from scraper2_hj3415.app.ports.sinks.nfs_sink_port import NfsSinkPort
9
+ from common_hj3415.utils.time import utcnow
10
+
11
+ from scraper2_hj3415.app.domain.endpoint import EndpointKind
12
+ from scraper2_hj3415.app.domain.constants import get_block_keys
13
+ from scraper2_hj3415.app.domain.doc import NfsDoc
14
+ from scraper2_hj3415.app.domain.blocks import RecordsBlock
15
+
16
+ from contracts_hj3415.nfs.types import Endpoint
17
+
18
+ from contracts_hj3415.nfs.c108_dto import C108DTO, C108Payload, C108Blocks
19
+
20
+ from logging_hj3415 import logger
21
+
22
+ endpoint_kind = EndpointKind.C108
23
+ endpoint: Endpoint = cast(Endpoint, endpoint_kind.value)
24
+
25
+
26
+ def _to_list_of_dict(rows: object) -> list[dict]:
27
+ """
28
+ RecordsBlock.rows(Sequence[Mapping]) -> list[dict]
29
+ - sink/serialization 안전하게 dict로 강제
30
+ """
31
+ if not rows:
32
+ return []
33
+ out: list[dict] = []
34
+ if isinstance(rows, list):
35
+ for r in rows:
36
+ if isinstance(r, dict):
37
+ out.append(r)
38
+ else:
39
+ out.append(dict(r)) # Mapping이면 dict() 가능
40
+ return out
41
+
42
+ # Sequence[Mapping] 일반 케이스
43
+ try:
44
+ for r in rows: # type: ignore[assignment]
45
+ out.append(dict(r)) # Mapping 가정
46
+ except Exception:
47
+ return []
48
+ return out
49
+
50
+
51
def c108_doc_to_dto(*, doc: NfsDoc, asof: datetime) -> C108DTO:
    """
    NfsDoc(domain) -> C108DTO (contracts envelope).

    Rules:
        - labels always exist (even as an empty dict); for c108 an empty
          labels mapping is the normal case.
        - payload.blocks['리포트'] is a list[dict].

    Raises:
        ValueError: when the document's endpoint is not C108.
    """
    if doc.endpoint_kind != EndpointKind.C108:
        raise ValueError(f"c108_doc_to_dto expects C108 doc, got: {doc.endpoint_kind}")

    # Contracts payload shape: blocks is always constructed.
    blocks: C108Blocks = {"리포트": []}

    # Follow the domain's block-key list, though in practice only the
    # single '리포트' key matters for c108.
    for block_key in get_block_keys(EndpointKind.C108):
        if block_key != "리포트":
            continue

        candidate = doc.blocks.get(block_key)
        if isinstance(candidate, RecordsBlock):
            blocks["리포트"] = _to_list_of_dict(candidate.rows)
        else:
            # Mixed/unexpected structure: fall back to an empty list.
            blocks["리포트"] = []

    payload: C108Payload = {"blocks": blocks}

    return C108DTO(
        code=doc.code,
        asof=asof,
        endpoint=endpoint,
        payload=payload,
    )
86
+
87
+
88
class IngestC108:
    """Fetch c108 analyst-report documents, convert to DTOs, and persist
    them through the configured sink."""

    def __init__(self, fetch: FetchC108, sink: NfsSinkPort[C108DTO]):
        self.fetch = fetch
        self.sink = sink

    async def execute(
        self, code: str, *, sleep_sec: float = 2.0, asof: datetime | None = None
    ) -> C108DTO:
        """Ingest a single code and return the written DTO.

        Raises:
            RuntimeError: if the fetch step yields no document.
        """
        if asof is None:
            asof = utcnow()

        doc = await self.fetch.execute(code, sleep_sec=sleep_sec)
        logger.debug(f"doc:\n{doc}")
        if doc is None:
            raise RuntimeError(f"c108 fetch returned None: code={code}")

        dto = c108_doc_to_dto(doc=doc, asof=asof)
        logger.debug(f"dto:\n{dto}")

        await self.sink.write(dto, endpoint=endpoint)
        return dto

    async def execute_many(
        self,
        codes: Iterable[str],
        *,
        sleep_sec: float = 2.0,
        asof: Optional[datetime] = None,
    ) -> list[C108DTO]:
        """Ingest many codes under one shared ``asof`` timestamp and return
        the written DTOs (one per fetched document)."""
        shared_asof = asof if asof is not None else utcnow()

        docs = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
        dtos: list[C108DTO] = []
        for d in docs:
            dtos.append(c108_doc_to_dto(doc=d, asof=shared_asof))
        logger.debug(f"dtos:\n{dtos}")
        await self.sink.write_many(dtos, endpoint=endpoint)
        return dtos
@@ -1,21 +1,32 @@
1
- # scraper2/main.py
1
+ # scraper2_hj3415/cli.py
2
2
  from __future__ import annotations
3
3
 
4
4
  import asyncio
5
- from typing import Any, Literal, cast
5
+ from typing import Any, cast, get_args
6
6
 
7
+
8
+ import time
7
9
  import typer
8
10
  from datetime import datetime, timezone
9
11
 
10
- from scraper2.app.composition import build_usecases
11
- from scraper2.app.ports.ingest_port import IngestPort
12
+ from db2_hj3415.nfs.repo import ensure_indexes
13
+ from db2_hj3415.settings import get_settings
14
+ from db2_hj3415.universe.repo import list_universe_codes
15
+
16
+ from scraper2_hj3415.app.composition import build_usecases
17
+ from scraper2_hj3415.app.ports.ingest.nfs_ingest_port import NfsIngestPort
18
+ from scraper2_hj3415.app.domain.types import Sink
12
19
 
13
- from logging_hj3415 import setup_logging
20
+ from contracts_hj3415.nfs.types import Endpoint
21
+ from contracts_hj3415.universe.types import UniverseName
22
+
23
+ from logging_hj3415 import setup_logging, current_log_level, reset_logging, to_pretty_json
14
24
 
15
25
  setup_logging()
26
+ # 운영시에는 아래 항목 주석처리하고 환경변수로 제어할것
27
+ reset_logging("DEBUG")
28
+ print(f"Current log level - {current_log_level()}")
16
29
 
17
- Endpoint = Literal["c101", "c103", "c104", "c106", "c108", "all"]
18
- Sink = Literal["memory", "mongo"]
19
30
 
20
31
  app = typer.Typer(no_args_is_help=True)
21
32
 
@@ -30,54 +41,19 @@ app.add_typer(mi_app, name="mi")
30
41
  # small helpers
31
42
  # -------------------------
32
43
 
33
- def _endpoint_list(ep: Endpoint) -> list[str]:
34
- return ["c101", "c103", "c104", "c106", "c108"] if ep == "all" else [ep]
44
+ def _endpoint_list(endpoint: str) -> list[str]:
45
+ if endpoint == "all":
46
+ return list(get_args(Endpoint)) # -> ["c101", "c103", "c104", "c106", "c108"]
47
+ return [endpoint]
35
48
 
36
49
  async def _mongo_bootstrap(db) -> None:
37
- from db2.nfs import ensure_indexes
38
- from db2.settings import get_settings
39
-
40
50
  s = get_settings()
41
51
  await ensure_indexes(db, snapshot_ttl_days=s.SNAPSHOT_TTL_DAYS)
42
52
 
43
-
44
- async def _load_codes_from_universe(db, *, universe: str) -> list[str]:
45
- """
46
- db2.universe에 저장된 universe_latest에서 codes 로드.
47
- (네 db2 API 명에 맞춰 조정하면 됨)
48
- """
49
- from db2.universe import get_universe_latest # 네가 가진 API
50
-
51
- doc = await get_universe_latest(db, universe=universe)
52
- if not doc:
53
- return []
54
-
55
- # doc 형태가 {"items":[{code,name,...}, ...]} 혹은 {"payload":{"items":[...]}} 일 수 있어서 방어
56
- data = doc
57
- if isinstance(data, dict) and "payload" in data and isinstance(data["payload"], dict):
58
- data = data["payload"]
59
- if isinstance(data, dict) and "items" in data:
60
- data = data["items"]
61
-
62
- if not isinstance(data, list):
63
- return []
64
-
65
- codes: list[str] = []
66
- for row in data:
67
- if not isinstance(row, dict):
68
- continue
69
- code = str(row.get("code") or "").strip()
70
- if code:
71
- codes.append(code)
72
- return codes
73
-
74
-
75
- import time
76
-
77
53
  async def _run_ingest_with_progress(
78
54
  *,
79
55
  ucs: Any,
80
- endpoint: Endpoint,
56
+ endpoint: str,
81
57
  codes: list[str],
82
58
  sleep_sec: float,
83
59
  show: bool,
@@ -99,7 +75,7 @@ async def _run_ingest_with_progress(
99
75
  yield xs[i:i + n]
100
76
 
101
77
  async def _run_one_endpoint(ep: str) -> None:
102
- ingest_uc = cast(IngestPort, getattr(ucs.ingest, ep))
78
+ ingest_uc = cast(NfsIngestPort, getattr(ucs.ingest, ep))
103
79
 
104
80
  ok = 0
105
81
  fail = 0
@@ -128,18 +104,6 @@ async def _run_ingest_with_progress(
128
104
  elapsed = time.perf_counter() - t0 # ✅ 종료 시각
129
105
  typer.echo(f"\n⏱ elapsed time: {_format_elapsed(elapsed)}")
130
106
 
131
- def _dto_to_pretty(obj: Any) -> str:
132
- # pydantic v2 우선
133
- if hasattr(obj, "model_dump_json"):
134
- return obj.model_dump_json(indent=2, by_alias=False)
135
- if hasattr(obj, "model_dump"):
136
- import json
137
- return json.dumps(obj.model_dump(), ensure_ascii=False, indent=2)
138
- # dict fallback
139
- if isinstance(obj, dict):
140
- import json
141
- return json.dumps(obj, ensure_ascii=False, indent=2, default=str)
142
- return str(obj)
143
107
 
144
108
  def _format_elapsed(sec: float) -> str:
145
109
  if sec < 60:
@@ -189,11 +153,11 @@ def _parse_asof(asof: str | None) -> datetime:
189
153
 
190
154
  @nfs_app.command("one")
191
155
  def nfs_one(
192
- endpoint: Endpoint = typer.Argument(..., help="c101|c103|c104|c106|c108|all"),
193
156
  code: str = typer.Argument(..., help="종목코드 (예: 005930)"),
157
+ endpoint: str = typer.Argument(..., help="c101|c103|c104|c106|c108|all"),
194
158
  sleep_sec: float = typer.Option(2.0, "--sleep"),
195
159
  sink: Sink = typer.Option("memory", "--sink"),
196
- show: bool = typer.Option(True, "--show/--no-show", help="결과 DTO 출력"),
160
+ show: bool = typer.Option(False, "--show/--no-show", help="결과 DTO 출력"),
197
161
  asof: str | None = typer.Option(None, "--asof", help="배치 기준시각(ISO8601, UTC 권장). 예: 2026-01-09T05:00:00Z"),
198
162
  ):
199
163
  code = code.strip()
@@ -201,7 +165,7 @@ def nfs_one(
201
165
  raise typer.BadParameter("code는 비어있을 수 없습니다.")
202
166
 
203
167
  async def _run():
204
- ucs = build_usecases(sink_kind=sink)
168
+ ucs = build_usecases(sink=sink)
205
169
 
206
170
  if sink == "mongo":
207
171
  if ucs.db is None:
@@ -211,17 +175,23 @@ def nfs_one(
211
175
  try:
212
176
  run_asof = _parse_asof(asof)
213
177
  for ep in _endpoint_list(endpoint):
214
- ingest_uc = cast(IngestPort, getattr(ucs.ingest, ep))
178
+ ingest_uc = cast(NfsIngestPort, getattr(ucs.ingest, ep))
215
179
  results = await ingest_uc.execute_many([code], sleep_sec=sleep_sec, asof=run_asof)
216
180
  dto = results[0] if results else None
217
181
 
218
182
  typer.echo(f"\n=== ONE DONE: {ep} {code} ===")
219
- if not show:
183
+ is_memory_sink = sink == "memory"
184
+ should_show = show or is_memory_sink
185
+
186
+ if not should_show:
220
187
  continue
188
+
221
189
  if dto is None:
222
190
  typer.echo("(no result)")
223
191
  else:
224
- typer.echo(_dto_to_pretty(dto))
192
+ if is_memory_sink:
193
+ typer.echo("memory result:")
194
+ typer.echo(to_pretty_json(dto))
225
195
  finally:
226
196
  await ucs.aclose()
227
197
 
@@ -230,7 +200,7 @@ def nfs_one(
230
200
 
231
201
  @nfs_app.command("all")
232
202
  def nfs_all(
233
- endpoint: Endpoint = typer.Argument(..., help="c101|c103|c104|c106|c108|all"),
203
+ endpoint: str = typer.Argument(..., help="c101|c103|c104|c106|c108|all"),
234
204
  universe: str = typer.Option("krx300", "--universe"),
235
205
  limit: int = typer.Option(0, "--limit", help="0=전체"),
236
206
  sleep_sec: float = typer.Option(2.0, "--sleep"),
@@ -241,15 +211,18 @@ def nfs_all(
241
211
  asof: str | None = typer.Option(None, "--asof", help="배치 기준시각(ISO8601). 예: 2026-01-09T05:00:00Z"),
242
212
  ):
243
213
  async def _run():
244
- ucs = build_usecases(sink_kind=sink)
214
+ ucs = build_usecases(sink=sink)
245
215
  if ucs.db is None:
246
216
  raise RuntimeError("all 모드는 DB가 필요합니다. mongo sink로 ucs.db를 노출하세요.")
247
217
  await _mongo_bootstrap(ucs.db)
248
218
 
249
- codes = await _load_codes_from_universe(ucs.db, universe=universe)
250
- if not codes:
251
- raise RuntimeError(f"universe='{universe}' codes가 비었습니다. 먼저 krx sync로 universe를 채우세요.")
219
+ try:
220
+ u = UniverseName(universe)
221
+ except ValueError:
222
+ raise typer.BadParameter(f"unknown universe: {universe}")
223
+ codes = await list_universe_codes(ucs.db, universe=u)
252
224
 
225
+ codes = sorted(codes)
253
226
  if limit and limit > 0:
254
227
  codes = codes[:limit]
255
228
 
@@ -281,4 +254,4 @@ def mi():
281
254
 
282
255
 
283
256
  if __name__ == "__main__":
284
- app()
257
+ app()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scraper2-hj3415
3
- Version: 2.4.1
3
+ Version: 2.7.0
4
4
  Summary: Naver WiseReport scraper
5
5
  Keywords: example,demo
6
6
  Author-email: Hyungjin Kim <hj3415@gmail.com>
@@ -17,6 +17,8 @@ Requires-Dist: lxml>=6.0.2
17
17
  Requires-Dist: typer>=0.21.0
18
18
  Requires-Dist: db2-hj3415
19
19
  Requires-Dist: contracts-hj3415
20
+ Requires-Dist: common-hj3415
21
+ Requires-Dist: logging-hj3415
20
22
 
21
23
  # scraper2
22
24