scraper2-hj3415 1.0.1__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scraper2/.DS_Store +0 -0
- scraper2/adapters/out/.DS_Store +0 -0
- scraper2/adapters/out/playwright/browser.py +103 -0
- scraper2/adapters/out/playwright/browser_factory.py +112 -0
- scraper2/adapters/out/playwright/session.py +121 -0
- scraper2/adapters/out/sinks/.DS_Store +0 -0
- scraper2/adapters/out/sinks/memory/__init__.py +15 -0
- scraper2/adapters/out/sinks/memory/c101_memory_sink.py +20 -0
- scraper2/adapters/out/sinks/memory/c103_memory_sink.py +20 -0
- scraper2/adapters/out/sinks/memory/c104_memory_sink.py +20 -0
- scraper2/adapters/out/sinks/memory/c106_memory_sink.py +20 -0
- scraper2/adapters/out/sinks/memory/c108_memory_sink.py +20 -0
- scraper2/adapters/out/sinks/memory/store.py +74 -0
- scraper2/adapters/out/sinks/mongo/__init__.py +14 -0
- scraper2/adapters/out/sinks/mongo/c101_mongo_sink.py +43 -0
- scraper2/adapters/out/sinks/mongo/c103_mongo_sink.py +41 -0
- scraper2/adapters/out/sinks/mongo/c104_mongo_sink.py +41 -0
- scraper2/adapters/out/sinks/mongo/c106_mongo_sink.py +41 -0
- scraper2/adapters/out/sinks/mongo/c108_mongo_sink.py +41 -0
- scraper2/app/composition.py +195 -0
- scraper2/app/parsing/_converters.py +85 -0
- scraper2/app/parsing/_normalize.py +134 -0
- scraper2/app/parsing/c101_parser.py +143 -0
- scraper2/app/parsing/c103_parser.py +128 -0
- scraper2/app/parsing/c104_parser.py +143 -0
- scraper2/app/parsing/c106_parser.py +153 -0
- scraper2/app/parsing/c108_parser.py +65 -0
- scraper2/app/ports/browser/browser_factory_port.py +11 -0
- scraper2/app/ports/browser/browser_port.py +22 -0
- scraper2/app/ports/ingest_port.py +13 -0
- scraper2/app/ports/sinks/base_sink_port.py +14 -0
- scraper2/app/ports/sinks/c101_sink_port.py +9 -0
- scraper2/app/ports/sinks/c103_sink_port.py +9 -0
- scraper2/app/ports/sinks/c104_sink_port.py +9 -0
- scraper2/app/ports/sinks/c106_sink_port.py +9 -0
- scraper2/app/ports/sinks/c108_sink_port.py +9 -0
- scraper2/app/usecases/fetch/fetch_c101.py +43 -0
- scraper2/app/usecases/fetch/fetch_c103.py +103 -0
- scraper2/app/usecases/fetch/fetch_c104.py +76 -0
- scraper2/app/usecases/fetch/fetch_c106.py +90 -0
- scraper2/app/usecases/fetch/fetch_c108.py +49 -0
- scraper2/app/usecases/ingest/ingest_c101.py +36 -0
- scraper2/app/usecases/ingest/ingest_c103.py +37 -0
- scraper2/app/usecases/ingest/ingest_c104.py +37 -0
- scraper2/app/usecases/ingest/ingest_c106.py +38 -0
- scraper2/app/usecases/ingest/ingest_c108.py +39 -0
- scraper2/main.py +257 -0
- scraper2_hj3415-2.0.0.dist-info/METADATA +164 -0
- scraper2_hj3415-2.0.0.dist-info/RECORD +63 -0
- scraper2_hj3415-2.0.0.dist-info/entry_points.txt +3 -0
- scraper2_hj3415/__main__.py +0 -6
- scraper2_hj3415/adapters/_shared/utils.py +0 -29
- scraper2_hj3415/adapters/clients/browser.py +0 -124
- scraper2_hj3415/adapters/clients/http.py +0 -51
- scraper2_hj3415/adapters/nfs/pipelines/c1034_pipeline.py +0 -55
- scraper2_hj3415/adapters/nfs/pipelines/normalize_c1034.py +0 -109
- scraper2_hj3415/adapters/nfs/sinks/c1034_sink.py +0 -51
- scraper2_hj3415/adapters/nfs/sinks/df_to_dto_mappers.py +0 -106
- scraper2_hj3415/adapters/nfs/sources/bundle_source.py +0 -24
- scraper2_hj3415/adapters/nfs/sources/c1034_fetch.py +0 -117
- scraper2_hj3415/adapters/nfs/sources/c1034_session.py +0 -90
- scraper2_hj3415/core/constants.py +0 -47
- scraper2_hj3415/core/ports/sink_port.py +0 -16
- scraper2_hj3415/core/ports/source_port.py +0 -13
- scraper2_hj3415/core/types.py +0 -11
- scraper2_hj3415/core/usecases/c1034_ingest.py +0 -139
- scraper2_hj3415/di.py +0 -103
- scraper2_hj3415/entrypoints/cli.py +0 -226
- scraper2_hj3415/entrypoints/main.py +0 -20
- scraper2_hj3415-1.0.1.dist-info/METADATA +0 -66
- scraper2_hj3415-1.0.1.dist-info/RECORD +0 -35
- scraper2_hj3415-1.0.1.dist-info/entry_points.txt +0 -3
- {scraper2_hj3415 → scraper2}/__init__.py +0 -0
- {scraper2_hj3415/adapters → scraper2/adapters/out}/__init__.py +0 -0
- {scraper2_hj3415/adapters/_shared → scraper2/adapters/out/playwright}/__init__.py +0 -0
- {scraper2_hj3415/adapters/clients → scraper2/app}/__init__.py +0 -0
- {scraper2_hj3415/adapters/nfs/pipelines → scraper2/app/parsing}/__init__.py +0 -0
- {scraper2_hj3415/adapters/nfs/sinks → scraper2/app/ports}/__init__.py +0 -0
- {scraper2_hj3415/adapters/nfs/sources → scraper2/app/ports/browser}/__init__.py +0 -0
- {scraper2_hj3415/core → scraper2/app/ports/sinks}/__init__.py +0 -0
- {scraper2_hj3415/core/ports → scraper2/app/usecases}/__init__.py +0 -0
- {scraper2_hj3415/core/usecases → scraper2/app/usecases/fetch}/__init__.py +0 -0
- {scraper2_hj3415/entrypoints → scraper2/app/usecases/ingest}/__init__.py +0 -0
- {scraper2_hj3415-1.0.1.dist-info → scraper2_hj3415-2.0.0.dist-info}/WHEEL +0 -0
- {scraper2_hj3415-1.0.1.dist-info → scraper2_hj3415-2.0.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,103 @@
+# scraper2/app/usecases/fetch/fetch_c103.py
+from __future__ import annotations
+
+import math
+import asyncio
+import random
+from typing import Iterable, Any
+from contracts.nfs.c103 import C103DTO, ItemsMap
+from scraper2.app.ports.browser.browser_factory_port import BrowserFactoryPort
+from scraper2.app.parsing.c103_parser import parse_c103_to_dict
+from collections import defaultdict
+
+
+BLOCK_KEYS = (
+    "손익계산서y", "손익계산서q",
+    "재무상태표y", "재무상태표q",
+    "현금흐름표y", "현금흐름표q",
+)
+
+def _clean(v: Any) -> Any:
+    if isinstance(v, float) and math.isnan(v):
+        return None
+    return v
+
+def _is_all_none(row: dict[str, Any]) -> bool:
+    # row is assumed to hold only the values, with the '항목' key already removed
+    return all(v is None for v in row.values())
+
+
+def records_to_items_map(records: list[dict[str, Any]]) -> ItemsMap:
+    grouped: dict[str, list[dict[str, Any]]] = defaultdict(list)
+
+    # 1) Collect the rows for each item first
+    for r in records:
+        item = r.get("항목")
+        if not item:
+            continue
+        item = str(item).strip()
+
+        row = {k: _clean(v) for k, v in r.items() if k != "항목"}
+        grouped[item].append(row)
+
+    # 2) Apply the rules to build the output
+    out: ItemsMap = {}
+
+    for item, rows in grouped.items():
+        if len(rows) == 1:
+            # ✅ Rule 1: a non-duplicated item is kept even when every value is None
+            out[item] = rows[0]
+            continue
+
+        # ✅ Rule 2: for duplicated items, drop the all-None rows
+        kept = [row for row in rows if not _is_all_none(row)]
+        if not kept:
+            continue  # drop the whole group when every duplicate is all-None
+
+        # Suffix only the survivors (the first one keeps the bare key)
+        for idx, row in enumerate(kept, start=1):
+            key = item if idx == 1 else f"{item}_{idx}"
+            out[key] = row
+
+    return out
+
+
+class FetchC103:
+    def __init__(self, factory: BrowserFactoryPort):
+        self.factory = factory
+
+    async def _fetch_one(self, code: str, *, sleep_sec: float) -> C103DTO | None:
+        async with self.factory.lease() as browser:
+            url = f"https://navercomp.wisereport.co.kr/v2/company/c1030001.aspx?cn=&cmp_cd={code}"
+            await browser.goto(url, timeout_ms=10_000)
+
+            jitter_sec = 1.0
+            if sleep_sec > 0:
+                delay = sleep_sec + random.uniform(0, jitter_sec)
+                await asyncio.sleep(delay)
+
+            parsed = await parse_c103_to_dict(browser)
+
+            if not parsed or all(not parsed.get(k) for k in BLOCK_KEYS):
+                return None
+
+            data: dict[str, Any] = {"코드": code}
+            for k in BLOCK_KEYS:
+                data[k] = records_to_items_map(parsed.get(k, []))
+
+            return C103DTO(**data)
+
+    async def execute(self, code: str, *, sleep_sec: float = 2.0) -> C103DTO | None:
+        return await self._fetch_one(code, sleep_sec=sleep_sec)
+
+    async def execute_many(
+        self,
+        codes: Iterable[str],
+        *,
+        sleep_sec: float = 2.0,
+    ) -> list[C103DTO]:
+        results = await asyncio.gather(
+            *(self._fetch_one(c, sleep_sec=sleep_sec) for c in codes),
+            return_exceptions=False,
+        )
+        return [r for r in results if r is not None]

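The two rules above are easiest to verify on a tiny input. A worked example against the `records_to_items_map` just defined (the `"2023/12"` column key and the values are made up for illustration):

```python
records = [
    {"항목": "매출액", "2023/12": 100.0},
    {"항목": "매출액", "2023/12": 200.0},          # duplicate with data -> kept as "매출액_2"
    {"항목": "매출액", "2023/12": float("nan")},   # NaN -> None via _clean; all-None duplicate -> dropped
    {"항목": "영업이익", "2023/12": float("nan")}, # unique item -> kept even though all-None (Rule 1)
]

print(records_to_items_map(records))
# {'매출액': {'2023/12': 100.0},
#  '매출액_2': {'2023/12': 200.0},
#  '영업이익': {'2023/12': None}}
```
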
@@ -0,0 +1,76 @@
+# scraper2/app/usecases/fetch/fetch_c104.py
+from __future__ import annotations
+
+import math
+import asyncio
+import random
+from typing import Iterable, Any
+from collections import Counter
+
+from contracts.nfs.c104 import C104DTO, ItemsMap
+from scraper2.app.ports.browser.browser_factory_port import BrowserFactoryPort
+from scraper2.app.parsing.c104_parser import parse_c104_to_dict
+
+
+BLOCK_KEYS = (
+    "수익성y", "성장성y", "안정성y", "활동성y", "가치분석y",
+    "수익성q", "성장성q", "안정성q", "활동성q", "가치분석q",
+)
+
+def _clean(v: Any) -> Any:
+    if isinstance(v, float) and math.isnan(v):
+        return None
+    return v
+
+def records_to_items_map(records: list[dict[str, Any]]) -> ItemsMap:
+    """
+    records (list[dict]) -> ItemsMap (dict[item -> row])
+    - duplicated items (항목) are suffixed _2, _3, ...
+    """
+    out: ItemsMap = {}
+    seen: Counter[str] = Counter()
+
+    for r in records:
+        item = r.get("항목")
+        if not item:
+            continue
+
+        item = str(item).strip()
+        seen[item] += 1
+        key = item if seen[item] == 1 else f"{item}_{seen[item]}"
+
+        out[key] = {k: _clean(v) for k, v in r.items() if k != "항목"}
+
+    return out
+
+class FetchC104:
+    def __init__(self, factory: BrowserFactoryPort):
+        self.factory = factory
+
+    async def _fetch_one(self, code: str, *, sleep_sec: float) -> C104DTO | None:
+        async with self.factory.lease() as browser:
+            url = f"https://navercomp.wisereport.co.kr/v2/company/c1040001.aspx?cn=&cmp_cd={code}"
+            await browser.goto(url, timeout_ms=10_000)
+
+            jitter_sec = 1.0
+            if sleep_sec > 0:
+                delay = sleep_sec + random.uniform(0, jitter_sec)
+                await asyncio.sleep(delay)
+
+            parsed = await parse_c104_to_dict(browser)
+
+            if not parsed or all(not parsed.get(k) for k in BLOCK_KEYS):
+                return None
+
+            data: dict[str, Any] = {"코드": code}
+            for k in BLOCK_KEYS:
+                data[k] = records_to_items_map(parsed.get(k, []))
+
+            return C104DTO(**data)
+
+    async def execute(self, code: str, *, sleep_sec: float = 2.0) -> C104DTO | None:
+        return await self._fetch_one(code, sleep_sec=sleep_sec)
+
+    async def execute_many(self, codes: Iterable[str], *, sleep_sec: float = 2.0) -> list[C104DTO]:
+        results = await asyncio.gather(*(self._fetch_one(c, sleep_sec=sleep_sec) for c in codes))
+        return [r for r in results if r is not None]

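Note the contrast with the c103 variant: here duplicates are suffixed in input order via a `Counter`, and all-None rows are never filtered out. The same style of worked example (keys and values again made up):

```python
records = [
    {"항목": "ROE", "2023/12": 5.1},
    {"항목": "ROE", "2023/12": float("nan")},  # NaN -> None, but the row is kept, keyed "ROE_2"
    {"항목": "PER", "2023/12": 12.3},
]

print(records_to_items_map(records))
# {'ROE': {'2023/12': 5.1}, 'ROE_2': {'2023/12': None}, 'PER': {'2023/12': 12.3}}
```
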
@@ -0,0 +1,90 @@
+# scraper2/app/usecases/fetch/fetch_c106.py
+from __future__ import annotations
+
+import asyncio
+import random
+from typing import Iterable
+
+from contracts.nfs.c106 import C106DTO, C106Block
+from scraper2.app.ports.browser.browser_factory_port import BrowserFactoryPort
+from scraper2.app.parsing.c106_parser import parse_c106_header, parse_c106_table_to_metrics, normalize_c106_metrics
+
+from logging_hj3415 import logger
+
+class FetchC106:
+    def __init__(self, factory: BrowserFactoryPort):
+        self.factory = factory
+
+    async def _fetch_one(self, code: str, *, sleep_sec: float) -> C106DTO | None:
+        async with self.factory.lease() as browser:
+            url = f"https://navercomp.wisereport.co.kr/v2/company/c1060001.aspx?cn=&cmp_cd={code}"
+            await browser.goto(url, timeout_ms=10_000)
+
+            jitter_sec = 1.0
+            if sleep_sec > 0:
+                delay = sleep_sec + random.uniform(0, jitter_sec)
+                await asyncio.sleep(delay)
+
+            company_names = await parse_c106_header(browser)
+
+            table_url = (
+                f"https://navercomp.wisereport.co.kr/v2/company/cF6002.aspx"
+                f"?cmp_cd={code}&finGubun=MAIN&sec_cd=FG000&frq="
+            )
+
+            stage = "init"
+            try:
+                stage = "goto_q"
+                await browser.goto(table_url + 'q', timeout_ms=10_000)
+                await asyncio.sleep(1)
+
+                stage = "parse_q"
+                q = await parse_c106_table_to_metrics(browser, company_names)
+
+                stage = "goto_y"
+                await browser.goto(table_url + 'y', timeout_ms=10_000)
+                await asyncio.sleep(1)
+
+                stage = "parse_y"
+                y = await parse_c106_table_to_metrics(browser, company_names)
+
+                q_norm = normalize_c106_metrics(q)
+                y_norm = normalize_c106_metrics(y)
+
+                return C106DTO(
+                    코드=code,
+                    q=C106Block(**q_norm),
+                    y=C106Block(**y_norm),
+                )
+            except Exception as e:
+                title = ""
+                try:
+                    title = await browser.title()
+                except Exception:
+                    pass
+
+                logger.bind(
+                    endpoint="c106",
+                    code=code,
+                    stage=stage,
+                    page_title=title,
+                    url_q=table_url + "q",
+                ).warning("c106 skipped (unstable page/table)")
+                return None
+
+
+
+    async def execute(self, code: str, *, sleep_sec: float = 2.0) -> C106DTO | None:
+        return await self._fetch_one(code, sleep_sec=sleep_sec)
+
+    async def execute_many(
+        self,
+        codes: Iterable[str],
+        *,
+        sleep_sec: float = 2.0,
+    ) -> list[C106DTO]:
+        results = await asyncio.gather(
+            *(self._fetch_one(c, sleep_sec=sleep_sec) for c in codes),
+            return_exceptions=False,
+        )
+        return [r for r in results if r is not None]

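Each fetcher leans on the same implicit contract: `BrowserFactoryPort.lease()` is an async context manager yielding a browser that supports at least `goto(url, timeout_ms=...)` and `title()`. The real adapter lives in `scraper2/adapters/out/playwright/` (shown only in the file list above). A minimal in-memory stand-in for exercising the use cases without Playwright might look like this; everything beyond those two method signatures is an assumption:

```python
from __future__ import annotations

import contextlib

class FakeBrowser:
    """Test double covering the subset of the browser port the fetchers use."""

    def __init__(self, titles: dict[str, str]):
        self.titles = titles               # url -> page title (hypothetical fixture data)
        self.current_url: str | None = None

    async def goto(self, url: str, *, timeout_ms: int) -> None:
        self.current_url = url             # no real navigation; just record the URL

    async def title(self) -> str:
        return self.titles.get(self.current_url or "", "")

class FakeBrowserFactory:
    def __init__(self, titles: dict[str, str] | None = None):
        self.titles = titles or {}

    @contextlib.asynccontextmanager
    async def lease(self):
        yield FakeBrowser(self.titles)     # lease/release is a no-op here
```
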
@@ -0,0 +1,49 @@
+# scraper2/app/usecases/fetch/fetch_c108.py
+from __future__ import annotations
+
+import asyncio
+import random
+from typing import Iterable
+
+from contracts.nfs.c108 import C108DTO
+from scraper2.app.ports.browser.browser_factory_port import BrowserFactoryPort
+from scraper2.app.parsing.c108_parser import parse_c108_to_dicts
+
+
+class FetchC108:
+    def __init__(self, factory: BrowserFactoryPort):
+        self.factory = factory
+
+    async def _fetch_one(self, code: str, *, sleep_sec: float) -> list[C108DTO]:
+        async with self.factory.lease() as browser:
+            url = f"https://navercomp.wisereport.co.kr/v2/company/c1080001.aspx?cn=&cmp_cd={code}"
+            await browser.goto(url, timeout_ms=10_000)
+
+            # sleep + jitter (same shape as C101)
+            jitter_sec = 1.0
+            if sleep_sec > 0:
+                delay = sleep_sec + random.uniform(0, jitter_sec)
+                await asyncio.sleep(delay)
+
+            c108_dicts = await parse_c108_to_dicts(browser)
+
+            return [C108DTO(**{**x, "코드": code}) for x in c108_dicts]
+
+    async def execute(self, code: str, *, sleep_sec: float = 2.0) -> list[C108DTO]:
+        return await self._fetch_one(code, sleep_sec=sleep_sec)
+
+    async def execute_many(
+        self,
+        codes: Iterable[str],
+        *,
+        sleep_sec: float = 2.0,
+    ) -> list[C108DTO]:
+        results = await asyncio.gather(
+            *(self._fetch_one(c, sleep_sec=sleep_sec) for c in codes),
+            return_exceptions=False,
+        )
+        # list[list[C108DTO]] -> flat list[C108DTO]
+        out: list[C108DTO] = []
+        for chunk in results:
+            out.extend(chunk)
+        return out

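Since `_fetch_one` returns a list of reports per code, `execute_many` flattens `list[list[C108DTO]]`; the explicit loop is equivalent to `list(itertools.chain.from_iterable(results))`. A usage sketch, assuming some `factory: BrowserFactoryPort` has already been constructed:

```python
import asyncio

async def main() -> None:
    fetch = FetchC108(factory)  # `factory` is any BrowserFactoryPort implementation
    reports = await fetch.execute_many(["005930", "000660"], sleep_sec=2.0)
    print(len(reports))         # one flat list of C108DTO across both codes

asyncio.run(main())
```
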
@@ -0,0 +1,36 @@
+# scraper2/app/usecases/ingest/ingest_c101.py
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from typing import Optional, Iterable
+
+from contracts.nfs.c101 import C101DTO
+from scraper2.app.usecases.fetch.fetch_c101 import FetchC101
+from scraper2.app.ports.sinks.c101_sink_port import C101SinkPort
+from scraper2.app.ports.ingest_port import IngestPort
+
+def _utcnow():
+    return datetime.now(timezone.utc)
+
+class IngestC101(IngestPort):
+    def __init__(self, fetch: FetchC101, sink: C101SinkPort):
+        self.fetch = fetch
+        self.sink = sink
+
+    async def execute(self, code: str, *, sleep_sec: float = 2.0) -> C101DTO:
+        dto = await self.fetch.execute(code, sleep_sec=sleep_sec)
+        await self.sink.write(dto)
+        return dto
+
+    async def execute_many(
+        self,
+        codes: Iterable[str],
+        *,
+        sleep_sec: float = 2.0,
+        asof: Optional[datetime] = None,
+    ) -> list[C101DTO]:
+        batch_asof = asof or _utcnow()
+
+        dtos = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
+        await self.sink.write_many(dtos, asof=batch_asof)
+        return dtos

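The ingest layer is plain constructor injection: a fetch use case plus a sink port. The package's real wiring is in `scraper2/app/composition.py` (+195 lines, not reproduced here). A hand-wiring sketch; the `C101MemorySink` name is inferred from the file list above, not verified:

```python
import asyncio

from scraper2.app.usecases.fetch.fetch_c101 import FetchC101
from scraper2.app.usecases.ingest.ingest_c101 import IngestC101
from scraper2.adapters.out.sinks.memory import C101MemorySink  # name inferred, unverified

async def main() -> None:
    factory = ...  # a BrowserFactoryPort implementation, e.g. the playwright adapter
    ingest = IngestC101(fetch=FetchC101(factory), sink=C101MemorySink())
    dtos = await ingest.execute_many(["005930"], sleep_sec=2.0)
    print(f"ingested {len(dtos)} C101 snapshots")

asyncio.run(main())
```
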
@@ -0,0 +1,37 @@
+# scraper2/app/usecases/ingest/ingest_c103.py
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from typing import Optional, Iterable
+
+from contracts.nfs.c103 import C103DTO
+from scraper2.app.usecases.fetch.fetch_c103 import FetchC103
+from scraper2.app.ports.sinks.c103_sink_port import C103SinkPort
+from scraper2.app.ports.ingest_port import IngestPort
+
+
+def _utcnow():
+    return datetime.now(timezone.utc)
+
+class IngestC103(IngestPort):
+    def __init__(self, fetch: FetchC103, sink: C103SinkPort):
+        self.fetch = fetch
+        self.sink = sink
+
+    async def execute(self, code: str, *, sleep_sec: float = 2.0) -> C103DTO:
+        dto = await self.fetch.execute(code, sleep_sec=sleep_sec)
+        await self.sink.write(dto)
+        return dto
+
+    async def execute_many(
+        self,
+        codes: Iterable[str],
+        *,
+        sleep_sec: float = 2.0,
+        asof: Optional[datetime] = None,
+    ) -> list[C103DTO]:
+        batch_asof = asof or _utcnow()
+
+        dtos = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
+        await self.sink.write_many(dtos, asof=batch_asof)
+        return dtos

@@ -0,0 +1,37 @@
+# scraper2/app/usecases/ingest/ingest_c104.py
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from typing import Optional, Iterable
+
+from contracts.nfs.c104 import C104DTO
+from scraper2.app.usecases.fetch.fetch_c104 import FetchC104
+from scraper2.app.ports.sinks.c104_sink_port import C104SinkPort
+from scraper2.app.ports.ingest_port import IngestPort
+
+
+def _utcnow():
+    return datetime.now(timezone.utc)
+
+class IngestC104(IngestPort):
+    def __init__(self, fetch: FetchC104, sink: C104SinkPort):
+        self.fetch = fetch
+        self.sink = sink
+
+    async def execute(self, code: str, *, sleep_sec: float = 2.0) -> C104DTO:
+        dto = await self.fetch.execute(code, sleep_sec=sleep_sec)
+        await self.sink.write(dto)
+        return dto
+
+    async def execute_many(
+        self,
+        codes: Iterable[str],
+        *,
+        sleep_sec: float = 2.0,
+        asof: Optional[datetime] = None,
+    ) -> list[C104DTO]:
+        batch_asof = asof or _utcnow()
+
+        dtos = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
+        await self.sink.write_many(dtos, asof=batch_asof)
+        return dtos

@@ -0,0 +1,38 @@
+# scraper2/app/usecases/ingest/ingest_c106.py
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from typing import Optional, Iterable
+
+from contracts.nfs.c106 import C106DTO
+from scraper2.app.usecases.fetch.fetch_c106 import FetchC106
+from scraper2.app.ports.sinks.c106_sink_port import C106SinkPort
+from scraper2.app.ports.ingest_port import IngestPort
+
+
+def _utcnow():
+    return datetime.now(timezone.utc)
+
+class IngestC106(IngestPort):
+    def __init__(self, fetch: FetchC106, sink: C106SinkPort):
+        self.fetch = fetch
+        self.sink = sink
+
+    async def execute(self, code: str, *, sleep_sec: float = 2.0) -> C106DTO | None:
+        dto = await self.fetch.execute(code, sleep_sec=sleep_sec)
+        if dto is not None:
+            await self.sink.write(dto)
+        return dto
+
+    async def execute_many(
+        self,
+        codes: Iterable[str],
+        *,
+        sleep_sec: float = 2.0,
+        asof: Optional[datetime] = None,
+    ) -> list[C106DTO]:
+        batch_asof = asof or _utcnow()
+
+        dtos = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
+        await self.sink.write_many(dtos, asof=batch_asof)
+        return dtos

@@ -0,0 +1,39 @@
+# scraper2/app/usecases/ingest/ingest_c108.py
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from typing import Optional, Iterable
+
+from contracts.nfs.c108 import C108DTO
+from scraper2.app.usecases.fetch.fetch_c108 import FetchC108
+from scraper2.app.ports.sinks.c108_sink_port import C108SinkPort
+from scraper2.app.ports.ingest_port import IngestPort
+
+
+def _utcnow():
+    return datetime.now(timezone.utc)
+
+class IngestC108(IngestPort):
+    def __init__(self, fetch: FetchC108, sink: C108SinkPort):
+        self.fetch = fetch
+        self.sink = sink
+
+    async def execute(self, code: str, *, sleep_sec: float = 2.0) -> C108DTO | None:
+        dto = await self.fetch.execute(code, sleep_sec=sleep_sec)
+        if dto is not None:
+            await self.sink.write(dto)
+        return dto
+
+    async def execute_many(
+        self,
+        codes: Iterable[str],
+        *,
+        sleep_sec: float = 2.0,
+        asof: Optional[datetime] = None,
+    ) -> list[C108DTO]:
+        batch_asof = asof or _utcnow()
+
+        dtos = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
+        await self.sink.write_many(dtos, asof=batch_asof)
+        return dtos
+

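All five ingest classes inherit from `IngestPort` (`scraper2/app/ports/ingest_port.py`, +13 lines, contents not included in this view). Judging from the shared method shapes above, it is presumably close to the following; this is a reconstruction for orientation, not the actual file:

```python
from __future__ import annotations

import abc
from datetime import datetime
from typing import Any, Iterable, Optional

class IngestPort(abc.ABC):
    @abc.abstractmethod
    async def execute(self, code: str, *, sleep_sec: float = 2.0) -> Any: ...

    @abc.abstractmethod
    async def execute_many(
        self,
        codes: Iterable[str],
        *,
        sleep_sec: float = 2.0,
        asof: Optional[datetime] = None,
    ) -> list[Any]: ...
```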