scraper2-hj3415 1.0.1__py3-none-any.whl → 2.0.0__py3-none-any.whl

Files changed (85)
  1. scraper2/.DS_Store +0 -0
  2. scraper2/adapters/out/.DS_Store +0 -0
  3. scraper2/adapters/out/playwright/browser.py +103 -0
  4. scraper2/adapters/out/playwright/browser_factory.py +112 -0
  5. scraper2/adapters/out/playwright/session.py +121 -0
  6. scraper2/adapters/out/sinks/.DS_Store +0 -0
  7. scraper2/adapters/out/sinks/memory/__init__.py +15 -0
  8. scraper2/adapters/out/sinks/memory/c101_memory_sink.py +20 -0
  9. scraper2/adapters/out/sinks/memory/c103_memory_sink.py +20 -0
  10. scraper2/adapters/out/sinks/memory/c104_memory_sink.py +20 -0
  11. scraper2/adapters/out/sinks/memory/c106_memory_sink.py +20 -0
  12. scraper2/adapters/out/sinks/memory/c108_memory_sink.py +20 -0
  13. scraper2/adapters/out/sinks/memory/store.py +74 -0
  14. scraper2/adapters/out/sinks/mongo/__init__.py +14 -0
  15. scraper2/adapters/out/sinks/mongo/c101_mongo_sink.py +43 -0
  16. scraper2/adapters/out/sinks/mongo/c103_mongo_sink.py +41 -0
  17. scraper2/adapters/out/sinks/mongo/c104_mongo_sink.py +41 -0
  18. scraper2/adapters/out/sinks/mongo/c106_mongo_sink.py +41 -0
  19. scraper2/adapters/out/sinks/mongo/c108_mongo_sink.py +41 -0
  20. scraper2/app/composition.py +195 -0
  21. scraper2/app/parsing/_converters.py +85 -0
  22. scraper2/app/parsing/_normalize.py +134 -0
  23. scraper2/app/parsing/c101_parser.py +143 -0
  24. scraper2/app/parsing/c103_parser.py +128 -0
  25. scraper2/app/parsing/c104_parser.py +143 -0
  26. scraper2/app/parsing/c106_parser.py +153 -0
  27. scraper2/app/parsing/c108_parser.py +65 -0
  28. scraper2/app/ports/browser/browser_factory_port.py +11 -0
  29. scraper2/app/ports/browser/browser_port.py +22 -0
  30. scraper2/app/ports/ingest_port.py +13 -0
  31. scraper2/app/ports/sinks/base_sink_port.py +14 -0
  32. scraper2/app/ports/sinks/c101_sink_port.py +9 -0
  33. scraper2/app/ports/sinks/c103_sink_port.py +9 -0
  34. scraper2/app/ports/sinks/c104_sink_port.py +9 -0
  35. scraper2/app/ports/sinks/c106_sink_port.py +9 -0
  36. scraper2/app/ports/sinks/c108_sink_port.py +9 -0
  37. scraper2/app/usecases/fetch/fetch_c101.py +43 -0
  38. scraper2/app/usecases/fetch/fetch_c103.py +103 -0
  39. scraper2/app/usecases/fetch/fetch_c104.py +76 -0
  40. scraper2/app/usecases/fetch/fetch_c106.py +90 -0
  41. scraper2/app/usecases/fetch/fetch_c108.py +49 -0
  42. scraper2/app/usecases/ingest/ingest_c101.py +36 -0
  43. scraper2/app/usecases/ingest/ingest_c103.py +37 -0
  44. scraper2/app/usecases/ingest/ingest_c104.py +37 -0
  45. scraper2/app/usecases/ingest/ingest_c106.py +38 -0
  46. scraper2/app/usecases/ingest/ingest_c108.py +39 -0
  47. scraper2/main.py +257 -0
  48. scraper2_hj3415-2.0.0.dist-info/METADATA +164 -0
  49. scraper2_hj3415-2.0.0.dist-info/RECORD +63 -0
  50. scraper2_hj3415-2.0.0.dist-info/entry_points.txt +3 -0
  51. scraper2_hj3415/__main__.py +0 -6
  52. scraper2_hj3415/adapters/_shared/utils.py +0 -29
  53. scraper2_hj3415/adapters/clients/browser.py +0 -124
  54. scraper2_hj3415/adapters/clients/http.py +0 -51
  55. scraper2_hj3415/adapters/nfs/pipelines/c1034_pipeline.py +0 -55
  56. scraper2_hj3415/adapters/nfs/pipelines/normalize_c1034.py +0 -109
  57. scraper2_hj3415/adapters/nfs/sinks/c1034_sink.py +0 -51
  58. scraper2_hj3415/adapters/nfs/sinks/df_to_dto_mappers.py +0 -106
  59. scraper2_hj3415/adapters/nfs/sources/bundle_source.py +0 -24
  60. scraper2_hj3415/adapters/nfs/sources/c1034_fetch.py +0 -117
  61. scraper2_hj3415/adapters/nfs/sources/c1034_session.py +0 -90
  62. scraper2_hj3415/core/constants.py +0 -47
  63. scraper2_hj3415/core/ports/sink_port.py +0 -16
  64. scraper2_hj3415/core/ports/source_port.py +0 -13
  65. scraper2_hj3415/core/types.py +0 -11
  66. scraper2_hj3415/core/usecases/c1034_ingest.py +0 -139
  67. scraper2_hj3415/di.py +0 -103
  68. scraper2_hj3415/entrypoints/cli.py +0 -226
  69. scraper2_hj3415/entrypoints/main.py +0 -20
  70. scraper2_hj3415-1.0.1.dist-info/METADATA +0 -66
  71. scraper2_hj3415-1.0.1.dist-info/RECORD +0 -35
  72. scraper2_hj3415-1.0.1.dist-info/entry_points.txt +0 -3
  73. {scraper2_hj3415 → scraper2}/__init__.py +0 -0
  74. {scraper2_hj3415/adapters → scraper2/adapters/out}/__init__.py +0 -0
  75. {scraper2_hj3415/adapters/_shared → scraper2/adapters/out/playwright}/__init__.py +0 -0
  76. {scraper2_hj3415/adapters/clients → scraper2/app}/__init__.py +0 -0
  77. {scraper2_hj3415/adapters/nfs/pipelines → scraper2/app/parsing}/__init__.py +0 -0
  78. {scraper2_hj3415/adapters/nfs/sinks → scraper2/app/ports}/__init__.py +0 -0
  79. {scraper2_hj3415/adapters/nfs/sources → scraper2/app/ports/browser}/__init__.py +0 -0
  80. {scraper2_hj3415/core → scraper2/app/ports/sinks}/__init__.py +0 -0
  81. {scraper2_hj3415/core/ports → scraper2/app/usecases}/__init__.py +0 -0
  82. {scraper2_hj3415/core/usecases → scraper2/app/usecases/fetch}/__init__.py +0 -0
  83. {scraper2_hj3415/entrypoints → scraper2/app/usecases/ingest}/__init__.py +0 -0
  84. {scraper2_hj3415-1.0.1.dist-info → scraper2_hj3415-2.0.0.dist-info}/WHEEL +0 -0
  85. {scraper2_hj3415-1.0.1.dist-info → scraper2_hj3415-2.0.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,103 @@
+ # scraper2/app/usecases/fetch/fetch_c103.py
+ from __future__ import annotations
+
+ import math
+ import asyncio
+ import random
+ from typing import Iterable, Any
+ from contracts.nfs.c103 import C103DTO, ItemsMap
+ from scraper2.app.ports.browser.browser_factory_port import BrowserFactoryPort
+ from scraper2.app.parsing.c103_parser import parse_c103_to_dict
+ from collections import defaultdict
+
+
+ BLOCK_KEYS = (
+     "손익계산서y", "손익계산서q",
+     "재무상태표y", "재무상태표q",
+     "현금흐름표y", "현금흐름표q",
+ )
+
+ def _clean(v: Any) -> Any:
+     if isinstance(v, float) and math.isnan(v):
+         return None
+     return v
+
+ def _is_all_none(row: dict[str, Any]) -> bool:
+     # assumes row holds only the value columns, with '항목' already removed
+     return all(v is None for v in row.values())
+
+
+ def records_to_items_map(records: list[dict[str, Any]]) -> ItemsMap:
+     grouped: dict[str, list[dict[str, Any]]] = defaultdict(list)
+
+     # 1) collect the rows for each item first
+     for r in records:
+         item = r.get("항목")
+         if not item:
+             continue
+         item = str(item).strip()
+
+         row = {k: _clean(v) for k, v in r.items() if k != "항목"}
+         grouped[item].append(row)
+
+     # 2) apply the dedup rules to build out
+     out: ItemsMap = {}
+
+     for item, rows in grouped.items():
+         if len(rows) == 1:
+             # ✅ rule 1: a non-duplicated item is kept even if every value is None
+             out[item] = rows[0]
+             continue
+
+         # ✅ rule 2: for duplicated items, drop the all-None rows
+         kept = [row for row in rows if not _is_all_none(row)]
+         if not kept:
+             continue  # if every duplicate is all-None, drop the whole group
+
+         # suffix only the survivors (the first one keeps the bare key)
+         for idx, row in enumerate(kept, start=1):
+             key = item if idx == 1 else f"{item}_{idx}"
+             out[key] = row
+
+     return out
+
+
+ class FetchC103:
+     def __init__(self, factory: BrowserFactoryPort):
+         self.factory = factory
+
+     async def _fetch_one(self, code: str, *, sleep_sec: float) -> C103DTO | None:
+         async with self.factory.lease() as browser:
+             url = f"https://navercomp.wisereport.co.kr/v2/company/c1030001.aspx?cn=&cmp_cd={code}"
+             await browser.goto(url, timeout_ms=10_000)
+
+             jitter_sec = 1.0
+             if sleep_sec > 0:
+                 delay = sleep_sec + random.uniform(0, jitter_sec)
+                 await asyncio.sleep(delay)
+
+             parsed = await parse_c103_to_dict(browser)
+
+             if not parsed or all(not parsed.get(k) for k in BLOCK_KEYS):
+                 return None
+
+             data: dict[str, Any] = {"코드": code}
+             for k in BLOCK_KEYS:
+                 data[k] = records_to_items_map(parsed.get(k, []))
+
+             return C103DTO(**data)
+
+     async def execute(self, code: str, *, sleep_sec: float = 2.0) -> C103DTO | None:
+         return await self._fetch_one(code, sleep_sec=sleep_sec)
+
+     async def execute_many(
+         self,
+         codes: Iterable[str],
+         *,
+         sleep_sec: float = 2.0,
+     ) -> list[C103DTO]:
+         results = await asyncio.gather(
+             *(self._fetch_one(c, sleep_sec=sleep_sec) for c in codes),
+             return_exceptions=False,
+         )
+         return [r for r in results if r is not None]
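For reference (not part of the diff): a small worked example of the c103 dedup rules above, using made-up records, to show how rule 1 and rule 2 interact:

    # hypothetical input rows, as the parser might emit them
    records = [
        {"항목": "매출액", "2023/12": None, "2024/12": None},           # unique -> kept even though all-None (rule 1)
        {"항목": "영업이익", "2023/12": 6.5, "2024/12": float("nan")},   # duplicate with data -> kept, NaN becomes None
        {"항목": "영업이익", "2023/12": None, "2024/12": None},          # duplicate, all-None -> dropped (rule 2)
    ]
    print(records_to_items_map(records))
    # {'매출액': {'2023/12': None, '2024/12': None},
    #  '영업이익': {'2023/12': 6.5, '2024/12': None}}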
@@ -0,0 +1,76 @@
+ # scraper2/app/usecases/fetch/fetch_c104.py
+ from __future__ import annotations
+
+ import math
+ import asyncio
+ import random
+ from typing import Iterable, Any
+ from collections import Counter
+
+ from contracts.nfs.c104 import C104DTO, ItemsMap
+ from scraper2.app.ports.browser.browser_factory_port import BrowserFactoryPort
+ from scraper2.app.parsing.c104_parser import parse_c104_to_dict
+
+
+ BLOCK_KEYS = (
+     "수익성y", "성장성y", "안정성y", "활동성y", "가치분석y",
+     "수익성q", "성장성q", "안정성q", "활동성q", "가치분석q",
+ )
+
+ def _clean(v: Any) -> Any:
+     if isinstance(v, float) and math.isnan(v):
+         return None
+     return v
+
+ def records_to_items_map(records: list[dict[str, Any]]) -> ItemsMap:
+     """
+     records (list[dict]) -> ItemsMap (dict[item -> row])
+     - duplicated items ('항목') get _2, _3, ... suffixes
+     """
+     out: ItemsMap = {}
+     seen: Counter[str] = Counter()
+
+     for r in records:
+         item = r.get("항목")
+         if not item:
+             continue
+
+         item = str(item).strip()
+         seen[item] += 1
+         key = item if seen[item] == 1 else f"{item}_{seen[item]}"
+
+         out[key] = {k: _clean(v) for k, v in r.items() if k != "항목"}
+
+     return out
+
+ class FetchC104:
+     def __init__(self, factory: BrowserFactoryPort):
+         self.factory = factory
+
+     async def _fetch_one(self, code: str, *, sleep_sec: float) -> C104DTO | None:
+         async with self.factory.lease() as browser:
+             url = f"https://navercomp.wisereport.co.kr/v2/company/c1040001.aspx?cn=&cmp_cd={code}"
+             await browser.goto(url, timeout_ms=10_000)
+
+             jitter_sec = 1.0
+             if sleep_sec > 0:
+                 delay = sleep_sec + random.uniform(0, jitter_sec)
+                 await asyncio.sleep(delay)
+
+             parsed = await parse_c104_to_dict(browser)
+
+             if not parsed or all(not parsed.get(k) for k in BLOCK_KEYS):
+                 return None
+
+             data: dict[str, Any] = {"코드": code}
+             for k in BLOCK_KEYS:
+                 data[k] = records_to_items_map(parsed.get(k, []))
+
+             return C104DTO(**data)
+
+     async def execute(self, code: str, *, sleep_sec: float = 2.0) -> C104DTO | None:
+         return await self._fetch_one(code, sleep_sec=sleep_sec)
+
+     async def execute_many(self, codes: Iterable[str], *, sleep_sec: float = 2.0) -> list[C104DTO]:
+         results = await asyncio.gather(*(self._fetch_one(c, sleep_sec=sleep_sec) for c in codes))
+         return [r for r in results if r is not None]
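Note the contrast with c103: the c104 records_to_items_map above keeps all-None rows and suffixes duplicates in encounter order. A quick sketch with made-up records:

    records = [
        {"항목": "ROE", "2023/12": 10.1},
        {"항목": "ROE", "2023/12": None},   # the duplicate is kept and suffixed, even if empty
    ]
    print(records_to_items_map(records))
    # {'ROE': {'2023/12': 10.1}, 'ROE_2': {'2023/12': None}}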
@@ -0,0 +1,90 @@
+ # scraper2/app/usecases/fetch/fetch_c106.py
+ from __future__ import annotations
+
+ import asyncio
+ import random
+ from typing import Iterable
+
+ from contracts.nfs.c106 import C106DTO, C106Block
+ from scraper2.app.ports.browser.browser_factory_port import BrowserFactoryPort
+ from scraper2.app.parsing.c106_parser import parse_c106_header, parse_c106_table_to_metrics, normalize_c106_metrics
+
+ from logging_hj3415 import logger
+
+ class FetchC106:
+     def __init__(self, factory: BrowserFactoryPort):
+         self.factory = factory
+
+     async def _fetch_one(self, code: str, *, sleep_sec: float) -> C106DTO | None:
+         async with self.factory.lease() as browser:
+             url = f"https://navercomp.wisereport.co.kr/v2/company/c1060001.aspx?cn=&cmp_cd={code}"
+             await browser.goto(url, timeout_ms=10_000)
+
+             jitter_sec = 1.0
+             if sleep_sec > 0:
+                 delay = sleep_sec + random.uniform(0, jitter_sec)
+                 await asyncio.sleep(delay)
+
+             company_names = await parse_c106_header(browser)
+
+             table_url = (
+                 f"https://navercomp.wisereport.co.kr/v2/company/cF6002.aspx"
+                 f"?cmp_cd={code}&finGubun=MAIN&sec_cd=FG000&frq="
+             )
+
+             stage = "init"
+             try:
+                 stage = "goto_q"
+                 await browser.goto(table_url + 'q', timeout_ms=10_000)
+                 await asyncio.sleep(1)
+
+                 stage = "parse_q"
+                 q = await parse_c106_table_to_metrics(browser, company_names)
+
+                 stage = "goto_y"
+                 await browser.goto(table_url + 'y', timeout_ms=10_000)
+                 await asyncio.sleep(1)
+
+                 stage = "parse_y"
+                 y = await parse_c106_table_to_metrics(browser, company_names)
+
+                 q_norm = normalize_c106_metrics(q)
+                 y_norm = normalize_c106_metrics(y)
+
+                 return C106DTO(
+                     코드=code,
+                     q=C106Block(**q_norm),
+                     y=C106Block(**y_norm),
+                 )
+             except Exception as e:
+                 title = ""
+                 try:
+                     title = await browser.title()
+                 except Exception:
+                     pass
+
+                 logger.bind(
+                     endpoint="c106",
+                     code=code,
+                     stage=stage,
+                     page_title=title,
+                     url_q=table_url + "q",
+                 ).warning("c106 skipped (unstable page/table)")
+                 return None
+
+
+
+     async def execute(self, code: str, *, sleep_sec: float = 2.0) -> C106DTO | None:
+         return await self._fetch_one(code, sleep_sec=sleep_sec)
+
+     async def execute_many(
+         self,
+         codes: Iterable[str],
+         *,
+         sleep_sec: float = 2.0,
+     ) -> list[C106DTO]:
+         results = await asyncio.gather(
+             *(self._fetch_one(c, sleep_sec=sleep_sec) for c in codes),
+             return_exceptions=False,
+         )
+         return [r for r in results if r is not None]
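Every fetcher above relies on factory.lease() acting as an async context manager that yields an object with goto() and title(). The real adapter lives in scraper2/adapters/out/playwright/browser_factory.py (not shown in this excerpt); a minimal, hypothetical stand-in with the same call shape, e.g. for tests, might look like:

    from contextlib import asynccontextmanager

    class FakeBrowser:
        """Records navigations instead of driving Playwright; every name here is made up."""
        def __init__(self) -> None:
            self.visited: list[str] = []

        async def goto(self, url: str, *, timeout_ms: int) -> None:
            self.visited.append(url)

        async def title(self) -> str:
            return "fake page"

    class FakeBrowserFactory:
        @asynccontextmanager
        async def lease(self):
            browser = FakeBrowser()
            try:
                yield browser
            finally:
                pass  # a real factory would return the browser/context to its pool here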
@@ -0,0 +1,49 @@
+ # scraper2/app/usecases/fetch/fetch_c108.py
+ from __future__ import annotations
+
+ import asyncio
+ import random
+ from typing import Iterable
+
+ from contracts.nfs.c108 import C108DTO
+ from scraper2.app.ports.browser.browser_factory_port import BrowserFactoryPort
+ from scraper2.app.parsing.c108_parser import parse_c108_to_dicts
+
+
+ class FetchC108:
+     def __init__(self, factory: BrowserFactoryPort):
+         self.factory = factory
+
+     async def _fetch_one(self, code: str, *, sleep_sec: float) -> list[C108DTO]:
+         async with self.factory.lease() as browser:
+             url = f"https://navercomp.wisereport.co.kr/v2/company/c1080001.aspx?cn=&cmp_cd={code}"
+             await browser.goto(url, timeout_ms=10_000)
+
+             # sleep + jitter (same shape as C101)
+             jitter_sec = 1.0
+             if sleep_sec > 0:
+                 delay = sleep_sec + random.uniform(0, jitter_sec)
+                 await asyncio.sleep(delay)
+
+             c108_dicts = await parse_c108_to_dicts(browser)
+
+             return [C108DTO(**{**x, "코드": code}) for x in c108_dicts]
+
+     async def execute(self, code: str, *, sleep_sec: float = 2.0) -> list[C108DTO]:
+         return await self._fetch_one(code, sleep_sec=sleep_sec)
+
+     async def execute_many(
+         self,
+         codes: Iterable[str],
+         *,
+         sleep_sec: float = 2.0,
+     ) -> list[C108DTO]:
+         results = await asyncio.gather(
+             *(self._fetch_one(c, sleep_sec=sleep_sec) for c in codes),
+             return_exceptions=False,
+         )
+         # list[list[C108DTO]] -> flat list[C108DTO]
+         out: list[C108DTO] = []
+         for chunk in results:
+             out.extend(chunk)
+         return out
@@ -0,0 +1,36 @@
+ # scraper2/app/usecases/ingest/ingest_c101.py
+ from __future__ import annotations
+
+ from datetime import datetime, timezone
+ from typing import Optional, Iterable
+
+ from contracts.nfs.c101 import C101DTO
+ from scraper2.app.usecases.fetch.fetch_c101 import FetchC101
+ from scraper2.app.ports.sinks.c101_sink_port import C101SinkPort
+ from scraper2.app.ports.ingest_port import IngestPort
+
+ def _utcnow():
+     return datetime.now(timezone.utc)
+
+ class IngestC101(IngestPort):
+     def __init__(self, fetch: FetchC101, sink: C101SinkPort):
+         self.fetch = fetch
+         self.sink = sink
+
+     async def execute(self, code: str, *, sleep_sec: float = 2.0) -> C101DTO:
+         dto = await self.fetch.execute(code, sleep_sec=sleep_sec)
+         await self.sink.write(dto)
+         return dto
+
+     async def execute_many(
+         self,
+         codes: Iterable[str],
+         *,
+         sleep_sec: float = 2.0,
+         asof: Optional[datetime] = None,
+     ) -> list[C101DTO]:
+         batch_asof = asof or _utcnow()
+
+         dtos = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
+         await self.sink.write_many(dtos, asof=batch_asof)
+         return dtos
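The ingest use cases are thin fetch-then-write wrappers; the actual wiring lives in scraper2/app/composition.py (not shown here). A rough sketch of manual wiring, assuming the memory sink and Playwright factory export no-argument constructors named as below (names are guesses, not confirmed by this diff):

    import asyncio

    from scraper2.adapters.out.playwright.browser_factory import BrowserFactory  # assumed export name
    from scraper2.adapters.out.sinks.memory import C101MemorySink                # assumed export name
    from scraper2.app.usecases.fetch.fetch_c101 import FetchC101
    from scraper2.app.usecases.ingest.ingest_c101 import IngestC101

    async def main() -> None:
        ingest = IngestC101(fetch=FetchC101(BrowserFactory()), sink=C101MemorySink())
        await ingest.execute_many(["005930", "000660"], sleep_sec=2.0)

    asyncio.run(main())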
@@ -0,0 +1,37 @@
+ # scraper2/app/usecases/ingest/ingest_c103.py
+ from __future__ import annotations
+
+ from datetime import datetime, timezone
+ from typing import Optional, Iterable
+
+ from contracts.nfs.c103 import C103DTO
+ from scraper2.app.usecases.fetch.fetch_c103 import FetchC103
+ from scraper2.app.ports.sinks.c103_sink_port import C103SinkPort
+ from scraper2.app.ports.ingest_port import IngestPort
+
+
+ def _utcnow():
+     return datetime.now(timezone.utc)
+
+ class IngestC103(IngestPort):
+     def __init__(self, fetch: FetchC103, sink: C103SinkPort):
+         self.fetch = fetch
+         self.sink = sink
+
+     async def execute(self, code: str, *, sleep_sec: float = 2.0) -> C103DTO:
+         dto = await self.fetch.execute(code, sleep_sec=sleep_sec)
+         await self.sink.write(dto)
+         return dto
+
+     async def execute_many(
+         self,
+         codes: Iterable[str],
+         *,
+         sleep_sec: float = 2.0,
+         asof: Optional[datetime] = None,
+     ) -> list[C103DTO]:
+         batch_asof = asof or _utcnow()
+
+         dtos = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
+         await self.sink.write_many(dtos, asof=batch_asof)
+         return dtos
@@ -0,0 +1,37 @@
+ # scraper2/app/usecases/ingest/ingest_c104.py
+ from __future__ import annotations
+
+ from datetime import datetime, timezone
+ from typing import Optional, Iterable
+
+ from contracts.nfs.c104 import C104DTO
+ from scraper2.app.usecases.fetch.fetch_c104 import FetchC104
+ from scraper2.app.ports.sinks.c104_sink_port import C104SinkPort
+ from scraper2.app.ports.ingest_port import IngestPort
+
+
+ def _utcnow():
+     return datetime.now(timezone.utc)
+
+ class IngestC104(IngestPort):
+     def __init__(self, fetch: FetchC104, sink: C104SinkPort):
+         self.fetch = fetch
+         self.sink = sink
+
+     async def execute(self, code: str, *, sleep_sec: float = 2.0) -> C104DTO:
+         dto = await self.fetch.execute(code, sleep_sec=sleep_sec)
+         await self.sink.write(dto)
+         return dto
+
+     async def execute_many(
+         self,
+         codes: Iterable[str],
+         *,
+         sleep_sec: float = 2.0,
+         asof: Optional[datetime] = None,
+     ) -> list[C104DTO]:
+         batch_asof = asof or _utcnow()
+
+         dtos = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
+         await self.sink.write_many(dtos, asof=batch_asof)
+         return dtos
@@ -0,0 +1,38 @@
+ # scraper2/app/usecases/ingest/ingest_c106.py
+ from __future__ import annotations
+
+ from datetime import datetime, timezone
+ from typing import Optional, Iterable
+
+ from contracts.nfs.c106 import C106DTO
+ from scraper2.app.usecases.fetch.fetch_c106 import FetchC106
+ from scraper2.app.ports.sinks.c106_sink_port import C106SinkPort
+ from scraper2.app.ports.ingest_port import IngestPort
+
+
+ def _utcnow():
+     return datetime.now(timezone.utc)
+
+ class IngestC106(IngestPort):
+     def __init__(self, fetch: FetchC106, sink: C106SinkPort):
+         self.fetch = fetch
+         self.sink = sink
+
+     async def execute(self, code: str, *, sleep_sec: float = 2.0) -> C106DTO | None:
+         dto = await self.fetch.execute(code, sleep_sec=sleep_sec)
+         if dto is not None:
+             await self.sink.write(dto)
+         return dto
+
+     async def execute_many(
+         self,
+         codes: Iterable[str],
+         *,
+         sleep_sec: float = 2.0,
+         asof: Optional[datetime] = None,
+     ) -> list[C106DTO]:
+         batch_asof = asof or _utcnow()
+
+         dtos = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
+         await self.sink.write_many(dtos, asof=batch_asof)
+         return dtos
@@ -0,0 +1,39 @@
+ # scraper2/app/usecases/ingest/ingest_c108.py
+ from __future__ import annotations
+
+ from datetime import datetime, timezone
+ from typing import Optional, Iterable
+
+ from contracts.nfs.c108 import C108DTO
+ from scraper2.app.usecases.fetch.fetch_c108 import FetchC108
+ from scraper2.app.ports.sinks.c108_sink_port import C108SinkPort
+ from scraper2.app.ports.ingest_port import IngestPort
+
+
+ def _utcnow():
+     return datetime.now(timezone.utc)
+
+ class IngestC108(IngestPort):
+     def __init__(self, fetch: FetchC108, sink: C108SinkPort):
+         self.fetch = fetch
+         self.sink = sink
+
+     async def execute(self, code: str, *, sleep_sec: float = 2.0) -> C108DTO | None:
+         dto = await self.fetch.execute(code, sleep_sec=sleep_sec)
+         if dto is not None:
+             await self.sink.write(dto)
+         return dto
+
+     async def execute_many(
+         self,
+         codes: Iterable[str],
+         *,
+         sleep_sec: float = 2.0,
+         asof: Optional[datetime] = None,
+     ) -> list[C108DTO]:
+         batch_asof = asof or _utcnow()
+
+         dtos = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
+         await self.sink.write_many(dtos, asof=batch_asof)
+         return dtos
+