scraper2-hj3415 2.4.1-py3-none-any.whl → 2.7.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. scraper2_hj3415/app/adapters/out/playwright/browser.py +26 -0
  2. {scraper2 → scraper2_hj3415/app}/adapters/out/playwright/browser_factory.py +7 -7
  3. scraper2_hj3415/app/adapters/out/playwright/capabilities/__init__.py +18 -0
  4. scraper2_hj3415/app/adapters/out/playwright/capabilities/_base.py +19 -0
  5. scraper2_hj3415/app/adapters/out/playwright/capabilities/interaction.py +37 -0
  6. scraper2_hj3415/app/adapters/out/playwright/capabilities/navigation.py +24 -0
  7. scraper2_hj3415/app/adapters/out/playwright/capabilities/scope.py +84 -0
  8. scraper2_hj3415/app/adapters/out/playwright/capabilities/table.py +90 -0
  9. scraper2_hj3415/app/adapters/out/playwright/capabilities/text.py +25 -0
  10. scraper2_hj3415/app/adapters/out/playwright/capabilities/wait.py +96 -0
  11. {scraper2 → scraper2_hj3415/app}/adapters/out/playwright/session.py +1 -1
  12. scraper2_hj3415/app/adapters/out/sinks/memory_sink.py +25 -0
  13. scraper2_hj3415/app/adapters/out/sinks/mongo_sink.py +63 -0
  14. {scraper2/adapters/out/sinks/memory → scraper2_hj3415/app/adapters/out/sinks}/store.py +14 -5
  15. scraper2_hj3415/app/adapters/site/wisereport_playwright.py +379 -0
  16. scraper2_hj3415/app/composition.py +225 -0
  17. scraper2_hj3415/app/domain/blocks.py +61 -0
  18. scraper2_hj3415/app/domain/constants.py +33 -0
  19. scraper2_hj3415/app/domain/doc.py +16 -0
  20. scraper2_hj3415/app/domain/endpoint.py +11 -0
  21. scraper2_hj3415/app/domain/series.py +11 -0
  22. scraper2_hj3415/app/domain/types.py +19 -0
  23. scraper2_hj3415/app/parsing/_normalize/label.py +92 -0
  24. scraper2_hj3415/app/parsing/_normalize/table.py +53 -0
  25. scraper2_hj3415/app/parsing/_normalize/text.py +31 -0
  26. scraper2_hj3415/app/parsing/_normalize/values.py +70 -0
  27. scraper2_hj3415/app/parsing/_tables/html_table.py +89 -0
  28. scraper2_hj3415/app/parsing/c101/__init__.py +0 -0
  29. scraper2_hj3415/app/parsing/c101/_sise_normalizer.py +103 -0
  30. scraper2_hj3415/app/parsing/c101/company_overview.py +47 -0
  31. scraper2_hj3415/app/parsing/c101/earning_surprise.py +217 -0
  32. scraper2_hj3415/app/parsing/c101/fundamentals.py +95 -0
  33. scraper2_hj3415/app/parsing/c101/major_shareholders.py +57 -0
  34. scraper2_hj3415/app/parsing/c101/sise.py +47 -0
  35. scraper2_hj3415/app/parsing/c101/summary_cmp.py +87 -0
  36. scraper2_hj3415/app/parsing/c101/yearly_consensus.py +197 -0
  37. scraper2_hj3415/app/parsing/c101_parser.py +45 -0
  38. scraper2_hj3415/app/parsing/c103_parser.py +22 -0
  39. scraper2_hj3415/app/parsing/c104_parser.py +26 -0
  40. scraper2_hj3415/app/parsing/c106_parser.py +137 -0
  41. scraper2_hj3415/app/parsing/c108_parser.py +254 -0
  42. scraper2_hj3415/app/ports/__init__.py +0 -0
  43. scraper2_hj3415/app/ports/browser/__init__.py +0 -0
  44. scraper2_hj3415/app/ports/browser/browser_factory_port.py +9 -0
  45. scraper2_hj3415/app/ports/browser/browser_port.py +32 -0
  46. scraper2_hj3415/app/ports/browser/capabilities/__init__.py +15 -0
  47. scraper2_hj3415/app/ports/browser/capabilities/interaction.py +27 -0
  48. scraper2_hj3415/app/ports/browser/capabilities/navigation.py +18 -0
  49. scraper2_hj3415/app/ports/browser/capabilities/scope.py +66 -0
  50. scraper2_hj3415/app/ports/browser/capabilities/table.py +28 -0
  51. scraper2_hj3415/app/ports/browser/capabilities/text.py +16 -0
  52. scraper2_hj3415/app/ports/browser/capabilities/wait.py +51 -0
  53. scraper2_hj3415/app/ports/ingest/__init__.py +0 -0
  54. scraper2_hj3415/app/ports/ingest/nfs_ingest_port.py +28 -0
  55. scraper2_hj3415/app/ports/sinks/__init__.py +0 -0
  56. scraper2_hj3415/app/ports/sinks/nfs_sink_port.py +20 -0
  57. scraper2_hj3415/app/ports/site/__init__.py +0 -0
  58. scraper2_hj3415/app/ports/site/wisereport_port.py +30 -0
  59. scraper2_hj3415/app/services/__init__.py +0 -0
  60. scraper2_hj3415/app/services/fetch/__init__.py +0 -0
  61. scraper2_hj3415/app/services/fetch/fetch_c101.py +59 -0
  62. scraper2_hj3415/app/services/fetch/fetch_c103.py +121 -0
  63. scraper2_hj3415/app/services/fetch/fetch_c104.py +160 -0
  64. scraper2_hj3415/app/services/fetch/fetch_c106.py +90 -0
  65. scraper2_hj3415/app/services/fetch/fetch_c108.py +59 -0
  66. scraper2_hj3415/app/services/nfs_doc_builders.py +304 -0
  67. scraper2_hj3415/app/usecases/__init__.py +0 -0
  68. scraper2_hj3415/app/usecases/ingest/__init__.py +0 -0
  69. scraper2_hj3415/app/usecases/ingest/ingest_c101.py +111 -0
  70. scraper2_hj3415/app/usecases/ingest/ingest_c103.py +162 -0
  71. scraper2_hj3415/app/usecases/ingest/ingest_c104.py +182 -0
  72. scraper2_hj3415/app/usecases/ingest/ingest_c106.py +136 -0
  73. scraper2_hj3415/app/usecases/ingest/ingest_c108.py +122 -0
  74. scraper2/main.py → scraper2_hj3415/cli.py +45 -72
  75. {scraper2_hj3415-2.4.1.dist-info → scraper2_hj3415-2.7.0.dist-info}/METADATA +3 -1
  76. scraper2_hj3415-2.7.0.dist-info/RECORD +93 -0
  77. scraper2_hj3415-2.7.0.dist-info/entry_points.txt +3 -0
  78. scraper2/adapters/out/playwright/browser.py +0 -102
  79. scraper2/adapters/out/sinks/memory/__init__.py +0 -15
  80. scraper2/adapters/out/sinks/memory/c101_memory_sink.py +0 -26
  81. scraper2/adapters/out/sinks/memory/c103_memory_sink.py +0 -26
  82. scraper2/adapters/out/sinks/memory/c104_memory_sink.py +0 -26
  83. scraper2/adapters/out/sinks/memory/c106_memory_sink.py +0 -26
  84. scraper2/adapters/out/sinks/memory/c108_memory_sink.py +0 -26
  85. scraper2/adapters/out/sinks/mongo/__init__.py +0 -14
  86. scraper2/adapters/out/sinks/mongo/c101_mongo_sink.py +0 -43
  87. scraper2/adapters/out/sinks/mongo/c103_mongo_sink.py +0 -41
  88. scraper2/adapters/out/sinks/mongo/c104_mongo_sink.py +0 -41
  89. scraper2/adapters/out/sinks/mongo/c106_mongo_sink.py +0 -41
  90. scraper2/adapters/out/sinks/mongo/c108_mongo_sink.py +0 -41
  91. scraper2/app/composition.py +0 -204
  92. scraper2/app/parsing/_converters.py +0 -85
  93. scraper2/app/parsing/_normalize.py +0 -134
  94. scraper2/app/parsing/c101_parser.py +0 -143
  95. scraper2/app/parsing/c103_parser.py +0 -128
  96. scraper2/app/parsing/c104_parser.py +0 -143
  97. scraper2/app/parsing/c106_parser.py +0 -153
  98. scraper2/app/parsing/c108_parser.py +0 -65
  99. scraper2/app/ports/browser/browser_factory_port.py +0 -11
  100. scraper2/app/ports/browser/browser_port.py +0 -22
  101. scraper2/app/ports/ingest_port.py +0 -14
  102. scraper2/app/ports/sinks/base_sink_port.py +0 -14
  103. scraper2/app/ports/sinks/c101_sink_port.py +0 -9
  104. scraper2/app/ports/sinks/c103_sink_port.py +0 -9
  105. scraper2/app/ports/sinks/c104_sink_port.py +0 -9
  106. scraper2/app/ports/sinks/c106_sink_port.py +0 -9
  107. scraper2/app/ports/sinks/c108_sink_port.py +0 -9
  108. scraper2/app/usecases/fetch/fetch_c101.py +0 -43
  109. scraper2/app/usecases/fetch/fetch_c103.py +0 -103
  110. scraper2/app/usecases/fetch/fetch_c104.py +0 -76
  111. scraper2/app/usecases/fetch/fetch_c106.py +0 -90
  112. scraper2/app/usecases/fetch/fetch_c108.py +0 -49
  113. scraper2/app/usecases/ingest/ingest_c101.py +0 -36
  114. scraper2/app/usecases/ingest/ingest_c103.py +0 -37
  115. scraper2/app/usecases/ingest/ingest_c104.py +0 -37
  116. scraper2/app/usecases/ingest/ingest_c106.py +0 -38
  117. scraper2/app/usecases/ingest/ingest_c108.py +0 -39
  118. scraper2_hj3415-2.4.1.dist-info/RECORD +0 -63
  119. scraper2_hj3415-2.4.1.dist-info/entry_points.txt +0 -3
  120. {scraper2 → scraper2_hj3415}/.DS_Store +0 -0
  121. {scraper2 → scraper2_hj3415}/__init__.py +0 -0
  122. {scraper2/adapters/out → scraper2_hj3415/app}/__init__.py +0 -0
  123. {scraper2/adapters/out/playwright → scraper2_hj3415/app/adapters}/__init__.py +0 -0
  124. {scraper2 → scraper2_hj3415/app}/adapters/out/.DS_Store +0 -0
  125. {scraper2/app → scraper2_hj3415/app/adapters/out}/__init__.py +0 -0
  126. {scraper2/app/parsing → scraper2_hj3415/app/adapters/out/playwright}/__init__.py +0 -0
  127. {scraper2 → scraper2_hj3415/app}/adapters/out/sinks/.DS_Store +0 -0
  128. {scraper2/app/ports → scraper2_hj3415/app/adapters/out/sinks}/__init__.py +0 -0
  129. {scraper2/app/ports/browser → scraper2_hj3415/app/adapters/site}/__init__.py +0 -0
  130. {scraper2/app/ports/sinks → scraper2_hj3415/app/domain}/__init__.py +0 -0
  131. {scraper2/app/usecases → scraper2_hj3415/app/parsing}/__init__.py +0 -0
  132. {scraper2/app/usecases/fetch → scraper2_hj3415/app/parsing/_normalize}/__init__.py +0 -0
  133. {scraper2/app/usecases/ingest → scraper2_hj3415/app/parsing/_tables}/__init__.py +0 -0
  134. {scraper2_hj3415-2.4.1.dist-info → scraper2_hj3415-2.7.0.dist-info}/WHEEL +0 -0
  135. {scraper2_hj3415-2.4.1.dist-info → scraper2_hj3415-2.7.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,76 +0,0 @@
- # scraper2/app/usecases/fetch/fetch_c104.py
- from __future__ import annotations
-
- import math
- import asyncio
- import random
- from typing import Iterable, Any
- from collections import Counter
-
- from contracts.nfs.c104 import C104DTO, ItemsMap
- from scraper2.app.ports.browser.browser_factory_port import BrowserFactoryPort
- from scraper2.app.parsing.c104_parser import parse_c104_to_dict
-
-
- BLOCK_KEYS = (
-     "수익성y", "성장성y", "안정성y", "활동성y", "가치분석y",
-     "수익성q", "성장성q", "안정성q", "활동성q", "가치분석q",
- )
-
- def _clean(v: Any) -> Any:
-     if isinstance(v, float) and math.isnan(v):
-         return None
-     return v
-
- def records_to_items_map(records: list[dict[str, Any]]) -> ItemsMap:
-     """
-     records(list[dict]) -> ItemsMap(dict[item -> row])
-     - duplicate item (항목) labels become _2, _3, ...
-     """
-     out: ItemsMap = {}
-     seen: Counter[str] = Counter()
-
-     for r in records:
-         item = r.get("항목")
-         if not item:
-             continue
-
-         item = str(item).strip()
-         seen[item] += 1
-         key = item if seen[item] == 1 else f"{item}_{seen[item]}"
-
-         out[key] = {k: _clean(v) for k, v in r.items() if k != "항목"}
-
-     return out
-
- class FetchC104:
-     def __init__(self, factory: BrowserFactoryPort):
-         self.factory = factory
-
-     async def _fetch_one(self, code: str, *, sleep_sec: float) -> C104DTO | None:
-         async with self.factory.lease() as browser:
-             url = f"https://navercomp.wisereport.co.kr/v2/company/c1040001.aspx?cn=&cmp_cd={code}"
-             await browser.goto(url, timeout_ms=10_000)
-
-             jitter_sec = 1.0
-             if sleep_sec > 0:
-                 delay = sleep_sec + random.uniform(0, jitter_sec)
-                 await asyncio.sleep(delay)
-
-             parsed = await parse_c104_to_dict(browser)
-
-             if not parsed or all(not parsed.get(k) for k in BLOCK_KEYS):
-                 return None
-
-             data: dict[str, Any] = {"코드": code}
-             for k in BLOCK_KEYS:
-                 data[k] = records_to_items_map(parsed.get(k, []))
-
-             return C104DTO(**data)
-
-     async def execute(self, code: str, *, sleep_sec: float = 2.0) -> C104DTO | None:
-         return await self._fetch_one(code, sleep_sec=sleep_sec)
-
-     async def execute_many(self, codes: Iterable[str], *, sleep_sec: float = 2.0) -> list[C104DTO]:
-         results = await asyncio.gather(*(self._fetch_one(c, sleep_sec=sleep_sec) for c in codes))
-         return [r for r in results if r is not None]
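The interesting piece in the removed file is records_to_items_map: it keys each parsed row by its 항목 label and disambiguates repeated labels with _2, _3 suffixes. A minimal standalone sketch of that behavior (the sample rows are hypothetical, and the NaN-scrubbing _clean step is omitted):

from collections import Counter
from typing import Any

def dedup_items(records: list[dict[str, Any]]) -> dict[str, dict[str, Any]]:
    # Same keying scheme as the removed records_to_items_map: the first
    # occurrence of a label keeps the bare key, repeats get _2, _3, ...
    out: dict[str, dict[str, Any]] = {}
    seen: Counter[str] = Counter()
    for r in records:
        item = r.get("항목")
        if not item:
            continue
        item = str(item).strip()
        seen[item] += 1
        key = item if seen[item] == 1 else f"{item}_{seen[item]}"
        out[key] = {k: v for k, v in r.items() if k != "항목"}
    return out

rows = [{"항목": "ROE", "2023": 10.1}, {"항목": "ROE", "2023": 9.8}]
assert list(dedup_items(rows)) == ["ROE", "ROE_2"]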
@@ -1,90 +0,0 @@
- # scraper2/app/usecases/fetch/fetch_c106.py
- from __future__ import annotations
-
- import asyncio
- import random
- from typing import Iterable
-
- from contracts.nfs.c106 import C106DTO, C106Block
- from scraper2.app.ports.browser.browser_factory_port import BrowserFactoryPort
- from scraper2.app.parsing.c106_parser import parse_c106_header, parse_c106_table_to_metrics, normalize_c106_metrics
-
- from logging_hj3415 import logger
-
- class FetchC106:
-     def __init__(self, factory: BrowserFactoryPort):
-         self.factory = factory
-
-     async def _fetch_one(self, code: str, *, sleep_sec: float) -> C106DTO | None:
-         async with self.factory.lease() as browser:
-             url = f"https://navercomp.wisereport.co.kr/v2/company/c1060001.aspx?cn=&cmp_cd={code}"
-             await browser.goto(url, timeout_ms=10_000)
-
-             jitter_sec = 1.0
-             if sleep_sec > 0:
-                 delay = sleep_sec + random.uniform(0, jitter_sec)
-                 await asyncio.sleep(delay)
-
-             company_names = await parse_c106_header(browser)
-
-             table_url = (
-                 f"https://navercomp.wisereport.co.kr/v2/company/cF6002.aspx"
-                 f"?cmp_cd={code}&finGubun=MAIN&sec_cd=FG000&frq="
-             )
-
-             stage = "init"
-             try:
-                 stage = "goto_q"
-                 await browser.goto(table_url + 'q', timeout_ms=10_000)
-                 await asyncio.sleep(1)
-
-                 stage = "parse_q"
-                 q = await parse_c106_table_to_metrics(browser, company_names)
-
-                 stage = "goto_y"
-                 await browser.goto(table_url + 'y', timeout_ms=10_000)
-                 await asyncio.sleep(1)
-
-                 stage = "parse_y"
-                 y = await parse_c106_table_to_metrics(browser, company_names)
-
-                 q_norm = normalize_c106_metrics(q)
-                 y_norm = normalize_c106_metrics(y)
-
-                 return C106DTO(
-                     코드=code,
-                     q=C106Block(**q_norm),
-                     y=C106Block(**y_norm),
-                 )
-             except Exception as e:
-                 title = ""
-                 try:
-                     title = await browser.title()
-                 except Exception:
-                     pass
-
-                 logger.bind(
-                     endpoint="c106",
-                     code=code,
-                     stage=stage,
-                     page_title=title,
-                     url_q=table_url + "q",
-                 ).warning("c106 skipped (unstable page/table)")
-                 return None
-
-
-
-     async def execute(self, code: str, *, sleep_sec: float = 2.0) -> C106DTO | None:
-         return await self._fetch_one(code, sleep_sec=sleep_sec)
-
-     async def execute_many(
-         self,
-         codes: Iterable[str],
-         *,
-         sleep_sec: float = 2.0,
-     ) -> list[C106DTO]:
-         results = await asyncio.gather(
-             *(self._fetch_one(c, sleep_sec=sleep_sec) for c in codes),
-             return_exceptions=False,
-         )
-         return [r for r in results if r is not None]
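The removed FetchC106 threads a stage marker through the scrape so a single except block can log which step failed. A stripped-down sketch of the pattern (the stage names and simulated failure here are illustrative):

import asyncio

async def scrape_with_stages() -> dict | None:
    # Record the last stage reached so one except block can report where
    # the failure happened: the pattern used by the removed FetchC106.
    stage = "init"
    try:
        stage = "goto_q"
        await asyncio.sleep(0)  # stand-in for browser.goto(table_url + 'q')
        stage = "parse_q"
        raise RuntimeError("table not rendered")  # simulate a flaky page
    except Exception:
        print(f"c106 skipped (failed at stage={stage})")
        return None

asyncio.run(scrape_with_stages())  # prints: c106 skipped (failed at stage=parse_q)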
@@ -1,49 +0,0 @@
- # scraper2/app/usecases/fetch/fetch_c108.py
- from __future__ import annotations
-
- import asyncio
- import random
- from typing import Iterable
-
- from contracts.nfs.c108 import C108DTO
- from scraper2.app.ports.browser.browser_factory_port import BrowserFactoryPort
- from scraper2.app.parsing.c108_parser import parse_c108_to_dicts
-
-
- class FetchC108:
-     def __init__(self, factory: BrowserFactoryPort):
-         self.factory = factory
-
-     async def _fetch_one(self, code: str, *, sleep_sec: float) -> list[C108DTO]:
-         async with self.factory.lease() as browser:
-             url = f"https://navercomp.wisereport.co.kr/v2/company/c1080001.aspx?cn=&cmp_cd={code}"
-             await browser.goto(url, timeout_ms=10_000)
-
-             # sleep + jitter (same shape as C101)
-             jitter_sec = 1.0
-             if sleep_sec > 0:
-                 delay = sleep_sec + random.uniform(0, jitter_sec)
-                 await asyncio.sleep(delay)
-
-             c108_dicts = await parse_c108_to_dicts(browser)
-
-             return [C108DTO(**{**x, "코드": code}) for x in c108_dicts]
-
-     async def execute(self, code: str, *, sleep_sec: float = 2.0) -> list[C108DTO]:
-         return await self._fetch_one(code, sleep_sec=sleep_sec)
-
-     async def execute_many(
-         self,
-         codes: Iterable[str],
-         *,
-         sleep_sec: float = 2.0,
-     ) -> list[C108DTO]:
-         results = await asyncio.gather(
-             *(self._fetch_one(c, sleep_sec=sleep_sec) for c in codes),
-             return_exceptions=False,
-         )
-         # list[list[C108DTO]] -> flat list[C108DTO]
-         out: list[C108DTO] = []
-         for chunk in results:
-             out.extend(chunk)
-         return out
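Unlike the other fetchers, _fetch_one here returns a list per code, so execute_many has to flatten list[list[...]] after asyncio.gather. A self-contained sketch of that step (stand-in data, no browser):

import asyncio

async def fetch_one(code: str) -> list[str]:
    # Stand-in for _fetch_one, which returned several C108 reports per code.
    return [f"{code}-report1", f"{code}-report2"]

async def fetch_many(codes: list[str]) -> list[str]:
    # gather() yields list[list[str]]; flatten it as the removed execute_many did.
    chunks = await asyncio.gather(*(fetch_one(c) for c in codes))
    return [item for chunk in chunks for item in chunk]

print(asyncio.run(fetch_many(["005930", "000660"])))
# ['005930-report1', '005930-report2', '000660-report1', '000660-report2']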
@@ -1,36 +0,0 @@
- # scraper2/app/usecases/ingest/ingest_c101.py
- from __future__ import annotations
-
- from datetime import datetime, timezone
- from typing import Optional, Iterable
-
- from contracts.nfs.c101 import C101DTO
- from scraper2.app.usecases.fetch.fetch_c101 import FetchC101
- from scraper2.app.ports.sinks.c101_sink_port import C101SinkPort
- from scraper2.app.ports.ingest_port import IngestPort
-
- def _utcnow():
-     return datetime.now(timezone.utc)
-
- class IngestC101(IngestPort):
-     def __init__(self, fetch: FetchC101, sink: C101SinkPort):
-         self.fetch = fetch
-         self.sink = sink
-
-     async def execute(self, code: str, *, sleep_sec: float = 2.0) -> C101DTO:
-         dto = await self.fetch.execute(code, sleep_sec=sleep_sec)
-         await self.sink.write(dto)
-         return dto
-
-     async def execute_many(
-         self,
-         codes: Iterable[str],
-         *,
-         sleep_sec: float = 2.0,
-         asof: Optional[datetime] = None,
-     ) -> list[C101DTO]:
-         batch_asof = asof or _utcnow()
-
-         dtos = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
-         await self.sink.write_many(dtos, asof=batch_asof)
-         return dtos
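Each removed ingest use case stamps a whole batch with one shared asof timestamp (asof or _utcnow()), so rows written in the same run compare equal on asof. A runnable sketch with an in-memory stand-in for the sink port (all names here are hypothetical):

import asyncio
from datetime import datetime, timezone

class MemorySink:
    # Stand-in for a CxxxSinkPort implementation.
    def __init__(self) -> None:
        self.rows: list[tuple[dict, datetime]] = []

    async def write_many(self, dtos: list[dict], *, asof: datetime) -> None:
        self.rows.extend((dto, asof) for dto in dtos)

async def fake_fetch_many(codes: list[str]) -> list[dict]:
    return [{"코드": c} for c in codes]

async def ingest_many(codes: list[str], sink: MemorySink) -> None:
    batch_asof = datetime.now(timezone.utc)  # one timestamp for the whole batch
    dtos = await fake_fetch_many(codes)
    await sink.write_many(dtos, asof=batch_asof)

sink = MemorySink()
asyncio.run(ingest_many(["005930", "000660"], sink))
assert sink.rows[0][1] == sink.rows[1][1]  # both rows share the batch asof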
@@ -1,37 +0,0 @@
- # scraper2/app/usecases/ingest/ingest_c103.py
- from __future__ import annotations
-
- from datetime import datetime, timezone
- from typing import Optional, Iterable
-
- from contracts.nfs.c103 import C103DTO
- from scraper2.app.usecases.fetch.fetch_c103 import FetchC103
- from scraper2.app.ports.sinks.c103_sink_port import C103SinkPort
- from scraper2.app.ports.ingest_port import IngestPort
-
-
- def _utcnow():
-     return datetime.now(timezone.utc)
-
- class IngestC103(IngestPort):
-     def __init__(self, fetch: FetchC103, sink: C103SinkPort):
-         self.fetch = fetch
-         self.sink = sink
-
-     async def execute(self, code: str, *, sleep_sec: float = 2.0) -> C103DTO:
-         dto = await self.fetch.execute(code, sleep_sec=sleep_sec)
-         await self.sink.write(dto)
-         return dto
-
-     async def execute_many(
-         self,
-         codes: Iterable[str],
-         *,
-         sleep_sec: float = 2.0,
-         asof: Optional[datetime] = None,
-     ) -> list[C103DTO]:
-         batch_asof = asof or _utcnow()
-
-         dtos = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
-         await self.sink.write_many(dtos, asof=batch_asof)
-         return dtos
@@ -1,37 +0,0 @@
- # scraper2/app/usecases/ingest/ingest_c104.py
- from __future__ import annotations
-
- from datetime import datetime, timezone
- from typing import Optional, Iterable
-
- from contracts.nfs.c104 import C104DTO
- from scraper2.app.usecases.fetch.fetch_c104 import FetchC104
- from scraper2.app.ports.sinks.c104_sink_port import C104SinkPort
- from scraper2.app.ports.ingest_port import IngestPort
-
-
- def _utcnow():
-     return datetime.now(timezone.utc)
-
- class IngestC104(IngestPort):
-     def __init__(self, fetch: FetchC104, sink: C104SinkPort):
-         self.fetch = fetch
-         self.sink = sink
-
-     async def execute(self, code: str, *, sleep_sec: float = 2.0) -> C104DTO:
-         dto = await self.fetch.execute(code, sleep_sec=sleep_sec)
-         await self.sink.write(dto)
-         return dto
-
-     async def execute_many(
-         self,
-         codes: Iterable[str],
-         *,
-         sleep_sec: float = 2.0,
-         asof: Optional[datetime] = None,
-     ) -> list[C104DTO]:
-         batch_asof = asof or _utcnow()
-
-         dtos = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
-         await self.sink.write_many(dtos, asof=batch_asof)
-         return dtos
@@ -1,38 +0,0 @@
- # scraper2/app/usecases/ingest/ingest_c106.py
- from __future__ import annotations
-
- from datetime import datetime, timezone
- from typing import Optional, Iterable
-
- from contracts.nfs.c106 import C106DTO
- from scraper2.app.usecases.fetch.fetch_c106 import FetchC106
- from scraper2.app.ports.sinks.c106_sink_port import C106SinkPort
- from scraper2.app.ports.ingest_port import IngestPort
-
-
- def _utcnow():
-     return datetime.now(timezone.utc)
-
- class IngestC106(IngestPort):
-     def __init__(self, fetch: FetchC106, sink: C106SinkPort):
-         self.fetch = fetch
-         self.sink = sink
-
-     async def execute(self, code: str, *, sleep_sec: float = 2.0) -> C106DTO | None:
-         dto = await self.fetch.execute(code, sleep_sec=sleep_sec)
-         if dto is not None:
-             await self.sink.write(dto)
-         return dto
-
-     async def execute_many(
-         self,
-         codes: Iterable[str],
-         *,
-         sleep_sec: float = 2.0,
-         asof: Optional[datetime] = None,
-     ) -> list[C106DTO]:
-         batch_asof = asof or _utcnow()
-
-         dtos = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
-         await self.sink.write_many(dtos, asof=batch_asof)
-         return dtos
@@ -1,39 +0,0 @@
- # scraper2/app/usecases/ingest/ingest_c108.py
- from __future__ import annotations
-
- from datetime import datetime, timezone
- from typing import Optional, Iterable
-
- from contracts.nfs.c108 import C108DTO
- from scraper2.app.usecases.fetch.fetch_c108 import FetchC108
- from scraper2.app.ports.sinks.c108_sink_port import C108SinkPort
- from scraper2.app.ports.ingest_port import IngestPort
-
-
- def _utcnow():
-     return datetime.now(timezone.utc)
-
- class IngestC108(IngestPort):
-     def __init__(self, fetch: FetchC108, sink: C108SinkPort):
-         self.fetch = fetch
-         self.sink = sink
-
-     async def execute(self, code: str, *, sleep_sec: float = 2.0) -> C108DTO | None:
-         dto = await self.fetch.execute(code, sleep_sec=sleep_sec)
-         if dto is not None:
-             await self.sink.write(dto)
-         return dto
-
-     async def execute_many(
-         self,
-         codes: Iterable[str],
-         *,
-         sleep_sec: float = 2.0,
-         asof: Optional[datetime] = None,
-     ) -> list[C108DTO]:
-         batch_asof = asof or _utcnow()
-
-         dtos = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
-         await self.sink.write_many(dtos, asof=batch_asof)
-         return dtos
-
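The five removed ingest classes above differ only in their DTO, fetch, and sink types. Purely as illustration (this generic form appears in neither version of the package), the shared shape could be written once:

import asyncio
from datetime import datetime, timezone
from typing import Generic, Iterable, Optional, Protocol, TypeVar

T = TypeVar("T")

class FetchLike(Protocol[T]):
    async def execute_many(self, codes: Iterable[str], *, sleep_sec: float = 2.0) -> list[T]: ...

class SinkLike(Protocol[T]):
    async def write_many(self, dtos: list[T], *, asof: datetime) -> None: ...

class Ingest(Generic[T]):
    # One generic class covering the shape IngestC101..IngestC108 each repeated.
    def __init__(self, fetch: FetchLike[T], sink: SinkLike[T]) -> None:
        self.fetch = fetch
        self.sink = sink

    async def execute_many(
        self,
        codes: Iterable[str],
        *,
        sleep_sec: float = 2.0,
        asof: Optional[datetime] = None,
    ) -> list[T]:
        batch_asof = asof or datetime.now(timezone.utc)
        dtos = await self.fetch.execute_many(codes, sleep_sec=sleep_sec)
        await self.sink.write_many(dtos, asof=batch_asof)
        return dtos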
scraper2_hj3415-2.4.1.dist-info/RECORD
@@ -1,63 +0,0 @@
- scraper2/.DS_Store,sha256=sEtRhy6uiX4PgYuHnRIsUN_QtI0jR0lSLi4kYurHsso,6148
- scraper2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- scraper2/main.py,sha256=Vwelc73P3WVbjwKFTXXA20P-nA7RoUE6hEq2oD0E2WU,8873
- scraper2/adapters/out/.DS_Store,sha256=nUqwRB5F2DM82P8BALYvDI0YoD1UbmngfSi8ukKkY7E,6148
- scraper2/adapters/out/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- scraper2/adapters/out/playwright/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- scraper2/adapters/out/playwright/browser.py,sha256=cjz-1iz3U2fmY0tntO5h4NADfW4hYtusCfPTCqSo2NM,3942
- scraper2/adapters/out/playwright/browser_factory.py,sha256=y008Dbu6VmzrEe7zn7CbODndQEVJtv_EBu04GsMbPGM,3697
- scraper2/adapters/out/playwright/session.py,sha256=hQDmYpi7pIVDjkymaTKQzJVWUsRRlvJg1V777V8q44M,3727
- scraper2/adapters/out/sinks/.DS_Store,sha256=c6VOGBl6bMmFzab335dcT09WinGd4BCZXZiPjrZjd7o,6148
- scraper2/adapters/out/sinks/memory/__init__.py,sha256=djvn50E0dBZr-H6Xmh9FMYalG2Zc0jL5kTXaBjnRaRo,400
- scraper2/adapters/out/sinks/memory/c101_memory_sink.py,sha256=yqIYGF43-Jja-IlFo6o6HcuL4RS0CEKRvqS8IULqRC8,833
- scraper2/adapters/out/sinks/memory/c103_memory_sink.py,sha256=WWjkaGp8v0TW-_NydRfv7wRDMrvItkv1qqvlTh_XUbg,850
- scraper2/adapters/out/sinks/memory/c104_memory_sink.py,sha256=Pa-1limYsYtQ0Y3qIU3M0f-QWbagUEIuzfzEtEgBgW4,850
- scraper2/adapters/out/sinks/memory/c106_memory_sink.py,sha256=UNse5lDJla-nTFgsytovw45SRhnGVWH0HWGR8RQRL1Q,849
- scraper2/adapters/out/sinks/memory/c108_memory_sink.py,sha256=XrDy28FtGbLllzTDance9R3LtVYvRP9GHLJVG-npEUI,849
- scraper2/adapters/out/sinks/memory/store.py,sha256=h4dwiCF5gne5kloRdD78NWlqtcaailIqoId9xAjtBk4,2738
- scraper2/adapters/out/sinks/mongo/__init__.py,sha256=YmEZqNqh7S4PFTufxd5sCF2k24rTOsxY3ZFrFVyQzh8,382
- scraper2/adapters/out/sinks/mongo/c101_mongo_sink.py,sha256=CpcafwjBJ-Jo-sm02hRC1H214B4aKfvV5MNoNP1AfxQ,1270
- scraper2/adapters/out/sinks/mongo/c103_mongo_sink.py,sha256=gKoAOL3Dj4_JVjhdc5QAZObk49pFT-ERCyJCpUF9j2k,1203
- scraper2/adapters/out/sinks/mongo/c104_mongo_sink.py,sha256=IicVnc2fyeBXoBbgMasB7utzF7f1S6upgHV4g3sjs4g,1203
- scraper2/adapters/out/sinks/mongo/c106_mongo_sink.py,sha256=FMdCp8WVjPwidnh7tIPUoViQWr48O16xtB34O_iCtJI,1203
- scraper2/adapters/out/sinks/mongo/c108_mongo_sink.py,sha256=eSvIRtofWvNKVPchglwL1mOw5hsKDpUfNz5EOum-H3Y,1203
- scraper2/app/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- scraper2/app/composition.py,sha256=sJuEAPg-mQ51-0ZsXmMOuAN_whKaq1BCMWHy0uAwxMA,6572
- scraper2/app/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- scraper2/app/parsing/_converters.py,sha256=z0kSL4nQnqq5w7QfJZBSbjZOLo1JhoqqDLpqlEAN4bo,1815
- scraper2/app/parsing/_normalize.py,sha256=2qqbXxTbzbuYlu7ttzQjyKgatFnwopme2b_fd0zahio,3738
- scraper2/app/parsing/c101_parser.py,sha256=QybZcd_7om4u3w5BWzbVNMcj50WrHtKr8hDOmkBMZGw,5537
- scraper2/app/parsing/c103_parser.py,sha256=BIHJ0OHUaGbVu3kyfgYQQIKf4O_lj4ZTXXPk6vl7Iok,3744
- scraper2/app/parsing/c104_parser.py,sha256=NGnzdVbhdqXFqJphwEDSlJPnM18RU759FCgry20a4ko,4193
- scraper2/app/parsing/c106_parser.py,sha256=JCml8HHnczgMUVnUkRI8AMEJ9mog1dOJfdd6hQKtv9I,4505
- scraper2/app/parsing/c108_parser.py,sha256=VEzzXliatoRdxR2_uSnHMHLNvV5h2irYiyoXAMQm8jc,1961
- scraper2/app/ports/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- scraper2/app/ports/ingest_port.py,sha256=mm8jzoaJeiqm2CCznzD347gEzLu7doBjpEwzbRu0R3A,351
- scraper2/app/ports/browser/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- scraper2/app/ports/browser/browser_factory_port.py,sha256=dJ3JCc38MVF7wPmCH4MO3BdnHIXE5wSmfsV7cLysJ54,401
- scraper2/app/ports/browser/browser_port.py,sha256=tFYkdCeTCF-5XHvT1duioj31ytsc-ATQpmEgposi1X4,1133
- scraper2/app/ports/sinks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- scraper2/app/ports/sinks/base_sink_port.py,sha256=1fJ0fCZ-uDOwfCy2MJLPOV5b_twsfOKGphlsxM2-uOw,414
- scraper2/app/ports/sinks/c101_sink_port.py,sha256=dO_A4AR-7lbPO_MlYz_CnumhO5SM_aJfC-09I2nTr2U,297
- scraper2/app/ports/sinks/c103_sink_port.py,sha256=xjRkgu_mxCMwnMHMyJy5dTHw8AxwrXmiWzT6cUCujXg,297
- scraper2/app/ports/sinks/c104_sink_port.py,sha256=0k_47ZZFTCt9jsFhWhDhhvtfdyZa3hVfF65bcLJX1AU,297
- scraper2/app/ports/sinks/c106_sink_port.py,sha256=cge47IiMoFGC_wmHAcHn2nTiH0h65df5N8HLwqzBuY4,297
- scraper2/app/ports/sinks/c108_sink_port.py,sha256=RLZRHJTvdZRsHcs18J0H98XQirW6xRmuMDx2XhiB3ac,297
- scraper2/app/usecases/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- scraper2/app/usecases/fetch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- scraper2/app/usecases/fetch/fetch_c101.py,sha256=uqvnH_D8Jp2_BtoiEUcbwkPulv7M9qcq5WI77vsOzCc,1480
- scraper2/app/usecases/fetch/fetch_c103.py,sha256=NHXkUQxM2-Z7N7oW0uW88G40j57eGHX7RQtXCnZVWcY,3321
- scraper2/app/usecases/fetch/fetch_c104.py,sha256=ZAja-G3hCEqLCzVDba2iuu1EFN_wUiDnm9iMcG5nsO4,2518
- scraper2/app/usecases/fetch/fetch_c106.py,sha256=cIFNJ1-_MgyOCIGtVSkEKbiVRBFIhwtKiv0C9uKvrB0,3049
- scraper2/app/usecases/fetch/fetch_c108.py,sha256=okVbNmCcXcgy9-9GOhvgqrHd6ujXv_lL0uogPr9POEs,1685
- scraper2/app/usecases/ingest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- scraper2/app/usecases/ingest/ingest_c101.py,sha256=feaasz-Tx_CCdcs_4Wh3KBQxj3oMefacbq3Ds6UDpmk,1149
- scraper2/app/usecases/ingest/ingest_c103.py,sha256=T_IKs5ikckin_05FnL-dxirAW1PPav867AuQsVsrZ5Y,1150
- scraper2/app/usecases/ingest/ingest_c104.py,sha256=2rGTcFbsATsn3d2KsSEkL5x4fmkGo7x9MHrysoxiICM,1150
- scraper2/app/usecases/ingest/ingest_c106.py,sha256=mQrbASbKVUQyVcIWiXzIczbX-1mNR5NGk760enVCfvo,1190
- scraper2/app/usecases/ingest/ingest_c108.py,sha256=49ULzdl0dN6z3istAKg29PcD5wHTxYholqAZiIEmUzU,1191
- scraper2_hj3415-2.4.1.dist-info/entry_points.txt,sha256=jUNx7ZJQedQ3QnsDN1ompQ0PjwdvVmnKdHHFMfQQPlI,46
- scraper2_hj3415-2.4.1.dist-info/licenses/LICENSE,sha256=QBiVGQuKAESeCfQE344Ik2ex6g2zfYdu9WqrRWydxIs,1068
- scraper2_hj3415-2.4.1.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
- scraper2_hj3415-2.4.1.dist-info/METADATA,sha256=NDBmtiIxLt2b5RsScBZdBiAoNTthZayM0p9zvwV4GO4,3457
- scraper2_hj3415-2.4.1.dist-info/RECORD,,
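Each removed RECORD row is the standard wheel manifest triple (file path, sha256 digest, size in bytes); the final RECORD,, entry is the manifest listing itself, which by convention omits its own hash and size.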
scraper2_hj3415-2.4.1.dist-info/entry_points.txt
@@ -1,3 +0,0 @@
- [console_scripts]
- scraper2=scraper2.main:app
-
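The removed entry point wired the scraper2 command to scraper2.main:app. Its 2.7.0 counterpart (file 77 above, not expanded in this diff) presumably targets the relocated scraper2_hj3415/cli.py, but its exact contents are not shown here; for reference, console_scripts entries follow this shape:

[console_scripts]
<command> = <package.module>:<callable>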