aioscrapper 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff shows the changes between publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
@@ -1,2 +1 @@
-from .base import BasePipeline, BaseItem
-from .dispatcher import PipelineDispatcher
+from .base import BasePipeline
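For downstream code, the re-exports from `aioscrapper.pipeline` have narrowed. A minimal import-migration sketch, using only the module paths shown elsewhere in this diff (`BaseItem` is re-exported from `aioscrapper.types`, and the executor now imports `PipelineDispatcher` from `aioscrapper.pipeline.dispatcher`):

```python
# 0.1.1 exposed all three names from the pipeline subpackage:
# from aioscrapper.pipeline import BasePipeline, BaseItem, PipelineDispatcher

# 0.2.0 keeps only BasePipeline there; the other names now live at:
from aioscrapper.pipeline import BasePipeline
from aioscrapper.pipeline.dispatcher import PipelineDispatcher
from aioscrapper.types import BaseItem
```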
@@ -1,11 +1,7 @@
 import abc
-from typing import TypeVar, Generic, Protocol
-
-
-class BaseItem(Protocol):
-    @property
-    def pipeline_name(self) -> str: ...
+from typing import TypeVar, Generic
 
+from ..types import BaseItem
 
 ItemType = TypeVar("ItemType", bound=BaseItem)
 
@@ -10,7 +10,7 @@ class PipelineDispatcher:
         self._logger = logger
         self._pipelines = pipelines
 
-    async def put_item(self, item: BaseItem) -> BaseItem:
+    async def __call__(self, item: BaseItem) -> BaseItem:
         self._logger.debug(f"pipeline item received: {item}")
         try:
            pipelines = self._pipelines[item.pipeline_name]
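Because `put_item` became `__call__`, code that holds the dispatcher now invokes it directly. A minimal sketch of the renamed call site, using only the types exported by 0.2.0; `store_item` is a hypothetical helper, and the dispatcher is assumed to be the object handed to scrappers under the `pipeline` keyword, as a later executor hunk shows:

```python
from aioscrapper.types import BaseItem, Pipeline


async def store_item(pipeline: Pipeline, item: BaseItem) -> BaseItem:
    # 0.1.1: await pipeline.put_item(item)
    # 0.2.0: the dispatcher itself is the callable entry point
    return await pipeline(item)
```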
@@ -3,8 +3,28 @@ import abc
 
 class BaseScrapper(abc.ABC):
     @abc.abstractmethod
-    async def start(self, *args, **kwargs) -> None: ...
+    async def start(self, *args, **kwargs) -> None:
+        """
+        Starts the scrapper.
 
-    async def initialize(self, *args, **kwargs) -> None: ...
+        This method is called to start the scraper by sending the initial requests required for its operation.
+        """
+        ...
 
-    async def close(self, *args, **kwargs) -> None: ...
+    async def initialize(self, *args, **kwargs) -> None:
+        """
+        Initializes the scrapper.
+
+        This method is called before starting the scrapper. It should be used to initialize any
+        necessary state or resources required by the scrapper.
+        """
+        ...
+
+    async def close(self, *args, **kwargs) -> None:
+        """
+        Closes the scrapper.
+
+        This method is called to clean up any resources created by the scrapper after it has finished
+        running.
+        """
+        ...
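The new docstrings spell out the lifecycle: `initialize` runs before `start`, `start` enqueues the initial requests, and `close` releases resources afterwards. A sketch of a subclass under those assumptions; `NewsScrapper` and its attributes are hypothetical, and the `send_request` keyword is taken from the quick-start example further down in this diff (a later hunk shows the executor matching scrapper parameters by name via `get_func_kwargs`):

```python
from aioscrapper import BaseScrapper
from aioscrapper.types import RequestSender, Response


class NewsScrapper(BaseScrapper):
    async def initialize(self) -> None:
        # runs before start(): set up any state the scrapper needs
        self.seen_urls: set[str] = set()

    async def start(self, send_request: RequestSender) -> None:
        # entry point: enqueue the initial request(s)
        await send_request(url="https://example.com/news", callback=self.parse)

    async def parse(self, response: Response) -> None:
        ...  # handle the response

    async def close(self) -> None:
        # runs after the scrapper has finished: release what initialize() created
        self.seen_urls.clear()
```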
@@ -9,7 +9,8 @@ from aiojobs import Scheduler
 from .request_manager import RequestManager
 from ..config import Config
 from ..helpers import get_func_kwargs
-from ..pipeline import PipelineDispatcher, BasePipeline
+from ..pipeline import BasePipeline
+from ..pipeline.dispatcher import PipelineDispatcher
 from ..scrapper import BaseScrapper
 from ..session.aiohttp import AiohttpSession
 from ..types import RequestMiddleware, ResponseMiddleware
@@ -32,9 +33,7 @@ class AIOScrapper:
         self._response_middlewares = []
 
         self._pipelines: dict[str, list[BasePipeline]] = {}
-        self._pipeline_dispatcher = PipelineDispatcher(
-            logger=self._logger.getChild("pipeline"), pipelines=self._pipelines
-        )
+        self._pipeline_dispatcher = PipelineDispatcher(self._logger.getChild("pipeline"), pipelines=self._pipelines)
 
         def _exception_handler(_, context: dict[str, Any]):
             if "job" in context:
@@ -96,7 +95,7 @@ class AIOScrapper:
         await self._pipeline_dispatcher.initialize()
         self._request_manager.listen_queue()
 
-        scrapper_kwargs = {"request_sender": self._request_manager.sender, "pipeline": self._pipeline_dispatcher}
+        scrapper_kwargs = {"send_request": self._request_manager.sender, "pipeline": self._pipeline_dispatcher}
         for scrapper in self._scrappers:
             await scrapper.initialize(**get_func_kwargs(scrapper.initialize, scrapper_kwargs))
 
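The renamed `send_request` key only reaches a scrapper hook that declares a parameter with that name: `get_func_kwargs` filters `scrapper_kwargs` down to the keywords each hook accepts. Its real implementation is not part of this diff; the sketch below shows the presumed filtering behaviour with a stand-in helper:

```python
import inspect
from typing import Any, Callable


def filter_kwargs(func: Callable[..., Any], kwargs: dict[str, Any]) -> dict[str, Any]:
    """Stand-in for get_func_kwargs: keep only the keywords func declares."""
    params = inspect.signature(func).parameters
    return {name: value for name, value in kwargs.items() if name in params}


async def start(send_request) -> None:  # declares send_request, not pipeline
    ...


print(filter_kwargs(start, {"send_request": "sender", "pipeline": "dispatcher"}))
# {'send_request': 'sender'}
```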
@@ -1,4 +1,5 @@
 from .middleware import RequestMiddleware, ResponseMiddleware
+from .pipeline import BaseItem, Pipeline
 from .session import (
     QueryParams,
     Cookies,
@@ -0,0 +1,10 @@
+from typing import Protocol
+
+
+class BaseItem(Protocol):
+    @property
+    def pipeline_name(self) -> str: ...
+
+
+class Pipeline(Protocol):
+    async def __call__(self, item: BaseItem) -> BaseItem: ...
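Both protocols are structural: any object with a `pipeline_name` property qualifies as a `BaseItem`, and `Pipeline` matches the dispatcher's new `__call__` signature, so it can annotate the dispatcher object a scrapper receives. A small sketch with a hypothetical item type (how pipelines are registered with `AIOScrapper` is outside this diff):

```python
from dataclasses import dataclass

from aioscrapper.types import BaseItem


@dataclass
class ArticleItem:
    # hypothetical item: the pipeline_name property makes it a BaseItem structurally
    title: str
    url: str

    @property
    def pipeline_name(self) -> str:
        return "articles"  # key the dispatcher uses to look up matching pipelines


item: BaseItem = ArticleItem(title="hello", url="https://example.com")
```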
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aioscrapper
-Version: 0.1.1
+Version: 0.2.0
 Summary: Async framework for building modular and scalable web scrapers.
 Author: darkstussy
 Project-URL: Homepage, https://github.com/darkstussy/aioscrapper
@@ -12,18 +12,28 @@ Classifier: Intended Audience :: Developers
 Classifier: Operating System :: OS Independent
 Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search
 Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
-Requires-Python: >=3.12
+Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: aiohttp[speedups]~=3.11.16
 Requires-Dist: aiojobs~=1.4.0
+Provides-Extra: dev
+Requires-Dist: flake8~=7.1.2; extra == "dev"
+Requires-Dist: black~=25.1.0; extra == "dev"
+Requires-Dist: pyright~=1.1.399; extra == "dev"
+Requires-Dist: aiohttp[speedups]~=3.11.16; extra == "dev"
+Requires-Dist: aiojobs~=1.4.0; extra == "dev"
+Provides-Extra: test
+Requires-Dist: pytest~=8.3.5; extra == "test"
+Requires-Dist: pytest-asyncio~=0.26.0; extra == "test"
+Requires-Dist: aresponses~=3.0.0; extra == "test"
 Dynamic: license-file
 
 # aioscrapper
 
 **Asynchronous framework for building modular and scalable web scrapers.**
 
-![Python](https://img.shields.io/badge/python-3.12%2B-blue)
+![Python](https://img.shields.io/badge/python-3.10%2B-blue)
 ![License](https://img.shields.io/github/license/darkstussy/aioscrapper)
 ![Version](https://img.shields.io/github/v/tag/darkstussy/aioscrapper?label=version)
 
@@ -44,7 +54,7 @@ pip install aioscrapper
 
 ## Requirements
 
-- Python 3.12 or higher
+- Python 3.10 or higher
 - aiohttp
 - aiojobs
 
@@ -53,13 +63,13 @@ pip install aioscrapper
 ```python
 import asyncio
 
-from aioscrapper import BaseScrapper, AIOScrapper, RequestSender
-from aioscrapper.types import Response
+from aioscrapper import BaseScrapper, AIOScrapper
+from aioscrapper.types import Response, RequestSender
 
 
 class Scrapper(BaseScrapper):
-    async def start(self, request_sender: RequestSender) -> None:
-        await request_sender(url="https://example.com", callback=self.parse)
+    async def start(self, send_request: RequestSender) -> None:
+        await send_request(url="https://example.com", callback=self.parse)
 
     async def parse(self, response: Response) -> None:
         # handle response
@@ -2,21 +2,22 @@ aioscrapper/__init__.py,sha256=Yl57BbmijQN_UgP5nRUiWYqgU8kXQ9kplzzzyMcsUMY,197
 aioscrapper/config.py,sha256=yO5ipQUHxA_-CiSqJ0u7WioN6lu8VgT1ss5PRvS1foc,844
 aioscrapper/exceptions.py,sha256=Akk3zDTgws9E7J-Sh8bgdlgS8L3auDKuv3_U3aefxMc,765
 aioscrapper/helpers.py,sha256=slq9r5oCHrR7M9hKZFBLFRsWoqJcw_QFptQI1NjIdQw,610
-aioscrapper/pipeline/__init__.py,sha256=hv7Kcssd2BP0LM9fNZtaMs1tmRuAUu4mwAescoeV3Uk,84
-aioscrapper/pipeline/base.py,sha256=Ro7YGUOB-V2NJCtfgwhtQDedY4OYMu-jwEV8iR-L89k,405
-aioscrapper/pipeline/dispatcher.py,sha256=H4cHNxTyHEF4BnEwaW6nwmcRmK839GqbDTzZh1Zftv4,1156
+aioscrapper/pipeline/__init__.py,sha256=SX4r3KYdApDdMc5uc7hP_KWEBZJfiawgfdKxgbORdv0,31
+aioscrapper/pipeline/base.py,sha256=HN1gpvS0J9zU3DMfW480QrkivFXzRR36SMvgTjZNzCQ,342
+aioscrapper/pipeline/dispatcher.py,sha256=_erY39J0qf4ZP0R1xcN3OgvsbL6EDAh6jbju34iSo-0,1156
 aioscrapper/scrapper/__init__.py,sha256=UR7bTck-_YVoP2BqYdPldN9PgaCuJf9wvDdQLTVJ578,65
-aioscrapper/scrapper/base.py,sha256=2_WeLMyJICLmIG7N9r6BGmBg0f-wjEQPsVY076WHKOI,241
-aioscrapper/scrapper/executor.py,sha256=Rz2dNmdFOvjUXM6-8GLGNTpZmlEkxD24ZK3qlnioSuQ,5495
+aioscrapper/scrapper/base.py,sha256=It3l_X4AzbgxeKfiAqlo-6RmAyYwBFUrYZDJYGYAK3A,821
+aioscrapper/scrapper/executor.py,sha256=-zUibR8gsQ11zkpbMwL04s6tuTD9qadIcUragPQv4G8,5497
 aioscrapper/scrapper/request_manager.py,sha256=xhF_feppHQognTbbHjVUC13V4NwJJt7bCWwcyznFK84,5831
 aioscrapper/session/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 aioscrapper/session/aiohttp.py,sha256=8h4Ca1o2JJ7L24iqEnZ5I8bUudTn2cvTngBhM8eJPD4,1753
 aioscrapper/session/base.py,sha256=Zxw1VHIe_LgveUufJXh_cl0YeHykiutQveMUwZ6VL54,356
-aioscrapper/types/__init__.py,sha256=OK7vKZznJAWXQFBhXbQJha-XqGoRAE3lCaJUF7WXn64,210
+aioscrapper/types/__init__.py,sha256=6oaFkDXG57Ban9-46u7HiTFAg91KDb2_CFzfSikF7as,251
 aioscrapper/types/middleware.py,sha256=WtT73QTAlwhdP6UNFgyFHGpFOx1vlxehCAwiO6xjR10,326
+aioscrapper/types/pipeline.py,sha256=yoLTSZMaC6jFGKCrnXnlHEKlrMjHGpUPG90flNRfqbQ,200
 aioscrapper/types/session.py,sha256=WppvDBZ0sBWVddzz7RXLkg8iZCfZipTdHpKuGW-U090,2970
-aioscrapper-0.1.1.dist-info/licenses/LICENSE,sha256=EEeV20hghyroJWe2vcHjJma9PcjSkjD6vIwlUtaAjLE,1067
-aioscrapper-0.1.1.dist-info/METADATA,sha256=Q10bIQpw0JBM8oG3x5mP1pOFtUURYWcMOHvWLs-rVQQ,2350
-aioscrapper-0.1.1.dist-info/WHEEL,sha256=lTU6B6eIfYoiQJTZNc-fyaR6BpL6ehTzU3xGYxn2n8k,91
-aioscrapper-0.1.1.dist-info/top_level.txt,sha256=d7lbzXOwzzk2HLh-A0X7dkqn8q3zGAJcKqx6TkaEEWI,12
-aioscrapper-0.1.1.dist-info/RECORD,,
+aioscrapper-0.2.0.dist-info/licenses/LICENSE,sha256=EEeV20hghyroJWe2vcHjJma9PcjSkjD6vIwlUtaAjLE,1067
+aioscrapper-0.2.0.dist-info/METADATA,sha256=ud9KV0MB_23bGV1v3aFF-XHGIjeNzgqsZ_TZElvCxtI,2780
+aioscrapper-0.2.0.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
+aioscrapper-0.2.0.dist-info/top_level.txt,sha256=d7lbzXOwzzk2HLh-A0X7dkqn8q3zGAJcKqx6TkaEEWI,12
+aioscrapper-0.2.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (78.1.1)
+Generator: setuptools (79.0.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 