aioscrapper 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- aioscrapper/pipeline/__init__.py +1 -2
- aioscrapper/pipeline/base.py +2 -6
- aioscrapper/pipeline/dispatcher.py +1 -1
- aioscrapper/scrapper/base.py +23 -3
- aioscrapper/scrapper/executor.py +4 -5
- aioscrapper/types/__init__.py +1 -0
- aioscrapper/types/pipeline.py +10 -0
- {aioscrapper-0.1.1.dist-info → aioscrapper-0.2.0.dist-info}/METADATA +18 -8
- {aioscrapper-0.1.1.dist-info → aioscrapper-0.2.0.dist-info}/RECORD +12 -11
- {aioscrapper-0.1.1.dist-info → aioscrapper-0.2.0.dist-info}/WHEEL +1 -1
- {aioscrapper-0.1.1.dist-info → aioscrapper-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {aioscrapper-0.1.1.dist-info → aioscrapper-0.2.0.dist-info}/top_level.txt +0 -0
aioscrapper/pipeline/__init__.py
CHANGED
@@ -1,2 +1 @@
|
|
1
|
-
from .base import BasePipeline
|
2
|
-
from .dispatcher import PipelineDispatcher
|
1
|
+
from .base import BasePipeline
|
aioscrapper/pipeline/base.py
CHANGED
@@ -1,11 +1,7 @@
|
|
1
1
|
import abc
|
2
|
-
from typing import TypeVar, Generic
|
3
|
-
|
4
|
-
|
5
|
-
class BaseItem(Protocol):
|
6
|
-
@property
|
7
|
-
def pipeline_name(self) -> str: ...
|
2
|
+
from typing import TypeVar, Generic
|
8
3
|
|
4
|
+
from ..types import BaseItem
|
9
5
|
|
10
6
|
ItemType = TypeVar("ItemType", bound=BaseItem)
|
11
7
|
|
@@ -10,7 +10,7 @@ class PipelineDispatcher:
|
|
10
10
|
self._logger = logger
|
11
11
|
self._pipelines = pipelines
|
12
12
|
|
13
|
-
async def
|
13
|
+
async def __call__(self, item: BaseItem) -> BaseItem:
|
14
14
|
self._logger.debug(f"pipeline item received: {item}")
|
15
15
|
try:
|
16
16
|
pipelines = self._pipelines[item.pipeline_name]
|
aioscrapper/scrapper/base.py
CHANGED
@@ -3,8 +3,28 @@ import abc
|
|
3
3
|
|
4
4
|
class BaseScrapper(abc.ABC):
|
5
5
|
@abc.abstractmethod
|
6
|
-
async def start(self, *args, **kwargs) -> None:
|
6
|
+
async def start(self, *args, **kwargs) -> None:
|
7
|
+
"""
|
8
|
+
Starts the scrapper.
|
7
9
|
|
8
|
-
|
10
|
+
This method is called to start the scraper by sending the initial requests required for its operation.
|
11
|
+
"""
|
12
|
+
...
|
9
13
|
|
10
|
-
async def
|
14
|
+
async def initialize(self, *args, **kwargs) -> None:
|
15
|
+
"""
|
16
|
+
Initializes the scrapper.
|
17
|
+
|
18
|
+
This method is called before starting the scrapper. It should be used to initialize any
|
19
|
+
necessary state or resources required by the scrapper.
|
20
|
+
"""
|
21
|
+
...
|
22
|
+
|
23
|
+
async def close(self, *args, **kwargs) -> None:
|
24
|
+
"""
|
25
|
+
Closes the scrapper.
|
26
|
+
|
27
|
+
This method is called to clean up any resources created by the scrapper after it has finished
|
28
|
+
running.
|
29
|
+
"""
|
30
|
+
...
|
aioscrapper/scrapper/executor.py
CHANGED
@@ -9,7 +9,8 @@ from aiojobs import Scheduler
|
|
9
9
|
from .request_manager import RequestManager
|
10
10
|
from ..config import Config
|
11
11
|
from ..helpers import get_func_kwargs
|
12
|
-
from ..pipeline import
|
12
|
+
from ..pipeline import BasePipeline
|
13
|
+
from ..pipeline.dispatcher import PipelineDispatcher
|
13
14
|
from ..scrapper import BaseScrapper
|
14
15
|
from ..session.aiohttp import AiohttpSession
|
15
16
|
from ..types import RequestMiddleware, ResponseMiddleware
|
@@ -32,9 +33,7 @@ class AIOScrapper:
|
|
32
33
|
self._response_middlewares = []
|
33
34
|
|
34
35
|
self._pipelines: dict[str, list[BasePipeline]] = {}
|
35
|
-
self._pipeline_dispatcher = PipelineDispatcher(
|
36
|
-
logger=self._logger.getChild("pipeline"), pipelines=self._pipelines
|
37
|
-
)
|
36
|
+
self._pipeline_dispatcher = PipelineDispatcher(self._logger.getChild("pipeline"), pipelines=self._pipelines)
|
38
37
|
|
39
38
|
def _exception_handler(_, context: dict[str, Any]):
|
40
39
|
if "job" in context:
|
@@ -96,7 +95,7 @@ class AIOScrapper:
|
|
96
95
|
await self._pipeline_dispatcher.initialize()
|
97
96
|
self._request_manager.listen_queue()
|
98
97
|
|
99
|
-
scrapper_kwargs = {"
|
98
|
+
scrapper_kwargs = {"send_request": self._request_manager.sender, "pipeline": self._pipeline_dispatcher}
|
100
99
|
for scrapper in self._scrappers:
|
101
100
|
await scrapper.initialize(**get_func_kwargs(scrapper.initialize, scrapper_kwargs))
|
102
101
|
|
aioscrapper/types/__init__.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: aioscrapper
|
3
|
-
Version: 0.1.1
|
3
|
+
Version: 0.2.0
|
4
4
|
Summary: Async framework for building modular and scalable web scrapers.
|
5
5
|
Author: darkstussy
|
6
6
|
Project-URL: Homepage, https://github.com/darkstussy/aioscrapper
|
@@ -12,18 +12,28 @@ Classifier: Intended Audience :: Developers
|
|
12
12
|
Classifier: Operating System :: OS Independent
|
13
13
|
Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search
|
14
14
|
Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
|
15
|
-
Requires-Python: >=3.
|
15
|
+
Requires-Python: >=3.10
|
16
16
|
Description-Content-Type: text/markdown
|
17
17
|
License-File: LICENSE
|
18
18
|
Requires-Dist: aiohttp[speedups]~=3.11.16
|
19
19
|
Requires-Dist: aiojobs~=1.4.0
|
20
|
+
Provides-Extra: dev
|
21
|
+
Requires-Dist: flake8~=7.1.2; extra == "dev"
|
22
|
+
Requires-Dist: black~=25.1.0; extra == "dev"
|
23
|
+
Requires-Dist: pyright~=1.1.399; extra == "dev"
|
24
|
+
Requires-Dist: aiohttp[speedups]~=3.11.16; extra == "dev"
|
25
|
+
Requires-Dist: aiojobs~=1.4.0; extra == "dev"
|
26
|
+
Provides-Extra: test
|
27
|
+
Requires-Dist: pytest~=8.3.5; extra == "test"
|
28
|
+
Requires-Dist: pytest-asyncio~=0.26.0; extra == "test"
|
29
|
+
Requires-Dist: aresponses~=3.0.0; extra == "test"
|
20
30
|
Dynamic: license-file
|
21
31
|
|
22
32
|
# aioscrapper
|
23
33
|
|
24
34
|
**Asynchronous framework for building modular and scalable web scrapers.**
|
25
35
|
|
26
|
-

|
27
37
|

|
28
38
|

|
29
39
|
|
@@ -44,7 +54,7 @@ pip install aioscrapper
|
|
44
54
|
|
45
55
|
## Requirements
|
46
56
|
|
47
|
-
- Python 3.
|
57
|
+
- Python 3.10 or higher
|
48
58
|
- aiohttp
|
49
59
|
- aiojobs
|
50
60
|
|
@@ -53,13 +63,13 @@ pip install aioscrapper
|
|
53
63
|
```python
|
54
64
|
import asyncio
|
55
65
|
|
56
|
-
from aioscrapper import BaseScrapper, AIOScrapper
|
57
|
-
from aioscrapper.types import Response
|
66
|
+
from aioscrapper import BaseScrapper, AIOScrapper
|
67
|
+
from aioscrapper.types import Response, RequestSender
|
58
68
|
|
59
69
|
|
60
70
|
class Scrapper(BaseScrapper):
|
61
|
-
async def start(self,
|
62
|
-
await
|
71
|
+
async def start(self, send_request: RequestSender) -> None:
|
72
|
+
await send_request(url="https://example.com", callback=self.parse)
|
63
73
|
|
64
74
|
async def parse(self, response: Response) -> None:
|
65
75
|
# handle response
|
@@ -2,21 +2,22 @@ aioscrapper/__init__.py,sha256=Yl57BbmijQN_UgP5nRUiWYqgU8kXQ9kplzzzyMcsUMY,197
|
|
2
2
|
aioscrapper/config.py,sha256=yO5ipQUHxA_-CiSqJ0u7WioN6lu8VgT1ss5PRvS1foc,844
|
3
3
|
aioscrapper/exceptions.py,sha256=Akk3zDTgws9E7J-Sh8bgdlgS8L3auDKuv3_U3aefxMc,765
|
4
4
|
aioscrapper/helpers.py,sha256=slq9r5oCHrR7M9hKZFBLFRsWoqJcw_QFptQI1NjIdQw,610
|
5
|
-
aioscrapper/pipeline/__init__.py,sha256=
|
6
|
-
aioscrapper/pipeline/base.py,sha256=
|
7
|
-
aioscrapper/pipeline/dispatcher.py,sha256=
|
5
|
+
aioscrapper/pipeline/__init__.py,sha256=SX4r3KYdApDdMc5uc7hP_KWEBZJfiawgfdKxgbORdv0,31
|
6
|
+
aioscrapper/pipeline/base.py,sha256=HN1gpvS0J9zU3DMfW480QrkivFXzRR36SMvgTjZNzCQ,342
|
7
|
+
aioscrapper/pipeline/dispatcher.py,sha256=_erY39J0qf4ZP0R1xcN3OgvsbL6EDAh6jbju34iSo-0,1156
|
8
8
|
aioscrapper/scrapper/__init__.py,sha256=UR7bTck-_YVoP2BqYdPldN9PgaCuJf9wvDdQLTVJ578,65
|
9
|
-
aioscrapper/scrapper/base.py,sha256=
|
10
|
-
aioscrapper/scrapper/executor.py,sha256
|
9
|
+
aioscrapper/scrapper/base.py,sha256=It3l_X4AzbgxeKfiAqlo-6RmAyYwBFUrYZDJYGYAK3A,821
|
10
|
+
aioscrapper/scrapper/executor.py,sha256=-zUibR8gsQ11zkpbMwL04s6tuTD9qadIcUragPQv4G8,5497
|
11
11
|
aioscrapper/scrapper/request_manager.py,sha256=xhF_feppHQognTbbHjVUC13V4NwJJt7bCWwcyznFK84,5831
|
12
12
|
aioscrapper/session/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
13
|
aioscrapper/session/aiohttp.py,sha256=8h4Ca1o2JJ7L24iqEnZ5I8bUudTn2cvTngBhM8eJPD4,1753
|
14
14
|
aioscrapper/session/base.py,sha256=Zxw1VHIe_LgveUufJXh_cl0YeHykiutQveMUwZ6VL54,356
|
15
|
-
aioscrapper/types/__init__.py,sha256=
|
15
|
+
aioscrapper/types/__init__.py,sha256=6oaFkDXG57Ban9-46u7HiTFAg91KDb2_CFzfSikF7as,251
|
16
16
|
aioscrapper/types/middleware.py,sha256=WtT73QTAlwhdP6UNFgyFHGpFOx1vlxehCAwiO6xjR10,326
|
17
|
+
aioscrapper/types/pipeline.py,sha256=yoLTSZMaC6jFGKCrnXnlHEKlrMjHGpUPG90flNRfqbQ,200
|
17
18
|
aioscrapper/types/session.py,sha256=WppvDBZ0sBWVddzz7RXLkg8iZCfZipTdHpKuGW-U090,2970
|
18
|
-
aioscrapper-0.
|
19
|
-
aioscrapper-0.
|
20
|
-
aioscrapper-0.
|
21
|
-
aioscrapper-0.
|
22
|
-
aioscrapper-0.
|
19
|
+
aioscrapper-0.2.0.dist-info/licenses/LICENSE,sha256=EEeV20hghyroJWe2vcHjJma9PcjSkjD6vIwlUtaAjLE,1067
|
20
|
+
aioscrapper-0.2.0.dist-info/METADATA,sha256=ud9KV0MB_23bGV1v3aFF-XHGIjeNzgqsZ_TZElvCxtI,2780
|
21
|
+
aioscrapper-0.2.0.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
|
22
|
+
aioscrapper-0.2.0.dist-info/top_level.txt,sha256=d7lbzXOwzzk2HLh-A0X7dkqn8q3zGAJcKqx6TkaEEWI,12
|
23
|
+
aioscrapper-0.2.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|