aioscrapper 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
aioscrapper/__init__.py CHANGED
@@ -4,7 +4,6 @@ __author__ = "darkstussy"
4
4
 
5
5
  __copyright__ = f"Copyright (c) 2025 {__author__}"
6
6
 
7
- from .request_sender import RequestSender
8
7
  from .scrapper import AIOScrapper, BaseScrapper
9
8
 
10
- __all__ = ["AIOScrapper", "BaseScrapper", "RequestSender"]
9
+ __all__ = ["AIOScrapper", "BaseScrapper"]
@@ -7,4 +7,4 @@ class BaseScrapper(abc.ABC):
7
7
 
8
8
  async def initialize(self, *args, **kwargs) -> None: ...
9
9
 
10
- async def close(self) -> None: ...
10
+ async def close(self, *args, **kwargs) -> None: ...
@@ -6,11 +6,10 @@ from typing import Type, Any
6
6
 
7
7
  from aiojobs import Scheduler
8
8
 
9
+ from .request_manager import RequestManager
9
10
  from ..config import Config
10
11
  from ..helpers import get_func_kwargs
11
12
  from ..pipeline import PipelineDispatcher, BasePipeline
12
- from ..request_manager import RequestManager
13
- from ..request_sender import RequestSender
14
13
  from ..scrapper import BaseScrapper
15
14
  from ..session.aiohttp import AiohttpSession
16
15
  from ..types import RequestMiddleware, ResponseMiddleware
@@ -33,13 +32,11 @@ class AIOScrapper:
33
32
  self._response_middlewares = []
34
33
 
35
34
  self._pipelines: dict[str, list[BasePipeline]] = {}
36
- self._pipeline_dispatcher = PipelineDispatcher(
37
- logger=self._logger.getChild("pipeline"), pipelines=self._pipelines
38
- )
35
+ self._pipeline_dispatcher = PipelineDispatcher(self._logger.getChild("pipeline"), pipelines=self._pipelines)
39
36
 
40
37
  def _exception_handler(_, context: dict[str, Any]):
41
38
  if "job" in context:
42
- self._logger.error(f'{context['message']}: {context["exception"]}', extra={"context": context})
39
+ self._logger.error(f'{context["message"]}: {context["exception"]}', extra={"context": context})
43
40
  else:
44
41
  self._logger.error("Unhandled error", extra={"context": context})
45
42
 
@@ -51,7 +48,6 @@ class AIOScrapper:
51
48
  )
52
49
 
53
50
  self._request_queue = asyncio.PriorityQueue()
54
- self._request_sender = RequestSender(self._request_queue)
55
51
  self._request_manager = RequestManager(
56
52
  logger=self._logger.getChild("request_worker"),
57
53
  session=AiohttpSession(
@@ -59,7 +55,6 @@ class AIOScrapper:
59
55
  ssl=self._config.session.request.ssl,
60
56
  ),
61
57
  schedule_request=self._scheduler.spawn,
62
- sender=self._request_sender,
63
58
  queue=self._request_queue,
64
59
  delay=self._config.session.request.delay,
65
60
  shutdown_timeout=self._config.execution.shutdown_timeout,
@@ -69,8 +64,6 @@ class AIOScrapper:
69
64
  response_middlewares=self._response_middlewares,
70
65
  )
71
66
 
72
- self._scrapper_kwargs = {"request_sender": self._request_sender, "pipeline": self._pipeline_dispatcher}
73
-
74
67
  def add_pipeline(self, name: str, pipeline: BasePipeline) -> None:
75
68
  if name not in self._pipelines:
76
69
  self._pipelines[name] = [pipeline]
@@ -86,7 +79,7 @@ class AIOScrapper:
86
79
  def add_response_middlewares(self, *middlewares: ResponseMiddleware) -> None:
87
80
  self._response_middlewares.extend(middlewares)
88
81
 
89
- async def __aenter__(self):
82
+ async def __aenter__(self) -> "AIOScrapper":
90
83
  return self
91
84
 
92
85
  async def __aexit__(
@@ -101,11 +94,12 @@ class AIOScrapper:
101
94
  await self._pipeline_dispatcher.initialize()
102
95
  self._request_manager.listen_queue()
103
96
 
97
+ scrapper_kwargs = {"request_sender": self._request_manager.sender, "pipeline": self._pipeline_dispatcher}
104
98
  for scrapper in self._scrappers:
105
- await scrapper.initialize(**get_func_kwargs(scrapper.initialize, self._scrapper_kwargs))
99
+ await scrapper.initialize(**get_func_kwargs(scrapper.initialize, scrapper_kwargs))
106
100
 
107
101
  await asyncio.gather(
108
- *[scrapper.start(**get_func_kwargs(scrapper.start, self._scrapper_kwargs)) for scrapper in self._scrappers]
102
+ *[scrapper.start(**get_func_kwargs(scrapper.start, scrapper_kwargs)) for scrapper in self._scrappers]
109
103
  )
110
104
 
111
105
  async def _shutdown(self) -> bool:
@@ -138,9 +132,11 @@ class AIOScrapper:
138
132
  if shutdown:
139
133
  await self.shutdown()
140
134
 
141
- for scrapper in self._scrappers:
142
- await scrapper.close()
143
-
144
- await self._scheduler.close()
145
- await self._request_manager.close()
146
- await self._pipeline_dispatcher.close()
135
+ scrapper_kwargs = {"pipeline": self._pipeline_dispatcher}
136
+ try:
137
+ for scrapper in self._scrappers:
138
+ await scrapper.close(**get_func_kwargs(scrapper.close, scrapper_kwargs))
139
+ finally:
140
+ await self._scheduler.close()
141
+ await self._request_manager.close()
142
+ await self._pipeline_dispatcher.close()
@@ -1,13 +1,76 @@
1
1
  import asyncio
2
+ from dataclasses import dataclass, field
2
3
  from logging import Logger
3
- from typing import Callable, Awaitable, Any, Coroutine
4
+ from typing import Callable, Awaitable, Any
5
+ from typing import Coroutine
4
6
 
5
- from .exceptions import HTTPException, RequestException, ClientException
6
- from .helpers import get_cb_kwargs
7
- from .request_sender import RequestSender
8
- from .session.base import BaseSession
9
- from .types import Request, RequestParams, RequestQueue
10
- from .types import RequestMiddleware, ResponseMiddleware
7
+ from ..exceptions import HTTPException, RequestException, ClientException
8
+ from ..helpers import get_cb_kwargs
9
+ from ..session.base import BaseSession
10
+ from ..types import (
11
+ QueryParams,
12
+ Cookies,
13
+ Headers,
14
+ BasicAuth,
15
+ Request,
16
+ RequestParams,
17
+ RequestMiddleware,
18
+ ResponseMiddleware,
19
+ RequestSender,
20
+ )
21
+
22
+
23
+ @dataclass(slots=True, order=True)
24
+ class _PRPRequest:
25
+ priority: int
26
+ request: Request = field(compare=False)
27
+ request_params: RequestParams = field(compare=False)
28
+
29
+
30
+ _RequestQueue = asyncio.PriorityQueue[_PRPRequest | None]
31
+
32
+
33
+ def _get_request_sender(queue: _RequestQueue) -> RequestSender:
34
+ async def sender(
35
+ url: str,
36
+ method: str = "GET",
37
+ callback: Callable[..., Awaitable] | None = None,
38
+ cb_kwargs: dict[str, Any] | None = None,
39
+ errback: Callable[..., Awaitable] | None = None,
40
+ params: QueryParams | None = None,
41
+ data: Any = None,
42
+ json_data: Any = None,
43
+ cookies: Cookies | None = None,
44
+ headers: Headers | None = None,
45
+ proxy: str | None = None,
46
+ auth: BasicAuth | None = None,
47
+ timeout: float | None = None,
48
+ priority: int = 0,
49
+ ) -> None:
50
+ await queue.put(
51
+ _PRPRequest(
52
+ priority=priority,
53
+ request=Request(
54
+ method=method,
55
+ url=url,
56
+ params=params,
57
+ data=data,
58
+ json_data=json_data,
59
+ cookies=cookies,
60
+ headers=headers,
61
+ auth=auth,
62
+ proxy=proxy,
63
+ timeout=timeout,
64
+ ),
65
+ request_params=RequestParams(
66
+ callback=callback,
67
+ cb_kwargs=cb_kwargs,
68
+ errback=errback,
69
+ ),
70
+ )
71
+ )
72
+
73
+ return sender
11
74
 
12
75
 
13
76
  class RequestManager:
@@ -16,8 +79,7 @@ class RequestManager:
16
79
  logger: Logger,
17
80
  session: BaseSession,
18
81
  schedule_request: Callable[[Coroutine], Awaitable],
19
- sender: RequestSender,
20
- queue: RequestQueue,
82
+ queue: _RequestQueue,
21
83
  delay: float,
22
84
  shutdown_timeout: float,
23
85
  srv_kwargs: dict[str, Any],
@@ -31,12 +93,17 @@ class RequestManager:
31
93
  self._queue = queue
32
94
  self._delay = delay
33
95
  self._shutdown_timeout = shutdown_timeout
34
- self._srv_kwargs = {"send_request": sender, **srv_kwargs}
96
+ self._request_sender = _get_request_sender(queue)
97
+ self._srv_kwargs = {"send_request": self._request_sender, **srv_kwargs}
35
98
  self._request_outer_middlewares = request_outer_middlewares
36
99
  self._request_inner_middlewares = request_inner_middlewares
37
100
  self._response_middlewares = response_middlewares
38
101
  self._task: asyncio.Task | None = None
39
102
 
103
+ @property
104
+ def sender(self) -> RequestSender:
105
+ return self._request_sender
106
+
40
107
  async def _send_request(self, request: Request, params: RequestParams) -> None:
41
108
  full_url = request.full_url
42
109
  self._logger.debug(f"request: {request.method} {full_url}")
@@ -6,7 +6,6 @@ from .session import (
6
6
  BasicAuth,
7
7
  Request,
8
8
  RequestParams,
9
- PRPRequest,
10
- RequestQueue,
9
+ RequestSender,
11
10
  Response,
12
11
  )
@@ -1,7 +1,6 @@
1
- import asyncio
2
1
  import json
3
- from dataclasses import field, dataclass
4
- from typing import Union, Mapping, Any, Callable, Awaitable, TypedDict
2
+ from dataclasses import dataclass
3
+ from typing import Union, Mapping, Any, Callable, Awaitable, TypedDict, Protocol
5
4
  from urllib.parse import urlencode
6
5
 
7
6
  QueryParams = Mapping[str, Union[str, int, float]]
@@ -40,14 +39,24 @@ class RequestParams:
40
39
  errback: Callable[..., Awaitable] | None = None
41
40
 
42
41
 
43
- @dataclass(slots=True, order=True)
44
- class PRPRequest:
45
- priority: int
46
- request: Request = field(compare=False)
47
- request_params: RequestParams = field(compare=False)
48
-
49
-
50
- RequestQueue = asyncio.PriorityQueue[PRPRequest | None]
42
+ class RequestSender(Protocol):
43
+ async def __call__(
44
+ self,
45
+ url: str,
46
+ method: str = "GET",
47
+ callback: Callable[..., Awaitable] | None = None,
48
+ cb_kwargs: dict[str, Any] | None = None,
49
+ errback: Callable[..., Awaitable] | None = None,
50
+ params: QueryParams | None = None,
51
+ data: Any = None,
52
+ json_data: Any = None,
53
+ cookies: Cookies | None = None,
54
+ headers: Headers | None = None,
55
+ proxy: str | None = None,
56
+ auth: BasicAuth | None = None,
57
+ timeout: float | None = None,
58
+ priority: int = 0,
59
+ ) -> None: ...
51
60
 
52
61
 
53
62
  class Response:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aioscrapper
3
- Version: 0.1.0
3
+ Version: 0.1.2
4
4
  Summary: Async framework for building modular and scalable web scrapers.
5
5
  Author: darkstussy
6
6
  Project-URL: Homepage, https://github.com/darkstussy/aioscrapper
@@ -12,18 +12,28 @@ Classifier: Intended Audience :: Developers
12
12
  Classifier: Operating System :: OS Independent
13
13
  Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search
14
14
  Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
15
- Requires-Python: >=3.12
15
+ Requires-Python: >=3.10
16
16
  Description-Content-Type: text/markdown
17
17
  License-File: LICENSE
18
18
  Requires-Dist: aiohttp[speedups]~=3.11.16
19
19
  Requires-Dist: aiojobs~=1.4.0
20
+ Provides-Extra: dev
21
+ Requires-Dist: flake8~=7.1.2; extra == "dev"
22
+ Requires-Dist: black~=25.1.0; extra == "dev"
23
+ Requires-Dist: pyright~=1.1.399; extra == "dev"
24
+ Requires-Dist: aiohttp[speedups]~=3.11.16; extra == "dev"
25
+ Requires-Dist: aiojobs~=1.4.0; extra == "dev"
26
+ Provides-Extra: test
27
+ Requires-Dist: pytest~=8.3.5; extra == "test"
28
+ Requires-Dist: pytest-asyncio~=0.26.0; extra == "test"
29
+ Requires-Dist: aresponses~=3.0.0; extra == "test"
20
30
  Dynamic: license-file
21
31
 
22
32
  # aioscrapper
23
33
 
24
34
  **Asynchronous framework for building modular and scalable web scrapers.**
25
35
 
26
- ![Python](https://img.shields.io/badge/python-3.12%2B-blue)
36
+ ![Python](https://img.shields.io/badge/python-3.10%2B-blue)
27
37
  ![License](https://img.shields.io/github/license/darkstussy/aioscrapper)
28
38
  ![Version](https://img.shields.io/github/v/tag/darkstussy/aioscrapper?label=version)
29
39
 
@@ -44,7 +54,7 @@ pip install aioscrapper
44
54
 
45
55
  ## Requirements
46
56
 
47
- - Python 3.12 or higher
57
+ - Python 3.10 or higher
48
58
  - aiohttp
49
59
  - aiojobs
50
60
 
@@ -53,8 +63,8 @@ pip install aioscrapper
53
63
  ```python
54
64
  import asyncio
55
65
 
56
- from aioscrapper import BaseScrapper, AIOScrapper, RequestSender
57
- from aioscrapper.types import Response
66
+ from aioscrapper import BaseScrapper, AIOScrapper
67
+ from aioscrapper.types import Response, RequestSender
58
68
 
59
69
 
60
70
  class Scrapper(BaseScrapper):
@@ -83,5 +93,6 @@ Copyright (c) 2025 darkstussy
83
93
 
84
94
  ## Links
85
95
 
96
+ - [PyPI](https://pypi.org/project/aioscrapper)
86
97
  - [GitHub](https://github.com/darkstussy/aioscrapper)
87
98
  - [Issues](https://github.com/darkstussy/aioscrapper/issues)
@@ -0,0 +1,22 @@
1
+ aioscrapper/__init__.py,sha256=Yl57BbmijQN_UgP5nRUiWYqgU8kXQ9kplzzzyMcsUMY,197
2
+ aioscrapper/config.py,sha256=yO5ipQUHxA_-CiSqJ0u7WioN6lu8VgT1ss5PRvS1foc,844
3
+ aioscrapper/exceptions.py,sha256=Akk3zDTgws9E7J-Sh8bgdlgS8L3auDKuv3_U3aefxMc,765
4
+ aioscrapper/helpers.py,sha256=slq9r5oCHrR7M9hKZFBLFRsWoqJcw_QFptQI1NjIdQw,610
5
+ aioscrapper/pipeline/__init__.py,sha256=hv7Kcssd2BP0LM9fNZtaMs1tmRuAUu4mwAescoeV3Uk,84
6
+ aioscrapper/pipeline/base.py,sha256=Ro7YGUOB-V2NJCtfgwhtQDedY4OYMu-jwEV8iR-L89k,405
7
+ aioscrapper/pipeline/dispatcher.py,sha256=H4cHNxTyHEF4BnEwaW6nwmcRmK839GqbDTzZh1Zftv4,1156
8
+ aioscrapper/scrapper/__init__.py,sha256=UR7bTck-_YVoP2BqYdPldN9PgaCuJf9wvDdQLTVJ578,65
9
+ aioscrapper/scrapper/base.py,sha256=2_WeLMyJICLmIG7N9r6BGmBg0f-wjEQPsVY076WHKOI,241
10
+ aioscrapper/scrapper/executor.py,sha256=5j66FqVaZ5AWXMF-SHfTiqkORlTkY53rZDPO2pWLfww,5466
11
+ aioscrapper/scrapper/request_manager.py,sha256=xhF_feppHQognTbbHjVUC13V4NwJJt7bCWwcyznFK84,5831
12
+ aioscrapper/session/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ aioscrapper/session/aiohttp.py,sha256=8h4Ca1o2JJ7L24iqEnZ5I8bUudTn2cvTngBhM8eJPD4,1753
14
+ aioscrapper/session/base.py,sha256=Zxw1VHIe_LgveUufJXh_cl0YeHykiutQveMUwZ6VL54,356
15
+ aioscrapper/types/__init__.py,sha256=OK7vKZznJAWXQFBhXbQJha-XqGoRAE3lCaJUF7WXn64,210
16
+ aioscrapper/types/middleware.py,sha256=WtT73QTAlwhdP6UNFgyFHGpFOx1vlxehCAwiO6xjR10,326
17
+ aioscrapper/types/session.py,sha256=WppvDBZ0sBWVddzz7RXLkg8iZCfZipTdHpKuGW-U090,2970
18
+ aioscrapper-0.1.2.dist-info/licenses/LICENSE,sha256=EEeV20hghyroJWe2vcHjJma9PcjSkjD6vIwlUtaAjLE,1067
19
+ aioscrapper-0.1.2.dist-info/METADATA,sha256=CBYHtlAtL4dgEp7Y0RMdx4JKvAaLV3roEfHL8YQGRk8,2784
20
+ aioscrapper-0.1.2.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
21
+ aioscrapper-0.1.2.dist-info/top_level.txt,sha256=d7lbzXOwzzk2HLh-A0X7dkqn8q3zGAJcKqx6TkaEEWI,12
22
+ aioscrapper-0.1.2.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (78.1.0)
2
+ Generator: setuptools (79.0.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2024 Stanislav
3
+ Copyright (c) 2025 darkstussy
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -1,52 +0,0 @@
1
- import asyncio
2
- from typing import Callable, Awaitable, Any
3
-
4
- from .types import QueryParams, Cookies, Headers, BasicAuth, Request, RequestParams, RequestQueue, PRPRequest
5
-
6
-
7
- class RequestSender:
8
- def __init__(self, queue: RequestQueue) -> None:
9
- self._queue = queue
10
-
11
- async def __call__(
12
- self,
13
- url: str,
14
- method: str = "GET",
15
- callback: Callable[..., Awaitable] | None = None,
16
- cb_kwargs: dict[str, Any] | None = None,
17
- errback: Callable[..., Awaitable] | None = None,
18
- params: QueryParams | None = None,
19
- data: Any = None,
20
- json_data: Any = None,
21
- cookies: Cookies | None = None,
22
- headers: Headers | None = None,
23
- proxy: str | None = None,
24
- auth: BasicAuth | None = None,
25
- timeout: float | None = None,
26
- priority: int = 0,
27
- delay: float | None = None,
28
- ) -> None:
29
- await self._queue.put(
30
- PRPRequest(
31
- priority=priority,
32
- request=Request(
33
- method=method,
34
- url=url,
35
- params=params,
36
- data=data,
37
- json_data=json_data,
38
- cookies=cookies,
39
- headers=headers,
40
- auth=auth,
41
- proxy=proxy,
42
- timeout=timeout,
43
- ),
44
- request_params=RequestParams(
45
- callback=callback,
46
- cb_kwargs=cb_kwargs,
47
- errback=errback,
48
- ),
49
- )
50
- )
51
- if delay:
52
- await asyncio.sleep(delay)
@@ -1,23 +0,0 @@
1
- aioscrapper/__init__.py,sha256=_01EI59FLQmHspoN9HqJMoJ9OHpEaYGDyFXAKtfnnYY,256
2
- aioscrapper/config.py,sha256=yO5ipQUHxA_-CiSqJ0u7WioN6lu8VgT1ss5PRvS1foc,844
3
- aioscrapper/exceptions.py,sha256=Akk3zDTgws9E7J-Sh8bgdlgS8L3auDKuv3_U3aefxMc,765
4
- aioscrapper/helpers.py,sha256=slq9r5oCHrR7M9hKZFBLFRsWoqJcw_QFptQI1NjIdQw,610
5
- aioscrapper/request_manager.py,sha256=YLZvuPthhFMnJVQ7pV9-YCsni0Kdu8baP5tmOccEDOM,4037
6
- aioscrapper/request_sender.py,sha256=_Vx_LJyV_5qb23-C3VdnOUUUcQPW42OJNbtERVu1DIA,1644
7
- aioscrapper/pipeline/__init__.py,sha256=hv7Kcssd2BP0LM9fNZtaMs1tmRuAUu4mwAescoeV3Uk,84
8
- aioscrapper/pipeline/base.py,sha256=Ro7YGUOB-V2NJCtfgwhtQDedY4OYMu-jwEV8iR-L89k,405
9
- aioscrapper/pipeline/dispatcher.py,sha256=H4cHNxTyHEF4BnEwaW6nwmcRmK839GqbDTzZh1Zftv4,1156
10
- aioscrapper/scrapper/__init__.py,sha256=UR7bTck-_YVoP2BqYdPldN9PgaCuJf9wvDdQLTVJ578,65
11
- aioscrapper/scrapper/base.py,sha256=_wFrI0UVsTBIAV7EOZCk_QMy2-chPjr1pKzu6w8Huso,224
12
- aioscrapper/scrapper/executor.py,sha256=TrZBh0JyFeQIJd_O4S86cSZNgywjKxJWdb2QSzZyObU,5475
13
- aioscrapper/session/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- aioscrapper/session/aiohttp.py,sha256=8h4Ca1o2JJ7L24iqEnZ5I8bUudTn2cvTngBhM8eJPD4,1753
15
- aioscrapper/session/base.py,sha256=Zxw1VHIe_LgveUufJXh_cl0YeHykiutQveMUwZ6VL54,356
16
- aioscrapper/types/__init__.py,sha256=SC6De0ThepMK-wPQGFYGq6x8aGlzsWscphFxGQGBWek,225
17
- aioscrapper/types/middleware.py,sha256=WtT73QTAlwhdP6UNFgyFHGpFOx1vlxehCAwiO6xjR10,326
18
- aioscrapper/types/session.py,sha256=ffJelDaZmeIoNOk_ivGb_nSC5bBpgKwCyiSsUl4e-B0,2595
19
- aioscrapper-0.1.0.dist-info/licenses/LICENSE,sha256=LefKIkLsd_UuLWYOatzEjY5yscQS8nZAFi8rzCs54OM,1066
20
- aioscrapper-0.1.0.dist-info/METADATA,sha256=pbjxh2xCDlGRYDEFXT40JBL6_7Xkw31-BXivX_Fmog0,2303
21
- aioscrapper-0.1.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
22
- aioscrapper-0.1.0.dist-info/top_level.txt,sha256=d7lbzXOwzzk2HLh-A0X7dkqn8q3zGAJcKqx6TkaEEWI,12
23
- aioscrapper-0.1.0.dist-info/RECORD,,