aioscrapper 0.1.0__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. {aioscrapper-0.1.0 → aioscrapper-0.1.2}/LICENSE +1 -1
  2. {aioscrapper-0.1.0/src/aioscrapper.egg-info → aioscrapper-0.1.2}/PKG-INFO +17 -6
  3. {aioscrapper-0.1.0 → aioscrapper-0.1.2}/README.md +5 -4
  4. {aioscrapper-0.1.0/src → aioscrapper-0.1.2}/aioscrapper/__init__.py +1 -2
  5. {aioscrapper-0.1.0/src → aioscrapper-0.1.2}/aioscrapper/scrapper/base.py +1 -1
  6. {aioscrapper-0.1.0/src → aioscrapper-0.1.2}/aioscrapper/scrapper/executor.py +15 -19
  7. {aioscrapper-0.1.0/src/aioscrapper → aioscrapper-0.1.2/aioscrapper/scrapper}/request_manager.py +77 -10
  8. {aioscrapper-0.1.0/src → aioscrapper-0.1.2}/aioscrapper/types/__init__.py +1 -2
  9. {aioscrapper-0.1.0/src → aioscrapper-0.1.2}/aioscrapper/types/session.py +20 -11
  10. {aioscrapper-0.1.0 → aioscrapper-0.1.2/aioscrapper.egg-info}/PKG-INFO +17 -6
  11. aioscrapper-0.1.2/aioscrapper.egg-info/SOURCES.txt +27 -0
  12. aioscrapper-0.1.2/aioscrapper.egg-info/requires.txt +14 -0
  13. aioscrapper-0.1.2/pyproject.toml +63 -0
  14. aioscrapper-0.1.2/tests/test_error.py +37 -0
  15. aioscrapper-0.1.2/tests/test_success.py +29 -0
  16. aioscrapper-0.1.0/pyproject.toml +0 -24
  17. aioscrapper-0.1.0/src/aioscrapper/request_sender.py +0 -52
  18. aioscrapper-0.1.0/src/aioscrapper.egg-info/SOURCES.txt +0 -26
  19. aioscrapper-0.1.0/src/aioscrapper.egg-info/requires.txt +0 -2
  20. {aioscrapper-0.1.0/src → aioscrapper-0.1.2}/aioscrapper/config.py +0 -0
  21. {aioscrapper-0.1.0/src → aioscrapper-0.1.2}/aioscrapper/exceptions.py +0 -0
  22. {aioscrapper-0.1.0/src → aioscrapper-0.1.2}/aioscrapper/helpers.py +0 -0
  23. {aioscrapper-0.1.0/src → aioscrapper-0.1.2}/aioscrapper/pipeline/__init__.py +0 -0
  24. {aioscrapper-0.1.0/src → aioscrapper-0.1.2}/aioscrapper/pipeline/base.py +0 -0
  25. {aioscrapper-0.1.0/src → aioscrapper-0.1.2}/aioscrapper/pipeline/dispatcher.py +0 -0
  26. {aioscrapper-0.1.0/src → aioscrapper-0.1.2}/aioscrapper/scrapper/__init__.py +0 -0
  27. {aioscrapper-0.1.0/src → aioscrapper-0.1.2}/aioscrapper/session/__init__.py +0 -0
  28. {aioscrapper-0.1.0/src → aioscrapper-0.1.2}/aioscrapper/session/aiohttp.py +0 -0
  29. {aioscrapper-0.1.0/src → aioscrapper-0.1.2}/aioscrapper/session/base.py +0 -0
  30. {aioscrapper-0.1.0/src → aioscrapper-0.1.2}/aioscrapper/types/middleware.py +0 -0
  31. {aioscrapper-0.1.0/src → aioscrapper-0.1.2}/aioscrapper.egg-info/dependency_links.txt +0 -0
  32. {aioscrapper-0.1.0/src → aioscrapper-0.1.2}/aioscrapper.egg-info/top_level.txt +0 -0
  33. {aioscrapper-0.1.0 → aioscrapper-0.1.2}/setup.cfg +0 -0

{aioscrapper-0.1.0 → aioscrapper-0.1.2}/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2024 Stanislav
+Copyright (c) 2025 darkstussy
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

{aioscrapper-0.1.0/src/aioscrapper.egg-info → aioscrapper-0.1.2}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aioscrapper
-Version: 0.1.0
+Version: 0.1.2
 Summary: Async framework for building modular and scalable web scrapers.
 Author: darkstussy
 Project-URL: Homepage, https://github.com/darkstussy/aioscrapper
@@ -12,18 +12,28 @@ Classifier: Intended Audience :: Developers
 Classifier: Operating System :: OS Independent
 Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search
 Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
-Requires-Python: >=3.12
+Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: aiohttp[speedups]~=3.11.16
 Requires-Dist: aiojobs~=1.4.0
+Provides-Extra: dev
+Requires-Dist: flake8~=7.1.2; extra == "dev"
+Requires-Dist: black~=25.1.0; extra == "dev"
+Requires-Dist: pyright~=1.1.399; extra == "dev"
+Requires-Dist: aiohttp[speedups]~=3.11.16; extra == "dev"
+Requires-Dist: aiojobs~=1.4.0; extra == "dev"
+Provides-Extra: test
+Requires-Dist: pytest~=8.3.5; extra == "test"
+Requires-Dist: pytest-asyncio~=0.26.0; extra == "test"
+Requires-Dist: aresponses~=3.0.0; extra == "test"
 Dynamic: license-file
 
 # aioscrapper
 
 **Asynchronous framework for building modular and scalable web scrapers.**
 
-![Python](https://img.shields.io/badge/python-3.12%2B-blue)
+![Python](https://img.shields.io/badge/python-3.10%2B-blue)
 ![License](https://img.shields.io/github/license/darkstussy/aioscrapper)
 ![Version](https://img.shields.io/github/v/tag/darkstussy/aioscrapper?label=version)
 
@@ -44,7 +54,7 @@ pip install aioscrapper
 
 ## Requirements
 
-- Python 3.12 or higher
+- Python 3.10 or higher
 - aiohttp
 - aiojobs
 
@@ -53,8 +63,8 @@ pip install aioscrapper
 ```python
 import asyncio
 
-from aioscrapper import BaseScrapper, AIOScrapper, RequestSender
-from aioscrapper.types import Response
+from aioscrapper import BaseScrapper, AIOScrapper
+from aioscrapper.types import Response, RequestSender
 
 
 class Scrapper(BaseScrapper):
@@ -83,5 +93,6 @@ Copyright (c) 2025 darkstussy
 
 ## Links
 
+- [PyPI](https://pypi.org/project/aioscrapper)
 - [GitHub](https://github.com/darkstussy/aioscrapper)
 - [Issues](https://github.com/darkstussy/aioscrapper/issues)

{aioscrapper-0.1.0 → aioscrapper-0.1.2}/README.md
@@ -2,7 +2,7 @@
 
 **Asynchronous framework for building modular and scalable web scrapers.**
 
-![Python](https://img.shields.io/badge/python-3.12%2B-blue)
+![Python](https://img.shields.io/badge/python-3.10%2B-blue)
 ![License](https://img.shields.io/github/license/darkstussy/aioscrapper)
 ![Version](https://img.shields.io/github/v/tag/darkstussy/aioscrapper?label=version)
 
@@ -23,7 +23,7 @@ pip install aioscrapper
 
 ## Requirements
 
-- Python 3.12 or higher
+- Python 3.10 or higher
 - aiohttp
 - aiojobs
 
@@ -32,8 +32,8 @@ pip install aioscrapper
 ```python
 import asyncio
 
-from aioscrapper import BaseScrapper, AIOScrapper, RequestSender
-from aioscrapper.types import Response
+from aioscrapper import BaseScrapper, AIOScrapper
+from aioscrapper.types import Response, RequestSender
 
 
 class Scrapper(BaseScrapper):
@@ -62,5 +62,6 @@ Copyright (c) 2025 darkstussy
 
 ## Links
 
+- [PyPI](https://pypi.org/project/aioscrapper)
 - [GitHub](https://github.com/darkstussy/aioscrapper)
 - [Issues](https://github.com/darkstussy/aioscrapper/issues)
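
The only user-visible change to the quick start is the import path: `RequestSender` moves from the package root to `aioscrapper.types`. A minimal sketch of a 0.1.2-style scrapper, assembled from the imports above and the bundled tests (the URL and the parsing logic are illustrative):

```python
import asyncio

from aioscrapper import AIOScrapper, BaseScrapper
from aioscrapper.types import Response, RequestSender


class Scrapper(BaseScrapper):
    async def start(self, request_sender: RequestSender) -> None:
        # request_sender is injected by the executor; calling it enqueues a request
        await request_sender(url="https://example.com/api", callback=self.parse)

    async def parse(self, response: Response) -> None:
        print(response.json())


async def main() -> None:
    async with AIOScrapper(scrappers=[Scrapper()]) as scrapper:
        await scrapper.start()


asyncio.run(main())
```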

{aioscrapper-0.1.0/src → aioscrapper-0.1.2}/aioscrapper/__init__.py
@@ -4,7 +4,6 @@ __author__ = "darkstussy"
 
 __copyright__ = f"Copyright (c) 2025 {__author__}"
 
-from .request_sender import RequestSender
 from .scrapper import AIOScrapper, BaseScrapper
 
-__all__ = ["AIOScrapper", "BaseScrapper", "RequestSender"]
+__all__ = ["AIOScrapper", "BaseScrapper"]

{aioscrapper-0.1.0/src → aioscrapper-0.1.2}/aioscrapper/scrapper/base.py
@@ -7,4 +7,4 @@ class BaseScrapper(abc.ABC):
 
     async def initialize(self, *args, **kwargs) -> None: ...
 
-    async def close(self) -> None: ...
+    async def close(self, *args, **kwargs) -> None: ...

{aioscrapper-0.1.0/src → aioscrapper-0.1.2}/aioscrapper/scrapper/executor.py
@@ -6,11 +6,10 @@ from typing import Type, Any
 
 from aiojobs import Scheduler
 
+from .request_manager import RequestManager
 from ..config import Config
 from ..helpers import get_func_kwargs
 from ..pipeline import PipelineDispatcher, BasePipeline
-from ..request_manager import RequestManager
-from ..request_sender import RequestSender
 from ..scrapper import BaseScrapper
 from ..session.aiohttp import AiohttpSession
 from ..types import RequestMiddleware, ResponseMiddleware
@@ -33,13 +32,11 @@ class AIOScrapper:
         self._response_middlewares = []
 
         self._pipelines: dict[str, list[BasePipeline]] = {}
-        self._pipeline_dispatcher = PipelineDispatcher(
-            logger=self._logger.getChild("pipeline"), pipelines=self._pipelines
-        )
+        self._pipeline_dispatcher = PipelineDispatcher(self._logger.getChild("pipeline"), pipelines=self._pipelines)
 
         def _exception_handler(_, context: dict[str, Any]):
             if "job" in context:
-                self._logger.error(f'{context['message']}: {context["exception"]}', extra={"context": context})
+                self._logger.error(f'{context["message"]}: {context["exception"]}', extra={"context": context})
             else:
                 self._logger.error("Unhandled error", extra={"context": context})
 
@@ -51,7 +48,6 @@ class AIOScrapper:
         )
 
         self._request_queue = asyncio.PriorityQueue()
-        self._request_sender = RequestSender(self._request_queue)
         self._request_manager = RequestManager(
             logger=self._logger.getChild("request_worker"),
             session=AiohttpSession(
@@ -59,7 +55,6 @@ class AIOScrapper:
                 ssl=self._config.session.request.ssl,
             ),
             schedule_request=self._scheduler.spawn,
-            sender=self._request_sender,
             queue=self._request_queue,
             delay=self._config.session.request.delay,
             shutdown_timeout=self._config.execution.shutdown_timeout,
@@ -69,8 +64,6 @@ class AIOScrapper:
             response_middlewares=self._response_middlewares,
         )
 
-        self._scrapper_kwargs = {"request_sender": self._request_sender, "pipeline": self._pipeline_dispatcher}
-
     def add_pipeline(self, name: str, pipeline: BasePipeline) -> None:
         if name not in self._pipelines:
             self._pipelines[name] = [pipeline]
@@ -86,7 +79,7 @@ class AIOScrapper:
     def add_response_middlewares(self, *middlewares: ResponseMiddleware) -> None:
         self._response_middlewares.extend(middlewares)
 
-    async def __aenter__(self):
+    async def __aenter__(self) -> "AIOScrapper":
         return self
 
     async def __aexit__(
@@ -101,11 +94,12 @@ class AIOScrapper:
         await self._pipeline_dispatcher.initialize()
         self._request_manager.listen_queue()
 
+        scrapper_kwargs = {"request_sender": self._request_manager.sender, "pipeline": self._pipeline_dispatcher}
        for scrapper in self._scrappers:
-            await scrapper.initialize(**get_func_kwargs(scrapper.initialize, self._scrapper_kwargs))
+            await scrapper.initialize(**get_func_kwargs(scrapper.initialize, scrapper_kwargs))
 
         await asyncio.gather(
-            *[scrapper.start(**get_func_kwargs(scrapper.start, self._scrapper_kwargs)) for scrapper in self._scrappers]
+            *[scrapper.start(**get_func_kwargs(scrapper.start, scrapper_kwargs)) for scrapper in self._scrappers]
         )
 
     async def _shutdown(self) -> bool:
@@ -138,9 +132,11 @@ class AIOScrapper:
         if shutdown:
             await self.shutdown()
 
-        for scrapper in self._scrappers:
-            await scrapper.close()
-
-        await self._scheduler.close()
-        await self._request_manager.close()
-        await self._pipeline_dispatcher.close()
+        scrapper_kwargs = {"pipeline": self._pipeline_dispatcher}
+        try:
+            for scrapper in self._scrappers:
+                await scrapper.close(**get_func_kwargs(scrapper.close, scrapper_kwargs))
+        finally:
+            await self._scheduler.close()
+            await self._request_manager.close()
+            await self._pipeline_dispatcher.close()
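
Two behavioral changes stand out in this file: shutdown now runs the scrapper `close()` hooks inside `try`/`finally`, so the scheduler, request manager, and pipelines are released even if a hook raises; and every hook receives only the keyword arguments its signature declares, filtered through `get_func_kwargs`. That helper's implementation is not part of this diff; a hypothetical sketch of such a filter, plus a `close` hook that opts in to the `pipeline` kwarg:

```python
import inspect
from typing import Any, Callable


# Hypothetical stand-in for aioscrapper.helpers.get_func_kwargs (its real
# implementation is not shown in this diff): keep only the kwargs that the
# target callable names explicitly in its signature.
def get_func_kwargs(func: Callable[..., Any], kwargs: dict[str, Any]) -> dict[str, Any]:
    params = inspect.signature(func).parameters
    return {name: value for name, value in kwargs.items() if name in params}


class ReportingScrapper:
    # Declaring `pipeline` means the executor's {"pipeline": ...} kwargs
    # survive the filter and are passed in at shutdown.
    async def close(self, pipeline) -> None:
        ...  # e.g. flush buffered items through the pipeline dispatcher
```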

{aioscrapper-0.1.0/src/aioscrapper → aioscrapper-0.1.2/aioscrapper/scrapper}/request_manager.py
@@ -1,13 +1,76 @@
 import asyncio
+from dataclasses import dataclass, field
 from logging import Logger
-from typing import Callable, Awaitable, Any, Coroutine
+from typing import Callable, Awaitable, Any
+from typing import Coroutine
 
-from .exceptions import HTTPException, RequestException, ClientException
-from .helpers import get_cb_kwargs
-from .request_sender import RequestSender
-from .session.base import BaseSession
-from .types import Request, RequestParams, RequestQueue
-from .types import RequestMiddleware, ResponseMiddleware
+from ..exceptions import HTTPException, RequestException, ClientException
+from ..helpers import get_cb_kwargs
+from ..session.base import BaseSession
+from ..types import (
+    QueryParams,
+    Cookies,
+    Headers,
+    BasicAuth,
+    Request,
+    RequestParams,
+    RequestMiddleware,
+    ResponseMiddleware,
+    RequestSender,
+)
+
+
+@dataclass(slots=True, order=True)
+class _PRPRequest:
+    priority: int
+    request: Request = field(compare=False)
+    request_params: RequestParams = field(compare=False)
+
+
+_RequestQueue = asyncio.PriorityQueue[_PRPRequest | None]
+
+
+def _get_request_sender(queue: _RequestQueue) -> RequestSender:
+    async def sender(
+        url: str,
+        method: str = "GET",
+        callback: Callable[..., Awaitable] | None = None,
+        cb_kwargs: dict[str, Any] | None = None,
+        errback: Callable[..., Awaitable] | None = None,
+        params: QueryParams | None = None,
+        data: Any = None,
+        json_data: Any = None,
+        cookies: Cookies | None = None,
+        headers: Headers | None = None,
+        proxy: str | None = None,
+        auth: BasicAuth | None = None,
+        timeout: float | None = None,
+        priority: int = 0,
+    ) -> None:
+        await queue.put(
+            _PRPRequest(
+                priority=priority,
+                request=Request(
+                    method=method,
+                    url=url,
+                    params=params,
+                    data=data,
+                    json_data=json_data,
+                    cookies=cookies,
+                    headers=headers,
+                    auth=auth,
+                    proxy=proxy,
+                    timeout=timeout,
+                ),
+                request_params=RequestParams(
+                    callback=callback,
+                    cb_kwargs=cb_kwargs,
+                    errback=errback,
+                ),
+            )
+        )
+
+    return sender
 
 
 class RequestManager:
@@ -16,8 +79,7 @@ class RequestManager:
         logger: Logger,
         session: BaseSession,
         schedule_request: Callable[[Coroutine], Awaitable],
-        sender: RequestSender,
-        queue: RequestQueue,
+        queue: _RequestQueue,
         delay: float,
         shutdown_timeout: float,
         srv_kwargs: dict[str, Any],
@@ -31,12 +93,17 @@ class RequestManager:
         self._queue = queue
         self._delay = delay
         self._shutdown_timeout = shutdown_timeout
-        self._srv_kwargs = {"send_request": sender, **srv_kwargs}
+        self._request_sender = _get_request_sender(queue)
+        self._srv_kwargs = {"send_request": self._request_sender, **srv_kwargs}
         self._request_outer_middlewares = request_outer_middlewares
         self._request_inner_middlewares = request_inner_middlewares
         self._response_middlewares = response_middlewares
         self._task: asyncio.Task | None = None
 
+    @property
+    def sender(self) -> RequestSender:
+        return self._request_sender
+
     async def _send_request(self, request: Request, params: RequestParams) -> None:
         full_url = request.full_url
         self._logger.debug(f"request: {request.method} {full_url}")
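
`_PRPRequest` is the former `PRPRequest` from `types/session.py`, now private to the request manager. Its ordering trick is easy to miss: `order=True` generates comparison methods, but only over `priority`, because the payload fields are `compare=False`, so the `asyncio.PriorityQueue` never tries to compare `Request` objects. A standalone illustration of that behavior (names are illustrative):

```python
import asyncio
from dataclasses import dataclass, field


@dataclass(slots=True, order=True)
class Item:
    priority: int
    payload: str = field(compare=False)  # excluded from ordering


async def demo() -> None:
    queue: asyncio.PriorityQueue[Item] = asyncio.PriorityQueue()
    await queue.put(Item(priority=10, payload="low priority"))
    await queue.put(Item(priority=0, payload="high priority"))
    print((await queue.get()).payload)  # lower value first: "high priority"
    print((await queue.get()).payload)  # then "low priority"


asyncio.run(demo())
```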

{aioscrapper-0.1.0/src → aioscrapper-0.1.2}/aioscrapper/types/__init__.py
@@ -6,7 +6,6 @@ from .session import (
     BasicAuth,
     Request,
     RequestParams,
-    PRPRequest,
-    RequestQueue,
+    RequestSender,
     Response,
 )

{aioscrapper-0.1.0/src → aioscrapper-0.1.2}/aioscrapper/types/session.py
@@ -1,7 +1,6 @@
-import asyncio
 import json
-from dataclasses import field, dataclass
-from typing import Union, Mapping, Any, Callable, Awaitable, TypedDict
+from dataclasses import dataclass
+from typing import Union, Mapping, Any, Callable, Awaitable, TypedDict, Protocol
 from urllib.parse import urlencode
 
 QueryParams = Mapping[str, Union[str, int, float]]
@@ -40,14 +39,24 @@ class RequestParams:
     errback: Callable[..., Awaitable] | None = None
 
 
-@dataclass(slots=True, order=True)
-class PRPRequest:
-    priority: int
-    request: Request = field(compare=False)
-    request_params: RequestParams = field(compare=False)
-
-
-RequestQueue = asyncio.PriorityQueue[PRPRequest | None]
+class RequestSender(Protocol):
+    async def __call__(
+        self,
+        url: str,
+        method: str = "GET",
+        callback: Callable[..., Awaitable] | None = None,
+        cb_kwargs: dict[str, Any] | None = None,
+        errback: Callable[..., Awaitable] | None = None,
+        params: QueryParams | None = None,
+        data: Any = None,
+        json_data: Any = None,
+        cookies: Cookies | None = None,
+        headers: Headers | None = None,
+        proxy: str | None = None,
+        auth: BasicAuth | None = None,
+        timeout: float | None = None,
+        priority: int = 0,
+    ) -> None: ...
 
 
 class Response:
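
Replacing the concrete `RequestSender` class with a `Protocol` means callers type against a shape rather than an implementation: the closure built in `request_manager.py` satisfies it without inheriting from anything. A reduced sketch of the same idea (the real protocol carries the full parameter list shown above):

```python
from typing import Any, Protocol


class Sender(Protocol):
    async def __call__(self, url: str, **kwargs: Any) -> None: ...


# A plain async function satisfies the protocol structurally; no subclassing.
async def fake_sender(url: str, **kwargs: Any) -> None:
    print(f"would enqueue {url}")


def takes_sender(sender: Sender) -> Sender:
    return sender


sender: Sender = takes_sender(fake_sender)  # accepted by type checkers
```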

{aioscrapper-0.1.0 → aioscrapper-0.1.2/aioscrapper.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aioscrapper
-Version: 0.1.0
+Version: 0.1.2
 Summary: Async framework for building modular and scalable web scrapers.
 Author: darkstussy
 Project-URL: Homepage, https://github.com/darkstussy/aioscrapper
@@ -12,18 +12,28 @@ Classifier: Intended Audience :: Developers
 Classifier: Operating System :: OS Independent
 Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search
 Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
-Requires-Python: >=3.12
+Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: aiohttp[speedups]~=3.11.16
 Requires-Dist: aiojobs~=1.4.0
+Provides-Extra: dev
+Requires-Dist: flake8~=7.1.2; extra == "dev"
+Requires-Dist: black~=25.1.0; extra == "dev"
+Requires-Dist: pyright~=1.1.399; extra == "dev"
+Requires-Dist: aiohttp[speedups]~=3.11.16; extra == "dev"
+Requires-Dist: aiojobs~=1.4.0; extra == "dev"
+Provides-Extra: test
+Requires-Dist: pytest~=8.3.5; extra == "test"
+Requires-Dist: pytest-asyncio~=0.26.0; extra == "test"
+Requires-Dist: aresponses~=3.0.0; extra == "test"
 Dynamic: license-file
 
 # aioscrapper
 
 **Asynchronous framework for building modular and scalable web scrapers.**
 
-![Python](https://img.shields.io/badge/python-3.12%2B-blue)
+![Python](https://img.shields.io/badge/python-3.10%2B-blue)
 ![License](https://img.shields.io/github/license/darkstussy/aioscrapper)
 ![Version](https://img.shields.io/github/v/tag/darkstussy/aioscrapper?label=version)
 
@@ -44,7 +54,7 @@ pip install aioscrapper
 
 ## Requirements
 
-- Python 3.12 or higher
+- Python 3.10 or higher
 - aiohttp
 - aiojobs
 
@@ -53,8 +63,8 @@ pip install aioscrapper
 ```python
 import asyncio
 
-from aioscrapper import BaseScrapper, AIOScrapper, RequestSender
-from aioscrapper.types import Response
+from aioscrapper import BaseScrapper, AIOScrapper
+from aioscrapper.types import Response, RequestSender
 
 
 class Scrapper(BaseScrapper):
@@ -83,5 +93,6 @@ Copyright (c) 2025 darkstussy
 
 ## Links
 
+- [PyPI](https://pypi.org/project/aioscrapper)
 - [GitHub](https://github.com/darkstussy/aioscrapper)
 - [Issues](https://github.com/darkstussy/aioscrapper/issues)

aioscrapper-0.1.2/aioscrapper.egg-info/SOURCES.txt
@@ -0,0 +1,27 @@
+LICENSE
+README.md
+pyproject.toml
+aioscrapper/__init__.py
+aioscrapper/config.py
+aioscrapper/exceptions.py
+aioscrapper/helpers.py
+aioscrapper.egg-info/PKG-INFO
+aioscrapper.egg-info/SOURCES.txt
+aioscrapper.egg-info/dependency_links.txt
+aioscrapper.egg-info/requires.txt
+aioscrapper.egg-info/top_level.txt
+aioscrapper/pipeline/__init__.py
+aioscrapper/pipeline/base.py
+aioscrapper/pipeline/dispatcher.py
+aioscrapper/scrapper/__init__.py
+aioscrapper/scrapper/base.py
+aioscrapper/scrapper/executor.py
+aioscrapper/scrapper/request_manager.py
+aioscrapper/session/__init__.py
+aioscrapper/session/aiohttp.py
+aioscrapper/session/base.py
+aioscrapper/types/__init__.py
+aioscrapper/types/middleware.py
+aioscrapper/types/session.py
+tests/test_error.py
+tests/test_success.py

aioscrapper-0.1.2/aioscrapper.egg-info/requires.txt
@@ -0,0 +1,14 @@
+aiohttp[speedups]~=3.11.16
+aiojobs~=1.4.0
+
+[dev]
+flake8~=7.1.2
+black~=25.1.0
+pyright~=1.1.399
+aiohttp[speedups]~=3.11.16
+aiojobs~=1.4.0
+
+[test]
+pytest~=8.3.5
+pytest-asyncio~=0.26.0
+aresponses~=3.0.0

aioscrapper-0.1.2/pyproject.toml
@@ -0,0 +1,63 @@
+[project]
+name = "aioscrapper"
+version = "0.1.2"
+authors = [{ name = "darkstussy" }, ]
+description = "Async framework for building modular and scalable web scrapers."
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "aiohttp[speedups]~=3.11.16",
+    "aiojobs~=1.4.0",
+]
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.12",
+    "Framework :: AsyncIO",
+    "Intended Audience :: Developers",
+    "Operating System :: OS Independent",
+    "Topic :: Internet :: WWW/HTTP :: Indexing/Search",
+    "Topic :: Software Development :: Libraries :: Application Frameworks",
+]
+
+[project.urls]
+Homepage = "https://github.com/darkstussy/aioscrapper"
+Issues = "https://github.com/darkstussy/aioscrapper/issues"
+
+[project.optional-dependencies]
+dev = [
+    "flake8~=7.1.2",
+    "black~=25.1.0",
+    "pyright~=1.1.399",
+    "aiohttp[speedups]~=3.11.16",
+    "aiojobs~=1.4.0",
+]
+test = [
+    "pytest~=8.3.5",
+    "pytest-asyncio~=0.26.0",
+    "aresponses~=3.0.0"
+]
+
+[tool.black]
+line-length = 120
+include = '\.pyi?$'
+exclude = '''
+/(
+    \.eggs/|
+    \.git/|
+    \.hg/|
+    \.mypy_cache/|
+    \.tox/|
+    \.venv/|
+    \venv/|
+    _build/|
+    buck-out/|
+    build/|
+    dist/
+)'''
+
+[tool.pytest.ini_options]
+asyncio_mode = "auto"
+testpaths = [
+    "tests",
+]
+asyncio_default_fixture_loop_scope = "function"

aioscrapper-0.1.2/tests/test_error.py
@@ -0,0 +1,37 @@
+import pytest
+from aresponses import ResponsesMockServer
+
+from aioscrapper import AIOScrapper
+from aioscrapper.exceptions import ClientException, HTTPException
+from aioscrapper.scrapper.base import BaseScrapper
+from aioscrapper.types import RequestSender
+
+
+class Scrapper(BaseScrapper):
+    def __init__(self):
+        self.status = None
+        self.response_data = None
+
+    async def start(self, request_sender: RequestSender) -> None:
+        await request_sender(url="https://api.test.com/v1", errback=self.errback)
+
+    async def errback(self, exc: ClientException) -> None:
+        if isinstance(exc, HTTPException):
+            self.status = exc.status_code
+            self.response_data = exc.message
+
+
+@pytest.mark.asyncio
+async def test_error(aresponses: ResponsesMockServer):
+    def handle_request(request):
+        return aresponses.Response(status=500, text="Internal Server Error")
+
+    aresponses.add("api.test.com", "/v1", "GET", response=handle_request)  # pyright: ignore
+
+    scrapper = Scrapper()
+    async with AIOScrapper(scrappers=[scrapper]) as executor:
+        await executor.start()
+
+    assert scrapper.status == 500
+    assert scrapper.response_data == "Internal Server Error"
+    aresponses.assert_plan_strictly_followed()

aioscrapper-0.1.2/tests/test_success.py
@@ -0,0 +1,29 @@
+import pytest
+from aresponses import ResponsesMockServer
+
+from aioscrapper import AIOScrapper
+from aioscrapper.scrapper.base import BaseScrapper
+from aioscrapper.types import Response, RequestSender
+
+
+class Scrapper(BaseScrapper):
+    def __init__(self):
+        self.response_data = None
+
+    async def start(self, request_sender: RequestSender) -> None:
+        await request_sender(url="https://api.test.com/v1", callback=self.parse)
+
+    async def parse(self, response: Response) -> None:
+        self.response_data = response.json()
+
+
+@pytest.mark.asyncio
+async def test_success(aresponses: ResponsesMockServer):
+    aresponses.add("api.test.com", "/v1", "GET", response={"status": "OK"})  # pyright: ignore
+
+    scrapper = Scrapper()
+    async with AIOScrapper(scrappers=[scrapper]) as executor:
+        await executor.start()
+
+    assert scrapper.response_data == {"status": "OK"}
+    aresponses.assert_plan_strictly_followed()

aioscrapper-0.1.0/pyproject.toml
@@ -1,24 +0,0 @@
-[project]
-name = "aioscrapper"
-version = "0.1.0"
-authors = [{ name = "darkstussy" }, ]
-description = "Async framework for building modular and scalable web scrapers."
-readme = "README.md"
-requires-python = ">=3.12"
-dependencies = [
-    "aiohttp[speedups] ~= 3.11.16",
-    "aiojobs ~= 1.4.0",
-]
-classifiers = [
-    "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.12",
-    "Framework :: AsyncIO",
-    "Intended Audience :: Developers",
-    "Operating System :: OS Independent",
-    "Topic :: Internet :: WWW/HTTP :: Indexing/Search",
-    "Topic :: Software Development :: Libraries :: Application Frameworks",
-]
-
-[project.urls]
-Homepage = "https://github.com/darkstussy/aioscrapper"
-Issues = "https://github.com/darkstussy/aioscrapper/issues"

aioscrapper-0.1.0/src/aioscrapper/request_sender.py
@@ -1,52 +0,0 @@
-import asyncio
-from typing import Callable, Awaitable, Any
-
-from .types import QueryParams, Cookies, Headers, BasicAuth, Request, RequestParams, RequestQueue, PRPRequest
-
-
-class RequestSender:
-    def __init__(self, queue: RequestQueue) -> None:
-        self._queue = queue
-
-    async def __call__(
-        self,
-        url: str,
-        method: str = "GET",
-        callback: Callable[..., Awaitable] | None = None,
-        cb_kwargs: dict[str, Any] | None = None,
-        errback: Callable[..., Awaitable] | None = None,
-        params: QueryParams | None = None,
-        data: Any = None,
-        json_data: Any = None,
-        cookies: Cookies | None = None,
-        headers: Headers | None = None,
-        proxy: str | None = None,
-        auth: BasicAuth | None = None,
-        timeout: float | None = None,
-        priority: int = 0,
-        delay: float | None = None,
-    ) -> None:
-        await self._queue.put(
-            PRPRequest(
-                priority=priority,
-                request=Request(
-                    method=method,
-                    url=url,
-                    params=params,
-                    data=data,
-                    json_data=json_data,
-                    cookies=cookies,
-                    headers=headers,
-                    auth=auth,
-                    proxy=proxy,
-                    timeout=timeout,
-                ),
-                request_params=RequestParams(
-                    callback=callback,
-                    cb_kwargs=cb_kwargs,
-                    errback=errback,
-                ),
-            )
-        )
-        if delay:
-            await asyncio.sleep(delay)

aioscrapper-0.1.0/src/aioscrapper.egg-info/SOURCES.txt
@@ -1,26 +0,0 @@
-LICENSE
-README.md
-pyproject.toml
-src/aioscrapper/__init__.py
-src/aioscrapper/config.py
-src/aioscrapper/exceptions.py
-src/aioscrapper/helpers.py
-src/aioscrapper/request_manager.py
-src/aioscrapper/request_sender.py
-src/aioscrapper.egg-info/PKG-INFO
-src/aioscrapper.egg-info/SOURCES.txt
-src/aioscrapper.egg-info/dependency_links.txt
-src/aioscrapper.egg-info/requires.txt
-src/aioscrapper.egg-info/top_level.txt
-src/aioscrapper/pipeline/__init__.py
-src/aioscrapper/pipeline/base.py
-src/aioscrapper/pipeline/dispatcher.py
-src/aioscrapper/scrapper/__init__.py
-src/aioscrapper/scrapper/base.py
-src/aioscrapper/scrapper/executor.py
-src/aioscrapper/session/__init__.py
-src/aioscrapper/session/aiohttp.py
-src/aioscrapper/session/base.py
-src/aioscrapper/types/__init__.py
-src/aioscrapper/types/middleware.py
-src/aioscrapper/types/session.py

aioscrapper-0.1.0/src/aioscrapper.egg-info/requires.txt
@@ -1,2 +0,0 @@
-aiohttp[speedups]~=3.11.16
-aiojobs~=1.4.0