aioscrapper 0.1.2__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {aioscrapper-0.1.2 → aioscrapper-0.2.0}/PKG-INFO +3 -3
  2. {aioscrapper-0.1.2 → aioscrapper-0.2.0}/README.md +2 -2
  3. aioscrapper-0.2.0/aioscrapper/pipeline/__init__.py +1 -0
  4. {aioscrapper-0.1.2 → aioscrapper-0.2.0}/aioscrapper/pipeline/base.py +2 -6
  5. {aioscrapper-0.1.2 → aioscrapper-0.2.0}/aioscrapper/pipeline/dispatcher.py +1 -1
  6. aioscrapper-0.2.0/aioscrapper/scrapper/base.py +30 -0
  7. {aioscrapper-0.1.2 → aioscrapper-0.2.0}/aioscrapper/scrapper/executor.py +3 -2
  8. {aioscrapper-0.1.2 → aioscrapper-0.2.0}/aioscrapper/types/__init__.py +1 -0
  9. aioscrapper-0.2.0/aioscrapper/types/pipeline.py +10 -0
  10. {aioscrapper-0.1.2 → aioscrapper-0.2.0}/aioscrapper.egg-info/PKG-INFO +3 -3
  11. {aioscrapper-0.1.2 → aioscrapper-0.2.0}/aioscrapper.egg-info/SOURCES.txt +1 -0
  12. {aioscrapper-0.1.2 → aioscrapper-0.2.0}/pyproject.toml +1 -1
  13. {aioscrapper-0.1.2 → aioscrapper-0.2.0}/tests/test_error.py +2 -2
  14. {aioscrapper-0.1.2 → aioscrapper-0.2.0}/tests/test_success.py +2 -2
  15. aioscrapper-0.1.2/aioscrapper/pipeline/__init__.py +0 -2
  16. aioscrapper-0.1.2/aioscrapper/scrapper/base.py +0 -10
  17. {aioscrapper-0.1.2 → aioscrapper-0.2.0}/LICENSE +0 -0
  18. {aioscrapper-0.1.2 → aioscrapper-0.2.0}/aioscrapper/__init__.py +0 -0
  19. {aioscrapper-0.1.2 → aioscrapper-0.2.0}/aioscrapper/config.py +0 -0
  20. {aioscrapper-0.1.2 → aioscrapper-0.2.0}/aioscrapper/exceptions.py +0 -0
  21. {aioscrapper-0.1.2 → aioscrapper-0.2.0}/aioscrapper/helpers.py +0 -0
  22. {aioscrapper-0.1.2 → aioscrapper-0.2.0}/aioscrapper/scrapper/__init__.py +0 -0
  23. {aioscrapper-0.1.2 → aioscrapper-0.2.0}/aioscrapper/scrapper/request_manager.py +0 -0
  24. {aioscrapper-0.1.2 → aioscrapper-0.2.0}/aioscrapper/session/__init__.py +0 -0
  25. {aioscrapper-0.1.2 → aioscrapper-0.2.0}/aioscrapper/session/aiohttp.py +0 -0
  26. {aioscrapper-0.1.2 → aioscrapper-0.2.0}/aioscrapper/session/base.py +0 -0
  27. {aioscrapper-0.1.2 → aioscrapper-0.2.0}/aioscrapper/types/middleware.py +0 -0
  28. {aioscrapper-0.1.2 → aioscrapper-0.2.0}/aioscrapper/types/session.py +0 -0
  29. {aioscrapper-0.1.2 → aioscrapper-0.2.0}/aioscrapper.egg-info/dependency_links.txt +0 -0
  30. {aioscrapper-0.1.2 → aioscrapper-0.2.0}/aioscrapper.egg-info/requires.txt +0 -0
  31. {aioscrapper-0.1.2 → aioscrapper-0.2.0}/aioscrapper.egg-info/top_level.txt +0 -0
  32. {aioscrapper-0.1.2 → aioscrapper-0.2.0}/setup.cfg +0 -0
{aioscrapper-0.1.2 → aioscrapper-0.2.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aioscrapper
-Version: 0.1.2
+Version: 0.2.0
 Summary: Async framework for building modular and scalable web scrapers.
 Author: darkstussy
 Project-URL: Homepage, https://github.com/darkstussy/aioscrapper
@@ -68,8 +68,8 @@ from aioscrapper.types import Response, RequestSender
 
 
 class Scrapper(BaseScrapper):
-    async def start(self, request_sender: RequestSender) -> None:
-        await request_sender(url="https://example.com", callback=self.parse)
+    async def start(self, send_request: RequestSender) -> None:
+        await send_request(url="https://example.com", callback=self.parse)
 
     async def parse(self, response: Response) -> None:
         # handle response
{aioscrapper-0.1.2 → aioscrapper-0.2.0}/README.md
@@ -37,8 +37,8 @@ from aioscrapper.types import Response, RequestSender
 
 
 class Scrapper(BaseScrapper):
-    async def start(self, request_sender: RequestSender) -> None:
-        await request_sender(url="https://example.com", callback=self.parse)
+    async def start(self, send_request: RequestSender) -> None:
+        await send_request(url="https://example.com", callback=self.parse)
 
     async def parse(self, response: Response) -> None:
         # handle response
aioscrapper-0.2.0/aioscrapper/pipeline/__init__.py (new file)
@@ -0,0 +1 @@
+from .base import BasePipeline
{aioscrapper-0.1.2 → aioscrapper-0.2.0}/aioscrapper/pipeline/base.py
@@ -1,11 +1,7 @@
 import abc
-from typing import TypeVar, Generic, Protocol
-
-
-class BaseItem(Protocol):
-    @property
-    def pipeline_name(self) -> str: ...
+from typing import TypeVar, Generic
 
+from ..types import BaseItem
 
 ItemType = TypeVar("ItemType", bound=BaseItem)
 
{aioscrapper-0.1.2 → aioscrapper-0.2.0}/aioscrapper/pipeline/dispatcher.py
@@ -10,7 +10,7 @@ class PipelineDispatcher:
         self._logger = logger
         self._pipelines = pipelines
 
-    async def put_item(self, item: BaseItem) -> BaseItem:
+    async def __call__(self, item: BaseItem) -> BaseItem:
         self._logger.debug(f"pipeline item received: {item}")
         try:
             pipelines = self._pipelines[item.pipeline_name]
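
Since put_item is now __call__, the dispatcher injected into scrappers is awaited directly. A minimal sketch of the new call style, assuming the dispatcher arrives through the "pipeline" kwarg built in the executor hunk below; PriceItem and the callbacks are hypothetical:

from dataclasses import dataclass, field

from aioscrapper.scrapper import BaseScrapper
from aioscrapper.types import Pipeline, RequestSender, Response


@dataclass
class PriceItem:
    # A plain attribute is enough to satisfy the BaseItem protocol's
    # read-only pipeline_name property.
    url: str
    price: float
    pipeline_name: str = field(default="prices", init=False)


class Scrapper(BaseScrapper):
    async def initialize(self, pipeline: Pipeline) -> None:
        # The executor injects the dispatcher under the "pipeline" kwarg.
        self._pipeline = pipeline

    async def start(self, send_request: RequestSender) -> None:
        await send_request(url="https://example.com", callback=self.parse)

    async def parse(self, response: Response) -> None:
        # 0.1.2: await self._pipeline.put_item(item)
        # 0.2.0: the dispatcher itself is the awaitable callable.
        await self._pipeline(PriceItem(url="https://example.com", price=9.99))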
aioscrapper-0.2.0/aioscrapper/scrapper/base.py (new file)
@@ -0,0 +1,30 @@
+import abc
+
+
+class BaseScrapper(abc.ABC):
+    @abc.abstractmethod
+    async def start(self, *args, **kwargs) -> None:
+        """
+        Starts the scrapper.
+
+        This method is called to start the scraper by sending the initial requests required for its operation.
+        """
+        ...
+
+    async def initialize(self, *args, **kwargs) -> None:
+        """
+        Initializes the scrapper.
+
+        This method is called before starting the scrapper. It should be used to initialize any
+        necessary state or resources required by the scrapper.
+        """
+        ...
+
+    async def close(self, *args, **kwargs) -> None:
+        """
+        Closes the scrapper.
+
+        This method is called to clean up any resources created by the scrapper after it has finished
+        running.
+        """
+        ...
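
The 0.2.0 base class documents a three-hook lifecycle: initialize() runs before the scrapper starts, start() (the only abstract method) sends the initial requests, and close() cleans up afterwards. A minimal sketch of a conforming subclass; NewsScrapper and its state are hypothetical:

from aioscrapper.scrapper import BaseScrapper
from aioscrapper.types import RequestSender, Response


class NewsScrapper(BaseScrapper):
    async def initialize(self) -> None:
        # Runs before start(); set up any state the scrapper needs.
        self.seen_urls: set[str] = set()

    async def start(self, send_request: RequestSender) -> None:
        # The only abstract hook: enqueue the initial requests.
        await send_request(url="https://example.com/news", callback=self.parse)

    async def parse(self, response: Response) -> None:
        self.seen_urls.add("https://example.com/news")

    async def close(self) -> None:
        # Runs after the scrapper finishes; release anything initialize() made.
        self.seen_urls.clear()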
{aioscrapper-0.1.2 → aioscrapper-0.2.0}/aioscrapper/scrapper/executor.py
@@ -9,7 +9,8 @@ from aiojobs import Scheduler
 from .request_manager import RequestManager
 from ..config import Config
 from ..helpers import get_func_kwargs
-from ..pipeline import PipelineDispatcher, BasePipeline
+from ..pipeline import BasePipeline
+from ..pipeline.dispatcher import PipelineDispatcher
 from ..scrapper import BaseScrapper
 from ..session.aiohttp import AiohttpSession
 from ..types import RequestMiddleware, ResponseMiddleware
@@ -94,7 +95,7 @@ class AIOScrapper:
         await self._pipeline_dispatcher.initialize()
         self._request_manager.listen_queue()
 
-        scrapper_kwargs = {"request_sender": self._request_manager.sender, "pipeline": self._pipeline_dispatcher}
+        scrapper_kwargs = {"send_request": self._request_manager.sender, "pipeline": self._pipeline_dispatcher}
         for scrapper in self._scrappers:
             await scrapper.initialize(**get_func_kwargs(scrapper.initialize, scrapper_kwargs))
 
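
The executor filters scrapper_kwargs through get_func_kwargs per call, so each scrapper method receives only the kwargs named in its own signature. That is what makes the request_sender → send_request rename breaking: a 0.1.2-style start(self, request_sender) signature would no longer match the injected kwarg. For illustration only, a presumed sketch of such a filter; the actual helper in aioscrapper/helpers.py is not shown in this diff and may differ:

import inspect
from typing import Any, Callable


def get_func_kwargs(func: Callable[..., Any], kwargs: dict[str, Any]) -> dict[str, Any]:
    # Keep only the entries whose keys appear in the callable's signature.
    params = inspect.signature(func).parameters
    return {name: value for name, value in kwargs.items() if name in params}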
{aioscrapper-0.1.2 → aioscrapper-0.2.0}/aioscrapper/types/__init__.py
@@ -1,4 +1,5 @@
 from .middleware import RequestMiddleware, ResponseMiddleware
+from .pipeline import BaseItem, Pipeline
 from .session import (
     QueryParams,
     Cookies,
aioscrapper-0.2.0/aioscrapper/types/pipeline.py (new file)
@@ -0,0 +1,10 @@
+from typing import Protocol
+
+
+class BaseItem(Protocol):
+    @property
+    def pipeline_name(self) -> str: ...
+
+
+class Pipeline(Protocol):
+    async def __call__(self, item: BaseItem) -> BaseItem: ...
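
BaseItem and Pipeline are typing.Protocol classes, so conformance is structural: any object exposing a str pipeline_name counts as an item, and any object with a matching async __call__ counts as a pipeline; no inheritance is required. PipelineDispatcher.__call__ above has exactly this shape, so the dispatcher itself satisfies Pipeline. A sketch under those assumptions; ArticleItem and LoggingPipeline are hypothetical:

from dataclasses import dataclass, field

from aioscrapper.types import BaseItem, Pipeline


@dataclass
class ArticleItem:
    title: str
    pipeline_name: str = field(default="articles", init=False)


class LoggingPipeline:
    # Matches Pipeline's async __call__ signature, nothing more needed.
    async def __call__(self, item: BaseItem) -> BaseItem:
        print(f"item for pipeline {item.pipeline_name!r}")
        return item


# Both assignments type-check via structural subtyping.
item: BaseItem = ArticleItem(title="hello")
pipeline: Pipeline = LoggingPipeline()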
{aioscrapper-0.1.2 → aioscrapper-0.2.0}/aioscrapper.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aioscrapper
-Version: 0.1.2
+Version: 0.2.0
 Summary: Async framework for building modular and scalable web scrapers.
 Author: darkstussy
 Project-URL: Homepage, https://github.com/darkstussy/aioscrapper
@@ -68,8 +68,8 @@ from aioscrapper.types import Response, RequestSender
 
 
 class Scrapper(BaseScrapper):
-    async def start(self, request_sender: RequestSender) -> None:
-        await request_sender(url="https://example.com", callback=self.parse)
+    async def start(self, send_request: RequestSender) -> None:
+        await send_request(url="https://example.com", callback=self.parse)
 
     async def parse(self, response: Response) -> None:
         # handle response
{aioscrapper-0.1.2 → aioscrapper-0.2.0}/aioscrapper.egg-info/SOURCES.txt
@@ -22,6 +22,7 @@ aioscrapper/session/aiohttp.py
 aioscrapper/session/base.py
 aioscrapper/types/__init__.py
 aioscrapper/types/middleware.py
+aioscrapper/types/pipeline.py
 aioscrapper/types/session.py
 tests/test_error.py
 tests/test_success.py
{aioscrapper-0.1.2 → aioscrapper-0.2.0}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "aioscrapper"
-version = "0.1.2"
+version = "0.2.0"
 authors = [{ name = "darkstussy" }, ]
 description = "Async framework for building modular and scalable web scrapers."
 readme = "README.md"
{aioscrapper-0.1.2 → aioscrapper-0.2.0}/tests/test_error.py
@@ -12,8 +12,8 @@ class Scrapper(BaseScrapper):
         self.status = None
         self.response_data = None
 
-    async def start(self, request_sender: RequestSender) -> None:
-        await request_sender(url="https://api.test.com/v1", errback=self.errback)
+    async def start(self, send_request: RequestSender) -> None:
+        await send_request(url="https://api.test.com/v1", errback=self.errback)
 
     async def errback(self, exc: ClientException) -> None:
         if isinstance(exc, HTTPException):
{aioscrapper-0.1.2 → aioscrapper-0.2.0}/tests/test_success.py
@@ -10,8 +10,8 @@ class Scrapper(BaseScrapper):
     def __init__(self):
         self.response_data = None
 
-    async def start(self, request_sender: RequestSender) -> None:
-        await request_sender(url="https://api.test.com/v1", callback=self.parse)
+    async def start(self, send_request: RequestSender) -> None:
+        await send_request(url="https://api.test.com/v1", callback=self.parse)
 
     async def parse(self, response: Response) -> None:
         self.response_data = response.json()
aioscrapper-0.1.2/aioscrapper/pipeline/__init__.py (deleted)
@@ -1,2 +0,0 @@
-from .base import BasePipeline, BaseItem
-from .dispatcher import PipelineDispatcher
aioscrapper-0.1.2/aioscrapper/scrapper/base.py (deleted)
@@ -1,10 +0,0 @@
-import abc
-
-
-class BaseScrapper(abc.ABC):
-    @abc.abstractmethod
-    async def start(self, *args, **kwargs) -> None: ...
-
-    async def initialize(self, *args, **kwargs) -> None: ...
-
-    async def close(self, *args, **kwargs) -> None: ...