apify 2.0.0b13.tar.gz → 2.0.0b15.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of apify might be problematic.

Files changed (39)
  1. {apify-2.0.0b13 → apify-2.0.0b15}/PKG-INFO +96 -15
  2. apify-2.0.0b15/README.md +171 -0
  3. {apify-2.0.0b13 → apify-2.0.0b15}/pyproject.toml +3 -3
  4. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/_actor.py +22 -6
  5. apify-2.0.0b15/src/apify/log.py +43 -0
  6. apify-2.0.0b13/README.md +0 -90
  7. apify-2.0.0b13/src/apify/log.py +0 -15
  8. {apify-2.0.0b13 → apify-2.0.0b15}/LICENSE +0 -0
  9. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/__init__.py +0 -0
  10. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/_configuration.py +0 -0
  11. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/_consts.py +0 -0
  12. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/_crypto.py +0 -0
  13. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/_models.py +0 -0
  14. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/_platform_event_manager.py +0 -0
  15. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/_proxy_configuration.py +0 -0
  16. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/_utils.py +0 -0
  17. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/apify_storage_client/__init__.py +0 -0
  18. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/apify_storage_client/_apify_storage_client.py +0 -0
  19. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/apify_storage_client/_dataset_client.py +0 -0
  20. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/apify_storage_client/_dataset_collection_client.py +0 -0
  21. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/apify_storage_client/_key_value_store_client.py +0 -0
  22. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/apify_storage_client/_key_value_store_collection_client.py +0 -0
  23. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/apify_storage_client/_request_queue_client.py +0 -0
  24. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/apify_storage_client/_request_queue_collection_client.py +0 -0
  25. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/apify_storage_client/py.typed +0 -0
  26. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/py.typed +0 -0
  27. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/scrapy/__init__.py +0 -0
  28. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/scrapy/middlewares/__init__.py +0 -0
  29. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/scrapy/middlewares/apify_proxy.py +0 -0
  30. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/scrapy/middlewares/py.typed +0 -0
  31. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/scrapy/pipelines/__init__.py +0 -0
  32. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/scrapy/pipelines/actor_dataset_push.py +0 -0
  33. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/scrapy/pipelines/py.typed +0 -0
  34. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/scrapy/py.typed +0 -0
  35. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/scrapy/requests.py +0 -0
  36. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/scrapy/scheduler.py +0 -0
  37. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/scrapy/utils.py +0 -0
  38. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/storages/__init__.py +0 -0
  39. {apify-2.0.0b13 → apify-2.0.0b15}/src/apify/storages/py.typed +0 -0

{apify-2.0.0b13 → apify-2.0.0b15}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: apify
- Version: 2.0.0b13
+ Version: 2.0.0b15
  Summary: Apify SDK for Python
  License: Apache-2.0
  Keywords: apify,sdk,automation,chrome,crawlee,crawler,headless,scraper,scraping
@@ -20,7 +20,7 @@ Classifier: Topic :: Software Development :: Libraries
  Provides-Extra: scrapy
  Requires-Dist: apify-client (>=1.7.1)
  Requires-Dist: apify-shared (>=1.1.2)
- Requires-Dist: crawlee (>=0.3.0)
+ Requires-Dist: crawlee (>=0.3.5)
  Requires-Dist: cryptography (>=42.0.0)
  Requires-Dist: httpx (>=0.27.0)
  Requires-Dist: lazy-object-proxy (>=1.10.0)
@@ -64,27 +64,108 @@ pip install apify[scrapy]

  For usage instructions, check the documentation on [Apify Docs](https://docs.apify.com/sdk/python/).

- ## Example
+ ## Examples
+
+ Below are a few examples demonstrating how to use the Apify SDK with some web scraping-related libraries.
+
+ ### Apify SDK with HTTPX and BeautifulSoup
+
+ This example illustrates how to integrate the Apify SDK with [HTTPX](https://www.python-httpx.org/) and [BeautifulSoup](https://pypi.org/project/beautifulsoup4/) to scrape data from web pages.

  ```python
  from apify import Actor
  from bs4 import BeautifulSoup
  from httpx import AsyncClient

+
+ async def main() -> None:
+     async with Actor:
+         # Retrieve the Actor input, and use default values if not provided.
+         actor_input = await Actor.get_input() or {}
+         start_urls = actor_input.get('start_urls', [{'url': 'https://apify.com'}])
+
+         # Open the default request queue for handling URLs to be processed.
+         request_queue = await Actor.open_request_queue()
+
+         # Enqueue the start URLs.
+         for start_url in start_urls:
+             url = start_url.get('url')
+             await request_queue.add_request(url)
+
+         # Process the URLs from the request queue.
+         while request := await request_queue.fetch_next_request():
+             Actor.log.info(f'Scraping {request.url} ...')
+
+             # Fetch the HTTP response from the specified URL using HTTPX.
+             async with AsyncClient() as client:
+                 response = await client.get(request.url)
+
+             # Parse the HTML content using Beautiful Soup.
+             soup = BeautifulSoup(response.content, 'html.parser')
+
+             # Extract the desired data.
+             data = {
+                 'url': request.url,
+                 'title': soup.title.string,
+                 'h1s': [h1.text for h1 in soup.find_all('h1')],
+                 'h2s': [h2.text for h2 in soup.find_all('h2')],
+                 'h3s': [h3.text for h3 in soup.find_all('h3')],
+             }
+
+             # Store the extracted data to the default dataset.
+             await Actor.push_data(data)
+ ```
+
+ ### Apify SDK with PlaywrightCrawler from Crawlee
+
+ This example demonstrates how to use the Apify SDK alongside `PlaywrightCrawler` from [Crawlee](https://crawlee.dev/python) to perform web scraping.
+
+ ```python
+ from apify import Actor, Request
+ from crawlee.playwright_crawler import PlaywrightCrawler, PlaywrightCrawlingContext
+
+
  async def main() -> None:
      async with Actor:
-         # Read the input parameters from the Actor input
-         actor_input = await Actor.get_input()
-         # Fetch the HTTP response from the specified URL
-         async with AsyncClient() as client:
-             response = await client.get(actor_input['url'])
-         # Process the HTML content
-         soup = BeautifulSoup(response.content, 'html.parser')
-         # Push the extracted data
-         await Actor.push_data({
-             'url': actor_input['url'],
-             'title': soup.title.string,
-         })
+         # Retrieve the Actor input, and use default values if not provided.
+         actor_input = await Actor.get_input() or {}
+         start_urls = [url.get('url') for url in actor_input.get('start_urls', [{'url': 'https://apify.com'}])]
+
+         # Exit if no start URLs are provided.
+         if not start_urls:
+             Actor.log.info('No start URLs specified in Actor input, exiting...')
+             await Actor.exit()
+
+         # Create a crawler.
+         crawler = PlaywrightCrawler(
+             # Limit the crawl to max requests. Remove or increase it for crawling all links.
+             max_requests_per_crawl=50,
+             headless=True,
+         )
+
+         # Define a request handler, which will be called for every request.
+         @crawler.router.default_handler
+         async def request_handler(context: PlaywrightCrawlingContext) -> None:
+             url = context.request.url
+             Actor.log.info(f'Scraping {url}...')
+
+             # Extract the desired data.
+             data = {
+                 'url': context.request.url,
+                 'title': await context.page.title(),
+                 'h1s': [await h1.text_content() for h1 in await context.page.locator('h1').all()],
+                 'h2s': [await h2.text_content() for h2 in await context.page.locator('h2').all()],
+                 'h3s': [await h3.text_content() for h3 in await context.page.locator('h3').all()],
+             }
+
+             # Store the extracted data to the default dataset.
+             await context.push_data(data)
+
+             # Enqueue additional links found on the current page.
+             await context.enqueue_links()
+
+         # Run the crawler with the starting URLs.
+         await crawler.run(start_urls)
  ```

  ## What are Actors?

apify-2.0.0b15/README.md ADDED

@@ -0,0 +1,171 @@
+ # Apify SDK for Python
+
+ The Apify SDK for Python is the official library to create [Apify Actors](https://docs.apify.com/platform/actors)
+ in Python. It provides useful features like Actor lifecycle management, local storage emulation, and Actor
+ event handling.
+
+ If you just need to access the [Apify API](https://docs.apify.com/api/v2) from your Python applications,
+ check out the [Apify Client for Python](https://docs.apify.com/api/client/python) instead.
+
+ ## Installation
+
+ The Apify SDK for Python is available on PyPI as the `apify` package.
+ For default installation, using Pip, run the following:
+
+ ```bash
+ pip install apify
+ ```
+
+ For users interested in integrating Apify with Scrapy, we provide a package extra called `scrapy`.
+ To install Apify with the `scrapy` extra, use the following command:
+
+ ```bash
+ pip install apify[scrapy]
+ ```
+
+ ## Documentation
+
+ For usage instructions, check the documentation on [Apify Docs](https://docs.apify.com/sdk/python/).
+
+ ## Examples
+
+ Below are a few examples demonstrating how to use the Apify SDK with some web scraping-related libraries.
+
+ ### Apify SDK with HTTPX and BeautifulSoup
+
+ This example illustrates how to integrate the Apify SDK with [HTTPX](https://www.python-httpx.org/) and [BeautifulSoup](https://pypi.org/project/beautifulsoup4/) to scrape data from web pages.
+
+ ```python
+ from apify import Actor
+ from bs4 import BeautifulSoup
+ from httpx import AsyncClient
+
+
+ async def main() -> None:
+     async with Actor:
+         # Retrieve the Actor input, and use default values if not provided.
+         actor_input = await Actor.get_input() or {}
+         start_urls = actor_input.get('start_urls', [{'url': 'https://apify.com'}])
+
+         # Open the default request queue for handling URLs to be processed.
+         request_queue = await Actor.open_request_queue()
+
+         # Enqueue the start URLs.
+         for start_url in start_urls:
+             url = start_url.get('url')
+             await request_queue.add_request(url)
+
+         # Process the URLs from the request queue.
+         while request := await request_queue.fetch_next_request():
+             Actor.log.info(f'Scraping {request.url} ...')
+
+             # Fetch the HTTP response from the specified URL using HTTPX.
+             async with AsyncClient() as client:
+                 response = await client.get(request.url)
+
+             # Parse the HTML content using Beautiful Soup.
+             soup = BeautifulSoup(response.content, 'html.parser')
+
+             # Extract the desired data.
+             data = {
+                 'url': request.url,
+                 'title': soup.title.string,
+                 'h1s': [h1.text for h1 in soup.find_all('h1')],
+                 'h2s': [h2.text for h2 in soup.find_all('h2')],
+                 'h3s': [h3.text for h3 in soup.find_all('h3')],
+             }
+
+             # Store the extracted data to the default dataset.
+             await Actor.push_data(data)
+ ```
+
+ ### Apify SDK with PlaywrightCrawler from Crawlee
+
+ This example demonstrates how to use the Apify SDK alongside `PlaywrightCrawler` from [Crawlee](https://crawlee.dev/python) to perform web scraping.
+
+ ```python
+ from apify import Actor, Request
+ from crawlee.playwright_crawler import PlaywrightCrawler, PlaywrightCrawlingContext
+
+
+ async def main() -> None:
+     async with Actor:
+         # Retrieve the Actor input, and use default values if not provided.
+         actor_input = await Actor.get_input() or {}
+         start_urls = [url.get('url') for url in actor_input.get('start_urls', [{'url': 'https://apify.com'}])]
+
+         # Exit if no start URLs are provided.
+         if not start_urls:
+             Actor.log.info('No start URLs specified in Actor input, exiting...')
+             await Actor.exit()
+
+         # Create a crawler.
+         crawler = PlaywrightCrawler(
+             # Limit the crawl to max requests. Remove or increase it for crawling all links.
+             max_requests_per_crawl=50,
+             headless=True,
+         )
+
+         # Define a request handler, which will be called for every request.
+         @crawler.router.default_handler
+         async def request_handler(context: PlaywrightCrawlingContext) -> None:
+             url = context.request.url
+             Actor.log.info(f'Scraping {url}...')
+
+             # Extract the desired data.
+             data = {
+                 'url': context.request.url,
+                 'title': await context.page.title(),
+                 'h1s': [await h1.text_content() for h1 in await context.page.locator('h1').all()],
+                 'h2s': [await h2.text_content() for h2 in await context.page.locator('h2').all()],
+                 'h3s': [await h3.text_content() for h3 in await context.page.locator('h3').all()],
+             }
+
+             # Store the extracted data to the default dataset.
+             await context.push_data(data)
+
+             # Enqueue additional links found on the current page.
+             await context.enqueue_links()
+
+         # Run the crawler with the starting URLs.
+         await crawler.run(start_urls)
+ ```
+
+ ## What are Actors?
+
+ Actors are serverless cloud programs that can do almost anything a human can do in a web browser.
+ They can do anything from small tasks such as filling in forms or unsubscribing from online services,
+ all the way up to scraping and processing vast numbers of web pages.
+
+ They can be run either locally, or on the [Apify platform](https://docs.apify.com/platform/),
+ where you can run them at scale, monitor them, schedule them, or publish and monetize them.
+
+ If you're new to Apify, learn [what is Apify](https://docs.apify.com/platform/about)
+ in the Apify platform documentation.
+
+ ## Creating Actors
+
+ To create and run Actors through Apify Console,
+ see the [Console documentation](https://docs.apify.com/academy/getting-started/creating-actors#choose-your-template).
+
+ To create and run Python Actors locally, check the documentation for
+ [how to create and run Python Actors locally](https://docs.apify.com/sdk/python/docs/overview/running-locally).
+
+ ## Guides
+
+ To see how you can use the Apify SDK with other popular libraries used for web scraping,
+ check out our guides for using
+ [Requests and HTTPX](https://docs.apify.com/sdk/python/docs/guides/requests-and-httpx),
+ [Beautiful Soup](https://docs.apify.com/sdk/python/docs/guides/beautiful-soup),
+ [Playwright](https://docs.apify.com/sdk/python/docs/guides/playwright),
+ [Selenium](https://docs.apify.com/sdk/python/docs/guides/selenium),
+ or [Scrapy](https://docs.apify.com/sdk/python/docs/guides/scrapy).
+
+ ## Usage concepts
+
+ To learn more about the features of the Apify SDK and how to use them,
+ check out the Usage Concepts section in the sidebar,
+ particularly the guides for the [Actor lifecycle](https://docs.apify.com/sdk/python/docs/concepts/actor-lifecycle),
+ [working with storages](https://docs.apify.com/sdk/python/docs/concepts/storages),
+ [handling Actor events](https://docs.apify.com/sdk/python/docs/concepts/actor-events)
+ or [how to use proxies](https://docs.apify.com/sdk/python/docs/concepts/proxy-management).

{apify-2.0.0b13 → apify-2.0.0b15}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

  [tool.poetry]
  name = "apify"
- version = "2.0.0b13"
+ version = "2.0.0b15"
  description = "Apify SDK for Python"
  authors = ["Apify Technologies s.r.o. <support@apify.com>"]
  license = "Apache-2.0"
@@ -48,7 +48,7 @@ keywords = [
  python = "^3.9"
  apify-client = ">=1.7.1"
  apify-shared = ">=1.1.2"
- crawlee = ">=0.3.0"
+ crawlee = ">=0.3.5"
  cryptography = ">=42.0.0"
  httpx = ">=0.27.0"
  lazy-object-proxy = ">=1.10.0"
@@ -58,7 +58,7 @@ websockets = ">=10.0"

  [tool.poetry.group.dev.dependencies]
  build = "~1.2.0"
- filelock = "~3.15.0"
+ filelock = "~3.16.0"
  griffe = "~1.2.0"
  mypy = "~1.11.0"
  pre-commit = "~3.8.0"

{apify-2.0.0b13 → apify-2.0.0b15}/src/apify/_actor.py

@@ -24,7 +24,7 @@ from apify._platform_event_manager import EventManager, LocalEventManager, Platf
  from apify._proxy_configuration import ProxyConfiguration
  from apify._utils import get_system_info, is_running_in_ipython
  from apify.apify_storage_client import ApifyStorageClient
- from apify.log import logger
+ from apify.log import _configure_logging, logger
  from apify.storages import Dataset, KeyValueStore, RequestQueue

  if TYPE_CHECKING:
@@ -46,16 +46,24 @@ class _ActorType:
      _configuration: Configuration
      _is_exiting = False

-     def __init__(self, config: Configuration | None = None) -> None:
+     def __init__(
+         self,
+         configuration: Configuration | None = None,
+         *,
+         configure_logging: bool = True,
+     ) -> None:
          """Create an Actor instance.

          Note that you don't have to do this, all the functionality is accessible using the default instance
          (e.g. `Actor.open_dataset()`).

          Args:
-             config: The Actor configuration to be used. If not passed, a new Configuration instance will be created.
+             configuration: The Actor configuration to be used. If not passed, a new Configuration instance will
+                 be created.
+             configure_logging: Should the default logging configuration be configured?
          """
-         self._configuration = config or Configuration.get_global_configuration()
+         self._configuration = configuration or Configuration.get_global_configuration()
+         self._configure_logging = configure_logging
          self._apify_client = self.new_client()

          self._event_manager: EventManager
@@ -81,6 +89,9 @@ class _ActorType:
          When you exit the `async with` block, the `Actor.exit()` method is called, and if any exception happens while
          executing the block code, the `Actor.fail` method is called.
          """
+         if self._configure_logging:
+             _configure_logging(self._configuration)
+
          await self.init()
          return self

@@ -111,15 +122,20 @@ class _ActorType:

          return super().__repr__()

-     def __call__(self, config: Configuration) -> Self:
+     def __call__(self, configuration: Configuration | None = None, *, configure_logging: bool = True) -> Self:
          """Make a new Actor instance with a non-default configuration."""
-         return self.__class__(config=config)
+         return self.__class__(configuration=configuration, configure_logging=configure_logging)

      @property
      def apify_client(self) -> ApifyClientAsync:
          """The ApifyClientAsync instance the Actor instance uses."""
          return self._apify_client

+     @property
+     def configuration(self) -> Configuration:
+         """The Configuration instance the Actor instance uses."""
+         return self._configuration
+

      @property
      def config(self) -> Configuration:

apify-2.0.0b15/src/apify/log.py ADDED

@@ -0,0 +1,43 @@
+ from __future__ import annotations
+
+ import logging
+ from typing import TYPE_CHECKING
+
+ from crawlee._log_config import CrawleeLogFormatter, configure_logger, get_configured_log_level
+
+ if TYPE_CHECKING:
+     from apify import Configuration
+
+ # Name of the logger used throughout the library (resolves to 'apify')
+ logger_name = __name__.split('.')[0]
+
+ # Logger used throughout the library
+ logger = logging.getLogger(logger_name)
+
+
+ class ActorLogFormatter(CrawleeLogFormatter):  # noqa: D101 Inherited from parent class
+     pass
+
+
+ def _configure_logging(configuration: Configuration) -> None:
+     apify_client_logger = logging.getLogger('apify_client')
+     configure_logger(apify_client_logger, configuration, remove_old_handlers=True)
+
+     level = get_configured_log_level(configuration)
+
+     # Keep apify_client logger quiet unless debug logging is requested
+     if level > logging.DEBUG:
+         apify_client_logger.setLevel(logging.INFO)
+     else:
+         apify_client_logger.setLevel(level)
+
+     # Silence HTTPX logger unless debug logging is requested
+     httpx_logger = logging.getLogger('httpx')
+     if level > logging.DEBUG:
+         httpx_logger.setLevel(logging.WARNING)
+     else:
+         httpx_logger.setLevel(level)
+
+     # Use configured log level for apify logger
+     apify_logger = logging.getLogger('apify')
+     configure_logger(apify_logger, configuration, remove_old_handlers=True)
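
The new `_configure_logging` helper above is what `Actor.__aenter__` now calls when `configure_logging` is left at its default (see the `_actor.py` hunk earlier). A rough sketch of the resulting behaviour, assuming the configured log level resolves to `INFO`:

```python
import logging

from apify import Actor


async def main() -> None:
    # Entering the context with the defaults applies _configure_logging().
    async with Actor:
        # The 'apify' logger is handled by Crawlee's handler/formatter at the configured level.
        Actor.log.info('visible at INFO')

        # 'httpx' is capped at WARNING and 'apify_client' at INFO unless the
        # configured level is DEBUG, so routine request chatter stays quiet.
        logging.getLogger('httpx').info('suppressed unless debug logging is enabled')
```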
apify-2.0.0b13/README.md DELETED
@@ -1,90 +0,0 @@
- # Apify SDK for Python
-
- The Apify SDK for Python is the official library to create [Apify Actors](https://docs.apify.com/platform/actors)
- in Python. It provides useful features like Actor lifecycle management, local storage emulation, and Actor
- event handling.
-
- If you just need to access the [Apify API](https://docs.apify.com/api/v2) from your Python applications,
- check out the [Apify Client for Python](https://docs.apify.com/api/client/python) instead.
-
- ## Installation
-
- The Apify SDK for Python is available on PyPI as the `apify` package.
- For default installation, using Pip, run the following:
-
- ```bash
- pip install apify
- ```
-
- For users interested in integrating Apify with Scrapy, we provide a package extra called `scrapy`.
- To install Apify with the `scrapy` extra, use the following command:
-
- ```bash
- pip install apify[scrapy]
- ```
-
- ## Documentation
-
- For usage instructions, check the documentation on [Apify Docs](https://docs.apify.com/sdk/python/).
-
- ## Example
-
- ```python
- from apify import Actor
- from bs4 import BeautifulSoup
- from httpx import AsyncClient
-
- async def main() -> None:
-     async with Actor:
-         # Read the input parameters from the Actor input
-         actor_input = await Actor.get_input()
-         # Fetch the HTTP response from the specified URL
-         async with AsyncClient() as client:
-             response = await client.get(actor_input['url'])
-         # Process the HTML content
-         soup = BeautifulSoup(response.content, 'html.parser')
-         # Push the extracted data
-         await Actor.push_data({
-             'url': actor_input['url'],
-             'title': soup.title.string,
-         })
- ```
-
- ## What are Actors?
-
- Actors are serverless cloud programs that can do almost anything a human can do in a web browser.
- They can do anything from small tasks such as filling in forms or unsubscribing from online services,
- all the way up to scraping and processing vast numbers of web pages.
-
- They can be run either locally, or on the [Apify platform](https://docs.apify.com/platform/),
- where you can run them at scale, monitor them, schedule them, or publish and monetize them.
-
- If you're new to Apify, learn [what is Apify](https://docs.apify.com/platform/about)
- in the Apify platform documentation.
-
- ## Creating Actors
-
- To create and run Actors through Apify Console,
- see the [Console documentation](https://docs.apify.com/academy/getting-started/creating-actors#choose-your-template).
-
- To create and run Python Actors locally, check the documentation for
- [how to create and run Python Actors locally](https://docs.apify.com/sdk/python/docs/overview/running-locally).
-
- ## Guides
-
- To see how you can use the Apify SDK with other popular libraries used for web scraping,
- check out our guides for using
- [Requests and HTTPX](https://docs.apify.com/sdk/python/docs/guides/requests-and-httpx),
- [Beautiful Soup](https://docs.apify.com/sdk/python/docs/guides/beautiful-soup),
- [Playwright](https://docs.apify.com/sdk/python/docs/guides/playwright),
- [Selenium](https://docs.apify.com/sdk/python/docs/guides/selenium),
- or [Scrapy](https://docs.apify.com/sdk/python/docs/guides/scrapy).
-
- ## Usage concepts
-
- To learn more about the features of the Apify SDK and how to use them,
- check out the Usage Concepts section in the sidebar,
- particularly the guides for the [Actor lifecycle](https://docs.apify.com/sdk/python/docs/concepts/actor-lifecycle),
- [working with storages](https://docs.apify.com/sdk/python/docs/concepts/storages),
- [handling Actor events](https://docs.apify.com/sdk/python/docs/concepts/actor-events)
- or [how to use proxies](https://docs.apify.com/sdk/python/docs/concepts/proxy-management).

apify-2.0.0b13/src/apify/log.py DELETED

@@ -1,15 +0,0 @@
- from __future__ import annotations
-
- import logging
-
- from crawlee._log_config import CrawleeLogFormatter
-
- # Name of the logger used throughout the library (resolves to 'apify')
- logger_name = __name__.split('.')[0]
-
- # Logger used throughout the library
- logger = logging.getLogger(logger_name)
-
-
- class ActorLogFormatter(CrawleeLogFormatter):  # noqa: D101 Inherited from parent class
-     pass