apify 2.1.0__py3-none-any.whl → 2.1.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of apify might be problematic.
- apify/__init__.py +1 -1
- apify/_actor.py +2 -5
- apify/_configuration.py +2 -5
- apify/_models.py +1 -8
- apify/_platform_event_manager.py +2 -13
- apify/_proxy_configuration.py +1 -4
- apify/_utils.py +0 -33
- apify/apify_storage_client/_apify_storage_client.py +0 -2
- apify/apify_storage_client/_dataset_client.py +2 -3
- apify/log.py +1 -1
- apify/scrapy/__init__.py +2 -2
- apify/scrapy/middlewares/apify_proxy.py +6 -8
- apify/scrapy/pipelines/actor_dataset_push.py +2 -5
- apify/scrapy/requests.py +5 -7
- apify/scrapy/scheduler.py +6 -9
- apify/scrapy/utils.py +1 -4
- apify/storages/__init__.py +1 -3
- {apify-2.1.0.dist-info → apify-2.1.0b1.dist-info}/METADATA +4 -4
- apify-2.1.0b1.dist-info/RECORD +37 -0
- apify/storages/_request_list.py +0 -150
- apify-2.1.0.dist-info/RECORD +0 -38
- {apify-2.1.0.dist-info → apify-2.1.0b1.dist-info}/LICENSE +0 -0
- {apify-2.1.0.dist-info → apify-2.1.0b1.dist-info}/WHEEL +0 -0
apify/__init__.py
CHANGED
apify/_actor.py
CHANGED

@@ -8,6 +8,7 @@ from typing import TYPE_CHECKING, Any, Callable, TypeVar, cast
 
 from lazy_object_proxy import Proxy
 from pydantic import AliasChoices
+from typing_extensions import Self
 
 from apify_client import ApifyClientAsync
 from apify_shared.consts import ActorEnvVars, ActorExitCodes, ApifyEnvVars
@@ -21,7 +22,7 @@ from apify._crypto import decrypt_input_secrets, load_private_key
 from apify._models import ActorRun
 from apify._platform_event_manager import EventManager, LocalEventManager, PlatformEventManager
 from apify._proxy_configuration import ProxyConfiguration
-from apify._utils import
+from apify._utils import get_system_info, is_running_in_ipython
 from apify.apify_storage_client import ApifyStorageClient
 from apify.log import _configure_logging, logger
 from apify.storages import Dataset, KeyValueStore, RequestQueue
@@ -30,8 +31,6 @@ if TYPE_CHECKING:
     import logging
     from types import TracebackType
 
-    from typing_extensions import Self
-
     from crawlee.proxy_configuration import _NewUrlFunction
 
     from apify._models import Webhook
@@ -40,8 +39,6 @@ if TYPE_CHECKING:
 MainReturnType = TypeVar('MainReturnType')
 
 
-@docs_name('Actor')
-@docs_group('Classes')
 class _ActorType:
     """The class of `Actor`. Only make a new instance if you're absolutely sure you need to."""
 

apify/_configuration.py
CHANGED

@@ -1,3 +1,4 @@
+# ruff: noqa: TCH001 TCH002 TCH003 (so that pydantic annotations work)
 from __future__ import annotations
 
 from datetime import datetime, timedelta
@@ -10,10 +11,7 @@ from crawlee._utils.models import timedelta_ms
 from crawlee._utils.urls import validate_http_url
 from crawlee.configuration import Configuration as CrawleeConfiguration
 
-from apify._utils import docs_group
 
-
-@docs_group('Classes')
 class Configuration(CrawleeConfiguration):
     """A class for specifying the configuration of an Actor.
 
@@ -253,7 +251,6 @@ class Configuration(CrawleeConfiguration):
             ),
            description='Date when the Actor will time out',
         ),
-        BeforeValidator(lambda val: val if val != '' else None),  # We should accept empty environment variables as well
     ] = None
 
     standby_port: Annotated[
@@ -323,4 +320,4 @@
 
 
 # Monkey-patch the base class so that it works with the extended configuration
-CrawleeConfiguration.get_global_configuration = Configuration.get_global_configuration  # type: ignore
+CrawleeConfiguration.get_global_configuration = Configuration.get_global_configuration  # type: ignore

apify/_models.py
CHANGED

@@ -1,3 +1,4 @@
+# ruff: noqa: TCH001 TCH002 TCH003 (Pydantic)
 from __future__ import annotations
 
 from datetime import datetime, timedelta
@@ -9,10 +10,7 @@ from apify_shared.consts import ActorJobStatus, MetaOrigin, WebhookEventType
 from crawlee._utils.models import timedelta_ms
 from crawlee._utils.urls import validate_http_url
 
-from apify._utils import docs_group
 
-
-@docs_group('Data structures')
 class Webhook(BaseModel):
     __model_config__ = ConfigDict(populate_by_name=True)
 
@@ -31,14 +29,12 @@ class Webhook(BaseModel):
     ] = None
 
 
-@docs_group('Data structures')
 class ActorRunMeta(BaseModel):
     __model_config__ = ConfigDict(populate_by_name=True)
 
     origin: Annotated[MetaOrigin, Field()]
 
 
-@docs_group('Data structures')
 class ActorRunStats(BaseModel):
     __model_config__ = ConfigDict(populate_by_name=True)
 
@@ -59,7 +55,6 @@
     compute_units: Annotated[float, Field(alias='computeUnits')]
 
 
-@docs_group('Data structures')
 class ActorRunOptions(BaseModel):
     __model_config__ = ConfigDict(populate_by_name=True)
 
@@ -69,7 +64,6 @@
     disk_mbytes: Annotated[int, Field(alias='diskMbytes')]
 
 
-@docs_group('Data structures')
 class ActorRunUsage(BaseModel):
     __model_config__ = ConfigDict(populate_by_name=True)
 
@@ -87,7 +81,6 @@
     proxy_serps: Annotated[float | None, Field(alias='PROXY_SERPS')] = None
 
 
-@docs_group('Data structures')
 class ActorRun(BaseModel):
     __model_config__ = ConfigDict(populate_by_name=True)
 

apify/_platform_event_manager.py
CHANGED

@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import asyncio
-from datetime import datetime
+from datetime import datetime  # noqa: TCH003
 from typing import TYPE_CHECKING, Annotated, Any, Literal, Union
 
 import websockets.client
@@ -19,7 +19,6 @@ from crawlee.events._types import (
     EventSystemInfoData,
 )
 
-from apify._utils import docs_group
 from apify.log import logger
 
 if TYPE_CHECKING:
@@ -31,13 +30,11 @@ if TYPE_CHECKING:
 __all__ = ['EventManager', 'LocalEventManager', 'PlatformEventManager']
 
 
-@docs_group('Data structures')
 class PersistStateEvent(BaseModel):
     name: Literal[Event.PERSIST_STATE]
     data: Annotated[EventPersistStateData, Field(default_factory=lambda: EventPersistStateData(is_migrating=False))]
 
 
-@docs_group('Data structures')
 class SystemInfoEventData(BaseModel):
     mem_avg_bytes: Annotated[float, Field(alias='memAvgBytes')]
     mem_current_bytes: Annotated[float, Field(alias='memCurrentBytes')]
@@ -52,7 +49,7 @@ class SystemInfoEventData(BaseModel):
         return EventSystemInfoData.model_validate(
             {
                 'cpu_info': {
-                    'used_ratio': self.cpu_current_usage
+                    'used_ratio': self.cpu_current_usage,
                     'created_at': self.created_at,
                 },
                 'memory_info': {
@@ -64,31 +61,26 @@
         )
 
 
-@docs_group('Data structures')
 class SystemInfoEvent(BaseModel):
     name: Literal[Event.SYSTEM_INFO]
     data: SystemInfoEventData
 
 
-@docs_group('Data structures')
 class MigratingEvent(BaseModel):
     name: Literal[Event.MIGRATING]
     data: Annotated[EventMigratingData, Field(default_factory=EventMigratingData)]
 
 
-@docs_group('Data structures')
 class AbortingEvent(BaseModel):
     name: Literal[Event.ABORTING]
     data: Annotated[EventAbortingData, Field(default_factory=EventAbortingData)]
 
 
-@docs_group('Data structures')
 class ExitEvent(BaseModel):
     name: Literal[Event.EXIT]
     data: Annotated[EventExitData, Field(default_factory=EventExitData)]
 
 
-@docs_group('Data structures')
 class EventWithoutData(BaseModel):
     name: Literal[
         Event.SESSION_RETIRED,
@@ -101,13 +93,11 @@ class EventWithoutData(BaseModel):
     data: Any = None
 
 
-@docs_group('Data structures')
 class DeprecatedEvent(BaseModel):
     name: Literal['cpuInfo']
     data: Annotated[dict[str, Any], Field(default_factory=dict)]
 
 
-@docs_group('Data structures')
 class UnknownEvent(BaseModel):
     name: str
     data: Annotated[dict[str, Any], Field(default_factory=dict)]
@@ -135,7 +125,6 @@ event_data_adapter: TypeAdapter[EventMessage | DeprecatedEvent | UnknownEvent] =
 )
 
 
-@docs_group('Classes')
 class PlatformEventManager(EventManager):
     """A class for managing Actor events.
 

apify/_proxy_configuration.py
CHANGED

@@ -16,7 +16,6 @@ from crawlee.proxy_configuration import ProxyInfo as CrawleeProxyInfo
 from crawlee.proxy_configuration import _NewUrlFunction
 
 from apify._configuration import Configuration
-from apify._utils import docs_group
 from apify.log import logger
 
 if TYPE_CHECKING:
@@ -69,7 +68,6 @@ def _check(
         raise ValueError(f'{error_str} does not match pattern {pattern.pattern!r}')
 
 
-@docs_group('Classes')
 @dataclass
 class ProxyInfo(CrawleeProxyInfo):
     """Provides information about a proxy connection that is used for requests."""
@@ -89,7 +87,6 @@ class ProxyInfo(CrawleeProxyInfo):
     """
 
 
-@docs_group('Classes')
 class ProxyConfiguration(CrawleeProxyConfiguration):
     """Configures a connection to a proxy server with the provided options.
 
@@ -280,7 +277,7 @@ class ProxyConfiguration(CrawleeProxyConfiguration):
            return
 
        status = None
-        async with httpx.AsyncClient(
+        async with httpx.AsyncClient(proxies=proxy_info.url, timeout=10) as client:
            for _ in range(2):
                try:
                    response = await client.get(proxy_status_url)

apify/_utils.py
CHANGED

@@ -3,7 +3,6 @@ from __future__ import annotations
 import builtins
 import sys
 from importlib import metadata
-from typing import Callable, Literal
 
 
 def get_system_info() -> dict:
@@ -12,7 +11,6 @@ def get_system_info() -> dict:
     system_info: dict[str, str | bool] = {
         'apify_sdk_version': metadata.version('apify'),
         'apify_client_version': metadata.version('apify-client'),
-        'crawlee_version': metadata.version('crawlee'),
         'python_version': python_version,
         'os': sys.platform,
     }
@@ -25,34 +23,3 @@
 
 def is_running_in_ipython() -> bool:
     return getattr(builtins, '__IPYTHON__', False)
-
-
-GroupName = Literal['Classes', 'Abstract classes', 'Data structures', 'Errors', 'Functions']
-
-
-def docs_group(group_name: GroupName) -> Callable:  # noqa: ARG001
-    """Decorator to mark symbols for rendering and grouping in documentation.
-
-    This decorator is used purely for documentation purposes and does not alter the behavior
-    of the decorated callable.
-    """
-
-    def wrapper(func: Callable) -> Callable:
-        return func
-
-    return wrapper
-
-
-def docs_name(symbol_name: str) -> Callable:  # noqa: ARG001
-    """Decorator for renaming symbols in documentation.
-
-    This changes the rendered name of the symbol only in the rendered web documentation.
-
-    This decorator is used purely for documentation purposes and does not alter the behavior
-    of the decorated callable.
-    """
-
-    def wrapper(func: Callable) -> Callable:
-        return func
-
-    return wrapper

apify/apify_storage_client/_apify_storage_client.py
CHANGED

@@ -5,7 +5,6 @@ from crawlee._utils.crypto import crypto_random_object_id
 from crawlee.base_storage_client import BaseStorageClient
 
 from apify._configuration import Configuration
-from apify._utils import docs_group
 from apify.apify_storage_client._dataset_client import DatasetClient
 from apify.apify_storage_client._dataset_collection_client import DatasetCollectionClient
 from apify.apify_storage_client._key_value_store_client import KeyValueStoreClient
@@ -14,7 +13,6 @@ from apify.apify_storage_client._request_queue_client import RequestQueueClient
 from apify.apify_storage_client._request_queue_collection_client import RequestQueueCollectionClient
 
 
-@docs_group('Classes')
 class ApifyStorageClient(BaseStorageClient):
     """A storage client implementation based on the Apify platform storage."""
 

apify/apify_storage_client/_dataset_client.py
CHANGED

@@ -91,7 +91,7 @@ class DatasetClient(BaseDatasetClient):
         skip_empty: bool = False,
         skip_hidden: bool = False,
     ) -> AsyncIterator[dict]:
-
+        return self._client.iterate_items(
             offset=offset,
             limit=limit,
             clean=clean,
@@ -101,8 +101,7 @@
             unwind=unwind,
             skip_empty=skip_empty,
             skip_hidden=skip_hidden,
-        )
-            yield item
+        )
 
     @override
     async def get_items_as_bytes(

apify/log.py
CHANGED
apify/scrapy/__init__.py
CHANGED

@@ -3,9 +3,9 @@ from apify.scrapy.scheduler import ApifyScheduler
 from apify.scrapy.utils import get_basic_auth_header, get_running_event_loop_id
 
 __all__ = [
+    'to_apify_request',
+    'to_scrapy_request',
     'ApifyScheduler',
     'get_basic_auth_header',
     'get_running_event_loop_id',
-    'to_apify_request',
-    'to_scrapy_request',
 ]

apify/scrapy/middlewares/apify_proxy.py
CHANGED

@@ -1,13 +1,11 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
 from urllib.parse import ParseResult, urlparse
 
 try:
-
-    from scrapy import Request, Spider
-    from scrapy.crawler import Crawler
+    from scrapy import Request, Spider  # noqa: TCH002
     from scrapy.core.downloader.handlers.http11 import TunnelError
+    from scrapy.crawler import Crawler  # noqa: TCH002
     from scrapy.exceptions import NotConfigured
 except ImportError as exc:
     raise ImportError(
@@ -29,7 +27,7 @@ class ApifyHttpProxyMiddleware:
        proxy_settings = {'useApifyProxy': true, 'apifyProxyGroups': []}
     """
 
-    def __init__(self, proxy_settings: dict) -> None:
+    def __init__(self: ApifyHttpProxyMiddleware, proxy_settings: dict) -> None:
         """Create a new instance.
 
         Args:
@@ -68,7 +66,7 @@
 
         return cls(proxy_settings)
 
-    async def process_request(self, request: Request, spider: Spider) -> None:
+    async def process_request(self: ApifyHttpProxyMiddleware, request: Request, spider: Spider) -> None:
         """Process a Scrapy request by assigning a new proxy.
 
         Args:
@@ -91,7 +89,7 @@
         Actor.log.debug(f'ApifyHttpProxyMiddleware.process_request: updated request.meta={request.meta}')
 
     def process_exception(
-        self,
+        self: ApifyHttpProxyMiddleware,
         request: Request,
         exception: Exception,
         spider: Spider,
@@ -118,7 +116,7 @@
            'reason="{exception}", skipping...'
        )
 
-    async def _get_new_proxy_url(self) -> ParseResult:
+    async def _get_new_proxy_url(self: ApifyHttpProxyMiddleware) -> ParseResult:
         """Get a new proxy URL.
 
         Raises:

apify/scrapy/pipelines/actor_dataset_push.py
CHANGED

@@ -1,12 +1,9 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
-
 from itemadapter.adapter import ItemAdapter
 
 try:
-
-    from scrapy import Item, Spider
+    from scrapy import Item, Spider  # noqa: TCH002
 except ImportError as exc:
     raise ImportError(
         'To use this module, you need to install the "scrapy" extra. Run "pip install apify[scrapy]".',
@@ -22,7 +19,7 @@ class ActorDatasetPushPipeline:
     """
 
     async def process_item(
-        self,
+        self: ActorDatasetPushPipeline,
         item: Item,
         spider: Spider,
     ) -> Item:
apify/scrapy/requests.py
CHANGED

@@ -42,10 +42,8 @@ def to_apify_request(scrapy_request: Request, spider: Spider) -> CrawleeRequest
     Returns:
         The converted Apify request if the conversion was successful, otherwise None.
     """
-    if not isinstance(scrapy_request, Request):
-        Actor.log.warning(
-            'Failed to convert to Apify request: Scrapy request must be a Request instance.'
-        )
+    if not isinstance(cast(Any, scrapy_request), Request):
+        Actor.log.warning('Failed to convert to Apify request: Scrapy request must be a Request instance.')
         return None
 
     call_id = crypto_random_object_id(8)
@@ -55,7 +53,7 @@
     if _is_request_produced_by_middleware(scrapy_request):
         unique_key = compute_unique_key(
             url=scrapy_request.url,
-            method=scrapy_request.method,
+            method=scrapy_request.method,
             payload=scrapy_request.body,
             use_extended_unique_key=True,
         )
@@ -82,9 +80,9 @@
 
     # Convert Scrapy's headers to a HttpHeaders and store them in the apify_request
     if isinstance(scrapy_request.headers, Headers):
-        apify_request.headers = HttpHeaders(
+        apify_request.headers = HttpHeaders(scrapy_request.headers.to_unicode_dict())
     else:
-        Actor.log.warning(
+        Actor.log.warning(
             f'Invalid scrapy_request.headers type, not scrapy.http.headers.Headers: {scrapy_request.headers}'
         )
 

apify/scrapy/scheduler.py
CHANGED

@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 import traceback
-from typing import TYPE_CHECKING
 
 from apify._configuration import Configuration
 from apify.apify_storage_client import ApifyStorageClient
@@ -9,10 +8,8 @@ from apify.apify_storage_client import ApifyStorageClient
 try:
     from scrapy import Spider
     from scrapy.core.scheduler import BaseScheduler
+    from scrapy.http.request import Request  # noqa: TCH002
     from scrapy.utils.reactor import is_asyncio_reactor_installed
-
-    if TYPE_CHECKING:
-        from scrapy.http.request import Request
 except ImportError as exc:
     raise ImportError(
         'To use this module, you need to install the "scrapy" extra. Run "pip install apify[scrapy]".',
@@ -32,7 +29,7 @@ class ApifyScheduler(BaseScheduler):
     This scheduler requires the asyncio Twisted reactor to be installed.
     """
 
-    def __init__(self) -> None:
+    def __init__(self: ApifyScheduler) -> None:
         """Create a new instance."""
         if not is_asyncio_reactor_installed():
             raise ValueError(
@@ -43,7 +40,7 @@
         self._rq: RequestQueue | None = None
         self.spider: Spider | None = None
 
-    def open(self, spider: Spider) -> None:  # this has to be named "open"
+    def open(self: ApifyScheduler, spider: Spider) -> None:  # this has to be named "open"
         """Open the scheduler.
 
         Args:
@@ -61,7 +58,7 @@
             traceback.print_exc()
             raise
 
-    def has_pending_requests(self) -> bool:
+    def has_pending_requests(self: ApifyScheduler) -> bool:
         """Check if the scheduler has any pending requests.
 
         Returns:
@@ -78,7 +75,7 @@
 
         return not is_finished
 
-    def enqueue_request(self, request: Request) -> bool:
+    def enqueue_request(self: ApifyScheduler, request: Request) -> bool:
         """Add a request to the scheduler.
 
         This could be called from either from a spider or a downloader middleware (e.g. redirect, retry, ...).
@@ -114,7 +111,7 @@
         Actor.log.debug(f'[{call_id}]: rq.add_request.result={result}...')
         return bool(result.was_already_present)
 
-    def next_request(self) -> Request | None:
+    def next_request(self: ApifyScheduler) -> Request | None:
         """Fetch the next request from the scheduler.
 
         Returns:

apify/scrapy/utils.py
CHANGED

@@ -2,17 +2,14 @@ from __future__ import annotations
 
 import asyncio
 from base64 import b64encode
-from typing import TYPE_CHECKING
 from urllib.parse import unquote
 
 from apify_shared.utils import ignore_docs
 
 try:
+    from scrapy.settings import Settings  # noqa: TCH002
     from scrapy.utils.project import get_project_settings
     from scrapy.utils.python import to_bytes
-
-    if TYPE_CHECKING:
-        from scrapy.settings import Settings
 except ImportError as exc:
     raise ImportError(
         'To use this module, you need to install the "scrapy" extra. For example, if you use pip, run '

apify/storages/__init__.py
CHANGED

{apify-2.1.0.dist-info → apify-2.1.0b1.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: apify
-Version: 2.1.0
+Version: 2.1.0b1
 Summary: Apify SDK for Python
 License: Apache-2.0
 Keywords: apify,sdk,automation,chrome,crawlee,crawler,headless,scraper,scraping
@@ -21,13 +21,13 @@ Classifier: Topic :: Software Development :: Libraries
 Provides-Extra: scrapy
 Requires-Dist: apify-client (>=1.8.1)
 Requires-Dist: apify-shared (>=1.1.2)
-Requires-Dist: crawlee (>=0.
+Requires-Dist: crawlee (>=0.3.8)
 Requires-Dist: cryptography (>=42.0.0)
-Requires-Dist: httpx (>=0.27.0
+Requires-Dist: httpx (>=0.27.0)
 Requires-Dist: lazy-object-proxy (>=1.10.0)
 Requires-Dist: scrapy (>=2.11.0) ; extra == "scrapy"
 Requires-Dist: typing-extensions (>=4.1.0)
-Requires-Dist: websockets (>=10.0
+Requires-Dist: websockets (>=10.0)
 Project-URL: Apify Homepage, https://apify.com
 Project-URL: Changelog, https://docs.apify.com/sdk/python/docs/changelog
 Project-URL: Documentation, https://docs.apify.com/sdk/python/

apify-2.1.0b1.dist-info/RECORD
ADDED

@@ -0,0 +1,37 @@
+apify/__init__.py,sha256=ikoi2EpDYl6y-XSVtlU8UsdQdMEyOiIJCRRAaZFDOP8,550
+apify/_actor.py,sha256=oPgQ3rxxIEzVcZ9XtI3lf1a_6gwIMgxihNuYGjJpGww,41816
+apify/_configuration.py,sha256=fK-BmsdctMJJT93SsupG98_DmknPnfifmIp5pwehYlA,9508
+apify/_consts.py,sha256=_Xq4hOfOA1iZ3n1P967YWdyncKivpbX6RTlp_qanUoE,330
+apify/_crypto.py,sha256=e0_aM3l9_5Osk-jszYOOjrAKK60OggSHbiw5c30QnsU,5638
+apify/_models.py,sha256=oYlTEr-DyQAE-V2rrYD5PhUxTXVPdAig7QV-u6CJw3E,5571
+apify/_platform_event_manager.py,sha256=K4cHabbQ7_ex7vkX-c-VhAOp8Efw3HDn5Wp4lfA-qAU,7571
+apify/_proxy_configuration.py,sha256=2z4VV_NrnIp6pDpgQKlKpcHM2pPyXiOpFedpPWje48A,13087
+apify/_utils.py,sha256=x4lnR9RNulySiEQTft-GeQqUcJsRr0k8p0Sv9NTeWFg,638
+apify/apify_storage_client/__init__.py,sha256=-UbR68bFsDR6ln8OFs4t50eqcnY36hujO-SeOt-KmcA,114
+apify/apify_storage_client/_apify_storage_client.py,sha256=xi4OFchxhe-1-sykanH6Zcya4OcBhn2uf7OQ1pV4Ins,2338
+apify/apify_storage_client/_dataset_client.py,sha256=j9seF2OKvbSMD9R9XF9fpa1vtr_1w4JcRV--WCmvU4E,5501
+apify/apify_storage_client/_dataset_collection_client.py,sha256=fkYvYGQCigHD2CDzpWk0swNAkfvAinAhMGpYqllle3E,1445
+apify/apify_storage_client/_key_value_store_client.py,sha256=uyeQgb75sGFsqIS4sq4hEZ3QP81COLfS3tmTqHc0tso,3340
+apify/apify_storage_client/_key_value_store_collection_client.py,sha256=vCtMTI-jx89Qp5WHILDNkCthwLuv0MAwm1J_5E4aypU,1519
+apify/apify_storage_client/_request_queue_client.py,sha256=jAiFkaJ38_myHFGTw-Rk21wmpbN0UCR2w2SFoimFGFc,5826
+apify/apify_storage_client/_request_queue_collection_client.py,sha256=NnO73UJ9ZrjV8xoudo30wfaM-SojRkG0guhxDyB-K1g,1527
+apify/apify_storage_client/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+apify/log.py,sha256=zIVjrqQ1DNWNQQOAmdmR9oAbf4nJH7CSMB6u4OOUf6I,1448
+apify/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+apify/scrapy/__init__.py,sha256=qDPV_zTRFaUqoFOyS5g4uBfz-UCkmWYJ82VXQ_3Cw6k,348
+apify/scrapy/middlewares/__init__.py,sha256=tfW-d3WFWLeNEjL8fTmon6NwgD-OXx1Bw2fBdU-wPy4,114
+apify/scrapy/middlewares/apify_proxy.py,sha256=_1WO7NKHxIcPf8mSNjsqANTEsx7ygMTuRQW9fbwKMO8,5837
+apify/scrapy/middlewares/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+apify/scrapy/pipelines/__init__.py,sha256=GWPeLN_Zwj8vRBWtXW6DaxdB7mvyQ7Jw5Tz1ccgWlZI,119
+apify/scrapy/pipelines/actor_dataset_push.py,sha256=QERmmExQOGIKQ70-p-lCj5qyE-c-fnYplEqd4mgaB1Q,953
+apify/scrapy/pipelines/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+apify/scrapy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+apify/scrapy/requests.py,sha256=F4VNaX2fGqybJKbhcRcz0_m6dXse5LzKll4gtMuTRko,7480
+apify/scrapy/scheduler.py,sha256=AAIKY5i1QxkC1mtmix6n3M2eQaOw-d1T56Noue9xToc,6013
+apify/scrapy/utils.py,sha256=0XdFxi1qlUa6gHXG96e1FU9gW0N5Rsu0sVZklFYfC2U,2884
+apify/storages/__init__.py,sha256=-9tEYJVabVs_eRVhUehxN58GH0UG8OfuGjGwuDieP2M,122
+apify/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+apify-2.1.0b1.dist-info/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
+apify-2.1.0b1.dist-info/METADATA,sha256=wP9WixgCbxLoefOJF57CcIMtBCtJb70JQJBAPNCMSNE,8657
+apify-2.1.0b1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+apify-2.1.0b1.dist-info/RECORD,,

apify/storages/_request_list.py
DELETED

@@ -1,150 +0,0 @@
-from __future__ import annotations
-
-import asyncio
-import re
-from asyncio import Task
-from functools import partial
-from typing import Annotated, Any, Union
-
-from pydantic import BaseModel, Field, TypeAdapter
-
-from crawlee import Request
-from crawlee._types import HttpMethod
-from crawlee.http_clients import BaseHttpClient, HttpxHttpClient
-from crawlee.storages import RequestList as CrawleeRequestList
-
-from apify._utils import docs_group
-
-URL_NO_COMMAS_REGEX = re.compile(
-    r'https?:\/\/(www\.)?([^\W_]|[^\W_][-\w0-9@:%._+~#=]{0,254}[^\W_])\.[a-z]{2,63}(:\d{1,5})?(\/[-\w@:%+.~#?&/=()]*)?'
-)
-
-
-class _RequestDetails(BaseModel):
-    method: HttpMethod = 'GET'
-    payload: str = ''
-    headers: Annotated[dict[str, str], Field(default_factory=dict)] = {}
-    user_data: Annotated[dict[str, str], Field(default_factory=dict, alias='userData')] = {}
-
-
-class _RequestsFromUrlInput(_RequestDetails):
-    requests_from_url: str = Field(alias='requestsFromUrl')
-
-
-class _SimpleUrlInput(_RequestDetails):
-    url: str
-
-
-url_input_adapter = TypeAdapter(list[Union[_RequestsFromUrlInput, _SimpleUrlInput]])
-
-
-@docs_group('Classes')
-class RequestList(CrawleeRequestList):
-    """Extends crawlee RequestList.
-
-    Method open is used to create RequestList from actor's requestListSources input.
-    """
-
-    @staticmethod
-    async def open(
-        name: str | None = None,
-        request_list_sources_input: list[dict[str, Any]] | None = None,
-        http_client: BaseHttpClient | None = None,
-    ) -> RequestList:
-        """Creates RequestList from Actor input requestListSources.
-
-        Args:
-            name: Name of the returned RequestList.
-            request_list_sources_input: List of dicts with either url key or requestsFromUrl key.
-            http_client: Client that will be used to send get request to urls defined by value of requestsFromUrl keys.
-
-        Returns:
-            RequestList created from request_list_sources_input.
-
-        ### Usage
-
-        ```python
-        example_input = [
-            # Gather urls from response body.
-            {'requestsFromUrl': 'https://crawlee.dev/file.txt', 'method': 'GET'},
-            # Directly include this url.
-            {'url': 'https://crawlee.dev', 'method': 'GET'}
-        ]
-        request_list = await RequestList.open(request_list_sources_input=example_input)
-        ```
-        """
-        request_list_sources_input = request_list_sources_input or []
-        return await RequestList._create_request_list(name, request_list_sources_input, http_client)
-
-    @staticmethod
-    async def _create_request_list(
-        name: str | None, request_list_sources_input: list[dict[str, Any]], http_client: BaseHttpClient | None
-    ) -> RequestList:
-        if not http_client:
-            http_client = HttpxHttpClient()
-
-        url_inputs = url_input_adapter.validate_python(request_list_sources_input)
-
-        simple_url_inputs = [url_input for url_input in url_inputs if isinstance(url_input, _SimpleUrlInput)]
-        remote_url_inputs = [url_input for url_input in url_inputs if isinstance(url_input, _RequestsFromUrlInput)]
-
-        simple_url_requests = RequestList._create_requests_from_input(simple_url_inputs)
-        remote_url_requests = await RequestList._fetch_requests_from_url(remote_url_inputs, http_client=http_client)
-
-        return RequestList(name=name, requests=simple_url_requests + remote_url_requests)
-
-    @staticmethod
-    def _create_requests_from_input(simple_url_inputs: list[_SimpleUrlInput]) -> list[Request]:
-        return [
-            Request.from_url(
-                method=request_input.method,
-                url=request_input.url,
-                payload=request_input.payload.encode('utf-8'),
-                headers=request_input.headers,
-                user_data=request_input.user_data,
-            )
-            for request_input in simple_url_inputs
-        ]
-
-    @staticmethod
-    async def _fetch_requests_from_url(
-        remote_url_requests_inputs: list[_RequestsFromUrlInput], http_client: BaseHttpClient
-    ) -> list[Request]:
-        """Crete list of requests from url.
-
-        Send GET requests to urls defined in each requests_from_url of remote_url_requests_inputs. Run extracting
-        callback on each response body and use URL_NO_COMMAS_REGEX regex to find all links. Create list of Requests from
-        collected links and additional inputs stored in other attributes of each remote_url_requests_inputs.
-        """
-        created_requests: list[Request] = []
-
-        def create_requests_from_response(request_input: _RequestsFromUrlInput, task: Task) -> None:
-            """Callback to scrape response body with regexp and create Requests from matches."""
-            matches = re.finditer(URL_NO_COMMAS_REGEX, task.result().read().decode('utf-8'))
-            created_requests.extend(
-                [
-                    Request.from_url(
-                        match.group(0),
-                        method=request_input.method,
-                        payload=request_input.payload.encode('utf-8'),
-                        headers=request_input.headers,
-                        user_data=request_input.user_data,
-                    )
-                    for match in matches
-                ]
-            )
-
-        remote_url_requests = []
-        for remote_url_requests_input in remote_url_requests_inputs:
-            get_response_task = asyncio.create_task(
-                http_client.send_request(
-                    method='GET',
-                    url=remote_url_requests_input.requests_from_url,
-                )
-            )
-
-            get_response_task.add_done_callback(partial(create_requests_from_response, remote_url_requests_input))
-            remote_url_requests.append(get_response_task)
-
-        await asyncio.gather(*remote_url_requests)
-        return created_requests

apify-2.1.0.dist-info/RECORD
DELETED
|
@@ -1,38 +0,0 @@
-apify/__init__.py,sha256=99ynaDWBLEcCjdLq7R0Exy_iACsXiXoQ8VUZKmbzTeM,550
-apify/_actor.py,sha256=AUviY4qrX4UoN7fSZtXXSHqEk4rrQwBymMLjkgb4Mzg,41887
-apify/_configuration.py,sha256=WC9X5gQ-e5r4BGQw-Kaq77oxLlPx7vtq0M-L4VlKrIc,9633
-apify/_consts.py,sha256=_Xq4hOfOA1iZ3n1P967YWdyncKivpbX6RTlp_qanUoE,330
-apify/_crypto.py,sha256=e0_aM3l9_5Osk-jszYOOjrAKK60OggSHbiw5c30QnsU,5638
-apify/_models.py,sha256=Btlz-23obKY5tJ75JnUwkVNC2lmU1IEBbdU3HvWaVhg,5748
-apify/_platform_event_manager.py,sha256=44xyV0Lpzf4h4VZ0rkyYg_nhbQkEONNor8_Z9gIKO40,7899
-apify/_proxy_configuration.py,sha256=vdDiE5dfyNQYEXKXnj0jcgL3rG3-qanwNSybrVl1xT8,13167
-apify/_utils.py,sha256=CCLkpAsZKp00ykm88Z_Fbck5PNT0j6mJYOuD0RxzZUs,1620
-apify/apify_storage_client/__init__.py,sha256=-UbR68bFsDR6ln8OFs4t50eqcnY36hujO-SeOt-KmcA,114
-apify/apify_storage_client/_apify_storage_client.py,sha256=NsZBleJNHLBXVyG__bVjdCGEI30cnmVZngCbQaVekfk,2397
-apify/apify_storage_client/_dataset_client.py,sha256=FfXew6tBiZRkpovyPaQ__xhtZZ-rZvjijwBIIyRahH8,5536
-apify/apify_storage_client/_dataset_collection_client.py,sha256=fkYvYGQCigHD2CDzpWk0swNAkfvAinAhMGpYqllle3E,1445
-apify/apify_storage_client/_key_value_store_client.py,sha256=uyeQgb75sGFsqIS4sq4hEZ3QP81COLfS3tmTqHc0tso,3340
-apify/apify_storage_client/_key_value_store_collection_client.py,sha256=vCtMTI-jx89Qp5WHILDNkCthwLuv0MAwm1J_5E4aypU,1519
-apify/apify_storage_client/_request_queue_client.py,sha256=jAiFkaJ38_myHFGTw-Rk21wmpbN0UCR2w2SFoimFGFc,5826
-apify/apify_storage_client/_request_queue_collection_client.py,sha256=NnO73UJ9ZrjV8xoudo30wfaM-SojRkG0guhxDyB-K1g,1527
-apify/apify_storage_client/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-apify/log.py,sha256=zElFyEp2RJN0kiHEwJhcjSCAuHrba5zYiq4pK2xsL_o,1450
-apify/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-apify/scrapy/__init__.py,sha256=HE5wCN7-DZKPydLCOvjNyLuL3CvN2fUFweXfrDfe1Ss,348
-apify/scrapy/middlewares/__init__.py,sha256=tfW-d3WFWLeNEjL8fTmon6NwgD-OXx1Bw2fBdU-wPy4,114
-apify/scrapy/middlewares/apify_proxy.py,sha256=9_-hJqTwQ4yVMjvN9zkJ_GXJADzrrYu8QoHZ6IX6fDs,5764
-apify/scrapy/middlewares/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-apify/scrapy/pipelines/__init__.py,sha256=GWPeLN_Zwj8vRBWtXW6DaxdB7mvyQ7Jw5Tz1ccgWlZI,119
-apify/scrapy/pipelines/actor_dataset_push.py,sha256=otggoULfUdCqOPJLb9wMROZ9WylnlL-209930tMS2Rg,971
-apify/scrapy/pipelines/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-apify/scrapy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-apify/scrapy/requests.py,sha256=yZ9hIsz2YyqOoOwzN9F1h76wG4qwawrI6h_6xq0I7Iw,7599
-apify/scrapy/scheduler.py,sha256=03kZxejWWb-TofJ-vpSZuQ28rT-qNjhhpC-QeO2OzoU,5977
-apify/scrapy/utils.py,sha256=758DcHCSAgCTProY0QX74uJ1XrzVsQwvCmFanj2f_3Q,2928
-apify/storages/__init__.py,sha256=FW-z6ubuPnHGM-Wp15T8mR5q6lnpDGrCW-IkgZd5L30,177
-apify/storages/_request_list.py,sha256=4nrvSdMUF-kiwGVIPEfIOygLKgjUpO37Jl8Om-jRbIU,5858
-apify/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-apify-2.1.0.dist-info/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
-apify-2.1.0.dist-info/METADATA,sha256=9K6Q_mrUCisKOlhMQuvt74xYMcaxu6UZPLMtec_8Pnc,8678
-apify-2.1.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-apify-2.1.0.dist-info/RECORD,,

{apify-2.1.0.dist-info → apify-2.1.0b1.dist-info}/LICENSE
File without changes

{apify-2.1.0.dist-info → apify-2.1.0b1.dist-info}/WHEEL
File without changes