apify 1.7.3b4__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of apify might be problematic. Click here for more details.

Files changed (61)
  1. apify/__init__.py +19 -4
  2. apify/_actor.py +979 -0
  3. apify/_configuration.py +310 -0
  4. apify/_consts.py +10 -0
  5. apify/_crypto.py +29 -27
  6. apify/_models.py +110 -0
  7. apify/_platform_event_manager.py +222 -0
  8. apify/_proxy_configuration.py +316 -0
  9. apify/_utils.py +0 -497
  10. apify/apify_storage_client/__init__.py +3 -0
  11. apify/apify_storage_client/_apify_storage_client.py +56 -0
  12. apify/apify_storage_client/_dataset_client.py +188 -0
  13. apify/apify_storage_client/_dataset_collection_client.py +50 -0
  14. apify/apify_storage_client/_key_value_store_client.py +98 -0
  15. apify/apify_storage_client/_key_value_store_collection_client.py +50 -0
  16. apify/apify_storage_client/_request_queue_client.py +208 -0
  17. apify/apify_storage_client/_request_queue_collection_client.py +50 -0
  18. apify/apify_storage_client/py.typed +0 -0
  19. apify/log.py +24 -105
  20. apify/scrapy/__init__.py +11 -3
  21. apify/scrapy/middlewares/__init__.py +3 -1
  22. apify/scrapy/middlewares/apify_proxy.py +21 -21
  23. apify/scrapy/middlewares/py.typed +0 -0
  24. apify/scrapy/pipelines/__init__.py +3 -1
  25. apify/scrapy/pipelines/actor_dataset_push.py +1 -1
  26. apify/scrapy/pipelines/py.typed +0 -0
  27. apify/scrapy/py.typed +0 -0
  28. apify/scrapy/requests.py +55 -54
  29. apify/scrapy/scheduler.py +19 -13
  30. apify/scrapy/utils.py +2 -31
  31. apify/storages/__init__.py +2 -10
  32. apify/storages/py.typed +0 -0
  33. apify-2.0.0.dist-info/METADATA +209 -0
  34. apify-2.0.0.dist-info/RECORD +37 -0
  35. {apify-1.7.3b4.dist-info → apify-2.0.0.dist-info}/WHEEL +1 -2
  36. apify/_memory_storage/__init__.py +0 -3
  37. apify/_memory_storage/file_storage_utils.py +0 -71
  38. apify/_memory_storage/memory_storage_client.py +0 -219
  39. apify/_memory_storage/resource_clients/__init__.py +0 -19
  40. apify/_memory_storage/resource_clients/base_resource_client.py +0 -141
  41. apify/_memory_storage/resource_clients/base_resource_collection_client.py +0 -114
  42. apify/_memory_storage/resource_clients/dataset.py +0 -452
  43. apify/_memory_storage/resource_clients/dataset_collection.py +0 -48
  44. apify/_memory_storage/resource_clients/key_value_store.py +0 -533
  45. apify/_memory_storage/resource_clients/key_value_store_collection.py +0 -48
  46. apify/_memory_storage/resource_clients/request_queue.py +0 -466
  47. apify/_memory_storage/resource_clients/request_queue_collection.py +0 -48
  48. apify/actor.py +0 -1357
  49. apify/config.py +0 -130
  50. apify/consts.py +0 -67
  51. apify/event_manager.py +0 -236
  52. apify/proxy_configuration.py +0 -365
  53. apify/storages/base_storage.py +0 -181
  54. apify/storages/dataset.py +0 -494
  55. apify/storages/key_value_store.py +0 -257
  56. apify/storages/request_queue.py +0 -602
  57. apify/storages/storage_client_manager.py +0 -72
  58. apify-1.7.3b4.dist-info/METADATA +0 -150
  59. apify-1.7.3b4.dist-info/RECORD +0 -41
  60. apify-1.7.3b4.dist-info/top_level.txt +0 -1
  61. {apify-1.7.3b4.dist-info → apify-2.0.0.dist-info}/LICENSE +0 -0
apify/_memory_storage/resource_clients/base_resource_client.py (file removed)
@@ -1,141 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import json
4
- import os
5
- from abc import ABC, abstractmethod
6
- from typing import TYPE_CHECKING
7
-
8
- from apify_shared.utils import ignore_docs
9
-
10
- if TYPE_CHECKING:
11
- from typing_extensions import Self
12
-
13
- from apify._memory_storage.memory_storage_client import MemoryStorageClient
14
-
15
-
16
@ignore_docs
class BaseResourceClient(ABC):
    """Base class for resource clients.

    Subclasses provide the storage-specific pieces through the abstract methods below, while the
    lookup of an existing storage (memory cache first, then the storage directory on disk) is
    implemented once in `_find_or_create_client_by_id_or_name`.
    """

    # Identifier of the storage; compared against the requested `id` during cache lookup.
    _id: str
    # Optional storage name; compared case-insensitively against the requested `name`.
    _name: str | None
    # Directory associated with this resource (presumably where its files live; set by subclasses).
    _resource_directory: str

    @abstractmethod
    def __init__(
        self: BaseResourceClient,
        *,
        base_storage_directory: str,
        memory_storage_client: MemoryStorageClient,
        id: str | None = None,  # noqa: A002
        name: str | None = None,
    ) -> None:
        """Initialize the BaseResourceClient.

        Args:
            base_storage_directory (str): Directory passed in by the collection client when creating a new resource.
            memory_storage_client (MemoryStorageClient): The memory storage client this resource belongs to.
            id (str, optional): ID of the storage.
            name (str, optional): Name of the storage.
        """
        raise NotImplementedError('You must override this method in the subclass!')

    @abstractmethod
    async def get(self: BaseResourceClient) -> dict | None:
        """Retrieve the storage.

        Returns:
            dict, optional: The retrieved storage, or None, if it does not exist
        """
        raise NotImplementedError('You must override this method in the subclass!')

    @classmethod
    @abstractmethod
    def _get_storages_dir(cls: type[BaseResourceClient], memory_storage_client: MemoryStorageClient) -> str:
        """Return the directory that is scanned for storages of this resource type."""
        raise NotImplementedError('You must override this method in the subclass!')

    @classmethod
    @abstractmethod
    def _get_storage_client_cache(
        cls,
        memory_storage_client: MemoryStorageClient,
    ) -> list[Self]:
        """Return the list of already created clients of this resource type.

        The returned list is mutated in place by `_find_or_create_client_by_id_or_name`.
        """
        raise NotImplementedError('You must override this method in the subclass!')

    @abstractmethod
    def _to_resource_info(self: BaseResourceClient) -> dict:
        """Return a dict describing this resource."""
        raise NotImplementedError('You must override this method in the subclass!')

    @classmethod
    @abstractmethod
    def _create_from_directory(
        cls,
        storage_directory: str,
        memory_storage_client: MemoryStorageClient,
        id: str | None = None,  # noqa: A002
        name: str | None = None,
    ) -> Self:
        """Create a client for the storage found in `storage_directory`."""
        raise NotImplementedError('You must override this method in the subclass!')

    @classmethod
    def _find_or_create_client_by_id_or_name(
        cls,
        memory_storage_client: MemoryStorageClient,
        id: str | None = None,  # noqa: A002
        name: str | None = None,
    ) -> Self | None:
        """Find an existing storage by ID or name and return a client for it.

        The memory cache is consulted first. On a cache miss the storages directory is searched:
        by a sub-directory named `name`, then by the `id`/`name` stored in each sub-directory's
        `__metadata__.json`, and finally by a sub-directory called `default` when `id == 'default'`.
        A client created from disk is appended to the cache. No storage is created on disk here.

        Args:
            memory_storage_client (MemoryStorageClient): Client providing the cache and the storages directory.
            id (str, optional): ID of the storage to look up.
            name (str, optional): Name of the storage to look up.

        Returns:
            The cached or newly created client, or None if no matching storage exists.

        Raises:
            AssertionError: If neither `id` nor `name` is provided.
        """
        assert id is not None or name is not None  # noqa: S101

        storage_client_cache = cls._get_storage_client_cache(memory_storage_client)
        storages_dir = cls._get_storages_dir(memory_storage_client)

        # First check memory cache (names are matched case-insensitively)
        found = next(
            (
                storage_client
                for storage_client in storage_client_cache
                if storage_client._id == id or (storage_client._name and name and storage_client._name.lower() == name.lower())
            ),
            None,
        )

        if found is not None:
            return found

        storage_path = None

        # First try to find the storage by looking up the directory by name
        if name:
            possible_storage_path = os.path.join(storages_dir, name)
            if os.access(possible_storage_path, os.F_OK):
                storage_path = possible_storage_path

        # If it's not found, try going through the storages dir and finding it by metadata
        if not storage_path and os.access(storages_dir, os.F_OK):
            for entry in os.scandir(storages_dir):
                if not entry.is_dir():
                    continue
                metadata_path = os.path.join(entry.path, '__metadata__.json')
                if not os.access(metadata_path, os.F_OK):
                    continue
                with open(metadata_path, encoding='utf-8') as metadata_file:
                    metadata = json.load(metadata_file)
                if id and id == metadata.get('id'):
                    storage_path = entry.path
                    # Take the name from the metadata; the key is the literal 'name', not the value of `name`.
                    name = metadata.get('name')
                    break
                if name and name == metadata.get('name'):
                    storage_path = entry.path
                    # Take the ID from the metadata; the key is the literal 'id', not the value of `id`.
                    id = metadata.get('id')  # noqa: A001
                    break

        # As a last resort, try to check if the accessed storage is the default one,
        # and the folder has no metadata
        # TODO: make this respect the APIFY_DEFAULT_XXX_ID env var
        # https://github.com/apify/apify-sdk-python/issues/149
        if id == 'default':
            possible_storage_path = os.path.join(storages_dir, id)
            if os.access(possible_storage_path, os.F_OK):
                storage_path = possible_storage_path

        if not storage_path:
            return None

        resource_client = cls._create_from_directory(storage_path, memory_storage_client, id, name)

        storage_client_cache.append(resource_client)

        return resource_client
apify/_memory_storage/resource_clients/base_resource_collection_client.py (file removed)
@@ -1,114 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from abc import ABC, abstractmethod
4
- from operator import itemgetter
5
- from typing import TYPE_CHECKING, Generic, TypeVar, cast
6
-
7
- from apify_shared.models import ListPage
8
- from apify_shared.utils import ignore_docs
9
-
10
- from apify._memory_storage.file_storage_utils import update_metadata
11
- from apify._memory_storage.resource_clients.base_resource_client import BaseResourceClient
12
-
13
- if TYPE_CHECKING:
14
- from apify._memory_storage.memory_storage_client import MemoryStorageClient
15
-
16
-
17
- ResourceClientType = TypeVar('ResourceClientType', bound=BaseResourceClient, contravariant=True) # noqa: PLC0105
18
-
19
-
20
@ignore_docs
class BaseResourceCollectionClient(ABC, Generic[ResourceClientType]):
    """Shared implementation for clients that manage a collection of storages of one resource type."""

    _base_storage_directory: str
    _memory_storage_client: MemoryStorageClient

    def __init__(
        self: BaseResourceCollectionClient,
        *,
        base_storage_directory: str,
        memory_storage_client: MemoryStorageClient,
    ) -> None:
        """Store the base storage directory and the owning memory storage client.

        Args:
            base_storage_directory (str): Directory handed to newly created resource clients.
            memory_storage_client (MemoryStorageClient): The memory storage client this collection belongs to.
        """
        self._base_storage_directory = base_storage_directory
        self._memory_storage_client = memory_storage_client

    @abstractmethod
    def _get_storage_client_cache(self: BaseResourceCollectionClient) -> list[ResourceClientType]:
        """Return the list of cached resource clients for this collection."""
        raise NotImplementedError('You must override this method in the subclass!')

    @abstractmethod
    def _get_resource_client_class(self: BaseResourceCollectionClient) -> type[ResourceClientType]:
        """Return the resource client class managed by this collection."""
        raise NotImplementedError('You must override this method in the subclass!')

    @abstractmethod
    async def list(self: BaseResourceCollectionClient) -> ListPage:
        """List the cached storages, ordered by their `createdAt` value.

        Returns:
            ListPage: A single page holding every cached storage (offset 0, limit equal to the item count).
        """
        resource_infos = [client._to_resource_info() for client in self._get_storage_client_cache()]
        item_count = len(resource_infos)

        page_data = {
            'total': item_count,
            'count': item_count,
            'offset': 0,
            'limit': item_count,
            'desc': False,
            'items': sorted(resource_infos, key=itemgetter('createdAt')),
        }
        return ListPage(page_data)

    @abstractmethod
    async def get_or_create(
        self: BaseResourceCollectionClient,
        *,
        name: str | None = None,
        schema: dict | None = None,
        _id: str | None = None,
    ) -> dict:
        """Return the info of an existing storage, or create a new storage and return its info.

        Args:
            name (str, optional): The name of the storage to retrieve or create.
            schema (dict, optional): The schema of the storage; not used by this base implementation.
            _id (str, optional): The ID of the storage to retrieve or create.

        Returns:
            dict: The retrieved or newly-created storage.
        """
        client_class = self._get_resource_client_class()
        client_cache = self._get_storage_client_cache()

        # A lookup only makes sense when the caller identified the storage in some way.
        existing_client = None
        if name or _id:
            existing_client = client_class._find_or_create_client_by_id_or_name(
                memory_storage_client=self._memory_storage_client,
                name=name,
                id=_id,
            )

        if existing_client:
            return cast(dict, existing_client._to_resource_info())

        # Nothing matched, so create a fresh resource client and register it in the cache.
        created_client = client_class(
            id=_id,
            name=name,
            base_storage_directory=self._base_storage_directory,
            memory_storage_client=self._memory_storage_client,
        )
        client_cache.append(created_client)

        created_info = created_client._to_resource_info()

        # Write the new resource's metadata via update_metadata
        await update_metadata(
            data=created_info,
            entity_directory=created_client._resource_directory,
            write_metadata=self._memory_storage_client._write_metadata,
        )

        return cast(dict, created_info)