apify 3.0.3b1__tar.gz → 3.0.4b2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apify might be problematic. Click here for more details.
- {apify-3.0.3b1 → apify-3.0.4b2}/CHANGELOG.md +14 -2
- {apify-3.0.3b1 → apify-3.0.4b2}/PKG-INFO +1 -1
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/12_pay_per_event.mdx +1 -1
- {apify-3.0.3b1 → apify-3.0.4b2}/pyproject.toml +1 -1
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/_actor.py +0 -2
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/_charging.py +89 -37
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/_configuration.py +30 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/storage_clients/_apify/_models.py +1 -1
- apify-3.0.4b2/src/apify/storage_clients/_apify/_storage_client.py +132 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/storage_clients/_smart_apify/_storage_client.py +40 -33
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/integration/test_actor_lifecycle.py +3 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/integration/test_actor_log.py +1 -1
- {apify-3.0.3b1 → apify-3.0.4b2}/uv.lock +117 -102
- {apify-3.0.3b1 → apify-3.0.4b2}/website/docusaurus.config.js +4 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/website/package-lock.json +459 -459
- {apify-3.0.3b1 → apify-3.0.4b2}/website/src/pages/index.js +1 -1
- apify-3.0.3b1/src/apify/storage_clients/_apify/_storage_client.py +0 -106
- {apify-3.0.3b1 → apify-3.0.4b2}/.editorconfig +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/.github/CODEOWNERS +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/.github/workflows/build_and_deploy_docs.yaml +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/.github/workflows/check_pr_title.yaml +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/.github/workflows/pre_release.yaml +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/.github/workflows/release.yaml +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/.github/workflows/run_code_checks.yaml +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/.github/workflows/update_new_issue.yaml +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/.gitignore +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/.markdownlint.yaml +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/.pre-commit-config.yaml +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/CONTRIBUTING.md +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/LICENSE +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/Makefile +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/README.md +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/01_overview/01_introduction.mdx +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/01_overview/02_running_actors_locally.mdx +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/01_overview/03_actor_structure.mdx +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/01_overview/code/01_introduction.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/01_overview/code/actor_structure/__init__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/01_overview/code/actor_structure/__main__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/01_overview/code/actor_structure/main.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/01_overview/code/actor_structure/py.typed +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/01_actor_lifecycle.mdx +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/02_actor_input.mdx +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/03_storages.mdx +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/04_actor_events.mdx +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/05_proxy_management.mdx +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/06_interacting_with_other_actors.mdx +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/07_webhooks.mdx +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/08_access_apify_api.mdx +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/09_running_webserver.mdx +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/10_logging.mdx +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/11_configuration.mdx +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/01_class_context.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/01_class_manual.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/01_context_manager.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/01_error_handling_context.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/01_error_handling_manual.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/01_init_exit.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/01_instance_context.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/01_instance_manual.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/01_reboot.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/01_status_message.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/02_input.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/03_dataset_exports.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/03_dataset_read_write.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/03_deleting_storages.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/03_kvs_iterating.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/03_kvs_public_url.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/03_kvs_read_write.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/03_opening_storages.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/03_rq.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/04_actor_events.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/05_apify_proxy.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/05_apify_proxy_config.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/05_custom_proxy.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/05_custom_proxy_function.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/05_proxy_actor_input.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/05_proxy_httpx.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/05_proxy_rotation.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/06_interacting_call.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/06_interacting_call_task.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/06_interacting_metamorph.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/06_interacting_start.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/07_webhook.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/07_webhook_preventing.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/08_actor_client.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/08_actor_new_client.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/09_webserver.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/10_log_config.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/10_logger_usage.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/10_redirect_log.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/10_redirect_log_existing_run.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/11_config.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/actor_charge.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/02_concepts/code/conditional_actor_charge.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/03_guides/01_beautifulsoup_httpx.mdx +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/03_guides/02_parsel_impit.mdx +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/03_guides/03_playwright.mdx +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/03_guides/04_selenium.mdx +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/03_guides/05_crawlee.mdx +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/03_guides/06_scrapy.mdx +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/03_guides/code/01_beautifulsoup_httpx.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/03_guides/code/02_parsel_impit.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/03_guides/code/03_playwright.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/03_guides/code/04_selenium.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/03_guides/code/05_crawlee_beautifulsoup.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/03_guides/code/05_crawlee_parsel.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/03_guides/code/05_crawlee_playwright.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/03_guides/code/scrapy_project/src/__init__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/03_guides/code/scrapy_project/src/__main__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/03_guides/code/scrapy_project/src/items.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/03_guides/code/scrapy_project/src/main.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/03_guides/code/scrapy_project/src/py.typed +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/03_guides/code/scrapy_project/src/settings.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/03_guides/code/scrapy_project/src/spiders/__init__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/03_guides/code/scrapy_project/src/spiders/py.typed +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/03_guides/code/scrapy_project/src/spiders/title.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/04_upgrading/upgrading_to_v2.md +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/04_upgrading/upgrading_to_v3.md +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/docs/pyproject.toml +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/renovate.json +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/__init__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/_consts.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/_crypto.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/_models.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/_proxy_configuration.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/_utils.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/events/__init__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/events/_apify_event_manager.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/events/_types.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/events/py.typed +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/log.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/py.typed +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/request_loaders/__init__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/request_loaders/_apify_request_list.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/request_loaders/py.typed +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/scrapy/__init__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/scrapy/_actor_runner.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/scrapy/_async_thread.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/scrapy/_logging_config.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/scrapy/extensions/__init__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/scrapy/extensions/_httpcache.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/scrapy/middlewares/__init__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/scrapy/middlewares/apify_proxy.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/scrapy/middlewares/py.typed +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/scrapy/pipelines/__init__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/scrapy/pipelines/actor_dataset_push.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/scrapy/pipelines/py.typed +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/scrapy/py.typed +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/scrapy/requests.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/scrapy/scheduler.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/scrapy/utils.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/storage_clients/__init__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/storage_clients/_apify/__init__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/storage_clients/_apify/_dataset_client.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/storage_clients/_apify/_key_value_store_client.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/storage_clients/_apify/_request_queue_client.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/storage_clients/_apify/_request_queue_shared_client.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/storage_clients/_apify/_request_queue_single_client.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/storage_clients/_apify/_utils.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/storage_clients/_apify/py.typed +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/storage_clients/_file_system/__init__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/storage_clients/_file_system/_key_value_store_client.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/storage_clients/_file_system/_storage_client.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/storage_clients/_smart_apify/__init__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/storage_clients/py.typed +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/storages/__init__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/src/apify/storages/py.typed +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/integration/README.md +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/integration/__init__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/integration/_utils.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/integration/actor_source_base/Dockerfile +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/integration/actor_source_base/requirements.txt +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/integration/actor_source_base/server.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/integration/actor_source_base/src/__init__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/integration/actor_source_base/src/__main__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/integration/actor_source_base/src/main.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/integration/conftest.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/integration/test_actor_api_helpers.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/integration/test_actor_call_timeouts.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/integration/test_actor_charge.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/integration/test_actor_create_proxy_configuration.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/integration/test_actor_dataset.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/integration/test_actor_events.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/integration/test_actor_key_value_store.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/integration/test_actor_request_queue.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/integration/test_actor_scrapy.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/integration/test_apify_storages.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/integration/test_crawlers_with_storages.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/integration/test_fixtures.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/integration/test_request_queue.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/__init__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/actor/__init__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/actor/test_actor_create_proxy_configuration.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/actor/test_actor_dataset.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/actor/test_actor_env_helpers.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/actor/test_actor_helpers.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/actor/test_actor_key_value_store.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/actor/test_actor_lifecycle.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/actor/test_actor_log.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/actor/test_actor_non_default_instance.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/actor/test_actor_request_queue.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/actor/test_configuration.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/actor/test_request_list.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/conftest.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/events/__init__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/events/test_apify_event_manager.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/scrapy/__init__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/scrapy/extensions/__init__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/scrapy/extensions/test_httpcache.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/scrapy/middlewares/__init__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/scrapy/middlewares/test_apify_proxy.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/scrapy/pipelines/__init__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/scrapy/pipelines/test_actor_dataset_push.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/scrapy/requests/__init__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/scrapy/requests/test_to_apify_request.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/scrapy/requests/test_to_scrapy_request.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/scrapy/utils/__init__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/scrapy/utils/test_apply_apify_settings.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/scrapy/utils/test_get_basic_auth_header.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/storage_clients/__init__.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/storage_clients/test_apify_request_queue_client.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/storage_clients/test_file_system.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/test_apify_storages.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/test_crypto.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/tests/unit/test_proxy_configuration.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/website/.eslintrc.json +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/website/babel.config.js +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/website/build_api_reference.sh +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/website/generate_module_shortcuts.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/website/package.json +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/website/sidebars.js +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/website/src/components/ApiLink.jsx +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/website/src/components/Gradients.jsx +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/website/src/components/Highlights.jsx +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/website/src/components/Highlights.module.css +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/website/src/components/RunnableCodeBlock.jsx +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/website/src/components/RunnableCodeBlock.module.css +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/website/src/css/custom.css +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/website/src/pages/home_page_example.py +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/website/src/pages/index.module.css +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/website/src/theme/DocItem/Content/index.js +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/website/static/.nojekyll +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/website/static/img/docs-og.png +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/website/static/img/guides/redirected_logs_example.webp +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/website/tools/docs-prettier.config.js +0 -0
- {apify-3.0.3b1 → apify-3.0.4b2}/website/tools/utils/externalLink.js +0 -0
|
@@ -3,14 +3,26 @@
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
5
|
<!-- git-cliff-unreleased-start -->
|
|
6
|
-
## 3.0.
|
|
6
|
+
## 3.0.4 - **not yet released**
|
|
7
7
|
|
|
8
8
|
### 🐛 Bug Fixes
|
|
9
9
|
|
|
10
|
-
-
|
|
10
|
+
- Fix type of `cloud_storage_client` in `SmartApifyStorageClient` ([#642](https://github.com/apify/apify-sdk-python/pull/642)) ([3bf285d](https://github.com/apify/apify-sdk-python/commit/3bf285d60f507730954986a80c19ed2e27a38f9c)) by [@vdusek](https://github.com/vdusek)
|
|
11
|
+
- Fix local charging log dataset name ([#649](https://github.com/apify/apify-sdk-python/pull/649)) ([fdb1276](https://github.com/apify/apify-sdk-python/commit/fdb1276264aee2687596d87c96d19033fe915823)) by [@vdusek](https://github.com/vdusek), closes [#648](https://github.com/apify/apify-sdk-python/issues/648)
|
|
12
|
+
|
|
13
|
+
### ⚡ Performance
|
|
14
|
+
|
|
15
|
+
- Use Apify-provided environment variables to obtain PPE pricing information ([#644](https://github.com/apify/apify-sdk-python/pull/644)) ([0c32f29](https://github.com/apify/apify-sdk-python/commit/0c32f29d6a316f5bacc931595d694f262c925b2b)) by [@Mantisus](https://github.com/Mantisus), closes [#614](https://github.com/apify/apify-sdk-python/issues/614)
|
|
11
16
|
|
|
12
17
|
|
|
13
18
|
<!-- git-cliff-unreleased-end -->
|
|
19
|
+
## [3.0.3](https://github.com/apify/apify-sdk-python/releases/tag/v3.0.3) (2025-10-21)
|
|
20
|
+
|
|
21
|
+
### 🐛 Bug Fixes
|
|
22
|
+
|
|
23
|
+
- Cache requests in RQ implementations by `id` ([#633](https://github.com/apify/apify-sdk-python/pull/633)) ([76886ce](https://github.com/apify/apify-sdk-python/commit/76886ce496165346a01f67e018547287c211ea54)) by [@Pijukatel](https://github.com/Pijukatel), closes [#630](https://github.com/apify/apify-sdk-python/issues/630)
|
|
24
|
+
|
|
25
|
+
|
|
14
26
|
## [3.0.2](https://github.com/apify/apify-sdk-python/releases/tag/v3.0.2) (2025-10-17)
|
|
15
27
|
|
|
16
28
|
### 🐛 Bug Fixes
|
|
@@ -47,6 +47,6 @@ It is encouraged to test your monetization code on your machine before releasing
|
|
|
47
47
|
ACTOR_TEST_PAY_PER_EVENT=true python -m youractor
|
|
48
48
|
```
|
|
49
49
|
|
|
50
|
-
If you also wish to see a log of all the events charged throughout the run, the Apify SDK keeps a log of charged events in a so called charging dataset. Your charging dataset can be found under the `
|
|
50
|
+
If you also wish to see a log of all the events charged throughout the run, the Apify SDK keeps a log of charged events in a so-called charging dataset. Your charging dataset can be found under the `charging-log` name (unless you change your storage settings, this dataset is stored in `storage/datasets/charging-log/`). Please note that this log is not available when running the Actor in production on the Apify platform.
|
|
51
51
|
|
|
52
52
|
Because pricing configuration is stored by the Apify platform, all events will have a default price of $1.
|
|
@@ -48,7 +48,6 @@ if TYPE_CHECKING:
|
|
|
48
48
|
from typing_extensions import Self
|
|
49
49
|
|
|
50
50
|
from crawlee.proxy_configuration import _NewUrlFunction
|
|
51
|
-
from crawlee.storage_clients import StorageClient
|
|
52
51
|
|
|
53
52
|
from apify._models import Webhook
|
|
54
53
|
|
|
@@ -140,7 +139,6 @@ class _ActorType:
|
|
|
140
139
|
# `__init__` method should not be considered final.
|
|
141
140
|
|
|
142
141
|
self._apify_client: ApifyClientAsync | None = None
|
|
143
|
-
self._local_storage_client: StorageClient | None = None
|
|
144
142
|
|
|
145
143
|
self._is_exiting = False
|
|
146
144
|
self._is_initialized = False
|
|
@@ -4,13 +4,20 @@ import math
|
|
|
4
4
|
from dataclasses import dataclass
|
|
5
5
|
from datetime import datetime, timezone
|
|
6
6
|
from decimal import Decimal
|
|
7
|
-
from typing import TYPE_CHECKING, Protocol
|
|
7
|
+
from typing import TYPE_CHECKING, Protocol, TypedDict
|
|
8
8
|
|
|
9
9
|
from pydantic import TypeAdapter
|
|
10
10
|
|
|
11
11
|
from crawlee._utils.context import ensure_context
|
|
12
12
|
|
|
13
|
-
from apify._models import
|
|
13
|
+
from apify._models import (
|
|
14
|
+
ActorRun,
|
|
15
|
+
FlatPricePerMonthActorPricingInfo,
|
|
16
|
+
FreeActorPricingInfo,
|
|
17
|
+
PayPerEventActorPricingInfo,
|
|
18
|
+
PricePerDatasetItemActorPricingInfo,
|
|
19
|
+
PricingModel,
|
|
20
|
+
)
|
|
14
21
|
from apify._utils import docs_group
|
|
15
22
|
from apify.log import logger
|
|
16
23
|
from apify.storages import Dataset
|
|
@@ -111,24 +118,16 @@ class ActorPricingInfo:
|
|
|
111
118
|
class ChargingManagerImplementation(ChargingManager):
|
|
112
119
|
"""Implementation of the `ChargingManager` Protocol - this is only meant to be instantiated internally."""
|
|
113
120
|
|
|
114
|
-
LOCAL_CHARGING_LOG_DATASET_NAME = '
|
|
121
|
+
LOCAL_CHARGING_LOG_DATASET_NAME = 'charging-log'
|
|
115
122
|
|
|
116
123
|
def __init__(self, configuration: Configuration, client: ApifyClientAsync) -> None:
|
|
117
124
|
self._max_total_charge_usd = configuration.max_total_charge_usd or Decimal('inf')
|
|
125
|
+
self._configuration = configuration
|
|
118
126
|
self._is_at_home = configuration.is_at_home
|
|
119
127
|
self._actor_run_id = configuration.actor_run_id
|
|
120
128
|
self._purge_charging_log_dataset = configuration.purge_on_start
|
|
121
129
|
self._pricing_model: PricingModel | None = None
|
|
122
130
|
|
|
123
|
-
if configuration.test_pay_per_event:
|
|
124
|
-
if self._is_at_home:
|
|
125
|
-
raise ValueError(
|
|
126
|
-
'Using the ACTOR_TEST_PAY_PER_EVENT environment variable is only supported '
|
|
127
|
-
'in a local development environment'
|
|
128
|
-
)
|
|
129
|
-
|
|
130
|
-
self._pricing_model = 'PAY_PER_EVENT'
|
|
131
|
-
|
|
132
131
|
self._client = client
|
|
133
132
|
self._charging_log_dataset: Dataset | None = None
|
|
134
133
|
|
|
@@ -140,37 +139,46 @@ class ChargingManagerImplementation(ChargingManager):
|
|
|
140
139
|
|
|
141
140
|
async def __aenter__(self) -> None:
|
|
142
141
|
"""Initialize the charging manager - this is called by the `Actor` class and shouldn't be invoked manually."""
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
raise RuntimeError('Actor run ID not found even though the Actor is running on Apify')
|
|
142
|
+
# Validate config
|
|
143
|
+
if self._configuration.test_pay_per_event and self._is_at_home:
|
|
144
|
+
raise ValueError(
|
|
145
|
+
'Using the ACTOR_TEST_PAY_PER_EVENT environment variable is only supported '
|
|
146
|
+
'in a local development environment'
|
|
147
|
+
)
|
|
150
148
|
|
|
151
|
-
|
|
152
|
-
if run is None:
|
|
153
|
-
raise RuntimeError('Actor run not found')
|
|
149
|
+
self.active = True
|
|
154
150
|
|
|
155
|
-
|
|
156
|
-
|
|
151
|
+
# Retrieve pricing information from env vars or API
|
|
152
|
+
pricing_data = await self._fetch_pricing_info()
|
|
153
|
+
pricing_info = pricing_data['pricing_info']
|
|
154
|
+
charged_event_counts = pricing_data['charged_event_counts']
|
|
155
|
+
max_total_charge_usd = pricing_data['max_total_charge_usd']
|
|
157
156
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
157
|
+
# Set pricing model
|
|
158
|
+
if self._configuration.test_pay_per_event:
|
|
159
|
+
self._pricing_model = 'PAY_PER_EVENT'
|
|
160
|
+
else:
|
|
161
|
+
self._pricing_model = pricing_info.pricing_model if pricing_info else None
|
|
162
|
+
|
|
163
|
+
# Load per-event pricing information
|
|
164
|
+
if pricing_info and pricing_info.pricing_model == 'PAY_PER_EVENT':
|
|
165
|
+
for event_name, event_pricing in pricing_info.pricing_per_event.actor_charge_events.items():
|
|
166
|
+
self._pricing_info[event_name] = PricingInfoItem(
|
|
167
|
+
price=event_pricing.event_price_usd,
|
|
168
|
+
title=event_pricing.event_title,
|
|
169
|
+
)
|
|
164
170
|
|
|
165
|
-
|
|
171
|
+
self._max_total_charge_usd = max_total_charge_usd
|
|
166
172
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
+
# Load charged event counts
|
|
174
|
+
for event_name, count in charged_event_counts.items():
|
|
175
|
+
price = self._pricing_info.get(event_name, PricingInfoItem(Decimal(), title='')).price
|
|
176
|
+
self._charging_state[event_name] = ChargingStateItem(
|
|
177
|
+
charge_count=count,
|
|
178
|
+
total_charged_amount=count * price,
|
|
179
|
+
)
|
|
173
180
|
|
|
181
|
+
# Set up charging log dataset for local development
|
|
174
182
|
if not self._is_at_home and self._pricing_model == 'PAY_PER_EVENT':
|
|
175
183
|
# We are not running on the Apify platform, but PPE is enabled for testing - open a dataset that
|
|
176
184
|
# will contain a log of all charge calls for debugging purposes.
|
|
@@ -328,6 +336,38 @@ class ChargingManagerImplementation(ChargingManager):
|
|
|
328
336
|
def get_max_total_charge_usd(self) -> Decimal:
|
|
329
337
|
return self._max_total_charge_usd
|
|
330
338
|
|
|
339
|
+
async def _fetch_pricing_info(self) -> _FetchedPricingInfoDict:
|
|
340
|
+
"""Fetch pricing information from environment variables or API."""
|
|
341
|
+
# Check if pricing info is available via environment variables
|
|
342
|
+
if self._configuration.actor_pricing_info is not None and self._configuration.charged_event_counts is not None:
|
|
343
|
+
return _FetchedPricingInfoDict(
|
|
344
|
+
pricing_info=self._configuration.actor_pricing_info,
|
|
345
|
+
charged_event_counts=self._configuration.charged_event_counts,
|
|
346
|
+
max_total_charge_usd=self._configuration.max_total_charge_usd or Decimal('inf'),
|
|
347
|
+
)
|
|
348
|
+
|
|
349
|
+
# Fall back to API call
|
|
350
|
+
if self._is_at_home:
|
|
351
|
+
if self._actor_run_id is None:
|
|
352
|
+
raise RuntimeError('Actor run ID not found even though the Actor is running on Apify')
|
|
353
|
+
|
|
354
|
+
run = run_validator.validate_python(await self._client.run(self._actor_run_id).get())
|
|
355
|
+
if run is None:
|
|
356
|
+
raise RuntimeError('Actor run not found')
|
|
357
|
+
|
|
358
|
+
return _FetchedPricingInfoDict(
|
|
359
|
+
pricing_info=run.pricing_info,
|
|
360
|
+
charged_event_counts=run.charged_event_counts or {},
|
|
361
|
+
max_total_charge_usd=run.options.max_total_charge_usd or Decimal('inf'),
|
|
362
|
+
)
|
|
363
|
+
|
|
364
|
+
# Local development without environment variables
|
|
365
|
+
return _FetchedPricingInfoDict(
|
|
366
|
+
pricing_info=None,
|
|
367
|
+
charged_event_counts={},
|
|
368
|
+
max_total_charge_usd=self._configuration.max_total_charge_usd or Decimal('inf'),
|
|
369
|
+
)
|
|
370
|
+
|
|
331
371
|
|
|
332
372
|
@dataclass
|
|
333
373
|
class ChargingStateItem:
|
|
@@ -339,3 +379,15 @@ class ChargingStateItem:
|
|
|
339
379
|
class PricingInfoItem:
|
|
340
380
|
price: Decimal
|
|
341
381
|
title: str
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
class _FetchedPricingInfoDict(TypedDict):
|
|
385
|
+
pricing_info: (
|
|
386
|
+
FreeActorPricingInfo
|
|
387
|
+
| FlatPricePerMonthActorPricingInfo
|
|
388
|
+
| PricePerDatasetItemActorPricingInfo
|
|
389
|
+
| PayPerEventActorPricingInfo
|
|
390
|
+
| None
|
|
391
|
+
)
|
|
392
|
+
charged_event_counts: dict[str, int]
|
|
393
|
+
max_total_charge_usd: Decimal
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import json
|
|
3
4
|
from datetime import datetime, timedelta
|
|
4
5
|
from decimal import Decimal
|
|
5
6
|
from logging import getLogger
|
|
@@ -14,6 +15,12 @@ from crawlee._utils.models import timedelta_ms
|
|
|
14
15
|
from crawlee._utils.urls import validate_http_url
|
|
15
16
|
from crawlee.configuration import Configuration as CrawleeConfiguration
|
|
16
17
|
|
|
18
|
+
from apify._models import (
|
|
19
|
+
FlatPricePerMonthActorPricingInfo,
|
|
20
|
+
FreeActorPricingInfo,
|
|
21
|
+
PayPerEventActorPricingInfo,
|
|
22
|
+
PricePerDatasetItemActorPricingInfo,
|
|
23
|
+
)
|
|
17
24
|
from apify._utils import docs_group
|
|
18
25
|
|
|
19
26
|
logger = getLogger(__name__)
|
|
@@ -409,6 +416,29 @@ class Configuration(CrawleeConfiguration):
|
|
|
409
416
|
),
|
|
410
417
|
] = None
|
|
411
418
|
|
|
419
|
+
actor_pricing_info: Annotated[
|
|
420
|
+
FreeActorPricingInfo
|
|
421
|
+
| FlatPricePerMonthActorPricingInfo
|
|
422
|
+
| PricePerDatasetItemActorPricingInfo
|
|
423
|
+
| PayPerEventActorPricingInfo
|
|
424
|
+
| None,
|
|
425
|
+
Field(
|
|
426
|
+
alias='apify_actor_pricing_info',
|
|
427
|
+
description='JSON string with pricing info of the actor',
|
|
428
|
+
discriminator='pricing_model',
|
|
429
|
+
),
|
|
430
|
+
BeforeValidator(lambda data: json.loads(data) if isinstance(data, str) else data if data else None),
|
|
431
|
+
] = None
|
|
432
|
+
|
|
433
|
+
charged_event_counts: Annotated[
|
|
434
|
+
dict[str, int] | None,
|
|
435
|
+
Field(
|
|
436
|
+
alias='apify_charged_actor_event_counts',
|
|
437
|
+
description='Counts of events that were charged for the actor',
|
|
438
|
+
),
|
|
439
|
+
BeforeValidator(lambda data: json.loads(data) if isinstance(data, str) else data if data else None),
|
|
440
|
+
] = None
|
|
441
|
+
|
|
412
442
|
@model_validator(mode='after')
|
|
413
443
|
def disable_browser_sandbox_on_platform(self) -> Self:
|
|
414
444
|
"""Disable the browser sandbox mode when running on the Apify platform.
|
|
@@ -120,7 +120,7 @@ class RequestQueueStats(BaseModel):
|
|
|
120
120
|
"""The number of request queue reads."""
|
|
121
121
|
|
|
122
122
|
storage_bytes: Annotated[int, Field(alias='storageBytes', default=0)]
|
|
123
|
-
"""Storage size in
|
|
123
|
+
"""Storage size in bytes."""
|
|
124
124
|
|
|
125
125
|
write_count: Annotated[int, Field(alias='writeCount', default=0)]
|
|
126
126
|
"""The number of request queue writes."""
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Literal
|
|
4
|
+
|
|
5
|
+
from typing_extensions import override
|
|
6
|
+
|
|
7
|
+
from crawlee.storage_clients._base import StorageClient
|
|
8
|
+
|
|
9
|
+
from ._dataset_client import ApifyDatasetClient
|
|
10
|
+
from ._key_value_store_client import ApifyKeyValueStoreClient
|
|
11
|
+
from ._request_queue_client import ApifyRequestQueueClient
|
|
12
|
+
from ._utils import hash_api_base_url_and_token
|
|
13
|
+
from apify._configuration import Configuration as ApifyConfiguration
|
|
14
|
+
from apify._utils import docs_group
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from collections.abc import Hashable
|
|
18
|
+
|
|
19
|
+
from crawlee.configuration import Configuration as CrawleeConfiguration
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@docs_group('Storage clients')
|
|
23
|
+
class ApifyStorageClient(StorageClient):
|
|
24
|
+
"""Apify platform implementation of the storage client.
|
|
25
|
+
|
|
26
|
+
This storage client provides access to datasets, key-value stores, and request queues that persist data
|
|
27
|
+
to the Apify platform. Each storage type is implemented with its own specific Apify client that stores data
|
|
28
|
+
in the cloud, making it accessible from anywhere.
|
|
29
|
+
|
|
30
|
+
The communication with the Apify platform is handled via the Apify API client for Python, which is an HTTP API
|
|
31
|
+
wrapper. For maximum efficiency and performance of the storage clients, various caching mechanisms are used to
|
|
32
|
+
minimize the number of API calls made to the Apify platform. Data can be inspected and manipulated through
|
|
33
|
+
the Apify console web interface or via the Apify API.
|
|
34
|
+
|
|
35
|
+
The request queue client supports two access modes controlled by the `request_queue_access` parameter:
|
|
36
|
+
|
|
37
|
+
### Single mode
|
|
38
|
+
|
|
39
|
+
The `single` mode is optimized for scenarios with only one consumer. It minimizes API calls, making it faster
|
|
40
|
+
and more cost-efficient compared to the `shared` mode. This option is ideal when a single Actor is responsible
|
|
41
|
+
for consuming the entire request queue. Using multiple consumers simultaneously may lead to inconsistencies
|
|
42
|
+
or unexpected behavior.
|
|
43
|
+
|
|
44
|
+
In this mode, multiple producers can safely add new requests, but forefront requests may not be processed
|
|
45
|
+
immediately, as the client relies on local head estimation instead of frequent forefront fetching. Requests can
|
|
46
|
+
also be added or marked as handled by other clients, but they must not be deleted or modified, since such changes
|
|
47
|
+
would not be reflected in the local cache. If a request is already fully cached locally, marking it as handled
|
|
48
|
+
by another client will be ignored by this client. This does not cause errors but can occasionally result in
|
|
49
|
+
reprocessing a request that was already handled elsewhere. If the request was not yet cached locally, marking
|
|
50
|
+
it as handled poses no issue.
|
|
51
|
+
|
|
52
|
+
### Shared mode
|
|
53
|
+
|
|
54
|
+
The `shared` mode is designed for scenarios with multiple concurrent consumers. It ensures proper synchronization
|
|
55
|
+
and consistency across clients, at the cost of higher API usage and slightly worse performance. This mode is safe
|
|
56
|
+
for concurrent access from multiple processes, including Actors running in parallel on the Apify platform. It
|
|
57
|
+
should be used when multiple consumers need to process requests from the same queue simultaneously.
|
|
58
|
+
"""
|
|
59
|
+
|
|
60
|
+
_LSP_ERROR_MSG = 'Expected "configuration" to be an instance of "apify.Configuration", but got {} instead.'
|
|
61
|
+
"""This class (intentionally) violates the Liskov Substitution Principle.
|
|
62
|
+
|
|
63
|
+
It requires a specialized `Configuration` instance compared to its parent class.
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
def __init__(self, *, request_queue_access: Literal['single', 'shared'] = 'single') -> None:
|
|
67
|
+
"""Initialize a new instance.
|
|
68
|
+
|
|
69
|
+
Args:
|
|
70
|
+
request_queue_access: Defines how the request queue client behaves. Use `single` mode for a single
|
|
71
|
+
consumer. It has fewer API calls, meaning better performance and lower costs. If you need multiple
|
|
72
|
+
concurrent consumers, use `shared` mode, but expect worse performance and higher costs due to
|
|
73
|
+
the additional overhead.
|
|
74
|
+
"""
|
|
75
|
+
self._request_queue_access = request_queue_access
|
|
76
|
+
|
|
77
|
+
@override
|
|
78
|
+
async def create_dataset_client(
|
|
79
|
+
self,
|
|
80
|
+
*,
|
|
81
|
+
id: str | None = None,
|
|
82
|
+
name: str | None = None,
|
|
83
|
+
alias: str | None = None,
|
|
84
|
+
configuration: CrawleeConfiguration | None = None,
|
|
85
|
+
) -> ApifyDatasetClient:
|
|
86
|
+
configuration = configuration or ApifyConfiguration.get_global_configuration()
|
|
87
|
+
if isinstance(configuration, ApifyConfiguration):
|
|
88
|
+
return await ApifyDatasetClient.open(id=id, name=name, alias=alias, configuration=configuration)
|
|
89
|
+
|
|
90
|
+
raise TypeError(self._LSP_ERROR_MSG.format(type(configuration).__name__))
|
|
91
|
+
|
|
92
|
+
@override
|
|
93
|
+
async def create_kvs_client(
|
|
94
|
+
self,
|
|
95
|
+
*,
|
|
96
|
+
id: str | None = None,
|
|
97
|
+
name: str | None = None,
|
|
98
|
+
alias: str | None = None,
|
|
99
|
+
configuration: CrawleeConfiguration | None = None,
|
|
100
|
+
) -> ApifyKeyValueStoreClient:
|
|
101
|
+
configuration = configuration or ApifyConfiguration.get_global_configuration()
|
|
102
|
+
if isinstance(configuration, ApifyConfiguration):
|
|
103
|
+
return await ApifyKeyValueStoreClient.open(id=id, name=name, alias=alias, configuration=configuration)
|
|
104
|
+
|
|
105
|
+
raise TypeError(self._LSP_ERROR_MSG.format(type(configuration).__name__))
|
|
106
|
+
|
|
107
|
+
@override
|
|
108
|
+
async def create_rq_client(
|
|
109
|
+
self,
|
|
110
|
+
*,
|
|
111
|
+
id: str | None = None,
|
|
112
|
+
name: str | None = None,
|
|
113
|
+
alias: str | None = None,
|
|
114
|
+
configuration: CrawleeConfiguration | None = None,
|
|
115
|
+
) -> ApifyRequestQueueClient:
|
|
116
|
+
configuration = configuration or ApifyConfiguration.get_global_configuration()
|
|
117
|
+
if isinstance(configuration, ApifyConfiguration):
|
|
118
|
+
return await ApifyRequestQueueClient.open(
|
|
119
|
+
id=id, name=name, alias=alias, configuration=configuration, access=self._request_queue_access
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
raise TypeError(self._LSP_ERROR_MSG.format(type(configuration).__name__))
|
|
123
|
+
|
|
124
|
+
@override
|
|
125
|
+
def get_storage_client_cache_key(self, configuration: CrawleeConfiguration) -> Hashable:
|
|
126
|
+
if isinstance(configuration, ApifyConfiguration):
|
|
127
|
+
# Opening the exact same queue with both a 'single' and a 'shared' client at the same time is not supported.
|
|
128
|
+
# Whichever client variation gets used first, wins.
|
|
129
|
+
return super().get_storage_client_cache_key(configuration), hash_api_base_url_and_token(configuration)
|
|
130
|
+
|
|
131
|
+
config_class = type(configuration)
|
|
132
|
+
raise TypeError(self._LSP_ERROR_MSG.format(f'{config_class.__module__}.{config_class.__name__}'))
|
|
@@ -8,8 +8,7 @@ from crawlee.storage_clients._base import DatasetClient, KeyValueStoreClient, Re
|
|
|
8
8
|
|
|
9
9
|
from apify._configuration import Configuration as ApifyConfiguration
|
|
10
10
|
from apify._utils import docs_group
|
|
11
|
-
from apify.storage_clients import ApifyStorageClient
|
|
12
|
-
from apify.storage_clients._file_system import ApifyFileSystemStorageClient
|
|
11
|
+
from apify.storage_clients import ApifyStorageClient, FileSystemStorageClient
|
|
13
12
|
|
|
14
13
|
if TYPE_CHECKING:
|
|
15
14
|
from collections.abc import Hashable
|
|
@@ -19,28 +18,36 @@ if TYPE_CHECKING:
|
|
|
19
18
|
|
|
20
19
|
@docs_group('Storage clients')
|
|
21
20
|
class SmartApifyStorageClient(StorageClient):
|
|
22
|
-
"""
|
|
21
|
+
"""Storage client that automatically selects cloud or local storage client based on the environment.
|
|
23
22
|
|
|
24
|
-
|
|
25
|
-
|
|
23
|
+
This storage client provides access to datasets, key-value stores, and request queues by intelligently
|
|
24
|
+
delegating to either the cloud or local storage client based on the execution environment and configuration.
|
|
25
|
+
|
|
26
|
+
When running on the Apify platform (which is detected via environment variables), this client automatically
|
|
27
|
+
uses the `cloud_storage_client` to store data on the platform. When running locally, it uses the
|
|
28
|
+
`local_storage_client` to store data locally. You can also force cloud storage usage from your
|
|
29
|
+
local machine by using the `force_cloud` argument.
|
|
30
|
+
|
|
31
|
+
This storage client is designed to work specifically in `Actor` context and provides a seamless development
|
|
32
|
+
experience where the same code works both locally and on the Apify platform without any changes.
|
|
26
33
|
"""
|
|
27
34
|
|
|
28
35
|
def __init__(
|
|
29
36
|
self,
|
|
30
37
|
*,
|
|
31
|
-
cloud_storage_client:
|
|
38
|
+
cloud_storage_client: StorageClient | None = None,
|
|
32
39
|
local_storage_client: StorageClient | None = None,
|
|
33
40
|
) -> None:
|
|
34
|
-
"""Initialize
|
|
41
|
+
"""Initialize a new instance.
|
|
35
42
|
|
|
36
43
|
Args:
|
|
37
|
-
cloud_storage_client:
|
|
38
|
-
`force_cloud` argument
|
|
39
|
-
local_storage_client:
|
|
40
|
-
|
|
44
|
+
cloud_storage_client: Storage client used when an Actor is running on the Apify platform, or when
|
|
45
|
+
explicitly enabled via the `force_cloud` argument. Defaults to `ApifyStorageClient`.
|
|
46
|
+
local_storage_client: Storage client used when an Actor is not running on the Apify platform and when
|
|
47
|
+
`force_cloud` flag is not set. Defaults to `FileSystemStorageClient`.
|
|
41
48
|
"""
|
|
42
|
-
self._cloud_storage_client = cloud_storage_client or ApifyStorageClient(
|
|
43
|
-
self._local_storage_client = local_storage_client or
|
|
49
|
+
self._cloud_storage_client = cloud_storage_client or ApifyStorageClient()
|
|
50
|
+
self._local_storage_client = local_storage_client or FileSystemStorageClient()
|
|
44
51
|
|
|
45
52
|
def __str__(self) -> str:
|
|
46
53
|
return (
|
|
@@ -48,26 +55,6 @@ class SmartApifyStorageClient(StorageClient):
|
|
|
48
55
|
f' local_storage_client={self._local_storage_client.__class__.__name__})'
|
|
49
56
|
)
|
|
50
57
|
|
|
51
|
-
def get_suitable_storage_client(self, *, force_cloud: bool = False) -> StorageClient:
|
|
52
|
-
"""Get a suitable storage client based on the global configuration and the value of the force_cloud flag.
|
|
53
|
-
|
|
54
|
-
Args:
|
|
55
|
-
force_cloud: If True, return `cloud_storage_client`.
|
|
56
|
-
"""
|
|
57
|
-
if ApifyConfiguration.get_global_configuration().is_at_home:
|
|
58
|
-
return self._cloud_storage_client
|
|
59
|
-
|
|
60
|
-
configuration = ApifyConfiguration.get_global_configuration()
|
|
61
|
-
if force_cloud:
|
|
62
|
-
if configuration.token is None:
|
|
63
|
-
raise RuntimeError(
|
|
64
|
-
'In order to use the Apify cloud storage from your computer, '
|
|
65
|
-
'you need to provide an Apify token using the APIFY_TOKEN environment variable.'
|
|
66
|
-
)
|
|
67
|
-
return self._cloud_storage_client
|
|
68
|
-
|
|
69
|
-
return self._local_storage_client
|
|
70
|
-
|
|
71
58
|
@override
|
|
72
59
|
def get_storage_client_cache_key(self, configuration: CrawleeConfiguration) -> Hashable:
|
|
73
60
|
if ApifyConfiguration.get_global_configuration().is_at_home:
|
|
@@ -115,3 +102,23 @@ class SmartApifyStorageClient(StorageClient):
|
|
|
115
102
|
return await self.get_suitable_storage_client().create_rq_client(
|
|
116
103
|
id=id, name=id, alias=alias, configuration=configuration
|
|
117
104
|
)
|
|
105
|
+
|
|
106
|
+
def get_suitable_storage_client(self, *, force_cloud: bool = False) -> StorageClient:
|
|
107
|
+
"""Get a suitable storage client based on the global configuration and the value of the force_cloud flag.
|
|
108
|
+
|
|
109
|
+
Args:
|
|
110
|
+
force_cloud: If True, return `cloud_storage_client`.
|
|
111
|
+
"""
|
|
112
|
+
if ApifyConfiguration.get_global_configuration().is_at_home:
|
|
113
|
+
return self._cloud_storage_client
|
|
114
|
+
|
|
115
|
+
configuration = ApifyConfiguration.get_global_configuration()
|
|
116
|
+
if force_cloud:
|
|
117
|
+
if configuration.token is None:
|
|
118
|
+
raise RuntimeError(
|
|
119
|
+
'In order to use the Apify cloud storage from your computer, '
|
|
120
|
+
'you need to provide an Apify token using the APIFY_TOKEN environment variable.'
|
|
121
|
+
)
|
|
122
|
+
return self._cloud_storage_client
|
|
123
|
+
|
|
124
|
+
return self._local_storage_client
|
|
@@ -2,6 +2,8 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from typing import TYPE_CHECKING
|
|
4
4
|
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
5
7
|
from apify import Actor
|
|
6
8
|
|
|
7
9
|
if TYPE_CHECKING:
|
|
@@ -119,6 +121,7 @@ async def test_actor_fails_correctly_with_exception(
|
|
|
119
121
|
assert run_result.status == 'FAILED'
|
|
120
122
|
|
|
121
123
|
|
|
124
|
+
@pytest.mark.skip(reason='Skipped due to known instability, see issue #650.')
|
|
122
125
|
async def test_actor_with_crawler_reboot(make_actor: MakeActorFunction, run_actor: RunActorFunction) -> None:
|
|
123
126
|
"""Test that crawler in actor works as expected after reboot.
|
|
124
127
|
|
|
@@ -12,7 +12,7 @@ if TYPE_CHECKING:
|
|
|
12
12
|
|
|
13
13
|
# TODO: What to do with the `browserforge` output?
|
|
14
14
|
# https://github.com/apify/apify-sdk-python/issues/423
|
|
15
|
-
@pytest.mark.skip
|
|
15
|
+
@pytest.mark.skip(reason='Skipped due to known instability, see issue #423.')
|
|
16
16
|
async def test_actor_logging(
|
|
17
17
|
make_actor: MakeActorFunction,
|
|
18
18
|
run_actor: RunActorFunction,
|