apify 2.7.1b18__tar.gz → 2.7.1b20__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apify might be problematic. Click here for more details.
- {apify-2.7.1b18 → apify-2.7.1b20}/CHANGELOG.md +26 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/PKG-INFO +1 -1
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/12_pay_per_event.mdx +6 -0
- apify-2.7.1b20/docs/04_upgrading/upgrading_to_v3.md +121 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/pyproject.toml +1 -1
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/_actor.py +0 -3
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/_configuration.py +6 -6
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/storage_clients/_apify/_dataset_client.py +9 -2
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/storage_clients/_apify/_key_value_store_client.py +9 -2
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/storage_clients/_apify/_models.py +25 -1
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/storage_clients/_apify/_request_queue_client.py +19 -5
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/storage_clients/_apify/_utils.py +3 -2
- apify-2.7.1b20/tests/integration/test_apify_storages.py +75 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/integration/test_request_queue.py +25 -1
- {apify-2.7.1b18 → apify-2.7.1b20}/uv.lock +462 -376
- {apify-2.7.1b18 → apify-2.7.1b20}/website/package-lock.json +809 -707
- apify-2.7.1b18/docs/04_upgrading/upgrading_to_v3.md +0 -54
- apify-2.7.1b18/tests/integration/test_apify_storages.py +0 -34
- {apify-2.7.1b18 → apify-2.7.1b20}/.editorconfig +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/.github/CODEOWNERS +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/.github/workflows/build_and_deploy_docs.yaml +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/.github/workflows/check_pr_title.yaml +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/.github/workflows/pre_release.yaml +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/.github/workflows/release.yaml +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/.github/workflows/run_code_checks.yaml +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/.github/workflows/update_new_issue.yaml +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/.gitignore +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/.markdownlint.yaml +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/.pre-commit-config.yaml +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/CONTRIBUTING.md +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/LICENSE +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/Makefile +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/README.md +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/01_overview/01_introduction.mdx +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/01_overview/02_running_actors_locally.mdx +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/01_overview/03_actor_structure.mdx +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/01_overview/code/01_introduction.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/01_overview/code/actor_structure/__init__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/01_overview/code/actor_structure/__main__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/01_overview/code/actor_structure/main.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/01_overview/code/actor_structure/py.typed +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/01_actor_lifecycle.mdx +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/02_actor_input.mdx +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/03_storages.mdx +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/04_actor_events.mdx +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/05_proxy_management.mdx +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/06_interacting_with_other_actors.mdx +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/07_webhooks.mdx +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/08_access_apify_api.mdx +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/09_running_webserver.mdx +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/10_logging.mdx +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/11_configuration.mdx +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/01_context_manager.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/01_init_exit.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/01_reboot.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/01_status_message.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/02_input.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/03_dataset_exports.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/03_dataset_read_write.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/03_deleting_storages.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/03_kvs_iterating.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/03_kvs_public_url.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/03_kvs_read_write.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/03_opening_storages.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/03_rq.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/04_actor_events.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/05_apify_proxy.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/05_apify_proxy_config.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/05_custom_proxy.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/05_custom_proxy_function.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/05_proxy_actor_input.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/05_proxy_httpx.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/05_proxy_rotation.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/06_interacting_call.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/06_interacting_call_task.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/06_interacting_metamorph.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/06_interacting_start.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/07_webhook.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/07_webhook_preventing.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/08_actor_client.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/08_actor_new_client.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/09_webserver.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/10_log_config.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/10_logger_usage.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/10_redirect_log.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/10_redirect_log_existing_run.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/11_config.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/actor_charge.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/02_concepts/code/conditional_actor_charge.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/03_guides/01_beautifulsoup_httpx.mdx +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/03_guides/02_parsel_impit.mdx +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/03_guides/03_playwright.mdx +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/03_guides/04_selenium.mdx +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/03_guides/05_crawlee.mdx +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/03_guides/06_scrapy.mdx +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/03_guides/code/01_beautifulsoup_httpx.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/03_guides/code/02_parsel_impit.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/03_guides/code/03_playwright.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/03_guides/code/04_selenium.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/03_guides/code/05_crawlee_beautifulsoup.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/03_guides/code/05_crawlee_parsel.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/03_guides/code/05_crawlee_playwright.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/03_guides/code/scrapy_project/src/__init__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/03_guides/code/scrapy_project/src/__main__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/03_guides/code/scrapy_project/src/items.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/03_guides/code/scrapy_project/src/main.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/03_guides/code/scrapy_project/src/py.typed +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/03_guides/code/scrapy_project/src/settings.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/03_guides/code/scrapy_project/src/spiders/__init__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/03_guides/code/scrapy_project/src/spiders/py.typed +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/03_guides/code/scrapy_project/src/spiders/title.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/04_upgrading/upgrading_to_v2.md +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/docs/pyproject.toml +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/renovate.json +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/__init__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/_charging.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/_consts.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/_crypto.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/_models.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/_proxy_configuration.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/_utils.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/events/__init__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/events/_apify_event_manager.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/events/_types.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/events/py.typed +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/log.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/py.typed +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/request_loaders/__init__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/request_loaders/_apify_request_list.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/request_loaders/py.typed +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/scrapy/__init__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/scrapy/_actor_runner.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/scrapy/_async_thread.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/scrapy/_logging_config.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/scrapy/extensions/__init__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/scrapy/extensions/_httpcache.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/scrapy/middlewares/__init__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/scrapy/middlewares/apify_proxy.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/scrapy/middlewares/py.typed +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/scrapy/pipelines/__init__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/scrapy/pipelines/actor_dataset_push.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/scrapy/pipelines/py.typed +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/scrapy/py.typed +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/scrapy/requests.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/scrapy/scheduler.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/scrapy/utils.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/storage_clients/__init__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/storage_clients/_apify/__init__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/storage_clients/_apify/_storage_client.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/storage_clients/_apify/py.typed +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/storage_clients/_file_system/__init__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/storage_clients/_file_system/_key_value_store_client.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/storage_clients/_file_system/_storage_client.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/storage_clients/py.typed +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/storages/__init__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/src/apify/storages/py.typed +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/integration/README.md +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/integration/__init__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/integration/_utils.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/integration/actor_source_base/Dockerfile +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/integration/actor_source_base/requirements.txt +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/integration/actor_source_base/server.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/integration/actor_source_base/src/__init__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/integration/actor_source_base/src/__main__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/integration/actor_source_base/src/main.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/integration/conftest.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/integration/test_actor_api_helpers.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/integration/test_actor_call_timeouts.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/integration/test_actor_charge.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/integration/test_actor_create_proxy_configuration.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/integration/test_actor_dataset.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/integration/test_actor_events.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/integration/test_actor_key_value_store.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/integration/test_actor_lifecycle.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/integration/test_actor_log.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/integration/test_actor_request_queue.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/integration/test_actor_scrapy.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/integration/test_crawlers_with_storages.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/integration/test_fixtures.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/__init__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/actor/__init__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/actor/test_actor_create_proxy_configuration.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/actor/test_actor_dataset.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/actor/test_actor_env_helpers.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/actor/test_actor_helpers.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/actor/test_actor_key_value_store.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/actor/test_actor_lifecycle.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/actor/test_actor_log.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/actor/test_actor_non_default_instance.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/actor/test_actor_request_queue.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/actor/test_configuration.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/actor/test_request_list.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/conftest.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/events/__init__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/events/test_apify_event_manager.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/scrapy/__init__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/scrapy/extensions/__init__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/scrapy/extensions/test_httpcache.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/scrapy/middlewares/__init__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/scrapy/middlewares/test_apify_proxy.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/scrapy/pipelines/__init__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/scrapy/pipelines/test_actor_dataset_push.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/scrapy/requests/__init__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/scrapy/requests/test_to_apify_request.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/scrapy/requests/test_to_scrapy_request.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/scrapy/utils/__init__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/scrapy/utils/test_apply_apify_settings.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/scrapy/utils/test_get_basic_auth_header.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/storage_clients/__init__.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/storage_clients/test_apify_request_queue_client.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/storage_clients/test_file_system.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/test_apify_storages.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/test_crypto.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/tests/unit/test_proxy_configuration.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/website/.eslintrc.json +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/website/babel.config.js +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/website/build_api_reference.sh +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/website/docusaurus.config.js +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/website/generate_module_shortcuts.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/website/package.json +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/website/sidebars.js +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/website/src/components/ApiLink.jsx +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/website/src/components/Gradients.jsx +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/website/src/components/Highlights.jsx +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/website/src/components/Highlights.module.css +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/website/src/components/RunnableCodeBlock.jsx +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/website/src/components/RunnableCodeBlock.module.css +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/website/src/css/custom.css +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/website/src/pages/home_page_example.py +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/website/src/pages/index.js +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/website/src/pages/index.module.css +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/website/src/theme/DocItem/Content/index.js +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/website/static/.nojekyll +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/website/static/img/docs-og.png +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/website/static/img/guides/redirected_logs_example.webp +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/website/tools/docs-prettier.config.js +0 -0
- {apify-2.7.1b18 → apify-2.7.1b20}/website/tools/utils/externalLink.js +0 -0
|
@@ -10,6 +10,7 @@ All notable changes to this project will be documented in this file.
|
|
|
10
10
|
- Add deduplication to `add_batch_of_requests` ([#534](https://github.com/apify/apify-sdk-python/pull/534)) ([dd03c4d](https://github.com/apify/apify-sdk-python/commit/dd03c4d446f611492adf35f1b5738648ee5a66f7)) by [@Pijukatel](https://github.com/Pijukatel), closes [#514](https://github.com/apify/apify-sdk-python/issues/514)
|
|
11
11
|
- Add new methods to ChargingManager ([#580](https://github.com/apify/apify-sdk-python/pull/580)) ([54f7f8b](https://github.com/apify/apify-sdk-python/commit/54f7f8b29c5982be98b595dac11eceff915035c9)) by [@vdusek](https://github.com/vdusek)
|
|
12
12
|
- Add support for NDU storages ([#594](https://github.com/apify/apify-sdk-python/pull/594)) ([8721ef5](https://github.com/apify/apify-sdk-python/commit/8721ef5731bcb1a04ad63c930089bf83be29f308)) by [@vdusek](https://github.com/vdusek), closes [#1175](https://github.com/apify/apify-sdk-python/issues/1175)
|
|
13
|
+
- Add stats to `ApifyRequestQueueClient` ([#574](https://github.com/apify/apify-sdk-python/pull/574)) ([21f6782](https://github.com/apify/apify-sdk-python/commit/21f6782b444f623aba986b4922cf67bafafd4b2c)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1344](https://github.com/apify/apify-sdk-python/issues/1344)
|
|
13
14
|
|
|
14
15
|
### 🐛 Bug Fixes
|
|
15
16
|
|
|
@@ -29,6 +30,7 @@ All notable changes to this project will be documented in this file.
|
|
|
29
30
|
- [**breaking**] Replace `httpx` with `impit` ([#560](https://github.com/apify/apify-sdk-python/pull/560)) ([cca3869](https://github.com/apify/apify-sdk-python/commit/cca3869e85968865e56aafcdcb36fbccba27aef0)) by [@Mantisus](https://github.com/Mantisus), closes [#558](https://github.com/apify/apify-sdk-python/issues/558)
|
|
30
31
|
- [**breaking**] Remove `Request.id` field ([#553](https://github.com/apify/apify-sdk-python/pull/553)) ([445ab5d](https://github.com/apify/apify-sdk-python/commit/445ab5d752b785fc2018b35c8adbe779253d7acd)) by [@Pijukatel](https://github.com/Pijukatel)
|
|
31
32
|
- [**breaking**] Make `Actor` initialization stricter and more predictable ([#576](https://github.com/apify/apify-sdk-python/pull/576)) ([912222a](https://github.com/apify/apify-sdk-python/commit/912222a7a8123be66c94c50a2e461276fbfc50c4)) by [@Pijukatel](https://github.com/Pijukatel)
|
|
33
|
+
- [**breaking**] Make default Apify storages use alias mechanism ([#606](https://github.com/apify/apify-sdk-python/pull/606)) ([dbea7d9](https://github.com/apify/apify-sdk-python/commit/dbea7d97fe7f25aa8658a32c5bb46a3800561df5)) by [@Pijukatel](https://github.com/Pijukatel), closes [#599](https://github.com/apify/apify-sdk-python/issues/599)
|
|
32
34
|
|
|
33
35
|
|
|
34
36
|
<!-- git-cliff-unreleased-end -->
|
|
@@ -152,6 +154,30 @@ All notable changes to this project will be documented in this file.
|
|
|
152
154
|
- Tagline overlap ([#501](https://github.com/apify/apify-sdk-python/pull/501)) ([bae8340](https://github.com/apify/apify-sdk-python/commit/bae8340c46fea756ea35ea4d591da84c09d478e2)) by [@katzino](https://github.com/katzino)
|
|
153
155
|
|
|
154
156
|
|
|
157
|
+
## [2.7.0](https://github.com/apify/apify-sdk-python/releases/tag/v2.7.0) (2025-07-14)
|
|
158
|
+
|
|
159
|
+
### 🚀 Features
|
|
160
|
+
|
|
161
|
+
- **crypto:** Decrypt secret objects ([#482](https://github.com/apify/apify-sdk-python/pull/482)) ([ce9daf7](https://github.com/apify/apify-sdk-python/commit/ce9daf7381212b8dc194e8a643e5ca0dedbc0078)) by [@MFori](https://github.com/MFori)
|
|
162
|
+
|
|
163
|
+
### 🐛 Bug Fixes
|
|
164
|
+
|
|
165
|
+
- Sync `@docusaurus` theme version [internal] ([#500](https://github.com/apify/apify-sdk-python/pull/500)) ([a7485e7](https://github.com/apify/apify-sdk-python/commit/a7485e7d2276fde464ce862573d5b95e7d4d836a)) by [@katzino](https://github.com/katzino)
|
|
166
|
+
- Tagline overlap ([#501](https://github.com/apify/apify-sdk-python/pull/501)) ([bae8340](https://github.com/apify/apify-sdk-python/commit/bae8340c46fea756ea35ea4d591da84c09d478e2)) by [@katzino](https://github.com/katzino)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
## [2.7.0](https://github.com/apify/apify-sdk-python/releases/tag/v2.7.0) (2025-07-14)
|
|
170
|
+
|
|
171
|
+
### 🚀 Features
|
|
172
|
+
|
|
173
|
+
- **crypto:** Decrypt secret objects ([#482](https://github.com/apify/apify-sdk-python/pull/482)) ([ce9daf7](https://github.com/apify/apify-sdk-python/commit/ce9daf7381212b8dc194e8a643e5ca0dedbc0078)) by [@MFori](https://github.com/MFori)
|
|
174
|
+
|
|
175
|
+
### 🐛 Bug Fixes
|
|
176
|
+
|
|
177
|
+
- Sync `@docusaurus` theme version [internal] ([#500](https://github.com/apify/apify-sdk-python/pull/500)) ([a7485e7](https://github.com/apify/apify-sdk-python/commit/a7485e7d2276fde464ce862573d5b95e7d4d836a)) by [@katzino](https://github.com/katzino)
|
|
178
|
+
- Tagline overlap ([#501](https://github.com/apify/apify-sdk-python/pull/501)) ([bae8340](https://github.com/apify/apify-sdk-python/commit/bae8340c46fea756ea35ea4d591da84c09d478e2)) by [@katzino](https://github.com/katzino)
|
|
179
|
+
|
|
180
|
+
|
|
155
181
|
|
|
156
182
|
## [2.7.3](https://github.com/apify/apify-sdk-python/releases/tag/v2.7.3) (2025-08-11)
|
|
157
183
|
|
|
@@ -13,6 +13,12 @@ Apify provides several [pricing models](https://docs.apify.com/platform/actors/p
|
|
|
13
13
|
|
|
14
14
|
To use the pay-per-event pricing model, you first need to [set it up](https://docs.apify.com/platform/actors/running/actors-in-store#pay-per-event) for your Actor in the Apify console. After that, you're free to start charging for events.
|
|
15
15
|
|
|
16
|
+
:::info How pay-per-event pricing works
|
|
17
|
+
|
|
18
|
+
If you want more details about PPE pricing, please refer to our [PPE documentation](https://docs.apify.com/platform/actors/publishing/monetize/pay-per-event).
|
|
19
|
+
|
|
20
|
+
:::
|
|
21
|
+
|
|
16
22
|
## Charging for events
|
|
17
23
|
|
|
18
24
|
After monetization is set in the Apify console, you can add <ApiLink to="class/Actor#charge">`Actor.charge`</ApiLink> calls to your code and start monetizing!
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
---
|
|
2
|
+
id: upgrading-to-v3
|
|
3
|
+
title: Upgrading to v3
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
This page summarizes the breaking changes between Apify Python SDK v2.x and v3.0.
|
|
7
|
+
|
|
8
|
+
## Python version support
|
|
9
|
+
|
|
10
|
+
Support for Python 3.9 has been dropped. The Apify Python SDK v3.x now requires Python 3.10 or later. Make sure your environment is running a compatible version before upgrading.
|
|
11
|
+
|
|
12
|
+
## Changes in storages
|
|
13
|
+
|
|
14
|
+
Apify Python SDK v3.0 includes Crawlee v1.0, which brings significant changes to the storage APIs. In Crawlee v1.0, the `Dataset`, `KeyValueStore`, and `RequestQueue` storage APIs have been updated for consistency and simplicity. Below is a detailed overview of what's new, what's changed, and what's been removed.
|
|
15
|
+
|
|
16
|
+
See Crawlee's [Storages guide](https://crawlee.dev/python/docs/guides/storages) for more details.
|
|
17
|
+
|
|
18
|
+
### Dataset
|
|
19
|
+
|
|
20
|
+
The `Dataset` API now includes several new methods, such as:
|
|
21
|
+
|
|
22
|
+
- `get_metadata` - retrieves metadata information for the dataset.
|
|
23
|
+
- `purge` - completely clears the dataset, including all items (keeps the metadata only).
|
|
24
|
+
- `list_items` - returns the dataset's items in a list format.
|
|
25
|
+
|
|
26
|
+
Some older methods have been removed or replaced:
|
|
27
|
+
|
|
28
|
+
- `from_storage_object` constructor has been removed. You should now use the `open` method with either a `name` or `id` parameter.
|
|
29
|
+
- `get_info` method and the `storage_object` property have been replaced by the new `get_metadata` method.
|
|
30
|
+
- `set_metadata` method has been removed.
|
|
31
|
+
- `write_to_json` and `write_to_csv` methods have been removed; instead, use the `export_to` method for exporting data in different formats.
|
|
32
|
+
|
|
33
|
+
### Key-value store
|
|
34
|
+
|
|
35
|
+
The `KeyValueStore` API now includes several new methods, such as:
|
|
36
|
+
|
|
37
|
+
- `get_metadata` - retrieves metadata information for the key-value store.
|
|
38
|
+
- `purge` - completely clears the key-value store, removing all keys and values (keeps the metadata only).
|
|
39
|
+
- `delete_value` - deletes a specific key and its associated value.
|
|
40
|
+
- `list_keys` - lists all keys in the key-value store.
|
|
41
|
+
|
|
42
|
+
Some older methods have been removed or replaced:
|
|
43
|
+
|
|
44
|
+
- `from_storage_object` - removed; use the `open` method with either a `name` or `id` instead.
|
|
45
|
+
- `get_info` and `storage_object` - replaced by the new `get_metadata` method.
|
|
46
|
+
- `set_metadata` method has been removed.
|
|
47
|
+
|
|
48
|
+
### Request queue
|
|
49
|
+
|
|
50
|
+
The `RequestQueue` API now includes several new methods, such as:
|
|
51
|
+
|
|
52
|
+
- `get_metadata` - retrieves metadata information for the request queue.
|
|
53
|
+
- `purge` - completely clears the request queue, including all pending and processed requests (keeps the metadata only).
|
|
54
|
+
- `add_requests` - replaces the previous `add_requests_batched` method, offering the same functionality under a simpler name.
|
|
55
|
+
|
|
56
|
+
Some older methods have been removed or replaced:
|
|
57
|
+
|
|
58
|
+
- `from_storage_object` - removed; use the `open` method with either a `name` or `id` instead.
|
|
59
|
+
- `get_info` and `storage_object` - replaced by the new `get_metadata` method.
|
|
60
|
+
- `get_request` now takes a `unique_key` argument instead of `request_id`, because the `id` field was removed from `Request`.
|
|
61
|
+
- `set_metadata` method has been removed.
|
|
62
|
+
|
|
63
|
+
Some changes in the related model classes:
|
|
64
|
+
|
|
65
|
+
- `resource_directory` in `RequestQueueMetadata` - removed; use the corresponding `path_to_*` property instead.
|
|
66
|
+
- `stats` field in `RequestQueueMetadata` - removed as it was unused.
|
|
67
|
+
- `RequestQueueHead` - replaced by `RequestQueueHeadWithLocks`.
|
|
68
|
+
|
|
69
|
+
## Removed Actor.config property
|
|
70
|
+
- `Actor.config` property has been removed. Use `Actor.configuration` instead.
|
|
71
|
+
|
|
72
|
+
## Actor initialization and ServiceLocator changes
|
|
73
|
+
|
|
74
|
+
`Actor` initialization and the setup of global `service_locator` services are now stricter and more predictable.
|
|
75
|
+
- Services in `Actor` can't be changed after calling `Actor.init`, entering the `async with Actor` context manager or after requesting them from the `Actor`.
|
|
76
|
+
- Services in `Actor` can be different from services in Crawler.
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
**Now (v3.0):**
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
from crawlee.crawlers import BasicCrawler
|
|
83
|
+
from crawlee.storage_clients import MemoryStorageClient
|
|
84
|
+
from crawlee.configuration import Configuration
|
|
85
|
+
from crawlee.events import LocalEventManager
|
|
86
|
+
from apify import Actor
|
|
87
|
+
|
|
88
|
+
async def main():
|
|
89
|
+
|
|
90
|
+
async with Actor():
|
|
91
|
+
# This crawler will use the same services as the Actor and the global service_locator
|
|
92
|
+
crawler_1 = BasicCrawler()
|
|
93
|
+
|
|
94
|
+
# This crawler will use custom services
|
|
95
|
+
custom_configuration = Configuration()
|
|
96
|
+
custom_event_manager = LocalEventManager.from_config(custom_configuration)
|
|
97
|
+
custom_storage_client = MemoryStorageClient()
|
|
98
|
+
crawler_2 = BasicCrawler(
|
|
99
|
+
configuration=custom_configuration,
|
|
100
|
+
event_manager=custom_event_manager,
|
|
101
|
+
storage_client=custom_storage_client,
|
|
102
|
+
)
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## Removed Actor.config property
|
|
106
|
+
- `Actor.config` property has been removed. Use `Actor.configuration` instead.
|
|
107
|
+
|
|
108
|
+
## Default storage ids in configuration changed to None
|
|
109
|
+
- `Configuration.default_key_value_store_id` changed from `'default'` to `None`.
|
|
110
|
+
- `Configuration.default_dataset_id` changed from `'default'` to `None`.
|
|
111
|
+
- `Configuration.default_request_queue_id` changed from `'default'` to `None`.
|
|
112
|
+
|
|
113
|
+
Previously, using the default storage without specifying its `id` in `Configuration` would lead to using a specific storage with the id `'default'`. Now it will use a newly created unnamed storage with an `id` assigned by the Apify platform; consecutive calls to get the default storage will return the same storage.
|
|
114
|
+
|
|
115
|
+
## Storages
|
|
116
|
+
|
|
117
|
+
<!-- TODO -->
|
|
118
|
+
|
|
119
|
+
## Storage clients
|
|
120
|
+
|
|
121
|
+
<!-- TODO -->
|
|
@@ -324,9 +324,6 @@ class _ActorType:
|
|
|
324
324
|
self.log.info('Initializing Actor...')
|
|
325
325
|
self.log.info('System info', extra=get_system_info())
|
|
326
326
|
|
|
327
|
-
# TODO: Print outdated SDK version warning (we need a new env var for this)
|
|
328
|
-
# https://github.com/apify/apify-sdk-python/issues/146
|
|
329
|
-
|
|
330
327
|
await self.event_manager.__aenter__()
|
|
331
328
|
self.log.debug('Event manager initialized')
|
|
332
329
|
|
|
@@ -142,7 +142,7 @@ class Configuration(CrawleeConfiguration):
|
|
|
142
142
|
] = None
|
|
143
143
|
|
|
144
144
|
default_dataset_id: Annotated[
|
|
145
|
-
str,
|
|
145
|
+
str | None,
|
|
146
146
|
Field(
|
|
147
147
|
validation_alias=AliasChoices(
|
|
148
148
|
'actor_default_dataset_id',
|
|
@@ -150,10 +150,10 @@ class Configuration(CrawleeConfiguration):
|
|
|
150
150
|
),
|
|
151
151
|
description='Default dataset ID used by the Apify storage client when no ID or name is provided.',
|
|
152
152
|
),
|
|
153
|
-
] =
|
|
153
|
+
] = None
|
|
154
154
|
|
|
155
155
|
default_key_value_store_id: Annotated[
|
|
156
|
-
str,
|
|
156
|
+
str | None,
|
|
157
157
|
Field(
|
|
158
158
|
validation_alias=AliasChoices(
|
|
159
159
|
'actor_default_key_value_store_id',
|
|
@@ -161,10 +161,10 @@ class Configuration(CrawleeConfiguration):
|
|
|
161
161
|
),
|
|
162
162
|
description='Default key-value store ID for the Apify storage client when no ID or name is provided.',
|
|
163
163
|
),
|
|
164
|
-
] =
|
|
164
|
+
] = None
|
|
165
165
|
|
|
166
166
|
default_request_queue_id: Annotated[
|
|
167
|
-
str,
|
|
167
|
+
str | None,
|
|
168
168
|
Field(
|
|
169
169
|
validation_alias=AliasChoices(
|
|
170
170
|
'actor_default_request_queue_id',
|
|
@@ -172,7 +172,7 @@ class Configuration(CrawleeConfiguration):
|
|
|
172
172
|
),
|
|
173
173
|
description='Default request queue ID for the Apify storage client when no ID or name is provided.',
|
|
174
174
|
),
|
|
175
|
-
] =
|
|
175
|
+
] = None
|
|
176
176
|
|
|
177
177
|
disable_outdated_warning: Annotated[
|
|
178
178
|
bool,
|
|
@@ -124,8 +124,10 @@ class ApifyDatasetClient(DatasetClient):
|
|
|
124
124
|
)
|
|
125
125
|
apify_datasets_client = apify_client_async.datasets()
|
|
126
126
|
|
|
127
|
-
# Normalize
|
|
128
|
-
|
|
127
|
+
# Normalize unnamed default storage in cases where not defined in `configuration.default_dataset_id` to unnamed
|
|
128
|
+
# storage aliased as `__default__`
|
|
129
|
+
if not any([alias, name, id, configuration.default_dataset_id]):
|
|
130
|
+
alias = '__default__'
|
|
129
131
|
|
|
130
132
|
if alias:
|
|
131
133
|
# Check if there is pre-existing alias mapping in the default KVS.
|
|
@@ -150,6 +152,11 @@ class ApifyDatasetClient(DatasetClient):
|
|
|
150
152
|
# If none are provided, try to get the default storage ID from environment variables.
|
|
151
153
|
elif id is None:
|
|
152
154
|
id = configuration.default_dataset_id
|
|
155
|
+
if not id:
|
|
156
|
+
raise ValueError(
|
|
157
|
+
'Dataset "id", "name", or "alias" must be specified, '
|
|
158
|
+
'or a default dataset ID must be set in the configuration.'
|
|
159
|
+
)
|
|
153
160
|
|
|
154
161
|
# Now create the client for the determined ID
|
|
155
162
|
apify_dataset_client = apify_client_async.dataset(dataset_id=id)
|
{apify-2.7.1b18 → apify-2.7.1b20}/src/apify/storage_clients/_apify/_key_value_store_client.py
RENAMED
|
@@ -115,8 +115,10 @@ class ApifyKeyValueStoreClient(KeyValueStoreClient):
|
|
|
115
115
|
)
|
|
116
116
|
apify_kvss_client = apify_client_async.key_value_stores()
|
|
117
117
|
|
|
118
|
-
# Normalize
|
|
119
|
-
|
|
118
|
+
# Normalize unnamed default storage in cases where not defined in `configuration.default_key_value_store_id` to
|
|
119
|
+
# unnamed storage aliased as `__default__`
|
|
120
|
+
if not any([alias, name, id, configuration.default_key_value_store_id]):
|
|
121
|
+
alias = '__default__'
|
|
120
122
|
|
|
121
123
|
if alias:
|
|
122
124
|
# Check if there is pre-existing alias mapping in the default KVS.
|
|
@@ -142,6 +144,11 @@ class ApifyKeyValueStoreClient(KeyValueStoreClient):
|
|
|
142
144
|
# If none are provided, try to get the default storage ID from environment variables.
|
|
143
145
|
elif id is None:
|
|
144
146
|
id = configuration.default_key_value_store_id
|
|
147
|
+
if not id:
|
|
148
|
+
raise ValueError(
|
|
149
|
+
'KeyValueStore "id", "name", or "alias" must be specified, '
|
|
150
|
+
'or a default KeyValueStore ID must be set in the configuration.'
|
|
151
|
+
)
|
|
145
152
|
|
|
146
153
|
# Now create the client for the determined ID
|
|
147
154
|
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)
|
|
@@ -5,7 +5,7 @@ from typing import Annotated
|
|
|
5
5
|
|
|
6
6
|
from pydantic import BaseModel, ConfigDict, Field
|
|
7
7
|
|
|
8
|
-
from crawlee.storage_clients.models import KeyValueStoreMetadata
|
|
8
|
+
from crawlee.storage_clients.models import KeyValueStoreMetadata, RequestQueueMetadata
|
|
9
9
|
|
|
10
10
|
from apify import Request
|
|
11
11
|
from apify._utils import docs_group
|
|
@@ -105,3 +105,27 @@ class CachedRequest(BaseModel):
|
|
|
105
105
|
|
|
106
106
|
lock_expires_at: datetime | None = None
|
|
107
107
|
"""The expiration time of the lock on the request."""
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class RequestQueueStats(BaseModel):
|
|
111
|
+
model_config = ConfigDict(populate_by_name=True)
|
|
112
|
+
|
|
113
|
+
delete_count: Annotated[int, Field(alias='deleteCount', default=0)]
|
|
114
|
+
"""The number of request queue deletes."""
|
|
115
|
+
|
|
116
|
+
head_item_read_count: Annotated[int, Field(alias='headItemReadCount', default=0)]
|
|
117
|
+
"""The number of request queue head reads."""
|
|
118
|
+
|
|
119
|
+
read_count: Annotated[int, Field(alias='readCount', default=0)]
|
|
120
|
+
"""The number of request queue reads."""
|
|
121
|
+
|
|
122
|
+
storage_bytes: Annotated[int, Field(alias='storageBytes', default=0)]
|
|
123
|
+
"""Storage size in Bytes."""
|
|
124
|
+
|
|
125
|
+
write_count: Annotated[int, Field(alias='writeCount', default=0)]
|
|
126
|
+
"""The number of request queue writes."""
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class ApifyRequestQueueMetadata(RequestQueueMetadata):
|
|
130
|
+
stats: Annotated[RequestQueueStats, Field(alias='stats', default_factory=RequestQueueStats)]
|
|
131
|
+
"""Additional statistics about the request queue."""
|
|
@@ -18,7 +18,13 @@ from crawlee.storage_clients._base import RequestQueueClient
|
|
|
18
18
|
from crawlee.storage_clients.models import AddRequestsResponse, ProcessedRequest, RequestQueueMetadata
|
|
19
19
|
from crawlee.storages import RequestQueue
|
|
20
20
|
|
|
21
|
-
from ._models import
|
|
21
|
+
from ._models import (
|
|
22
|
+
ApifyRequestQueueMetadata,
|
|
23
|
+
CachedRequest,
|
|
24
|
+
ProlongRequestLockResponse,
|
|
25
|
+
RequestQueueHead,
|
|
26
|
+
RequestQueueStats,
|
|
27
|
+
)
|
|
22
28
|
from ._utils import AliasResolver
|
|
23
29
|
from apify import Request
|
|
24
30
|
|
|
@@ -108,7 +114,7 @@ class ApifyRequestQueueClient(RequestQueueClient):
|
|
|
108
114
|
return self._metadata
|
|
109
115
|
|
|
110
116
|
@override
|
|
111
|
-
async def get_metadata(self) ->
|
|
117
|
+
async def get_metadata(self) -> ApifyRequestQueueMetadata:
|
|
112
118
|
"""Get metadata about the request queue.
|
|
113
119
|
|
|
114
120
|
Returns:
|
|
@@ -119,7 +125,7 @@ class ApifyRequestQueueClient(RequestQueueClient):
|
|
|
119
125
|
if response is None:
|
|
120
126
|
raise ValueError('Failed to fetch request queue metadata from the API.')
|
|
121
127
|
# Enhance the API response with local estimations (the API can lag by a few seconds, while local estimations do not).
|
|
122
|
-
return
|
|
128
|
+
return ApifyRequestQueueMetadata(
|
|
123
129
|
id=response['id'],
|
|
124
130
|
name=response['name'],
|
|
125
131
|
total_request_count=max(response['totalRequestCount'], self._metadata.total_request_count),
|
|
@@ -129,6 +135,7 @@ class ApifyRequestQueueClient(RequestQueueClient):
|
|
|
129
135
|
modified_at=max(response['modifiedAt'], self._metadata.modified_at),
|
|
130
136
|
accessed_at=max(response['accessedAt'], self._metadata.accessed_at),
|
|
131
137
|
had_multiple_clients=response['hadMultipleClients'] or self._metadata.had_multiple_clients,
|
|
138
|
+
stats=RequestQueueStats.model_validate(response['stats'], by_alias=True),
|
|
132
139
|
)
|
|
133
140
|
|
|
134
141
|
@classmethod
|
|
@@ -193,8 +200,10 @@ class ApifyRequestQueueClient(RequestQueueClient):
|
|
|
193
200
|
)
|
|
194
201
|
apify_rqs_client = apify_client_async.request_queues()
|
|
195
202
|
|
|
196
|
-
# Normalize
|
|
197
|
-
|
|
203
|
+
# Normalize unnamed default storage in cases where not defined in `configuration.default_request_queue_id` to
|
|
204
|
+
# unnamed storage aliased as `__default__`
|
|
205
|
+
if not any([alias, name, id, configuration.default_request_queue_id]):
|
|
206
|
+
alias = '__default__'
|
|
198
207
|
|
|
199
208
|
if alias:
|
|
200
209
|
# Check if there is pre-existing alias mapping in the default KVS.
|
|
@@ -219,6 +228,11 @@ class ApifyRequestQueueClient(RequestQueueClient):
|
|
|
219
228
|
# If none are provided, try to get the default storage ID from environment variables.
|
|
220
229
|
elif id is None:
|
|
221
230
|
id = configuration.default_request_queue_id
|
|
231
|
+
if not id:
|
|
232
|
+
raise ValueError(
|
|
233
|
+
'RequestQueue "id", "name", or "alias" must be specified, '
|
|
234
|
+
'or a default default_request_queue_id ID must be set in the configuration.'
|
|
235
|
+
)
|
|
222
236
|
|
|
223
237
|
# Use suitable client_key to make `hadMultipleClients` response of Apify API useful.
|
|
224
238
|
# It should persist across migrated or resurrected Actor runs on the Apify platform.
|
|
@@ -76,7 +76,7 @@ class AliasResolver:
|
|
|
76
76
|
Returns:
|
|
77
77
|
Map of aliases and storage ids.
|
|
78
78
|
"""
|
|
79
|
-
if not cls._alias_map:
|
|
79
|
+
if not cls._alias_map and Configuration.get_global_configuration().is_at_home:
|
|
80
80
|
default_kvs_client = await _get_default_kvs_client()
|
|
81
81
|
|
|
82
82
|
record = await default_kvs_client.get_record(cls._ALIAS_MAPPING_KEY)
|
|
@@ -156,7 +156,8 @@ async def _get_default_kvs_client() -> KeyValueStoreClientAsync:
|
|
|
156
156
|
min_delay_between_retries_millis=500,
|
|
157
157
|
timeout_secs=360,
|
|
158
158
|
)
|
|
159
|
-
|
|
159
|
+
if not configuration.default_key_value_store_id:
|
|
160
|
+
raise ValueError("'Configuration.default_key_value_store_id' must be set.")
|
|
160
161
|
return apify_client_async.key_value_store(key_value_store_id=configuration.default_key_value_store_id)
|
|
161
162
|
|
|
162
163
|
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from crawlee import service_locator
|
|
6
|
+
from crawlee.storages import Dataset, KeyValueStore, RequestQueue
|
|
7
|
+
|
|
8
|
+
from apify import Actor, Configuration
|
|
9
|
+
from apify.storage_clients import ApifyStorageClient
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@pytest.mark.parametrize(
|
|
13
|
+
'storage_type',
|
|
14
|
+
[Dataset, KeyValueStore, RequestQueue],
|
|
15
|
+
)
|
|
16
|
+
async def test_alias_concurrent_creation_local(
|
|
17
|
+
storage_type: Dataset | KeyValueStore | RequestQueue, apify_token: str
|
|
18
|
+
) -> None:
|
|
19
|
+
"""Test that storages created with same alias are created only once even when created concurrently."""
|
|
20
|
+
service_locator.set_configuration(Configuration(token=apify_token))
|
|
21
|
+
service_locator.set_storage_client(ApifyStorageClient())
|
|
22
|
+
tasks = [asyncio.create_task(storage_type.open(alias='test')) for _ in range(2)]
|
|
23
|
+
|
|
24
|
+
storages = await asyncio.gather(*tasks)
|
|
25
|
+
unique_storage_ids = {storage.id for storage in storages}
|
|
26
|
+
try:
|
|
27
|
+
# Only one aliased storage should be created.
|
|
28
|
+
assert len(unique_storage_ids) == 1
|
|
29
|
+
|
|
30
|
+
# Clean up
|
|
31
|
+
await storages[0].drop()
|
|
32
|
+
except AssertionError:
|
|
33
|
+
for storage in storages:
|
|
34
|
+
await storage.drop()
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@pytest.mark.parametrize(
|
|
38
|
+
'storage_type',
|
|
39
|
+
[Dataset, KeyValueStore, RequestQueue],
|
|
40
|
+
)
|
|
41
|
+
async def test_unnamed_default_without_config(
|
|
42
|
+
storage_type: Dataset | KeyValueStore | RequestQueue, apify_token: str
|
|
43
|
+
) -> None:
|
|
44
|
+
"""Test that default Apify storage used locally is unnamed storage."""
|
|
45
|
+
service_locator.set_configuration(Configuration(token=apify_token))
|
|
46
|
+
service_locator.set_storage_client(ApifyStorageClient())
|
|
47
|
+
|
|
48
|
+
# Open storage and make sure it has no name and it has id
|
|
49
|
+
storage = await storage_type.open()
|
|
50
|
+
assert storage.name is None
|
|
51
|
+
assert storage.id
|
|
52
|
+
|
|
53
|
+
# Make sure the same instance is returned when opened again without name or alias
|
|
54
|
+
storage_again = await storage_type.open()
|
|
55
|
+
assert storage is storage_again
|
|
56
|
+
|
|
57
|
+
await storage.drop()
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@pytest.mark.parametrize(
|
|
61
|
+
'storage_type',
|
|
62
|
+
[Dataset, KeyValueStore, RequestQueue],
|
|
63
|
+
)
|
|
64
|
+
async def test_aliases_not_stored_on_platform_when_local(
|
|
65
|
+
storage_type: Dataset | KeyValueStore | RequestQueue, apify_token: str
|
|
66
|
+
) -> None:
|
|
67
|
+
"""Test that default Apify storage used locally is not persisting aliases to Apify based default KVS."""
|
|
68
|
+
service_locator.set_configuration(Configuration(token=apify_token))
|
|
69
|
+
service_locator.set_storage_client(ApifyStorageClient())
|
|
70
|
+
async with Actor(configure_logging=False):
|
|
71
|
+
await storage_type.open(alias='test')
|
|
72
|
+
default_kvs = await Actor.open_key_value_store(force_cloud=True)
|
|
73
|
+
|
|
74
|
+
# The default KVS should be empty
|
|
75
|
+
assert len(await default_kvs.list_keys()) == 0
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
|
-
from typing import TYPE_CHECKING
|
|
4
|
+
from typing import TYPE_CHECKING, cast
|
|
5
5
|
|
|
6
6
|
import pytest
|
|
7
7
|
|
|
@@ -14,6 +14,7 @@ if TYPE_CHECKING:
|
|
|
14
14
|
from crawlee.storages import RequestQueue
|
|
15
15
|
|
|
16
16
|
from .conftest import MakeActorFunction, RunActorFunction
|
|
17
|
+
from apify.storage_clients._apify._models import ApifyRequestQueueMetadata
|
|
17
18
|
|
|
18
19
|
|
|
19
20
|
async def test_add_and_fetch_requests(
|
|
@@ -1278,3 +1279,26 @@ async def test_request_queue_not_had_multiple_clients(
|
|
|
1278
1279
|
api_response = await api_client.get()
|
|
1279
1280
|
assert api_response
|
|
1280
1281
|
assert api_response['hadMultipleClients'] is False
|
|
1282
|
+
|
|
1283
|
+
|
|
1284
|
+
async def test_request_queue_has_stats(request_queue_force_cloud: RequestQueue) -> None:
|
|
1285
|
+
"""Test that Apify based request queue has stats in metadata."""
|
|
1286
|
+
|
|
1287
|
+
add_request_count = 3
|
|
1288
|
+
read_request_count = 2
|
|
1289
|
+
|
|
1290
|
+
await request_queue_force_cloud.add_requests(
|
|
1291
|
+
[Request.from_url(f'http://example.com/{i}') for i in range(add_request_count)]
|
|
1292
|
+
)
|
|
1293
|
+
for _ in range(read_request_count):
|
|
1294
|
+
await request_queue_force_cloud.get_request(Request.from_url('http://example.com/1').unique_key)
|
|
1295
|
+
|
|
1296
|
+
# Wait for stats to become stable
|
|
1297
|
+
await asyncio.sleep(10)
|
|
1298
|
+
|
|
1299
|
+
metadata = await request_queue_force_cloud.get_metadata()
|
|
1300
|
+
|
|
1301
|
+
assert hasattr(metadata, 'stats')
|
|
1302
|
+
apify_metadata = cast('ApifyRequestQueueMetadata', metadata)
|
|
1303
|
+
assert apify_metadata.stats.read_count == read_request_count
|
|
1304
|
+
assert apify_metadata.stats.write_count == add_request_count
|