apify 3.0.2b6__tar.gz → 3.0.3b1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apify might be problematic. Click here for more details.
- {apify-3.0.2b6 → apify-3.0.3b1}/CHANGELOG.md +10 -2
- {apify-3.0.2b6 → apify-3.0.3b1}/PKG-INFO +2 -2
- {apify-3.0.2b6 → apify-3.0.3b1}/pyproject.toml +2 -2
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/request_loaders/_apify_request_list.py +2 -2
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/storage_clients/_apify/_dataset_client.py +12 -29
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/storage_clients/_apify/_key_value_store_client.py +13 -43
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/storage_clients/_apify/_models.py +2 -2
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/storage_clients/_apify/_request_queue_client.py +7 -30
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/storage_clients/_apify/_request_queue_shared_client.py +54 -48
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/storage_clients/_apify/_request_queue_single_client.py +86 -67
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/storage_clients/_apify/_utils.py +27 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/integration/test_request_queue.py +50 -2
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/actor/test_actor_env_helpers.py +1 -1
- {apify-3.0.2b6 → apify-3.0.3b1}/uv.lock +575 -506
- {apify-3.0.2b6 → apify-3.0.3b1}/website/package-lock.json +440 -439
- {apify-3.0.2b6 → apify-3.0.3b1}/.editorconfig +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/.github/CODEOWNERS +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/.github/workflows/build_and_deploy_docs.yaml +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/.github/workflows/check_pr_title.yaml +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/.github/workflows/pre_release.yaml +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/.github/workflows/release.yaml +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/.github/workflows/run_code_checks.yaml +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/.github/workflows/update_new_issue.yaml +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/.gitignore +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/.markdownlint.yaml +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/.pre-commit-config.yaml +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/CONTRIBUTING.md +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/LICENSE +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/Makefile +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/README.md +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/01_overview/01_introduction.mdx +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/01_overview/02_running_actors_locally.mdx +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/01_overview/03_actor_structure.mdx +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/01_overview/code/01_introduction.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/01_overview/code/actor_structure/__init__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/01_overview/code/actor_structure/__main__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/01_overview/code/actor_structure/main.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/01_overview/code/actor_structure/py.typed +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/01_actor_lifecycle.mdx +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/02_actor_input.mdx +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/03_storages.mdx +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/04_actor_events.mdx +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/05_proxy_management.mdx +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/06_interacting_with_other_actors.mdx +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/07_webhooks.mdx +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/08_access_apify_api.mdx +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/09_running_webserver.mdx +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/10_logging.mdx +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/11_configuration.mdx +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/12_pay_per_event.mdx +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/01_class_context.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/01_class_manual.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/01_context_manager.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/01_error_handling_context.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/01_error_handling_manual.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/01_init_exit.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/01_instance_context.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/01_instance_manual.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/01_reboot.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/01_status_message.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/02_input.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/03_dataset_exports.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/03_dataset_read_write.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/03_deleting_storages.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/03_kvs_iterating.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/03_kvs_public_url.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/03_kvs_read_write.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/03_opening_storages.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/03_rq.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/04_actor_events.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/05_apify_proxy.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/05_apify_proxy_config.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/05_custom_proxy.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/05_custom_proxy_function.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/05_proxy_actor_input.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/05_proxy_httpx.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/05_proxy_rotation.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/06_interacting_call.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/06_interacting_call_task.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/06_interacting_metamorph.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/06_interacting_start.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/07_webhook.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/07_webhook_preventing.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/08_actor_client.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/08_actor_new_client.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/09_webserver.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/10_log_config.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/10_logger_usage.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/10_redirect_log.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/10_redirect_log_existing_run.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/11_config.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/actor_charge.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/02_concepts/code/conditional_actor_charge.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/03_guides/01_beautifulsoup_httpx.mdx +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/03_guides/02_parsel_impit.mdx +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/03_guides/03_playwright.mdx +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/03_guides/04_selenium.mdx +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/03_guides/05_crawlee.mdx +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/03_guides/06_scrapy.mdx +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/03_guides/code/01_beautifulsoup_httpx.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/03_guides/code/02_parsel_impit.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/03_guides/code/03_playwright.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/03_guides/code/04_selenium.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/03_guides/code/05_crawlee_beautifulsoup.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/03_guides/code/05_crawlee_parsel.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/03_guides/code/05_crawlee_playwright.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/03_guides/code/scrapy_project/src/__init__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/03_guides/code/scrapy_project/src/__main__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/03_guides/code/scrapy_project/src/items.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/03_guides/code/scrapy_project/src/main.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/03_guides/code/scrapy_project/src/py.typed +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/03_guides/code/scrapy_project/src/settings.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/03_guides/code/scrapy_project/src/spiders/__init__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/03_guides/code/scrapy_project/src/spiders/py.typed +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/03_guides/code/scrapy_project/src/spiders/title.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/04_upgrading/upgrading_to_v2.md +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/04_upgrading/upgrading_to_v3.md +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/docs/pyproject.toml +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/renovate.json +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/__init__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/_actor.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/_charging.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/_configuration.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/_consts.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/_crypto.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/_models.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/_proxy_configuration.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/_utils.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/events/__init__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/events/_apify_event_manager.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/events/_types.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/events/py.typed +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/log.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/py.typed +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/request_loaders/__init__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/request_loaders/py.typed +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/scrapy/__init__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/scrapy/_actor_runner.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/scrapy/_async_thread.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/scrapy/_logging_config.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/scrapy/extensions/__init__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/scrapy/extensions/_httpcache.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/scrapy/middlewares/__init__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/scrapy/middlewares/apify_proxy.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/scrapy/middlewares/py.typed +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/scrapy/pipelines/__init__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/scrapy/pipelines/actor_dataset_push.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/scrapy/pipelines/py.typed +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/scrapy/py.typed +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/scrapy/requests.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/scrapy/scheduler.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/scrapy/utils.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/storage_clients/__init__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/storage_clients/_apify/__init__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/storage_clients/_apify/_storage_client.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/storage_clients/_apify/py.typed +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/storage_clients/_file_system/__init__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/storage_clients/_file_system/_key_value_store_client.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/storage_clients/_file_system/_storage_client.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/storage_clients/_smart_apify/__init__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/storage_clients/_smart_apify/_storage_client.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/storage_clients/py.typed +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/storages/__init__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/src/apify/storages/py.typed +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/integration/README.md +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/integration/__init__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/integration/_utils.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/integration/actor_source_base/Dockerfile +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/integration/actor_source_base/requirements.txt +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/integration/actor_source_base/server.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/integration/actor_source_base/src/__init__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/integration/actor_source_base/src/__main__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/integration/actor_source_base/src/main.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/integration/conftest.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/integration/test_actor_api_helpers.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/integration/test_actor_call_timeouts.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/integration/test_actor_charge.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/integration/test_actor_create_proxy_configuration.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/integration/test_actor_dataset.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/integration/test_actor_events.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/integration/test_actor_key_value_store.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/integration/test_actor_lifecycle.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/integration/test_actor_log.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/integration/test_actor_request_queue.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/integration/test_actor_scrapy.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/integration/test_apify_storages.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/integration/test_crawlers_with_storages.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/integration/test_fixtures.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/__init__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/actor/__init__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/actor/test_actor_create_proxy_configuration.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/actor/test_actor_dataset.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/actor/test_actor_helpers.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/actor/test_actor_key_value_store.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/actor/test_actor_lifecycle.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/actor/test_actor_log.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/actor/test_actor_non_default_instance.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/actor/test_actor_request_queue.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/actor/test_configuration.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/actor/test_request_list.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/conftest.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/events/__init__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/events/test_apify_event_manager.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/scrapy/__init__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/scrapy/extensions/__init__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/scrapy/extensions/test_httpcache.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/scrapy/middlewares/__init__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/scrapy/middlewares/test_apify_proxy.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/scrapy/pipelines/__init__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/scrapy/pipelines/test_actor_dataset_push.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/scrapy/requests/__init__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/scrapy/requests/test_to_apify_request.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/scrapy/requests/test_to_scrapy_request.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/scrapy/utils/__init__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/scrapy/utils/test_apply_apify_settings.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/scrapy/utils/test_get_basic_auth_header.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/storage_clients/__init__.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/storage_clients/test_apify_request_queue_client.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/storage_clients/test_file_system.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/test_apify_storages.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/test_crypto.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/tests/unit/test_proxy_configuration.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/website/.eslintrc.json +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/website/babel.config.js +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/website/build_api_reference.sh +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/website/docusaurus.config.js +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/website/generate_module_shortcuts.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/website/package.json +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/website/sidebars.js +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/website/src/components/ApiLink.jsx +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/website/src/components/Gradients.jsx +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/website/src/components/Highlights.jsx +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/website/src/components/Highlights.module.css +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/website/src/components/RunnableCodeBlock.jsx +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/website/src/components/RunnableCodeBlock.module.css +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/website/src/css/custom.css +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/website/src/pages/home_page_example.py +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/website/src/pages/index.js +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/website/src/pages/index.module.css +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/website/src/theme/DocItem/Content/index.js +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/website/static/.nojekyll +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/website/static/img/docs-og.png +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/website/static/img/guides/redirected_logs_example.webp +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/website/tools/docs-prettier.config.js +0 -0
- {apify-3.0.2b6 → apify-3.0.3b1}/website/tools/utils/externalLink.js +0 -0
|
@@ -3,15 +3,23 @@
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
5
|
<!-- git-cliff-unreleased-start -->
|
|
6
|
-
## 3.0.2 - **not yet released**
|
|
6
|
+
## 3.0.3 - **not yet released**
|
|
7
|
+
|
|
8
|
+
### 🐛 Bug Fixes
|
|
9
|
+
|
|
10
|
+
- Cache requests in RQ implementations by `id` ([#633](https://github.com/apify/apify-sdk-python/pull/633)) ([76886ce](https://github.com/apify/apify-sdk-python/commit/76886ce496165346a01f67e018547287c211ea54)) by [@Pijukatel](https://github.com/Pijukatel), closes [#630](https://github.com/apify/apify-sdk-python/issues/630)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
<!-- git-cliff-unreleased-end -->
|
|
14
|
+
## [3.0.2](https://github.com/apify/apify-sdk-python/releases/tag/v3.0.2) (2025-10-17)
|
|
7
15
|
|
|
8
16
|
### 🐛 Bug Fixes
|
|
9
17
|
|
|
10
18
|
- Handle None result in single consumer request queue client ([#623](https://github.com/apify/apify-sdk-python/pull/623)) ([451284a](https://github.com/apify/apify-sdk-python/commit/451284a5c633bc5613bd1e9060df286a1c20b259)) by [@janbuchar](https://github.com/janbuchar), closes [#1472](https://github.com/apify/apify-sdk-python/issues/1472)
|
|
11
19
|
- Unify Actor context manager with init & exit methods ([#600](https://github.com/apify/apify-sdk-python/pull/600)) ([6b0d084](https://github.com/apify/apify-sdk-python/commit/6b0d0842ae66a3a206bbb682a3e5f81ad552f029)) by [@vdusek](https://github.com/vdusek), closes [#598](https://github.com/apify/apify-sdk-python/issues/598)
|
|
20
|
+
- Handle truncated `unique_key` in `list_head` by fetching full request data ([#631](https://github.com/apify/apify-sdk-python/pull/631)) ([4238086](https://github.com/apify/apify-sdk-python/commit/423808678d9155a84a266bf50bb09f1a56466174)) by [@vdusek](https://github.com/vdusek), closes [#627](https://github.com/apify/apify-sdk-python/issues/627)
|
|
12
21
|
|
|
13
22
|
|
|
14
|
-
<!-- git-cliff-unreleased-end -->
|
|
15
23
|
## [3.0.1](https://github.com/apify/apify-sdk-python/releases/tag/v3.0.1) (2025-10-08)
|
|
16
24
|
|
|
17
25
|
### 🐛 Bug Fixes
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: apify
|
|
3
|
-
Version: 3.0.2b6
|
|
3
|
+
Version: 3.0.3b1
|
|
4
4
|
Summary: Apify SDK for Python
|
|
5
5
|
Project-URL: Apify Homepage, https://apify.com
|
|
6
6
|
Project-URL: Changelog, https://docs.apify.com/sdk/python/docs/changelog
|
|
@@ -225,7 +225,7 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
225
225
|
Classifier: Programming Language :: Python :: 3.13
|
|
226
226
|
Classifier: Topic :: Software Development :: Libraries
|
|
227
227
|
Requires-Python: >=3.10
|
|
228
|
-
Requires-Dist: apify-client<3.0.0,>=2.
|
|
228
|
+
Requires-Dist: apify-client<3.0.0,>=2.2.0
|
|
229
229
|
Requires-Dist: apify-shared<3.0.0,>=2.0.0
|
|
230
230
|
Requires-Dist: cachetools>=5.5.0
|
|
231
231
|
Requires-Dist: crawlee<2.0.0,>=1.0.2
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "apify"
|
|
7
|
-
version = "3.0.2b6"
|
|
7
|
+
version = "3.0.3b1"
|
|
8
8
|
description = "Apify SDK for Python"
|
|
9
9
|
authors = [{ name = "Apify Technologies s.r.o.", email = "support@apify.com" }]
|
|
10
10
|
license = { file = "LICENSE" }
|
|
@@ -34,7 +34,7 @@ keywords = [
|
|
|
34
34
|
"scraping",
|
|
35
35
|
]
|
|
36
36
|
dependencies = [
|
|
37
|
-
"apify-client>=2.
|
|
37
|
+
"apify-client>=2.2.0,<3.0.0",
|
|
38
38
|
"apify-shared>=2.0.0,<3.0.0",
|
|
39
39
|
"crawlee>=1.0.2,<2.0.0",
|
|
40
40
|
"cachetools>=5.5.0",
|
|
@@ -22,8 +22,8 @@ URL_NO_COMMAS_REGEX = re.compile(
|
|
|
22
22
|
class _RequestDetails(BaseModel):
|
|
23
23
|
method: HttpMethod = 'GET'
|
|
24
24
|
payload: str = ''
|
|
25
|
-
headers: Annotated[dict[str, str], Field(default_factory=dict)]
|
|
26
|
-
user_data: Annotated[dict[str, str], Field(default_factory=dict, alias='userData')]
|
|
25
|
+
headers: Annotated[dict[str, str], Field(default_factory=dict)]
|
|
26
|
+
user_data: Annotated[dict[str, str], Field(default_factory=dict, alias='userData')]
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
class _RequestsFromUrlInput(_RequestDetails):
|
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
|
+
import warnings
|
|
4
5
|
from logging import getLogger
|
|
5
6
|
from typing import TYPE_CHECKING, Any
|
|
6
7
|
|
|
7
8
|
from typing_extensions import override
|
|
8
9
|
|
|
9
|
-
from apify_client import ApifyClientAsync
|
|
10
10
|
from crawlee._utils.byte_size import ByteSize
|
|
11
11
|
from crawlee._utils.file import json_dumps
|
|
12
12
|
from crawlee.storage_clients._base import DatasetClient
|
|
13
13
|
from crawlee.storage_clients.models import DatasetItemsListPage, DatasetMetadata
|
|
14
14
|
from crawlee.storages import Dataset
|
|
15
15
|
|
|
16
|
-
from ._utils import AliasResolver
|
|
16
|
+
from ._utils import AliasResolver, create_apify_client
|
|
17
17
|
|
|
18
18
|
if TYPE_CHECKING:
|
|
19
19
|
from collections.abc import AsyncIterator
|
|
@@ -52,12 +52,17 @@ class ApifyDatasetClient(DatasetClient):
|
|
|
52
52
|
self._api_client = api_client
|
|
53
53
|
"""The Apify dataset client for API operations."""
|
|
54
54
|
|
|
55
|
-
self._api_public_base_url = api_public_base_url
|
|
56
|
-
"""The public base URL for accessing the key-value store records."""
|
|
57
|
-
|
|
58
55
|
self._lock = lock
|
|
59
56
|
"""A lock to ensure that only one operation is performed at a time."""
|
|
60
57
|
|
|
58
|
+
if api_public_base_url:
|
|
59
|
+
# Remove in version 4.0, https://github.com/apify/apify-sdk-python/issues/635
|
|
60
|
+
warnings.warn(
|
|
61
|
+
'api_public_base_url argument is deprecated and will be removed in version 4.0.0',
|
|
62
|
+
DeprecationWarning,
|
|
63
|
+
stacklevel=2,
|
|
64
|
+
)
|
|
65
|
+
|
|
61
66
|
@override
|
|
62
67
|
async def get_metadata(self) -> DatasetMetadata:
|
|
63
68
|
metadata = await self._api_client.get()
|
|
@@ -99,29 +104,7 @@ class ApifyDatasetClient(DatasetClient):
|
|
|
99
104
|
if sum(1 for param in [id, name, alias] if param is not None) > 1:
|
|
100
105
|
raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')
|
|
101
106
|
|
|
102
|
-
|
|
103
|
-
if not token:
|
|
104
|
-
raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).')
|
|
105
|
-
|
|
106
|
-
api_url = configuration.api_base_url
|
|
107
|
-
if not api_url:
|
|
108
|
-
raise ValueError(f'Apify storage client requires a valid API URL in Configuration (api_url={api_url}).')
|
|
109
|
-
|
|
110
|
-
api_public_base_url = configuration.api_public_base_url
|
|
111
|
-
if not api_public_base_url:
|
|
112
|
-
raise ValueError(
|
|
113
|
-
'Apify storage client requires a valid API public base URL in Configuration '
|
|
114
|
-
f'(api_public_base_url={api_public_base_url}).'
|
|
115
|
-
)
|
|
116
|
-
|
|
117
|
-
# Create Apify client with the provided token and API URL.
|
|
118
|
-
apify_client_async = ApifyClientAsync(
|
|
119
|
-
token=token,
|
|
120
|
-
api_url=api_url,
|
|
121
|
-
max_retries=8,
|
|
122
|
-
min_delay_between_retries_millis=500,
|
|
123
|
-
timeout_secs=360,
|
|
124
|
-
)
|
|
107
|
+
apify_client_async = create_apify_client(configuration)
|
|
125
108
|
apify_datasets_client = apify_client_async.datasets()
|
|
126
109
|
|
|
127
110
|
# Normalize unnamed default storage in cases where not defined in `configuration.default_dataset_id` to unnamed
|
|
@@ -178,7 +161,7 @@ class ApifyDatasetClient(DatasetClient):
|
|
|
178
161
|
|
|
179
162
|
return cls(
|
|
180
163
|
api_client=apify_dataset_client,
|
|
181
|
-
api_public_base_url=
|
|
164
|
+
api_public_base_url='', # Remove in version 4.0, https://github.com/apify/apify-sdk-python/issues/635
|
|
182
165
|
lock=asyncio.Lock(),
|
|
183
166
|
)
|
|
184
167
|
|
|
@@ -1,20 +1,18 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
|
+
import warnings
|
|
4
5
|
from logging import getLogger
|
|
5
6
|
from typing import TYPE_CHECKING, Any
|
|
6
7
|
|
|
7
8
|
from typing_extensions import override
|
|
8
|
-
from yarl import URL
|
|
9
9
|
|
|
10
|
-
from apify_client import ApifyClientAsync
|
|
11
10
|
from crawlee.storage_clients._base import KeyValueStoreClient
|
|
12
11
|
from crawlee.storage_clients.models import KeyValueStoreRecord, KeyValueStoreRecordMetadata
|
|
13
12
|
from crawlee.storages import KeyValueStore
|
|
14
13
|
|
|
15
14
|
from ._models import ApifyKeyValueStoreMetadata, KeyValueStoreListKeysPage
|
|
16
|
-
from ._utils import AliasResolver
|
|
17
|
-
from apify._crypto import create_hmac_signature
|
|
15
|
+
from ._utils import AliasResolver, create_apify_client
|
|
18
16
|
|
|
19
17
|
if TYPE_CHECKING:
|
|
20
18
|
from collections.abc import AsyncIterator
|
|
@@ -43,12 +41,17 @@ class ApifyKeyValueStoreClient(KeyValueStoreClient):
|
|
|
43
41
|
self._api_client = api_client
|
|
44
42
|
"""The Apify KVS client for API operations."""
|
|
45
43
|
|
|
46
|
-
self._api_public_base_url = api_public_base_url
|
|
47
|
-
"""The public base URL for accessing the key-value store records."""
|
|
48
|
-
|
|
49
44
|
self._lock = lock
|
|
50
45
|
"""A lock to ensure that only one operation is performed at a time."""
|
|
51
46
|
|
|
47
|
+
if api_public_base_url:
|
|
48
|
+
# Remove in version 4.0, https://github.com/apify/apify-sdk-python/issues/635
|
|
49
|
+
warnings.warn(
|
|
50
|
+
'api_public_base_url argument is deprecated and will be removed in version 4.0.0',
|
|
51
|
+
DeprecationWarning,
|
|
52
|
+
stacklevel=2,
|
|
53
|
+
)
|
|
54
|
+
|
|
52
55
|
@override
|
|
53
56
|
async def get_metadata(self) -> ApifyKeyValueStoreMetadata:
|
|
54
57
|
metadata = await self._api_client.get()
|
|
@@ -90,29 +93,7 @@ class ApifyKeyValueStoreClient(KeyValueStoreClient):
|
|
|
90
93
|
if sum(1 for param in [id, name, alias] if param is not None) > 1:
|
|
91
94
|
raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')
|
|
92
95
|
|
|
93
|
-
|
|
94
|
-
if not token:
|
|
95
|
-
raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).')
|
|
96
|
-
|
|
97
|
-
api_url = configuration.api_base_url
|
|
98
|
-
if not api_url:
|
|
99
|
-
raise ValueError(f'Apify storage client requires a valid API URL in Configuration (api_url={api_url}).')
|
|
100
|
-
|
|
101
|
-
api_public_base_url = configuration.api_public_base_url
|
|
102
|
-
if not api_public_base_url:
|
|
103
|
-
raise ValueError(
|
|
104
|
-
'Apify storage client requires a valid API public base URL in Configuration '
|
|
105
|
-
f'(api_public_base_url={api_public_base_url}).'
|
|
106
|
-
)
|
|
107
|
-
|
|
108
|
-
# Create Apify client with the provided token and API URL.
|
|
109
|
-
apify_client_async = ApifyClientAsync(
|
|
110
|
-
token=token,
|
|
111
|
-
api_url=api_url,
|
|
112
|
-
max_retries=8,
|
|
113
|
-
min_delay_between_retries_millis=500,
|
|
114
|
-
timeout_secs=360,
|
|
115
|
-
)
|
|
96
|
+
apify_client_async = create_apify_client(configuration)
|
|
116
97
|
apify_kvss_client = apify_client_async.key_value_stores()
|
|
117
98
|
|
|
118
99
|
# Normalize unnamed default storage in cases where not defined in `configuration.default_key_value_store_id` to
|
|
@@ -170,7 +151,7 @@ class ApifyKeyValueStoreClient(KeyValueStoreClient):
|
|
|
170
151
|
|
|
171
152
|
return cls(
|
|
172
153
|
api_client=apify_kvs_client,
|
|
173
|
-
api_public_base_url=
|
|
154
|
+
api_public_base_url='', # Remove in version 4.0, https://github.com/apify/apify-sdk-python/issues/635
|
|
174
155
|
lock=asyncio.Lock(),
|
|
175
156
|
)
|
|
176
157
|
|
|
@@ -251,15 +232,4 @@ class ApifyKeyValueStoreClient(KeyValueStoreClient):
|
|
|
251
232
|
Returns:
|
|
252
233
|
A public URL that can be used to access the value of the given key in the KVS.
|
|
253
234
|
"""
|
|
254
|
-
|
|
255
|
-
raise ValueError('resource_id cannot be None when generating a public URL')
|
|
256
|
-
|
|
257
|
-
public_url = (
|
|
258
|
-
URL(self._api_public_base_url) / 'v2' / 'key-value-stores' / self._api_client.resource_id / 'records' / key
|
|
259
|
-
)
|
|
260
|
-
metadata = await self.get_metadata()
|
|
261
|
-
|
|
262
|
-
if metadata.url_signing_secret_key is not None:
|
|
263
|
-
public_url = public_url.with_query(signature=create_hmac_signature(metadata.url_signing_secret_key, key))
|
|
264
|
-
|
|
265
|
-
return str(public_url)
|
|
235
|
+
return await self._api_client.get_record_public_url(key=key)
|
|
@@ -5,16 +5,15 @@ from typing import TYPE_CHECKING, Final, Literal
|
|
|
5
5
|
|
|
6
6
|
from typing_extensions import override
|
|
7
7
|
|
|
8
|
-
from apify_client import ApifyClientAsync
|
|
9
8
|
from crawlee._utils.crypto import crypto_random_object_id
|
|
10
9
|
from crawlee.storage_clients._base import RequestQueueClient
|
|
11
10
|
from crawlee.storage_clients.models import AddRequestsResponse, ProcessedRequest, RequestQueueMetadata
|
|
12
11
|
from crawlee.storages import RequestQueue
|
|
13
12
|
|
|
14
13
|
from ._models import ApifyRequestQueueMetadata, RequestQueueStats
|
|
15
|
-
from ._request_queue_shared_client import
|
|
16
|
-
from ._request_queue_single_client import
|
|
17
|
-
from ._utils import AliasResolver
|
|
14
|
+
from ._request_queue_shared_client import ApifyRequestQueueSharedClient
|
|
15
|
+
from ._request_queue_single_client import ApifyRequestQueueSingleClient
|
|
16
|
+
from ._utils import AliasResolver, create_apify_client
|
|
18
17
|
|
|
19
18
|
if TYPE_CHECKING:
|
|
20
19
|
from collections.abc import Sequence
|
|
@@ -47,14 +46,14 @@ class ApifyRequestQueueClient(RequestQueueClient):
|
|
|
47
46
|
self._api_client = api_client
|
|
48
47
|
"""The Apify request queue client for API operations."""
|
|
49
48
|
|
|
50
|
-
self._implementation:
|
|
49
|
+
self._implementation: ApifyRequestQueueSingleClient | ApifyRequestQueueSharedClient
|
|
51
50
|
"""Internal implementation used to communicate with the Apify platform based Request Queue."""
|
|
52
51
|
if access == 'single':
|
|
53
|
-
self._implementation =
|
|
52
|
+
self._implementation = ApifyRequestQueueSingleClient(
|
|
54
53
|
api_client=self._api_client, metadata=metadata, cache_size=self._MAX_CACHED_REQUESTS
|
|
55
54
|
)
|
|
56
55
|
elif access == 'shared':
|
|
57
|
-
self._implementation =
|
|
56
|
+
self._implementation = ApifyRequestQueueSharedClient(
|
|
58
57
|
api_client=self._api_client,
|
|
59
58
|
metadata=metadata,
|
|
60
59
|
cache_size=self._MAX_CACHED_REQUESTS,
|
|
@@ -228,29 +227,7 @@ class ApifyRequestQueueClient(RequestQueueClient):
|
|
|
228
227
|
if sum(1 for param in [id, name, alias] if param is not None) > 1:
|
|
229
228
|
raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')
|
|
230
229
|
|
|
231
|
-
|
|
232
|
-
if not token:
|
|
233
|
-
raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).')
|
|
234
|
-
|
|
235
|
-
api_url = configuration.api_base_url
|
|
236
|
-
if not api_url:
|
|
237
|
-
raise ValueError(f'Apify storage client requires a valid API URL in Configuration (api_url={api_url}).')
|
|
238
|
-
|
|
239
|
-
api_public_base_url = configuration.api_public_base_url
|
|
240
|
-
if not api_public_base_url:
|
|
241
|
-
raise ValueError(
|
|
242
|
-
'Apify storage client requires a valid API public base URL in Configuration '
|
|
243
|
-
f'(api_public_base_url={api_public_base_url}).'
|
|
244
|
-
)
|
|
245
|
-
|
|
246
|
-
# Create Apify client with the provided token and API URL.
|
|
247
|
-
apify_client_async = ApifyClientAsync(
|
|
248
|
-
token=token,
|
|
249
|
-
api_url=api_url,
|
|
250
|
-
max_retries=8,
|
|
251
|
-
min_delay_between_retries_millis=500,
|
|
252
|
-
timeout_secs=360,
|
|
253
|
-
)
|
|
230
|
+
apify_client_async = create_apify_client(configuration)
|
|
254
231
|
apify_rqs_client = apify_client_async.request_queues()
|
|
255
232
|
|
|
256
233
|
# Normalize unnamed default storage in cases where not defined in `configuration.default_request_queue_id` to
|
{apify-3.0.2b6 → apify-3.0.3b1}/src/apify/storage_clients/_apify/_request_queue_shared_client.py
RENAMED
|
@@ -23,7 +23,7 @@ if TYPE_CHECKING:
|
|
|
23
23
|
logger = getLogger(__name__)
|
|
24
24
|
|
|
25
25
|
|
|
26
|
-
class
|
|
26
|
+
class ApifyRequestQueueSharedClient:
|
|
27
27
|
"""An Apify platform implementation of the request queue client.
|
|
28
28
|
|
|
29
29
|
This implementation supports multiple producers and multiple consumers scenario.
|
|
@@ -54,10 +54,10 @@ class _ApifyRequestQueueSharedClient:
|
|
|
54
54
|
"""The Apify request queue client for API operations."""
|
|
55
55
|
|
|
56
56
|
self._queue_head = deque[str]()
|
|
57
|
-
"""A deque to store request
|
|
57
|
+
"""A deque to store request ids in the queue head."""
|
|
58
58
|
|
|
59
59
|
self._requests_cache: LRUCache[str, CachedRequest] = LRUCache(maxsize=cache_size)
|
|
60
|
-
"""A cache to store request objects. Request
|
|
60
|
+
"""A cache to store request objects. Request id is used as the cache key."""
|
|
61
61
|
|
|
62
62
|
self._queue_has_locked_requests: bool | None = None
|
|
63
63
|
"""Whether the queue has requests locked by another client."""
|
|
@@ -101,31 +101,30 @@ class _ApifyRequestQueueSharedClient:
|
|
|
101
101
|
already_present_requests: list[ProcessedRequest] = []
|
|
102
102
|
|
|
103
103
|
for request in requests:
|
|
104
|
-
|
|
104
|
+
request_id = unique_key_to_request_id(request.unique_key)
|
|
105
|
+
if self._requests_cache.get(request_id):
|
|
105
106
|
# We are not sure if it was already handled at this point, and it is not worth calling API for it.
|
|
106
107
|
# It could have been handled by another client in the meantime, so cached information about
|
|
107
108
|
# `request.was_already_handled` is not reliable.
|
|
108
109
|
already_present_requests.append(
|
|
109
|
-
ProcessedRequest
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
}
|
|
110
|
+
ProcessedRequest(
|
|
111
|
+
id=request_id,
|
|
112
|
+
unique_key=request.unique_key,
|
|
113
|
+
was_already_present=True,
|
|
114
|
+
was_already_handled=request.was_already_handled,
|
|
115
115
|
)
|
|
116
116
|
)
|
|
117
117
|
|
|
118
118
|
else:
|
|
119
119
|
# Add new request to the cache.
|
|
120
|
-
processed_request = ProcessedRequest
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
}
|
|
120
|
+
processed_request = ProcessedRequest(
|
|
121
|
+
id=request_id,
|
|
122
|
+
unique_key=request.unique_key,
|
|
123
|
+
was_already_present=True,
|
|
124
|
+
was_already_handled=request.was_already_handled,
|
|
126
125
|
)
|
|
127
126
|
self._cache_request(
|
|
128
|
-
|
|
127
|
+
request_id,
|
|
129
128
|
processed_request,
|
|
130
129
|
)
|
|
131
130
|
new_requests.append(request)
|
|
@@ -135,7 +134,6 @@ class _ApifyRequestQueueSharedClient:
|
|
|
135
134
|
requests_dict = [
|
|
136
135
|
request.model_dump(
|
|
137
136
|
by_alias=True,
|
|
138
|
-
exclude={'id'}, # Exclude ID fields from requests since the API doesn't accept them.
|
|
139
137
|
)
|
|
140
138
|
for request in new_requests
|
|
141
139
|
]
|
|
@@ -150,7 +148,8 @@ class _ApifyRequestQueueSharedClient:
|
|
|
150
148
|
|
|
151
149
|
# Remove unprocessed requests from the cache
|
|
152
150
|
for unprocessed_request in api_response.unprocessed_requests:
|
|
153
|
-
|
|
151
|
+
unprocessed_request_id = unique_key_to_request_id(unprocessed_request.unique_key)
|
|
152
|
+
self._requests_cache.pop(unprocessed_request_id, None)
|
|
154
153
|
|
|
155
154
|
else:
|
|
156
155
|
api_response = AddRequestsResponse.model_validate(
|
|
@@ -183,7 +182,10 @@ class _ApifyRequestQueueSharedClient:
|
|
|
183
182
|
Returns:
|
|
184
183
|
The request or None if not found.
|
|
185
184
|
"""
|
|
186
|
-
|
|
185
|
+
return await self._get_request_by_id(unique_key_to_request_id(unique_key))
|
|
186
|
+
|
|
187
|
+
async def _get_request_by_id(self, request_id: str) -> Request | None:
|
|
188
|
+
response = await self._api_client.get_request(request_id)
|
|
187
189
|
|
|
188
190
|
if response is None:
|
|
189
191
|
return None
|
|
@@ -210,15 +212,15 @@ class _ApifyRequestQueueSharedClient:
|
|
|
210
212
|
return None
|
|
211
213
|
|
|
212
214
|
# Get the next request ID from the queue head
|
|
213
|
-
|
|
215
|
+
next_request_id = self._queue_head.popleft()
|
|
214
216
|
|
|
215
|
-
request = await self._get_or_hydrate_request(
|
|
217
|
+
request = await self._get_or_hydrate_request(next_request_id)
|
|
216
218
|
|
|
217
219
|
# Handle potential inconsistency where request might not be in the main table yet
|
|
218
220
|
if request is None:
|
|
219
221
|
logger.debug(
|
|
220
222
|
'Cannot find a request from the beginning of queue, will be retried later',
|
|
221
|
-
extra={'
|
|
223
|
+
extra={'next_request_id': next_request_id},
|
|
222
224
|
)
|
|
223
225
|
return None
|
|
224
226
|
|
|
@@ -226,16 +228,16 @@ class _ApifyRequestQueueSharedClient:
|
|
|
226
228
|
if request.handled_at is not None:
|
|
227
229
|
logger.debug(
|
|
228
230
|
'Request fetched from the beginning of queue was already handled',
|
|
229
|
-
extra={'
|
|
231
|
+
extra={'next_request_id': next_request_id},
|
|
230
232
|
)
|
|
231
233
|
return None
|
|
232
234
|
|
|
233
235
|
# Use get request to ensure we have the full request object.
|
|
234
|
-
request = await self.
|
|
236
|
+
request = await self._get_request_by_id(next_request_id)
|
|
235
237
|
if request is None:
|
|
236
238
|
logger.debug(
|
|
237
239
|
'Request fetched from the beginning of queue was not found in the RQ',
|
|
238
|
-
extra={'
|
|
240
|
+
extra={'next_request_id': next_request_id},
|
|
239
241
|
)
|
|
240
242
|
return None
|
|
241
243
|
|
|
@@ -252,15 +254,17 @@ class _ApifyRequestQueueSharedClient:
|
|
|
252
254
|
Returns:
|
|
253
255
|
Information about the queue operation. `None` if the given request was not in progress.
|
|
254
256
|
"""
|
|
257
|
+
request_id = unique_key_to_request_id(request.unique_key)
|
|
255
258
|
# Set the handled_at timestamp if not already set
|
|
256
259
|
if request.handled_at is None:
|
|
257
260
|
request.handled_at = datetime.now(tz=timezone.utc)
|
|
258
261
|
|
|
259
|
-
if cached_request := self._requests_cache[
|
|
262
|
+
if cached_request := self._requests_cache[request_id]:
|
|
260
263
|
cached_request.was_already_handled = request.was_already_handled
|
|
261
264
|
try:
|
|
262
265
|
# Update the request in the API
|
|
263
266
|
processed_request = await self._update_request(request)
|
|
267
|
+
processed_request.id = request_id
|
|
264
268
|
processed_request.unique_key = request.unique_key
|
|
265
269
|
|
|
266
270
|
# Update assumed handled count if this wasn't already handled
|
|
@@ -269,10 +273,9 @@ class _ApifyRequestQueueSharedClient:
|
|
|
269
273
|
self.metadata.pending_request_count -= 1
|
|
270
274
|
|
|
271
275
|
# Update the cache with the handled request
|
|
272
|
-
cache_key = request.unique_key
|
|
273
276
|
self._cache_request(
|
|
274
|
-
cache_key,
|
|
275
|
-
processed_request,
|
|
277
|
+
cache_key=request_id,
|
|
278
|
+
processed_request=processed_request,
|
|
276
279
|
hydrated_request=request,
|
|
277
280
|
)
|
|
278
281
|
except Exception as exc:
|
|
@@ -356,17 +359,17 @@ class _ApifyRequestQueueSharedClient:
|
|
|
356
359
|
# Fetch requests from the API and populate the queue head
|
|
357
360
|
await self._list_head()
|
|
358
361
|
|
|
359
|
-
async def _get_or_hydrate_request(self,
|
|
360
|
-
"""Get a request by
|
|
362
|
+
async def _get_or_hydrate_request(self, request_id: str) -> Request | None:
|
|
363
|
+
"""Get a request by id, either from cache or by fetching from API.
|
|
361
364
|
|
|
362
365
|
Args:
|
|
363
|
-
|
|
366
|
+
request_id: Id of the request to get.
|
|
364
367
|
|
|
365
368
|
Returns:
|
|
366
369
|
The request if found and valid, otherwise None.
|
|
367
370
|
"""
|
|
368
371
|
# First check if the request is in our cache
|
|
369
|
-
cached_entry = self._requests_cache.get(
|
|
372
|
+
cached_entry = self._requests_cache.get(request_id)
|
|
370
373
|
|
|
371
374
|
if cached_entry and cached_entry.hydrated:
|
|
372
375
|
# If we have the request hydrated in cache, return it
|
|
@@ -375,17 +378,17 @@ class _ApifyRequestQueueSharedClient:
|
|
|
375
378
|
# If not in cache or not hydrated, fetch the request
|
|
376
379
|
try:
|
|
377
380
|
# Fetch the request data
|
|
378
|
-
request = await self.
|
|
381
|
+
request = await self._get_request_by_id(request_id)
|
|
379
382
|
|
|
380
383
|
# If request is not found and return None
|
|
381
384
|
if not request:
|
|
382
385
|
return None
|
|
383
386
|
|
|
384
387
|
# Update cache with hydrated request
|
|
385
|
-
cache_key = request.unique_key
|
|
386
388
|
self._cache_request(
|
|
387
|
-
cache_key,
|
|
388
|
-
ProcessedRequest(
|
|
389
|
+
cache_key=request_id,
|
|
390
|
+
processed_request=ProcessedRequest(
|
|
391
|
+
id=request_id,
|
|
389
392
|
unique_key=request.unique_key,
|
|
390
393
|
was_already_present=True,
|
|
391
394
|
was_already_handled=request.handled_at is not None,
|
|
@@ -393,7 +396,7 @@ class _ApifyRequestQueueSharedClient:
|
|
|
393
396
|
hydrated_request=request,
|
|
394
397
|
)
|
|
395
398
|
except Exception as exc:
|
|
396
|
-
logger.debug(f'Error fetching request {
|
|
399
|
+
logger.debug(f'Error fetching request {request_id}: {exc!s}')
|
|
397
400
|
return None
|
|
398
401
|
else:
|
|
399
402
|
return request
|
|
@@ -442,8 +445,8 @@ class _ApifyRequestQueueSharedClient:
|
|
|
442
445
|
logger.debug(f'Using cached queue head with {len(self._queue_head)} requests')
|
|
443
446
|
# Create a list of requests from the cached queue head
|
|
444
447
|
items = []
|
|
445
|
-
for
|
|
446
|
-
cached_request = self._requests_cache.get(
|
|
448
|
+
for request_id in list(self._queue_head)[:limit]:
|
|
449
|
+
cached_request = self._requests_cache.get(request_id)
|
|
447
450
|
if cached_request and cached_request.hydrated:
|
|
448
451
|
items.append(cached_request.hydrated)
|
|
449
452
|
|
|
@@ -476,32 +479,35 @@ class _ApifyRequestQueueSharedClient:
|
|
|
476
479
|
|
|
477
480
|
for request_data in response.get('items', []):
|
|
478
481
|
request = Request.model_validate(request_data)
|
|
482
|
+
request_id = request_data.get('id')
|
|
479
483
|
|
|
480
484
|
# Skip requests without ID or unique key
|
|
481
|
-
if not request.unique_key:
|
|
485
|
+
if not request.unique_key or not request_id:
|
|
482
486
|
logger.debug(
|
|
483
|
-
'Skipping request from queue head, missing unique key',
|
|
487
|
+
'Skipping request from queue head, missing unique key or id',
|
|
484
488
|
extra={
|
|
485
489
|
'unique_key': request.unique_key,
|
|
490
|
+
'id': request_id,
|
|
486
491
|
},
|
|
487
492
|
)
|
|
488
493
|
continue
|
|
489
494
|
|
|
490
495
|
# Cache the request
|
|
491
496
|
self._cache_request(
|
|
492
|
-
|
|
497
|
+
request_id,
|
|
493
498
|
ProcessedRequest(
|
|
499
|
+
id=request_id,
|
|
494
500
|
unique_key=request.unique_key,
|
|
495
501
|
was_already_present=True,
|
|
496
502
|
was_already_handled=False,
|
|
497
503
|
),
|
|
498
504
|
hydrated_request=request,
|
|
499
505
|
)
|
|
500
|
-
self._queue_head.append(
|
|
506
|
+
self._queue_head.append(request_id)
|
|
501
507
|
|
|
502
|
-
for
|
|
508
|
+
for leftover_id in leftover_buffer:
|
|
503
509
|
# After adding new requests to the forefront, any existing leftover locked request is kept in the end.
|
|
504
|
-
self._queue_head.append(
|
|
510
|
+
self._queue_head.append(leftover_id)
|
|
505
511
|
return RequestQueueHead.model_validate(response)
|
|
506
512
|
|
|
507
513
|
def _cache_request(
|
|
@@ -520,7 +526,7 @@ class _ApifyRequestQueueSharedClient:
|
|
|
520
526
|
hydrated_request: The hydrated request object, if available.
|
|
521
527
|
"""
|
|
522
528
|
self._requests_cache[cache_key] = CachedRequest(
|
|
523
|
-
|
|
529
|
+
id=processed_request.id,
|
|
524
530
|
was_already_handled=processed_request.was_already_handled,
|
|
525
531
|
hydrated=hydrated_request,
|
|
526
532
|
lock_expires_at=None,
|