apify 2.7.1b7__tar.gz → 2.7.1b9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apify might be problematic. Click here for more details.
- {apify-2.7.1b7 → apify-2.7.1b9}/.github/workflows/build_and_deploy_docs.yaml +1 -1
- {apify-2.7.1b7 → apify-2.7.1b9}/.github/workflows/release.yaml +1 -8
- {apify-2.7.1b7 → apify-2.7.1b9}/.github/workflows/run_code_checks.yaml +0 -1
- {apify-2.7.1b7 → apify-2.7.1b9}/CHANGELOG.md +17 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/Makefile +3 -3
- {apify-2.7.1b7 → apify-2.7.1b9}/PKG-INFO +7 -5
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/03_dataset_exports.py +2 -2
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/03_rq.py +1 -4
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/conditional_actor_charge.py +2 -2
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/04_upgrading/upgrading_to_v2.md +2 -2
- apify-2.7.1b9/docs/04_upgrading/upgrading_to_v3.md +18 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/pyproject.toml +22 -17
- {apify-2.7.1b7 → apify-2.7.1b9}/src/apify/_actor.py +6 -7
- {apify-2.7.1b7 → apify-2.7.1b9}/src/apify/_configuration.py +33 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/src/apify/_proxy_configuration.py +8 -5
- {apify-2.7.1b7 → apify-2.7.1b9}/src/apify/_utils.py +9 -1
- apify-2.7.1b9/src/apify/events/__init__.py +5 -0
- apify-2.7.1b9/src/apify/events/_apify_event_manager.py +140 -0
- apify-2.7.1b9/src/apify/events/_types.py +102 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/src/apify/log.py +0 -7
- apify-2.7.1b9/src/apify/request_loaders/__init__.py +18 -0
- apify-2.7.1b7/src/apify/storages/_request_list.py → apify-2.7.1b9/src/apify/request_loaders/_apify_request_list.py +22 -15
- {apify-2.7.1b7 → apify-2.7.1b9}/src/apify/scrapy/_logging_config.py +1 -4
- {apify-2.7.1b7 → apify-2.7.1b9}/src/apify/scrapy/extensions/_httpcache.py +9 -5
- {apify-2.7.1b7 → apify-2.7.1b9}/src/apify/scrapy/requests.py +3 -3
- {apify-2.7.1b7 → apify-2.7.1b9}/src/apify/scrapy/scheduler.py +8 -5
- apify-2.7.1b9/src/apify/storage_clients/__init__.py +10 -0
- apify-2.7.1b9/src/apify/storage_clients/_apify/__init__.py +11 -0
- apify-2.7.1b9/src/apify/storage_clients/_apify/_dataset_client.py +304 -0
- apify-2.7.1b9/src/apify/storage_clients/_apify/_key_value_store_client.py +241 -0
- apify-2.7.1b9/src/apify/storage_clients/_apify/_models.py +107 -0
- apify-2.7.1b9/src/apify/storage_clients/_apify/_request_queue_client.py +785 -0
- apify-2.7.1b9/src/apify/storage_clients/_apify/_storage_client.py +80 -0
- apify-2.7.1b9/src/apify/storage_clients/_file_system/__init__.py +2 -0
- apify-2.7.1b9/src/apify/storage_clients/_file_system/_key_value_store_client.py +36 -0
- apify-2.7.1b9/src/apify/storage_clients/_file_system/_storage_client.py +35 -0
- apify-2.7.1b9/src/apify/storages/__init__.py +3 -0
- apify-2.7.1b9/tests/integration/_utils.py +17 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/integration/actor_source_base/Dockerfile +4 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/integration/actor_source_base/requirements.txt +1 -1
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/integration/conftest.py +22 -22
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/integration/test_actor_api_helpers.py +4 -6
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/integration/test_actor_dataset.py +4 -3
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/integration/test_actor_key_value_store.py +17 -15
- apify-2.7.1b9/tests/integration/test_actor_request_queue.py +400 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/integration/test_crawlers_with_storages.py +1 -3
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/integration/test_request_queue.py +108 -25
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/unit/actor/test_actor_create_proxy_configuration.py +2 -2
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/unit/actor/test_actor_dataset.py +5 -30
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/unit/actor/test_actor_env_helpers.py +3 -27
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/unit/actor/test_actor_key_value_store.py +25 -45
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/unit/actor/test_actor_lifecycle.py +1 -1
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/unit/actor/test_actor_request_queue.py +3 -4
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/unit/actor/test_request_list.py +7 -6
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/unit/conftest.py +14 -33
- apify-2.7.1b7/tests/unit/test_platform_event_manager.py → apify-2.7.1b9/tests/unit/events/test_apify_event_manager.py +8 -7
- apify-2.7.1b9/tests/unit/scrapy/pipelines/__init__.py +0 -0
- apify-2.7.1b9/tests/unit/scrapy/requests/__init__.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/unit/scrapy/requests/test_to_apify_request.py +0 -2
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/unit/scrapy/requests/test_to_scrapy_request.py +6 -10
- apify-2.7.1b9/tests/unit/scrapy/utils/__init__.py +0 -0
- apify-2.7.1b9/tests/unit/storage_clients/__init__.py +0 -0
- apify-2.7.1b9/tests/unit/storage_clients/test_apify_request_queue_client.py +38 -0
- apify-2.7.1b9/tests/unit/storage_clients/test_file_system.py +55 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/unit/test_proxy_configuration.py +13 -7
- {apify-2.7.1b7 → apify-2.7.1b9}/uv.lock +423 -652
- {apify-2.7.1b7 → apify-2.7.1b9}/website/package-lock.json +628 -593
- {apify-2.7.1b7 → apify-2.7.1b9}/website/package.json +1 -1
- {apify-2.7.1b7 → apify-2.7.1b9}/website/sidebars.js +4 -4
- apify-2.7.1b9/website/static/.nojekyll +0 -0
- apify-2.7.1b7/src/apify/_platform_event_manager.py +0 -215
- apify-2.7.1b7/src/apify/apify_storage_client/__init__.py +0 -3
- apify-2.7.1b7/src/apify/apify_storage_client/_apify_storage_client.py +0 -72
- apify-2.7.1b7/src/apify/apify_storage_client/_dataset_client.py +0 -190
- apify-2.7.1b7/src/apify/apify_storage_client/_dataset_collection_client.py +0 -51
- apify-2.7.1b7/src/apify/apify_storage_client/_key_value_store_client.py +0 -109
- apify-2.7.1b7/src/apify/apify_storage_client/_key_value_store_collection_client.py +0 -51
- apify-2.7.1b7/src/apify/apify_storage_client/_request_queue_client.py +0 -176
- apify-2.7.1b7/src/apify/apify_storage_client/_request_queue_collection_client.py +0 -51
- apify-2.7.1b7/src/apify/storages/__init__.py +0 -5
- apify-2.7.1b7/tests/integration/_utils.py +0 -9
- apify-2.7.1b7/tests/integration/test_actor_request_queue.py +0 -88
- {apify-2.7.1b7 → apify-2.7.1b9}/.editorconfig +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/.github/CODEOWNERS +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/.github/workflows/check_pr_title.yaml +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/.github/workflows/pre_release.yaml +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/.github/workflows/update_new_issue.yaml +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/.gitignore +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/.markdownlint.yaml +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/.pre-commit-config.yaml +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/CONTRIBUTING.md +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/LICENSE +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/README.md +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/01_overview/01_introduction.mdx +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/01_overview/02_running_actors_locally.mdx +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/01_overview/03_actor_structure.mdx +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/01_overview/code/01_introduction.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/01_overview/code/actor_structure/__init__.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/01_overview/code/actor_structure/__main__.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/01_overview/code/actor_structure/main.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/01_overview/code/actor_structure/py.typed +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/02_guides/01_beautifulsoup_httpx.mdx +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/02_guides/02_crawlee.mdx +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/02_guides/03_playwright.mdx +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/02_guides/04_selenium.mdx +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/02_guides/05_scrapy.mdx +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/02_guides/code/01_beautifulsoup_httpx.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/02_guides/code/02_crawlee_beautifulsoup.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/02_guides/code/02_crawlee_playwright.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/02_guides/code/03_playwright.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/02_guides/code/04_selenium.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/02_guides/code/scrapy_project/src/__init__.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/02_guides/code/scrapy_project/src/__main__.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/02_guides/code/scrapy_project/src/items.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/02_guides/code/scrapy_project/src/main.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/02_guides/code/scrapy_project/src/py.typed +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/02_guides/code/scrapy_project/src/settings.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/02_guides/code/scrapy_project/src/spiders/__init__.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/02_guides/code/scrapy_project/src/spiders/py.typed +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/02_guides/code/scrapy_project/src/spiders/title.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/01_actor_lifecycle.mdx +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/02_actor_input.mdx +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/03_storages.mdx +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/04_actor_events.mdx +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/05_proxy_management.mdx +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/06_interacting_with_other_actors.mdx +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/07_webhooks.mdx +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/08_access_apify_api.mdx +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/09_running_webserver.mdx +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/10_logging.mdx +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/11_configuration.mdx +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/12_pay_per_event.mdx +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/01_context_manager.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/01_init_exit.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/01_reboot.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/01_status_message.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/02_input.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/03_dataset_read_write.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/03_deleting_storages.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/03_kvs_iterating.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/03_kvs_public_url.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/03_kvs_read_write.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/03_opening_storages.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/04_actor_events.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/05_apify_proxy.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/05_apify_proxy_config.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/05_custom_proxy.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/05_custom_proxy_function.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/05_proxy_actor_input.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/05_proxy_httpx.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/05_proxy_rotation.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/06_interacting_call.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/06_interacting_call_task.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/06_interacting_metamorph.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/06_interacting_start.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/07_webhook.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/07_webhook_preventing.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/08_actor_client.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/08_actor_new_client.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/09_webserver.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/10_log_config.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/10_logger_usage.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/10_redirect_log.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/10_redirect_log_existing_run.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/11_config.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/03_concepts/code/actor_charge.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/docs/pyproject.toml +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/renovate.json +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/src/apify/__init__.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/src/apify/_charging.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/src/apify/_consts.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/src/apify/_crypto.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/src/apify/_models.py +0 -0
- {apify-2.7.1b7/src/apify/apify_storage_client → apify-2.7.1b9/src/apify/events}/py.typed +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/src/apify/py.typed +0 -0
- {apify-2.7.1b7/src/apify/scrapy/middlewares → apify-2.7.1b9/src/apify/request_loaders}/py.typed +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/src/apify/scrapy/__init__.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/src/apify/scrapy/_actor_runner.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/src/apify/scrapy/_async_thread.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/src/apify/scrapy/extensions/__init__.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/src/apify/scrapy/middlewares/__init__.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/src/apify/scrapy/middlewares/apify_proxy.py +0 -0
- {apify-2.7.1b7/src/apify/scrapy/pipelines → apify-2.7.1b9/src/apify/scrapy/middlewares}/py.typed +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/src/apify/scrapy/pipelines/__init__.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/src/apify/scrapy/pipelines/actor_dataset_push.py +0 -0
- {apify-2.7.1b7/src/apify/scrapy → apify-2.7.1b9/src/apify/scrapy/pipelines}/py.typed +0 -0
- {apify-2.7.1b7/src/apify/storages → apify-2.7.1b9/src/apify/scrapy}/py.typed +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/src/apify/scrapy/utils.py +0 -0
- /apify-2.7.1b7/tests/integration/__init__.py → /apify-2.7.1b9/src/apify/storage_clients/_apify/py.typed +0 -0
- /apify-2.7.1b7/tests/integration/actor_source_base/src/__init__.py → /apify-2.7.1b9/src/apify/storage_clients/py.typed +0 -0
- /apify-2.7.1b7/tests/unit/__init__.py → /apify-2.7.1b9/src/apify/storages/py.typed +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/integration/README.md +0 -0
- {apify-2.7.1b7/tests/unit/actor → apify-2.7.1b9/tests/integration}/__init__.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/integration/actor_source_base/server.py +0 -0
- {apify-2.7.1b7/tests/unit/scrapy → apify-2.7.1b9/tests/integration/actor_source_base/src}/__init__.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/integration/actor_source_base/src/__main__.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/integration/actor_source_base/src/main.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/integration/test_actor_call_timeouts.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/integration/test_actor_charge.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/integration/test_actor_create_proxy_configuration.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/integration/test_actor_events.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/integration/test_actor_lifecycle.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/integration/test_actor_log.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/integration/test_actor_scrapy.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/integration/test_fixtures.py +0 -0
- {apify-2.7.1b7/tests/unit/scrapy/extensions → apify-2.7.1b9/tests/unit}/__init__.py +0 -0
- {apify-2.7.1b7/tests/unit/scrapy/middlewares → apify-2.7.1b9/tests/unit/actor}/__init__.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/unit/actor/test_actor_helpers.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/unit/actor/test_actor_log.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/unit/actor/test_actor_non_default_instance.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/unit/actor/test_configuration.py +0 -0
- {apify-2.7.1b7/tests/unit/scrapy/pipelines → apify-2.7.1b9/tests/unit/events}/__init__.py +0 -0
- {apify-2.7.1b7/tests/unit/scrapy/requests → apify-2.7.1b9/tests/unit/scrapy}/__init__.py +0 -0
- {apify-2.7.1b7/tests/unit/scrapy/utils → apify-2.7.1b9/tests/unit/scrapy/extensions}/__init__.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/unit/scrapy/extensions/test_httpcache.py +0 -0
- /apify-2.7.1b7/website/static/.nojekyll → /apify-2.7.1b9/tests/unit/scrapy/middlewares/__init__.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/unit/scrapy/middlewares/test_apify_proxy.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/unit/scrapy/pipelines/test_actor_dataset_push.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/unit/scrapy/utils/test_apply_apify_settings.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/unit/scrapy/utils/test_get_basic_auth_header.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/tests/unit/test_crypto.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/website/.eslintrc.json +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/website/babel.config.js +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/website/build_api_reference.sh +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/website/docusaurus.config.js +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/website/generate_module_shortcuts.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/website/src/components/ApiLink.jsx +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/website/src/components/Gradients.jsx +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/website/src/components/Highlights.jsx +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/website/src/components/Highlights.module.css +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/website/src/components/RunnableCodeBlock.jsx +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/website/src/components/RunnableCodeBlock.module.css +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/website/src/css/custom.css +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/website/src/pages/home_page_example.py +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/website/src/pages/index.js +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/website/src/pages/index.module.css +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/website/static/img/docs-og.png +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/website/static/img/guides/redirected_logs_example.webp +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/website/tools/docs-prettier.config.js +0 -0
- {apify-2.7.1b7 → apify-2.7.1b9}/website/tools/utils/externalLink.js +0 -0
|
@@ -61,16 +61,9 @@ jobs:
|
|
|
61
61
|
with:
|
|
62
62
|
python-versions: '["3.10", "3.11", "3.12", "3.13"]'
|
|
63
63
|
|
|
64
|
-
integration_tests:
|
|
65
|
-
name: Integration tests
|
|
66
|
-
uses: apify/workflows/.github/workflows/python_integration_tests.yaml@main
|
|
67
|
-
secrets: inherit
|
|
68
|
-
with:
|
|
69
|
-
python-versions: '["3.10", "3.13"]'
|
|
70
|
-
|
|
71
64
|
update_changelog:
|
|
72
65
|
name: Update changelog
|
|
73
|
-
needs: [release_metadata, lint_check, type_check, unit_tests, integration_tests]
|
|
66
|
+
needs: [release_metadata, lint_check, type_check, unit_tests]
|
|
74
67
|
uses: apify/workflows/.github/workflows/python_bump_and_update_changelog.yaml@main
|
|
75
68
|
with:
|
|
76
69
|
version_number: ${{ needs.release_metadata.outputs.version_number }}
|
|
@@ -5,10 +5,26 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
<!-- git-cliff-unreleased-start -->
|
|
6
6
|
## 2.7.1 - **not yet released**
|
|
7
7
|
|
|
8
|
+
### 🚀 Features
|
|
9
|
+
|
|
10
|
+
- Add deduplication to `add_batch_of_requests` ([#534](https://github.com/apify/apify-sdk-python/pull/534)) ([dd03c4d](https://github.com/apify/apify-sdk-python/commit/dd03c4d446f611492adf35f1b5738648ee5a66f7)) by [@Pijukatel](https://github.com/Pijukatel), closes [#514](https://github.com/apify/apify-sdk-python/issues/514)
|
|
11
|
+
|
|
8
12
|
### 🐛 Bug Fixes
|
|
9
13
|
|
|
10
14
|
- Restrict apify-shared and apify-client versions ([#523](https://github.com/apify/apify-sdk-python/pull/523)) ([b3ae5a9](https://github.com/apify/apify-sdk-python/commit/b3ae5a972a65454a4998eda59c9fcc3f6b7e8579)) by [@vdusek](https://github.com/vdusek)
|
|
11
15
|
- Expose `APIFY_USER_IS_PAYING` env var to the configuration ([#507](https://github.com/apify/apify-sdk-python/pull/507)) ([0801e54](https://github.com/apify/apify-sdk-python/commit/0801e54887317c1280cc6828ecd3f2cc53287e76)) by [@stepskop](https://github.com/stepskop)
|
|
16
|
+
- Resolve DeprecationWarning in ApifyEventManager ([#555](https://github.com/apify/apify-sdk-python/pull/555)) ([0c5111d](https://github.com/apify/apify-sdk-python/commit/0c5111dafe19796ec1fb9652a44c031bed9758df)) by [@vdusek](https://github.com/vdusek), closes [#343](https://github.com/apify/apify-sdk-python/issues/343)
|
|
17
|
+
- Use same `client_key` for `Actor` created `request_queue` and improve its metadata estimation ([#552](https://github.com/apify/apify-sdk-python/pull/552)) ([7e4e5da](https://github.com/apify/apify-sdk-python/commit/7e4e5da81dd87e84ebeef2bd336c6c1d422cb9a7)) by [@Pijukatel](https://github.com/Pijukatel), closes [#536](https://github.com/apify/apify-sdk-python/issues/536)
|
|
18
|
+
|
|
19
|
+
### Chore
|
|
20
|
+
|
|
21
|
+
- [**breaking**] Update apify-client and apify-shared to v2.0 ([#548](https://github.com/apify/apify-sdk-python/pull/548)) ([8ba084d](https://github.com/apify/apify-sdk-python/commit/8ba084ded6cd018111343f2219260b481c8d4e35)) by [@vdusek](https://github.com/vdusek)
|
|
22
|
+
|
|
23
|
+
### Refactor
|
|
24
|
+
|
|
25
|
+
- [**breaking**] Adapt to the Crawlee v1.0 ([#470](https://github.com/apify/apify-sdk-python/pull/470)) ([f7e3320](https://github.com/apify/apify-sdk-python/commit/f7e33206cf3e4767faacbdc43511b45b6785f929)) by [@vdusek](https://github.com/vdusek), closes [#469](https://github.com/apify/apify-sdk-python/issues/469), [#540](https://github.com/apify/apify-sdk-python/issues/540)
|
|
26
|
+
- [**breaking**] Replace `httpx` with `impit` ([#560](https://github.com/apify/apify-sdk-python/pull/560)) ([cca3869](https://github.com/apify/apify-sdk-python/commit/cca3869e85968865e56aafcdcb36fbccba27aef0)) by [@Mantisus](https://github.com/Mantisus), closes [#558](https://github.com/apify/apify-sdk-python/issues/558)
|
|
27
|
+
- [**breaking**] Remove `Request.id` field ([#553](https://github.com/apify/apify-sdk-python/pull/553)) ([445ab5d](https://github.com/apify/apify-sdk-python/commit/445ab5d752b785fc2018b35c8adbe779253d7acd)) by [@Pijukatel](https://github.com/Pijukatel)
|
|
12
28
|
|
|
13
29
|
|
|
14
30
|
<!-- git-cliff-unreleased-end -->
|
|
@@ -24,6 +40,7 @@ All notable changes to this project will be documented in this file.
|
|
|
24
40
|
- Tagline overlap ([#501](https://github.com/apify/apify-sdk-python/pull/501)) ([bae8340](https://github.com/apify/apify-sdk-python/commit/bae8340c46fea756ea35ea4d591da84c09d478e2)) by [@katzino](https://github.com/katzino)
|
|
25
41
|
|
|
26
42
|
|
|
43
|
+
|
|
27
44
|
## [2.7.3](https://github.com/apify/apify-sdk-python/releases/tag/v2.7.3) (2025-08-11)
|
|
28
45
|
|
|
29
46
|
### 🐛 Bug Fixes
|
|
@@ -26,13 +26,13 @@ type-check:
|
|
|
26
26
|
uv run mypy
|
|
27
27
|
|
|
28
28
|
unit-tests:
|
|
29
|
-
uv run pytest --numprocesses=auto
|
|
29
|
+
uv run pytest --numprocesses=auto -vv --cov=src/apify tests/unit
|
|
30
30
|
|
|
31
31
|
unit-tests-cov:
|
|
32
|
-
uv run pytest --numprocesses=auto
|
|
32
|
+
uv run pytest --numprocesses=auto -vv --cov=src/apify --cov-report=html tests/unit
|
|
33
33
|
|
|
34
34
|
integration-tests:
|
|
35
|
-
uv run pytest --numprocesses=$(INTEGRATION_TESTS_CONCURRENCY)
|
|
35
|
+
uv run pytest --numprocesses=$(INTEGRATION_TESTS_CONCURRENCY) -vv tests/integration
|
|
36
36
|
|
|
37
37
|
format:
|
|
38
38
|
uv run ruff check --fix
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: apify
|
|
3
|
-
Version: 2.7.1b7
|
|
3
|
+
Version: 2.7.1b9
|
|
4
4
|
Summary: Apify SDK for Python
|
|
5
5
|
Project-URL: Apify Homepage, https://apify.com
|
|
6
6
|
Project-URL: Changelog, https://docs.apify.com/sdk/python/docs/changelog
|
|
@@ -225,15 +225,17 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
225
225
|
Classifier: Programming Language :: Python :: 3.13
|
|
226
226
|
Classifier: Topic :: Software Development :: Libraries
|
|
227
227
|
Requires-Python: >=3.10
|
|
228
|
-
Requires-Dist: apify-client<2.0.0
|
|
229
|
-
Requires-Dist: apify-shared<2.0.0
|
|
230
|
-
Requires-Dist:
|
|
228
|
+
Requires-Dist: apify-client<3.0.0,>=2.0.0
|
|
229
|
+
Requires-Dist: apify-shared<3.0.0,>=2.0.0
|
|
230
|
+
Requires-Dist: cachetools>=5.5.0
|
|
231
|
+
Requires-Dist: crawlee==1.0.0rc1
|
|
231
232
|
Requires-Dist: cryptography>=42.0.0
|
|
232
|
-
Requires-Dist: httpx>=0.27.0
|
|
233
|
+
Requires-Dist: impit>=0.5.3
|
|
233
234
|
Requires-Dist: lazy-object-proxy<1.11.0
|
|
234
235
|
Requires-Dist: more-itertools>=10.2.0
|
|
235
236
|
Requires-Dist: typing-extensions>=4.1.0
|
|
236
237
|
Requires-Dist: websockets>=14.0
|
|
238
|
+
Requires-Dist: yarl>=1.18.0
|
|
237
239
|
Provides-Extra: scrapy
|
|
238
240
|
Requires-Dist: scrapy>=2.11.0; extra == 'scrapy'
|
|
239
241
|
Description-Content-Type: text/markdown
|
|
@@ -11,14 +11,14 @@ async def main() -> None:
|
|
|
11
11
|
await dataset.export_to(
|
|
12
12
|
content_type='csv',
|
|
13
13
|
key='data.csv',
|
|
14
|
-
|
|
14
|
+
to_kvs_name='my-cool-key-value-store',
|
|
15
15
|
)
|
|
16
16
|
|
|
17
17
|
# Export the data as JSON
|
|
18
18
|
await dataset.export_to(
|
|
19
19
|
content_type='json',
|
|
20
20
|
key='data.json',
|
|
21
|
-
|
|
21
|
+
to_kvs_name='my-cool-key-value-store',
|
|
22
22
|
)
|
|
23
23
|
|
|
24
24
|
# Print the exported records
|
|
@@ -20,13 +20,10 @@ async def main() -> None:
|
|
|
20
20
|
|
|
21
21
|
# If you try to add an existing request again, it will not do anything
|
|
22
22
|
add_request_info = await queue.add_request(
|
|
23
|
-
Request.from_url('http://
|
|
23
|
+
Request.from_url('http://example.com/5')
|
|
24
24
|
)
|
|
25
25
|
Actor.log.info(f'Add request info: {add_request_info}')
|
|
26
26
|
|
|
27
|
-
processed_request = await queue.get_request(add_request_info.id)
|
|
28
|
-
Actor.log.info(f'Processed request: {processed_request}')
|
|
29
|
-
|
|
30
27
|
# Finally, process the queue until all requests are handled
|
|
31
28
|
while not await queue.is_finished():
|
|
32
29
|
# Fetch the next unhandled request in the queue
|
|
@@ -6,8 +6,8 @@ async def main() -> None:
|
|
|
6
6
|
# Check the dataset because there might already be items
|
|
7
7
|
# if the run migrated or was restarted
|
|
8
8
|
default_dataset = await Actor.open_dataset()
|
|
9
|
-
|
|
10
|
-
charged_items =
|
|
9
|
+
metadata = await default_dataset.get_metadata()
|
|
10
|
+
charged_items = metadata.item_count
|
|
11
11
|
|
|
12
12
|
# highlight-start
|
|
13
13
|
if Actor.get_charging_manager().get_pricing_info().is_pay_per_event:
|
|
@@ -3,7 +3,7 @@ id: upgrading-to-v2
|
|
|
3
3
|
title: Upgrading to v2
|
|
4
4
|
---
|
|
5
5
|
|
|
6
|
-
This page summarizes
|
|
6
|
+
This page summarizes the breaking changes between Apify Python SDK v1.x and v2.0.
|
|
7
7
|
|
|
8
8
|
## Python version support
|
|
9
9
|
|
|
@@ -12,7 +12,7 @@ Support for Python 3.8 has been dropped. The Apify Python SDK v2.x now requires
|
|
|
12
12
|
## Storages
|
|
13
13
|
|
|
14
14
|
- The SDK now uses [crawlee](https://github.com/apify/crawlee-python) for local storage emulation. This change should not affect intended usage (working with `Dataset`, `KeyValueStore` and `RequestQueue` classes from the `apify.storages` module or using the shortcuts exposed by the `Actor` class) in any way.
|
|
15
|
-
- There is a difference in the `RequestQueue.add_request` method: it accepts an `apify.Request` object instead of a free-form dictionary.
|
|
15
|
+
- There is a difference in the `RequestQueue.add_request` method: it accepts an `apify.Request` object instead of a free-form dictionary.
|
|
16
16
|
- A quick way to migrate from dict-based arguments is to wrap it with a `Request.model_validate()` call.
|
|
17
17
|
- The preferred way is using the `Request.from_url` helper which prefills the `unique_key` and `id` attributes, or instantiating it directly, e.g., `Request(url='https://example.tld', ...)`.
|
|
18
18
|
- For simple use cases, `add_request` also accepts plain strings that contain an URL, e.g. `queue.add_request('https://example.tld')`.
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
---
|
|
2
|
+
id: upgrading-to-v3
|
|
3
|
+
title: Upgrading to v3
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
This page summarizes the breaking changes between Apify Python SDK v2.x and v3.0.
|
|
7
|
+
|
|
8
|
+
## Python version support
|
|
9
|
+
|
|
10
|
+
Support for Python 3.9 has been dropped. The Apify Python SDK v3.x now requires Python 3.10 or later. Make sure your environment is running a compatible version before upgrading.
|
|
11
|
+
|
|
12
|
+
## Storages
|
|
13
|
+
|
|
14
|
+
<!-- TODO -->
|
|
15
|
+
|
|
16
|
+
## Storage clients
|
|
17
|
+
|
|
18
|
+
<!-- TODO -->
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "apify"
|
|
7
|
-
version = "2.7.1b7"
|
|
7
|
+
version = "2.7.1b9"
|
|
8
8
|
description = "Apify SDK for Python"
|
|
9
9
|
authors = [{ name = "Apify Technologies s.r.o.", email = "support@apify.com" }]
|
|
10
10
|
license = { file = "LICENSE" }
|
|
@@ -34,17 +34,19 @@ keywords = [
|
|
|
34
34
|
"scraping",
|
|
35
35
|
]
|
|
36
36
|
dependencies = [
|
|
37
|
-
"apify-client<2.0.0",
|
|
38
|
-
"apify-shared<2.0.0",
|
|
39
|
-
"crawlee
|
|
37
|
+
"apify-client>=2.0.0,<3.0.0",
|
|
38
|
+
"apify-shared>=2.0.0,<3.0.0",
|
|
39
|
+
"crawlee==1.0.0rc1",
|
|
40
|
+
"cachetools>=5.5.0",
|
|
40
41
|
"cryptography>=42.0.0",
|
|
41
|
-
"httpx>=0.27.0",
|
|
42
42
|
# TODO: ensure compatibility with the latest version of lazy-object-proxy
|
|
43
43
|
# https://github.com/apify/apify-sdk-python/issues/460
|
|
44
|
+
"impit>=0.5.3",
|
|
44
45
|
"lazy-object-proxy<1.11.0",
|
|
45
46
|
"more_itertools>=10.2.0",
|
|
46
47
|
"typing-extensions>=4.1.0",
|
|
47
48
|
"websockets>=14.0",
|
|
49
|
+
"yarl>=1.18.0",
|
|
48
50
|
]
|
|
49
51
|
|
|
50
52
|
[project.optional-dependencies]
|
|
@@ -64,27 +66,30 @@ scrapy = ["scrapy>=2.11.0"]
|
|
|
64
66
|
dev = [
|
|
65
67
|
"build~=1.3.0",
|
|
66
68
|
"crawlee[parsel]",
|
|
67
|
-
"dycw-pytest-only
|
|
68
|
-
"griffe~=1.
|
|
69
|
+
"dycw-pytest-only~=2.1.0",
|
|
70
|
+
"griffe~=1.12.0",
|
|
69
71
|
"mypy~=1.17.0",
|
|
70
72
|
"pre-commit~=4.3.0",
|
|
71
73
|
"pydoc-markdown~=4.8.0",
|
|
72
74
|
"pytest-asyncio~=1.1.0",
|
|
73
75
|
"pytest-cov~=6.2.0",
|
|
74
|
-
"pytest-httpserver
|
|
75
|
-
"pytest-timeout
|
|
76
|
+
"pytest-httpserver~=1.1.0",
|
|
77
|
+
"pytest-timeout~=2.4.0",
|
|
76
78
|
"pytest-xdist~=3.8.0",
|
|
77
79
|
"pytest~=8.4.0",
|
|
78
80
|
"ruff~=0.12.0",
|
|
79
81
|
"setuptools", # setuptools are used by pytest but not explicitly required
|
|
82
|
+
"types-cachetools~=6.1.0.20250717",
|
|
80
83
|
"uvicorn[standard]",
|
|
81
|
-
"werkzeug~=3.1.
|
|
82
|
-
"yarl~=1.20.0", # yarl is used by crawlee
|
|
84
|
+
"werkzeug~=3.1.0", # Werkzeug is used by httpserver
|
|
83
85
|
]
|
|
84
86
|
|
|
85
87
|
[tool.hatch.build.targets.wheel]
|
|
86
88
|
packages = ["src/apify"]
|
|
87
89
|
|
|
90
|
+
[tool.hatch.metadata]
|
|
91
|
+
allow-direct-references = true
|
|
92
|
+
|
|
88
93
|
[tool.ruff]
|
|
89
94
|
line-length = 120
|
|
90
95
|
include = ["src/**/*.py", "tests/**/*.py", "docs/**/*.py", "website/**/*.py"]
|
|
@@ -208,12 +213,12 @@ exclude = []
|
|
|
208
213
|
|
|
209
214
|
[[tool.mypy.overrides]]
|
|
210
215
|
module = [
|
|
211
|
-
'bs4',
|
|
212
|
-
'
|
|
213
|
-
'
|
|
214
|
-
'playwright.*',
|
|
215
|
-
'scrapy.*',
|
|
216
|
-
'selenium.*',
|
|
216
|
+
'bs4', # Documentation
|
|
217
|
+
'httpx', # Documentation
|
|
218
|
+
'lazy_object_proxy', # Untyped and stubs not available
|
|
219
|
+
'playwright.*', # Documentation
|
|
220
|
+
'scrapy.*', # Untyped and stubs not available
|
|
221
|
+
'selenium.*', # Documentation
|
|
217
222
|
]
|
|
218
223
|
ignore_missing_imports = true
|
|
219
224
|
|
|
@@ -13,7 +13,6 @@ from pydantic import AliasChoices
|
|
|
13
13
|
|
|
14
14
|
from apify_client import ApifyClientAsync
|
|
15
15
|
from apify_shared.consts import ActorEnvVars, ActorExitCodes, ApifyEnvVars
|
|
16
|
-
from apify_shared.utils import maybe_extract_enum_member_value
|
|
17
16
|
from crawlee import service_locator
|
|
18
17
|
from crawlee.events import (
|
|
19
18
|
Event,
|
|
@@ -30,11 +29,11 @@ from apify._configuration import Configuration
|
|
|
30
29
|
from apify._consts import EVENT_LISTENERS_TIMEOUT
|
|
31
30
|
from apify._crypto import decrypt_input_secrets, load_private_key
|
|
32
31
|
from apify._models import ActorRun
|
|
33
|
-
from apify._platform_event_manager import EventManager, LocalEventManager, PlatformEventManager
|
|
34
32
|
from apify._proxy_configuration import ProxyConfiguration
|
|
35
|
-
from apify._utils import docs_group, docs_name, get_system_info, is_running_in_ipython
|
|
36
|
-
from apify.
|
|
33
|
+
from apify._utils import docs_group, docs_name, get_system_info, is_running_in_ipython, maybe_extract_enum_member_value
|
|
34
|
+
from apify.events import ApifyEventManager, EventManager, LocalEventManager
|
|
37
35
|
from apify.log import _configure_logging, logger
|
|
36
|
+
from apify.storage_clients import ApifyStorageClient
|
|
38
37
|
from apify.storages import Dataset, KeyValueStore, RequestQueue
|
|
39
38
|
|
|
40
39
|
if TYPE_CHECKING:
|
|
@@ -126,12 +125,12 @@ class _ActorType:
|
|
|
126
125
|
|
|
127
126
|
# Create an instance of the cloud storage client, the local storage client is obtained
|
|
128
127
|
# from the service locator.
|
|
129
|
-
self._cloud_storage_client = ApifyStorageClient
|
|
128
|
+
self._cloud_storage_client = ApifyStorageClient()
|
|
130
129
|
|
|
131
130
|
# Set the event manager based on whether the Actor is running on the platform or locally.
|
|
132
131
|
self._event_manager = (
|
|
133
|
-
|
|
134
|
-
|
|
132
|
+
ApifyEventManager(
|
|
133
|
+
configuration=self._configuration,
|
|
135
134
|
persist_state_interval=self._configuration.persist_state_interval,
|
|
136
135
|
)
|
|
137
136
|
if self.is_at_home()
|
|
@@ -140,6 +140,39 @@ class Configuration(CrawleeConfiguration):
|
|
|
140
140
|
),
|
|
141
141
|
] = None
|
|
142
142
|
|
|
143
|
+
default_dataset_id: Annotated[
|
|
144
|
+
str,
|
|
145
|
+
Field(
|
|
146
|
+
validation_alias=AliasChoices(
|
|
147
|
+
'actor_default_dataset_id',
|
|
148
|
+
'apify_default_dataset_id',
|
|
149
|
+
),
|
|
150
|
+
description='Default dataset ID used by the Apify storage client when no ID or name is provided.',
|
|
151
|
+
),
|
|
152
|
+
] = 'default'
|
|
153
|
+
|
|
154
|
+
default_key_value_store_id: Annotated[
|
|
155
|
+
str,
|
|
156
|
+
Field(
|
|
157
|
+
validation_alias=AliasChoices(
|
|
158
|
+
'actor_default_key_value_store_id',
|
|
159
|
+
'apify_default_key_value_store_id',
|
|
160
|
+
),
|
|
161
|
+
description='Default key-value store ID for the Apify storage client when no ID or name is provided.',
|
|
162
|
+
),
|
|
163
|
+
] = 'default'
|
|
164
|
+
|
|
165
|
+
default_request_queue_id: Annotated[
|
|
166
|
+
str,
|
|
167
|
+
Field(
|
|
168
|
+
validation_alias=AliasChoices(
|
|
169
|
+
'actor_default_request_queue_id',
|
|
170
|
+
'apify_default_request_queue_id',
|
|
171
|
+
),
|
|
172
|
+
description='Default request queue ID for the Apify storage client when no ID or name is provided.',
|
|
173
|
+
),
|
|
174
|
+
] = 'default'
|
|
175
|
+
|
|
143
176
|
disable_outdated_warning: Annotated[
|
|
144
177
|
bool,
|
|
145
178
|
Field(
|
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import ipaddress
|
|
4
|
+
import json
|
|
4
5
|
import re
|
|
5
6
|
from dataclasses import dataclass, field
|
|
6
7
|
from re import Pattern
|
|
7
8
|
from typing import TYPE_CHECKING, Any
|
|
8
9
|
from urllib.parse import urljoin, urlparse
|
|
9
10
|
|
|
10
|
-
import
|
|
11
|
+
import impit
|
|
12
|
+
from yarl import URL
|
|
11
13
|
|
|
12
14
|
from apify_shared.consts import ApifyEnvVars
|
|
13
15
|
from crawlee.proxy_configuration import ProxyConfiguration as CrawleeProxyConfiguration
|
|
@@ -20,7 +22,8 @@ from apify.log import logger
|
|
|
20
22
|
|
|
21
23
|
if TYPE_CHECKING:
|
|
22
24
|
from apify_client import ApifyClientAsync
|
|
23
|
-
|
|
25
|
+
|
|
26
|
+
from apify import Request
|
|
24
27
|
|
|
25
28
|
APIFY_PROXY_VALUE_REGEX = re.compile(r'^[\w._~]+$')
|
|
26
29
|
COUNTRY_CODE_REGEX = re.compile(r'^[A-Z]{2}$')
|
|
@@ -230,7 +233,7 @@ class ProxyConfiguration(CrawleeProxyConfiguration):
|
|
|
230
233
|
return None
|
|
231
234
|
|
|
232
235
|
if self._uses_apify_proxy:
|
|
233
|
-
parsed_url =
|
|
236
|
+
parsed_url = URL(proxy_info.url)
|
|
234
237
|
username = self._get_username(session_id)
|
|
235
238
|
|
|
236
239
|
return ProxyInfo(
|
|
@@ -274,11 +277,11 @@ class ProxyConfiguration(CrawleeProxyConfiguration):
|
|
|
274
277
|
return
|
|
275
278
|
|
|
276
279
|
status = None
|
|
277
|
-
async with
|
|
280
|
+
async with impit.AsyncClient(proxy=proxy_info.url, timeout=10) as client:
|
|
278
281
|
for _ in range(2):
|
|
279
282
|
try:
|
|
280
283
|
response = await client.get(proxy_status_url)
|
|
281
|
-
status = response.
|
|
284
|
+
status = json.loads(response.text)
|
|
282
285
|
break
|
|
283
286
|
except Exception: # noqa: S110
|
|
284
287
|
# retry on connection errors
|
|
@@ -2,8 +2,9 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import builtins
|
|
4
4
|
import sys
|
|
5
|
+
from enum import Enum
|
|
5
6
|
from importlib import metadata
|
|
6
|
-
from typing import TYPE_CHECKING, Literal
|
|
7
|
+
from typing import TYPE_CHECKING, Any, Literal
|
|
7
8
|
|
|
8
9
|
if TYPE_CHECKING:
|
|
9
10
|
from collections.abc import Callable
|
|
@@ -81,3 +82,10 @@ def docs_name(symbol_name: str) -> Callable: # noqa: ARG001
|
|
|
81
82
|
return func
|
|
82
83
|
|
|
83
84
|
return wrapper
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def maybe_extract_enum_member_value(maybe_enum_member: Any) -> Any:
    """Unwrap an `Enum` member to its underlying value.

    Args:
        maybe_enum_member: Either an `Enum` member or any other object.

    Returns:
        The member's `value` when an `Enum` member is given; otherwise the argument itself, unchanged.
    """
    return maybe_enum_member.value if isinstance(maybe_enum_member, Enum) else maybe_enum_member
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import contextlib
|
|
5
|
+
from typing import TYPE_CHECKING, Annotated
|
|
6
|
+
|
|
7
|
+
import websockets.asyncio.client
|
|
8
|
+
from pydantic import Discriminator, TypeAdapter
|
|
9
|
+
from typing_extensions import Self, Unpack, override
|
|
10
|
+
|
|
11
|
+
from crawlee.events import EventManager
|
|
12
|
+
from crawlee.events._types import Event, EventPersistStateData
|
|
13
|
+
|
|
14
|
+
from apify._utils import docs_group
|
|
15
|
+
from apify.events._types import DeprecatedEvent, EventMessage, SystemInfoEventData, UnknownEvent
|
|
16
|
+
from apify.log import logger
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from types import TracebackType
|
|
20
|
+
|
|
21
|
+
from crawlee.events._event_manager import EventManagerOptions
|
|
22
|
+
|
|
23
|
+
from apify._configuration import Configuration
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Pydantic adapter for raw platform event messages. `EventMessage` is discriminated by its `name` field;
# `DeprecatedEvent` and `UnknownEvent` are the remaining members of the union.
event_data_adapter = TypeAdapter[EventMessage | DeprecatedEvent | UnknownEvent](
    Annotated[EventMessage, Discriminator('name')] | DeprecatedEvent | UnknownEvent
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@docs_group('Event managers')
class ApifyEventManager(EventManager):
    """Event manager for the Apify platform.

    This class extends Crawlee's `EventManager` to provide Apify-specific functionality, including websocket
    connectivity to the Apify platform for receiving platform events.

    The event manager handles:
    - Registration and emission of events and their listeners.
    - Websocket connection to Apify platform events.
    - Processing and validation of platform messages.
    - Automatic event forwarding from the platform to local event listeners.

    This class should not be used directly. Use the `Actor.on` and `Actor.off` methods to interact
    with the event system.
    """

    def __init__(self, configuration: Configuration, **kwargs: Unpack[EventManagerOptions]) -> None:
        """Initialize a new instance.

        Args:
            configuration: The Actor configuration for the event manager.
            **kwargs: Additional event manager options passed to the parent class.
        """
        super().__init__(**kwargs)

        self._configuration = configuration
        """The Actor configuration for the event manager."""

        self._platform_events_websocket: websockets.asyncio.client.ClientConnection | None = None
        """WebSocket connection to the platform events."""

        self._process_platform_messages_task: asyncio.Task | None = None
        """Task for processing messages from the platform websocket."""

        self._connected_to_platform_websocket: asyncio.Future[bool] | None = None
        """Future that resolves when the connection to the platform websocket is established."""

    @override
    async def __aenter__(self) -> Self:
        """Enter the parent event manager and, if configured, connect to the platform events websocket.

        Returns:
            This event manager instance.

        Raises:
            RuntimeError: If `actor_events_ws_url` is configured but the websocket connection fails.
        """
        await super().__aenter__()
        self._connected_to_platform_websocket = asyncio.Future()

        # Run tasks but don't await them
        if self._configuration.actor_events_ws_url:
            self._process_platform_messages_task = asyncio.create_task(
                self._process_platform_messages(self._configuration.actor_events_ws_url)
            )
            # The background task resolves this future with True once connected, or False on failure.
            is_connected = await self._connected_to_platform_websocket
            if not is_connected:
                raise RuntimeError('Error connecting to platform events websocket!')
        else:
            logger.debug('APIFY_ACTOR_EVENTS_WS_URL env var not set, no events from Apify platform will be emitted.')

        return self

    @override
    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        exc_traceback: TracebackType | None,
    ) -> None:
        """Close the platform websocket, cancel the message-processing task and exit the parent manager."""
        if self._platform_events_websocket:
            await self._platform_events_websocket.close()

        if self._process_platform_messages_task and not self._process_platform_messages_task.done():
            self._process_platform_messages_task.cancel()
            # Cancellation is the expected outcome here, so it must not propagate to the caller.
            with contextlib.suppress(asyncio.CancelledError):
                await self._process_platform_messages_task

        await super().__aexit__(exc_type, exc_value, exc_traceback)

    async def _process_platform_messages(self, ws_url: str) -> None:
        """Connect to the platform websocket and forward incoming events to local listeners.

        Resolves `_connected_to_platform_websocket` with `True` once the connection is open, or with `False`
        when the connection could not be established. Deprecated events are skipped, unknown events are
        logged and skipped, and messages that fail validation are logged without stopping the loop.

        Args:
            ws_url: URL of the platform events websocket.
        """
        try:
            async with websockets.asyncio.client.connect(ws_url) as websocket:
                self._platform_events_websocket = websocket
                if self._connected_to_platform_websocket is not None:
                    self._connected_to_platform_websocket.set_result(True)

                async for message in websocket:
                    try:
                        parsed_message = event_data_adapter.validate_json(message)

                        if isinstance(parsed_message, DeprecatedEvent):
                            continue

                        if isinstance(parsed_message, UnknownEvent):
                            logger.info(
                                f'Unknown message received: event_name={parsed_message.name}, '
                                f'event_data={parsed_message.data}'
                            )
                            continue

                        # System info payloads are converted to Crawlee's format before being emitted.
                        self.emit(
                            event=parsed_message.name,
                            event_data=parsed_message.data
                            if not isinstance(parsed_message.data, SystemInfoEventData)
                            else parsed_message.data.to_crawlee_format(self._configuration.dedicated_cpus or 1),
                        )

                        # On migration, stop the periodic persist-state task and emit one final persist-state
                        # event flagged as migrating.
                        if parsed_message.name == Event.MIGRATING:
                            await self._emit_persist_state_event_rec_task.stop()
                            self.emit(event=Event.PERSIST_STATE, event_data=EventPersistStateData(is_migrating=True))
                    except Exception:
                        # `message` is a reserved `LogRecord` attribute; passing it in `extra` raises `KeyError`
                        # inside the logging call, so the payload is attached under a non-clashing key.
                        logger.exception('Cannot parse Actor event', extra={'raw_message': message})
        except Exception:
            logger.exception('Error in websocket connection')
            connected = self._connected_to_platform_websocket
            # The future may already be resolved (the connection was established and failed later);
            # resolving it a second time would raise `asyncio.InvalidStateError`.
            if connected is not None and not connected.done():
                connected.set_result(False)
|