apify 2.4.0b5__tar.gz → 2.5.0b1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of apify has been flagged as possibly problematic — see the registry's advisory page for more details.
- {apify-2.4.0b5 → apify-2.5.0b1}/.github/workflows/run_code_checks.yaml +1 -1
- {apify-2.4.0b5 → apify-2.5.0b1}/PKG-INFO +1 -1
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/02_guides/05_scrapy.mdx +7 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/02_guides/code/scrapy_project/src/settings.py +2 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/pyproject.toml +3 -5
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/_actor.py +4 -4
- apify-2.5.0b1/src/apify/scrapy/extensions/__init__.py +3 -0
- apify-2.5.0b1/src/apify/scrapy/extensions/_httpcache.py +212 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/scrapy/requests.py +1 -1
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/scrapy/utils.py +3 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/unit/actor/test_actor_lifecycle.py +7 -5
- apify-2.5.0b1/tests/unit/scrapy/extensions/test_httpcache.py +71 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/uv.lock +37 -53
- {apify-2.4.0b5 → apify-2.5.0b1}/website/package-lock.json +291 -318
- apify-2.5.0b1/website/static/.nojekyll +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/.editorconfig +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/.github/CODEOWNERS +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/.github/workflows/build_and_deploy_docs.yaml +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/.github/workflows/check_pr_title.yaml +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/.github/workflows/pre_release.yaml +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/.github/workflows/release.yaml +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/.github/workflows/update_new_issue.yaml +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/.gitignore +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/.markdownlint.yaml +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/.pre-commit-config.yaml +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/CHANGELOG.md +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/CONTRIBUTING.md +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/LICENSE +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/Makefile +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/README.md +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/01_overview/01_introduction.mdx +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/01_overview/02_running_actors_locally.mdx +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/01_overview/03_actor_structure.mdx +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/01_overview/code/01_introduction.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/01_overview/code/actor_structure/__init__.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/01_overview/code/actor_structure/__main__.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/01_overview/code/actor_structure/main.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/01_overview/code/actor_structure/py.typed +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/02_guides/01_beautifulsoup_httpx.mdx +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/02_guides/02_crawlee.mdx +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/02_guides/03_playwright.mdx +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/02_guides/04_selenium.mdx +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/02_guides/code/01_beautifulsoup_httpx.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/02_guides/code/02_crawlee_beautifulsoup.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/02_guides/code/02_crawlee_playwright.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/02_guides/code/03_playwright.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/02_guides/code/04_selenium.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/02_guides/code/scrapy_project/src/__init__.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/02_guides/code/scrapy_project/src/__main__.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/02_guides/code/scrapy_project/src/items.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/02_guides/code/scrapy_project/src/main.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/02_guides/code/scrapy_project/src/py.typed +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/02_guides/code/scrapy_project/src/spiders/__init__.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/02_guides/code/scrapy_project/src/spiders/py.typed +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/02_guides/code/scrapy_project/src/spiders/title.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/01_actor_lifecycle.mdx +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/02_actor_input.mdx +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/03_storages.mdx +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/04_actor_events.mdx +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/05_proxy_management.mdx +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/06_interacting_with_other_actors.mdx +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/07_webhooks.mdx +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/08_access_apify_api.mdx +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/09_running_webserver.mdx +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/10_logging.mdx +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/11_configuration.mdx +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/12_pay_per_event.mdx +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/01_context_manager.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/01_init_exit.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/01_reboot.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/01_status_message.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/02_input.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/03_dataset_exports.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/03_dataset_read_write.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/03_deleting_storages.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/03_kvs_iterating.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/03_kvs_public_url.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/03_kvs_read_write.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/03_opening_storages.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/03_rq.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/04_actor_events.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/05_apify_proxy.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/05_apify_proxy_config.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/05_custom_proxy.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/05_custom_proxy_function.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/05_proxy_actor_input.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/05_proxy_httpx.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/05_proxy_rotation.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/06_interacting_call.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/06_interacting_call_task.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/06_interacting_metamorph.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/06_interacting_start.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/07_webhook.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/07_webhook_preventing.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/08_actor_client.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/08_actor_new_client.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/09_webserver.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/10_log_config.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/10_logger_usage.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/11_config.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/actor_charge.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/03_concepts/code/conditional_actor_charge.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/04_upgrading/upgrading_to_v2.md +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/docs/pyproject.toml +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/renovate.json +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/__init__.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/_charging.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/_configuration.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/_consts.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/_crypto.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/_models.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/_platform_event_manager.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/_proxy_configuration.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/_utils.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/apify_storage_client/__init__.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/apify_storage_client/_apify_storage_client.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/apify_storage_client/_dataset_client.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/apify_storage_client/_dataset_collection_client.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/apify_storage_client/_key_value_store_client.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/apify_storage_client/_key_value_store_collection_client.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/apify_storage_client/_request_queue_client.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/apify_storage_client/_request_queue_collection_client.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/apify_storage_client/py.typed +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/log.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/py.typed +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/scrapy/__init__.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/scrapy/_actor_runner.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/scrapy/_async_thread.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/scrapy/_logging_config.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/scrapy/middlewares/__init__.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/scrapy/middlewares/apify_proxy.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/scrapy/middlewares/py.typed +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/scrapy/pipelines/__init__.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/scrapy/pipelines/actor_dataset_push.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/scrapy/pipelines/py.typed +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/scrapy/py.typed +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/scrapy/scheduler.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/storages/__init__.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/storages/_request_list.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/src/apify/storages/py.typed +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/integration/README.md +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/integration/__init__.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/integration/_utils.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/integration/actor_source_base/Dockerfile +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/integration/actor_source_base/requirements.txt +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/integration/actor_source_base/src/__init__.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/integration/actor_source_base/src/__main__.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/integration/actor_source_base/src/main.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/integration/conftest.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/integration/test_actor_api_helpers.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/integration/test_actor_charge.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/integration/test_actor_create_proxy_configuration.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/integration/test_actor_dataset.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/integration/test_actor_events.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/integration/test_actor_key_value_store.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/integration/test_actor_lifecycle.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/integration/test_actor_log.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/integration/test_actor_request_queue.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/integration/test_actor_scrapy.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/integration/test_fixtures.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/integration/test_request_queue.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/unit/__init__.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/unit/actor/__init__.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/unit/actor/test_actor_create_proxy_configuration.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/unit/actor/test_actor_dataset.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/unit/actor/test_actor_env_helpers.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/unit/actor/test_actor_helpers.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/unit/actor/test_actor_key_value_store.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/unit/actor/test_actor_log.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/unit/actor/test_actor_non_default_instance.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/unit/actor/test_actor_request_queue.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/unit/actor/test_request_list.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/unit/conftest.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/unit/scrapy/__init__.py +0 -0
- {apify-2.4.0b5/tests/unit/scrapy/middlewares → apify-2.5.0b1/tests/unit/scrapy/extensions}/__init__.py +0 -0
- {apify-2.4.0b5/tests/unit/scrapy/pipelines → apify-2.5.0b1/tests/unit/scrapy/middlewares}/__init__.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/unit/scrapy/middlewares/test_apify_proxy.py +0 -0
- {apify-2.4.0b5/tests/unit/scrapy/requests → apify-2.5.0b1/tests/unit/scrapy/pipelines}/__init__.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/unit/scrapy/pipelines/test_actor_dataset_push.py +0 -0
- {apify-2.4.0b5/tests/unit/scrapy/utils → apify-2.5.0b1/tests/unit/scrapy/requests}/__init__.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/unit/scrapy/requests/test_to_apify_request.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/unit/scrapy/requests/test_to_scrapy_request.py +0 -0
- /apify-2.4.0b5/website/static/.nojekyll → /apify-2.5.0b1/tests/unit/scrapy/utils/__init__.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/unit/scrapy/utils/test_apply_apify_settings.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/unit/scrapy/utils/test_get_basic_auth_header.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/unit/test_crypto.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/unit/test_platform_event_manager.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/tests/unit/test_proxy_configuration.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/website/.eslintrc.json +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/website/babel.config.js +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/website/build_api_reference.sh +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/website/docusaurus.config.js +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/website/generate_module_shortcuts.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/website/package.json +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/website/sidebars.js +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/website/src/components/ApiLink.jsx +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/website/src/components/Gradients.jsx +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/website/src/components/Highlights.jsx +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/website/src/components/Highlights.module.css +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/website/src/components/RunnableCodeBlock.jsx +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/website/src/components/RunnableCodeBlock.module.css +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/website/src/css/custom.css +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/website/src/pages/home_page_example.py +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/website/src/pages/index.js +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/website/src/pages/index.module.css +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/website/static/img/docs-og.png +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/website/tools/docs-prettier.config.js +0 -0
- {apify-2.4.0b5 → apify-2.5.0b1}/website/tools/utils/externalLink.js +0 -0
|
@@ -26,5 +26,5 @@ jobs:
|
|
|
26
26
|
integration_tests:
|
|
27
27
|
name: Integration tests
|
|
28
28
|
needs: [lint_check, type_check, unit_tests]
|
|
29
|
-
uses: apify/workflows/.github/workflows/python_integration_tests.yaml@
|
|
29
|
+
uses: apify/workflows/.github/workflows/python_integration_tests.yaml@fix-integration-tests-from-forks
|
|
30
30
|
secrets: inherit
|
|
@@ -40,6 +40,7 @@ The Apify SDK provides several custom components to support integration with the
|
|
|
40
40
|
- [`apify.scrapy.ApifyScheduler`](https://docs.apify.com/sdk/python/reference/class/ApifyScheduler) - Replaces Scrapy's default [scheduler](https://docs.scrapy.org/en/latest/topics/scheduler.html) with one that uses Apify's [request queue](https://docs.apify.com/platform/storage/request-queue) for storing requests. It manages enqueuing, dequeuing, and maintaining the state and priority of requests.
|
|
41
41
|
- [`apify.scrapy.ActorDatasetPushPipeline`](https://docs.apify.com/sdk/python/reference/class/ActorDatasetPushPipeline) - A Scrapy [item pipeline](https://docs.scrapy.org/en/latest/topics/item-pipeline.html) that pushes scraped items to Apify's [dataset](https://docs.apify.com/platform/storage/dataset). When enabled, every item produced by the spider is sent to the dataset.
|
|
42
42
|
- [`apify.scrapy.ApifyHttpProxyMiddleware`](https://docs.apify.com/sdk/python/reference/class/ApifyHttpProxyMiddleware) - A Scrapy [middleware](https://docs.scrapy.org/en/latest/topics/downloader-middleware.html) that manages proxy configurations. This middleware replaces Scrapy's default `HttpProxyMiddleware` to facilitate the use of Apify's proxy service.
|
|
43
|
+
- [`apify.scrapy.extensions.ApifyCacheStorage`](https://docs.apify.com/sdk/python/reference/class/ApifyCacheStorage) - A storage backend for Scrapy's built-in [HTTP cache middleware](https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#module-scrapy.downloadermiddlewares.httpcache). This backend uses Apify's [key-value store](https://docs.apify.com/platform/storage/key-value-store). Make sure to set `HTTPCACHE_ENABLED` and `HTTPCACHE_EXPIRATION_SECS` in your settings, or caching won't work.
|
|
43
44
|
|
|
44
45
|
Additional helper functions in the [`apify.scrapy`](https://github.com/apify/apify-sdk-python/tree/master/src/apify/scrapy) subpackage include:
|
|
45
46
|
|
|
@@ -94,6 +95,12 @@ The following example demonstrates a Scrapy Actor that scrapes page titles and e
|
|
|
94
95
|
</TabItem>
|
|
95
96
|
</Tabs>
|
|
96
97
|
|
|
98
|
+
## Dealing with ‘imminent migration to another host’
|
|
99
|
+
|
|
100
|
+
Under some circumstances, the platform may decide to [migrate your Actor](https://docs.apify.com/academy/expert-scraping-with-apify/migrations-maintaining-state) from one piece of infrastructure to another while it's in progress. While [Crawlee](https://crawlee.dev/python)-based projects can pause and resume their work after a restart, achieving the same with a Scrapy-based project can be challenging.
|
|
101
|
+
|
|
102
|
+
As a workaround for this issue (tracked as [apify/actor-templates#303](https://github.com/apify/actor-templates/issues/303)), turn on caching with `HTTPCACHE_ENABLED` and set `HTTPCACHE_EXPIRATION_SECS` to at least a few minutes—the exact value depends on your use case. If your Actor gets migrated and restarted, the subsequent run will hit the cache, making it fast and avoiding unnecessary resource consumption.
|
|
103
|
+
|
|
97
104
|
## Conclusion
|
|
98
105
|
|
|
99
106
|
In this guide you learned how to use Scrapy in Apify Actors. You can now start building your own web scraping projects using Scrapy, the Apify SDK and host them on the Apify platform. See the [Actor templates](https://apify.com/templates/categories/python) to get started with your own scraping tasks. If you have questions or need assistance, feel free to reach out on our [GitHub](https://github.com/apify/apify-sdk-python) or join our [Discord community](https://discord.com/invite/jyEM2PRvMU). Happy scraping!
|
|
@@ -7,3 +7,5 @@ SPIDER_MODULES = ['src.spiders']
|
|
|
7
7
|
TELNETCONSOLE_ENABLED = False
|
|
8
8
|
# Do not change the Twisted reactor unless you really know what you are doing.
|
|
9
9
|
TWISTED_REACTOR = 'twisted.internet.asyncioreactor.AsyncioSelectorReactor'
|
|
10
|
+
HTTPCACHE_ENABLED = True
|
|
11
|
+
HTTPCACHE_EXPIRATION_SECS = 7200
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "apify"
|
|
7
|
-
version = "2.
|
|
7
|
+
version = "2.5.0b1"
|
|
8
8
|
description = "Apify SDK for Python"
|
|
9
9
|
authors = [{ name = "Apify Technologies s.r.o.", email = "support@apify.com" }]
|
|
10
10
|
license = { file = "LICENSE" }
|
|
@@ -59,19 +59,17 @@ scrapy = ["scrapy>=2.11.0"]
|
|
|
59
59
|
[dependency-groups]
|
|
60
60
|
dev = [
|
|
61
61
|
"build~=1.2.0",
|
|
62
|
-
"filelock~=3.17.0",
|
|
63
62
|
"griffe~=1.6.0",
|
|
64
63
|
"mypy~=1.15.0",
|
|
65
64
|
"pre-commit~=4.1.0",
|
|
66
65
|
"pydoc-markdown~=4.8.0",
|
|
67
|
-
"pytest~=8.3.0",
|
|
68
66
|
"pytest-asyncio~=0.25.0",
|
|
69
67
|
"pytest-cov~=6.0.0",
|
|
70
68
|
"pytest-only~=2.1.0",
|
|
71
|
-
"pytest-timeout~=2.3.0",
|
|
72
69
|
"pytest-xdist~=3.6.0",
|
|
70
|
+
"pytest~=8.3.0",
|
|
73
71
|
"respx~=0.22.0",
|
|
74
|
-
"ruff~=0.
|
|
72
|
+
"ruff~=0.11.0",
|
|
75
73
|
"setuptools~=76.0.0", # setuptools are used by pytest but not explicitly required
|
|
76
74
|
]
|
|
77
75
|
|
|
@@ -141,7 +141,7 @@ class _ActorType:
|
|
|
141
141
|
await self.exit()
|
|
142
142
|
|
|
143
143
|
def __repr__(self) -> str:
|
|
144
|
-
if self is cast(Proxy, Actor).__wrapped__:
|
|
144
|
+
if self is cast('Proxy', Actor).__wrapped__:
|
|
145
145
|
return '<apify.Actor>'
|
|
146
146
|
|
|
147
147
|
return super().__repr__()
|
|
@@ -222,7 +222,7 @@ class _ActorType:
|
|
|
222
222
|
self.log.warning('Repeated Actor initialization detected - this is non-standard usage, proceed with care')
|
|
223
223
|
|
|
224
224
|
# Make sure that the currently initialized instance is also available through the global `Actor` proxy
|
|
225
|
-
cast(Proxy, Actor).__wrapped__ = self
|
|
225
|
+
cast('Proxy', Actor).__wrapped__ = self
|
|
226
226
|
|
|
227
227
|
self._is_exiting = False
|
|
228
228
|
self._was_final_persist_state_emitted = False
|
|
@@ -674,7 +674,7 @@ class _ActorType:
|
|
|
674
674
|
elif isinstance(field.validation_alias, str):
|
|
675
675
|
aliases = [field.validation_alias]
|
|
676
676
|
elif isinstance(field.validation_alias, AliasChoices):
|
|
677
|
-
aliases = cast(list[str], field.validation_alias.choices)
|
|
677
|
+
aliases = cast('list[str]', field.validation_alias.choices)
|
|
678
678
|
else:
|
|
679
679
|
aliases = [field_name]
|
|
680
680
|
|
|
@@ -1158,5 +1158,5 @@ class _ActorType:
|
|
|
1158
1158
|
return True
|
|
1159
1159
|
|
|
1160
1160
|
|
|
1161
|
-
Actor = cast(_ActorType, Proxy(_ActorType))
|
|
1161
|
+
Actor = cast('_ActorType', Proxy(_ActorType))
|
|
1162
1162
|
"""The entry point of the SDK, through which all the Actor operations should be done."""
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import gzip
|
|
4
|
+
import io
|
|
5
|
+
import pickle
|
|
6
|
+
import re
|
|
7
|
+
import struct
|
|
8
|
+
from logging import getLogger
|
|
9
|
+
from time import time
|
|
10
|
+
from typing import TYPE_CHECKING
|
|
11
|
+
|
|
12
|
+
from scrapy.http.headers import Headers
|
|
13
|
+
from scrapy.responsetypes import responsetypes
|
|
14
|
+
|
|
15
|
+
from apify import Configuration
|
|
16
|
+
from apify.apify_storage_client import ApifyStorageClient
|
|
17
|
+
from apify.scrapy._async_thread import AsyncThread
|
|
18
|
+
from apify.storages import KeyValueStore
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from scrapy import Request, Spider
|
|
22
|
+
from scrapy.http.response import Response
|
|
23
|
+
from scrapy.settings import BaseSettings
|
|
24
|
+
from scrapy.utils.request import RequestFingerprinterProtocol
|
|
25
|
+
|
|
26
|
+
logger = getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class ApifyCacheStorage:
    """A Scrapy cache storage that uses the Apify `KeyValueStore` to store responses.

    It can be set as a storage for Scrapy's built-in `HttpCacheMiddleware`, which caches
    responses to requests. See HTTPCache middleware settings (prefixed with `HTTPCACHE_`)
    in the Scrapy documentation for more information. Requires the asyncio Twisted reactor
    to be installed.
    """

    def __init__(self, settings: BaseSettings) -> None:
        # Cap on how many cache items are inspected for expiration when the spider
        # closes, so shutdown stays fast even for large caches.
        self._expiration_max_items = 100
        self._expiration_secs: int = settings.getint('HTTPCACHE_EXPIRATION_SECS')
        self._spider: Spider | None = None
        self._kvs: KeyValueStore | None = None
        self._fingerprinter: RequestFingerprinterProtocol | None = None
        # Background thread running an asyncio event loop; all key value store
        # operations are executed there because this Scrapy storage API is synchronous.
        self._async_thread: AsyncThread | None = None

    def open_spider(self, spider: Spider) -> None:
        """Open the cache storage for a spider.

        Starts a background event-loop thread and opens (or creates) the spider's
        dedicated key value store, named after the spider via `get_kvs_name`.
        """
        logger.debug('Using Apify key value cache storage', extra={'spider': spider})
        self._spider = spider
        self._fingerprinter = spider.crawler.request_fingerprinter
        kvs_name = get_kvs_name(spider.name)

        async def open_kvs() -> KeyValueStore:
            config = Configuration.get_global_configuration()
            # On the Apify platform use the platform storage client; locally fall
            # back to the default (file-system) storage.
            if config.is_at_home:
                storage_client = ApifyStorageClient.from_config(config)
                return await KeyValueStore.open(name=kvs_name, storage_client=storage_client)
            return await KeyValueStore.open(name=kvs_name)

        logger.debug("Starting background thread for cache storage's event loop")
        self._async_thread = AsyncThread()
        logger.debug(f"Opening cache storage's {kvs_name!r} key value store")
        self._kvs = self._async_thread.run_coro(open_kvs())

    def close_spider(self, _: Spider, current_time: int | None = None) -> None:
        """Close the cache storage for a spider.

        If expiration is enabled (`HTTPCACHE_EXPIRATION_SECS` > 0), removes expired
        or malformed cache items (up to `self._expiration_max_items` inspected) and
        then shuts down the background event-loop thread.

        Args:
            _: The spider being closed (unused).
            current_time: Optional override of the current Unix timestamp, for testing.

        Raises:
            ValueError: If the storage was never opened.
        """
        if self._async_thread is None:
            raise ValueError('Async thread not initialized')

        # Fix: announce (and run) the cleanup only when expiration is enabled;
        # previously the info message was logged even with expiration turned off.
        if self._expiration_secs > 0:
            logger.info(f'Cleaning up cache items (max {self._expiration_max_items})')
            if current_time is None:
                current_time = int(time())

            async def expire_kvs() -> None:
                if self._kvs is None:
                    raise ValueError('Key value store not initialized')
                i = 0
                async for item in self._kvs.iterate_keys():
                    # Fix: bound the scan to exactly `_expiration_max_items` items;
                    # the check previously ran after processing, inspecting one extra.
                    if i >= self._expiration_max_items:
                        break
                    i += 1
                    value = await self._kvs.get_value(item.key)
                    try:
                        gzip_time = read_gzip_time(value)
                    except Exception as e:
                        # Unreadable header means the item can never be validated,
                        # so purge it (setting a value to None deletes the key).
                        logger.warning(f'Malformed cache item {item.key}: {e}')
                        await self._kvs.set_value(item.key, None)
                    else:
                        if self._expiration_secs < current_time - gzip_time:
                            logger.debug(f'Expired cache item {item.key}')
                            await self._kvs.set_value(item.key, None)
                        else:
                            logger.debug(f'Valid cache item {item.key}')

            self._async_thread.run_coro(expire_kvs())

        logger.debug('Closing cache storage')
        try:
            self._async_thread.close()
        except KeyboardInterrupt:
            logger.warning('Shutdown interrupted by KeyboardInterrupt!')
        except Exception:
            logger.exception('Exception occurred while shutting down cache storage')
        finally:
            logger.debug('Cache storage closed')

    def retrieve_response(self, _: Spider, request: Request, current_time: int | None = None) -> Response | None:
        """Retrieve a response from the cache storage.

        Returns `None` on a cache miss or when the cached entry has expired.

        Args:
            _: The spider issuing the request (unused).
            request: The request whose cached response should be looked up.
            current_time: Optional override of the current Unix timestamp, for testing.

        Raises:
            ValueError: If the storage was never opened.
        """
        if self._async_thread is None:
            raise ValueError('Async thread not initialized')
        if self._kvs is None:
            raise ValueError('Key value store not initialized')
        if self._fingerprinter is None:
            raise ValueError('Request fingerprinter not initialized')

        key = self._fingerprinter.fingerprint(request).hex()
        value = self._async_thread.run_coro(self._kvs.get_value(key))

        if value is None:
            logger.debug('Cache miss', extra={'request': request})
            return None

        if current_time is None:
            current_time = int(time())
        # The gzip header's MTIME field doubles as the cache entry's creation time.
        if 0 < self._expiration_secs < current_time - read_gzip_time(value):
            logger.debug('Cache expired', extra={'request': request})
            return None

        data = from_gzip(value)
        url = data['url']
        status = data['status']
        headers = Headers(data['headers'])
        body = data['body']
        # Let Scrapy pick the appropriate Response subclass for the content.
        respcls = responsetypes.from_args(headers=headers, url=url, body=body)

        logger.debug('Cache hit', extra={'request': request})
        return respcls(url=url, headers=headers, status=status, body=body)

    def store_response(self, _: Spider, request: Request, response: Response) -> None:
        """Store a response in the cache storage.

        The response is pickled, gzip-compressed, and saved under the request's
        fingerprint; the gzip MTIME records the storage time for later expiration.

        Raises:
            ValueError: If the storage was never opened.
        """
        if self._async_thread is None:
            raise ValueError('Async thread not initialized')
        if self._kvs is None:
            raise ValueError('Key value store not initialized')
        if self._fingerprinter is None:
            raise ValueError('Request fingerprinter not initialized')

        key = self._fingerprinter.fingerprint(request).hex()
        data = {
            'status': response.status,
            'url': response.url,
            'headers': dict(response.headers),
            'body': response.body,
        }
        value = to_gzip(data)
        self._async_thread.run_coro(self._kvs.set_value(key, value))
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def to_gzip(data: dict, mtime: int | None = None) -> bytes:
    """Dump a dictionary to a gzip-compressed byte stream.

    Args:
        data: The dictionary to serialize.
        mtime: Gzip header modification time; defaults to the current time.

    Returns: The gzip-compressed pickle of `data`.
    """
    # Protocol 4 keeps the payload readable by all supported Python versions.
    payload = pickle.dumps(data, protocol=4)
    return gzip.compress(payload, mtime=mtime)
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def from_gzip(gzip_bytes: bytes) -> dict:
    """Load a dictionary from a gzip-compressed byte stream.

    NOTE: uses pickle, which is only safe because the cache entries are produced
    by this same Actor — never feed untrusted data to this function.
    """
    payload = gzip.decompress(gzip_bytes)
    data: dict = pickle.loads(payload)
    return data
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def read_gzip_time(gzip_bytes: bytes) -> int:
    """Read the modification time from a gzip-compressed byte stream without decompressing the data.

    Args:
        gzip_bytes: A gzip-compressed byte stream.

    Returns: The MTIME field of the gzip header as a Unix timestamp.

    Raises:
        ValueError: If the input is too short or does not start with the gzip magic
            (previously such input produced an obscure struct.error or, worse, a
            meaningless timestamp).
    """
    header = gzip_bytes[:10]
    if len(header) < 10 or header[:2] != b'\x1f\x8b':
        raise ValueError('Not a gzip byte stream')
    # Gzip header layout (RFC 1952): magic (2B), CM (1B), FLG (1B), MTIME (4B LE), XFL (1B), OS (1B).
    header_components = struct.unpack('<HBBI2B', header)
    mtime: int = header_components[3]
    return mtime
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def get_kvs_name(spider_name: str, max_length: int = 60) -> str:
    """Derive the key value store name for a spider.

    Every run of characters outside ``[a-zA-Z0-9]`` collapses to a single
    hyphen, leading/trailing hyphens are trimmed, the result is prefixed with
    'httpcache-' and truncated to ``max_length``.

    Per the Apify storage documentation
    [about storages](https://docs.apify.com/platform/storage/usage#named-and-unnamed-storages)
    names may be at most 63 characters, hence the default limit of 60.

    The name is not guaranteed unique per spider, but is unique enough in
    practice and makes the owning spider recognizable in the Apify console
    storage listing.

    Args:
        spider_name: Value of the Spider instance's name attribute.
        max_length: Maximum length of the key value store name.

    Returns: Key value store name.

    Raises:
        ValueError: If the spider name contains only special characters.
    """
    slug = re.sub(r'[^a-zA-Z0-9]+', '-', spider_name).strip('-')
    if not slug:
        raise ValueError(f'Unsupported spider name: {spider_name!r} (slug: {slug!r})')
    return f'httpcache-{slug}'[:max_length]
|
|
@@ -93,7 +93,7 @@ def to_scrapy_request(apify_request: ApifyRequest, spider: Spider) -> ScrapyRequ
|
|
|
93
93
|
Returns:
|
|
94
94
|
The converted Scrapy request.
|
|
95
95
|
"""
|
|
96
|
-
if not isinstance(cast(Any, apify_request), ApifyRequest):
|
|
96
|
+
if not isinstance(cast('Any', apify_request), ApifyRequest):
|
|
97
97
|
raise TypeError('apify_request must be a crawlee.ScrapyRequest instance')
|
|
98
98
|
|
|
99
99
|
logger.debug(f'to_scrapy_request was called (apify_request={apify_request})...')
|
|
@@ -44,6 +44,9 @@ def apply_apify_settings(*, settings: Settings | None = None, proxy_config: dict
|
|
|
44
44
|
settings['DOWNLOADER_MIDDLEWARES']['scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware'] = None
|
|
45
45
|
settings['DOWNLOADER_MIDDLEWARES']['apify.scrapy.middlewares.ApifyHttpProxyMiddleware'] = 750
|
|
46
46
|
|
|
47
|
+
# Set the default HTTPCache middleware storage backend to ApifyCacheStorage
|
|
48
|
+
settings['HTTPCACHE_STORAGE'] = 'apify.scrapy.extensions.ApifyCacheStorage'
|
|
49
|
+
|
|
47
50
|
# Store the proxy configuration
|
|
48
51
|
settings['APIFY_PROXY_SETTINGS'] = proxy_config
|
|
49
52
|
|
|
@@ -5,12 +5,11 @@ import contextlib
|
|
|
5
5
|
import json
|
|
6
6
|
import sys
|
|
7
7
|
from datetime import datetime, timezone
|
|
8
|
-
from typing import Any, Callable, cast
|
|
8
|
+
from typing import TYPE_CHECKING, Any, Callable, cast
|
|
9
9
|
from unittest.mock import AsyncMock, Mock
|
|
10
10
|
|
|
11
11
|
import pytest
|
|
12
12
|
import websockets.asyncio.server
|
|
13
|
-
from lazy_object_proxy import Proxy
|
|
14
13
|
|
|
15
14
|
from apify_shared.consts import ActorEnvVars, ApifyEnvVars
|
|
16
15
|
from crawlee.events._types import Event, EventPersistStateData
|
|
@@ -19,12 +18,15 @@ import apify._actor
|
|
|
19
18
|
from apify import Actor
|
|
20
19
|
from apify._actor import _ActorType
|
|
21
20
|
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
from lazy_object_proxy import Proxy
|
|
23
|
+
|
|
22
24
|
|
|
23
25
|
async def test_actor_properly_init_with_async() -> None:
    # Entering the async context must initialize the lazily-proxied global
    # Actor instance; the proxy's __wrapped__ is the concrete _ActorType.
    async with Actor:
        assert cast('Proxy', apify._actor.Actor).__wrapped__ is not None
        assert cast('Proxy', apify._actor.Actor).__wrapped__._is_initialized
    # Exiting the context tears initialization down again on the same instance.
    assert not cast('Proxy', apify._actor.Actor).__wrapped__._is_initialized
|
|
28
30
|
|
|
29
31
|
|
|
30
32
|
async def test_actor_init() -> None:
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
from time import time
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from apify.scrapy.extensions._httpcache import from_gzip, get_kvs_name, read_gzip_time, to_gzip
|
|
6
|
+
|
|
7
|
+
# Sample payload shared by the gzip round-trip tests below.
FIXTURE_DICT = {'name': 'Alice'}

# Reference output of to_gzip(FIXTURE_DICT, mtime=0) — with a fixed mtime the
# compressed bytes are reproducible (see test_to_gzip).
FIXTURE_BYTES = (
    b'\x1f\x8b\x08\x00\x00\x00\x00\x00\x02\xffk`\x99*\xcc\x00\x01\xb5SzX\xf2\x12s'
    b'S\xa7\xf4\xb0:\xe6d&\xa7N)\xd6\x03\x00\x1c\xe8U\x9c\x1e\x00\x00\x00'
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def test_gzip() -> None:
    """Round-tripping through to_gzip and from_gzip yields the original dict."""
    round_tripped = from_gzip(to_gzip(FIXTURE_DICT))
    assert round_tripped == FIXTURE_DICT
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_to_gzip() -> None:
    """With a fixed mtime the compressed output is byte-for-byte reproducible."""
    assert to_gzip(FIXTURE_DICT, mtime=0) == FIXTURE_BYTES
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def test_from_gzip() -> None:
    """The reference byte fixture decompresses back to the original dict."""
    assert from_gzip(FIXTURE_BYTES) == FIXTURE_DICT
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def test_read_gzip_time() -> None:
    """The fixture was written with mtime=0, which the header reader reports."""
    mtime = read_gzip_time(FIXTURE_BYTES)
    assert mtime == 0
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def test_read_gzip_time_non_zero() -> None:
    """A freshly stamped mtime survives the write/read round trip."""
    now = int(time())
    assert read_gzip_time(to_gzip(FIXTURE_DICT, mtime=now)) == now
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@pytest.mark.parametrize(
    ('spider_name', 'expected_name'),
    [
        ('test', 'httpcache-test'),
        ('123', 'httpcache-123'),
        ('test-spider', 'httpcache-test-spider'),
        ('test_spider', 'httpcache-test-spider'),
        ('test spider', 'httpcache-test-spider'),
        ('test👻spider', 'httpcache-test-spider'),
        ('test@spider', 'httpcache-test-spider'),
        (' test spider ', 'httpcache-test-spider'),
        ('testspider.com', 'httpcache-testspider-com'),
        ('t' * 100, 'httpcache-tttttttttttttttttttttttttttttttttttttttttttttttttt'),
    ],
)
def test_get_kvs_name(spider_name: str, expected_name: str) -> None:
    """Spider names are slugified, prefixed, and length-capped."""
    actual_name = get_kvs_name(spider_name)
    assert actual_name == expected_name
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@pytest.mark.parametrize(
    'spider_name',
    [
        '',
        '-',
        '-@-/-',
    ],
)
def test_get_kvs_name_raises(spider_name: str) -> None:
    """Spider names that slugify to an empty string are rejected."""
    # The call itself must raise; wrapping it in `assert` (as before) was dead
    # code, since execution never reaches the assertion when the call raises.
    with pytest.raises(ValueError, match='Unsupported spider name'):
        get_kvs_name(spider_name)
|