apify 3.0.0rc1__tar.gz → 3.0.1b2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apify might be problematic. Click here for more details.
- {apify-3.0.0rc1 → apify-3.0.1b2}/.github/workflows/build_and_deploy_docs.yaml +4 -4
- {apify-3.0.0rc1 → apify-3.0.1b2}/.github/workflows/update_new_issue.yaml +1 -1
- {apify-3.0.0rc1 → apify-3.0.1b2}/CHANGELOG.md +22 -133
- {apify-3.0.0rc1 → apify-3.0.1b2}/PKG-INFO +20 -5
- {apify-3.0.0rc1 → apify-3.0.1b2}/README.md +16 -1
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/09_running_webserver.mdx +2 -2
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/12_pay_per_event.mdx +6 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/07_webhook_preventing.py +1 -1
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/09_webserver.py +2 -2
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/conditional_actor_charge.py +1 -1
- apify-3.0.1b2/docs/03_guides/01_beautifulsoup_httpx.mdx +30 -0
- apify-3.0.1b2/docs/03_guides/02_parsel_impit.mdx +28 -0
- apify-3.0.1b2/docs/03_guides/05_crawlee.mdx +46 -0
- {apify-3.0.0rc1/docs/02_guides → apify-3.0.1b2/docs/03_guides}/code/01_beautifulsoup_httpx.py +7 -4
- apify-3.0.1b2/docs/03_guides/code/02_parsel_impit.py +94 -0
- {apify-3.0.0rc1/docs/02_guides → apify-3.0.1b2/docs/03_guides}/code/03_playwright.py +6 -3
- {apify-3.0.0rc1/docs/02_guides → apify-3.0.1b2/docs/03_guides}/code/04_selenium.py +5 -3
- apify-3.0.1b2/docs/03_guides/code/05_crawlee_beautifulsoup.py +55 -0
- apify-3.0.1b2/docs/03_guides/code/05_crawlee_parsel.py +55 -0
- apify-3.0.1b2/docs/03_guides/code/05_crawlee_playwright.py +58 -0
- apify-3.0.1b2/docs/04_upgrading/upgrading_to_v3.md +159 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/pyproject.toml +10 -12
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/_actor.py +150 -117
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/_charging.py +19 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/_configuration.py +51 -11
- apify-3.0.1b2/src/apify/events/__init__.py +5 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/storage_clients/__init__.py +2 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/storage_clients/_apify/_dataset_client.py +47 -23
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/storage_clients/_apify/_key_value_store_client.py +46 -22
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/storage_clients/_apify/_models.py +25 -1
- apify-3.0.1b2/src/apify/storage_clients/_apify/_request_queue_client.py +327 -0
- apify-3.0.0rc1/src/apify/storage_clients/_apify/_request_queue_client.py → apify-3.0.1b2/src/apify/storage_clients/_apify/_request_queue_shared_client.py +45 -305
- apify-3.0.1b2/src/apify/storage_clients/_apify/_request_queue_single_client.py +399 -0
- apify-3.0.1b2/src/apify/storage_clients/_apify/_storage_client.py +106 -0
- apify-3.0.1b2/src/apify/storage_clients/_apify/_utils.py +194 -0
- apify-3.0.1b2/src/apify/storage_clients/_file_system/_key_value_store_client.py +103 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/storage_clients/_file_system/_storage_client.py +7 -1
- apify-3.0.1b2/src/apify/storage_clients/_smart_apify/__init__.py +1 -0
- apify-3.0.1b2/src/apify/storage_clients/_smart_apify/_storage_client.py +117 -0
- apify-3.0.1b2/tests/integration/_utils.py +17 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/integration/actor_source_base/requirements.txt +1 -1
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/integration/conftest.py +25 -9
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/integration/test_actor_create_proxy_configuration.py +4 -1
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/integration/test_actor_dataset.py +83 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/integration/test_actor_key_value_store.py +85 -2
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/integration/test_actor_lifecycle.py +41 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/integration/test_actor_request_queue.py +182 -11
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/integration/test_actor_scrapy.py +1 -1
- apify-3.0.1b2/tests/integration/test_apify_storages.py +152 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/integration/test_crawlers_with_storages.py +2 -1
- apify-3.0.1b2/tests/integration/test_request_queue.py +1191 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/actor/test_actor_helpers.py +0 -5
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/actor/test_actor_lifecycle.py +17 -17
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/actor/test_actor_log.py +39 -34
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/actor/test_actor_non_default_instance.py +1 -1
- apify-3.0.1b2/tests/unit/actor/test_configuration.py +243 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/conftest.py +30 -9
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/events/test_apify_event_manager.py +2 -4
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/scrapy/extensions/test_httpcache.py +1 -1
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/storage_clients/test_apify_request_queue_client.py +1 -1
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/storage_clients/test_file_system.py +40 -3
- apify-3.0.1b2/tests/unit/test_apify_storages.py +63 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/test_crypto.py +2 -2
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/test_proxy_configuration.py +10 -15
- apify-3.0.1b2/uv.lock +2809 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/website/package-lock.json +3049 -1318
- {apify-3.0.0rc1 → apify-3.0.1b2}/website/package.json +3 -3
- {apify-3.0.0rc1 → apify-3.0.1b2}/website/sidebars.js +4 -4
- apify-3.0.1b2/website/src/theme/DocItem/Content/index.js +6 -0
- apify-3.0.0rc1/docs/02_guides/01_beautifulsoup_httpx.mdx +0 -30
- apify-3.0.0rc1/docs/02_guides/02_crawlee.mdx +0 -37
- apify-3.0.0rc1/docs/02_guides/code/02_crawlee_beautifulsoup.py +0 -55
- apify-3.0.0rc1/docs/02_guides/code/02_crawlee_playwright.py +0 -68
- apify-3.0.0rc1/docs/04_upgrading/upgrading_to_v3.md +0 -18
- apify-3.0.0rc1/src/apify/events/__init__.py +0 -5
- apify-3.0.0rc1/src/apify/storage_clients/_apify/_storage_client.py +0 -80
- apify-3.0.0rc1/src/apify/storage_clients/_file_system/_key_value_store_client.py +0 -36
- apify-3.0.0rc1/tests/integration/_utils.py +0 -9
- apify-3.0.0rc1/tests/integration/test_request_queue.py +0 -1194
- apify-3.0.0rc1/tests/unit/actor/test_configuration.py +0 -21
- apify-3.0.0rc1/uv.lock +0 -2589
- {apify-3.0.0rc1 → apify-3.0.1b2}/.editorconfig +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/.github/CODEOWNERS +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/.github/workflows/check_pr_title.yaml +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/.github/workflows/pre_release.yaml +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/.github/workflows/release.yaml +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/.github/workflows/run_code_checks.yaml +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/.gitignore +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/.markdownlint.yaml +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/.pre-commit-config.yaml +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/CONTRIBUTING.md +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/LICENSE +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/Makefile +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/docs/01_overview/01_introduction.mdx +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/docs/01_overview/02_running_actors_locally.mdx +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/docs/01_overview/03_actor_structure.mdx +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/docs/01_overview/code/01_introduction.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/docs/01_overview/code/actor_structure/__init__.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/docs/01_overview/code/actor_structure/__main__.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/docs/01_overview/code/actor_structure/main.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/docs/01_overview/code/actor_structure/py.typed +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/01_actor_lifecycle.mdx +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/02_actor_input.mdx +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/03_storages.mdx +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/04_actor_events.mdx +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/05_proxy_management.mdx +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/06_interacting_with_other_actors.mdx +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/07_webhooks.mdx +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/08_access_apify_api.mdx +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/10_logging.mdx +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/11_configuration.mdx +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/01_context_manager.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/01_init_exit.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/01_reboot.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/01_status_message.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/02_input.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/03_dataset_exports.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/03_dataset_read_write.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/03_deleting_storages.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/03_kvs_iterating.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/03_kvs_public_url.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/03_kvs_read_write.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/03_opening_storages.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/03_rq.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/04_actor_events.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/05_apify_proxy.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/05_apify_proxy_config.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/05_custom_proxy.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/05_custom_proxy_function.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/05_proxy_actor_input.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/05_proxy_httpx.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/05_proxy_rotation.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/06_interacting_call.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/06_interacting_call_task.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/06_interacting_metamorph.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/06_interacting_start.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/07_webhook.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/08_actor_client.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/08_actor_new_client.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/10_log_config.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/10_logger_usage.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/10_redirect_log.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/10_redirect_log_existing_run.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/11_config.py +0 -0
- {apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/actor_charge.py +0 -0
- {apify-3.0.0rc1/docs/02_guides → apify-3.0.1b2/docs/03_guides}/03_playwright.mdx +0 -0
- {apify-3.0.0rc1/docs/02_guides → apify-3.0.1b2/docs/03_guides}/04_selenium.mdx +0 -0
- apify-3.0.0rc1/docs/02_guides/05_scrapy.mdx → apify-3.0.1b2/docs/03_guides/06_scrapy.mdx +0 -0
- {apify-3.0.0rc1/docs/02_guides → apify-3.0.1b2/docs/03_guides}/code/scrapy_project/src/__init__.py +0 -0
- {apify-3.0.0rc1/docs/02_guides → apify-3.0.1b2/docs/03_guides}/code/scrapy_project/src/__main__.py +0 -0
- {apify-3.0.0rc1/docs/02_guides → apify-3.0.1b2/docs/03_guides}/code/scrapy_project/src/items.py +0 -0
- {apify-3.0.0rc1/docs/02_guides → apify-3.0.1b2/docs/03_guides}/code/scrapy_project/src/main.py +0 -0
- {apify-3.0.0rc1/docs/02_guides → apify-3.0.1b2/docs/03_guides}/code/scrapy_project/src/py.typed +0 -0
- {apify-3.0.0rc1/docs/02_guides → apify-3.0.1b2/docs/03_guides}/code/scrapy_project/src/settings.py +0 -0
- {apify-3.0.0rc1/docs/02_guides → apify-3.0.1b2/docs/03_guides}/code/scrapy_project/src/spiders/__init__.py +0 -0
- {apify-3.0.0rc1/docs/02_guides → apify-3.0.1b2/docs/03_guides}/code/scrapy_project/src/spiders/py.typed +0 -0
- {apify-3.0.0rc1/docs/02_guides → apify-3.0.1b2/docs/03_guides}/code/scrapy_project/src/spiders/title.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/docs/04_upgrading/upgrading_to_v2.md +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/docs/pyproject.toml +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/renovate.json +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/__init__.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/_consts.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/_crypto.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/_models.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/_proxy_configuration.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/_utils.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/events/_apify_event_manager.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/events/_types.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/events/py.typed +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/log.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/py.typed +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/request_loaders/__init__.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/request_loaders/_apify_request_list.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/request_loaders/py.typed +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/scrapy/__init__.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/scrapy/_actor_runner.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/scrapy/_async_thread.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/scrapy/_logging_config.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/scrapy/extensions/__init__.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/scrapy/extensions/_httpcache.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/scrapy/middlewares/__init__.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/scrapy/middlewares/apify_proxy.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/scrapy/middlewares/py.typed +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/scrapy/pipelines/__init__.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/scrapy/pipelines/actor_dataset_push.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/scrapy/pipelines/py.typed +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/scrapy/py.typed +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/scrapy/requests.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/scrapy/scheduler.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/scrapy/utils.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/storage_clients/_apify/__init__.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/storage_clients/_apify/py.typed +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/storage_clients/_file_system/__init__.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/storage_clients/py.typed +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/storages/__init__.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/src/apify/storages/py.typed +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/integration/README.md +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/integration/__init__.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/integration/actor_source_base/Dockerfile +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/integration/actor_source_base/server.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/integration/actor_source_base/src/__init__.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/integration/actor_source_base/src/__main__.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/integration/actor_source_base/src/main.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/integration/test_actor_api_helpers.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/integration/test_actor_call_timeouts.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/integration/test_actor_charge.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/integration/test_actor_events.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/integration/test_actor_log.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/integration/test_fixtures.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/__init__.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/actor/__init__.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/actor/test_actor_create_proxy_configuration.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/actor/test_actor_dataset.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/actor/test_actor_env_helpers.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/actor/test_actor_key_value_store.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/actor/test_actor_request_queue.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/actor/test_request_list.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/events/__init__.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/scrapy/__init__.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/scrapy/extensions/__init__.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/scrapy/middlewares/__init__.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/scrapy/middlewares/test_apify_proxy.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/scrapy/pipelines/__init__.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/scrapy/pipelines/test_actor_dataset_push.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/scrapy/requests/__init__.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/scrapy/requests/test_to_apify_request.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/scrapy/requests/test_to_scrapy_request.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/scrapy/utils/__init__.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/scrapy/utils/test_apply_apify_settings.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/scrapy/utils/test_get_basic_auth_header.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/tests/unit/storage_clients/__init__.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/website/.eslintrc.json +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/website/babel.config.js +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/website/build_api_reference.sh +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/website/docusaurus.config.js +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/website/generate_module_shortcuts.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/website/src/components/ApiLink.jsx +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/website/src/components/Gradients.jsx +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/website/src/components/Highlights.jsx +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/website/src/components/Highlights.module.css +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/website/src/components/RunnableCodeBlock.jsx +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/website/src/components/RunnableCodeBlock.module.css +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/website/src/css/custom.css +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/website/src/pages/home_page_example.py +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/website/src/pages/index.js +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/website/src/pages/index.module.css +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/website/static/.nojekyll +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/website/static/img/docs-og.png +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/website/static/img/guides/redirected_logs_example.webp +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/website/tools/docs-prettier.config.js +0 -0
- {apify-3.0.0rc1 → apify-3.0.1b2}/website/tools/utils/externalLink.js +0 -0
|
@@ -27,7 +27,7 @@ jobs:
|
|
|
27
27
|
token: ${{ secrets.APIFY_SERVICE_ACCOUNT_GITHUB_TOKEN }}
|
|
28
28
|
|
|
29
29
|
- name: Set up Node
|
|
30
|
-
uses: actions/setup-node@[previous version not captured]
|
|
30
|
+
uses: actions/setup-node@v5
|
|
31
31
|
with:
|
|
32
32
|
node-version: ${{ env.NODE_VERSION }}
|
|
33
33
|
cache: npm
|
|
@@ -50,12 +50,12 @@ jobs:
|
|
|
50
50
|
git push
|
|
51
51
|
|
|
52
52
|
- name: Set up Python
|
|
53
|
-
uses: actions/setup-python@[previous version not captured]
|
|
53
|
+
uses: actions/setup-python@v6
|
|
54
54
|
with:
|
|
55
55
|
python-version: ${{ env.PYTHON_VERSION }}
|
|
56
56
|
|
|
57
57
|
- name: Set up uv package manager
|
|
58
|
-
uses: astral-sh/setup-uv@[previous version not captured]
|
|
58
|
+
uses: astral-sh/setup-uv@v7
|
|
59
59
|
with:
|
|
60
60
|
python-version: ${{ env.PYTHON_VERSION }}
|
|
61
61
|
|
|
@@ -72,7 +72,7 @@ jobs:
|
|
|
72
72
|
uses: actions/configure-pages@v5
|
|
73
73
|
|
|
74
74
|
- name: Upload GitHub Pages artifact
|
|
75
|
-
uses: actions/upload-pages-artifact@[previous version not captured]
|
|
75
|
+
uses: actions/upload-pages-artifact@v4
|
|
76
76
|
with:
|
|
77
77
|
path: ./website/build
|
|
78
78
|
|
|
@@ -2,17 +2,34 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
<!-- git-cliff-unreleased-start -->
|
|
6
|
+
## 3.0.1 - **not yet released**
|
|
7
|
+
|
|
8
|
+
### 🐛 Bug Fixes
|
|
9
|
+
|
|
10
|
+
- Also load input from a file with a .json extension in file system storage ([#617](https://github.com/apify/apify-sdk-python/pull/617)) ([b62804c](https://github.com/apify/apify-sdk-python/commit/b62804c170069cd7aa77572bb9682a156581cbac)) by [@janbuchar](https://github.com/janbuchar)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
<!-- git-cliff-unreleased-end -->
|
|
14
|
+
## [3.0.0](https://github.com/apify/apify-sdk-python/releases/tag/v3.0.0) (2025-09-29)
|
|
15
|
+
|
|
16
|
+
- Check out the [Upgrading guide](https://docs.apify.com/sdk/python/docs/upgrading/upgrading-to-v3) to ensure a smooth update.
|
|
6
17
|
|
|
7
18
|
### 🚀 Features
|
|
8
19
|
|
|
9
20
|
- Add deduplication to `add_batch_of_requests` ([#534](https://github.com/apify/apify-sdk-python/pull/534)) ([dd03c4d](https://github.com/apify/apify-sdk-python/commit/dd03c4d446f611492adf35f1b5738648ee5a66f7)) by [@Pijukatel](https://github.com/Pijukatel), closes [#514](https://github.com/apify/apify-sdk-python/issues/514)
|
|
21
|
+
- Add new methods to ChargingManager ([#580](https://github.com/apify/apify-sdk-python/pull/580)) ([54f7f8b](https://github.com/apify/apify-sdk-python/commit/54f7f8b29c5982be98b595dac11eceff915035c9)) by [@vdusek](https://github.com/vdusek)
|
|
22
|
+
- Add support for NDU storages ([#594](https://github.com/apify/apify-sdk-python/pull/594)) ([8721ef5](https://github.com/apify/apify-sdk-python/commit/8721ef5731bcb1a04ad63c930089bf83be29f308)) by [@vdusek](https://github.com/vdusek), closes [#1175](https://github.com/apify/apify-sdk-python/issues/1175)
|
|
23
|
+
- Add stats to `ApifyRequestQueueClient` ([#574](https://github.com/apify/apify-sdk-python/pull/574)) ([21f6782](https://github.com/apify/apify-sdk-python/commit/21f6782b444f623aba986b4922cf67bafafd4b2c)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1344](https://github.com/apify/apify-sdk-python/issues/1344)
|
|
24
|
+
- Add specialized ApifyRequestQueue clients ([#573](https://github.com/apify/apify-sdk-python/pull/573)) ([f830ab0](https://github.com/apify/apify-sdk-python/commit/f830ab09b1fa12189c9d3297d5cf18a4f2da62fa)) by [@Pijukatel](https://github.com/Pijukatel)
|
|
10
25
|
|
|
11
26
|
### 🐛 Bug Fixes
|
|
12
27
|
|
|
13
28
|
- Restrict apify-shared and apify-client versions ([#523](https://github.com/apify/apify-sdk-python/pull/523)) ([b3ae5a9](https://github.com/apify/apify-sdk-python/commit/b3ae5a972a65454a4998eda59c9fcc3f6b7e8579)) by [@vdusek](https://github.com/vdusek)
|
|
14
29
|
- Expose `APIFY_USER_IS_PAYING` env var to the configuration ([#507](https://github.com/apify/apify-sdk-python/pull/507)) ([0801e54](https://github.com/apify/apify-sdk-python/commit/0801e54887317c1280cc6828ecd3f2cc53287e76)) by [@stepskop](https://github.com/stepskop)
|
|
15
30
|
- Resolve DeprecationWarning in ApifyEventManager ([#555](https://github.com/apify/apify-sdk-python/pull/555)) ([0c5111d](https://github.com/apify/apify-sdk-python/commit/0c5111dafe19796ec1fb9652a44c031bed9758df)) by [@vdusek](https://github.com/vdusek), closes [#343](https://github.com/apify/apify-sdk-python/issues/343)
|
|
31
|
+
- Use same `client_key` for `Actor` created `request_queue` and improve its metadata estimation ([#552](https://github.com/apify/apify-sdk-python/pull/552)) ([7e4e5da](https://github.com/apify/apify-sdk-python/commit/7e4e5da81dd87e84ebeef2bd336c6c1d422cb9a7)) by [@Pijukatel](https://github.com/Pijukatel), closes [#536](https://github.com/apify/apify-sdk-python/issues/536)
|
|
32
|
+
- Properly process pre-existing Actor input file ([#591](https://github.com/apify/apify-sdk-python/pull/591)) ([cc5075f](https://github.com/apify/apify-sdk-python/commit/cc5075fab8c72ca5711cfd97932037b34e6997cd)) by [@Pijukatel](https://github.com/Pijukatel), closes [#590](https://github.com/apify/apify-sdk-python/issues/590)
|
|
16
33
|
|
|
17
34
|
### Chore
|
|
18
35
|
|
|
@@ -23,138 +40,8 @@ All notable changes to this project will be documented in this file.
|
|
|
23
40
|
- [**breaking**] Adapt to the Crawlee v1.0 ([#470](https://github.com/apify/apify-sdk-python/pull/470)) ([f7e3320](https://github.com/apify/apify-sdk-python/commit/f7e33206cf3e4767faacbdc43511b45b6785f929)) by [@vdusek](https://github.com/vdusek), closes [#469](https://github.com/apify/apify-sdk-python/issues/469), [#540](https://github.com/apify/apify-sdk-python/issues/540)
|
|
24
41
|
- [**breaking**] Replace `httpx` with `impit` ([#560](https://github.com/apify/apify-sdk-python/pull/560)) ([cca3869](https://github.com/apify/apify-sdk-python/commit/cca3869e85968865e56aafcdcb36fbccba27aef0)) by [@Mantisus](https://github.com/Mantisus), closes [#558](https://github.com/apify/apify-sdk-python/issues/558)
|
|
25
42
|
- [**breaking**] Remove `Request.id` field ([#553](https://github.com/apify/apify-sdk-python/pull/553)) ([445ab5d](https://github.com/apify/apify-sdk-python/commit/445ab5d752b785fc2018b35c8adbe779253d7acd)) by [@Pijukatel](https://github.com/Pijukatel)
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
## [2.7.0](https://github.com/apify/apify-sdk-python/releases/tag/v2.7.0) (2025-07-14)
|
|
29
|
-
|
|
30
|
-
### 🚀 Features
|
|
31
|
-
|
|
32
|
-
- **crypto:** Decrypt secret objects ([#482](https://github.com/apify/apify-sdk-python/pull/482)) ([ce9daf7](https://github.com/apify/apify-sdk-python/commit/ce9daf7381212b8dc194e8a643e5ca0dedbc0078)) by [@MFori](https://github.com/MFori)
|
|
33
|
-
|
|
34
|
-
### 🐛 Bug Fixes
|
|
35
|
-
|
|
36
|
-
- Sync `@docusaurus` theme version [internal] ([#500](https://github.com/apify/apify-sdk-python/pull/500)) ([a7485e7](https://github.com/apify/apify-sdk-python/commit/a7485e7d2276fde464ce862573d5b95e7d4d836a)) by [@katzino](https://github.com/katzino)
|
|
37
|
-
- Tagline overlap ([#501](https://github.com/apify/apify-sdk-python/pull/501)) ([bae8340](https://github.com/apify/apify-sdk-python/commit/bae8340c46fea756ea35ea4d591da84c09d478e2)) by [@katzino](https://github.com/katzino)
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
## [2.7.0](https://github.com/apify/apify-sdk-python/releases/tag/v2.7.0) (2025-07-14)
|
|
41
|
-
|
|
42
|
-
### 🚀 Features
|
|
43
|
-
|
|
44
|
-
- **crypto:** Decrypt secret objects ([#482](https://github.com/apify/apify-sdk-python/pull/482)) ([ce9daf7](https://github.com/apify/apify-sdk-python/commit/ce9daf7381212b8dc194e8a643e5ca0dedbc0078)) by [@MFori](https://github.com/MFori)
|
|
45
|
-
|
|
46
|
-
### 🐛 Bug Fixes
|
|
47
|
-
|
|
48
|
-
- Sync `@docusaurus` theme version [internal] ([#500](https://github.com/apify/apify-sdk-python/pull/500)) ([a7485e7](https://github.com/apify/apify-sdk-python/commit/a7485e7d2276fde464ce862573d5b95e7d4d836a)) by [@katzino](https://github.com/katzino)
|
|
49
|
-
- Tagline overlap ([#501](https://github.com/apify/apify-sdk-python/pull/501)) ([bae8340](https://github.com/apify/apify-sdk-python/commit/bae8340c46fea756ea35ea4d591da84c09d478e2)) by [@katzino](https://github.com/katzino)
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
## [2.7.0](https://github.com/apify/apify-sdk-python/releases/tag/v2.7.0) (2025-07-14)
|
|
53
|
-
|
|
54
|
-
### 🚀 Features
|
|
55
|
-
|
|
56
|
-
- **crypto:** Decrypt secret objects ([#482](https://github.com/apify/apify-sdk-python/pull/482)) ([ce9daf7](https://github.com/apify/apify-sdk-python/commit/ce9daf7381212b8dc194e8a643e5ca0dedbc0078)) by [@MFori](https://github.com/MFori)
|
|
57
|
-
|
|
58
|
-
### 🐛 Bug Fixes
|
|
59
|
-
|
|
60
|
-
- Sync `@docusaurus` theme version [internal] ([#500](https://github.com/apify/apify-sdk-python/pull/500)) ([a7485e7](https://github.com/apify/apify-sdk-python/commit/a7485e7d2276fde464ce862573d5b95e7d4d836a)) by [@katzino](https://github.com/katzino)
|
|
61
|
-
- Tagline overlap ([#501](https://github.com/apify/apify-sdk-python/pull/501)) ([bae8340](https://github.com/apify/apify-sdk-python/commit/bae8340c46fea756ea35ea4d591da84c09d478e2)) by [@katzino](https://github.com/katzino)
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
## [2.7.0](https://github.com/apify/apify-sdk-python/releases/tag/v2.7.0) (2025-07-14)
|
|
65
|
-
|
|
66
|
-
### 🚀 Features
|
|
67
|
-
|
|
68
|
-
- **crypto:** Decrypt secret objects ([#482](https://github.com/apify/apify-sdk-python/pull/482)) ([ce9daf7](https://github.com/apify/apify-sdk-python/commit/ce9daf7381212b8dc194e8a643e5ca0dedbc0078)) by [@MFori](https://github.com/MFori)
|
|
69
|
-
|
|
70
|
-
### 🐛 Bug Fixes
|
|
71
|
-
|
|
72
|
-
- Sync `@docusaurus` theme version [internal] ([#500](https://github.com/apify/apify-sdk-python/pull/500)) ([a7485e7](https://github.com/apify/apify-sdk-python/commit/a7485e7d2276fde464ce862573d5b95e7d4d836a)) by [@katzino](https://github.com/katzino)
|
|
73
|
-
- Tagline overlap ([#501](https://github.com/apify/apify-sdk-python/pull/501)) ([bae8340](https://github.com/apify/apify-sdk-python/commit/bae8340c46fea756ea35ea4d591da84c09d478e2)) by [@katzino](https://github.com/katzino)
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
## [2.7.0](https://github.com/apify/apify-sdk-python/releases/tag/v2.7.0) (2025-07-14)
|
|
77
|
-
|
|
78
|
-
### 🚀 Features
|
|
79
|
-
|
|
80
|
-
- **crypto:** Decrypt secret objects ([#482](https://github.com/apify/apify-sdk-python/pull/482)) ([ce9daf7](https://github.com/apify/apify-sdk-python/commit/ce9daf7381212b8dc194e8a643e5ca0dedbc0078)) by [@MFori](https://github.com/MFori)
|
|
81
|
-
|
|
82
|
-
### 🐛 Bug Fixes
|
|
83
|
-
|
|
84
|
-
- Sync `@docusaurus` theme version [internal] ([#500](https://github.com/apify/apify-sdk-python/pull/500)) ([a7485e7](https://github.com/apify/apify-sdk-python/commit/a7485e7d2276fde464ce862573d5b95e7d4d836a)) by [@katzino](https://github.com/katzino)
|
|
85
|
-
- Tagline overlap ([#501](https://github.com/apify/apify-sdk-python/pull/501)) ([bae8340](https://github.com/apify/apify-sdk-python/commit/bae8340c46fea756ea35ea4d591da84c09d478e2)) by [@katzino](https://github.com/katzino)
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
## [2.7.0](https://github.com/apify/apify-sdk-python/releases/tag/v2.7.0) (2025-07-14)
|
|
89
|
-
|
|
90
|
-
### 🚀 Features
|
|
91
|
-
|
|
92
|
-
- **crypto:** Decrypt secret objects ([#482](https://github.com/apify/apify-sdk-python/pull/482)) ([ce9daf7](https://github.com/apify/apify-sdk-python/commit/ce9daf7381212b8dc194e8a643e5ca0dedbc0078)) by [@MFori](https://github.com/MFori)
|
|
93
|
-
|
|
94
|
-
### 🐛 Bug Fixes
|
|
95
|
-
|
|
96
|
-
- Sync `@docusaurus` theme version [internal] ([#500](https://github.com/apify/apify-sdk-python/pull/500)) ([a7485e7](https://github.com/apify/apify-sdk-python/commit/a7485e7d2276fde464ce862573d5b95e7d4d836a)) by [@katzino](https://github.com/katzino)
|
|
97
|
-
- Tagline overlap ([#501](https://github.com/apify/apify-sdk-python/pull/501)) ([bae8340](https://github.com/apify/apify-sdk-python/commit/bae8340c46fea756ea35ea4d591da84c09d478e2)) by [@katzino](https://github.com/katzino)
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
## [2.7.0](https://github.com/apify/apify-sdk-python/releases/tag/v2.7.0) (2025-07-14)
|
|
101
|
-
|
|
102
|
-
### 🚀 Features
|
|
103
|
-
|
|
104
|
-
- **crypto:** Decrypt secret objects ([#482](https://github.com/apify/apify-sdk-python/pull/482)) ([ce9daf7](https://github.com/apify/apify-sdk-python/commit/ce9daf7381212b8dc194e8a643e5ca0dedbc0078)) by [@MFori](https://github.com/MFori)
|
|
105
|
-
|
|
106
|
-
### 🐛 Bug Fixes
|
|
107
|
-
|
|
108
|
-
- Sync `@docusaurus` theme version [internal] ([#500](https://github.com/apify/apify-sdk-python/pull/500)) ([a7485e7](https://github.com/apify/apify-sdk-python/commit/a7485e7d2276fde464ce862573d5b95e7d4d836a)) by [@katzino](https://github.com/katzino)
|
|
109
|
-
- Tagline overlap ([#501](https://github.com/apify/apify-sdk-python/pull/501)) ([bae8340](https://github.com/apify/apify-sdk-python/commit/bae8340c46fea756ea35ea4d591da84c09d478e2)) by [@katzino](https://github.com/katzino)
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
## [2.7.0](https://github.com/apify/apify-sdk-python/releases/tag/v2.7.0) (2025-07-14)
|
|
113
|
-
|
|
114
|
-
### 🚀 Features
|
|
115
|
-
|
|
116
|
-
- **crypto:** Decrypt secret objects ([#482](https://github.com/apify/apify-sdk-python/pull/482)) ([ce9daf7](https://github.com/apify/apify-sdk-python/commit/ce9daf7381212b8dc194e8a643e5ca0dedbc0078)) by [@MFori](https://github.com/MFori)
|
|
117
|
-
|
|
118
|
-
### 🐛 Bug Fixes
|
|
119
|
-
|
|
120
|
-
- Sync `@docusaurus` theme version [internal] ([#500](https://github.com/apify/apify-sdk-python/pull/500)) ([a7485e7](https://github.com/apify/apify-sdk-python/commit/a7485e7d2276fde464ce862573d5b95e7d4d836a)) by [@katzino](https://github.com/katzino)
|
|
121
|
-
- Tagline overlap ([#501](https://github.com/apify/apify-sdk-python/pull/501)) ([bae8340](https://github.com/apify/apify-sdk-python/commit/bae8340c46fea756ea35ea4d591da84c09d478e2)) by [@katzino](https://github.com/katzino)
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
## [2.7.0](https://github.com/apify/apify-sdk-python/releases/tag/v2.7.0) (2025-07-14)
|
|
125
|
-
|
|
126
|
-
### 🚀 Features
|
|
127
|
-
|
|
128
|
-
- **crypto:** Decrypt secret objects ([#482](https://github.com/apify/apify-sdk-python/pull/482)) ([ce9daf7](https://github.com/apify/apify-sdk-python/commit/ce9daf7381212b8dc194e8a643e5ca0dedbc0078)) by [@MFori](https://github.com/MFori)
|
|
129
|
-
|
|
130
|
-
### 🐛 Bug Fixes
|
|
131
|
-
|
|
132
|
-
- Sync `@docusaurus` theme version [internal] ([#500](https://github.com/apify/apify-sdk-python/pull/500)) ([a7485e7](https://github.com/apify/apify-sdk-python/commit/a7485e7d2276fde464ce862573d5b95e7d4d836a)) by [@katzino](https://github.com/katzino)
|
|
133
|
-
- Tagline overlap ([#501](https://github.com/apify/apify-sdk-python/pull/501)) ([bae8340](https://github.com/apify/apify-sdk-python/commit/bae8340c46fea756ea35ea4d591da84c09d478e2)) by [@katzino](https://github.com/katzino)
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
## [2.7.0](https://github.com/apify/apify-sdk-python/releases/tag/v2.7.0) (2025-07-14)
|
|
137
|
-
|
|
138
|
-
### 🚀 Features
|
|
139
|
-
|
|
140
|
-
- **crypto:** Decrypt secret objects ([#482](https://github.com/apify/apify-sdk-python/pull/482)) ([ce9daf7](https://github.com/apify/apify-sdk-python/commit/ce9daf7381212b8dc194e8a643e5ca0dedbc0078)) by [@MFori](https://github.com/MFori)
|
|
141
|
-
|
|
142
|
-
### 🐛 Bug Fixes
|
|
143
|
-
|
|
144
|
-
- Sync `@docusaurus` theme version [internal] ([#500](https://github.com/apify/apify-sdk-python/pull/500)) ([a7485e7](https://github.com/apify/apify-sdk-python/commit/a7485e7d2276fde464ce862573d5b95e7d4d836a)) by [@katzino](https://github.com/katzino)
|
|
145
|
-
- Tagline overlap ([#501](https://github.com/apify/apify-sdk-python/pull/501)) ([bae8340](https://github.com/apify/apify-sdk-python/commit/bae8340c46fea756ea35ea4d591da84c09d478e2)) by [@katzino](https://github.com/katzino)
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
## [2.7.0](https://github.com/apify/apify-sdk-python/releases/tag/v2.7.0) (2025-07-14)
|
|
149
|
-
|
|
150
|
-
### 🚀 Features
|
|
151
|
-
|
|
152
|
-
- **crypto:** Decrypt secret objects ([#482](https://github.com/apify/apify-sdk-python/pull/482)) ([ce9daf7](https://github.com/apify/apify-sdk-python/commit/ce9daf7381212b8dc194e8a643e5ca0dedbc0078)) by [@MFori](https://github.com/MFori)
|
|
153
|
-
|
|
154
|
-
### 🐛 Bug Fixes
|
|
155
|
-
|
|
156
|
-
- Sync `@docusaurus` theme version [internal] ([#500](https://github.com/apify/apify-sdk-python/pull/500)) ([a7485e7](https://github.com/apify/apify-sdk-python/commit/a7485e7d2276fde464ce862573d5b95e7d4d836a)) by [@katzino](https://github.com/katzino)
|
|
157
|
-
- Tagline overlap ([#501](https://github.com/apify/apify-sdk-python/pull/501)) ([bae8340](https://github.com/apify/apify-sdk-python/commit/bae8340c46fea756ea35ea4d591da84c09d478e2)) by [@katzino](https://github.com/katzino)
|
|
43
|
+
- [**breaking**] Make `Actor` initialization stricter and more predictable ([#576](https://github.com/apify/apify-sdk-python/pull/576)) ([912222a](https://github.com/apify/apify-sdk-python/commit/912222a7a8123be66c94c50a2e461276fbfc50c4)) by [@Pijukatel](https://github.com/Pijukatel)
|
|
44
|
+
- [**breaking**] Make default Apify storages use alias mechanism ([#606](https://github.com/apify/apify-sdk-python/pull/606)) ([dbea7d9](https://github.com/apify/apify-sdk-python/commit/dbea7d97fe7f25aa8658a32c5bb46a3800561df5)) by [@Pijukatel](https://github.com/Pijukatel), closes [#599](https://github.com/apify/apify-sdk-python/issues/599)
|
|
158
45
|
|
|
159
46
|
|
|
160
47
|
## [2.7.3](https://github.com/apify/apify-sdk-python/releases/tag/v2.7.3) (2025-08-11)
|
|
@@ -296,6 +183,8 @@ All notable changes to this project will be documented in this file.
|
|
|
296
183
|
|
|
297
184
|
## [2.0.0](https://github.com/apify/apify-sdk-python/releases/tag/v2.0.0) (2024-09-10)
|
|
298
185
|
|
|
186
|
+
- Check out the [Upgrading guide](https://docs.apify.com/sdk/python/docs/upgrading/upgrading-to-v2) to ensure a smooth update.
|
|
187
|
+
|
|
299
188
|
### 🚀 Features
|
|
300
189
|
|
|
301
190
|
- Better Actor API typing ([#256](https://github.com/apify/apify-sdk-python/pull/256)) ([abb87e7](https://github.com/apify/apify-sdk-python/commit/abb87e7f3c272f88a9a76292d8394fe93b98428a)) by [@janbuchar](https://github.com/janbuchar), closes [#243](https://github.com/apify/apify-sdk-python/issues/243)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: apify
|
|
3
|
-
Version: 3.0.0rc1
|
|
3
|
+
Version: 3.0.1b2
|
|
4
4
|
Summary: Apify SDK for Python
|
|
5
5
|
Project-URL: Apify Homepage, https://apify.com
|
|
6
6
|
Project-URL: Changelog, https://docs.apify.com/sdk/python/docs/changelog
|
|
@@ -228,10 +228,10 @@ Requires-Python: >=3.10
|
|
|
228
228
|
Requires-Dist: apify-client<3.0.0,>=2.0.0
|
|
229
229
|
Requires-Dist: apify-shared<3.0.0,>=2.0.0
|
|
230
230
|
Requires-Dist: cachetools>=5.5.0
|
|
231
|
-
Requires-Dist: crawlee
|
|
231
|
+
Requires-Dist: crawlee<2.0.0,>=1.0.2
|
|
232
232
|
Requires-Dist: cryptography>=42.0.0
|
|
233
|
-
Requires-Dist: impit>=0.
|
|
234
|
-
Requires-Dist: lazy-object-proxy
|
|
233
|
+
Requires-Dist: impit>=0.6.1
|
|
234
|
+
Requires-Dist: lazy-object-proxy>=1.11.0
|
|
235
235
|
Requires-Dist: more-itertools>=10.2.0
|
|
236
236
|
Requires-Dist: typing-extensions>=4.1.0
|
|
237
237
|
Requires-Dist: websockets>=14.0
|
|
@@ -240,7 +240,22 @@ Provides-Extra: scrapy
|
|
|
240
240
|
Requires-Dist: scrapy>=2.11.0; extra == 'scrapy'
|
|
241
241
|
Description-Content-Type: text/markdown
|
|
242
242
|
|
|
243
|
-
|
|
243
|
+
<h1 align=center>Apify SDK for Python</h1>
|
|
244
|
+
|
|
245
|
+
<p align=center>
|
|
246
|
+
<a href="https://badge.fury.io/py/apify" rel="nofollow">
|
|
247
|
+
<img src="https://badge.fury.io/py/apify.svg" alt="PyPI version" style="max-width: 100%;">
|
|
248
|
+
</a>
|
|
249
|
+
<a href="https://pypi.org/project/apify/" rel="nofollow">
|
|
250
|
+
<img src="https://img.shields.io/pypi/dm/apify" alt="PyPI - Downloads" style="max-width: 100%;">
|
|
251
|
+
</a>
|
|
252
|
+
<a href="https://pypi.org/project/apify/" rel="nofollow">
|
|
253
|
+
<img src="https://img.shields.io/pypi/pyversions/apify" alt="PyPI - Python Version" style="max-width: 100%;">
|
|
254
|
+
</a>
|
|
255
|
+
<a href="https://discord.gg/jyEM2PRvMU" rel="nofollow">
|
|
256
|
+
<img src="https://img.shields.io/discord/801163717915574323?label=discord" alt="Chat on discord" style="max-width: 100%;">
|
|
257
|
+
</a>
|
|
258
|
+
</p>
|
|
244
259
|
|
|
245
260
|
The Apify SDK for Python is the official library to create [Apify Actors](https://docs.apify.com/platform/actors)
|
|
246
261
|
in Python. It provides useful features like Actor lifecycle management, local storage emulation, and Actor
|
|
@@ -1,4 +1,19 @@
|
|
|
1
|
-
|
|
1
|
+
<h1 align=center>Apify SDK for Python</h1>
|
|
2
|
+
|
|
3
|
+
<p align=center>
|
|
4
|
+
<a href="https://badge.fury.io/py/apify" rel="nofollow">
|
|
5
|
+
<img src="https://badge.fury.io/py/apify.svg" alt="PyPI version" style="max-width: 100%;">
|
|
6
|
+
</a>
|
|
7
|
+
<a href="https://pypi.org/project/apify/" rel="nofollow">
|
|
8
|
+
<img src="https://img.shields.io/pypi/dm/apify" alt="PyPI - Downloads" style="max-width: 100%;">
|
|
9
|
+
</a>
|
|
10
|
+
<a href="https://pypi.org/project/apify/" rel="nofollow">
|
|
11
|
+
<img src="https://img.shields.io/pypi/pyversions/apify" alt="PyPI - Python Version" style="max-width: 100%;">
|
|
12
|
+
</a>
|
|
13
|
+
<a href="https://discord.gg/jyEM2PRvMU" rel="nofollow">
|
|
14
|
+
<img src="https://img.shields.io/discord/801163717915574323?label=discord" alt="Chat on discord" style="max-width: 100%;">
|
|
15
|
+
</a>
|
|
16
|
+
</p>
|
|
2
17
|
|
|
3
18
|
The Apify SDK for Python is the official library to create [Apify Actors](https://docs.apify.com/platform/actors)
|
|
4
19
|
in Python. It provides useful features like Actor lifecycle management, local storage emulation, and Actor
|
|
@@ -13,9 +13,9 @@ The URL is available in the following places:
|
|
|
13
13
|
|
|
14
14
|
- In Apify Console, on the Actor run details page as the **Container URL** field.
|
|
15
15
|
- In the API as the `container_url` property of the [Run object](https://docs.apify.com/api/v2#/reference/actors/run-object/get-run).
|
|
16
|
-
- In the Actor as the `Actor.
|
|
16
|
+
- In the Actor as the `Actor.configuration.container_url` property.
|
|
17
17
|
|
|
18
|
-
The web server running inside the container must listen at the port defined by the `Actor.
|
|
18
|
+
The web server running inside the container must listen at the port defined by the `Actor.configuration.container_port` property. When running Actors locally, the port defaults to `4321`, so the web server will be accessible at `http://localhost:4321`.
|
|
19
19
|
|
|
20
20
|
## Example
|
|
21
21
|
|
|
@@ -13,6 +13,12 @@ Apify provides several [pricing models](https://docs.apify.com/platform/actors/p
|
|
|
13
13
|
|
|
14
14
|
To use the pay-per-event pricing model, you first need to [set it up](https://docs.apify.com/platform/actors/running/actors-in-store#pay-per-event) for your Actor in the Apify console. After that, you're free to start charging for events.
|
|
15
15
|
|
|
16
|
+
:::info How pay-per-event pricing works
|
|
17
|
+
|
|
18
|
+
If you want more details about PPE pricing, please refer to our [PPE documentation](https://docs.apify.com/platform/actors/publishing/monetize/pay-per-event).
|
|
19
|
+
|
|
20
|
+
:::
|
|
21
|
+
|
|
16
22
|
## Charging for events
|
|
17
23
|
|
|
18
24
|
After monetization is set in the Apify console, you can add <ApiLink to="class/Actor#charge">`Actor.charge`</ApiLink> calls to your code and start monetizing!
|
{apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/07_webhook_preventing.py
RENAMED
|
@@ -7,7 +7,7 @@ async def main() -> None:
|
|
|
7
7
|
webhook = Webhook(
|
|
8
8
|
event_types=['ACTOR.RUN.FAILED'],
|
|
9
9
|
request_url='https://example.com/run-failed',
|
|
10
|
-
idempotency_key=Actor.
|
|
10
|
+
idempotency_key=Actor.configuration.actor_run_id,
|
|
11
11
|
)
|
|
12
12
|
|
|
13
13
|
# Add the webhook to the Actor.
|
|
@@ -22,9 +22,9 @@ def run_server() -> None:
|
|
|
22
22
|
# and save a reference to the server.
|
|
23
23
|
global http_server
|
|
24
24
|
with ThreadingHTTPServer(
|
|
25
|
-
('', Actor.
|
|
25
|
+
('', Actor.configuration.web_server_port), RequestHandler
|
|
26
26
|
) as server:
|
|
27
|
-
Actor.log.info(f'Server running on {Actor.
|
|
27
|
+
Actor.log.info(f'Server running on {Actor.configuration.web_server_port}')
|
|
28
28
|
http_server = server
|
|
29
29
|
server.serve_forever()
|
|
30
30
|
|
{apify-3.0.0rc1/docs/03_concepts → apify-3.0.1b2/docs/02_concepts}/code/conditional_actor_charge.py
RENAMED
|
@@ -13,6 +13,6 @@ async def main() -> None:
|
|
|
13
13
|
if Actor.get_charging_manager().get_pricing_info().is_pay_per_event:
|
|
14
14
|
# highlight-end
|
|
15
15
|
await Actor.push_data({'hello': 'world'}, 'dataset-item')
|
|
16
|
-
elif charged_items < (Actor.
|
|
16
|
+
elif charged_items < (Actor.configuration.max_paid_dataset_items or 0):
|
|
17
17
|
await Actor.push_data({'hello': 'world'})
|
|
18
18
|
charged_items += 1
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
---
|
|
2
|
+
id: beautifulsoup-httpx
|
|
3
|
+
title: Using BeautifulSoup with HTTPX
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
import CodeBlock from '@theme/CodeBlock';
|
|
7
|
+
|
|
8
|
+
import BeautifulSoupHttpxExample from '!!raw-loader!./code/01_beautifulsoup_httpx.py';
|
|
9
|
+
|
|
10
|
+
In this guide, you'll learn how to use the [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/) library with the [HTTPX](https://www.python-httpx.org/) library in your Apify Actors.
|
|
11
|
+
|
|
12
|
+
## Introduction
|
|
13
|
+
|
|
14
|
+
[BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/) is a Python library for extracting data from HTML and XML files. It provides simple methods and Pythonic idioms for navigating, searching, and modifying a website's element tree, enabling efficient data extraction.
|
|
15
|
+
|
|
16
|
+
[HTTPX](https://www.python-httpx.org/) is a modern, high-level HTTP client library for Python. It provides a simple interface for making HTTP requests and supports both synchronous and asynchronous requests.
|
|
17
|
+
|
|
18
|
+
To create an Actor which uses those libraries, start from the [BeautifulSoup & Python](https://apify.com/templates/categories/python) Actor template. This template includes the [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/) and [HTTPX](https://www.python-httpx.org/) libraries preinstalled, allowing you to begin development immediately.
|
|
19
|
+
|
|
20
|
+
## Example Actor
|
|
21
|
+
|
|
22
|
+
Below is a simple Actor that recursively scrapes titles from all linked websites, up to a specified maximum depth, starting from URLs provided in the Actor input. It uses [HTTPX](https://www.python-httpx.org/) for fetching pages and [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/) for parsing their content to extract titles and links to other pages.
|
|
23
|
+
|
|
24
|
+
<CodeBlock className="language-python">
|
|
25
|
+
{BeautifulSoupHttpxExample}
|
|
26
|
+
</CodeBlock>
|
|
27
|
+
|
|
28
|
+
## Conclusion
|
|
29
|
+
|
|
30
|
+
In this guide, you learned how to use [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/) with [HTTPX](https://www.python-httpx.org/) in your Apify Actors. By combining these libraries, you can efficiently extract data from HTML or XML files, making it easy to build web scraping tasks in Python. See the [Actor templates](https://apify.com/templates/categories/python) to get started with your own scraping tasks. If you have questions or need assistance, feel free to reach out on our [GitHub](https://github.com/apify/apify-sdk-python) or join our [Discord community](https://discord.com/invite/jyEM2PRvMU). Happy scraping!
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
---
|
|
2
|
+
id: parsel-impit
|
|
3
|
+
title: Using Parsel with Impit
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
import CodeBlock from '@theme/CodeBlock';
|
|
7
|
+
|
|
8
|
+
import ParselImpitExample from '!!raw-loader!./code/02_parsel_impit.py';
|
|
9
|
+
|
|
10
|
+
In this guide, you'll learn how to combine the [Parsel](https://github.com/scrapy/parsel) and [Impit](https://github.com/apify/impit) libraries when building Apify Actors.
|
|
11
|
+
|
|
12
|
+
## Introduction
|
|
13
|
+
|
|
14
|
+
[Parsel](https://github.com/scrapy/parsel) is a Python library for extracting data from HTML and XML documents using CSS selectors and [XPath](https://en.wikipedia.org/wiki/XPath) expressions. It offers an intuitive API for navigating and extracting structured data, making it a popular choice for web scraping. Compared to [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/), it also delivers better performance.
|
|
15
|
+
|
|
16
|
+
[Impit](https://github.com/apify/impit) is Apify's high-performance HTTP client for Python. It supports both synchronous and asynchronous workflows and is built for large-scale web scraping, where making thousands of requests efficiently is essential. With built-in browser impersonation and anti-blocking features, it simplifies handling modern websites.
|
|
17
|
+
|
|
18
|
+
## Example Actor
|
|
19
|
+
|
|
20
|
+
The following example shows a simple Actor that recursively scrapes titles from linked pages, up to a user-defined maximum depth. It uses [Impit](https://github.com/apify/impit) to fetch pages and [Parsel](https://github.com/scrapy/parsel) to extract titles and discover new links.
|
|
21
|
+
|
|
22
|
+
<CodeBlock className="language-python">
|
|
23
|
+
{ParselImpitExample}
|
|
24
|
+
</CodeBlock>
|
|
25
|
+
|
|
26
|
+
## Conclusion
|
|
27
|
+
|
|
28
|
+
In this guide, you learned how to use [Parsel](https://github.com/scrapy/parsel) with [Impit](https://github.com/apify/impit) in your Apify Actors. By combining these libraries, you get a powerful and efficient solution for web scraping: [Parsel](https://github.com/scrapy/parsel) provides excellent CSS selector and XPath support for data extraction, while [Impit](https://github.com/apify/impit) offers a fast and simple HTTP client built by Apify. This combination makes it easy to build scalable web scraping tasks in Python. See the [Actor templates](https://apify.com/templates/categories/python) to get started with your own scraping tasks. If you have questions or need assistance, feel free to reach out on our [GitHub](https://github.com/apify/apify-sdk-python) or join our [Discord community](https://discord.com/invite/jyEM2PRvMU). Happy scraping!
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
---
|
|
2
|
+
id: crawlee
|
|
3
|
+
title: Using Crawlee
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
import CodeBlock from '@theme/CodeBlock';
|
|
7
|
+
|
|
8
|
+
import CrawleeBeautifulSoupExample from '!!raw-loader!./code/05_crawlee_beautifulsoup.py';
|
|
9
|
+
import CrawleeParselExample from '!!raw-loader!./code/05_crawlee_parsel.py';
|
|
10
|
+
import CrawleePlaywrightExample from '!!raw-loader!./code/05_crawlee_playwright.py';
|
|
11
|
+
|
|
12
|
+
In this guide, you'll learn how to use the [Crawlee](https://crawlee.dev/python) library in your Apify Actors.
|
|
13
|
+
|
|
14
|
+
## Introduction
|
|
15
|
+
|
|
16
|
+
[Crawlee](https://crawlee.dev/python) is a Python library for web scraping and browser automation that provides a robust and flexible framework for building web scraping tasks. It seamlessly integrates with the Apify platform and supports a variety of scraping techniques, from static HTML parsing to dynamic JavaScript-rendered content handling. Crawlee offers a range of crawlers, including HTTP-based crawlers like [`HttpCrawler`](https://crawlee.dev/python/api/class/HttpCrawler), [`BeautifulSoupCrawler`](https://crawlee.dev/python/api/class/BeautifulSoupCrawler) and [`ParselCrawler`](https://crawlee.dev/python/api/class/ParselCrawler), and browser-based crawlers like [`PlaywrightCrawler`](https://crawlee.dev/python/api/class/PlaywrightCrawler), to suit different scraping needs.
|
|
17
|
+
|
|
18
|
+
In this guide, you'll learn how to use Crawlee with [`BeautifulSoupCrawler`](https://crawlee.dev/python/api/class/BeautifulSoupCrawler), [`ParselCrawler`](https://crawlee.dev/python/api/class/ParselCrawler), and [`PlaywrightCrawler`](https://crawlee.dev/python/api/class/PlaywrightCrawler) to build Apify Actors for web scraping.
|
|
19
|
+
|
|
20
|
+
## Actor with BeautifulSoupCrawler
|
|
21
|
+
|
|
22
|
+
The [`BeautifulSoupCrawler`](https://crawlee.dev/python/api/class/BeautifulSoupCrawler) is ideal for extracting data from static HTML pages. It uses [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/bs4/doc/) for parsing and [`ImpitHttpClient`](https://crawlee.dev/python/api/class/ImpitHttpClient) for HTTP communication, ensuring efficient and lightweight scraping. If you do not need to execute JavaScript on the page, [`BeautifulSoupCrawler`](https://crawlee.dev/python/api/class/BeautifulSoupCrawler) is a great choice for your scraping tasks. Below is an example of how to use it in an Apify Actor.
|
|
23
|
+
|
|
24
|
+
<CodeBlock className="language-python">
|
|
25
|
+
{CrawleeBeautifulSoupExample}
|
|
26
|
+
</CodeBlock>
|
|
27
|
+
|
|
28
|
+
## Actor with ParselCrawler
|
|
29
|
+
|
|
30
|
+
The [`ParselCrawler`](https://crawlee.dev/python/api/class/ParselCrawler) works in the same way as [`BeautifulSoupCrawler`](https://crawlee.dev/python/api/class/BeautifulSoupCrawler), but it uses the [Parsel](https://parsel.readthedocs.io/en/latest/) library for HTML parsing. This allows for more powerful and flexible data extraction using [XPath](https://en.wikipedia.org/wiki/XPath) selectors. It should be faster than [`BeautifulSoupCrawler`](https://crawlee.dev/python/api/class/BeautifulSoupCrawler). Below is an example of how to use [`ParselCrawler`](https://crawlee.dev/python/api/class/ParselCrawler) in an Apify Actor.
|
|
31
|
+
|
|
32
|
+
<CodeBlock className="language-python">
|
|
33
|
+
{CrawleeParselExample}
|
|
34
|
+
</CodeBlock>
|
|
35
|
+
|
|
36
|
+
## Actor with PlaywrightCrawler
|
|
37
|
+
|
|
38
|
+
The [`PlaywrightCrawler`](https://crawlee.dev/python/api/class/PlaywrightCrawler) is built for handling dynamic web pages that rely on JavaScript for content rendering. Using the [Playwright](https://playwright.dev/) library, it provides a browser-based automation environment to interact with complex websites. Below is an example of how to use [`PlaywrightCrawler`](https://crawlee.dev/python/api/class/PlaywrightCrawler) in an Apify Actor.
|
|
39
|
+
|
|
40
|
+
<CodeBlock className="language-python">
|
|
41
|
+
{CrawleePlaywrightExample}
|
|
42
|
+
</CodeBlock>
|
|
43
|
+
|
|
44
|
+
## Conclusion
|
|
45
|
+
|
|
46
|
+
In this guide, you learned how to use the [Crawlee](https://crawlee.dev/python) library in your Apify Actors. By using the [`BeautifulSoupCrawler`](https://crawlee.dev/python/api/class/BeautifulSoupCrawler), [`ParselCrawler`](https://crawlee.dev/python/api/class/ParselCrawler), and [`PlaywrightCrawler`](https://crawlee.dev/python/api/class/PlaywrightCrawler) crawlers, you can efficiently scrape static or dynamic web pages, making it easy to build web scraping tasks in Python. See the [Actor templates](https://apify.com/templates/categories/python) to get started with your own scraping tasks. If you have questions or need assistance, feel free to reach out on our [GitHub](https://github.com/apify/apify-sdk-python) or join our [Discord community](https://discord.com/invite/jyEM2PRvMU). Happy scraping!
|
{apify-3.0.0rc1/docs/02_guides → apify-3.0.1b2/docs/03_guides}/code/01_beautifulsoup_httpx.py
RENAMED
|
@@ -1,9 +1,8 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
import asyncio
|
|
3
2
|
from urllib.parse import urljoin
|
|
4
3
|
|
|
4
|
+
import httpx
|
|
5
5
|
from bs4 import BeautifulSoup
|
|
6
|
-
from httpx import AsyncClient
|
|
7
6
|
|
|
8
7
|
from apify import Actor, Request
|
|
9
8
|
|
|
@@ -32,7 +31,7 @@ async def main() -> None:
|
|
|
32
31
|
await request_queue.add_request(new_request)
|
|
33
32
|
|
|
34
33
|
# Create an HTTPX client to fetch the HTML content of the URLs.
|
|
35
|
-
async with AsyncClient() as client:
|
|
34
|
+
async with httpx.AsyncClient() as client:
|
|
36
35
|
# Process the URLs from the request queue.
|
|
37
36
|
while request := await request_queue.fetch_next_request():
|
|
38
37
|
url = request.url
|
|
@@ -83,3 +82,7 @@ async def main() -> None:
|
|
|
83
82
|
finally:
|
|
84
83
|
# Mark the request as handled to ensure it is not processed again.
|
|
85
84
|
await request_queue.mark_request_as_handled(new_request)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
if __name__ == '__main__':
|
|
88
|
+
asyncio.run(main())
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from urllib.parse import urljoin
|
|
3
|
+
|
|
4
|
+
import impit
|
|
5
|
+
import parsel
|
|
6
|
+
|
|
7
|
+
from apify import Actor, Request
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
async def main() -> None:
|
|
11
|
+
# Enter the context of the Actor.
|
|
12
|
+
async with Actor:
|
|
13
|
+
# Retrieve the Actor input, and use default values if not provided.
|
|
14
|
+
actor_input = await Actor.get_input() or {}
|
|
15
|
+
start_urls = actor_input.get('start_urls', [{'url': 'https://apify.com'}])
|
|
16
|
+
max_depth = actor_input.get('max_depth', 1)
|
|
17
|
+
|
|
18
|
+
# Exit if no start URLs are provided.
|
|
19
|
+
if not start_urls:
|
|
20
|
+
Actor.log.info('No start URLs specified in Actor input, exiting...')
|
|
21
|
+
await Actor.exit()
|
|
22
|
+
|
|
23
|
+
# Open the default request queue for handling URLs to be processed.
|
|
24
|
+
request_queue = await Actor.open_request_queue()
|
|
25
|
+
|
|
26
|
+
# Enqueue the start URLs with an initial crawl depth of 0.
|
|
27
|
+
for start_url in start_urls:
|
|
28
|
+
url = start_url.get('url')
|
|
29
|
+
Actor.log.info(f'Enqueuing {url} ...')
|
|
30
|
+
new_request = Request.from_url(url, user_data={'depth': 0})
|
|
31
|
+
await request_queue.add_request(new_request)
|
|
32
|
+
|
|
33
|
+
# Create an Impit client to fetch the HTML content of the URLs.
|
|
34
|
+
async with impit.AsyncClient() as client:
|
|
35
|
+
# Process the URLs from the request queue.
|
|
36
|
+
while request := await request_queue.fetch_next_request():
|
|
37
|
+
url = request.url
|
|
38
|
+
|
|
39
|
+
if not isinstance(request.user_data['depth'], (str, int)):
|
|
40
|
+
raise TypeError('Request.depth is an unexpected type.')
|
|
41
|
+
|
|
42
|
+
depth = int(request.user_data['depth'])
|
|
43
|
+
Actor.log.info(f'Scraping {url} (depth={depth}) ...')
|
|
44
|
+
|
|
45
|
+
try:
|
|
46
|
+
# Fetch the HTTP response from the specified URL using Impit.
|
|
47
|
+
response = await client.get(url)
|
|
48
|
+
|
|
49
|
+
# Parse the HTML content using Parsel Selector.
|
|
50
|
+
selector = parsel.Selector(text=response.text)
|
|
51
|
+
|
|
52
|
+
# If the current depth is less than max_depth, find nested links
|
|
53
|
+
# and enqueue them.
|
|
54
|
+
if depth < max_depth:
|
|
55
|
+
# Extract all links using CSS selector
|
|
56
|
+
links = selector.css('a::attr(href)').getall()
|
|
57
|
+
for link_href in links:
|
|
58
|
+
link_url = urljoin(url, link_href)
|
|
59
|
+
|
|
60
|
+
if link_url.startswith(('http://', 'https://')):
|
|
61
|
+
Actor.log.info(f'Enqueuing {link_url} ...')
|
|
62
|
+
new_request = Request.from_url(
|
|
63
|
+
link_url,
|
|
64
|
+
user_data={'depth': depth + 1},
|
|
65
|
+
)
|
|
66
|
+
await request_queue.add_request(new_request)
|
|
67
|
+
|
|
68
|
+
# Extract the desired data using Parsel selectors.
|
|
69
|
+
title = selector.css('title::text').get()
|
|
70
|
+
h1s = selector.css('h1::text').getall()
|
|
71
|
+
h2s = selector.css('h2::text').getall()
|
|
72
|
+
h3s = selector.css('h3::text').getall()
|
|
73
|
+
|
|
74
|
+
data = {
|
|
75
|
+
'url': url,
|
|
76
|
+
'title': title,
|
|
77
|
+
'h1s': h1s,
|
|
78
|
+
'h2s': h2s,
|
|
79
|
+
'h3s': h3s,
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
# Store the extracted data to the default dataset.
|
|
83
|
+
await Actor.push_data(data)
|
|
84
|
+
|
|
85
|
+
except Exception:
|
|
86
|
+
Actor.log.exception(f'Cannot extract data from {url}.')
|
|
87
|
+
|
|
88
|
+
finally:
|
|
89
|
+
# Mark the request as handled to ensure it is not processed again.
|
|
90
|
+
await request_queue.mark_request_as_handled(request)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
if __name__ == '__main__':
|
|
94
|
+
asyncio.run(main())
|
|
{apify-3.0.0rc1/docs/02_guides → apify-3.0.1b2/docs/03_guides}/code/03_playwright.py
RENAMED
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
import asyncio
|
|
3
2
|
from urllib.parse import urljoin
|
|
4
3
|
|
|
5
4
|
from playwright.async_api import async_playwright
|
|
@@ -41,7 +40,7 @@ async def main() -> None:
|
|
|
41
40
|
async with async_playwright() as playwright:
|
|
42
41
|
# Configure the browser to launch in headless mode as per Actor configuration.
|
|
43
42
|
browser = await playwright.chromium.launch(
|
|
44
|
-
headless=Actor.config.headless,
|
|
43
|
+
headless=Actor.configuration.headless,
|
|
45
44
|
args=['--disable-gpu'],
|
|
46
45
|
)
|
|
47
46
|
context = await browser.new_context()
|
|
@@ -92,3 +91,7 @@ async def main() -> None:
|
|
|
92
91
|
await page.close()
|
|
93
92
|
# Mark the request as handled to ensure it is not processed again.
|
|
94
93
|
await request_queue.mark_request_as_handled(request)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
if __name__ == '__main__':
|
|
97
|
+
asyncio.run(main())
|