crawlee-1.0.1b7.tar.gz → crawlee-1.0.5b3.tar.gz
This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
This release of crawlee has been flagged as potentially problematic.
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/.github/workflows/build_and_deploy_docs.yaml +2 -2
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/.github/workflows/templates_e2e_tests.yaml +2 -2
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/.gitignore +1 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/CHANGELOG.md +57 -2
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/PKG-INFO +1 -1
- crawlee-1.0.5b3/docs/examples/code_examples/using_browser_profiles_chrome.py +54 -0
- crawlee-1.0.5b3/docs/examples/code_examples/using_browser_profiles_firefox.py +42 -0
- crawlee-1.0.5b3/docs/examples/using_browser_profile.mdx +39 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/architecture_overview.mdx +1 -1
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/avoid_blocking.mdx +1 -1
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/request_loaders.mdx +8 -2
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/09_running_in_cloud.mdx +1 -1
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/upgrading/upgrading_to_v1.md +4 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/pyproject.toml +3 -3
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_request.py +31 -20
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_service_locator.py +4 -4
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_types.py +10 -16
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_utils/recoverable_state.py +32 -8
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_utils/recurring_task.py +15 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_utils/sitemap.py +1 -1
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_utils/urls.py +9 -2
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/browsers/_browser_pool.py +4 -1
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/browsers/_playwright_browser_controller.py +1 -1
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/browsers/_playwright_browser_plugin.py +17 -3
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/browsers/_types.py +1 -1
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +3 -1
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +33 -12
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_basic/_basic_crawler.py +23 -12
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_playwright/_playwright_crawler.py +11 -4
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/fingerprint_suite/_header_generator.py +2 -2
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml +2 -2
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/project_template/{{cookiecutter.project_name}}/requirements.txt +3 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/request_loaders/_sitemap_request_loader.py +5 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/statistics/_statistics.py +15 -6
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/_file_system/_dataset_client.py +2 -2
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/_file_system/_key_value_store_client.py +2 -2
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/_file_system/_request_queue_client.py +26 -8
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/_memory/_dataset_client.py +2 -2
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/_memory/_key_value_store_client.py +2 -2
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/_memory/_request_queue_client.py +2 -2
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/_sql/_dataset_client.py +2 -2
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/_sql/_db_models.py +1 -2
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/_sql/_key_value_store_client.py +5 -4
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/_sql/_request_queue_client.py +20 -6
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/_sql/_storage_client.py +1 -1
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storages/_base.py +3 -1
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storages/_dataset.py +3 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storages/_key_value_store.py +8 -2
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storages/_request_queue.py +3 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storages/_storage_instance_manager.py +9 -1
- crawlee-1.0.5b3/src/crawlee/storages/_utils.py +11 -0
- crawlee-1.0.5b3/tests/unit/_autoscaling/test_snapshotter.py +353 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/browsers/test_playwright_browser_plugin.py +10 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/conftest.py +19 -7
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler.py +103 -3
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/crawlers/_basic/test_basic_crawler.py +58 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py +2 -2
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/crawlers/_parsel/test_parsel_crawler.py +2 -2
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/otel/test_crawler_instrumentor.py +9 -3
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/server_endpoints.py +1 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/sessions/test_session_pool.py +1 -1
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py +3 -3
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py +3 -3
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/storage_clients/_file_system/test_fs_rq_client.py +13 -5
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/storage_clients/_memory/test_memory_dataset_client.py +3 -3
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/storage_clients/_memory/test_memory_kvs_client.py +3 -3
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/storage_clients/_memory/test_memory_rq_client.py +3 -3
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/storage_clients/_sql/test_sql_dataset_client.py +6 -6
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/storage_clients/_sql/test_sql_kvs_client.py +6 -6
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/storage_clients/_sql/test_sql_rq_client.py +6 -6
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/storages/test_dataset.py +66 -34
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/storages/test_key_value_store.py +105 -33
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/storages/test_request_queue.py +126 -34
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/test_configuration.py +32 -6
- crawlee-1.0.5b3/uv.lock +4120 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/package.json +4 -4
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/css/custom.css +0 -1
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/yarn.lock +1126 -1104
- crawlee-1.0.1b7/tests/unit/_autoscaling/test_snapshotter.py +0 -333
- crawlee-1.0.1b7/uv.lock +0 -3781
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/.editorconfig +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/.github/CODEOWNERS +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/.github/pull_request_template.md +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/.github/workflows/check_pr_title.yaml +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/.github/workflows/pre_release.yaml +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/.github/workflows/release.yaml +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/.github/workflows/run_code_checks.yaml +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/.github/workflows/update_new_issue.yaml +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/.markdownlint.yaml +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/.pre-commit-config.yaml +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/CONTRIBUTING.md +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/LICENSE +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/Makefile +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/README.md +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/deployment/apify_platform.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/deployment/code_examples/apify/crawler_as_actor_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/deployment/code_examples/apify/get_public_url.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/deployment/code_examples/apify/log_with_config_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/deployment/code_examples/apify/proxy_advanced_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/deployment/code_examples/apify/proxy_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/deployment/code_examples/google/cloud_run_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/deployment/code_examples/google/google_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/deployment/google_cloud.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/deployment/google_cloud_run.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/add_data_to_dataset.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/beautifulsoup_crawler.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/capture_screenshot_using_playwright.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/capturing_page_snapshots_with_error_snapshotter.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/adaptive_playwright_crawler.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/add_data_to_dataset_bs.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/add_data_to_dataset_dataset.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/add_data_to_dataset_pw.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/beautifulsoup_crawler.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/beautifulsoup_crawler_keep_alive.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/beautifulsoup_crawler_stop.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/capture_screenshot_using_playwright.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/configure_json_logging.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/crawl_all_links_on_website_bs.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/crawl_all_links_on_website_pw.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/crawl_multiple_urls_bs.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/crawl_multiple_urls_pw.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/crawl_specific_links_on_website_bs.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/crawl_specific_links_on_website_pw.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/crawl_website_with_relative_links_all_links.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/crawl_website_with_relative_links_same_domain.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/crawl_website_with_relative_links_same_hostname.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/crawl_website_with_relative_links_same_origin.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/export_entire_dataset_to_file_csv.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/export_entire_dataset_to_file_json.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/extract_and_add_specific_links_on_website_bs.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/extract_and_add_specific_links_on_website_pw.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/fill_and_submit_web_form_crawler.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/fill_and_submit_web_form_request.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/parsel_crawler.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/parsel_crawler_with_error_snapshotter.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/playwright_block_requests.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/playwright_crawler.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/playwright_crawler_with_camoufox.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/playwright_crawler_with_error_snapshotter.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/playwright_crawler_with_fingerprint_generator.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/respect_robots_on_skipped_request.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/respect_robots_txt_file.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/code_examples/resuming_paused_crawl.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/crawl_all_links_on_website.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/crawl_multiple_urls.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/crawl_specific_links_on_website.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/crawl_website_with_relative_links.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/crawler_keep_alive.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/crawler_stop.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/export_entire_dataset_to_file.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/fill_and_submit_web_form.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/json_logging.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/parsel_crawler.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/playwright_crawler.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/playwright_crawler_adaptive.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/playwright_crawler_with_block_requests.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/playwright_crawler_with_camoufox.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/playwright_crawler_with_fingerprint_generator.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/respect_robots_txt_file.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/examples/resuming_paused_crawl.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/avoid_blocking/default_fingerprint_generator_with_args.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/avoid_blocking/playwright_with_fingerprint_generator.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/creating_web_archive/manual_archiving_parsel_crawler.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/creating_web_archive/manual_archiving_playwright_crawler.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/creating_web_archive/simple_pw_through_proxy_pywb_server.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/error_handling/change_handle_error_status.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/error_handling/disable_retry.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/error_handling/handle_proxy_error.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/http_clients/parsel_curl_impersonate_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/http_clients/parsel_httpx_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/http_clients/parsel_impit_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/http_crawlers/beautifulsoup_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/http_crawlers/custom_crawler_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/http_crawlers/http_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/http_crawlers/parsel_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/login_crawler/http_login.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/login_crawler/playwright_login.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/playwright_crawler/browser_configuration_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/playwright_crawler/multiple_launch_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/playwright_crawler/plugin_browser_configuration_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/playwright_crawler/pre_navigation_hook_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/playwright_crawler_adaptive/handler.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/playwright_crawler_adaptive/init_beautifulsoup.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/playwright_crawler_adaptive/init_parsel.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/playwright_crawler_adaptive/init_prediction.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/playwright_crawler_adaptive/pre_nav_hooks.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/playwright_crawler_stagehand/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/playwright_crawler_stagehand/browser_classes.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/playwright_crawler_stagehand/stagehand_run.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/playwright_crawler_stagehand/support_classes.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/proxy_management/inspecting_bs_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/proxy_management/inspecting_pw_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/proxy_management/integration_bs_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/proxy_management/integration_pw_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/proxy_management/quick_start_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/proxy_management/session_bs_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/proxy_management/session_pw_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/proxy_management/tiers_bs_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/proxy_management/tiers_pw_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/request_loaders/rl_basic_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/request_loaders/rl_basic_example_with_persist.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/request_loaders/rl_tandem_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/request_loaders/rl_tandem_example_explicit.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/request_loaders/sitemap_basic_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/request_loaders/sitemap_example_with_persist.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/request_loaders/sitemap_tandem_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/request_loaders/sitemap_tandem_example_explicit.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/request_router/adaptive_crawler_handlers.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/request_router/basic_request_handlers.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/request_router/custom_router_default_only.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/request_router/error_handler.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/request_router/failed_request_handler.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/request_router/http_pre_navigation.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/request_router/playwright_pre_navigation.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/request_router/simple_default_handler.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/running_in_web_server/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/running_in_web_server/crawler.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/running_in_web_server/server.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/scaling_crawlers/max_tasks_per_minute_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/scaling_crawlers/min_and_max_concurrency_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/service_locator/service_conflicts.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/service_locator/service_crawler_configuration.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/service_locator/service_crawler_event_manager.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/service_locator/service_crawler_storage_client.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/service_locator/service_locator_configuration.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/service_locator/service_locator_event_manager.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/service_locator/service_locator_storage_client.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/service_locator/service_storage_configuration.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/service_locator/service_storage_storage_client.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/session_management/multi_sessions_http.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/session_management/one_session_http.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/session_management/sm_basic.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/session_management/sm_beautifulsoup.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/session_management/sm_http.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/session_management/sm_parsel.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/session_management/sm_playwright.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/session_management/sm_standalone.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/storage_clients/custom_storage_client_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/storage_clients/file_system_storage_client_basic_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/storage_clients/file_system_storage_client_configuration_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/storage_clients/memory_storage_client_basic_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/storage_clients/registering_storage_clients_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/storage_clients/sql_storage_client_basic_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/storage_clients/sql_storage_client_configuration_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/storages/cleaning_do_not_purge_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/storages/cleaning_purge_explicitly_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/storages/dataset_basic_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/storages/dataset_with_crawler_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/storages/dataset_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/storages/helper_add_requests_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/storages/helper_enqueue_links_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/storages/kvs_basic_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/storages/kvs_with_crawler_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/storages/kvs_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/storages/opening.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/storages/rq_basic_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/storages/rq_with_crawler_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/storages/rq_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/code_examples/trace_and_monitor_crawlers/instrument_crawler.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/crawler_login.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/creating_web_archive.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/error_handling.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/http_clients.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/http_crawlers.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/playwright_crawler.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/playwright_crawler_adaptive.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/playwright_crawler_stagehand.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/proxy_management.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/request_router.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/running_in_web_server.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/scaling_crawlers.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/service_locator.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/session_management.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/storage_clients.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/storages.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/trace_and_monitor_crawlers.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/01_setting_up.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/02_first_crawler.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/03_adding_more_urls.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/04_real_world_project.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/05_crawling.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/06_scraping.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/07_saving_data.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/08_refactoring.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/code_examples/02_bs.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/code_examples/02_bs_better.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/code_examples/02_request_queue.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/code_examples/03_enqueue_strategy.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/code_examples/03_finding_new_links.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/code_examples/03_globs.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/code_examples/03_original_code.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/code_examples/03_transform_request.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/code_examples/04_sanity_check.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/code_examples/05_crawling_detail.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/code_examples/05_crawling_listing.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/code_examples/06_scraping.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/code_examples/07_final_code.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/code_examples/07_first_code.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/code_examples/08_main.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/code_examples/08_routes.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/code_examples/09_apify_sdk.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/code_examples/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/code_examples/routes.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/index.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/pyproject.toml +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/quick-start/code_examples/beautifulsoup_crawler_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/quick-start/code_examples/parsel_crawler_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/quick-start/code_examples/playwright_crawler_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/quick-start/code_examples/playwright_crawler_headful_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/quick-start/index.mdx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/upgrading/upgrading_to_v0x.md +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/renovate.json +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_autoscaling/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_autoscaling/_types.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_autoscaling/autoscaled_pool.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_autoscaling/py.typed +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_autoscaling/snapshotter.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_autoscaling/system_status.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_browserforge_workaround.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_cli.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_consts.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_log_config.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_utils/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_utils/blocked.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_utils/byte_size.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_utils/console.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_utils/context.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_utils/crypto.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_utils/docs.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_utils/file.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_utils/globs.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_utils/html_to_text.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_utils/models.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_utils/raise_if_too_many_kwargs.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_utils/requests.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_utils/robots.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_utils/system.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_utils/time.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_utils/try_import.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_utils/wait.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_utils/web.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/browsers/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/browsers/_browser_controller.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/browsers/_browser_plugin.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/browsers/_playwright_browser.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/browsers/py.typed +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/configuration.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_abstract_http/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_abstract_http/_abstract_http_parser.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_abstract_http/_http_crawling_context.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_abstract_http/py.typed +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_adaptive_playwright/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_adaptive_playwright/_result_comparator.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_adaptive_playwright/_utils.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_basic/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_basic/_basic_crawling_context.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_basic/_context_pipeline.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_basic/_logging_utils.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_basic/py.typed +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_beautifulsoup/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawler.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawling_context.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_parser.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_beautifulsoup/_utils.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_beautifulsoup/py.typed +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_http/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_http/_http_crawler.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_http/_http_parser.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_parsel/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_parsel/_parsel_crawler.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_parsel/_parsel_crawling_context.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_parsel/_parsel_parser.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_parsel/_utils.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_playwright/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_playwright/_playwright_crawling_context.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_playwright/_playwright_http_client.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_playwright/_types.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_playwright/_utils.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/_types.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/crawlers/py.typed +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/errors.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/events/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/events/_event_manager.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/events/_local_event_manager.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/events/_types.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/events/py.typed +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/fingerprint_suite/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/fingerprint_suite/_browserforge_adapter.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/fingerprint_suite/_consts.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/fingerprint_suite/_fingerprint_generator.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/fingerprint_suite/_types.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/fingerprint_suite/py.typed +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/http_clients/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/http_clients/_base.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/http_clients/_curl_impersonate.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/http_clients/_httpx.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/http_clients/_impit.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/otel/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/otel/crawler_instrumentor.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/project_template/cookiecutter.json +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/project_template/hooks/post_gen_project.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/project_template/hooks/pre_gen_project.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/project_template/templates/main.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/project_template/templates/main_beautifulsoup.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/project_template/templates/main_parsel.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/project_template/templates/main_playwright.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/project_template/templates/main_playwright_camoufox.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/project_template/templates/routes_beautifulsoup.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/project_template/templates/routes_camoufox.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/project_template/templates/routes_parsel.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/project_template/templates/routes_playwright.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/project_template/templates/routes_playwright_camoufox.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/project_template/{{cookiecutter.project_name}}/.dockerignore +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/project_template/{{cookiecutter.project_name}}/README.md +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__main__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/main.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/routes.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/proxy_configuration.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/py.typed +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/request_loaders/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/request_loaders/_request_list.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/request_loaders/_request_loader.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/request_loaders/_request_manager.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/request_loaders/_request_manager_tandem.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/router.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/sessions/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/sessions/_cookies.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/sessions/_models.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/sessions/_session.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/sessions/_session_pool.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/sessions/py.typed +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/statistics/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/statistics/_error_snapshotter.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/statistics/_error_tracker.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/statistics/_models.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/_base/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/_base/_dataset_client.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/_base/_key_value_store_client.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/_base/_request_queue_client.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/_base/_storage_client.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/_base/py.typed +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/_file_system/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/_file_system/_storage_client.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/_file_system/_utils.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/_file_system/py.typed +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/_memory/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/_memory/_storage_client.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/_memory/py.typed +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/_sql/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/_sql/_client_mixin.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/_sql/py.typed +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/models.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storage_clients/py.typed +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storages/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/storages/py.typed +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/e2e/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/e2e/conftest.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/e2e/project_template/test_static_crawlers_templates.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/e2e/project_template/utils.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/README.md +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/__init__.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/_autoscaling/test_autoscaled_pool.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/_autoscaling/test_system_status.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/_statistics/test_error_tracker.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/_statistics/test_periodic_logging.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/_statistics/test_persistence.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/_statistics/test_request_processing_record.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/_utils/test_byte_size.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/_utils/test_console.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/_utils/test_crypto.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/_utils/test_file.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/_utils/test_globs.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/_utils/test_html_to_text.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/_utils/test_measure_time.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/_utils/test_raise_if_too_many_kwargs.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/_utils/test_recurring_task.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/_utils/test_requests.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/_utils/test_robots.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/_utils/test_sitemap.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/_utils/test_system.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/_utils/test_timedelata_ms.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/_utils/test_urls.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/browsers/test_browser_pool.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/browsers/test_playwright_browser.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/browsers/test_playwright_browser_controller.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler_statistics.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawling_context.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/crawlers/_adaptive_playwright/test_predictor.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/crawlers/_basic/test_context_pipeline.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/crawlers/_http/test_http_crawler.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/crawlers/_playwright/test_playwright_crawler.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/events/test_event_manager.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/events/test_local_event_manager.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/fingerprint_suite/test_adapters.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/fingerprint_suite/test_header_generator.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/http_clients/test_http_clients.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/http_clients/test_httpx.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/proxy_configuration/test_new_proxy_info.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/proxy_configuration/test_tiers.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/request_loaders/test_request_list.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/request_loaders/test_sitemap_request_loader.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/server.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/sessions/test_cookies.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/sessions/test_models.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/sessions/test_session.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/storages/conftest.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/storages/test_request_manager_tandem.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/storages/test_storage_instance_manager.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/test_cli.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/test_log_config.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/test_router.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/tests/unit/test_service_locator.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/.eslintrc.json +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/.yarnrc.yml +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/babel.config.js +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/build_api_reference.sh +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/docusaurus.config.js +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/generate_module_shortcuts.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/patches/@docusaurus+core+3.4.0.patch +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/patches/@docusaurus+core+3.5.2.patch +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/roa-loader/index.js +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/roa-loader/package.json +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/sidebars.js +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/components/ApiLink.jsx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/components/Button.jsx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/components/Button.module.css +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/components/CopyButton.jsx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/components/CopyButton.module.css +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/components/Gradients.jsx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/components/Highlights.jsx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/components/Highlights.module.css +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/components/Homepage/HomepageCliExample.jsx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/components/Homepage/HomepageCliExample.module.css +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/components/Homepage/HomepageCtaSection.jsx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/components/Homepage/HomepageCtaSection.module.css +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/components/Homepage/HomepageHeroSection.jsx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/components/Homepage/HomepageHeroSection.module.css +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/components/Homepage/LanguageInfoWidget.jsx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/components/Homepage/LanguageInfoWidget.module.css +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/components/Homepage/LanguageSwitch.jsx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/components/Homepage/LanguageSwitch.module.css +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/components/Homepage/RiverSection.jsx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/components/Homepage/RiverSection.module.css +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/components/Homepage/ThreeCardsWithIcon.jsx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/components/Homepage/ThreeCardsWithIcon.module.css +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/components/Homepage/animated-crawlee-logo-dark.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/components/Homepage/animated-crawlee-logo-light.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/components/RunnableCodeBlock.jsx +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/components/RunnableCodeBlock.module.css +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/pages/home_page_example.py +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/pages/index.js +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/pages/index.module.css +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/theme/ColorModeToggle/dark-mode-icon.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/theme/ColorModeToggle/index.js +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/theme/ColorModeToggle/light-mode-icon.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/theme/ColorModeToggle/styles.module.css +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/theme/DocItem/Layout/index.js +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/theme/DocItem/Layout/styles.module.css +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/theme/Footer/LinkItem/index.js +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/theme/Footer/LinkItem/index.module.css +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/theme/Footer/index.js +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/theme/Footer/index.module.css +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/theme/MDXComponents/A.js +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/theme/Navbar/Content/index.js +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/theme/Navbar/Content/styles.module.css +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/theme/Navbar/Logo/index.js +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/theme/Navbar/Logo/index.module.css +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/theme/Navbar/MobileSidebar/Header/index.js +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/theme/Navbar/MobileSidebar/Header/index.module.css +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/theme/Navbar/MobileSidebar/Layout/index.js +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/theme/Navbar/MobileSidebar/PrimaryMenu/index.js +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/theme/Navbar/MobileSidebar/index.js +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/src/theme/NavbarItem/ComponentTypes.js +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/.nojekyll +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/font/lota.woff +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/font/lota.woff2 +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/API.png +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/apify_logo.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/apify_og_SDK.png +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/apify_sdk.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/apify_sdk_white.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/arrow_right.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/auto-scaling-dark.webp +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/auto-scaling-light.webp +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/check.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/chrome-scrape-dark.gif +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/chrome-scrape-light.gif +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/cloud_icon.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/community-dark-icon.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/community-light-icon.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/crawlee-dark-new.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/crawlee-dark.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/crawlee-javascript-dark.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/crawlee-javascript-light.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/crawlee-light-new.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/crawlee-light.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/crawlee-logo-monocolor.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/crawlee-logo.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/crawlee-python-dark.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/crawlee-python-light.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/crawlee-python-og.png +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/defaults-dark-icon.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/defaults-light-icon.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/discord-brand-dark.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/discord-brand.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/docusaurus.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/external-link.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/favicon.ico +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/favorite-tools-dark.webp +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/favorite-tools-light.webp +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/features/auto-scaling.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/features/automate-everything.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/features/fingerprints.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/features/node-requests.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/features/runs-on-py.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/features/storage.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/features/works-everywhere.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/fill-and-submit-web-form/00.jpg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/fill-and-submit-web-form/01.jpg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/fill-and-submit-web-form/02.jpg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/fill-and-submit-web-form/03.jpg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/getting-started/current-price.jpg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/getting-started/scraping-practice.jpg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/getting-started/select-an-element.jpg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/getting-started/selected-element.jpg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/getting-started/sku.jpg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/getting-started/title.jpg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/github-brand-dark.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/github-brand.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/guides/jaeger_otel_search_view_example.png +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/guides/jaeger_otel_trace_example.png +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/hearth copy.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/hearth.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/javascript_logo.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/js_file.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/logo-big.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/logo-blur.png +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/logo-blur.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/logo-zoom.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/menu-arrows.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/oss_logo.png +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/puppeteer-live-view-dashboard.png +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/puppeteer-live-view-detail.png +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/queue-dark-icon.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/queue-light-icon.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/resuming-paused-crawl/00.webp +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/resuming-paused-crawl/01.webp +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/robot.png +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/routing-dark-icon.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/routing-light-icon.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/scraping-utils-dark-icon.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/scraping-utils-light-icon.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/smart-proxy-dark.webp +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/smart-proxy-light.webp +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/source_code.png +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/system.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/triangles_dark.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/triangles_light.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/workflow.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/zero-setup-dark-icon.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/img/zero-setup-light-icon.svg +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/js/custom.js +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/static/robots.txt +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/tools/docs-prettier.config.js +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/tools/utils/externalLink.js +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/tools/website_gif/chrome-scrape-dark.gif +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/tools/website_gif/chrome-scrape-dark.mp4 +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/tools/website_gif/chrome-scrape-light.gif +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/tools/website_gif/chrome-scrape-light.mp4 +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/tools/website_gif/website_gif.mjs +0 -0
- {crawlee-1.0.1b7 → crawlee-1.0.5b3}/website/tsconfig.eslint.json +0 -0
{crawlee-1.0.1b7 → crawlee-1.0.5b3}/.github/workflows/build_and_deploy_docs.yaml

```diff
@@ -30,7 +30,7 @@ jobs:
           ref: ${{ github.event_name == 'workflow_call' && inputs.ref || github.ref }}
 
       - name: Set up Node
-        uses: actions/setup-node@
+        uses: actions/setup-node@v6
         with:
           node-version: ${{ env.NODE_VERSION }}
 
@@ -40,7 +40,7 @@ jobs:
           python-version: ${{ env.PYTHON_VERSION }}
 
       - name: Set up uv package manager
-        uses: astral-sh/setup-uv@
+        uses: astral-sh/setup-uv@v7
         with:
          python-version: ${{ env.PYTHON_VERSION }}
 
```
{crawlee-1.0.1b7 → crawlee-1.0.5b3}/.github/workflows/templates_e2e_tests.yaml

```diff
@@ -27,7 +27,7 @@ jobs:
        uses: actions/checkout@v5
 
      - name: Setup node
-       uses: actions/setup-node@
+       uses: actions/setup-node@v6
       with:
          node-version: ${{ env.NODE_VERSION }}
 
@@ -44,7 +44,7 @@ jobs:
        run: pipx install poetry
 
      - name: Set up uv package manager
-       uses: astral-sh/setup-uv@
+       uses: astral-sh/setup-uv@v7
       with:
          python-version: ${{ env.PYTHON_VERSION }}
 
```
{crawlee-1.0.1b7 → crawlee-1.0.5b3}/CHANGELOG.md

```diff
@@ -3,16 +3,61 @@
 All notable changes to this project will be documented in this file.
 
 <!-- git-cliff-unreleased-start -->
-## 1.0.
+## 1.0.5 - **not yet released**
+
+### 🚀 Features
+
+- Add `chrome` `BrowserType` for `PlaywrightCrawler` to use the Chrome browser ([#1487](https://github.com/apify/crawlee-python/pull/1487)) ([b06937b](https://github.com/apify/crawlee-python/commit/b06937bbc3afe3c936b554bfc503365c1b2c526b)) by [@Mantisus](https://github.com/Mantisus), closes [#1071](https://github.com/apify/crawlee-python/issues/1071)
 
 ### 🐛 Bug Fixes
 
--
+- Improve indexing of the `request_queue_records` table for `SqlRequestQueueClient` ([#1527](https://github.com/apify/crawlee-python/pull/1527)) ([6509534](https://github.com/apify/crawlee-python/commit/65095346a9d8b703b10c91e0510154c3c48a4176)) by [@Mantisus](https://github.com/Mantisus), closes [#1526](https://github.com/apify/crawlee-python/issues/1526)
 
 
 <!-- git-cliff-unreleased-end -->
+## [1.0.4](https://github.com/apify/crawlee-python/releases/tag/v1.0.4) (2025-10-24)
+
+### 🐛 Bug Fixes
+
+- Respect `enqueue_strategy` in `enqueue_links` ([#1505](https://github.com/apify/crawlee-python/pull/1505)) ([6ee04bc](https://github.com/apify/crawlee-python/commit/6ee04bc08c50a70f2e956a79d4ce5072a726c3a8)) by [@Mantisus](https://github.com/Mantisus), closes [#1504](https://github.com/apify/crawlee-python/issues/1504)
+- Exclude incorrect links before checking `robots.txt` ([#1502](https://github.com/apify/crawlee-python/pull/1502)) ([3273da5](https://github.com/apify/crawlee-python/commit/3273da5fee62ec9254666b376f382474c3532a56)) by [@Mantisus](https://github.com/Mantisus), closes [#1499](https://github.com/apify/crawlee-python/issues/1499)
+- Resolve compatibility issue between `SqlStorageClient` and `AdaptivePlaywrightCrawler` ([#1496](https://github.com/apify/crawlee-python/pull/1496)) ([ce172c4](https://github.com/apify/crawlee-python/commit/ce172c425a8643a1d4c919db4f5e5a6e47e91deb)) by [@Mantisus](https://github.com/Mantisus), closes [#1495](https://github.com/apify/crawlee-python/issues/1495)
+- Fix `BasicCrawler` statistics persistence ([#1490](https://github.com/apify/crawlee-python/pull/1490)) ([1eb1c19](https://github.com/apify/crawlee-python/commit/1eb1c19aa6f9dda4a0e3f7eda23f77a554f95076)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1501](https://github.com/apify/crawlee-python/issues/1501)
+- Save context state in result for `AdaptivePlaywrightCrawler` after isolated processing in `SubCrawler` ([#1488](https://github.com/apify/crawlee-python/pull/1488)) ([62b7c70](https://github.com/apify/crawlee-python/commit/62b7c70b54085fc65a660062028014f4502beba9)) by [@Mantisus](https://github.com/Mantisus), closes [#1483](https://github.com/apify/crawlee-python/issues/1483)
+
+
+## [1.0.3](https://github.com/apify/crawlee-python/releases/tag/v1.0.3) (2025-10-17)
+
+### 🐛 Bug Fixes
+
+- Add support for Pydantic v2.12 ([#1471](https://github.com/apify/crawlee-python/pull/1471)) ([35c1108](https://github.com/apify/crawlee-python/commit/35c110878c2f445a2866be2522ea8703e9b371dd)) by [@Mantisus](https://github.com/Mantisus), closes [#1464](https://github.com/apify/crawlee-python/issues/1464)
+- Fix database version warning message ([#1485](https://github.com/apify/crawlee-python/pull/1485)) ([18a545e](https://github.com/apify/crawlee-python/commit/18a545ee8add92e844acd0068f9cb8580a82e1c9)) by [@Mantisus](https://github.com/Mantisus)
+- Fix `reclaim_request` in `SqlRequestQueueClient` to correctly update the request state ([#1486](https://github.com/apify/crawlee-python/pull/1486)) ([1502469](https://github.com/apify/crawlee-python/commit/150246957f8f7f1ceb77bb77e3a02a903c50cae1)) by [@Mantisus](https://github.com/Mantisus), closes [#1484](https://github.com/apify/crawlee-python/issues/1484)
+- Fix `KeyValueStore.auto_saved_value` failing in some scenarios ([#1438](https://github.com/apify/crawlee-python/pull/1438)) ([b35dee7](https://github.com/apify/crawlee-python/commit/b35dee78180e57161b826641d45a61b8d8f6ef51)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1354](https://github.com/apify/crawlee-python/issues/1354)
+
+
+## [1.0.2](https://github.com/apify/crawlee-python/releases/tag/v1.0.2) (2025-10-08)
+
+### 🐛 Bug Fixes
+
+- Use Self type in the open() method of storage clients ([#1462](https://github.com/apify/crawlee-python/pull/1462)) ([4ec6f6c](https://github.com/apify/crawlee-python/commit/4ec6f6c08f81632197f602ff99151338b3eba6e7)) by [@janbuchar](https://github.com/janbuchar)
+- Add storages name validation ([#1457](https://github.com/apify/crawlee-python/pull/1457)) ([84de11a](https://github.com/apify/crawlee-python/commit/84de11a3a603503076f5b7df487c9abab68a9015)) by [@Mantisus](https://github.com/Mantisus), closes [#1434](https://github.com/apify/crawlee-python/issues/1434)
+- Pin pydantic version to <2.12.0 to avoid compatibility issues ([#1467](https://github.com/apify/crawlee-python/pull/1467)) ([f11b86f](https://github.com/apify/crawlee-python/commit/f11b86f7ed57f98e83dc1b52f15f2017a919bf59)) by [@vdusek](https://github.com/vdusek)
+
+
+## [1.0.1](https://github.com/apify/crawlee-python/releases/tag/v1.0.1) (2025-10-06)
+
+### 🐛 Bug Fixes
+
+- Fix memory leak in `PlaywrightCrawler` on browser context creation ([#1446](https://github.com/apify/crawlee-python/pull/1446)) ([bb181e5](https://github.com/apify/crawlee-python/commit/bb181e58d8070fba38e62d6e57fe981a00e5f035)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1443](https://github.com/apify/crawlee-python/issues/1443)
+- Update templates to handle optional httpx client ([#1440](https://github.com/apify/crawlee-python/pull/1440)) ([c087efd](https://github.com/apify/crawlee-python/commit/c087efd39baedf46ca3e5cae1ddc1acd6396e6c1)) by [@Pijukatel](https://github.com/Pijukatel)
+
+
 ## [1.0.0](https://github.com/apify/crawlee-python/releases/tag/v1.0.0) (2025-09-29)
 
+- Check out the [Release blog post](https://crawlee.dev/blog/crawlee-for-python-v1) for more details.
+- Check out the [Upgrading guide](https://crawlee.dev/python/docs/upgrading/upgrading-to-v1) to ensure a smooth update.
+
 ### 🚀 Features
 
 - Add utility for load and parse Sitemap and `SitemapRequestLoader` ([#1169](https://github.com/apify/crawlee-python/pull/1169)) ([66599f8](https://github.com/apify/crawlee-python/commit/66599f8d085f3a8622e130019b6fdce2325737de)) by [@Mantisus](https://github.com/Mantisus), closes [#1161](https://github.com/apify/crawlee-python/issues/1161)
@@ -195,6 +240,9 @@ All notable changes to this project will be documented in this file.
 
 ## [0.6.0](https://github.com/apify/crawlee-python/releases/tag/v0.6.0) (2025-03-03)
 
+- Check out the [Release blog post](https://crawlee.dev/blog/crawlee-for-python-v06) for more details.
+- Check out the [Upgrading guide](https://crawlee.dev/python/docs/upgrading/upgrading-to-v0x#upgrading-to-v06) to ensure a smooth update.
+
 ### 🚀 Features
 
 - Integrate browserforge fingerprints ([#829](https://github.com/apify/crawlee-python/pull/829)) ([2b156b4](https://github.com/apify/crawlee-python/commit/2b156b4ba688f9111195422e6058dff30eb1f782)) by [@Pijukatel](https://github.com/Pijukatel), closes [#549](https://github.com/apify/crawlee-python/issues/549)
@@ -275,6 +323,9 @@ All notable changes to this project will be documented in this file.
 
 ## [0.5.0](https://github.com/apify/crawlee-python/releases/tag/v0.5.0) (2025-01-02)
 
+- Check out the [Release blog post](https://crawlee.dev/blog/crawlee-for-python-v05) for more details.
+- Check out the [Upgrading guide](https://crawlee.dev/python/docs/upgrading/upgrading-to-v0x#upgrading-to-v05) to ensure a smooth update.
+
 ### 🚀 Features
 
 - Add possibility to use None as no proxy in tiered proxies ([#760](https://github.com/apify/crawlee-python/pull/760)) ([0fbd017](https://github.com/apify/crawlee-python/commit/0fbd01723b9fe2e3410e0f358cab2f22848b08d0)) by [@Pijukatel](https://github.com/Pijukatel), closes [#687](https://github.com/apify/crawlee-python/issues/687)
@@ -366,6 +417,8 @@ All notable changes to this project will be documented in this file.
 
 ## [0.4.0](https://github.com/apify/crawlee-python/releases/tag/v0.4.0) (2024-11-01)
 
+- Check out the [Upgrading guide](https://crawlee.dev/python/docs/upgrading/upgrading-to-v0x#upgrading-to-v04) to ensure a smooth update.
+
 ### 🚀 Features
 
 - [**breaking**] Add headers in unique key computation ([#609](https://github.com/apify/crawlee-python/pull/609)) ([6c4746f](https://github.com/apify/crawlee-python/commit/6c4746fa8ff86952a812b32a1d70dc910e76b43e)) by [@Prathamesh010](https://github.com/Prathamesh010), closes [#548](https://github.com/apify/crawlee-python/issues/548)
@@ -475,6 +528,8 @@ All notable changes to this project will be documented in this file.
 
 ## [0.3.0](https://github.com/apify/crawlee-python/releases/tag/v0.3.0) (2024-08-27)
 
+- Check out the [Upgrading guide](https://crawlee.dev/python/docs/upgrading/upgrading-to-v0x#upgrading-to-v03) to ensure a smooth update.
+
 ### 🚀 Features
 
 - Implement ParselCrawler that adds support for Parsel ([#348](https://github.com/apify/crawlee-python/pull/348)) ([a3832e5](https://github.com/apify/crawlee-python/commit/a3832e527f022f32cce4a80055da3b7967b74522)) by [@asymness](https://github.com/asymness), closes [#335](https://github.com/apify/crawlee-python/issues/335)
```
crawlee-1.0.5b3/docs/examples/code_examples/using_browser_profiles_chrome.py (new file, +54 lines)

```python
import asyncio
import shutil
from pathlib import Path
from tempfile import TemporaryDirectory

from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext

# Profile name to use (usually 'Default' for single profile setups)
PROFILE_NAME = 'Default'

# Paths to Chrome profiles in your system (example for Windows)
# Use `chrome://version/` to find your profile path
PROFILE_PATH = Path(Path.home(), 'AppData', 'Local', 'Google', 'Chrome', 'User Data')


async def main() -> None:
    # Create a temporary folder to copy the profile to
    with TemporaryDirectory(prefix='crawlee-') as tmpdirname:
        tmp_profile_dir = Path(tmpdirname)

        # Copy the profile to a temporary folder
        shutil.copytree(
            PROFILE_PATH / PROFILE_NAME,
            tmp_profile_dir / PROFILE_NAME,
            dirs_exist_ok=True,
        )

        crawler = PlaywrightCrawler(
            headless=False,
            # Use the installed Chrome browser
            browser_type='chrome',
            # Disable fingerprints to preserve profile identity
            fingerprint_generator=None,
            # Set user data directory to temp folder
            user_data_dir=tmp_profile_dir,
            browser_launch_options={
                # Slow down actions to mimic human behavior
                'slow_mo': 200,
                'args': [
                    # Use the specified profile
                    f'--profile-directory={PROFILE_NAME}',
                ],
            },
        )

        @crawler.router.default_handler
        async def default_handler(context: PlaywrightCrawlingContext) -> None:
            context.log.info(f'Visiting {context.request.url}')

        await crawler.run(['https://crawlee.dev/'])


if __name__ == '__main__':
    asyncio.run(main())
```
crawlee-1.0.5b3/docs/examples/code_examples/using_browser_profiles_firefox.py (new file, +42 lines)

```python
import asyncio
from pathlib import Path

from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext

# Replace this with your actual Firefox profile name
# Find it at about:profiles in Firefox
PROFILE_NAME = 'your-profile-name-here'

# Paths to Firefox profiles in your system (example for Windows)
# Use `about:profiles` to find your profile path
PROFILE_PATH = Path(
    Path.home(), 'AppData', 'Roaming', 'Mozilla', 'Firefox', 'Profiles', PROFILE_NAME
)


async def main() -> None:
    crawler = PlaywrightCrawler(
        # Use Firefox browser type
        browser_type='firefox',
        # Disable fingerprints to use the profile as is
        fingerprint_generator=None,
        headless=False,
        # Path to your Firefox profile
        user_data_dir=PROFILE_PATH,
        browser_launch_options={
            'args': [
                # Required to avoid version conflicts
                '--allow-downgrade'
            ]
        },
    )

    @crawler.router.default_handler
    async def default_handler(context: PlaywrightCrawlingContext) -> None:
        context.log.info(f'Visiting {context.request.url}')

    await crawler.run(['https://crawlee.dev/'])


if __name__ == '__main__':
    asyncio.run(main())
```
crawlee-1.0.5b3/docs/examples/using_browser_profile.mdx (new file, +39 lines)

```mdx
---
id: using_browser_profile
title: Using browser profile
---

import ApiLink from '@site/src/components/ApiLink';

import CodeBlock from '@theme/CodeBlock';

import ChromeProfileExample from '!!raw-loader!./code_examples/using_browser_profiles_chrome.py';
import FirefoxProfileExample from '!!raw-loader!./code_examples/using_browser_profiles_firefox.py';

This example demonstrates how to run <ApiLink to="class/PlaywrightCrawler">`PlaywrightCrawler`</ApiLink> using your local browser profile from [Chrome](https://www.google.com/intl/us/chrome/) or [Firefox](https://www.firefox.com/).

Using browser profiles allows you to leverage existing login sessions, saved passwords, bookmarks, and other personalized browser data during crawling. This can be particularly useful for testing scenarios or when you need to access content that requires authentication.

## Chrome browser

To run <ApiLink to="class/PlaywrightCrawler">`PlaywrightCrawler`</ApiLink> with your Chrome profile, you need to know the path to your profile files. You can find this information by entering `chrome://version/` as a URL in your Chrome browser. If you have multiple profiles, pay attention to the profile name - if you only have one profile, it's always `Default`.

:::warning Profile access limitation
Due to [Chrome's security policies](https://developer.chrome.com/blog/remote-debugging-port), automation cannot use your main browsing profile directly. The example copies your profile to a temporary location as a workaround.
:::

Make sure you don't have any running Chrome browser processes before running this code:

<CodeBlock className="language-python" language="python">
    {ChromeProfileExample}
</CodeBlock>

## Firefox browser

To find the path to your Firefox profile, enter `about:profiles` as a URL in your Firefox browser. Unlike Chrome, you can use your standard profile path directly without copying it first.

Make sure you don't have any running Firefox browser processes before running this code:

<CodeBlock className="language-python" language="python">
    {FirefoxProfileExample}
</CodeBlock>
```
{crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/architecture_overview.mdx

```diff
@@ -291,7 +291,7 @@ Request loaders provide a subset of <ApiLink to="class/RequestQueue">`RequestQue
 
 - <ApiLink to="class/RequestLoader">`RequestLoader`</ApiLink> - Base interface for read-only access to a stream of requests, with capabilities like fetching the next request, marking as handled, and status checking.
 - <ApiLink to="class/RequestList">`RequestList`</ApiLink> - Lightweight in-memory implementation of `RequestLoader` for managing static lists of URLs.
-- <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> -
+- <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> - A specialized loader that reads URLs from XML and plain-text sitemaps following the [Sitemaps protocol](https://www.sitemaps.org/protocol.html) with filtering capabilities.
 
 ### Request managers
 
```
{crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/avoid_blocking.mdx

```diff
@@ -25,7 +25,7 @@ Changing browser fingerprints can be a tedious job. Luckily, Crawlee provides th
   {PlaywrightDefaultFingerprintGenerator}
 </RunnableCodeBlock>
 
-In certain cases we want to narrow down the fingerprints used - e.g. specify a certain operating system, locale or browser. This is also possible with Crawlee - the crawler can have the generation algorithm customized to reflect the particular browser version and many more. For description of fingerprint generation options please see <ApiLink to="class/HeaderGeneratorOptions">`HeaderGeneratorOptions`</ApiLink>, <ApiLink to="class/ScreenOptions">`ScreenOptions`</ApiLink> and <ApiLink to="class/BrowserforgeFingerprintGenerator#__init__">`DefaultFingerprintGenerator.__init__`</ApiLink> See the example
+In certain cases we want to narrow down the fingerprints used - e.g. specify a certain operating system, locale or browser. This is also possible with Crawlee - the crawler can have the generation algorithm customized to reflect the particular browser version and many more. For description of fingerprint generation options please see <ApiLink to="class/HeaderGeneratorOptions">`HeaderGeneratorOptions`</ApiLink>, <ApiLink to="class/ScreenOptions">`ScreenOptions`</ApiLink> and <ApiLink to="class/BrowserforgeFingerprintGenerator#__init__">`DefaultFingerprintGenerator.__init__`</ApiLink> See the example below:
 
 <CodeBlock className="language-python">
   {PlaywrightDefaultFingerprintGeneratorWithArgs}
```
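The `PlaywrightDefaultFingerprintGeneratorWithArgs` example referenced above is not reproduced in this diff, so here is a hedged sketch of what narrowing the generated fingerprints can look like. The specific option fields used below (`browsers`, `operating_systems`, `min_width`) are assumptions about `HeaderGeneratorOptions` and `ScreenOptions`, not taken from the diff itself:

```python
# A hedged sketch of customizing fingerprint generation; the option field names
# here are assumptions based on HeaderGeneratorOptions / ScreenOptions.
import asyncio

from crawlee.crawlers import PlaywrightCrawler
from crawlee.fingerprint_suite import (
    DefaultFingerprintGenerator,
    HeaderGeneratorOptions,
    ScreenOptions,
)


async def main() -> None:
    fingerprint_generator = DefaultFingerprintGenerator(
        header_options=HeaderGeneratorOptions(browsers=['chromium'], operating_systems=['windows']),
        screen_options=ScreenOptions(min_width=1280),
    )

    # The generator is passed to the crawler instead of the default one.
    crawler = PlaywrightCrawler(fingerprint_generator=fingerprint_generator)
    await crawler.run(['https://crawlee.dev/'])


if __name__ == '__main__':
    asyncio.run(main())
```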
{crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/guides/request_loaders.mdx

```diff
@@ -31,7 +31,7 @@ The [`request_loaders`](https://github.com/apify/crawlee-python/tree/master/src/
 And specific request loader implementations:
 
 - <ApiLink to="class/RequestList">`RequestList`</ApiLink>: A lightweight implementation for managing a static list of URLs.
-- <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink>: A specialized loader that reads URLs from XML sitemaps with filtering capabilities.
+- <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink>: A specialized loader that reads URLs from XML and plain-text sitemaps following the [Sitemaps protocol](https://www.sitemaps.org/protocol.html) with filtering capabilities.
 
 Below is a class diagram that illustrates the relationships between these components and the <ApiLink to="class/RequestQueue">`RequestQueue`</ApiLink>:
 
@@ -130,7 +130,13 @@ To enable persistence, provide `persist_state_key` and optionally `persist_reque
 
 ### Sitemap request loader
 
-The <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> is a specialized request loader that reads URLs from
+The <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> is a specialized request loader that reads URLs from sitemaps following the [Sitemaps protocol](https://www.sitemaps.org/protocol.html). It supports both XML and plain text sitemap formats. It's particularly useful when you want to crawl a website systematically by following its sitemap structure.
+
+:::note
+The `SitemapRequestLoader` is designed specifically for sitemaps that follow the standard Sitemaps protocol. HTML pages containing links are not supported by this loader - those should be handled by regular crawlers using the `enqueue_links` functionality.
+:::
+
+The loader supports filtering URLs using glob patterns and regular expressions, allowing you to include or exclude specific types of URLs. The <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> provides streaming processing of sitemaps, ensuring efficient memory usage without loading the entire sitemap into memory.
 
 <RunnableCodeBlock className="language-python" language="python">
   {SitemapExample}
```
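The `SitemapExample` snippet referenced by the guide is not included in this diff. As a rough illustration of the filtering described above, the sketch below builds the loader with a glob-based include filter and drains it through the generic request-loader interface; the constructor arguments (`sitemap_urls`, `include`) and the `Glob` helper are assumptions based on the documented capabilities, while the fetch/mark-handled methods come from the `RequestLoader` interface described earlier:

```python
# A hedged sketch of SitemapRequestLoader usage; constructor parameter names
# are assumptions, only the generic RequestLoader methods are documented above.
import asyncio

from crawlee import Glob
from crawlee.request_loaders import SitemapRequestLoader


async def main() -> None:
    # Stream URLs from a sitemap, keeping only documentation pages.
    loader = SitemapRequestLoader(
        sitemap_urls=['https://crawlee.dev/sitemap.xml'],
        include=[Glob('https://crawlee.dev/docs/**')],
    )

    # Fetch requests one by one and mark each as handled.
    while request := await loader.fetch_next_request():
        print(request.url)
        await loader.mark_request_as_handled(request)


if __name__ == '__main__':
    asyncio.run(main())
```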
{crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/introduction/09_running_in_cloud.mdx

```diff
@@ -50,7 +50,7 @@ apify login
 
 Now that you have your account set up, you will need to adjust the code a tiny bit. We will use the [Apify SDK](https://docs.apify.com/sdk/python/), which will help us to wire the Crawlee storages (like the [`RequestQueue`](https://docs.apify.com/sdk/python/reference/class/RequestQueue)) to their Apify platform counterparts - otherwise Crawlee would keep things only in memory.
 
-Open your `src/main.py` file, and wrap
+Open your `src/main.py` file, and wrap everything in your `main` function with the [`Actor`](https://docs.apify.com/sdk/python/reference/class/Actor) context manager. Your code should look like this:
 
 <CodeBlock className="language-python" title="src/main.py">
   {MainExample}
```
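The referenced `MainExample` is not reproduced in this diff, so here is a minimal sketch of what wrapping `main` in the Apify `Actor` context manager typically looks like; the crawler class and handler used here are illustrative only:

```python
import asyncio

from apify import Actor

from crawlee.crawlers import BeautifulSoupCrawler, BeautifulSoupCrawlingContext


async def main() -> None:
    # Everything the crawler does now runs inside the Actor context,
    # which wires Crawlee storages to their Apify platform counterparts.
    async with Actor:
        crawler = BeautifulSoupCrawler()

        @crawler.router.default_handler
        async def handler(context: BeautifulSoupCrawlingContext) -> None:
            context.log.info(f'Visiting {context.request.url}')

        await crawler.run(['https://crawlee.dev/'])


if __name__ == '__main__':
    asyncio.run(main())
```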
{crawlee-1.0.1b7 → crawlee-1.0.5b3}/docs/upgrading/upgrading_to_v1.md

````diff
@@ -333,3 +333,7 @@ async def main() -> None:
 
     await crawler.run(['https://crawlee.dev/'])
 ```
+
+### New storage naming restrictions
+
+We've introduced naming restrictions for storages to ensure compatibility with Apify Platform requirements and prevent potential conflicts. Storage names may include only letters (a–z, A–Z), digits (0–9), and hyphens (-), with hyphens allowed only in the middle of the name (for example, my-storage-1).
````
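To make the new rule concrete, a small hedged sketch follows; the store names are arbitrary examples, and the exact exception raised for an invalid name is not specified here:

```python
import asyncio

from crawlee.storages import KeyValueStore


async def main() -> None:
    # Valid: letters, digits and hyphens, with hyphens only in the middle.
    await KeyValueStore.open(name='my-storage-1')

    # Invalid under the new validation (these would be rejected):
    #   'my_storage'   - underscores are not allowed
    #   '-my-storage'  - a leading (or trailing) hyphen is not allowed


if __name__ == '__main__':
    asyncio.run(main())
```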
{crawlee-1.0.1b7 → crawlee-1.0.5b3}/pyproject.toml

```diff
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "crawlee"
-version = "1.0.
+version = "1.0.5b3"
 description = "Crawlee for Python"
 authors = [{ name = "Apify Technologies s.r.o.", email = "support@apify.com" }]
 license = { file = "LICENSE" }
@@ -107,14 +107,14 @@ dev = [
     "pytest-timeout~=2.4.0",
     "pytest-xdist~=3.8.0",
     "pytest~=8.4.0",
-    "ruff~=0.
+    "ruff~=0.14.0",
     "setuptools", # setuptools are used by pytest, but not explicitly required
     "types-beautifulsoup4~=4.12.0.20240229",
     "types-cachetools~=6.2.0.20250827",
     "types-colorama~=0.4.15.20240106",
     "types-psutil~=7.0.0.20250218",
     "types-python-dateutil~=2.9.0.20240316",
-    "uvicorn[standard]~=0.
+    "uvicorn[standard]~=0.35.0", # https://github.com/apify/crawlee-python/issues/1441
 ]
 
 [tool.hatch.build.targets.wheel]
```
{crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_request.py

```diff
@@ -185,9 +185,6 @@ class Request(BaseModel):
     method: HttpMethod = 'GET'
     """HTTP request method."""
 
-    headers: Annotated[HttpHeaders, Field(default_factory=HttpHeaders)] = HttpHeaders()
-    """HTTP request headers."""
-
     payload: Annotated[
         HttpPayload | None,
         BeforeValidator(lambda v: v.encode() if isinstance(v, str) else v),
@@ -195,23 +192,37 @@
     ] = None
     """HTTP request payload."""
 
-    [17 removed lines whose content is not captured in the source rendering]
+    # Workaround for pydantic 2.12 and mypy type checking issue for Annotated with default_factory
+    if TYPE_CHECKING:
+        headers: HttpHeaders = HttpHeaders()
+        """HTTP request headers."""
+
+        user_data: dict[str, JsonSerializable] = {}
+        """Custom user data assigned to the request. Use this to save any request related data to the
+        request's scope, keeping them accessible on retries, failures etc.
+        """
+
+    else:
+        headers: Annotated[HttpHeaders, Field(default_factory=HttpHeaders)]
+        """HTTP request headers."""
+
+        user_data: Annotated[
+            dict[str, JsonSerializable],  # Internally, the model contains `UserData`, this is just for convenience
+            Field(alias='userData', default_factory=lambda: UserData()),
+            PlainValidator(user_data_adapter.validate_python),
+            PlainSerializer(
+                lambda instance: user_data_adapter.dump_python(
+                    instance,
+                    by_alias=True,
+                    exclude_none=True,
+                    exclude_unset=True,
+                    exclude_defaults=True,
+                )
+            ),
+        ]
+        """Custom user data assigned to the request. Use this to save any request related data to the
+        request's scope, keeping them accessible on retries, failures etc.
+        """
 
     retry_count: Annotated[int, Field(alias='retryCount')] = 0
     """Number of times the request has been retried."""
```
{crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_service_locator.py

```diff
@@ -38,7 +38,7 @@ class ServiceLocator:
     def get_configuration(self) -> Configuration:
         """Get the configuration."""
         if self._configuration is None:
-            logger.
+            logger.debug('No configuration set, implicitly creating and using default Configuration.')
             self._configuration = Configuration()
 
         return self._configuration
@@ -63,9 +63,9 @@ class ServiceLocator:
     def get_event_manager(self) -> EventManager:
         """Get the event manager."""
         if self._event_manager is None:
-            logger.
+            logger.debug('No event manager set, implicitly creating and using default LocalEventManager.')
             if self._configuration is None:
-                logger.
+                logger.debug(
                     'Implicit creation of event manager will implicitly set configuration as side effect. '
                     'It is advised to explicitly first set the configuration instead.'
                 )
@@ -93,7 +93,7 @@ class ServiceLocator:
     def get_storage_client(self) -> StorageClient:
         """Get the storage client."""
         if self._storage_client is None:
-            logger.
+            logger.debug('No storage client set, implicitly creating and using default FileSystemStorageClient.')
             if self._configuration is None:
                 logger.warning(
                     'Implicit creation of storage client will implicitly set configuration as side effect. '
```
{crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_types.py

```diff
@@ -3,17 +3,7 @@ from __future__ import annotations
 
 import dataclasses
 from collections.abc import Callable, Iterator, Mapping
 from dataclasses import dataclass
-from typing import (
-    TYPE_CHECKING,
-    Annotated,
-    Any,
-    Literal,
-    Protocol,
-    TypedDict,
-    TypeVar,
-    cast,
-    overload,
-)
+from typing import TYPE_CHECKING, Annotated, Any, Literal, Protocol, TypedDict, TypeVar, cast, overload
 
 from pydantic import ConfigDict, Field, PlainValidator, RootModel
 
@@ -71,11 +61,15 @@ class HttpHeaders(RootModel, Mapping[str, str]):
 
     model_config = ConfigDict(validate_by_name=True, validate_by_alias=True)
 
-    [5 removed lines whose content is not captured in the source rendering]
+    # Workaround for pydantic 2.12 and mypy type checking issue for Annotated with default_factory
+    if TYPE_CHECKING:
+        root: dict[str, str] = {}
+    else:
+        root: Annotated[
+            dict[str, str],
+            PlainValidator(lambda value: _normalize_headers(value)),
+            Field(default_factory=dict),
+        ]
 
     def __getitem__(self, key: str) -> str:
         return self.root[key.lower()]
```
{crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_utils/recoverable_state.py

```diff
@@ -4,12 +4,14 @@ from typing import TYPE_CHECKING, Generic, Literal, TypeVar
 
 from pydantic import BaseModel
 
+from crawlee._utils.raise_if_too_many_kwargs import raise_if_too_many_kwargs
 from crawlee.events._types import Event, EventPersistStateData
 
 if TYPE_CHECKING:
     import logging
+    from collections.abc import Callable, Coroutine
 
-    from crawlee.storages
+    from crawlee.storages import KeyValueStore
 
 TStateModel = TypeVar('TStateModel', bound=BaseModel)
 
@@ -37,6 +39,7 @@ class RecoverableState(Generic[TStateModel]):
         persistence_enabled: Literal[True, False, 'explicit_only'] = False,
         persist_state_kvs_name: str | None = None,
         persist_state_kvs_id: str | None = None,
+        persist_state_kvs_factory: Callable[[], Coroutine[None, None, KeyValueStore]] | None = None,
         logger: logging.Logger,
     ) -> None:
         """Initialize a new recoverable state object.
@@ -51,16 +54,40 @@ class RecoverableState(Generic[TStateModel]):
                 If neither a name nor and id are supplied, the default store will be used.
             persist_state_kvs_id: The identifier of the KeyValueStore to use for persistence.
                 If neither a name nor and id are supplied, the default store will be used.
+            persist_state_kvs_factory: Factory that can be awaited to create KeyValueStore to use for persistence. If
+                not provided, a system-wide KeyValueStore will be used, based on service locator configuration.
             logger: A logger instance for logging operations related to state persistence
         """
+        raise_if_too_many_kwargs(
+            persist_state_kvs_name=persist_state_kvs_name,
+            persist_state_kvs_id=persist_state_kvs_id,
+            persist_state_kvs_factory=persist_state_kvs_factory,
+        )
+        if not persist_state_kvs_factory:
+            logger.debug(
+                'No explicit key_value_store set for recoverable state. Recovery will use a system-wide KeyValueStore '
+                'based on service_locator configuration, potentially calling service_locator.set_storage_client in the '
+                'process. It is recommended to initialize RecoverableState with explicit key_value_store to avoid '
+                'global side effects.'
+            )
+
         self._default_state = default_state
         self._state_type: type[TStateModel] = self._default_state.__class__
         self._state: TStateModel | None = None
         self._persistence_enabled = persistence_enabled
         self._persist_state_key = persist_state_key
-        [3 removed lines whose content is not captured in the source rendering]
+        if persist_state_kvs_factory is None:
+
+            async def kvs_factory() -> KeyValueStore:
+                from crawlee.storages import KeyValueStore  # noqa: PLC0415 avoid circular import
+
+                return await KeyValueStore.open(name=persist_state_kvs_name, id=persist_state_kvs_id)
+
+            self._persist_state_kvs_factory = kvs_factory
+        else:
+            self._persist_state_kvs_factory = persist_state_kvs_factory
+
+        self._key_value_store: KeyValueStore | None = None
         self._log = logger
 
     async def initialize(self) -> TStateModel:
@@ -77,11 +104,8 @@ class RecoverableState(Generic[TStateModel]):
             return self.current_value
 
         # Import here to avoid circular imports.
-        from crawlee.storages._key_value_store import KeyValueStore  # noqa: PLC0415
 
-        self._key_value_store = await
-            name=self._persist_state_kvs_name, id=self._persist_state_kvs_id
-        )
+        self._key_value_store = await self._persist_state_kvs_factory()
 
         await self._load_saved_state()
 
```
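For context, a hedged sketch of how the new `persist_state_kvs_factory` parameter might be used follows. `RecoverableState` is an internal utility, and everything beyond the parameters visible in the hunks above (for example the state-model class and how mutations get persisted) is an assumption for illustration only:

```python
# A hedged sketch of passing an explicit KeyValueStore factory; only the
# constructor parameters shown in the diff above are assumed to exist.
import asyncio
import logging

from pydantic import BaseModel

from crawlee._utils.recoverable_state import RecoverableState
from crawlee.storages import KeyValueStore


class CrawlProgress(BaseModel):
    pages_done: int = 0


async def main() -> None:
    state = RecoverableState(
        default_state=CrawlProgress(),
        persist_state_key='crawl-progress',
        persistence_enabled=True,
        # The factory is awaited lazily inside initialize(), replacing the
        # previous name/id based KeyValueStore.open() call removed above.
        persist_state_kvs_factory=lambda: KeyValueStore.open(name='my-crawl-state'),
        logger=logging.getLogger(__name__),
    )

    progress = await state.initialize()
    progress.pages_done += 1  # mutations are persisted per the chosen persistence mode


if __name__ == '__main__':
    asyncio.run(main())
```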
{crawlee-1.0.1b7 → crawlee-1.0.5b3}/src/crawlee/_utils/recurring_task.py

```diff
@@ -7,6 +7,9 @@ from typing import TYPE_CHECKING
 if TYPE_CHECKING:
     from collections.abc import Callable
     from datetime import timedelta
+    from types import TracebackType
+
+    from typing_extensions import Self
 
 logger = getLogger(__name__)
 
@@ -26,6 +29,18 @@ class RecurringTask:
         self.delay = delay
         self.task: asyncio.Task | None = None
 
+    async def __aenter__(self) -> Self:
+        self.start()
+        return self
+
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_value: BaseException | None,
+        exc_traceback: TracebackType | None,
+    ) -> None:
+        await self.stop()
+
     async def _wrapper(self) -> None:
         """Continuously execute the provided function with the specified delay.
 
```
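The new `__aenter__`/`__aexit__` pair means a recurring task can now be scoped with `async with`, so it is started on entry and stopped on exit. A minimal hedged sketch follows; `RecurringTask` is an internal helper, and the constructor shape assumed here (a callable plus a `timedelta` delay) and acceptance of a plain sync callback are assumptions:

```python
import asyncio
from datetime import timedelta

from crawlee._utils.recurring_task import RecurringTask


def report_progress() -> None:
    # Assumed: both sync and async callables are accepted by the task wrapper.
    print('still running...')


async def main() -> None:
    # __aenter__ starts the background task, __aexit__ awaits its stop,
    # so the callback only fires while the block is active.
    async with RecurringTask(report_progress, timedelta(seconds=1)):
        await asyncio.sleep(3.5)


if __name__ == '__main__':
    asyncio.run(main())
```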