crawlee 1.0.3b9__tar.gz → 1.0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crawlee might be problematic. Click here for more details.
- {crawlee-1.0.3b9 → crawlee-1.0.4}/CHANGELOG.md +12 -3
- {crawlee-1.0.3b9 → crawlee-1.0.4}/PKG-INFO +1 -1
- {crawlee-1.0.3b9 → crawlee-1.0.4}/pyproject.toml +1 -1
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_utils/recurring_task.py +15 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_utils/urls.py +9 -2
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +3 -1
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +33 -12
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_basic/_basic_crawler.py +23 -12
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_playwright/_playwright_crawler.py +3 -1
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/statistics/_statistics.py +9 -5
- crawlee-1.0.4/tests/unit/_autoscaling/test_snapshotter.py +353 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/conftest.py +13 -6
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler.py +102 -1
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/crawlers/_basic/test_basic_crawler.py +58 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py +2 -2
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/crawlers/_parsel/test_parsel_crawler.py +2 -2
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/server_endpoints.py +1 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/storages/test_dataset.py +2 -2
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/storages/test_key_value_store.py +2 -2
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/storages/test_request_queue.py +13 -8
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/test_configuration.py +32 -6
- {crawlee-1.0.3b9 → crawlee-1.0.4}/uv.lock +1 -1
- crawlee-1.0.3b9/tests/unit/_autoscaling/test_snapshotter.py +0 -333
- {crawlee-1.0.3b9 → crawlee-1.0.4}/.editorconfig +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/.github/CODEOWNERS +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/.github/pull_request_template.md +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/.github/workflows/build_and_deploy_docs.yaml +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/.github/workflows/check_pr_title.yaml +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/.github/workflows/pre_release.yaml +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/.github/workflows/release.yaml +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/.github/workflows/run_code_checks.yaml +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/.github/workflows/templates_e2e_tests.yaml +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/.github/workflows/update_new_issue.yaml +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/.gitignore +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/.markdownlint.yaml +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/.pre-commit-config.yaml +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/CONTRIBUTING.md +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/LICENSE +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/Makefile +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/README.md +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/deployment/apify_platform.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/deployment/code_examples/apify/crawler_as_actor_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/deployment/code_examples/apify/get_public_url.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/deployment/code_examples/apify/log_with_config_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/deployment/code_examples/apify/proxy_advanced_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/deployment/code_examples/apify/proxy_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/deployment/code_examples/google/cloud_run_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/deployment/code_examples/google/google_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/deployment/google_cloud.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/deployment/google_cloud_run.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/add_data_to_dataset.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/beautifulsoup_crawler.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/capture_screenshot_using_playwright.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/capturing_page_snapshots_with_error_snapshotter.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/adaptive_playwright_crawler.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/add_data_to_dataset_bs.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/add_data_to_dataset_dataset.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/add_data_to_dataset_pw.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/beautifulsoup_crawler.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/beautifulsoup_crawler_keep_alive.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/beautifulsoup_crawler_stop.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/capture_screenshot_using_playwright.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/configure_json_logging.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/crawl_all_links_on_website_bs.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/crawl_all_links_on_website_pw.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/crawl_multiple_urls_bs.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/crawl_multiple_urls_pw.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/crawl_specific_links_on_website_bs.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/crawl_specific_links_on_website_pw.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/crawl_website_with_relative_links_all_links.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/crawl_website_with_relative_links_same_domain.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/crawl_website_with_relative_links_same_hostname.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/crawl_website_with_relative_links_same_origin.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/export_entire_dataset_to_file_csv.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/export_entire_dataset_to_file_json.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/extract_and_add_specific_links_on_website_bs.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/extract_and_add_specific_links_on_website_pw.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/fill_and_submit_web_form_crawler.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/fill_and_submit_web_form_request.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/parsel_crawler.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/parsel_crawler_with_error_snapshotter.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/playwright_block_requests.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/playwright_crawler.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/playwright_crawler_with_camoufox.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/playwright_crawler_with_error_snapshotter.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/playwright_crawler_with_fingerprint_generator.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/respect_robots_on_skipped_request.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/respect_robots_txt_file.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/resuming_paused_crawl.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/using_browser_profiles_chrome.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/code_examples/using_browser_profiles_firefox.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/crawl_all_links_on_website.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/crawl_multiple_urls.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/crawl_specific_links_on_website.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/crawl_website_with_relative_links.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/crawler_keep_alive.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/crawler_stop.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/export_entire_dataset_to_file.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/fill_and_submit_web_form.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/json_logging.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/parsel_crawler.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/playwright_crawler.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/playwright_crawler_adaptive.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/playwright_crawler_with_block_requests.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/playwright_crawler_with_camoufox.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/playwright_crawler_with_fingerprint_generator.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/respect_robots_txt_file.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/resuming_paused_crawl.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/examples/using_browser_profile.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/architecture_overview.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/avoid_blocking.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/avoid_blocking/default_fingerprint_generator_with_args.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/avoid_blocking/playwright_with_fingerprint_generator.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/creating_web_archive/manual_archiving_parsel_crawler.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/creating_web_archive/manual_archiving_playwright_crawler.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/creating_web_archive/simple_pw_through_proxy_pywb_server.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/error_handling/change_handle_error_status.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/error_handling/disable_retry.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/error_handling/handle_proxy_error.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/http_clients/parsel_curl_impersonate_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/http_clients/parsel_httpx_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/http_clients/parsel_impit_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/http_crawlers/beautifulsoup_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/http_crawlers/custom_crawler_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/http_crawlers/http_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/http_crawlers/parsel_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/login_crawler/http_login.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/login_crawler/playwright_login.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/playwright_crawler/browser_configuration_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/playwright_crawler/multiple_launch_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/playwright_crawler/plugin_browser_configuration_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/playwright_crawler/pre_navigation_hook_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/playwright_crawler_adaptive/handler.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/playwright_crawler_adaptive/init_beautifulsoup.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/playwright_crawler_adaptive/init_parsel.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/playwright_crawler_adaptive/init_prediction.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/playwright_crawler_adaptive/pre_nav_hooks.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/playwright_crawler_stagehand/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/playwright_crawler_stagehand/browser_classes.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/playwright_crawler_stagehand/stagehand_run.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/playwright_crawler_stagehand/support_classes.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/proxy_management/inspecting_bs_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/proxy_management/inspecting_pw_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/proxy_management/integration_bs_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/proxy_management/integration_pw_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/proxy_management/quick_start_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/proxy_management/session_bs_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/proxy_management/session_pw_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/proxy_management/tiers_bs_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/proxy_management/tiers_pw_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/request_loaders/rl_basic_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/request_loaders/rl_basic_example_with_persist.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/request_loaders/rl_tandem_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/request_loaders/rl_tandem_example_explicit.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/request_loaders/sitemap_basic_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/request_loaders/sitemap_example_with_persist.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/request_loaders/sitemap_tandem_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/request_loaders/sitemap_tandem_example_explicit.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/request_router/adaptive_crawler_handlers.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/request_router/basic_request_handlers.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/request_router/custom_router_default_only.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/request_router/error_handler.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/request_router/failed_request_handler.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/request_router/http_pre_navigation.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/request_router/playwright_pre_navigation.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/request_router/simple_default_handler.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/running_in_web_server/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/running_in_web_server/crawler.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/running_in_web_server/server.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/scaling_crawlers/max_tasks_per_minute_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/scaling_crawlers/min_and_max_concurrency_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/service_locator/service_conflicts.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/service_locator/service_crawler_configuration.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/service_locator/service_crawler_event_manager.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/service_locator/service_crawler_storage_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/service_locator/service_locator_configuration.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/service_locator/service_locator_event_manager.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/service_locator/service_locator_storage_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/service_locator/service_storage_configuration.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/service_locator/service_storage_storage_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/session_management/multi_sessions_http.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/session_management/one_session_http.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/session_management/sm_basic.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/session_management/sm_beautifulsoup.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/session_management/sm_http.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/session_management/sm_parsel.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/session_management/sm_playwright.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/session_management/sm_standalone.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/storage_clients/custom_storage_client_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/storage_clients/file_system_storage_client_basic_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/storage_clients/file_system_storage_client_configuration_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/storage_clients/memory_storage_client_basic_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/storage_clients/registering_storage_clients_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/storage_clients/sql_storage_client_basic_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/storage_clients/sql_storage_client_configuration_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/storages/cleaning_do_not_purge_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/storages/cleaning_purge_explicitly_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/storages/dataset_basic_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/storages/dataset_with_crawler_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/storages/dataset_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/storages/helper_add_requests_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/storages/helper_enqueue_links_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/storages/kvs_basic_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/storages/kvs_with_crawler_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/storages/kvs_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/storages/opening.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/storages/rq_basic_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/storages/rq_with_crawler_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/storages/rq_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/code_examples/trace_and_monitor_crawlers/instrument_crawler.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/crawler_login.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/creating_web_archive.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/error_handling.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/http_clients.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/http_crawlers.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/playwright_crawler.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/playwright_crawler_adaptive.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/playwright_crawler_stagehand.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/proxy_management.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/request_loaders.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/request_router.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/running_in_web_server.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/scaling_crawlers.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/service_locator.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/session_management.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/storage_clients.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/storages.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/guides/trace_and_monitor_crawlers.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/01_setting_up.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/02_first_crawler.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/03_adding_more_urls.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/04_real_world_project.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/05_crawling.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/06_scraping.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/07_saving_data.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/08_refactoring.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/09_running_in_cloud.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/code_examples/02_bs.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/code_examples/02_bs_better.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/code_examples/02_request_queue.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/code_examples/03_enqueue_strategy.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/code_examples/03_finding_new_links.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/code_examples/03_globs.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/code_examples/03_original_code.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/code_examples/03_transform_request.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/code_examples/04_sanity_check.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/code_examples/05_crawling_detail.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/code_examples/05_crawling_listing.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/code_examples/06_scraping.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/code_examples/07_final_code.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/code_examples/07_first_code.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/code_examples/08_main.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/code_examples/08_routes.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/code_examples/09_apify_sdk.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/code_examples/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/code_examples/routes.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/introduction/index.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/pyproject.toml +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/quick-start/code_examples/beautifulsoup_crawler_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/quick-start/code_examples/parsel_crawler_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/quick-start/code_examples/playwright_crawler_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/quick-start/code_examples/playwright_crawler_headful_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/quick-start/index.mdx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/upgrading/upgrading_to_v0x.md +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/docs/upgrading/upgrading_to_v1.md +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/renovate.json +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_autoscaling/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_autoscaling/_types.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_autoscaling/autoscaled_pool.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_autoscaling/py.typed +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_autoscaling/snapshotter.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_autoscaling/system_status.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_browserforge_workaround.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_cli.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_consts.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_log_config.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_request.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_service_locator.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_types.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_utils/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_utils/blocked.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_utils/byte_size.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_utils/console.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_utils/context.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_utils/crypto.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_utils/docs.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_utils/file.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_utils/globs.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_utils/html_to_text.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_utils/models.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_utils/raise_if_too_many_kwargs.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_utils/recoverable_state.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_utils/requests.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_utils/robots.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_utils/sitemap.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_utils/system.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_utils/time.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_utils/try_import.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_utils/wait.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/_utils/web.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/browsers/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/browsers/_browser_controller.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/browsers/_browser_plugin.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/browsers/_browser_pool.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/browsers/_playwright_browser.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/browsers/_playwright_browser_controller.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/browsers/_playwright_browser_plugin.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/browsers/_types.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/browsers/py.typed +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/configuration.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_abstract_http/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_abstract_http/_abstract_http_parser.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_abstract_http/_http_crawling_context.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_abstract_http/py.typed +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_adaptive_playwright/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_adaptive_playwright/_result_comparator.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_adaptive_playwright/_utils.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_basic/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_basic/_basic_crawling_context.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_basic/_context_pipeline.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_basic/_logging_utils.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_basic/py.typed +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_beautifulsoup/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawler.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawling_context.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_parser.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_beautifulsoup/_utils.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_beautifulsoup/py.typed +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_http/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_http/_http_crawler.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_http/_http_parser.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_parsel/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_parsel/_parsel_crawler.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_parsel/_parsel_crawling_context.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_parsel/_parsel_parser.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_parsel/_utils.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_playwright/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_playwright/_playwright_crawling_context.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_playwright/_playwright_http_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_playwright/_types.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_playwright/_utils.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_types.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/py.typed +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/errors.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/events/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/events/_event_manager.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/events/_local_event_manager.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/events/_types.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/events/py.typed +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/fingerprint_suite/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/fingerprint_suite/_browserforge_adapter.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/fingerprint_suite/_consts.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/fingerprint_suite/_fingerprint_generator.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/fingerprint_suite/_header_generator.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/fingerprint_suite/_types.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/fingerprint_suite/py.typed +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/http_clients/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/http_clients/_base.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/http_clients/_curl_impersonate.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/http_clients/_httpx.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/http_clients/_impit.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/otel/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/otel/crawler_instrumentor.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/project_template/cookiecutter.json +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/project_template/hooks/post_gen_project.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/project_template/hooks/pre_gen_project.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/project_template/templates/main.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/project_template/templates/main_beautifulsoup.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/project_template/templates/main_parsel.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/project_template/templates/main_playwright.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/project_template/templates/main_playwright_camoufox.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/project_template/templates/routes_beautifulsoup.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/project_template/templates/routes_camoufox.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/project_template/templates/routes_parsel.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/project_template/templates/routes_playwright.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/project_template/templates/routes_playwright_camoufox.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/project_template/{{cookiecutter.project_name}}/.dockerignore +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/project_template/{{cookiecutter.project_name}}/README.md +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/project_template/{{cookiecutter.project_name}}/requirements.txt +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__main__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/main.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/routes.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/proxy_configuration.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/py.typed +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/request_loaders/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/request_loaders/_request_list.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/request_loaders/_request_loader.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/request_loaders/_request_manager.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/request_loaders/_request_manager_tandem.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/request_loaders/_sitemap_request_loader.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/router.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/sessions/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/sessions/_cookies.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/sessions/_models.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/sessions/_session.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/sessions/_session_pool.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/sessions/py.typed +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/statistics/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/statistics/_error_snapshotter.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/statistics/_error_tracker.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/statistics/_models.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/_base/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/_base/_dataset_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/_base/_key_value_store_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/_base/_request_queue_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/_base/_storage_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/_base/py.typed +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/_file_system/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/_file_system/_dataset_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/_file_system/_key_value_store_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/_file_system/_request_queue_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/_file_system/_storage_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/_file_system/_utils.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/_file_system/py.typed +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/_memory/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/_memory/_dataset_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/_memory/_key_value_store_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/_memory/_request_queue_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/_memory/_storage_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/_memory/py.typed +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/_sql/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/_sql/_client_mixin.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/_sql/_dataset_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/_sql/_db_models.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/_sql/_key_value_store_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/_sql/_request_queue_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/_sql/_storage_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/_sql/py.typed +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/models.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storage_clients/py.typed +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storages/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storages/_base.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storages/_dataset.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storages/_key_value_store.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storages/_request_queue.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storages/_storage_instance_manager.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storages/_utils.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/storages/py.typed +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/e2e/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/e2e/conftest.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/e2e/project_template/test_static_crawlers_templates.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/e2e/project_template/utils.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/README.md +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/__init__.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/_autoscaling/test_autoscaled_pool.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/_autoscaling/test_system_status.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/_statistics/test_error_tracker.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/_statistics/test_periodic_logging.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/_statistics/test_persistence.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/_statistics/test_request_processing_record.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/_utils/test_byte_size.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/_utils/test_console.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/_utils/test_crypto.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/_utils/test_file.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/_utils/test_globs.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/_utils/test_html_to_text.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/_utils/test_measure_time.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/_utils/test_raise_if_too_many_kwargs.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/_utils/test_recurring_task.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/_utils/test_requests.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/_utils/test_robots.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/_utils/test_sitemap.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/_utils/test_system.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/_utils/test_timedelata_ms.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/_utils/test_urls.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/browsers/test_browser_pool.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/browsers/test_playwright_browser.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/browsers/test_playwright_browser_controller.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/browsers/test_playwright_browser_plugin.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler_statistics.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawling_context.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/crawlers/_adaptive_playwright/test_predictor.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/crawlers/_basic/test_context_pipeline.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/crawlers/_http/test_http_crawler.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/crawlers/_playwright/test_playwright_crawler.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/events/test_event_manager.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/events/test_local_event_manager.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/fingerprint_suite/test_adapters.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/fingerprint_suite/test_header_generator.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/http_clients/test_http_clients.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/http_clients/test_httpx.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/otel/test_crawler_instrumentor.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/proxy_configuration/test_new_proxy_info.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/proxy_configuration/test_tiers.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/request_loaders/test_request_list.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/request_loaders/test_sitemap_request_loader.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/server.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/sessions/test_cookies.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/sessions/test_models.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/sessions/test_session.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/sessions/test_session_pool.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/storage_clients/_file_system/test_fs_rq_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/storage_clients/_memory/test_memory_dataset_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/storage_clients/_memory/test_memory_kvs_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/storage_clients/_memory/test_memory_rq_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/storage_clients/_sql/test_sql_dataset_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/storage_clients/_sql/test_sql_kvs_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/storage_clients/_sql/test_sql_rq_client.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/storages/conftest.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/storages/test_request_manager_tandem.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/storages/test_storage_instance_manager.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/test_cli.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/test_log_config.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/test_router.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/tests/unit/test_service_locator.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/.eslintrc.json +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/.yarnrc.yml +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/babel.config.js +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/build_api_reference.sh +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/docusaurus.config.js +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/generate_module_shortcuts.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/package.json +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/patches/@docusaurus+core+3.4.0.patch +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/patches/@docusaurus+core+3.5.2.patch +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/roa-loader/index.js +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/roa-loader/package.json +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/sidebars.js +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/components/ApiLink.jsx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/components/Button.jsx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/components/Button.module.css +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/components/CopyButton.jsx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/components/CopyButton.module.css +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/components/Gradients.jsx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/components/Highlights.jsx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/components/Highlights.module.css +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/components/Homepage/HomepageCliExample.jsx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/components/Homepage/HomepageCliExample.module.css +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/components/Homepage/HomepageCtaSection.jsx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/components/Homepage/HomepageCtaSection.module.css +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/components/Homepage/HomepageHeroSection.jsx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/components/Homepage/HomepageHeroSection.module.css +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/components/Homepage/LanguageInfoWidget.jsx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/components/Homepage/LanguageInfoWidget.module.css +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/components/Homepage/LanguageSwitch.jsx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/components/Homepage/LanguageSwitch.module.css +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/components/Homepage/RiverSection.jsx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/components/Homepage/RiverSection.module.css +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/components/Homepage/ThreeCardsWithIcon.jsx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/components/Homepage/ThreeCardsWithIcon.module.css +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/components/Homepage/animated-crawlee-logo-dark.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/components/Homepage/animated-crawlee-logo-light.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/components/RunnableCodeBlock.jsx +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/components/RunnableCodeBlock.module.css +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/css/custom.css +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/pages/home_page_example.py +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/pages/index.js +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/pages/index.module.css +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/theme/ColorModeToggle/dark-mode-icon.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/theme/ColorModeToggle/index.js +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/theme/ColorModeToggle/light-mode-icon.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/theme/ColorModeToggle/styles.module.css +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/theme/DocItem/Layout/index.js +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/theme/DocItem/Layout/styles.module.css +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/theme/Footer/LinkItem/index.js +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/theme/Footer/LinkItem/index.module.css +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/theme/Footer/index.js +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/theme/Footer/index.module.css +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/theme/MDXComponents/A.js +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/theme/Navbar/Content/index.js +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/theme/Navbar/Content/styles.module.css +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/theme/Navbar/Logo/index.js +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/theme/Navbar/Logo/index.module.css +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/theme/Navbar/MobileSidebar/Header/index.js +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/theme/Navbar/MobileSidebar/Header/index.module.css +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/theme/Navbar/MobileSidebar/Layout/index.js +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/theme/Navbar/MobileSidebar/PrimaryMenu/index.js +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/theme/Navbar/MobileSidebar/index.js +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/src/theme/NavbarItem/ComponentTypes.js +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/.nojekyll +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/font/lota.woff +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/font/lota.woff2 +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/API.png +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/apify_logo.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/apify_og_SDK.png +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/apify_sdk.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/apify_sdk_white.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/arrow_right.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/auto-scaling-dark.webp +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/auto-scaling-light.webp +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/check.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/chrome-scrape-dark.gif +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/chrome-scrape-light.gif +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/cloud_icon.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/community-dark-icon.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/community-light-icon.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/crawlee-dark-new.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/crawlee-dark.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/crawlee-javascript-dark.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/crawlee-javascript-light.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/crawlee-light-new.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/crawlee-light.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/crawlee-logo-monocolor.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/crawlee-logo.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/crawlee-python-dark.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/crawlee-python-light.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/crawlee-python-og.png +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/defaults-dark-icon.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/defaults-light-icon.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/discord-brand-dark.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/discord-brand.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/docusaurus.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/external-link.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/favicon.ico +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/favorite-tools-dark.webp +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/favorite-tools-light.webp +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/features/auto-scaling.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/features/automate-everything.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/features/fingerprints.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/features/node-requests.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/features/runs-on-py.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/features/storage.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/features/works-everywhere.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/fill-and-submit-web-form/00.jpg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/fill-and-submit-web-form/01.jpg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/fill-and-submit-web-form/02.jpg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/fill-and-submit-web-form/03.jpg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/getting-started/current-price.jpg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/getting-started/scraping-practice.jpg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/getting-started/select-an-element.jpg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/getting-started/selected-element.jpg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/getting-started/sku.jpg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/getting-started/title.jpg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/github-brand-dark.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/github-brand.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/guides/jaeger_otel_search_view_example.png +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/guides/jaeger_otel_trace_example.png +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/hearth copy.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/hearth.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/javascript_logo.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/js_file.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/logo-big.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/logo-blur.png +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/logo-blur.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/logo-zoom.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/menu-arrows.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/oss_logo.png +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/puppeteer-live-view-dashboard.png +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/puppeteer-live-view-detail.png +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/queue-dark-icon.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/queue-light-icon.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/resuming-paused-crawl/00.webp +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/resuming-paused-crawl/01.webp +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/robot.png +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/routing-dark-icon.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/routing-light-icon.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/scraping-utils-dark-icon.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/scraping-utils-light-icon.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/smart-proxy-dark.webp +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/smart-proxy-light.webp +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/source_code.png +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/system.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/triangles_dark.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/triangles_light.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/workflow.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/zero-setup-dark-icon.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/img/zero-setup-light-icon.svg +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/js/custom.js +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/static/robots.txt +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/tools/docs-prettier.config.js +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/tools/utils/externalLink.js +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/tools/website_gif/chrome-scrape-dark.gif +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/tools/website_gif/chrome-scrape-dark.mp4 +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/tools/website_gif/chrome-scrape-light.gif +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/tools/website_gif/chrome-scrape-light.mp4 +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/tools/website_gif/website_gif.mjs +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/tsconfig.eslint.json +0 -0
- {crawlee-1.0.3b9 → crawlee-1.0.4}/website/yarn.lock +0 -0
|
@@ -2,8 +2,18 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
5
|
+
## [1.0.4](https://github.com/apify/crawlee-python/releases/tag/v1.0.4) (2025-10-24)
|
|
6
|
+
|
|
7
|
+
### 🐛 Bug Fixes
|
|
8
|
+
|
|
9
|
+
- Respect `enqueue_strategy` in `enqueue_links` ([#1505](https://github.com/apify/crawlee-python/pull/1505)) ([6ee04bc](https://github.com/apify/crawlee-python/commit/6ee04bc08c50a70f2e956a79d4ce5072a726c3a8)) by [@Mantisus](https://github.com/Mantisus), closes [#1504](https://github.com/apify/crawlee-python/issues/1504)
|
|
10
|
+
- Exclude incorrect links before checking `robots.txt` ([#1502](https://github.com/apify/crawlee-python/pull/1502)) ([3273da5](https://github.com/apify/crawlee-python/commit/3273da5fee62ec9254666b376f382474c3532a56)) by [@Mantisus](https://github.com/Mantisus), closes [#1499](https://github.com/apify/crawlee-python/issues/1499)
|
|
11
|
+
- Resolve compatibility issue between `SqlStorageClient` and `AdaptivePlaywrightCrawler` ([#1496](https://github.com/apify/crawlee-python/pull/1496)) ([ce172c4](https://github.com/apify/crawlee-python/commit/ce172c425a8643a1d4c919db4f5e5a6e47e91deb)) by [@Mantisus](https://github.com/Mantisus), closes [#1495](https://github.com/apify/crawlee-python/issues/1495)
|
|
12
|
+
- Fix `BasicCrawler` statistics persistence ([#1490](https://github.com/apify/crawlee-python/pull/1490)) ([1eb1c19](https://github.com/apify/crawlee-python/commit/1eb1c19aa6f9dda4a0e3f7eda23f77a554f95076)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1501](https://github.com/apify/crawlee-python/issues/1501)
|
|
13
|
+
- Save context state in result for `AdaptivePlaywrightCrawler` after isolated processing in `SubCrawler` ([#1488](https://github.com/apify/crawlee-python/pull/1488)) ([62b7c70](https://github.com/apify/crawlee-python/commit/62b7c70b54085fc65a660062028014f4502beba9)) by [@Mantisus](https://github.com/Mantisus), closes [#1483](https://github.com/apify/crawlee-python/issues/1483)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
## [1.0.3](https://github.com/apify/crawlee-python/releases/tag/v1.0.3) (2025-10-17)
|
|
7
17
|
|
|
8
18
|
### 🐛 Bug Fixes
|
|
9
19
|
|
|
@@ -13,7 +23,6 @@ All notable changes to this project will be documented in this file.
|
|
|
13
23
|
- Fix `KeyValueStore.auto_saved_value` failing in some scenarios ([#1438](https://github.com/apify/crawlee-python/pull/1438)) ([b35dee7](https://github.com/apify/crawlee-python/commit/b35dee78180e57161b826641d45a61b8d8f6ef51)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1354](https://github.com/apify/crawlee-python/issues/1354)
|
|
14
24
|
|
|
15
25
|
|
|
16
|
-
<!-- git-cliff-unreleased-end -->
|
|
17
26
|
## [1.0.2](https://github.com/apify/crawlee-python/releases/tag/v1.0.2) (2025-10-08)
|
|
18
27
|
|
|
19
28
|
### 🐛 Bug Fixes
|
|
@@ -7,6 +7,9 @@ from typing import TYPE_CHECKING
|
|
|
7
7
|
if TYPE_CHECKING:
|
|
8
8
|
from collections.abc import Callable
|
|
9
9
|
from datetime import timedelta
|
|
10
|
+
from types import TracebackType
|
|
11
|
+
|
|
12
|
+
from typing_extensions import Self
|
|
10
13
|
|
|
11
14
|
logger = getLogger(__name__)
|
|
12
15
|
|
|
@@ -26,6 +29,18 @@ class RecurringTask:
|
|
|
26
29
|
self.delay = delay
|
|
27
30
|
self.task: asyncio.Task | None = None
|
|
28
31
|
|
|
32
|
+
async def __aenter__(self) -> Self:
|
|
33
|
+
self.start()
|
|
34
|
+
return self
|
|
35
|
+
|
|
36
|
+
async def __aexit__(
|
|
37
|
+
self,
|
|
38
|
+
exc_type: type[BaseException] | None,
|
|
39
|
+
exc_value: BaseException | None,
|
|
40
|
+
exc_traceback: TracebackType | None,
|
|
41
|
+
) -> None:
|
|
42
|
+
await self.stop()
|
|
43
|
+
|
|
29
44
|
async def _wrapper(self) -> None:
|
|
30
45
|
"""Continuously execute the provided function with the specified delay.
|
|
31
46
|
|
|
@@ -7,6 +7,7 @@ from yarl import URL
|
|
|
7
7
|
|
|
8
8
|
if TYPE_CHECKING:
|
|
9
9
|
from collections.abc import Iterator
|
|
10
|
+
from logging import Logger
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
def is_url_absolute(url: str) -> bool:
|
|
@@ -22,13 +23,19 @@ def convert_to_absolute_url(base_url: str, relative_url: str) -> str:
|
|
|
22
23
|
return str(URL(base_url).join(URL(relative_url)))
|
|
23
24
|
|
|
24
25
|
|
|
25
|
-
def to_absolute_url_iterator(base_url: str, urls: Iterator[str]) -> Iterator[str]:
|
|
26
|
+
def to_absolute_url_iterator(base_url: str, urls: Iterator[str], logger: Logger | None = None) -> Iterator[str]:
|
|
26
27
|
"""Convert an iterator of relative URLs to absolute URLs using a base URL."""
|
|
27
28
|
for url in urls:
|
|
28
29
|
if is_url_absolute(url):
|
|
29
30
|
yield url
|
|
30
31
|
else:
|
|
31
|
-
|
|
32
|
+
converted_url = convert_to_absolute_url(base_url, url)
|
|
33
|
+
# Skip the URL if conversion fails, probably due to an incorrect format, such as 'mailto:'.
|
|
34
|
+
if not is_url_absolute(converted_url):
|
|
35
|
+
if logger:
|
|
36
|
+
logger.debug(f'Could not convert URL "{url}" to absolute using base URL "{base_url}". Skipping it.')
|
|
37
|
+
continue
|
|
38
|
+
yield converted_url
|
|
32
39
|
|
|
33
40
|
|
|
34
41
|
_http_url_adapter = TypeAdapter(AnyHttpUrl)
|
{crawlee-1.0.3b9 → crawlee-1.0.4}/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py
RENAMED
|
@@ -167,7 +167,9 @@ class AbstractHttpCrawler(
|
|
|
167
167
|
kwargs.setdefault('strategy', 'same-hostname')
|
|
168
168
|
|
|
169
169
|
links_iterator: Iterator[str] = iter(self._parser.find_links(parsed_content, selector=selector))
|
|
170
|
-
links_iterator = to_absolute_url_iterator(
|
|
170
|
+
links_iterator = to_absolute_url_iterator(
|
|
171
|
+
context.request.loaded_url or context.request.url, links_iterator, logger=context.log
|
|
172
|
+
)
|
|
171
173
|
|
|
172
174
|
if robots_txt_file:
|
|
173
175
|
skipped, links_iterator = partition(lambda url: robots_txt_file.is_allowed(url), links_iterator)
|
|
@@ -149,10 +149,6 @@ class AdaptivePlaywrightCrawler(
|
|
|
149
149
|
non-default configuration.
|
|
150
150
|
kwargs: Additional keyword arguments to pass to the underlying `BasicCrawler`.
|
|
151
151
|
"""
|
|
152
|
-
# Some sub crawler kwargs are internally modified. Prepare copies.
|
|
153
|
-
basic_crawler_kwargs_for_static_crawler = deepcopy(kwargs)
|
|
154
|
-
basic_crawler_kwargs_for_pw_crawler = deepcopy(kwargs)
|
|
155
|
-
|
|
156
152
|
# Adaptive crawling related.
|
|
157
153
|
self.rendering_type_predictor = rendering_type_predictor or DefaultRenderingTypePredictor()
|
|
158
154
|
self.result_checker = result_checker or (lambda _: True)
|
|
@@ -170,11 +166,11 @@ class AdaptivePlaywrightCrawler(
|
|
|
170
166
|
# Each sub crawler will use custom logger .
|
|
171
167
|
static_logger = getLogger('Subcrawler_static')
|
|
172
168
|
static_logger.setLevel(logging.ERROR)
|
|
173
|
-
basic_crawler_kwargs_for_static_crawler
|
|
169
|
+
basic_crawler_kwargs_for_static_crawler: _BasicCrawlerOptions = {'_logger': static_logger, **kwargs}
|
|
174
170
|
|
|
175
171
|
pw_logger = getLogger('Subcrawler_playwright')
|
|
176
172
|
pw_logger.setLevel(logging.ERROR)
|
|
177
|
-
basic_crawler_kwargs_for_pw_crawler
|
|
173
|
+
basic_crawler_kwargs_for_pw_crawler: _BasicCrawlerOptions = {'_logger': pw_logger, **kwargs}
|
|
178
174
|
|
|
179
175
|
# Initialize sub crawlers to create their pipelines.
|
|
180
176
|
static_crawler_class = AbstractHttpCrawler.create_parsed_http_crawler_class(static_parser=static_parser)
|
|
@@ -319,7 +315,7 @@ class AdaptivePlaywrightCrawler(
|
|
|
319
315
|
),
|
|
320
316
|
logger=self._logger,
|
|
321
317
|
)
|
|
322
|
-
return SubCrawlerRun(result=result)
|
|
318
|
+
return SubCrawlerRun(result=result, run_context=context_linked_to_result)
|
|
323
319
|
except Exception as e:
|
|
324
320
|
return SubCrawlerRun(exception=e)
|
|
325
321
|
|
|
@@ -375,7 +371,8 @@ class AdaptivePlaywrightCrawler(
|
|
|
375
371
|
self.track_http_only_request_handler_runs()
|
|
376
372
|
|
|
377
373
|
static_run = await self._crawl_one(rendering_type='static', context=context)
|
|
378
|
-
if static_run.result and self.result_checker(static_run.result):
|
|
374
|
+
if static_run.result and static_run.run_context and self.result_checker(static_run.result):
|
|
375
|
+
self._update_context_from_copy(context, static_run.run_context)
|
|
379
376
|
self._context_result_map[context] = static_run.result
|
|
380
377
|
return
|
|
381
378
|
if static_run.exception:
|
|
@@ -406,13 +403,10 @@ class AdaptivePlaywrightCrawler(
|
|
|
406
403
|
if pw_run.exception is not None:
|
|
407
404
|
raise pw_run.exception
|
|
408
405
|
|
|
409
|
-
if pw_run.result:
|
|
410
|
-
self._context_result_map[context] = pw_run.result
|
|
411
|
-
|
|
406
|
+
if pw_run.result and pw_run.run_context:
|
|
412
407
|
if should_detect_rendering_type:
|
|
413
408
|
detection_result: RenderingType
|
|
414
409
|
static_run = await self._crawl_one('static', context=context, state=old_state_copy)
|
|
415
|
-
|
|
416
410
|
if static_run.result and self.result_comparator(static_run.result, pw_run.result):
|
|
417
411
|
detection_result = 'static'
|
|
418
412
|
else:
|
|
@@ -421,6 +415,9 @@ class AdaptivePlaywrightCrawler(
|
|
|
421
415
|
context.log.debug(f'Detected rendering type {detection_result} for {context.request.url}')
|
|
422
416
|
self.rendering_type_predictor.store_result(context.request, detection_result)
|
|
423
417
|
|
|
418
|
+
self._update_context_from_copy(context, pw_run.run_context)
|
|
419
|
+
self._context_result_map[context] = pw_run.result
|
|
420
|
+
|
|
424
421
|
def pre_navigation_hook(
|
|
425
422
|
self,
|
|
426
423
|
hook: Callable[[AdaptivePlaywrightPreNavCrawlingContext], Awaitable[None]] | None = None,
|
|
@@ -455,8 +452,32 @@ class AdaptivePlaywrightCrawler(
|
|
|
455
452
|
def track_rendering_type_mispredictions(self) -> None:
|
|
456
453
|
self.statistics.state.rendering_type_mispredictions += 1
|
|
457
454
|
|
|
455
|
+
def _update_context_from_copy(self, context: BasicCrawlingContext, context_copy: BasicCrawlingContext) -> None:
|
|
456
|
+
"""Update mutable fields of `context` from `context_copy`.
|
|
457
|
+
|
|
458
|
+
Uses object.__setattr__ to bypass frozen dataclass restrictions,
|
|
459
|
+
allowing state synchronization after isolated crawler execution.
|
|
460
|
+
"""
|
|
461
|
+
updating_attributes = {
|
|
462
|
+
'request': ('headers', 'user_data'),
|
|
463
|
+
'session': ('_user_data', '_usage_count', '_error_score', '_cookies'),
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
for attr, sub_attrs in updating_attributes.items():
|
|
467
|
+
original_sub_obj = getattr(context, attr)
|
|
468
|
+
copy_sub_obj = getattr(context_copy, attr)
|
|
469
|
+
|
|
470
|
+
# Check that both sub objects are not None
|
|
471
|
+
if original_sub_obj is None or copy_sub_obj is None:
|
|
472
|
+
continue
|
|
473
|
+
|
|
474
|
+
for sub_attr in sub_attrs:
|
|
475
|
+
new_value = getattr(copy_sub_obj, sub_attr)
|
|
476
|
+
object.__setattr__(original_sub_obj, sub_attr, new_value)
|
|
477
|
+
|
|
458
478
|
|
|
459
479
|
@dataclass(frozen=True)
|
|
460
480
|
class SubCrawlerRun:
|
|
461
481
|
result: RequestHandlerRunResult | None = None
|
|
462
482
|
exception: Exception | None = None
|
|
483
|
+
run_context: BasicCrawlingContext | None = None
|
|
@@ -56,7 +56,7 @@ from crawlee.errors import (
|
|
|
56
56
|
SessionError,
|
|
57
57
|
UserDefinedErrorHandlerError,
|
|
58
58
|
)
|
|
59
|
-
from crawlee.events._types import Event, EventCrawlerStatusData
|
|
59
|
+
from crawlee.events._types import Event, EventCrawlerStatusData, EventPersistStateData
|
|
60
60
|
from crawlee.http_clients import ImpitHttpClient
|
|
61
61
|
from crawlee.router import Router
|
|
62
62
|
from crawlee.sessions import SessionPool
|
|
@@ -437,14 +437,23 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
|
|
|
437
437
|
self._statistics_log_format = statistics_log_format
|
|
438
438
|
|
|
439
439
|
# Statistics
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
440
|
+
if statistics:
|
|
441
|
+
self._statistics = statistics
|
|
442
|
+
else:
|
|
443
|
+
|
|
444
|
+
async def persist_state_factory() -> KeyValueStore:
|
|
445
|
+
return await self.get_key_value_store()
|
|
446
|
+
|
|
447
|
+
self._statistics = cast(
|
|
448
|
+
'Statistics[TStatisticsState]',
|
|
449
|
+
Statistics.with_default_state(
|
|
450
|
+
persistence_enabled=True,
|
|
451
|
+
periodic_message_logger=self._logger,
|
|
452
|
+
statistics_log_format=self._statistics_log_format,
|
|
453
|
+
log_message='Current request statistics:',
|
|
454
|
+
persist_state_kvs_factory=persist_state_factory,
|
|
455
|
+
),
|
|
456
|
+
)
|
|
448
457
|
|
|
449
458
|
# Additional context managers to enter and exit
|
|
450
459
|
self._additional_context_managers = _additional_context_managers or []
|
|
@@ -689,7 +698,6 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
|
|
|
689
698
|
except CancelledError:
|
|
690
699
|
pass
|
|
691
700
|
finally:
|
|
692
|
-
await self._crawler_state_rec_task.stop()
|
|
693
701
|
if threading.current_thread() is threading.main_thread():
|
|
694
702
|
with suppress(NotImplementedError):
|
|
695
703
|
asyncio.get_running_loop().remove_signal_handler(signal.SIGINT)
|
|
@@ -721,8 +729,6 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
|
|
|
721
729
|
async def _run_crawler(self) -> None:
|
|
722
730
|
event_manager = self._service_locator.get_event_manager()
|
|
723
731
|
|
|
724
|
-
self._crawler_state_rec_task.start()
|
|
725
|
-
|
|
726
732
|
# Collect the context managers to be entered. Context managers that are already active are excluded,
|
|
727
733
|
# as they were likely entered by the caller, who will also be responsible for exiting them.
|
|
728
734
|
contexts_to_enter = [
|
|
@@ -733,6 +739,7 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
|
|
|
733
739
|
self._statistics,
|
|
734
740
|
self._session_pool if self._use_session_pool else None,
|
|
735
741
|
self._http_client,
|
|
742
|
+
self._crawler_state_rec_task,
|
|
736
743
|
*self._additional_context_managers,
|
|
737
744
|
)
|
|
738
745
|
if cm and getattr(cm, 'active', False) is False
|
|
@@ -744,6 +751,9 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
|
|
|
744
751
|
|
|
745
752
|
await self._autoscaled_pool.run()
|
|
746
753
|
|
|
754
|
+
# Emit PERSIST_STATE event when crawler is finishing to allow listeners to persist their state if needed
|
|
755
|
+
event_manager.emit(event=Event.PERSIST_STATE, event_data=EventPersistStateData(is_migrating=False))
|
|
756
|
+
|
|
747
757
|
async def add_requests(
|
|
748
758
|
self,
|
|
749
759
|
requests: Sequence[str | Request],
|
|
@@ -972,6 +982,7 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
|
|
|
972
982
|
label=label,
|
|
973
983
|
user_data=user_data,
|
|
974
984
|
transform_request_function=transform_request_function,
|
|
985
|
+
**kwargs,
|
|
975
986
|
),
|
|
976
987
|
rq_id=rq_id,
|
|
977
988
|
rq_name=rq_name,
|
|
@@ -366,7 +366,9 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
|
|
|
366
366
|
links_iterator: Iterator[str] = iter(
|
|
367
367
|
[url for element in elements if (url := await element.get_attribute('href')) is not None]
|
|
368
368
|
)
|
|
369
|
-
links_iterator = to_absolute_url_iterator(
|
|
369
|
+
links_iterator = to_absolute_url_iterator(
|
|
370
|
+
context.request.loaded_url or context.request.url, links_iterator, logger=context.log
|
|
371
|
+
)
|
|
370
372
|
|
|
371
373
|
if robots_txt_file:
|
|
372
374
|
skipped, links_iterator = partition(lambda url: robots_txt_file.is_allowed(url), links_iterator)
|
|
@@ -96,7 +96,7 @@ class Statistics(Generic[TStatisticsState]):
|
|
|
96
96
|
|
|
97
97
|
self._state = RecoverableState(
|
|
98
98
|
default_state=state_model(stats_id=self._id),
|
|
99
|
-
persist_state_key=persist_state_key or f'
|
|
99
|
+
persist_state_key=persist_state_key or f'__CRAWLER_STATISTICS_{self._id}',
|
|
100
100
|
persistence_enabled=persistence_enabled,
|
|
101
101
|
persist_state_kvs_name=persist_state_kvs_name,
|
|
102
102
|
persist_state_kvs_factory=persist_state_kvs_factory,
|
|
@@ -130,6 +130,7 @@ class Statistics(Generic[TStatisticsState]):
|
|
|
130
130
|
persistence_enabled: bool = False,
|
|
131
131
|
persist_state_kvs_name: str | None = None,
|
|
132
132
|
persist_state_key: str | None = None,
|
|
133
|
+
persist_state_kvs_factory: Callable[[], Coroutine[None, None, KeyValueStore]] | None = None,
|
|
133
134
|
log_message: str = 'Statistics',
|
|
134
135
|
periodic_message_logger: Logger | None = None,
|
|
135
136
|
log_interval: timedelta = timedelta(minutes=1),
|
|
@@ -141,6 +142,7 @@ class Statistics(Generic[TStatisticsState]):
|
|
|
141
142
|
persistence_enabled=persistence_enabled,
|
|
142
143
|
persist_state_kvs_name=persist_state_kvs_name,
|
|
143
144
|
persist_state_key=persist_state_key,
|
|
145
|
+
persist_state_kvs_factory=persist_state_kvs_factory,
|
|
144
146
|
log_message=log_message,
|
|
145
147
|
periodic_message_logger=periodic_message_logger,
|
|
146
148
|
log_interval=log_interval,
|
|
@@ -187,7 +189,10 @@ class Statistics(Generic[TStatisticsState]):
|
|
|
187
189
|
if not self._active:
|
|
188
190
|
raise RuntimeError(f'The {self.__class__.__name__} is not active.')
|
|
189
191
|
|
|
190
|
-
|
|
192
|
+
if not self.state.crawler_last_started_at:
|
|
193
|
+
raise RuntimeError('Statistics.state.crawler_last_started_at not set.')
|
|
194
|
+
self.state.crawler_finished_at = datetime.now(timezone.utc)
|
|
195
|
+
self.state.crawler_runtime += self.state.crawler_finished_at - self.state.crawler_last_started_at
|
|
191
196
|
|
|
192
197
|
await self._state.teardown()
|
|
193
198
|
|
|
@@ -255,8 +260,7 @@ class Statistics(Generic[TStatisticsState]):
|
|
|
255
260
|
if self._instance_start is None:
|
|
256
261
|
raise RuntimeError('The Statistics object is not initialized')
|
|
257
262
|
|
|
258
|
-
|
|
259
|
-
total_minutes = crawler_runtime.total_seconds() / 60
|
|
263
|
+
total_minutes = self.state.crawler_runtime.total_seconds() / 60
|
|
260
264
|
state = self._state.current_value
|
|
261
265
|
serialized_state = state.model_dump(by_alias=False)
|
|
262
266
|
|
|
@@ -267,7 +271,7 @@ class Statistics(Generic[TStatisticsState]):
|
|
|
267
271
|
requests_failed_per_minute=math.floor(state.requests_failed / total_minutes) if total_minutes else 0,
|
|
268
272
|
request_total_duration=state.request_total_finished_duration + state.request_total_failed_duration,
|
|
269
273
|
requests_total=state.requests_failed + state.requests_finished,
|
|
270
|
-
crawler_runtime=crawler_runtime,
|
|
274
|
+
crawler_runtime=state.crawler_runtime,
|
|
271
275
|
requests_finished=state.requests_finished,
|
|
272
276
|
requests_failed=state.requests_failed,
|
|
273
277
|
retry_histogram=serialized_state['request_retry_histogram'],
|