crawlee 1.0.2b7__tar.gz → 1.0.3__tar.gz
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crawlee might be problematic. Click here for more details.
- {crawlee-1.0.2b7 → crawlee-1.0.3}/.github/workflows/build_and_deploy_docs.yaml +1 -1
- {crawlee-1.0.2b7 → crawlee-1.0.3}/.github/workflows/templates_e2e_tests.yaml +1 -1
- {crawlee-1.0.2b7 → crawlee-1.0.3}/CHANGELOG.md +11 -3
- {crawlee-1.0.2b7 → crawlee-1.0.3}/PKG-INFO +2 -2
- {crawlee-1.0.2b7 → crawlee-1.0.3}/pyproject.toml +2 -2
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_request.py +31 -20
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_service_locator.py +4 -4
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_types.py +10 -16
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_utils/recoverable_state.py +32 -8
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_utils/sitemap.py +1 -1
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/statistics/_statistics.py +6 -1
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_file_system/_request_queue_client.py +24 -6
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_sql/_key_value_store_client.py +3 -2
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_sql/_request_queue_client.py +18 -4
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_sql/_storage_client.py +1 -1
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storages/_key_value_store.py +5 -2
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/_utils/test_sitemap.py +3 -5
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/conftest.py +6 -1
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler.py +0 -1
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/http_clients/test_http_clients.py +5 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/otel/test_crawler_instrumentor.py +8 -8
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/storage_clients/_file_system/test_fs_rq_client.py +10 -2
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/storages/test_dataset.py +2 -2
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/storages/test_key_value_store.py +44 -4
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/storages/test_request_queue.py +65 -3
- {crawlee-1.0.2b7 → crawlee-1.0.3}/uv.lock +704 -592
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/package.json +1 -1
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/css/custom.css +0 -1
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/yarn.lock +283 -287
- {crawlee-1.0.2b7 → crawlee-1.0.3}/.editorconfig +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/.github/CODEOWNERS +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/.github/pull_request_template.md +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/.github/workflows/check_pr_title.yaml +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/.github/workflows/pre_release.yaml +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/.github/workflows/release.yaml +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/.github/workflows/run_code_checks.yaml +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/.github/workflows/update_new_issue.yaml +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/.gitignore +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/.markdownlint.yaml +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/.pre-commit-config.yaml +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/CONTRIBUTING.md +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/LICENSE +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/Makefile +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/README.md +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/deployment/apify_platform.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/deployment/code_examples/apify/crawler_as_actor_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/deployment/code_examples/apify/get_public_url.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/deployment/code_examples/apify/log_with_config_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/deployment/code_examples/apify/proxy_advanced_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/deployment/code_examples/apify/proxy_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/deployment/code_examples/google/cloud_run_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/deployment/code_examples/google/google_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/deployment/google_cloud.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/deployment/google_cloud_run.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/add_data_to_dataset.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/beautifulsoup_crawler.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/capture_screenshot_using_playwright.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/capturing_page_snapshots_with_error_snapshotter.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/adaptive_playwright_crawler.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/add_data_to_dataset_bs.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/add_data_to_dataset_dataset.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/add_data_to_dataset_pw.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/beautifulsoup_crawler.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/beautifulsoup_crawler_keep_alive.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/beautifulsoup_crawler_stop.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/capture_screenshot_using_playwright.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/configure_json_logging.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/crawl_all_links_on_website_bs.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/crawl_all_links_on_website_pw.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/crawl_multiple_urls_bs.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/crawl_multiple_urls_pw.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/crawl_specific_links_on_website_bs.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/crawl_specific_links_on_website_pw.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/crawl_website_with_relative_links_all_links.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/crawl_website_with_relative_links_same_domain.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/crawl_website_with_relative_links_same_hostname.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/crawl_website_with_relative_links_same_origin.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/export_entire_dataset_to_file_csv.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/export_entire_dataset_to_file_json.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/extract_and_add_specific_links_on_website_bs.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/extract_and_add_specific_links_on_website_pw.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/fill_and_submit_web_form_crawler.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/fill_and_submit_web_form_request.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/parsel_crawler.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/parsel_crawler_with_error_snapshotter.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/playwright_block_requests.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/playwright_crawler.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/playwright_crawler_with_camoufox.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/playwright_crawler_with_error_snapshotter.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/playwright_crawler_with_fingerprint_generator.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/respect_robots_on_skipped_request.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/respect_robots_txt_file.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/resuming_paused_crawl.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/using_browser_profiles_chrome.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/code_examples/using_browser_profiles_firefox.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/crawl_all_links_on_website.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/crawl_multiple_urls.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/crawl_specific_links_on_website.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/crawl_website_with_relative_links.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/crawler_keep_alive.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/crawler_stop.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/export_entire_dataset_to_file.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/fill_and_submit_web_form.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/json_logging.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/parsel_crawler.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/playwright_crawler.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/playwright_crawler_adaptive.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/playwright_crawler_with_block_requests.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/playwright_crawler_with_camoufox.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/playwright_crawler_with_fingerprint_generator.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/respect_robots_txt_file.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/resuming_paused_crawl.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/examples/using_browser_profile.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/architecture_overview.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/avoid_blocking.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/avoid_blocking/default_fingerprint_generator_with_args.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/avoid_blocking/playwright_with_fingerprint_generator.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/creating_web_archive/manual_archiving_parsel_crawler.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/creating_web_archive/manual_archiving_playwright_crawler.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/creating_web_archive/simple_pw_through_proxy_pywb_server.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/error_handling/change_handle_error_status.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/error_handling/disable_retry.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/error_handling/handle_proxy_error.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/http_clients/parsel_curl_impersonate_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/http_clients/parsel_httpx_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/http_clients/parsel_impit_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/http_crawlers/beautifulsoup_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/http_crawlers/custom_crawler_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/http_crawlers/http_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/http_crawlers/parsel_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/login_crawler/http_login.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/login_crawler/playwright_login.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/playwright_crawler/browser_configuration_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/playwright_crawler/multiple_launch_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/playwright_crawler/plugin_browser_configuration_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/playwright_crawler/pre_navigation_hook_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/playwright_crawler_adaptive/handler.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/playwright_crawler_adaptive/init_beautifulsoup.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/playwright_crawler_adaptive/init_parsel.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/playwright_crawler_adaptive/init_prediction.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/playwright_crawler_adaptive/pre_nav_hooks.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/playwright_crawler_stagehand/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/playwright_crawler_stagehand/browser_classes.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/playwright_crawler_stagehand/stagehand_run.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/playwright_crawler_stagehand/support_classes.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/proxy_management/inspecting_bs_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/proxy_management/inspecting_pw_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/proxy_management/integration_bs_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/proxy_management/integration_pw_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/proxy_management/quick_start_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/proxy_management/session_bs_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/proxy_management/session_pw_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/proxy_management/tiers_bs_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/proxy_management/tiers_pw_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/request_loaders/rl_basic_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/request_loaders/rl_basic_example_with_persist.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/request_loaders/rl_tandem_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/request_loaders/rl_tandem_example_explicit.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/request_loaders/sitemap_basic_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/request_loaders/sitemap_example_with_persist.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/request_loaders/sitemap_tandem_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/request_loaders/sitemap_tandem_example_explicit.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/request_router/adaptive_crawler_handlers.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/request_router/basic_request_handlers.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/request_router/custom_router_default_only.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/request_router/error_handler.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/request_router/failed_request_handler.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/request_router/http_pre_navigation.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/request_router/playwright_pre_navigation.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/request_router/simple_default_handler.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/running_in_web_server/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/running_in_web_server/crawler.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/running_in_web_server/server.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/scaling_crawlers/max_tasks_per_minute_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/scaling_crawlers/min_and_max_concurrency_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/service_locator/service_conflicts.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/service_locator/service_crawler_configuration.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/service_locator/service_crawler_event_manager.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/service_locator/service_crawler_storage_client.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/service_locator/service_locator_configuration.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/service_locator/service_locator_event_manager.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/service_locator/service_locator_storage_client.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/service_locator/service_storage_configuration.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/service_locator/service_storage_storage_client.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/session_management/multi_sessions_http.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/session_management/one_session_http.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/session_management/sm_basic.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/session_management/sm_beautifulsoup.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/session_management/sm_http.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/session_management/sm_parsel.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/session_management/sm_playwright.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/session_management/sm_standalone.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/storage_clients/custom_storage_client_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/storage_clients/file_system_storage_client_basic_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/storage_clients/file_system_storage_client_configuration_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/storage_clients/memory_storage_client_basic_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/storage_clients/registering_storage_clients_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/storage_clients/sql_storage_client_basic_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/storage_clients/sql_storage_client_configuration_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/storages/cleaning_do_not_purge_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/storages/cleaning_purge_explicitly_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/storages/dataset_basic_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/storages/dataset_with_crawler_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/storages/dataset_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/storages/helper_add_requests_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/storages/helper_enqueue_links_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/storages/kvs_basic_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/storages/kvs_with_crawler_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/storages/kvs_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/storages/opening.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/storages/rq_basic_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/storages/rq_with_crawler_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/storages/rq_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/code_examples/trace_and_monitor_crawlers/instrument_crawler.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/crawler_login.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/creating_web_archive.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/error_handling.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/http_clients.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/http_crawlers.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/playwright_crawler.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/playwright_crawler_adaptive.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/playwright_crawler_stagehand.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/proxy_management.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/request_loaders.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/request_router.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/running_in_web_server.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/scaling_crawlers.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/service_locator.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/session_management.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/storage_clients.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/storages.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/guides/trace_and_monitor_crawlers.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/01_setting_up.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/02_first_crawler.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/03_adding_more_urls.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/04_real_world_project.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/05_crawling.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/06_scraping.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/07_saving_data.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/08_refactoring.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/09_running_in_cloud.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/code_examples/02_bs.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/code_examples/02_bs_better.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/code_examples/02_request_queue.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/code_examples/03_enqueue_strategy.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/code_examples/03_finding_new_links.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/code_examples/03_globs.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/code_examples/03_original_code.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/code_examples/03_transform_request.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/code_examples/04_sanity_check.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/code_examples/05_crawling_detail.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/code_examples/05_crawling_listing.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/code_examples/06_scraping.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/code_examples/07_final_code.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/code_examples/07_first_code.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/code_examples/08_main.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/code_examples/08_routes.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/code_examples/09_apify_sdk.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/code_examples/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/code_examples/routes.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/introduction/index.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/pyproject.toml +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/quick-start/code_examples/beautifulsoup_crawler_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/quick-start/code_examples/parsel_crawler_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/quick-start/code_examples/playwright_crawler_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/quick-start/code_examples/playwright_crawler_headful_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/quick-start/index.mdx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/upgrading/upgrading_to_v0x.md +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/docs/upgrading/upgrading_to_v1.md +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/renovate.json +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_autoscaling/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_autoscaling/_types.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_autoscaling/autoscaled_pool.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_autoscaling/py.typed +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_autoscaling/snapshotter.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_autoscaling/system_status.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_browserforge_workaround.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_cli.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_consts.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_log_config.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_utils/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_utils/blocked.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_utils/byte_size.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_utils/console.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_utils/context.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_utils/crypto.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_utils/docs.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_utils/file.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_utils/globs.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_utils/html_to_text.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_utils/models.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_utils/raise_if_too_many_kwargs.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_utils/recurring_task.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_utils/requests.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_utils/robots.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_utils/system.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_utils/time.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_utils/try_import.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_utils/urls.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_utils/wait.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/_utils/web.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/browsers/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/browsers/_browser_controller.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/browsers/_browser_plugin.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/browsers/_browser_pool.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/browsers/_playwright_browser.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/browsers/_playwright_browser_controller.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/browsers/_playwright_browser_plugin.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/browsers/_types.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/browsers/py.typed +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/configuration.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_abstract_http/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_abstract_http/_abstract_http_parser.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_abstract_http/_http_crawling_context.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_abstract_http/py.typed +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_adaptive_playwright/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_adaptive_playwright/_result_comparator.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_adaptive_playwright/_utils.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_basic/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_basic/_basic_crawler.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_basic/_basic_crawling_context.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_basic/_context_pipeline.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_basic/_logging_utils.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_basic/py.typed +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_beautifulsoup/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawler.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawling_context.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_parser.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_beautifulsoup/_utils.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_beautifulsoup/py.typed +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_http/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_http/_http_crawler.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_http/_http_parser.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_parsel/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_parsel/_parsel_crawler.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_parsel/_parsel_crawling_context.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_parsel/_parsel_parser.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_parsel/_utils.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_playwright/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_playwright/_playwright_crawler.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_playwright/_playwright_crawling_context.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_playwright/_playwright_http_client.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_playwright/_types.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_playwright/_utils.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/_types.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/crawlers/py.typed +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/errors.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/events/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/events/_event_manager.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/events/_local_event_manager.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/events/_types.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/events/py.typed +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/fingerprint_suite/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/fingerprint_suite/_browserforge_adapter.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/fingerprint_suite/_consts.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/fingerprint_suite/_fingerprint_generator.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/fingerprint_suite/_header_generator.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/fingerprint_suite/_types.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/fingerprint_suite/py.typed +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/http_clients/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/http_clients/_base.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/http_clients/_curl_impersonate.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/http_clients/_httpx.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/http_clients/_impit.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/otel/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/otel/crawler_instrumentor.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/project_template/cookiecutter.json +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/project_template/hooks/post_gen_project.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/project_template/hooks/pre_gen_project.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/project_template/templates/main.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/project_template/templates/main_beautifulsoup.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/project_template/templates/main_parsel.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/project_template/templates/main_playwright.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/project_template/templates/main_playwright_camoufox.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/project_template/templates/routes_beautifulsoup.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/project_template/templates/routes_camoufox.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/project_template/templates/routes_parsel.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/project_template/templates/routes_playwright.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/project_template/templates/routes_playwright_camoufox.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/project_template/{{cookiecutter.project_name}}/.dockerignore +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/project_template/{{cookiecutter.project_name}}/README.md +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/project_template/{{cookiecutter.project_name}}/requirements.txt +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__main__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/main.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/routes.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/proxy_configuration.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/py.typed +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/request_loaders/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/request_loaders/_request_list.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/request_loaders/_request_loader.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/request_loaders/_request_manager.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/request_loaders/_request_manager_tandem.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/request_loaders/_sitemap_request_loader.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/router.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/sessions/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/sessions/_cookies.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/sessions/_models.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/sessions/_session.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/sessions/_session_pool.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/sessions/py.typed +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/statistics/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/statistics/_error_snapshotter.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/statistics/_error_tracker.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/statistics/_models.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_base/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_base/_dataset_client.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_base/_key_value_store_client.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_base/_request_queue_client.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_base/_storage_client.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_base/py.typed +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_file_system/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_file_system/_dataset_client.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_file_system/_key_value_store_client.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_file_system/_storage_client.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_file_system/_utils.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_file_system/py.typed +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_memory/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_memory/_dataset_client.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_memory/_key_value_store_client.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_memory/_request_queue_client.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_memory/_storage_client.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_memory/py.typed +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_sql/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_sql/_client_mixin.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_sql/_dataset_client.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_sql/_db_models.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_sql/py.typed +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/models.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/py.typed +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storages/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storages/_base.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storages/_dataset.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storages/_request_queue.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storages/_storage_instance_manager.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storages/_utils.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storages/py.typed +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/e2e/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/e2e/conftest.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/e2e/project_template/test_static_crawlers_templates.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/e2e/project_template/utils.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/README.md +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/__init__.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/_autoscaling/test_autoscaled_pool.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/_autoscaling/test_snapshotter.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/_autoscaling/test_system_status.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/_statistics/test_error_tracker.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/_statistics/test_periodic_logging.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/_statistics/test_persistence.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/_statistics/test_request_processing_record.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/_utils/test_byte_size.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/_utils/test_console.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/_utils/test_crypto.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/_utils/test_file.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/_utils/test_globs.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/_utils/test_html_to_text.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/_utils/test_measure_time.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/_utils/test_raise_if_too_many_kwargs.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/_utils/test_recurring_task.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/_utils/test_requests.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/_utils/test_robots.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/_utils/test_system.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/_utils/test_timedelata_ms.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/_utils/test_urls.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/browsers/test_browser_pool.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/browsers/test_playwright_browser.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/browsers/test_playwright_browser_controller.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/browsers/test_playwright_browser_plugin.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler_statistics.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawling_context.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/crawlers/_adaptive_playwright/test_predictor.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/crawlers/_basic/test_basic_crawler.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/crawlers/_basic/test_context_pipeline.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/crawlers/_http/test_http_crawler.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/crawlers/_parsel/test_parsel_crawler.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/crawlers/_playwright/test_playwright_crawler.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/events/test_event_manager.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/events/test_local_event_manager.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/fingerprint_suite/test_adapters.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/fingerprint_suite/test_header_generator.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/http_clients/test_httpx.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/proxy_configuration/test_new_proxy_info.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/proxy_configuration/test_tiers.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/request_loaders/test_request_list.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/request_loaders/test_sitemap_request_loader.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/server.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/server_endpoints.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/sessions/test_cookies.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/sessions/test_models.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/sessions/test_session.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/sessions/test_session_pool.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/storage_clients/_memory/test_memory_dataset_client.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/storage_clients/_memory/test_memory_kvs_client.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/storage_clients/_memory/test_memory_rq_client.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/storage_clients/_sql/test_sql_dataset_client.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/storage_clients/_sql/test_sql_kvs_client.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/storage_clients/_sql/test_sql_rq_client.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/storages/conftest.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/storages/test_request_manager_tandem.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/storages/test_storage_instance_manager.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/test_cli.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/test_configuration.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/test_log_config.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/test_router.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/tests/unit/test_service_locator.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/.eslintrc.json +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/.yarnrc.yml +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/babel.config.js +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/build_api_reference.sh +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/docusaurus.config.js +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/generate_module_shortcuts.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/patches/@docusaurus+core+3.4.0.patch +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/patches/@docusaurus+core+3.5.2.patch +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/roa-loader/index.js +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/roa-loader/package.json +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/sidebars.js +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/components/ApiLink.jsx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/components/Button.jsx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/components/Button.module.css +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/components/CopyButton.jsx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/components/CopyButton.module.css +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/components/Gradients.jsx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/components/Highlights.jsx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/components/Highlights.module.css +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/components/Homepage/HomepageCliExample.jsx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/components/Homepage/HomepageCliExample.module.css +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/components/Homepage/HomepageCtaSection.jsx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/components/Homepage/HomepageCtaSection.module.css +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/components/Homepage/HomepageHeroSection.jsx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/components/Homepage/HomepageHeroSection.module.css +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/components/Homepage/LanguageInfoWidget.jsx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/components/Homepage/LanguageInfoWidget.module.css +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/components/Homepage/LanguageSwitch.jsx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/components/Homepage/LanguageSwitch.module.css +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/components/Homepage/RiverSection.jsx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/components/Homepage/RiverSection.module.css +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/components/Homepage/ThreeCardsWithIcon.jsx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/components/Homepage/ThreeCardsWithIcon.module.css +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/components/Homepage/animated-crawlee-logo-dark.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/components/Homepage/animated-crawlee-logo-light.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/components/RunnableCodeBlock.jsx +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/components/RunnableCodeBlock.module.css +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/pages/home_page_example.py +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/pages/index.js +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/pages/index.module.css +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/theme/ColorModeToggle/dark-mode-icon.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/theme/ColorModeToggle/index.js +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/theme/ColorModeToggle/light-mode-icon.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/theme/ColorModeToggle/styles.module.css +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/theme/DocItem/Layout/index.js +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/theme/DocItem/Layout/styles.module.css +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/theme/Footer/LinkItem/index.js +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/theme/Footer/LinkItem/index.module.css +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/theme/Footer/index.js +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/theme/Footer/index.module.css +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/theme/MDXComponents/A.js +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/theme/Navbar/Content/index.js +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/theme/Navbar/Content/styles.module.css +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/theme/Navbar/Logo/index.js +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/theme/Navbar/Logo/index.module.css +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/theme/Navbar/MobileSidebar/Header/index.js +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/theme/Navbar/MobileSidebar/Header/index.module.css +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/theme/Navbar/MobileSidebar/Layout/index.js +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/theme/Navbar/MobileSidebar/PrimaryMenu/index.js +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/theme/Navbar/MobileSidebar/index.js +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/src/theme/NavbarItem/ComponentTypes.js +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/.nojekyll +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/font/lota.woff +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/font/lota.woff2 +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/API.png +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/apify_logo.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/apify_og_SDK.png +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/apify_sdk.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/apify_sdk_white.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/arrow_right.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/auto-scaling-dark.webp +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/auto-scaling-light.webp +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/check.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/chrome-scrape-dark.gif +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/chrome-scrape-light.gif +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/cloud_icon.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/community-dark-icon.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/community-light-icon.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/crawlee-dark-new.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/crawlee-dark.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/crawlee-javascript-dark.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/crawlee-javascript-light.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/crawlee-light-new.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/crawlee-light.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/crawlee-logo-monocolor.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/crawlee-logo.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/crawlee-python-dark.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/crawlee-python-light.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/crawlee-python-og.png +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/defaults-dark-icon.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/defaults-light-icon.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/discord-brand-dark.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/discord-brand.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/docusaurus.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/external-link.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/favicon.ico +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/favorite-tools-dark.webp +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/favorite-tools-light.webp +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/features/auto-scaling.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/features/automate-everything.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/features/fingerprints.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/features/node-requests.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/features/runs-on-py.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/features/storage.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/features/works-everywhere.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/fill-and-submit-web-form/00.jpg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/fill-and-submit-web-form/01.jpg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/fill-and-submit-web-form/02.jpg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/fill-and-submit-web-form/03.jpg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/getting-started/current-price.jpg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/getting-started/scraping-practice.jpg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/getting-started/select-an-element.jpg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/getting-started/selected-element.jpg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/getting-started/sku.jpg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/getting-started/title.jpg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/github-brand-dark.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/github-brand.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/guides/jaeger_otel_search_view_example.png +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/guides/jaeger_otel_trace_example.png +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/hearth copy.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/hearth.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/javascript_logo.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/js_file.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/logo-big.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/logo-blur.png +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/logo-blur.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/logo-zoom.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/menu-arrows.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/oss_logo.png +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/puppeteer-live-view-dashboard.png +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/puppeteer-live-view-detail.png +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/queue-dark-icon.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/queue-light-icon.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/resuming-paused-crawl/00.webp +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/resuming-paused-crawl/01.webp +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/robot.png +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/routing-dark-icon.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/routing-light-icon.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/scraping-utils-dark-icon.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/scraping-utils-light-icon.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/smart-proxy-dark.webp +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/smart-proxy-light.webp +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/source_code.png +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/system.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/triangles_dark.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/triangles_light.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/workflow.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/zero-setup-dark-icon.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/img/zero-setup-light-icon.svg +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/js/custom.js +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/static/robots.txt +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/tools/docs-prettier.config.js +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/tools/utils/externalLink.js +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/tools/website_gif/chrome-scrape-dark.gif +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/tools/website_gif/chrome-scrape-dark.mp4 +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/tools/website_gif/chrome-scrape-light.gif +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/tools/website_gif/chrome-scrape-light.mp4 +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/tools/website_gif/website_gif.mjs +0 -0
- {crawlee-1.0.2b7 → crawlee-1.0.3}/website/tsconfig.eslint.json +0 -0
|
@@ -2,8 +2,17 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
5
|
+
## [1.0.3](https://github.com/apify/crawlee-python/releases/tag/v1.0.3) (2025-10-17)
|
|
6
|
+
|
|
7
|
+
### 🐛 Bug Fixes
|
|
8
|
+
|
|
9
|
+
- Add support for Pydantic v2.12 ([#1471](https://github.com/apify/crawlee-python/pull/1471)) ([35c1108](https://github.com/apify/crawlee-python/commit/35c110878c2f445a2866be2522ea8703e9b371dd)) by [@Mantisus](https://github.com/Mantisus), closes [#1464](https://github.com/apify/crawlee-python/issues/1464)
|
|
10
|
+
- Fix database version warning message ([#1485](https://github.com/apify/crawlee-python/pull/1485)) ([18a545e](https://github.com/apify/crawlee-python/commit/18a545ee8add92e844acd0068f9cb8580a82e1c9)) by [@Mantisus](https://github.com/Mantisus)
|
|
11
|
+
- Fix `reclaim_request` in `SqlRequestQueueClient` to correctly update the request state ([#1486](https://github.com/apify/crawlee-python/pull/1486)) ([1502469](https://github.com/apify/crawlee-python/commit/150246957f8f7f1ceb77bb77e3a02a903c50cae1)) by [@Mantisus](https://github.com/Mantisus), closes [#1484](https://github.com/apify/crawlee-python/issues/1484)
|
|
12
|
+
- Fix `KeyValueStore.auto_saved_value` failing in some scenarios ([#1438](https://github.com/apify/crawlee-python/pull/1438)) ([b35dee7](https://github.com/apify/crawlee-python/commit/b35dee78180e57161b826641d45a61b8d8f6ef51)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1354](https://github.com/apify/crawlee-python/issues/1354)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
## [1.0.2](https://github.com/apify/crawlee-python/releases/tag/v1.0.2) (2025-10-08)
|
|
7
16
|
|
|
8
17
|
### 🐛 Bug Fixes
|
|
9
18
|
|
|
@@ -12,7 +21,6 @@ All notable changes to this project will be documented in this file.
|
|
|
12
21
|
- Pin pydantic version to <2.12.0 to avoid compatibility issues ([#1467](https://github.com/apify/crawlee-python/pull/1467)) ([f11b86f](https://github.com/apify/crawlee-python/commit/f11b86f7ed57f98e83dc1b52f15f2017a919bf59)) by [@vdusek](https://github.com/vdusek)
|
|
13
22
|
|
|
14
23
|
|
|
15
|
-
<!-- git-cliff-unreleased-end -->
|
|
16
24
|
## [1.0.1](https://github.com/apify/crawlee-python/releases/tag/v1.0.1) (2025-10-06)
|
|
17
25
|
|
|
18
26
|
### 🐛 Bug Fixes
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: crawlee
|
|
3
|
-
Version: 1.0.2b7
|
|
3
|
+
Version: 1.0.3
|
|
4
4
|
Summary: Crawlee for Python
|
|
5
5
|
Project-URL: Apify Homepage, https://apify.com
|
|
6
6
|
Project-URL: Changelog, https://crawlee.dev/python/docs/changelog
|
|
@@ -232,7 +232,7 @@ Requires-Dist: more-itertools>=10.2.0
|
|
|
232
232
|
Requires-Dist: protego>=0.5.0
|
|
233
233
|
Requires-Dist: psutil>=6.0.0
|
|
234
234
|
Requires-Dist: pydantic-settings!=2.7.0,!=2.7.1,!=2.8.0,>=2.2.0
|
|
235
|
-
Requires-Dist: pydantic
|
|
235
|
+
Requires-Dist: pydantic>=2.11.0
|
|
236
236
|
Requires-Dist: pyee>=9.0.0
|
|
237
237
|
Requires-Dist: tldextract>=5.1.0
|
|
238
238
|
Requires-Dist: typing-extensions>=4.1.0
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "crawlee"
|
|
7
|
-
version = "1.0.2b7"
|
|
7
|
+
version = "1.0.3"
|
|
8
8
|
description = "Crawlee for Python"
|
|
9
9
|
authors = [{ name = "Apify Technologies s.r.o.", email = "support@apify.com" }]
|
|
10
10
|
license = { file = "LICENSE" }
|
|
@@ -40,7 +40,7 @@ dependencies = [
|
|
|
40
40
|
"protego>=0.5.0",
|
|
41
41
|
"psutil>=6.0.0",
|
|
42
42
|
"pydantic-settings>=2.2.0,!=2.7.0,!=2.7.1,!=2.8.0",
|
|
43
|
-
"pydantic>=2.11.0
|
|
43
|
+
"pydantic>=2.11.0",
|
|
44
44
|
"pyee>=9.0.0",
|
|
45
45
|
"tldextract>=5.1.0",
|
|
46
46
|
"typing-extensions>=4.1.0",
|
|
@@ -185,9 +185,6 @@ class Request(BaseModel):
|
|
|
185
185
|
method: HttpMethod = 'GET'
|
|
186
186
|
"""HTTP request method."""
|
|
187
187
|
|
|
188
|
-
headers: Annotated[HttpHeaders, Field(default_factory=HttpHeaders)] = HttpHeaders()
|
|
189
|
-
"""HTTP request headers."""
|
|
190
|
-
|
|
191
188
|
payload: Annotated[
|
|
192
189
|
HttpPayload | None,
|
|
193
190
|
BeforeValidator(lambda v: v.encode() if isinstance(v, str) else v),
|
|
@@ -195,23 +192,37 @@ class Request(BaseModel):
|
|
|
195
192
|
] = None
|
|
196
193
|
"""HTTP request payload."""
|
|
197
194
|
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
195
|
+
# Workaround for pydantic 2.12 and mypy type checking issue for Annotated with default_factory
|
|
196
|
+
if TYPE_CHECKING:
|
|
197
|
+
headers: HttpHeaders = HttpHeaders()
|
|
198
|
+
"""HTTP request headers."""
|
|
199
|
+
|
|
200
|
+
user_data: dict[str, JsonSerializable] = {}
|
|
201
|
+
"""Custom user data assigned to the request. Use this to save any request related data to the
|
|
202
|
+
request's scope, keeping them accessible on retries, failures etc.
|
|
203
|
+
"""
|
|
204
|
+
|
|
205
|
+
else:
|
|
206
|
+
headers: Annotated[HttpHeaders, Field(default_factory=HttpHeaders)]
|
|
207
|
+
"""HTTP request headers."""
|
|
208
|
+
|
|
209
|
+
user_data: Annotated[
|
|
210
|
+
dict[str, JsonSerializable], # Internally, the model contains `UserData`, this is just for convenience
|
|
211
|
+
Field(alias='userData', default_factory=lambda: UserData()),
|
|
212
|
+
PlainValidator(user_data_adapter.validate_python),
|
|
213
|
+
PlainSerializer(
|
|
214
|
+
lambda instance: user_data_adapter.dump_python(
|
|
215
|
+
instance,
|
|
216
|
+
by_alias=True,
|
|
217
|
+
exclude_none=True,
|
|
218
|
+
exclude_unset=True,
|
|
219
|
+
exclude_defaults=True,
|
|
220
|
+
)
|
|
221
|
+
),
|
|
222
|
+
]
|
|
223
|
+
"""Custom user data assigned to the request. Use this to save any request related data to the
|
|
224
|
+
request's scope, keeping them accessible on retries, failures etc.
|
|
225
|
+
"""
|
|
215
226
|
|
|
216
227
|
retry_count: Annotated[int, Field(alias='retryCount')] = 0
|
|
217
228
|
"""Number of times the request has been retried."""
|
|
@@ -38,7 +38,7 @@ class ServiceLocator:
|
|
|
38
38
|
def get_configuration(self) -> Configuration:
|
|
39
39
|
"""Get the configuration."""
|
|
40
40
|
if self._configuration is None:
|
|
41
|
-
logger.
|
|
41
|
+
logger.debug('No configuration set, implicitly creating and using default Configuration.')
|
|
42
42
|
self._configuration = Configuration()
|
|
43
43
|
|
|
44
44
|
return self._configuration
|
|
@@ -63,9 +63,9 @@ class ServiceLocator:
|
|
|
63
63
|
def get_event_manager(self) -> EventManager:
|
|
64
64
|
"""Get the event manager."""
|
|
65
65
|
if self._event_manager is None:
|
|
66
|
-
logger.
|
|
66
|
+
logger.debug('No event manager set, implicitly creating and using default LocalEventManager.')
|
|
67
67
|
if self._configuration is None:
|
|
68
|
-
logger.
|
|
68
|
+
logger.debug(
|
|
69
69
|
'Implicit creation of event manager will implicitly set configuration as side effect. '
|
|
70
70
|
'It is advised to explicitly first set the configuration instead.'
|
|
71
71
|
)
|
|
@@ -93,7 +93,7 @@ class ServiceLocator:
|
|
|
93
93
|
def get_storage_client(self) -> StorageClient:
|
|
94
94
|
"""Get the storage client."""
|
|
95
95
|
if self._storage_client is None:
|
|
96
|
-
logger.
|
|
96
|
+
logger.debug('No storage client set, implicitly creating and using default FileSystemStorageClient.')
|
|
97
97
|
if self._configuration is None:
|
|
98
98
|
logger.warning(
|
|
99
99
|
'Implicit creation of storage client will implicitly set configuration as side effect. '
|
|
@@ -3,17 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import dataclasses
|
|
4
4
|
from collections.abc import Callable, Iterator, Mapping
|
|
5
5
|
from dataclasses import dataclass
|
|
6
|
-
from typing import (
|
|
7
|
-
TYPE_CHECKING,
|
|
8
|
-
Annotated,
|
|
9
|
-
Any,
|
|
10
|
-
Literal,
|
|
11
|
-
Protocol,
|
|
12
|
-
TypedDict,
|
|
13
|
-
TypeVar,
|
|
14
|
-
cast,
|
|
15
|
-
overload,
|
|
16
|
-
)
|
|
6
|
+
from typing import TYPE_CHECKING, Annotated, Any, Literal, Protocol, TypedDict, TypeVar, cast, overload
|
|
17
7
|
|
|
18
8
|
from pydantic import ConfigDict, Field, PlainValidator, RootModel
|
|
19
9
|
|
|
@@ -71,11 +61,15 @@ class HttpHeaders(RootModel, Mapping[str, str]):
|
|
|
71
61
|
|
|
72
62
|
model_config = ConfigDict(validate_by_name=True, validate_by_alias=True)
|
|
73
63
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
64
|
+
# Workaround for pydantic 2.12 and mypy type checking issue for Annotated with default_factory
|
|
65
|
+
if TYPE_CHECKING:
|
|
66
|
+
root: dict[str, str] = {}
|
|
67
|
+
else:
|
|
68
|
+
root: Annotated[
|
|
69
|
+
dict[str, str],
|
|
70
|
+
PlainValidator(lambda value: _normalize_headers(value)),
|
|
71
|
+
Field(default_factory=dict),
|
|
72
|
+
]
|
|
79
73
|
|
|
80
74
|
def __getitem__(self, key: str) -> str:
|
|
81
75
|
return self.root[key.lower()]
|
|
@@ -4,12 +4,14 @@ from typing import TYPE_CHECKING, Generic, Literal, TypeVar
|
|
|
4
4
|
|
|
5
5
|
from pydantic import BaseModel
|
|
6
6
|
|
|
7
|
+
from crawlee._utils.raise_if_too_many_kwargs import raise_if_too_many_kwargs
|
|
7
8
|
from crawlee.events._types import Event, EventPersistStateData
|
|
8
9
|
|
|
9
10
|
if TYPE_CHECKING:
|
|
10
11
|
import logging
|
|
12
|
+
from collections.abc import Callable, Coroutine
|
|
11
13
|
|
|
12
|
-
from crawlee.storages
|
|
14
|
+
from crawlee.storages import KeyValueStore
|
|
13
15
|
|
|
14
16
|
TStateModel = TypeVar('TStateModel', bound=BaseModel)
|
|
15
17
|
|
|
@@ -37,6 +39,7 @@ class RecoverableState(Generic[TStateModel]):
|
|
|
37
39
|
persistence_enabled: Literal[True, False, 'explicit_only'] = False,
|
|
38
40
|
persist_state_kvs_name: str | None = None,
|
|
39
41
|
persist_state_kvs_id: str | None = None,
|
|
42
|
+
persist_state_kvs_factory: Callable[[], Coroutine[None, None, KeyValueStore]] | None = None,
|
|
40
43
|
logger: logging.Logger,
|
|
41
44
|
) -> None:
|
|
42
45
|
"""Initialize a new recoverable state object.
|
|
@@ -51,16 +54,40 @@ class RecoverableState(Generic[TStateModel]):
|
|
|
51
54
|
If neither a name nor and id are supplied, the default store will be used.
|
|
52
55
|
persist_state_kvs_id: The identifier of the KeyValueStore to use for persistence.
|
|
53
56
|
If neither a name nor and id are supplied, the default store will be used.
|
|
57
|
+
persist_state_kvs_factory: Factory that can be awaited to create KeyValueStore to use for persistence. If
|
|
58
|
+
not provided, a system-wide KeyValueStore will be used, based on service locator configuration.
|
|
54
59
|
logger: A logger instance for logging operations related to state persistence
|
|
55
60
|
"""
|
|
61
|
+
raise_if_too_many_kwargs(
|
|
62
|
+
persist_state_kvs_name=persist_state_kvs_name,
|
|
63
|
+
persist_state_kvs_id=persist_state_kvs_id,
|
|
64
|
+
persist_state_kvs_factory=persist_state_kvs_factory,
|
|
65
|
+
)
|
|
66
|
+
if not persist_state_kvs_factory:
|
|
67
|
+
logger.debug(
|
|
68
|
+
'No explicit key_value_store set for recoverable state. Recovery will use a system-wide KeyValueStore '
|
|
69
|
+
'based on service_locator configuration, potentially calling service_locator.set_storage_client in the '
|
|
70
|
+
'process. It is recommended to initialize RecoverableState with explicit key_value_store to avoid '
|
|
71
|
+
'global side effects.'
|
|
72
|
+
)
|
|
73
|
+
|
|
56
74
|
self._default_state = default_state
|
|
57
75
|
self._state_type: type[TStateModel] = self._default_state.__class__
|
|
58
76
|
self._state: TStateModel | None = None
|
|
59
77
|
self._persistence_enabled = persistence_enabled
|
|
60
78
|
self._persist_state_key = persist_state_key
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
79
|
+
if persist_state_kvs_factory is None:
|
|
80
|
+
|
|
81
|
+
async def kvs_factory() -> KeyValueStore:
|
|
82
|
+
from crawlee.storages import KeyValueStore # noqa: PLC0415 avoid circular import
|
|
83
|
+
|
|
84
|
+
return await KeyValueStore.open(name=persist_state_kvs_name, id=persist_state_kvs_id)
|
|
85
|
+
|
|
86
|
+
self._persist_state_kvs_factory = kvs_factory
|
|
87
|
+
else:
|
|
88
|
+
self._persist_state_kvs_factory = persist_state_kvs_factory
|
|
89
|
+
|
|
90
|
+
self._key_value_store: KeyValueStore | None = None
|
|
64
91
|
self._log = logger
|
|
65
92
|
|
|
66
93
|
async def initialize(self) -> TStateModel:
|
|
@@ -77,11 +104,8 @@ class RecoverableState(Generic[TStateModel]):
|
|
|
77
104
|
return self.current_value
|
|
78
105
|
|
|
79
106
|
# Import here to avoid circular imports.
|
|
80
|
-
from crawlee.storages._key_value_store import KeyValueStore # noqa: PLC0415
|
|
81
107
|
|
|
82
|
-
self._key_value_store = await KeyValueStore.open(
|
|
83
|
-
name=self._persist_state_kvs_name, id=self._persist_state_kvs_id
|
|
84
|
-
)
|
|
108
|
+
self._key_value_store = await self._persist_state_kvs_factory()
|
|
85
109
|
|
|
86
110
|
await self._load_saved_state()
|
|
87
111
|
|
|
@@ -335,7 +335,7 @@ async def _fetch_and_process_sitemap(
|
|
|
335
335
|
# Check if the first chunk is a valid gzip header
|
|
336
336
|
if first_chunk and raw_chunk.startswith(b'\x1f\x8b'):
|
|
337
337
|
decompressor = zlib.decompressobj(zlib.MAX_WBITS | 16)
|
|
338
|
-
|
|
338
|
+
first_chunk = False
|
|
339
339
|
|
|
340
340
|
chunk = decompressor.decompress(raw_chunk) if decompressor else raw_chunk
|
|
341
341
|
text_chunk = decoder.decode(chunk)
|
|
@@ -17,8 +17,11 @@ from crawlee.statistics import FinalStatistics, StatisticsState
|
|
|
17
17
|
from crawlee.statistics._error_tracker import ErrorTracker
|
|
18
18
|
|
|
19
19
|
if TYPE_CHECKING:
|
|
20
|
+
from collections.abc import Callable, Coroutine
|
|
20
21
|
from types import TracebackType
|
|
21
22
|
|
|
23
|
+
from crawlee.storages import KeyValueStore
|
|
24
|
+
|
|
22
25
|
TStatisticsState = TypeVar('TStatisticsState', bound=StatisticsState, default=StatisticsState)
|
|
23
26
|
TNewStatisticsState = TypeVar('TNewStatisticsState', bound=StatisticsState, default=StatisticsState)
|
|
24
27
|
logger = getLogger(__name__)
|
|
@@ -70,6 +73,7 @@ class Statistics(Generic[TStatisticsState]):
|
|
|
70
73
|
persistence_enabled: bool | Literal['explicit_only'] = False,
|
|
71
74
|
persist_state_kvs_name: str | None = None,
|
|
72
75
|
persist_state_key: str | None = None,
|
|
76
|
+
persist_state_kvs_factory: Callable[[], Coroutine[None, None, KeyValueStore]] | None = None,
|
|
73
77
|
log_message: str = 'Statistics',
|
|
74
78
|
periodic_message_logger: Logger | None = None,
|
|
75
79
|
log_interval: timedelta = timedelta(minutes=1),
|
|
@@ -95,6 +99,7 @@ class Statistics(Generic[TStatisticsState]):
|
|
|
95
99
|
persist_state_key=persist_state_key or f'SDK_CRAWLER_STATISTICS_{self._id}',
|
|
96
100
|
persistence_enabled=persistence_enabled,
|
|
97
101
|
persist_state_kvs_name=persist_state_kvs_name,
|
|
102
|
+
persist_state_kvs_factory=persist_state_kvs_factory,
|
|
98
103
|
logger=logger,
|
|
99
104
|
)
|
|
100
105
|
|
|
@@ -110,8 +115,8 @@ class Statistics(Generic[TStatisticsState]):
|
|
|
110
115
|
"""Create near copy of the `Statistics` with replaced `state_model`."""
|
|
111
116
|
new_statistics: Statistics[TNewStatisticsState] = Statistics(
|
|
112
117
|
persistence_enabled=self._state._persistence_enabled, # noqa: SLF001
|
|
113
|
-
persist_state_kvs_name=self._state._persist_state_kvs_name, # noqa: SLF001
|
|
114
118
|
persist_state_key=self._state._persist_state_key, # noqa: SLF001
|
|
119
|
+
persist_state_kvs_factory=self._state._persist_state_kvs_factory, # noqa: SLF001
|
|
115
120
|
log_message=self._log_message,
|
|
116
121
|
periodic_message_logger=self._periodic_message_logger,
|
|
117
122
|
state_model=state_model,
|
{crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_file_system/_request_queue_client.py
RENAMED
|
@@ -31,6 +31,7 @@ if TYPE_CHECKING:
|
|
|
31
31
|
from collections.abc import Sequence
|
|
32
32
|
|
|
33
33
|
from crawlee.configuration import Configuration
|
|
34
|
+
from crawlee.storages import KeyValueStore
|
|
34
35
|
|
|
35
36
|
logger = getLogger(__name__)
|
|
36
37
|
|
|
@@ -92,6 +93,7 @@ class FileSystemRequestQueueClient(RequestQueueClient):
|
|
|
92
93
|
metadata: RequestQueueMetadata,
|
|
93
94
|
path_to_rq: Path,
|
|
94
95
|
lock: asyncio.Lock,
|
|
96
|
+
recoverable_state: RecoverableState[RequestQueueState],
|
|
95
97
|
) -> None:
|
|
96
98
|
"""Initialize a new instance.
|
|
97
99
|
|
|
@@ -114,12 +116,7 @@ class FileSystemRequestQueueClient(RequestQueueClient):
|
|
|
114
116
|
self._is_empty_cache: bool | None = None
|
|
115
117
|
"""Cache for is_empty result: None means unknown, True/False is cached state."""
|
|
116
118
|
|
|
117
|
-
self._state = RecoverableState[RequestQueueState](
|
|
118
|
-
default_state=RequestQueueState(),
|
|
119
|
-
persist_state_key=f'__RQ_STATE_{self._metadata.id}',
|
|
120
|
-
persistence_enabled=True,
|
|
121
|
-
logger=logger,
|
|
122
|
-
)
|
|
119
|
+
self._state = recoverable_state
|
|
123
120
|
"""Recoverable state to maintain request ordering, in-progress status, and handled status."""
|
|
124
121
|
|
|
125
122
|
@override
|
|
@@ -136,6 +133,22 @@ class FileSystemRequestQueueClient(RequestQueueClient):
|
|
|
136
133
|
"""The full path to the request queue metadata file."""
|
|
137
134
|
return self.path_to_rq / METADATA_FILENAME
|
|
138
135
|
|
|
136
|
+
@classmethod
|
|
137
|
+
async def _create_recoverable_state(cls, id: str, configuration: Configuration) -> RecoverableState:
|
|
138
|
+
async def kvs_factory() -> KeyValueStore:
|
|
139
|
+
from crawlee.storage_clients import FileSystemStorageClient # noqa: PLC0415 avoid circular import
|
|
140
|
+
from crawlee.storages import KeyValueStore # noqa: PLC0415 avoid circular import
|
|
141
|
+
|
|
142
|
+
return await KeyValueStore.open(storage_client=FileSystemStorageClient(), configuration=configuration)
|
|
143
|
+
|
|
144
|
+
return RecoverableState[RequestQueueState](
|
|
145
|
+
default_state=RequestQueueState(),
|
|
146
|
+
persist_state_key=f'__RQ_STATE_{id}',
|
|
147
|
+
persist_state_kvs_factory=kvs_factory,
|
|
148
|
+
persistence_enabled=True,
|
|
149
|
+
logger=logger,
|
|
150
|
+
)
|
|
151
|
+
|
|
139
152
|
@classmethod
|
|
140
153
|
async def open(
|
|
141
154
|
cls,
|
|
@@ -194,6 +207,9 @@ class FileSystemRequestQueueClient(RequestQueueClient):
|
|
|
194
207
|
metadata=metadata,
|
|
195
208
|
path_to_rq=rq_base_path / rq_dir,
|
|
196
209
|
lock=asyncio.Lock(),
|
|
210
|
+
recoverable_state=await cls._create_recoverable_state(
|
|
211
|
+
id=id, configuration=configuration
|
|
212
|
+
),
|
|
197
213
|
)
|
|
198
214
|
await client._state.initialize()
|
|
199
215
|
await client._discover_existing_requests()
|
|
@@ -230,6 +246,7 @@ class FileSystemRequestQueueClient(RequestQueueClient):
|
|
|
230
246
|
metadata=metadata,
|
|
231
247
|
path_to_rq=path_to_rq,
|
|
232
248
|
lock=asyncio.Lock(),
|
|
249
|
+
recoverable_state=await cls._create_recoverable_state(id=metadata.id, configuration=configuration),
|
|
233
250
|
)
|
|
234
251
|
|
|
235
252
|
await client._state.initialize()
|
|
@@ -254,6 +271,7 @@ class FileSystemRequestQueueClient(RequestQueueClient):
|
|
|
254
271
|
metadata=metadata,
|
|
255
272
|
path_to_rq=path_to_rq,
|
|
256
273
|
lock=asyncio.Lock(),
|
|
274
|
+
recoverable_state=await cls._create_recoverable_state(id=metadata.id, configuration=configuration),
|
|
257
275
|
)
|
|
258
276
|
await client._state.initialize()
|
|
259
277
|
await client._update_metadata()
|
{crawlee-1.0.2b7 → crawlee-1.0.3}/src/crawlee/storage_clients/_sql/_key_value_store_client.py
RENAMED
|
@@ -2,9 +2,9 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
4
|
from logging import getLogger
|
|
5
|
-
from typing import TYPE_CHECKING, Any
|
|
5
|
+
from typing import TYPE_CHECKING, Any, cast
|
|
6
6
|
|
|
7
|
-
from sqlalchemy import delete, select
|
|
7
|
+
from sqlalchemy import CursorResult, delete, select
|
|
8
8
|
from typing_extensions import Self, override
|
|
9
9
|
|
|
10
10
|
from crawlee._utils.file import infer_mime_type
|
|
@@ -227,6 +227,7 @@ class SqlKeyValueStoreClient(KeyValueStoreClient, SqlClientMixin):
|
|
|
227
227
|
async with self.get_session(with_simple_commit=True) as session:
|
|
228
228
|
# Delete the record if it exists
|
|
229
229
|
result = await session.execute(stmt)
|
|
230
|
+
result = cast('CursorResult', result) if not isinstance(result, CursorResult) else result
|
|
230
231
|
|
|
231
232
|
# Update metadata if we actually deleted something
|
|
232
233
|
if result.rowcount > 0:
|
|
@@ -5,9 +5,9 @@ from datetime import datetime, timedelta, timezone
|
|
|
5
5
|
from functools import lru_cache
|
|
6
6
|
from hashlib import sha256
|
|
7
7
|
from logging import getLogger
|
|
8
|
-
from typing import TYPE_CHECKING, Any
|
|
8
|
+
from typing import TYPE_CHECKING, Any, cast
|
|
9
9
|
|
|
10
|
-
from sqlalchemy import func, or_, select, update
|
|
10
|
+
from sqlalchemy import CursorResult, func, or_, select, update
|
|
11
11
|
from sqlalchemy.exc import SQLAlchemyError
|
|
12
12
|
from sqlalchemy.orm import load_only
|
|
13
13
|
from typing_extensions import NotRequired, Self, override
|
|
@@ -231,6 +231,7 @@ class SqlRequestQueueClient(RequestQueueClient, SqlClientMixin):
|
|
|
231
231
|
|
|
232
232
|
async with self.get_session() as session:
|
|
233
233
|
result = await session.execute(stmt)
|
|
234
|
+
result = cast('CursorResult', result) if not isinstance(result, CursorResult) else result
|
|
234
235
|
existing_requests = {req.request_id: req for req in result.scalars()}
|
|
235
236
|
state = await self._get_state(session)
|
|
236
237
|
insert_values: list[dict] = []
|
|
@@ -498,9 +499,12 @@ class SqlRequestQueueClient(RequestQueueClient, SqlClientMixin):
|
|
|
498
499
|
)
|
|
499
500
|
async with self.get_session() as session:
|
|
500
501
|
result = await session.execute(stmt)
|
|
502
|
+
result = cast('CursorResult', result) if not isinstance(result, CursorResult) else result
|
|
503
|
+
|
|
501
504
|
if result.rowcount == 0:
|
|
502
505
|
logger.warning(f'Request {request.unique_key} not found in database.')
|
|
503
506
|
return None
|
|
507
|
+
|
|
504
508
|
await self._update_metadata(
|
|
505
509
|
session,
|
|
506
510
|
**_QueueMetadataUpdateParams(
|
|
@@ -542,14 +546,24 @@ class SqlRequestQueueClient(RequestQueueClient, SqlClientMixin):
|
|
|
542
546
|
block_until = now + timedelta(seconds=self._BLOCK_REQUEST_TIME)
|
|
543
547
|
# Extend blocking for forefront request, it is considered blocked by the current client.
|
|
544
548
|
stmt = stmt.values(
|
|
545
|
-
sequence_number=new_sequence,
|
|
549
|
+
sequence_number=new_sequence,
|
|
550
|
+
time_blocked_until=block_until,
|
|
551
|
+
client_key=self.client_key,
|
|
552
|
+
data=request.model_dump_json(),
|
|
546
553
|
)
|
|
547
554
|
else:
|
|
548
555
|
new_sequence = state.sequence_counter
|
|
549
556
|
state.sequence_counter += 1
|
|
550
|
-
stmt = stmt.values(
|
|
557
|
+
stmt = stmt.values(
|
|
558
|
+
sequence_number=new_sequence,
|
|
559
|
+
time_blocked_until=None,
|
|
560
|
+
client_key=None,
|
|
561
|
+
data=request.model_dump_json(),
|
|
562
|
+
)
|
|
551
563
|
|
|
552
564
|
result = await session.execute(stmt)
|
|
565
|
+
result = cast('CursorResult', result) if not isinstance(result, CursorResult) else result
|
|
566
|
+
|
|
553
567
|
if result.rowcount == 0:
|
|
554
568
|
logger.warning(f'Request {request.unique_key} not found in database.')
|
|
555
569
|
return None
|
|
@@ -149,7 +149,7 @@ class SqlStorageClient(StorageClient):
|
|
|
149
149
|
# Raise an error if the new version creates breaking changes in the database schema.
|
|
150
150
|
if db_version and db_version != __version__:
|
|
151
151
|
warnings.warn(
|
|
152
|
-
f'Database version {db_version
|
|
152
|
+
f'Database version {db_version} does not match library version {__version__}. '
|
|
153
153
|
'This may lead to unexpected behavior. Drop the db if you want to make sure that '
|
|
154
154
|
'everything will work fine.',
|
|
155
155
|
category=UserWarning,
|
|
@@ -281,11 +281,14 @@ class KeyValueStore(Storage):
|
|
|
281
281
|
if key in cache:
|
|
282
282
|
return cache[key].current_value.root
|
|
283
283
|
|
|
284
|
+
async def kvs_factory() -> KeyValueStore:
|
|
285
|
+
return self
|
|
286
|
+
|
|
284
287
|
cache[key] = recoverable_state = RecoverableState(
|
|
285
288
|
default_state=AutosavedValue(default_value),
|
|
286
|
-
persistence_enabled=True,
|
|
287
|
-
persist_state_kvs_id=self.id,
|
|
288
289
|
persist_state_key=key,
|
|
290
|
+
persistence_enabled=True,
|
|
291
|
+
persist_state_kvs_factory=kvs_factory,
|
|
289
292
|
logger=logger,
|
|
290
293
|
)
|
|
291
294
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import base64
|
|
2
2
|
import gzip
|
|
3
|
-
import os
|
|
3
|
+
import sys
|
|
4
4
|
from datetime import datetime
|
|
5
5
|
|
|
6
6
|
import pytest
|
|
@@ -95,6 +95,8 @@ async def test_gzipped_sitemap(server_url: URL, http_client: HttpClient) -> None
|
|
|
95
95
|
assert set(sitemap.urls) == BASIC_RESULTS
|
|
96
96
|
|
|
97
97
|
|
|
98
|
+
# TODO: Remove this skip when #1460 is resolved.
|
|
99
|
+
@pytest.mark.skipif(sys.platform != 'linux', reason='Flaky with Curl on Windows, see #1460.')
|
|
98
100
|
async def test_gzipped_sitemap_with_invalid_data(server_url: URL, http_client: HttpClient) -> None:
|
|
99
101
|
"""Test loading a invalid gzipped sitemap with correct type and .xml.gz url."""
|
|
100
102
|
compress_data = compress_gzip(BASIC_SITEMAP)
|
|
@@ -106,10 +108,6 @@ async def test_gzipped_sitemap_with_invalid_data(server_url: URL, http_client: H
|
|
|
106
108
|
assert sitemap.urls == []
|
|
107
109
|
|
|
108
110
|
|
|
109
|
-
@pytest.mark.skipif(
|
|
110
|
-
os.name == 'nt',
|
|
111
|
-
reason='This test is flaky on Windows, see https://github.com/apify/crawlee-python/issues/1460.',
|
|
112
|
-
)
|
|
113
111
|
async def test_gz_sitemap_with_non_gzipped(server_url: URL, http_client: HttpClient) -> None:
|
|
114
112
|
"""Test loading a sitemap with gzip type and .xml.gz url, but without gzipped data."""
|
|
115
113
|
sitemap_url = (server_url / 'sitemap.xml.gz').with_query(
|
|
@@ -17,6 +17,7 @@ from crawlee import service_locator
|
|
|
17
17
|
from crawlee.fingerprint_suite._browserforge_adapter import get_available_header_network
|
|
18
18
|
from crawlee.http_clients import CurlImpersonateHttpClient, HttpxHttpClient, ImpitHttpClient
|
|
19
19
|
from crawlee.proxy_configuration import ProxyInfo
|
|
20
|
+
from crawlee.statistics import Statistics
|
|
20
21
|
from crawlee.storages import KeyValueStore
|
|
21
22
|
from tests.unit.server import TestServer, app, serve_in_thread
|
|
22
23
|
|
|
@@ -72,6 +73,10 @@ def prepare_test_env(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> Callabl
|
|
|
72
73
|
# Verify that the test environment was set up correctly.
|
|
73
74
|
assert os.environ.get('CRAWLEE_STORAGE_DIR') == str(tmp_path)
|
|
74
75
|
|
|
76
|
+
# Reset global class variables to ensure test isolation.
|
|
77
|
+
KeyValueStore._autosaved_values = {}
|
|
78
|
+
Statistics._Statistics__next_id = 0 # type:ignore[attr-defined] # Mangled attribute
|
|
79
|
+
|
|
75
80
|
return _prepare_test_env
|
|
76
81
|
|
|
77
82
|
|
|
@@ -195,9 +200,9 @@ def redirect_server_url(redirect_http_server: TestServer) -> URL:
|
|
|
195
200
|
|
|
196
201
|
@pytest.fixture(
|
|
197
202
|
params=[
|
|
198
|
-
pytest.param('curl', id='curl'),
|
|
199
203
|
pytest.param('httpx', id='httpx'),
|
|
200
204
|
pytest.param('impit', id='impit'),
|
|
205
|
+
pytest.param('curl', id='curl'),
|
|
201
206
|
]
|
|
202
207
|
)
|
|
203
208
|
async def http_client(request: pytest.FixtureRequest) -> HttpClient:
|
|
@@ -493,7 +493,6 @@ async def test_adaptive_playwright_crawler_statistics_in_init() -> None:
|
|
|
493
493
|
assert type(crawler._statistics.state) is AdaptivePlaywrightCrawlerStatisticState
|
|
494
494
|
|
|
495
495
|
assert crawler._statistics._state._persistence_enabled == persistence_enabled
|
|
496
|
-
assert crawler._statistics._state._persist_state_kvs_name == persist_state_kvs_name
|
|
497
496
|
assert crawler._statistics._state._persist_state_key == persist_state_key
|
|
498
497
|
|
|
499
498
|
assert crawler._statistics._log_message == log_message
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import os
|
|
4
|
+
import sys
|
|
4
5
|
from typing import TYPE_CHECKING
|
|
5
6
|
|
|
6
7
|
import pytest
|
|
@@ -163,6 +164,10 @@ async def test_send_request_allow_redirects_false(custom_http_client: HttpClient
|
|
|
163
164
|
|
|
164
165
|
|
|
165
166
|
async def test_stream(http_client: HttpClient, server_url: URL) -> None:
|
|
167
|
+
# TODO: Remove this skip when #1494 is resolved.
|
|
168
|
+
if isinstance(http_client, CurlImpersonateHttpClient) and sys.platform != 'linux':
|
|
169
|
+
pytest.skip('Flaky with Curl on Windows, see #1494.')
|
|
170
|
+
|
|
166
171
|
content_body: bytes = b''
|
|
167
172
|
|
|
168
173
|
async with http_client.stream(str(server_url)) as response:
|