crawlee 1.0.5b3__tar.gz → 1.0.5b5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/CHANGELOG.md +1 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/PKG-INFO +1 -1
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/pyproject.toml +1 -1
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_utils/robots.py +17 -5
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/statistics/_error_snapshotter.py +1 -1
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py +35 -1
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/crawlers/_parsel/test_parsel_crawler.py +35 -1
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/crawlers/_playwright/test_playwright_crawler.py +34 -1
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/server.py +10 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/server_endpoints.py +10 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/uv.lock +11 -1
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/.editorconfig +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/.github/CODEOWNERS +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/.github/pull_request_template.md +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/.github/workflows/build_and_deploy_docs.yaml +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/.github/workflows/check_pr_title.yaml +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/.github/workflows/pre_release.yaml +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/.github/workflows/release.yaml +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/.github/workflows/run_code_checks.yaml +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/.github/workflows/templates_e2e_tests.yaml +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/.github/workflows/update_new_issue.yaml +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/.gitignore +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/.markdownlint.yaml +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/.pre-commit-config.yaml +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/CONTRIBUTING.md +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/LICENSE +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/Makefile +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/README.md +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/deployment/apify_platform.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/deployment/code_examples/apify/crawler_as_actor_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/deployment/code_examples/apify/get_public_url.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/deployment/code_examples/apify/log_with_config_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/deployment/code_examples/apify/proxy_advanced_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/deployment/code_examples/apify/proxy_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/deployment/code_examples/google/cloud_run_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/deployment/code_examples/google/google_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/deployment/google_cloud.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/deployment/google_cloud_run.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/add_data_to_dataset.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/beautifulsoup_crawler.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/capture_screenshot_using_playwright.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/capturing_page_snapshots_with_error_snapshotter.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/adaptive_playwright_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/add_data_to_dataset_bs.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/add_data_to_dataset_dataset.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/add_data_to_dataset_pw.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/beautifulsoup_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/beautifulsoup_crawler_keep_alive.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/beautifulsoup_crawler_stop.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/capture_screenshot_using_playwright.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/configure_json_logging.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/crawl_all_links_on_website_bs.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/crawl_all_links_on_website_pw.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/crawl_multiple_urls_bs.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/crawl_multiple_urls_pw.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/crawl_specific_links_on_website_bs.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/crawl_specific_links_on_website_pw.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/crawl_website_with_relative_links_all_links.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/crawl_website_with_relative_links_same_domain.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/crawl_website_with_relative_links_same_hostname.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/crawl_website_with_relative_links_same_origin.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/export_entire_dataset_to_file_csv.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/export_entire_dataset_to_file_json.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/extract_and_add_specific_links_on_website_bs.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/extract_and_add_specific_links_on_website_pw.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/fill_and_submit_web_form_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/fill_and_submit_web_form_request.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/parsel_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/parsel_crawler_with_error_snapshotter.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/playwright_block_requests.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/playwright_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/playwright_crawler_with_camoufox.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/playwright_crawler_with_error_snapshotter.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/playwright_crawler_with_fingerprint_generator.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/respect_robots_on_skipped_request.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/respect_robots_txt_file.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/resuming_paused_crawl.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/using_browser_profiles_chrome.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/using_browser_profiles_firefox.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/crawl_all_links_on_website.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/crawl_multiple_urls.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/crawl_specific_links_on_website.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/crawl_website_with_relative_links.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/crawler_keep_alive.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/crawler_stop.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/export_entire_dataset_to_file.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/fill_and_submit_web_form.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/json_logging.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/parsel_crawler.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/playwright_crawler.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/playwright_crawler_adaptive.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/playwright_crawler_with_block_requests.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/playwright_crawler_with_camoufox.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/playwright_crawler_with_fingerprint_generator.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/respect_robots_txt_file.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/resuming_paused_crawl.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/using_browser_profile.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/architecture_overview.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/avoid_blocking.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/avoid_blocking/default_fingerprint_generator_with_args.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/avoid_blocking/playwright_with_fingerprint_generator.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/creating_web_archive/manual_archiving_parsel_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/creating_web_archive/manual_archiving_playwright_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/creating_web_archive/simple_pw_through_proxy_pywb_server.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/error_handling/change_handle_error_status.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/error_handling/disable_retry.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/error_handling/handle_proxy_error.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/http_clients/parsel_curl_impersonate_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/http_clients/parsel_httpx_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/http_clients/parsel_impit_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/http_crawlers/beautifulsoup_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/http_crawlers/custom_crawler_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/http_crawlers/http_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/http_crawlers/parsel_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/login_crawler/http_login.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/login_crawler/playwright_login.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/playwright_crawler/browser_configuration_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/playwright_crawler/multiple_launch_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/playwright_crawler/plugin_browser_configuration_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/playwright_crawler/pre_navigation_hook_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/playwright_crawler_adaptive/handler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/playwright_crawler_adaptive/init_beautifulsoup.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/playwright_crawler_adaptive/init_parsel.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/playwright_crawler_adaptive/init_prediction.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/playwright_crawler_adaptive/pre_nav_hooks.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/playwright_crawler_stagehand/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/playwright_crawler_stagehand/browser_classes.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/playwright_crawler_stagehand/stagehand_run.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/playwright_crawler_stagehand/support_classes.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/proxy_management/inspecting_bs_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/proxy_management/inspecting_pw_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/proxy_management/integration_bs_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/proxy_management/integration_pw_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/proxy_management/quick_start_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/proxy_management/session_bs_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/proxy_management/session_pw_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/proxy_management/tiers_bs_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/proxy_management/tiers_pw_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/request_loaders/rl_basic_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/request_loaders/rl_basic_example_with_persist.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/request_loaders/rl_tandem_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/request_loaders/rl_tandem_example_explicit.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/request_loaders/sitemap_basic_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/request_loaders/sitemap_example_with_persist.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/request_loaders/sitemap_tandem_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/request_loaders/sitemap_tandem_example_explicit.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/request_router/adaptive_crawler_handlers.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/request_router/basic_request_handlers.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/request_router/custom_router_default_only.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/request_router/error_handler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/request_router/failed_request_handler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/request_router/http_pre_navigation.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/request_router/playwright_pre_navigation.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/request_router/simple_default_handler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/running_in_web_server/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/running_in_web_server/crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/running_in_web_server/server.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/scaling_crawlers/max_tasks_per_minute_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/scaling_crawlers/min_and_max_concurrency_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/service_locator/service_conflicts.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/service_locator/service_crawler_configuration.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/service_locator/service_crawler_event_manager.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/service_locator/service_crawler_storage_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/service_locator/service_locator_configuration.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/service_locator/service_locator_event_manager.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/service_locator/service_locator_storage_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/service_locator/service_storage_configuration.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/service_locator/service_storage_storage_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/session_management/multi_sessions_http.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/session_management/one_session_http.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/session_management/sm_basic.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/session_management/sm_beautifulsoup.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/session_management/sm_http.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/session_management/sm_parsel.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/session_management/sm_playwright.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/session_management/sm_standalone.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/storage_clients/custom_storage_client_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/storage_clients/file_system_storage_client_basic_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/storage_clients/file_system_storage_client_configuration_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/storage_clients/memory_storage_client_basic_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/storage_clients/registering_storage_clients_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/storage_clients/sql_storage_client_basic_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/storage_clients/sql_storage_client_configuration_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/storages/cleaning_do_not_purge_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/storages/cleaning_purge_explicitly_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/storages/dataset_basic_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/storages/dataset_with_crawler_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/storages/dataset_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/storages/helper_add_requests_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/storages/helper_enqueue_links_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/storages/kvs_basic_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/storages/kvs_with_crawler_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/storages/kvs_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/storages/opening.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/storages/rq_basic_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/storages/rq_with_crawler_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/storages/rq_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/trace_and_monitor_crawlers/instrument_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/crawler_login.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/creating_web_archive.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/error_handling.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/http_clients.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/http_crawlers.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/playwright_crawler.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/playwright_crawler_adaptive.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/playwright_crawler_stagehand.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/proxy_management.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/request_loaders.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/request_router.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/running_in_web_server.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/scaling_crawlers.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/service_locator.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/session_management.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/storage_clients.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/storages.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/trace_and_monitor_crawlers.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/01_setting_up.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/02_first_crawler.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/03_adding_more_urls.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/04_real_world_project.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/05_crawling.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/06_scraping.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/07_saving_data.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/08_refactoring.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/09_running_in_cloud.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/code_examples/02_bs.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/code_examples/02_bs_better.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/code_examples/02_request_queue.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/code_examples/03_enqueue_strategy.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/code_examples/03_finding_new_links.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/code_examples/03_globs.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/code_examples/03_original_code.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/code_examples/03_transform_request.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/code_examples/04_sanity_check.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/code_examples/05_crawling_detail.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/code_examples/05_crawling_listing.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/code_examples/06_scraping.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/code_examples/07_final_code.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/code_examples/07_first_code.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/code_examples/08_main.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/code_examples/08_routes.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/code_examples/09_apify_sdk.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/code_examples/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/code_examples/routes.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/introduction/index.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/pyproject.toml +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/quick-start/code_examples/beautifulsoup_crawler_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/quick-start/code_examples/parsel_crawler_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/quick-start/code_examples/playwright_crawler_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/quick-start/code_examples/playwright_crawler_headful_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/quick-start/index.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/upgrading/upgrading_to_v0x.md +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/upgrading/upgrading_to_v1.md +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/renovate.json +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_autoscaling/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_autoscaling/_types.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_autoscaling/autoscaled_pool.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_autoscaling/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_autoscaling/snapshotter.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_autoscaling/system_status.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_browserforge_workaround.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_cli.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_consts.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_log_config.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_request.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_service_locator.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_types.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_utils/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_utils/blocked.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_utils/byte_size.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_utils/console.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_utils/context.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_utils/crypto.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_utils/docs.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_utils/file.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_utils/globs.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_utils/html_to_text.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_utils/models.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_utils/raise_if_too_many_kwargs.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_utils/recoverable_state.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_utils/recurring_task.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_utils/requests.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_utils/sitemap.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_utils/system.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_utils/time.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_utils/try_import.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_utils/urls.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_utils/wait.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/_utils/web.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/browsers/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/browsers/_browser_controller.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/browsers/_browser_plugin.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/browsers/_browser_pool.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/browsers/_playwright_browser.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/browsers/_playwright_browser_controller.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/browsers/_playwright_browser_plugin.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/browsers/_types.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/browsers/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/configuration.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_abstract_http/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_abstract_http/_abstract_http_parser.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_abstract_http/_http_crawling_context.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_abstract_http/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_adaptive_playwright/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_adaptive_playwright/_result_comparator.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_adaptive_playwright/_utils.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_basic/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_basic/_basic_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_basic/_basic_crawling_context.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_basic/_context_pipeline.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_basic/_logging_utils.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_basic/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_beautifulsoup/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawling_context.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_parser.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_beautifulsoup/_utils.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_beautifulsoup/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_http/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_http/_http_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_http/_http_parser.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_parsel/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_parsel/_parsel_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_parsel/_parsel_crawling_context.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_parsel/_parsel_parser.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_parsel/_utils.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_playwright/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_playwright/_playwright_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_playwright/_playwright_crawling_context.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_playwright/_playwright_http_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_playwright/_types.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_playwright/_utils.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/_types.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/crawlers/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/errors.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/events/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/events/_event_manager.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/events/_local_event_manager.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/events/_types.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/events/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/fingerprint_suite/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/fingerprint_suite/_browserforge_adapter.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/fingerprint_suite/_consts.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/fingerprint_suite/_fingerprint_generator.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/fingerprint_suite/_header_generator.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/fingerprint_suite/_types.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/fingerprint_suite/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/http_clients/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/http_clients/_base.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/http_clients/_curl_impersonate.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/http_clients/_httpx.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/http_clients/_impit.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/otel/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/otel/crawler_instrumentor.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/project_template/cookiecutter.json +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/project_template/hooks/post_gen_project.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/project_template/hooks/pre_gen_project.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/project_template/templates/main.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/project_template/templates/main_beautifulsoup.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/project_template/templates/main_parsel.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/project_template/templates/main_playwright.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/project_template/templates/main_playwright_camoufox.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/project_template/templates/routes_beautifulsoup.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/project_template/templates/routes_camoufox.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/project_template/templates/routes_parsel.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/project_template/templates/routes_playwright.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/project_template/templates/routes_playwright_camoufox.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/.dockerignore +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/README.md +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/requirements.txt +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__main__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/main.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/routes.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/proxy_configuration.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/request_loaders/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/request_loaders/_request_list.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/request_loaders/_request_loader.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/request_loaders/_request_manager.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/request_loaders/_request_manager_tandem.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/request_loaders/_sitemap_request_loader.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/router.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/sessions/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/sessions/_cookies.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/sessions/_models.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/sessions/_session.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/sessions/_session_pool.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/sessions/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/statistics/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/statistics/_error_tracker.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/statistics/_models.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/statistics/_statistics.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/_base/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/_base/_dataset_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/_base/_key_value_store_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/_base/_request_queue_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/_base/_storage_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/_base/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/_file_system/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/_file_system/_dataset_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/_file_system/_key_value_store_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/_file_system/_request_queue_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/_file_system/_storage_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/_file_system/_utils.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/_file_system/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/_memory/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/_memory/_dataset_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/_memory/_key_value_store_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/_memory/_request_queue_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/_memory/_storage_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/_memory/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/_sql/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/_sql/_client_mixin.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/_sql/_dataset_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/_sql/_db_models.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/_sql/_key_value_store_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/_sql/_request_queue_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/_sql/_storage_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/_sql/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/models.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storage_clients/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storages/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storages/_base.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storages/_dataset.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storages/_key_value_store.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storages/_request_queue.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storages/_storage_instance_manager.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storages/_utils.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/src/crawlee/storages/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/e2e/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/e2e/conftest.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/e2e/project_template/test_static_crawlers_templates.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/e2e/project_template/utils.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/README.md +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/_autoscaling/test_autoscaled_pool.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/_autoscaling/test_snapshotter.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/_autoscaling/test_system_status.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/_statistics/test_error_tracker.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/_statistics/test_periodic_logging.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/_statistics/test_persistence.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/_statistics/test_request_processing_record.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/_utils/test_byte_size.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/_utils/test_console.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/_utils/test_crypto.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/_utils/test_file.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/_utils/test_globs.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/_utils/test_html_to_text.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/_utils/test_measure_time.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/_utils/test_raise_if_too_many_kwargs.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/_utils/test_recurring_task.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/_utils/test_requests.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/_utils/test_robots.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/_utils/test_sitemap.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/_utils/test_system.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/_utils/test_timedelata_ms.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/_utils/test_urls.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/browsers/test_browser_pool.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/browsers/test_playwright_browser.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/browsers/test_playwright_browser_controller.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/browsers/test_playwright_browser_plugin.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/conftest.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler_statistics.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawling_context.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/crawlers/_adaptive_playwright/test_predictor.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/crawlers/_basic/test_basic_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/crawlers/_basic/test_context_pipeline.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/crawlers/_http/test_http_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/events/test_event_manager.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/events/test_local_event_manager.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/fingerprint_suite/test_adapters.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/fingerprint_suite/test_header_generator.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/http_clients/test_http_clients.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/http_clients/test_httpx.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/otel/test_crawler_instrumentor.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/proxy_configuration/test_new_proxy_info.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/proxy_configuration/test_tiers.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/request_loaders/test_request_list.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/request_loaders/test_sitemap_request_loader.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/sessions/test_cookies.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/sessions/test_models.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/sessions/test_session.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/sessions/test_session_pool.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/storage_clients/_file_system/test_fs_rq_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/storage_clients/_memory/test_memory_dataset_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/storage_clients/_memory/test_memory_kvs_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/storage_clients/_memory/test_memory_rq_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/storage_clients/_sql/test_sql_dataset_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/storage_clients/_sql/test_sql_kvs_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/storage_clients/_sql/test_sql_rq_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/storages/conftest.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/storages/test_dataset.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/storages/test_key_value_store.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/storages/test_request_manager_tandem.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/storages/test_request_queue.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/storages/test_storage_instance_manager.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/test_cli.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/test_configuration.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/test_log_config.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/test_router.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/test_service_locator.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/.eslintrc.json +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/.yarnrc.yml +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/babel.config.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/build_api_reference.sh +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/docusaurus.config.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/generate_module_shortcuts.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/package.json +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/patches/@docusaurus+core+3.4.0.patch +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/patches/@docusaurus+core+3.5.2.patch +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/roa-loader/index.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/roa-loader/package.json +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/sidebars.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/components/ApiLink.jsx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/components/Button.jsx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/components/Button.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/components/CopyButton.jsx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/components/CopyButton.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/components/Gradients.jsx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/components/Highlights.jsx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/components/Highlights.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/components/Homepage/HomepageCliExample.jsx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/components/Homepage/HomepageCliExample.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/components/Homepage/HomepageCtaSection.jsx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/components/Homepage/HomepageCtaSection.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/components/Homepage/HomepageHeroSection.jsx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/components/Homepage/HomepageHeroSection.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/components/Homepage/LanguageInfoWidget.jsx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/components/Homepage/LanguageInfoWidget.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/components/Homepage/LanguageSwitch.jsx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/components/Homepage/LanguageSwitch.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/components/Homepage/RiverSection.jsx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/components/Homepage/RiverSection.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/components/Homepage/ThreeCardsWithIcon.jsx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/components/Homepage/ThreeCardsWithIcon.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/components/Homepage/animated-crawlee-logo-dark.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/components/Homepage/animated-crawlee-logo-light.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/components/RunnableCodeBlock.jsx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/components/RunnableCodeBlock.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/css/custom.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/pages/home_page_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/pages/index.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/pages/index.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/theme/ColorModeToggle/dark-mode-icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/theme/ColorModeToggle/index.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/theme/ColorModeToggle/light-mode-icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/theme/ColorModeToggle/styles.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/theme/DocItem/Layout/index.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/theme/DocItem/Layout/styles.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/theme/Footer/LinkItem/index.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/theme/Footer/LinkItem/index.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/theme/Footer/index.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/theme/Footer/index.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/theme/MDXComponents/A.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/theme/Navbar/Content/index.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/theme/Navbar/Content/styles.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/theme/Navbar/Logo/index.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/theme/Navbar/Logo/index.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/theme/Navbar/MobileSidebar/Header/index.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/theme/Navbar/MobileSidebar/Header/index.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/theme/Navbar/MobileSidebar/Layout/index.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/theme/Navbar/MobileSidebar/PrimaryMenu/index.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/theme/Navbar/MobileSidebar/index.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/src/theme/NavbarItem/ComponentTypes.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/.nojekyll +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/font/lota.woff +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/font/lota.woff2 +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/API.png +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/apify_logo.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/apify_og_SDK.png +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/apify_sdk.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/apify_sdk_white.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/arrow_right.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/auto-scaling-dark.webp +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/auto-scaling-light.webp +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/check.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/chrome-scrape-dark.gif +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/chrome-scrape-light.gif +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/cloud_icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/community-dark-icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/community-light-icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/crawlee-dark-new.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/crawlee-dark.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/crawlee-javascript-dark.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/crawlee-javascript-light.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/crawlee-light-new.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/crawlee-light.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/crawlee-logo-monocolor.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/crawlee-logo.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/crawlee-python-dark.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/crawlee-python-light.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/crawlee-python-og.png +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/defaults-dark-icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/defaults-light-icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/discord-brand-dark.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/discord-brand.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/docusaurus.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/external-link.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/favicon.ico +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/favorite-tools-dark.webp +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/favorite-tools-light.webp +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/features/auto-scaling.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/features/automate-everything.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/features/fingerprints.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/features/node-requests.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/features/runs-on-py.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/features/storage.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/features/works-everywhere.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/fill-and-submit-web-form/00.jpg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/fill-and-submit-web-form/01.jpg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/fill-and-submit-web-form/02.jpg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/fill-and-submit-web-form/03.jpg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/getting-started/current-price.jpg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/getting-started/scraping-practice.jpg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/getting-started/select-an-element.jpg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/getting-started/selected-element.jpg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/getting-started/sku.jpg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/getting-started/title.jpg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/github-brand-dark.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/github-brand.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/guides/jaeger_otel_search_view_example.png +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/guides/jaeger_otel_trace_example.png +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/hearth copy.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/hearth.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/javascript_logo.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/js_file.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/logo-big.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/logo-blur.png +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/logo-blur.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/logo-zoom.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/menu-arrows.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/oss_logo.png +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/puppeteer-live-view-dashboard.png +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/puppeteer-live-view-detail.png +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/queue-dark-icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/queue-light-icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/resuming-paused-crawl/00.webp +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/resuming-paused-crawl/01.webp +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/robot.png +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/routing-dark-icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/routing-light-icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/scraping-utils-dark-icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/scraping-utils-light-icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/smart-proxy-dark.webp +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/smart-proxy-light.webp +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/source_code.png +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/system.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/triangles_dark.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/triangles_light.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/workflow.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/zero-setup-dark-icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/img/zero-setup-light-icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/js/custom.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/static/robots.txt +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/tools/docs-prettier.config.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/tools/utils/externalLink.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/tools/website_gif/chrome-scrape-dark.gif +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/tools/website_gif/chrome-scrape-dark.mp4 +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/tools/website_gif/chrome-scrape-light.gif +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/tools/website_gif/chrome-scrape-light.mp4 +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/tools/website_gif/website_gif.mjs +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/tsconfig.eslint.json +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b5}/website/yarn.lock +0 -0
|
@@ -12,6 +12,7 @@ All notable changes to this project will be documented in this file.
|
|
|
12
12
|
### 🐛 Bug Fixes
|
|
13
13
|
|
|
14
14
|
- Improve indexing of the `request_queue_records` table for `SqlRequestQueueClient` ([#1527](https://github.com/apify/crawlee-python/pull/1527)) ([6509534](https://github.com/apify/crawlee-python/commit/65095346a9d8b703b10c91e0510154c3c48a4176)) by [@Mantisus](https://github.com/Mantisus), closes [#1526](https://github.com/apify/crawlee-python/issues/1526)
|
|
15
|
+
- Improve error handling for `RobotsTxtFile.load` ([#1524](https://github.com/apify/crawlee-python/pull/1524)) ([596a311](https://github.com/apify/crawlee-python/commit/596a31184914a254b3e7a81fd2f48ea8eda7db49)) by [@Mantisus](https://github.com/Mantisus)
|
|
15
16
|
|
|
16
17
|
|
|
17
18
|
<!-- git-cliff-unreleased-end -->
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
from logging import getLogger
|
|
3
4
|
from typing import TYPE_CHECKING
|
|
4
5
|
|
|
5
6
|
from protego import Protego
|
|
@@ -15,6 +16,9 @@ if TYPE_CHECKING:
|
|
|
15
16
|
from crawlee.proxy_configuration import ProxyInfo
|
|
16
17
|
|
|
17
18
|
|
|
19
|
+
logger = getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
18
22
|
class RobotsTxtFile:
|
|
19
23
|
def __init__(
|
|
20
24
|
self, url: str, robots: Protego, http_client: HttpClient | None = None, proxy_info: ProxyInfo | None = None
|
|
@@ -56,12 +60,20 @@ class RobotsTxtFile:
|
|
|
56
60
|
http_client: The `HttpClient` instance used to perform the network request for fetching the robots.txt file.
|
|
57
61
|
proxy_info: Optional `ProxyInfo` to be used when fetching the robots.txt file. If None, no proxy is used.
|
|
58
62
|
"""
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
+
try:
|
|
64
|
+
response = await http_client.send_request(url, proxy_info=proxy_info)
|
|
65
|
+
|
|
66
|
+
body = (
|
|
67
|
+
b'User-agent: *\nAllow: /'
|
|
68
|
+
if is_status_code_client_error(response.status_code)
|
|
69
|
+
else await response.read()
|
|
70
|
+
)
|
|
71
|
+
robots = Protego.parse(body.decode('utf-8'))
|
|
72
|
+
|
|
73
|
+
except Exception as e:
|
|
74
|
+
logger.warning(f'Failed to fetch from robots.txt from "{url}" with error: "{e}"')
|
|
63
75
|
|
|
64
|
-
|
|
76
|
+
robots = Protego.parse('User-agent: *\nAllow: /')
|
|
65
77
|
|
|
66
78
|
return cls(url, robots, http_client=http_client, proxy_info=proxy_info)
|
|
67
79
|
|
|
@@ -32,7 +32,7 @@ class ErrorSnapshotter:
|
|
|
32
32
|
"""Capture error snapshot and save it to key value store.
|
|
33
33
|
|
|
34
34
|
It saves the error snapshot directly to a key value store. It can't use `context.get_key_value_store` because
|
|
35
|
-
it returns `KeyValueStoreChangeRecords` which is
|
|
35
|
+
it returns `KeyValueStoreChangeRecords` which is committed to the key value store only if the `RequestHandler`
|
|
36
36
|
returned without an exception. ErrorSnapshotter is on the contrary active only when `RequestHandler` fails with
|
|
37
37
|
an exception.
|
|
38
38
|
|
{crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py
RENAMED
|
@@ -6,7 +6,7 @@ from unittest import mock
|
|
|
6
6
|
import pytest
|
|
7
7
|
|
|
8
8
|
from crawlee import ConcurrencySettings, Glob, HttpHeaders, RequestTransformAction, SkippedReason
|
|
9
|
-
from crawlee.crawlers import BeautifulSoupCrawler, BeautifulSoupCrawlingContext
|
|
9
|
+
from crawlee.crawlers import BasicCrawlingContext, BeautifulSoupCrawler, BeautifulSoupCrawlingContext
|
|
10
10
|
from crawlee.storages import RequestQueue
|
|
11
11
|
|
|
12
12
|
if TYPE_CHECKING:
|
|
@@ -167,6 +167,40 @@ async def test_respect_robots_txt(server_url: URL, http_client: HttpClient) -> N
|
|
|
167
167
|
}
|
|
168
168
|
|
|
169
169
|
|
|
170
|
+
async def test_respect_robots_txt_with_problematic_links(server_url: URL, http_client: HttpClient) -> None:
|
|
171
|
+
"""Test checks the crawler behavior with links that may cause problems when attempting to retrieve robots.txt."""
|
|
172
|
+
visit = mock.Mock()
|
|
173
|
+
fail = mock.Mock()
|
|
174
|
+
crawler = BeautifulSoupCrawler(
|
|
175
|
+
http_client=http_client,
|
|
176
|
+
respect_robots_txt_file=True,
|
|
177
|
+
max_request_retries=0,
|
|
178
|
+
)
|
|
179
|
+
|
|
180
|
+
@crawler.router.default_handler
|
|
181
|
+
async def request_handler(context: BeautifulSoupCrawlingContext) -> None:
|
|
182
|
+
visit(context.request.url)
|
|
183
|
+
await context.enqueue_links(strategy='all')
|
|
184
|
+
|
|
185
|
+
@crawler.failed_request_handler
|
|
186
|
+
async def error_handler(context: BasicCrawlingContext, _error: Exception) -> None:
|
|
187
|
+
fail(context.request.url)
|
|
188
|
+
|
|
189
|
+
await crawler.run([str(server_url / 'problematic_links')])
|
|
190
|
+
|
|
191
|
+
visited = {call[0][0] for call in visit.call_args_list}
|
|
192
|
+
failed = {call[0][0] for call in fail.call_args_list}
|
|
193
|
+
|
|
194
|
+
# Email must be skipped
|
|
195
|
+
# https://avatars.githubusercontent.com/apify does not get robots.txt, but is correct for the crawler.
|
|
196
|
+
assert visited == {str(server_url / 'problematic_links'), 'https://avatars.githubusercontent.com/apify'}
|
|
197
|
+
|
|
198
|
+
# The budplaceholder.com does not exist.
|
|
199
|
+
assert failed == {
|
|
200
|
+
'https://budplaceholder.com/',
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
|
|
170
204
|
async def test_on_skipped_request(server_url: URL, http_client: HttpClient) -> None:
|
|
171
205
|
crawler = BeautifulSoupCrawler(http_client=http_client, respect_robots_txt_file=True)
|
|
172
206
|
skip = mock.Mock()
|
|
@@ -14,7 +14,7 @@ if TYPE_CHECKING:
|
|
|
14
14
|
from yarl import URL
|
|
15
15
|
|
|
16
16
|
from crawlee._request import RequestOptions
|
|
17
|
-
from crawlee.crawlers import ParselCrawlingContext
|
|
17
|
+
from crawlee.crawlers import BasicCrawlingContext, ParselCrawlingContext
|
|
18
18
|
from crawlee.http_clients._base import HttpClient
|
|
19
19
|
|
|
20
20
|
|
|
@@ -261,6 +261,40 @@ async def test_respect_robots_txt(server_url: URL, http_client: HttpClient) -> N
|
|
|
261
261
|
}
|
|
262
262
|
|
|
263
263
|
|
|
264
|
+
async def test_respect_robots_txt_with_problematic_links(server_url: URL, http_client: HttpClient) -> None:
|
|
265
|
+
"""Test checks the crawler behavior with links that may cause problems when attempting to retrieve robots.txt."""
|
|
266
|
+
visit = mock.Mock()
|
|
267
|
+
fail = mock.Mock()
|
|
268
|
+
crawler = ParselCrawler(
|
|
269
|
+
http_client=http_client,
|
|
270
|
+
respect_robots_txt_file=True,
|
|
271
|
+
max_request_retries=0,
|
|
272
|
+
)
|
|
273
|
+
|
|
274
|
+
@crawler.router.default_handler
|
|
275
|
+
async def request_handler(context: ParselCrawlingContext) -> None:
|
|
276
|
+
visit(context.request.url)
|
|
277
|
+
await context.enqueue_links(strategy='all')
|
|
278
|
+
|
|
279
|
+
@crawler.failed_request_handler
|
|
280
|
+
async def error_handler(context: BasicCrawlingContext, _error: Exception) -> None:
|
|
281
|
+
fail(context.request.url)
|
|
282
|
+
|
|
283
|
+
await crawler.run([str(server_url / 'problematic_links')])
|
|
284
|
+
|
|
285
|
+
visited = {call[0][0] for call in visit.call_args_list}
|
|
286
|
+
failed = {call[0][0] for call in fail.call_args_list}
|
|
287
|
+
|
|
288
|
+
# Email must be skipped
|
|
289
|
+
# https://avatars.githubusercontent.com/apify does not get robots.txt, but is correct for the crawler.
|
|
290
|
+
assert visited == {str(server_url / 'problematic_links'), 'https://avatars.githubusercontent.com/apify'}
|
|
291
|
+
|
|
292
|
+
# The budplaceholder.com does not exist.
|
|
293
|
+
assert failed == {
|
|
294
|
+
'https://budplaceholder.com/',
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
|
|
264
298
|
async def test_on_skipped_request(server_url: URL, http_client: HttpClient) -> None:
|
|
265
299
|
crawler = ParselCrawler(http_client=http_client, respect_robots_txt_file=True)
|
|
266
300
|
skip = mock.Mock()
|
{crawlee-1.0.5b3 → crawlee-1.0.5b5}/tests/unit/crawlers/_playwright/test_playwright_crawler.py
RENAMED
|
@@ -48,7 +48,7 @@ if TYPE_CHECKING:
|
|
|
48
48
|
from crawlee._request import RequestOptions
|
|
49
49
|
from crawlee._types import HttpMethod, HttpPayload
|
|
50
50
|
from crawlee.browsers._types import BrowserType
|
|
51
|
-
from crawlee.crawlers import PlaywrightCrawlingContext, PlaywrightPreNavCrawlingContext
|
|
51
|
+
from crawlee.crawlers import BasicCrawlingContext, PlaywrightCrawlingContext, PlaywrightPreNavCrawlingContext
|
|
52
52
|
|
|
53
53
|
|
|
54
54
|
@pytest.mark.parametrize(
|
|
@@ -671,6 +671,39 @@ async def test_respect_robots_txt(server_url: URL) -> None:
|
|
|
671
671
|
}
|
|
672
672
|
|
|
673
673
|
|
|
674
|
+
async def test_respect_robots_txt_with_problematic_links(server_url: URL) -> None:
|
|
675
|
+
"""Test checks the crawler behavior with links that may cause problems when attempting to retrieve robots.txt."""
|
|
676
|
+
visit = mock.Mock()
|
|
677
|
+
fail = mock.Mock()
|
|
678
|
+
crawler = PlaywrightCrawler(
|
|
679
|
+
respect_robots_txt_file=True,
|
|
680
|
+
max_request_retries=0,
|
|
681
|
+
)
|
|
682
|
+
|
|
683
|
+
@crawler.router.default_handler
|
|
684
|
+
async def request_handler(context: PlaywrightCrawlingContext) -> None:
|
|
685
|
+
visit(context.request.url)
|
|
686
|
+
await context.enqueue_links(strategy='all')
|
|
687
|
+
|
|
688
|
+
@crawler.failed_request_handler
|
|
689
|
+
async def error_handler(context: BasicCrawlingContext, _error: Exception) -> None:
|
|
690
|
+
fail(context.request.url)
|
|
691
|
+
|
|
692
|
+
await crawler.run([str(server_url / 'problematic_links')])
|
|
693
|
+
|
|
694
|
+
visited = {call[0][0] for call in visit.call_args_list}
|
|
695
|
+
failed = {call[0][0] for call in fail.call_args_list}
|
|
696
|
+
|
|
697
|
+
# Email must be skipped
|
|
698
|
+
# https://avatars.githubusercontent.com/apify does not get robots.txt, but is correct for the crawler.
|
|
699
|
+
assert visited == {str(server_url / 'problematic_links'), 'https://avatars.githubusercontent.com/apify'}
|
|
700
|
+
|
|
701
|
+
# The budplaceholder.com does not exist.
|
|
702
|
+
assert failed == {
|
|
703
|
+
'https://budplaceholder.com/',
|
|
704
|
+
}
|
|
705
|
+
|
|
706
|
+
|
|
674
707
|
async def test_on_skipped_request(server_url: URL) -> None:
|
|
675
708
|
crawler = PlaywrightCrawler(respect_robots_txt_file=True)
|
|
676
709
|
skip = mock.Mock()
|
|
@@ -18,6 +18,7 @@ from tests.unit.server_endpoints import (
|
|
|
18
18
|
GENERIC_RESPONSE,
|
|
19
19
|
HELLO_WORLD,
|
|
20
20
|
INCAPSULA,
|
|
21
|
+
PROBLEMATIC_LINKS,
|
|
21
22
|
ROBOTS_TXT,
|
|
22
23
|
SECONDARY_INDEX,
|
|
23
24
|
START_ENQUEUE,
|
|
@@ -102,6 +103,7 @@ async def app(scope: dict[str, Any], receive: Receive, send: Send) -> None:
|
|
|
102
103
|
'page_1': generic_response_endpoint,
|
|
103
104
|
'page_2': generic_response_endpoint,
|
|
104
105
|
'page_3': generic_response_endpoint,
|
|
106
|
+
'problematic_links': problematic_links_endpoint,
|
|
105
107
|
'set_cookies': set_cookies,
|
|
106
108
|
'set_complex_cookies': set_complex_cookies,
|
|
107
109
|
'cookies': get_cookies,
|
|
@@ -287,6 +289,14 @@ async def generic_response_endpoint(_scope: dict[str, Any], _receive: Receive, s
|
|
|
287
289
|
)
|
|
288
290
|
|
|
289
291
|
|
|
292
|
+
async def problematic_links_endpoint(_scope: dict[str, Any], _receive: Receive, send: Send) -> None:
|
|
293
|
+
"""Handle requests with a page containing problematic links."""
|
|
294
|
+
await send_html_response(
|
|
295
|
+
send,
|
|
296
|
+
PROBLEMATIC_LINKS,
|
|
297
|
+
)
|
|
298
|
+
|
|
299
|
+
|
|
290
300
|
async def redirect_to_url(scope: dict[str, Any], _receive: Receive, send: Send) -> None:
|
|
291
301
|
"""Handle requests that should redirect to a specified full URL."""
|
|
292
302
|
query_params = get_query_params(scope.get('query_string', b''))
|
|
@@ -35,6 +35,16 @@ INCAPSULA = b"""\
|
|
|
35
35
|
</iframe>
|
|
36
36
|
</body></html>"""
|
|
37
37
|
|
|
38
|
+
PROBLEMATIC_LINKS = b"""\
|
|
39
|
+
<html><head>
|
|
40
|
+
<title>Hello</title>
|
|
41
|
+
</head>
|
|
42
|
+
<body>
|
|
43
|
+
<a href="https://budplaceholder.com/">Placeholder</a>
|
|
44
|
+
<a href="mailto:test@test.com">test@test.com</a>
|
|
45
|
+
<a href=https://avatars.githubusercontent.com/apify>Apify avatar/a>
|
|
46
|
+
</body></html>"""
|
|
47
|
+
|
|
38
48
|
GENERIC_RESPONSE = b"""\
|
|
39
49
|
<html><head>
|
|
40
50
|
<title>Hello</title>
|
|
@@ -705,7 +705,7 @@ toml = [
|
|
|
705
705
|
|
|
706
706
|
[[package]]
|
|
707
707
|
name = "crawlee"
|
|
708
|
-
version = "1.0.
|
|
708
|
+
version = "1.0.5b5"
|
|
709
709
|
source = { editable = "." }
|
|
710
710
|
dependencies = [
|
|
711
711
|
{ name = "cachetools" },
|
|
@@ -1102,6 +1102,8 @@ wheels = [
|
|
|
1102
1102
|
{ url = "https://files.pythonhosted.org/packages/7f/91/ae2eb6b7979e2f9b035a9f612cf70f1bf54aad4e1d125129bef1eae96f19/greenlet-3.2.4-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c2ca18a03a8cfb5b25bc1cbe20f3d9a4c80d8c3b13ba3df49ac3961af0b1018d", size = 584358, upload-time = "2025-08-07T13:18:23.708Z" },
|
|
1103
1103
|
{ url = "https://files.pythonhosted.org/packages/f7/85/433de0c9c0252b22b16d413c9407e6cb3b41df7389afc366ca204dbc1393/greenlet-3.2.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9fe0a28a7b952a21e2c062cd5756d34354117796c6d9215a87f55e38d15402c5", size = 1113550, upload-time = "2025-08-07T13:42:37.467Z" },
|
|
1104
1104
|
{ url = "https://files.pythonhosted.org/packages/a1/8d/88f3ebd2bc96bf7747093696f4335a0a8a4c5acfcf1b757717c0d2474ba3/greenlet-3.2.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8854167e06950ca75b898b104b63cc646573aa5fef1353d4508ecdd1ee76254f", size = 1137126, upload-time = "2025-08-07T13:18:20.239Z" },
|
|
1105
|
+
{ url = "https://files.pythonhosted.org/packages/f1/29/74242b7d72385e29bcc5563fba67dad94943d7cd03552bac320d597f29b2/greenlet-3.2.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f47617f698838ba98f4ff4189aef02e7343952df3a615f847bb575c3feb177a7", size = 1544904, upload-time = "2025-11-04T12:42:04.763Z" },
|
|
1106
|
+
{ url = "https://files.pythonhosted.org/packages/c8/e2/1572b8eeab0f77df5f6729d6ab6b141e4a84ee8eb9bc8c1e7918f94eda6d/greenlet-3.2.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:af41be48a4f60429d5cad9d22175217805098a9ef7c40bfef44f7669fb9d74d8", size = 1611228, upload-time = "2025-11-04T12:42:08.423Z" },
|
|
1105
1107
|
{ url = "https://files.pythonhosted.org/packages/d6/6f/b60b0291d9623c496638c582297ead61f43c4b72eef5e9c926ef4565ec13/greenlet-3.2.4-cp310-cp310-win_amd64.whl", hash = "sha256:73f49b5368b5359d04e18d15828eecc1806033db5233397748f4ca813ff1056c", size = 298654, upload-time = "2025-08-07T13:50:00.469Z" },
|
|
1106
1108
|
{ url = "https://files.pythonhosted.org/packages/a4/de/f28ced0a67749cac23fecb02b694f6473f47686dff6afaa211d186e2ef9c/greenlet-3.2.4-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:96378df1de302bc38e99c3a9aa311967b7dc80ced1dcc6f171e99842987882a2", size = 272305, upload-time = "2025-08-07T13:15:41.288Z" },
|
|
1107
1109
|
{ url = "https://files.pythonhosted.org/packages/09/16/2c3792cba130000bf2a31c5272999113f4764fd9d874fb257ff588ac779a/greenlet-3.2.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1ee8fae0519a337f2329cb78bd7a8e128ec0f881073d43f023c7b8d4831d5246", size = 632472, upload-time = "2025-08-07T13:42:55.044Z" },
|
|
@@ -1111,6 +1113,8 @@ wheels = [
|
|
|
1111
1113
|
{ url = "https://files.pythonhosted.org/packages/1f/8e/abdd3f14d735b2929290a018ecf133c901be4874b858dd1c604b9319f064/greenlet-3.2.4-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2523e5246274f54fdadbce8494458a2ebdcdbc7b802318466ac5606d3cded1f8", size = 587684, upload-time = "2025-08-07T13:18:25.164Z" },
|
|
1112
1114
|
{ url = "https://files.pythonhosted.org/packages/5d/65/deb2a69c3e5996439b0176f6651e0052542bb6c8f8ec2e3fba97c9768805/greenlet-3.2.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1987de92fec508535687fb807a5cea1560f6196285a4cde35c100b8cd632cc52", size = 1116647, upload-time = "2025-08-07T13:42:38.655Z" },
|
|
1113
1115
|
{ url = "https://files.pythonhosted.org/packages/3f/cc/b07000438a29ac5cfb2194bfc128151d52f333cee74dd7dfe3fb733fc16c/greenlet-3.2.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:55e9c5affaa6775e2c6b67659f3a71684de4c549b3dd9afca3bc773533d284fa", size = 1142073, upload-time = "2025-08-07T13:18:21.737Z" },
|
|
1116
|
+
{ url = "https://files.pythonhosted.org/packages/67/24/28a5b2fa42d12b3d7e5614145f0bd89714c34c08be6aabe39c14dd52db34/greenlet-3.2.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c9c6de1940a7d828635fbd254d69db79e54619f165ee7ce32fda763a9cb6a58c", size = 1548385, upload-time = "2025-11-04T12:42:11.067Z" },
|
|
1117
|
+
{ url = "https://files.pythonhosted.org/packages/6a/05/03f2f0bdd0b0ff9a4f7b99333d57b53a7709c27723ec8123056b084e69cd/greenlet-3.2.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:03c5136e7be905045160b1b9fdca93dd6727b180feeafda6818e6496434ed8c5", size = 1613329, upload-time = "2025-11-04T12:42:12.928Z" },
|
|
1114
1118
|
{ url = "https://files.pythonhosted.org/packages/d8/0f/30aef242fcab550b0b3520b8e3561156857c94288f0332a79928c31a52cf/greenlet-3.2.4-cp311-cp311-win_amd64.whl", hash = "sha256:9c40adce87eaa9ddb593ccb0fa6a07caf34015a29bf8d344811665b573138db9", size = 299100, upload-time = "2025-08-07T13:44:12.287Z" },
|
|
1115
1119
|
{ url = "https://files.pythonhosted.org/packages/44/69/9b804adb5fd0671f367781560eb5eb586c4d495277c93bde4307b9e28068/greenlet-3.2.4-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3b67ca49f54cede0186854a008109d6ee71f66bd57bb36abd6d0a0267b540cdd", size = 274079, upload-time = "2025-08-07T13:15:45.033Z" },
|
|
1116
1120
|
{ url = "https://files.pythonhosted.org/packages/46/e9/d2a80c99f19a153eff70bc451ab78615583b8dac0754cfb942223d2c1a0d/greenlet-3.2.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ddf9164e7a5b08e9d22511526865780a576f19ddd00d62f8a665949327fde8bb", size = 640997, upload-time = "2025-08-07T13:42:56.234Z" },
|
|
@@ -1120,6 +1124,8 @@ wheels = [
|
|
|
1120
1124
|
{ url = "https://files.pythonhosted.org/packages/19/0d/6660d55f7373b2ff8152401a83e02084956da23ae58cddbfb0b330978fe9/greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b3812d8d0c9579967815af437d96623f45c0f2ae5f04e366de62a12d83a8fb0", size = 607586, upload-time = "2025-08-07T13:18:28.544Z" },
|
|
1121
1125
|
{ url = "https://files.pythonhosted.org/packages/8e/1a/c953fdedd22d81ee4629afbb38d2f9d71e37d23caace44775a3a969147d4/greenlet-3.2.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:abbf57b5a870d30c4675928c37278493044d7c14378350b3aa5d484fa65575f0", size = 1123281, upload-time = "2025-08-07T13:42:39.858Z" },
|
|
1122
1126
|
{ url = "https://files.pythonhosted.org/packages/3f/c7/12381b18e21aef2c6bd3a636da1088b888b97b7a0362fac2e4de92405f97/greenlet-3.2.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:20fb936b4652b6e307b8f347665e2c615540d4b42b3b4c8a321d8286da7e520f", size = 1151142, upload-time = "2025-08-07T13:18:22.981Z" },
|
|
1127
|
+
{ url = "https://files.pythonhosted.org/packages/27/45/80935968b53cfd3f33cf99ea5f08227f2646e044568c9b1555b58ffd61c2/greenlet-3.2.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ee7a6ec486883397d70eec05059353b8e83eca9168b9f3f9a361971e77e0bcd0", size = 1564846, upload-time = "2025-11-04T12:42:15.191Z" },
|
|
1128
|
+
{ url = "https://files.pythonhosted.org/packages/69/02/b7c30e5e04752cb4db6202a3858b149c0710e5453b71a3b2aec5d78a1aab/greenlet-3.2.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:326d234cbf337c9c3def0676412eb7040a35a768efc92504b947b3e9cfc7543d", size = 1633814, upload-time = "2025-11-04T12:42:17.175Z" },
|
|
1123
1129
|
{ url = "https://files.pythonhosted.org/packages/e9/08/b0814846b79399e585f974bbeebf5580fbe59e258ea7be64d9dfb253c84f/greenlet-3.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:a7d4e128405eea3814a12cc2605e0e6aedb4035bf32697f72deca74de4105e02", size = 299899, upload-time = "2025-08-07T13:38:53.448Z" },
|
|
1124
1130
|
{ url = "https://files.pythonhosted.org/packages/49/e8/58c7f85958bda41dafea50497cbd59738c5c43dbbea5ee83d651234398f4/greenlet-3.2.4-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:1a921e542453fe531144e91e1feedf12e07351b1cf6c9e8a3325ea600a715a31", size = 272814, upload-time = "2025-08-07T13:15:50.011Z" },
|
|
1125
1131
|
{ url = "https://files.pythonhosted.org/packages/62/dd/b9f59862e9e257a16e4e610480cfffd29e3fae018a68c2332090b53aac3d/greenlet-3.2.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd3c8e693bff0fff6ba55f140bf390fa92c994083f838fece0f63be121334945", size = 641073, upload-time = "2025-08-07T13:42:57.23Z" },
|
|
@@ -1129,6 +1135,8 @@ wheels = [
|
|
|
1129
1135
|
{ url = "https://files.pythonhosted.org/packages/ee/43/3cecdc0349359e1a527cbf2e3e28e5f8f06d3343aaf82ca13437a9aa290f/greenlet-3.2.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:23768528f2911bcd7e475210822ffb5254ed10d71f4028387e5a99b4c6699671", size = 610497, upload-time = "2025-08-07T13:18:31.636Z" },
|
|
1130
1136
|
{ url = "https://files.pythonhosted.org/packages/b8/19/06b6cf5d604e2c382a6f31cafafd6f33d5dea706f4db7bdab184bad2b21d/greenlet-3.2.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:00fadb3fedccc447f517ee0d3fd8fe49eae949e1cd0f6a611818f4f6fb7dc83b", size = 1121662, upload-time = "2025-08-07T13:42:41.117Z" },
|
|
1131
1137
|
{ url = "https://files.pythonhosted.org/packages/a2/15/0d5e4e1a66fab130d98168fe984c509249c833c1a3c16806b90f253ce7b9/greenlet-3.2.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d25c5091190f2dc0eaa3f950252122edbbadbb682aa7b1ef2f8af0f8c0afefae", size = 1149210, upload-time = "2025-08-07T13:18:24.072Z" },
|
|
1138
|
+
{ url = "https://files.pythonhosted.org/packages/1c/53/f9c440463b3057485b8594d7a638bed53ba531165ef0ca0e6c364b5cc807/greenlet-3.2.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e343822feb58ac4d0a1211bd9399de2b3a04963ddeec21530fc426cc121f19b", size = 1564759, upload-time = "2025-11-04T12:42:19.395Z" },
|
|
1139
|
+
{ url = "https://files.pythonhosted.org/packages/47/e4/3bb4240abdd0a8d23f4f88adec746a3099f0d86bfedb623f063b2e3b4df0/greenlet-3.2.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ca7f6f1f2649b89ce02f6f229d7c19f680a6238af656f61e0115b24857917929", size = 1634288, upload-time = "2025-11-04T12:42:21.174Z" },
|
|
1132
1140
|
{ url = "https://files.pythonhosted.org/packages/0b/55/2321e43595e6801e105fcfdee02b34c0f996eb71e6ddffca6b10b7e1d771/greenlet-3.2.4-cp313-cp313-win_amd64.whl", hash = "sha256:554b03b6e73aaabec3745364d6239e9e012d64c68ccd0b8430c64ccc14939a8b", size = 299685, upload-time = "2025-08-07T13:24:38.824Z" },
|
|
1133
1141
|
{ url = "https://files.pythonhosted.org/packages/22/5c/85273fd7cc388285632b0498dbbab97596e04b154933dfe0f3e68156c68c/greenlet-3.2.4-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:49a30d5fda2507ae77be16479bdb62a660fa51b1eb4928b524975b3bde77b3c0", size = 273586, upload-time = "2025-08-07T13:16:08.004Z" },
|
|
1134
1142
|
{ url = "https://files.pythonhosted.org/packages/d1/75/10aeeaa3da9332c2e761e4c50d4c3556c21113ee3f0afa2cf5769946f7a3/greenlet-3.2.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:299fd615cd8fc86267b47597123e3f43ad79c9d8a22bebdce535e53550763e2f", size = 686346, upload-time = "2025-08-07T13:42:59.944Z" },
|
|
@@ -1136,6 +1144,8 @@ wheels = [
|
|
|
1136
1144
|
{ url = "https://files.pythonhosted.org/packages/dc/8b/29aae55436521f1d6f8ff4e12fb676f3400de7fcf27fccd1d4d17fd8fecd/greenlet-3.2.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b4a1870c51720687af7fa3e7cda6d08d801dae660f75a76f3845b642b4da6ee1", size = 694659, upload-time = "2025-08-07T13:53:17.759Z" },
|
|
1137
1145
|
{ url = "https://files.pythonhosted.org/packages/92/2e/ea25914b1ebfde93b6fc4ff46d6864564fba59024e928bdc7de475affc25/greenlet-3.2.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:061dc4cf2c34852b052a8620d40f36324554bc192be474b9e9770e8c042fd735", size = 695355, upload-time = "2025-08-07T13:18:34.517Z" },
|
|
1138
1146
|
{ url = "https://files.pythonhosted.org/packages/72/60/fc56c62046ec17f6b0d3060564562c64c862948c9d4bc8aa807cf5bd74f4/greenlet-3.2.4-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:44358b9bf66c8576a9f57a590d5f5d6e72fa4228b763d0e43fee6d3b06d3a337", size = 657512, upload-time = "2025-08-07T13:18:33.969Z" },
|
|
1147
|
+
{ url = "https://files.pythonhosted.org/packages/23/6e/74407aed965a4ab6ddd93a7ded3180b730d281c77b765788419484cdfeef/greenlet-3.2.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2917bdf657f5859fbf3386b12d68ede4cf1f04c90c3a6bc1f013dd68a22e2269", size = 1612508, upload-time = "2025-11-04T12:42:23.427Z" },
|
|
1148
|
+
{ url = "https://files.pythonhosted.org/packages/0d/da/343cd760ab2f92bac1845ca07ee3faea9fe52bee65f7bcb19f16ad7de08b/greenlet-3.2.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:015d48959d4add5d6c9f6c5210ee3803a830dce46356e3bc326d6776bde54681", size = 1680760, upload-time = "2025-11-04T12:42:25.341Z" },
|
|
1139
1149
|
{ url = "https://files.pythonhosted.org/packages/e3/a5/6ddab2b4c112be95601c13428db1d8b6608a8b6039816f2ba09c346c08fc/greenlet-3.2.4-cp314-cp314-win_amd64.whl", hash = "sha256:e37ab26028f12dbb0ff65f29a8d3d44a765c61e729647bf2ddfbbed621726f01", size = 303425, upload-time = "2025-08-07T13:32:27.59Z" },
|
|
1140
1150
|
]
|
|
1141
1151
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/deployment/code_examples/apify/crawler_as_actor_example.py
RENAMED
|
File without changes
|
|
File without changes
|
{crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/deployment/code_examples/apify/log_with_config_example.py
RENAMED
|
File without changes
|
{crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/deployment/code_examples/apify/proxy_advanced_example.py
RENAMED
|
File without changes
|
|
File without changes
|
{crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/deployment/code_examples/google/cloud_run_example.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/adaptive_playwright_crawler.py
RENAMED
|
File without changes
|
|
File without changes
|
{crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/add_data_to_dataset_dataset.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/beautifulsoup_crawler_keep_alive.py
RENAMED
|
File without changes
|
{crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/beautifulsoup_crawler_stop.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/crawl_all_links_on_website_bs.py
RENAMED
|
File without changes
|
{crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/crawl_all_links_on_website_pw.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/export_entire_dataset_to_file_csv.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/fill_and_submit_web_form_crawler.py
RENAMED
|
File without changes
|
{crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/fill_and_submit_web_form_request.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/playwright_block_requests.py
RENAMED
|
File without changes
|
|
File without changes
|
{crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/playwright_crawler_with_camoufox.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/respect_robots_on_skipped_request.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/using_browser_profiles_chrome.py
RENAMED
|
File without changes
|
{crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/code_examples/using_browser_profiles_firefox.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/playwright_crawler_with_block_requests.mdx
RENAMED
|
File without changes
|
|
File without changes
|
{crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/examples/playwright_crawler_with_fingerprint_generator.mdx
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/error_handling/disable_retry.py
RENAMED
|
File without changes
|
{crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/error_handling/handle_proxy_error.py
RENAMED
|
File without changes
|
|
File without changes
|
{crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/http_clients/parsel_httpx_example.py
RENAMED
|
File without changes
|
{crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/http_clients/parsel_impit_example.py
RENAMED
|
File without changes
|
{crawlee-1.0.5b3 → crawlee-1.0.5b5}/docs/guides/code_examples/http_crawlers/beautifulsoup_example.py
RENAMED
|
File without changes
|