crawlee 1.0.4b8__tar.gz → 1.0.5b2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crawlee might be problematic. Click here for more details.
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/.gitignore +1 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/CHANGELOG.md +10 -2
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/PKG-INFO +1 -1
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/using_browser_profiles_chrome.py +2 -4
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/using_browser_profile.mdx +0 -2
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/architecture_overview.mdx +1 -1
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/avoid_blocking.mdx +1 -1
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/request_loaders.mdx +8 -2
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/09_running_in_cloud.mdx +1 -1
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/pyproject.toml +1 -1
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/browsers/_browser_pool.py +4 -1
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/browsers/_playwright_browser_controller.py +1 -1
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/browsers/_playwright_browser_plugin.py +17 -3
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/browsers/_types.py +1 -1
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +31 -6
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_playwright/_playwright_crawler.py +8 -3
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/fingerprint_suite/_header_generator.py +2 -2
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/request_loaders/_sitemap_request_loader.py +5 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/browsers/test_playwright_browser_plugin.py +10 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler.py +80 -1
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/uv.lock +648 -606
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/yarn.lock +483 -482
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/.editorconfig +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/.github/CODEOWNERS +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/.github/pull_request_template.md +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/.github/workflows/build_and_deploy_docs.yaml +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/.github/workflows/check_pr_title.yaml +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/.github/workflows/pre_release.yaml +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/.github/workflows/release.yaml +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/.github/workflows/run_code_checks.yaml +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/.github/workflows/templates_e2e_tests.yaml +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/.github/workflows/update_new_issue.yaml +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/.markdownlint.yaml +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/.pre-commit-config.yaml +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/CONTRIBUTING.md +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/LICENSE +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/Makefile +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/README.md +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/deployment/apify_platform.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/deployment/code_examples/apify/crawler_as_actor_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/deployment/code_examples/apify/get_public_url.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/deployment/code_examples/apify/log_with_config_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/deployment/code_examples/apify/proxy_advanced_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/deployment/code_examples/apify/proxy_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/deployment/code_examples/google/cloud_run_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/deployment/code_examples/google/google_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/deployment/google_cloud.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/deployment/google_cloud_run.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/add_data_to_dataset.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/beautifulsoup_crawler.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/capture_screenshot_using_playwright.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/capturing_page_snapshots_with_error_snapshotter.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/adaptive_playwright_crawler.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/add_data_to_dataset_bs.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/add_data_to_dataset_dataset.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/add_data_to_dataset_pw.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/beautifulsoup_crawler.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/beautifulsoup_crawler_keep_alive.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/beautifulsoup_crawler_stop.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/capture_screenshot_using_playwright.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/configure_json_logging.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/crawl_all_links_on_website_bs.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/crawl_all_links_on_website_pw.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/crawl_multiple_urls_bs.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/crawl_multiple_urls_pw.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/crawl_specific_links_on_website_bs.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/crawl_specific_links_on_website_pw.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/crawl_website_with_relative_links_all_links.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/crawl_website_with_relative_links_same_domain.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/crawl_website_with_relative_links_same_hostname.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/crawl_website_with_relative_links_same_origin.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/export_entire_dataset_to_file_csv.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/export_entire_dataset_to_file_json.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/extract_and_add_specific_links_on_website_bs.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/extract_and_add_specific_links_on_website_pw.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/fill_and_submit_web_form_crawler.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/fill_and_submit_web_form_request.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/parsel_crawler.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/parsel_crawler_with_error_snapshotter.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/playwright_block_requests.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/playwright_crawler.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/playwright_crawler_with_camoufox.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/playwright_crawler_with_error_snapshotter.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/playwright_crawler_with_fingerprint_generator.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/respect_robots_on_skipped_request.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/respect_robots_txt_file.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/resuming_paused_crawl.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/using_browser_profiles_firefox.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/crawl_all_links_on_website.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/crawl_multiple_urls.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/crawl_specific_links_on_website.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/crawl_website_with_relative_links.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/crawler_keep_alive.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/crawler_stop.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/export_entire_dataset_to_file.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/fill_and_submit_web_form.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/json_logging.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/parsel_crawler.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/playwright_crawler.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/playwright_crawler_adaptive.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/playwright_crawler_with_block_requests.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/playwright_crawler_with_camoufox.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/playwright_crawler_with_fingerprint_generator.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/respect_robots_txt_file.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/resuming_paused_crawl.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/avoid_blocking/default_fingerprint_generator_with_args.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/avoid_blocking/playwright_with_fingerprint_generator.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/creating_web_archive/manual_archiving_parsel_crawler.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/creating_web_archive/manual_archiving_playwright_crawler.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/creating_web_archive/simple_pw_through_proxy_pywb_server.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/error_handling/change_handle_error_status.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/error_handling/disable_retry.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/error_handling/handle_proxy_error.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/http_clients/parsel_curl_impersonate_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/http_clients/parsel_httpx_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/http_clients/parsel_impit_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/http_crawlers/beautifulsoup_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/http_crawlers/custom_crawler_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/http_crawlers/http_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/http_crawlers/parsel_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/login_crawler/http_login.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/login_crawler/playwright_login.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/playwright_crawler/browser_configuration_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/playwright_crawler/multiple_launch_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/playwright_crawler/plugin_browser_configuration_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/playwright_crawler/pre_navigation_hook_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/playwright_crawler_adaptive/handler.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/playwright_crawler_adaptive/init_beautifulsoup.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/playwright_crawler_adaptive/init_parsel.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/playwright_crawler_adaptive/init_prediction.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/playwright_crawler_adaptive/pre_nav_hooks.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/playwright_crawler_stagehand/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/playwright_crawler_stagehand/browser_classes.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/playwright_crawler_stagehand/stagehand_run.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/playwright_crawler_stagehand/support_classes.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/proxy_management/inspecting_bs_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/proxy_management/inspecting_pw_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/proxy_management/integration_bs_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/proxy_management/integration_pw_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/proxy_management/quick_start_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/proxy_management/session_bs_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/proxy_management/session_pw_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/proxy_management/tiers_bs_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/proxy_management/tiers_pw_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/request_loaders/rl_basic_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/request_loaders/rl_basic_example_with_persist.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/request_loaders/rl_tandem_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/request_loaders/rl_tandem_example_explicit.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/request_loaders/sitemap_basic_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/request_loaders/sitemap_example_with_persist.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/request_loaders/sitemap_tandem_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/request_loaders/sitemap_tandem_example_explicit.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/request_router/adaptive_crawler_handlers.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/request_router/basic_request_handlers.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/request_router/custom_router_default_only.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/request_router/error_handler.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/request_router/failed_request_handler.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/request_router/http_pre_navigation.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/request_router/playwright_pre_navigation.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/request_router/simple_default_handler.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/running_in_web_server/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/running_in_web_server/crawler.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/running_in_web_server/server.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/scaling_crawlers/max_tasks_per_minute_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/scaling_crawlers/min_and_max_concurrency_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/service_locator/service_conflicts.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/service_locator/service_crawler_configuration.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/service_locator/service_crawler_event_manager.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/service_locator/service_crawler_storage_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/service_locator/service_locator_configuration.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/service_locator/service_locator_event_manager.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/service_locator/service_locator_storage_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/service_locator/service_storage_configuration.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/service_locator/service_storage_storage_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/session_management/multi_sessions_http.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/session_management/one_session_http.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/session_management/sm_basic.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/session_management/sm_beautifulsoup.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/session_management/sm_http.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/session_management/sm_parsel.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/session_management/sm_playwright.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/session_management/sm_standalone.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/storage_clients/custom_storage_client_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/storage_clients/file_system_storage_client_basic_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/storage_clients/file_system_storage_client_configuration_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/storage_clients/memory_storage_client_basic_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/storage_clients/registering_storage_clients_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/storage_clients/sql_storage_client_basic_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/storage_clients/sql_storage_client_configuration_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/storages/cleaning_do_not_purge_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/storages/cleaning_purge_explicitly_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/storages/dataset_basic_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/storages/dataset_with_crawler_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/storages/dataset_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/storages/helper_add_requests_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/storages/helper_enqueue_links_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/storages/kvs_basic_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/storages/kvs_with_crawler_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/storages/kvs_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/storages/opening.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/storages/rq_basic_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/storages/rq_with_crawler_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/storages/rq_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/code_examples/trace_and_monitor_crawlers/instrument_crawler.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/crawler_login.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/creating_web_archive.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/error_handling.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/http_clients.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/http_crawlers.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/playwright_crawler.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/playwright_crawler_adaptive.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/playwright_crawler_stagehand.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/proxy_management.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/request_router.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/running_in_web_server.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/scaling_crawlers.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/service_locator.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/session_management.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/storage_clients.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/storages.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/guides/trace_and_monitor_crawlers.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/01_setting_up.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/02_first_crawler.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/03_adding_more_urls.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/04_real_world_project.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/05_crawling.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/06_scraping.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/07_saving_data.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/08_refactoring.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/code_examples/02_bs.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/code_examples/02_bs_better.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/code_examples/02_request_queue.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/code_examples/03_enqueue_strategy.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/code_examples/03_finding_new_links.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/code_examples/03_globs.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/code_examples/03_original_code.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/code_examples/03_transform_request.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/code_examples/04_sanity_check.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/code_examples/05_crawling_detail.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/code_examples/05_crawling_listing.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/code_examples/06_scraping.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/code_examples/07_final_code.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/code_examples/07_first_code.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/code_examples/08_main.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/code_examples/08_routes.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/code_examples/09_apify_sdk.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/code_examples/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/code_examples/routes.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/introduction/index.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/pyproject.toml +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/quick-start/code_examples/beautifulsoup_crawler_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/quick-start/code_examples/parsel_crawler_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/quick-start/code_examples/playwright_crawler_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/quick-start/code_examples/playwright_crawler_headful_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/quick-start/index.mdx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/upgrading/upgrading_to_v0x.md +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/upgrading/upgrading_to_v1.md +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/renovate.json +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_autoscaling/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_autoscaling/_types.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_autoscaling/autoscaled_pool.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_autoscaling/py.typed +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_autoscaling/snapshotter.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_autoscaling/system_status.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_browserforge_workaround.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_cli.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_consts.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_log_config.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_request.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_service_locator.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_types.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_utils/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_utils/blocked.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_utils/byte_size.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_utils/console.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_utils/context.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_utils/crypto.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_utils/docs.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_utils/file.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_utils/globs.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_utils/html_to_text.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_utils/models.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_utils/raise_if_too_many_kwargs.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_utils/recoverable_state.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_utils/recurring_task.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_utils/requests.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_utils/robots.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_utils/sitemap.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_utils/system.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_utils/time.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_utils/try_import.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_utils/urls.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_utils/wait.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/_utils/web.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/browsers/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/browsers/_browser_controller.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/browsers/_browser_plugin.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/browsers/_playwright_browser.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/browsers/py.typed +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/configuration.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_abstract_http/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_abstract_http/_abstract_http_parser.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_abstract_http/_http_crawling_context.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_abstract_http/py.typed +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_adaptive_playwright/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_adaptive_playwright/_result_comparator.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_adaptive_playwright/_utils.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_basic/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_basic/_basic_crawler.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_basic/_basic_crawling_context.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_basic/_context_pipeline.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_basic/_logging_utils.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_basic/py.typed +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_beautifulsoup/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawler.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawling_context.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_parser.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_beautifulsoup/_utils.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_beautifulsoup/py.typed +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_http/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_http/_http_crawler.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_http/_http_parser.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_parsel/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_parsel/_parsel_crawler.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_parsel/_parsel_crawling_context.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_parsel/_parsel_parser.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_parsel/_utils.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_playwright/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_playwright/_playwright_crawling_context.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_playwright/_playwright_http_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_playwright/_types.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_playwright/_utils.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/_types.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/crawlers/py.typed +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/errors.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/events/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/events/_event_manager.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/events/_local_event_manager.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/events/_types.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/events/py.typed +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/fingerprint_suite/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/fingerprint_suite/_browserforge_adapter.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/fingerprint_suite/_consts.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/fingerprint_suite/_fingerprint_generator.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/fingerprint_suite/_types.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/fingerprint_suite/py.typed +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/http_clients/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/http_clients/_base.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/http_clients/_curl_impersonate.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/http_clients/_httpx.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/http_clients/_impit.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/otel/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/otel/crawler_instrumentor.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/project_template/cookiecutter.json +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/project_template/hooks/post_gen_project.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/project_template/hooks/pre_gen_project.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/project_template/templates/main.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/project_template/templates/main_beautifulsoup.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/project_template/templates/main_parsel.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/project_template/templates/main_playwright.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/project_template/templates/main_playwright_camoufox.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/project_template/templates/routes_beautifulsoup.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/project_template/templates/routes_camoufox.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/project_template/templates/routes_parsel.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/project_template/templates/routes_playwright.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/project_template/templates/routes_playwright_camoufox.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/project_template/{{cookiecutter.project_name}}/.dockerignore +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/project_template/{{cookiecutter.project_name}}/README.md +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/project_template/{{cookiecutter.project_name}}/requirements.txt +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__main__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/main.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/routes.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/proxy_configuration.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/py.typed +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/request_loaders/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/request_loaders/_request_list.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/request_loaders/_request_loader.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/request_loaders/_request_manager.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/request_loaders/_request_manager_tandem.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/router.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/sessions/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/sessions/_cookies.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/sessions/_models.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/sessions/_session.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/sessions/_session_pool.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/sessions/py.typed +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/statistics/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/statistics/_error_snapshotter.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/statistics/_error_tracker.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/statistics/_models.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/statistics/_statistics.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/_base/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/_base/_dataset_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/_base/_key_value_store_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/_base/_request_queue_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/_base/_storage_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/_base/py.typed +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/_file_system/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/_file_system/_dataset_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/_file_system/_key_value_store_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/_file_system/_request_queue_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/_file_system/_storage_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/_file_system/_utils.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/_file_system/py.typed +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/_memory/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/_memory/_dataset_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/_memory/_key_value_store_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/_memory/_request_queue_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/_memory/_storage_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/_memory/py.typed +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/_sql/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/_sql/_client_mixin.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/_sql/_dataset_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/_sql/_db_models.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/_sql/_key_value_store_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/_sql/_request_queue_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/_sql/_storage_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/_sql/py.typed +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/models.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storage_clients/py.typed +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storages/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storages/_base.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storages/_dataset.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storages/_key_value_store.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storages/_request_queue.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storages/_storage_instance_manager.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storages/_utils.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/src/crawlee/storages/py.typed +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/e2e/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/e2e/conftest.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/e2e/project_template/test_static_crawlers_templates.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/e2e/project_template/utils.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/README.md +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/__init__.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/_autoscaling/test_autoscaled_pool.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/_autoscaling/test_snapshotter.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/_autoscaling/test_system_status.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/_statistics/test_error_tracker.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/_statistics/test_periodic_logging.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/_statistics/test_persistence.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/_statistics/test_request_processing_record.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/_utils/test_byte_size.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/_utils/test_console.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/_utils/test_crypto.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/_utils/test_file.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/_utils/test_globs.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/_utils/test_html_to_text.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/_utils/test_measure_time.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/_utils/test_raise_if_too_many_kwargs.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/_utils/test_recurring_task.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/_utils/test_requests.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/_utils/test_robots.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/_utils/test_sitemap.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/_utils/test_system.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/_utils/test_timedelata_ms.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/_utils/test_urls.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/browsers/test_browser_pool.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/browsers/test_playwright_browser.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/browsers/test_playwright_browser_controller.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/conftest.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler_statistics.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawling_context.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/crawlers/_adaptive_playwright/test_predictor.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/crawlers/_basic/test_basic_crawler.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/crawlers/_basic/test_context_pipeline.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/crawlers/_http/test_http_crawler.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/crawlers/_parsel/test_parsel_crawler.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/crawlers/_playwright/test_playwright_crawler.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/events/test_event_manager.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/events/test_local_event_manager.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/fingerprint_suite/test_adapters.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/fingerprint_suite/test_header_generator.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/http_clients/test_http_clients.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/http_clients/test_httpx.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/otel/test_crawler_instrumentor.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/proxy_configuration/test_new_proxy_info.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/proxy_configuration/test_tiers.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/request_loaders/test_request_list.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/request_loaders/test_sitemap_request_loader.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/server.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/server_endpoints.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/sessions/test_cookies.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/sessions/test_models.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/sessions/test_session.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/sessions/test_session_pool.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/storage_clients/_file_system/test_fs_rq_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/storage_clients/_memory/test_memory_dataset_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/storage_clients/_memory/test_memory_kvs_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/storage_clients/_memory/test_memory_rq_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/storage_clients/_sql/test_sql_dataset_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/storage_clients/_sql/test_sql_kvs_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/storage_clients/_sql/test_sql_rq_client.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/storages/conftest.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/storages/test_dataset.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/storages/test_key_value_store.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/storages/test_request_manager_tandem.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/storages/test_request_queue.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/storages/test_storage_instance_manager.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/test_cli.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/test_configuration.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/test_log_config.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/test_router.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/tests/unit/test_service_locator.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/.eslintrc.json +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/.yarnrc.yml +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/babel.config.js +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/build_api_reference.sh +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/docusaurus.config.js +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/generate_module_shortcuts.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/package.json +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/patches/@docusaurus+core+3.4.0.patch +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/patches/@docusaurus+core+3.5.2.patch +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/roa-loader/index.js +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/roa-loader/package.json +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/sidebars.js +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/components/ApiLink.jsx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/components/Button.jsx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/components/Button.module.css +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/components/CopyButton.jsx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/components/CopyButton.module.css +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/components/Gradients.jsx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/components/Highlights.jsx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/components/Highlights.module.css +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/components/Homepage/HomepageCliExample.jsx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/components/Homepage/HomepageCliExample.module.css +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/components/Homepage/HomepageCtaSection.jsx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/components/Homepage/HomepageCtaSection.module.css +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/components/Homepage/HomepageHeroSection.jsx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/components/Homepage/HomepageHeroSection.module.css +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/components/Homepage/LanguageInfoWidget.jsx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/components/Homepage/LanguageInfoWidget.module.css +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/components/Homepage/LanguageSwitch.jsx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/components/Homepage/LanguageSwitch.module.css +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/components/Homepage/RiverSection.jsx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/components/Homepage/RiverSection.module.css +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/components/Homepage/ThreeCardsWithIcon.jsx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/components/Homepage/ThreeCardsWithIcon.module.css +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/components/Homepage/animated-crawlee-logo-dark.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/components/Homepage/animated-crawlee-logo-light.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/components/RunnableCodeBlock.jsx +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/components/RunnableCodeBlock.module.css +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/css/custom.css +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/pages/home_page_example.py +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/pages/index.js +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/pages/index.module.css +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/theme/ColorModeToggle/dark-mode-icon.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/theme/ColorModeToggle/index.js +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/theme/ColorModeToggle/light-mode-icon.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/theme/ColorModeToggle/styles.module.css +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/theme/DocItem/Layout/index.js +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/theme/DocItem/Layout/styles.module.css +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/theme/Footer/LinkItem/index.js +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/theme/Footer/LinkItem/index.module.css +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/theme/Footer/index.js +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/theme/Footer/index.module.css +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/theme/MDXComponents/A.js +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/theme/Navbar/Content/index.js +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/theme/Navbar/Content/styles.module.css +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/theme/Navbar/Logo/index.js +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/theme/Navbar/Logo/index.module.css +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/theme/Navbar/MobileSidebar/Header/index.js +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/theme/Navbar/MobileSidebar/Header/index.module.css +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/theme/Navbar/MobileSidebar/Layout/index.js +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/theme/Navbar/MobileSidebar/PrimaryMenu/index.js +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/theme/Navbar/MobileSidebar/index.js +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/src/theme/NavbarItem/ComponentTypes.js +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/.nojekyll +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/font/lota.woff +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/font/lota.woff2 +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/API.png +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/apify_logo.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/apify_og_SDK.png +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/apify_sdk.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/apify_sdk_white.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/arrow_right.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/auto-scaling-dark.webp +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/auto-scaling-light.webp +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/check.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/chrome-scrape-dark.gif +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/chrome-scrape-light.gif +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/cloud_icon.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/community-dark-icon.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/community-light-icon.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/crawlee-dark-new.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/crawlee-dark.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/crawlee-javascript-dark.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/crawlee-javascript-light.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/crawlee-light-new.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/crawlee-light.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/crawlee-logo-monocolor.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/crawlee-logo.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/crawlee-python-dark.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/crawlee-python-light.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/crawlee-python-og.png +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/defaults-dark-icon.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/defaults-light-icon.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/discord-brand-dark.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/discord-brand.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/docusaurus.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/external-link.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/favicon.ico +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/favorite-tools-dark.webp +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/favorite-tools-light.webp +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/features/auto-scaling.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/features/automate-everything.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/features/fingerprints.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/features/node-requests.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/features/runs-on-py.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/features/storage.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/features/works-everywhere.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/fill-and-submit-web-form/00.jpg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/fill-and-submit-web-form/01.jpg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/fill-and-submit-web-form/02.jpg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/fill-and-submit-web-form/03.jpg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/getting-started/current-price.jpg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/getting-started/scraping-practice.jpg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/getting-started/select-an-element.jpg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/getting-started/selected-element.jpg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/getting-started/sku.jpg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/getting-started/title.jpg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/github-brand-dark.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/github-brand.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/guides/jaeger_otel_search_view_example.png +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/guides/jaeger_otel_trace_example.png +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/hearth copy.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/hearth.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/javascript_logo.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/js_file.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/logo-big.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/logo-blur.png +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/logo-blur.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/logo-zoom.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/menu-arrows.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/oss_logo.png +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/puppeteer-live-view-dashboard.png +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/puppeteer-live-view-detail.png +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/queue-dark-icon.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/queue-light-icon.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/resuming-paused-crawl/00.webp +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/resuming-paused-crawl/01.webp +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/robot.png +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/routing-dark-icon.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/routing-light-icon.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/scraping-utils-dark-icon.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/scraping-utils-light-icon.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/smart-proxy-dark.webp +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/smart-proxy-light.webp +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/source_code.png +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/system.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/triangles_dark.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/triangles_light.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/workflow.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/zero-setup-dark-icon.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/img/zero-setup-light-icon.svg +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/js/custom.js +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/static/robots.txt +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/tools/docs-prettier.config.js +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/tools/utils/externalLink.js +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/tools/website_gif/chrome-scrape-dark.gif +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/tools/website_gif/chrome-scrape-dark.mp4 +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/tools/website_gif/chrome-scrape-light.gif +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/tools/website_gif/chrome-scrape-light.mp4 +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/tools/website_gif/website_gif.mjs +0 -0
- {crawlee-1.0.4b8 → crawlee-1.0.5b2}/website/tsconfig.eslint.json +0 -0
|
@@ -3,7 +3,15 @@
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
5
|
<!-- git-cliff-unreleased-start -->
|
|
6
|
-
## 1.0.4 - **not yet released**
|
|
6
|
+
## 1.0.5 - **not yet released**
|
|
7
|
+
|
|
8
|
+
### 🚀 Features
|
|
9
|
+
|
|
10
|
+
- Add `chrome` `BrowserType` for `PlaywrightCrawler` to use the Chrome browser ([#1487](https://github.com/apify/crawlee-python/pull/1487)) ([b06937b](https://github.com/apify/crawlee-python/commit/b06937bbc3afe3c936b554bfc503365c1b2c526b)) by [@Mantisus](https://github.com/Mantisus), closes [#1071](https://github.com/apify/crawlee-python/issues/1071)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
<!-- git-cliff-unreleased-end -->
|
|
14
|
+
## [1.0.4](https://github.com/apify/crawlee-python/releases/tag/v1.0.4) (2025-10-24)
|
|
7
15
|
|
|
8
16
|
### 🐛 Bug Fixes
|
|
9
17
|
|
|
@@ -11,9 +19,9 @@ All notable changes to this project will be documented in this file.
|
|
|
11
19
|
- Exclude incorrect links before checking `robots.txt` ([#1502](https://github.com/apify/crawlee-python/pull/1502)) ([3273da5](https://github.com/apify/crawlee-python/commit/3273da5fee62ec9254666b376f382474c3532a56)) by [@Mantisus](https://github.com/Mantisus), closes [#1499](https://github.com/apify/crawlee-python/issues/1499)
|
|
12
20
|
- Resolve compatibility issue between `SqlStorageClient` and `AdaptivePlaywrightCrawler` ([#1496](https://github.com/apify/crawlee-python/pull/1496)) ([ce172c4](https://github.com/apify/crawlee-python/commit/ce172c425a8643a1d4c919db4f5e5a6e47e91deb)) by [@Mantisus](https://github.com/Mantisus), closes [#1495](https://github.com/apify/crawlee-python/issues/1495)
|
|
13
21
|
- Fix `BasicCrawler` statistics persistence ([#1490](https://github.com/apify/crawlee-python/pull/1490)) ([1eb1c19](https://github.com/apify/crawlee-python/commit/1eb1c19aa6f9dda4a0e3f7eda23f77a554f95076)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1501](https://github.com/apify/crawlee-python/issues/1501)
|
|
22
|
+
- Save context state in result for `AdaptivePlaywrightCrawler` after isolated processing in `SubCrawler` ([#1488](https://github.com/apify/crawlee-python/pull/1488)) ([62b7c70](https://github.com/apify/crawlee-python/commit/62b7c70b54085fc65a660062028014f4502beba9)) by [@Mantisus](https://github.com/Mantisus), closes [#1483](https://github.com/apify/crawlee-python/issues/1483)
|
|
14
23
|
|
|
15
24
|
|
|
16
|
-
<!-- git-cliff-unreleased-end -->
|
|
17
25
|
## [1.0.3](https://github.com/apify/crawlee-python/releases/tag/v1.0.3) (2025-10-17)
|
|
18
26
|
|
|
19
27
|
### 🐛 Bug Fixes
|
{crawlee-1.0.4b8 → crawlee-1.0.5b2}/docs/examples/code_examples/using_browser_profiles_chrome.py
RENAMED
|
@@ -27,15 +27,13 @@ async def main() -> None:
|
|
|
27
27
|
|
|
28
28
|
crawler = PlaywrightCrawler(
|
|
29
29
|
headless=False,
|
|
30
|
-
# Use
|
|
31
|
-
browser_type='
|
|
30
|
+
# Use the installed Chrome browser
|
|
31
|
+
browser_type='chrome',
|
|
32
32
|
# Disable fingerprints to preserve profile identity
|
|
33
33
|
fingerprint_generator=None,
|
|
34
34
|
# Set user data directory to temp folder
|
|
35
35
|
user_data_dir=tmp_profile_dir,
|
|
36
36
|
browser_launch_options={
|
|
37
|
-
# Use installed Chrome browser
|
|
38
|
-
'channel': 'chrome',
|
|
39
37
|
# Slow down actions to mimic human behavior
|
|
40
38
|
'slow_mo': 200,
|
|
41
39
|
'args': [
|
|
@@ -18,8 +18,6 @@ Using browser profiles allows you to leverage existing login sessions, saved pas
|
|
|
18
18
|
|
|
19
19
|
To run <ApiLink to="class/PlaywrightCrawler">`PlaywrightCrawler`</ApiLink> with your Chrome profile, you need to know the path to your profile files. You can find this information by entering `chrome://version/` as a URL in your Chrome browser. If you have multiple profiles, pay attention to the profile name - if you only have one profile, it's always `Default`.
|
|
20
20
|
|
|
21
|
-
You also need to use the [`channel`](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-option-channel) parameter in `browser_launch_options` to use the Chrome browser installed on your system instead of Playwright's Chromium.
|
|
22
|
-
|
|
23
21
|
:::warning Profile access limitation
|
|
24
22
|
Due to [Chrome's security policies](https://developer.chrome.com/blog/remote-debugging-port), automation cannot use your main browsing profile directly. The example copies your profile to a temporary location as a workaround.
|
|
25
23
|
:::
|
|
@@ -291,7 +291,7 @@ Request loaders provide a subset of <ApiLink to="class/RequestQueue">`RequestQue
|
|
|
291
291
|
|
|
292
292
|
- <ApiLink to="class/RequestLoader">`RequestLoader`</ApiLink> - Base interface for read-only access to a stream of requests, with capabilities like fetching the next request, marking as handled, and status checking.
|
|
293
293
|
- <ApiLink to="class/RequestList">`RequestList`</ApiLink> - Lightweight in-memory implementation of `RequestLoader` for managing static lists of URLs.
|
|
294
|
-
- <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> -
|
|
294
|
+
- <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> - A specialized loader that reads URLs from XML and plain-text sitemaps following the [Sitemaps protocol](https://www.sitemaps.org/protocol.html) with filtering capabilities.
|
|
295
295
|
|
|
296
296
|
### Request managers
|
|
297
297
|
|
|
@@ -25,7 +25,7 @@ Changing browser fingerprints can be a tedious job. Luckily, Crawlee provides th
|
|
|
25
25
|
{PlaywrightDefaultFingerprintGenerator}
|
|
26
26
|
</RunnableCodeBlock>
|
|
27
27
|
|
|
28
|
-
In certain cases we want to narrow down the fingerprints used - e.g. specify a certain operating system, locale or browser. This is also possible with Crawlee - the crawler can have the generation algorithm customized to reflect the particular browser version and many more. For description of fingerprint generation options please see <ApiLink to="class/HeaderGeneratorOptions">`HeaderGeneratorOptions`</ApiLink>, <ApiLink to="class/ScreenOptions">`ScreenOptions`</ApiLink> and <ApiLink to="class/BrowserforgeFingerprintGenerator#__init__">`DefaultFingerprintGenerator.__init__`</ApiLink> See the example
|
|
28
|
+
In certain cases we want to narrow down the fingerprints used - e.g. specify a certain operating system, locale or browser. This is also possible with Crawlee - the crawler can have the generation algorithm customized to reflect the particular browser version and many more. For a description of the fingerprint generation options, please see <ApiLink to="class/HeaderGeneratorOptions">`HeaderGeneratorOptions`</ApiLink>, <ApiLink to="class/ScreenOptions">`ScreenOptions`</ApiLink> and <ApiLink to="class/BrowserforgeFingerprintGenerator#__init__">`DefaultFingerprintGenerator.__init__`</ApiLink>. See the example below:
|
|
29
29
|
|
|
30
30
|
<CodeBlock className="language-python">
|
|
31
31
|
{PlaywrightDefaultFingerprintGeneratorWithArgs}
|
|
@@ -31,7 +31,7 @@ The [`request_loaders`](https://github.com/apify/crawlee-python/tree/master/src/
|
|
|
31
31
|
And specific request loader implementations:
|
|
32
32
|
|
|
33
33
|
- <ApiLink to="class/RequestList">`RequestList`</ApiLink>: A lightweight implementation for managing a static list of URLs.
|
|
34
|
-
- <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink>: A specialized loader that reads URLs from XML sitemaps with filtering capabilities.
|
|
34
|
+
- <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink>: A specialized loader that reads URLs from XML and plain-text sitemaps following the [Sitemaps protocol](https://www.sitemaps.org/protocol.html) with filtering capabilities.
|
|
35
35
|
|
|
36
36
|
Below is a class diagram that illustrates the relationships between these components and the <ApiLink to="class/RequestQueue">`RequestQueue`</ApiLink>:
|
|
37
37
|
|
|
@@ -130,7 +130,13 @@ To enable persistence, provide `persist_state_key` and optionally `persist_reque
|
|
|
130
130
|
|
|
131
131
|
### Sitemap request loader
|
|
132
132
|
|
|
133
|
-
The <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> is a specialized request loader that reads URLs from
|
|
133
|
+
The <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> is a specialized request loader that reads URLs from sitemaps following the [Sitemaps protocol](https://www.sitemaps.org/protocol.html). It supports both XML and plain text sitemap formats. It's particularly useful when you want to crawl a website systematically by following its sitemap structure.
|
|
134
|
+
|
|
135
|
+
:::note
|
|
136
|
+
The `SitemapRequestLoader` is designed specifically for sitemaps that follow the standard Sitemaps protocol. HTML pages containing links are not supported by this loader - those should be handled by regular crawlers using the `enqueue_links` functionality.
|
|
137
|
+
:::
|
|
138
|
+
|
|
139
|
+
The loader supports filtering URLs using glob patterns and regular expressions, allowing you to include or exclude specific types of URLs. The <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> provides streaming processing of sitemaps, ensuring efficient memory usage without loading the entire sitemap into memory.
|
|
134
140
|
|
|
135
141
|
<RunnableCodeBlock className="language-python" language="python">
|
|
136
142
|
{SitemapExample}
|
|
@@ -50,7 +50,7 @@ apify login
|
|
|
50
50
|
|
|
51
51
|
Now that you have your account set up, you will need to adjust the code a tiny bit. We will use the [Apify SDK](https://docs.apify.com/sdk/python/), which will help us to wire the Crawlee storages (like the [`RequestQueue`](https://docs.apify.com/sdk/python/reference/class/RequestQueue)) to their Apify platform counterparts - otherwise Crawlee would keep things only in memory.
|
|
52
52
|
|
|
53
|
-
Open your `src/main.py` file, and wrap
|
|
53
|
+
Open your `src/main.py` file, and wrap everything in your `main` function with the [`Actor`](https://docs.apify.com/sdk/python/reference/class/Actor) context manager. Your code should look like this:
|
|
54
54
|
|
|
55
55
|
<CodeBlock className="language-python" title="src/main.py">
|
|
56
56
|
{MainExample}
|
|
@@ -118,7 +118,10 @@ class BrowserPool:
|
|
|
118
118
|
"""Initialize a new instance with a single `PlaywrightBrowserPlugin` configured with the provided options.
|
|
119
119
|
|
|
120
120
|
Args:
|
|
121
|
-
browser_type: The type of browser to launch
|
|
121
|
+
browser_type: The type of browser to launch:
|
|
122
|
+
- 'chromium', 'firefox', 'webkit': Use Playwright-managed browsers
|
|
123
|
+
- 'chrome': Use your locally installed Google Chrome browser. Requires Google Chrome to be installed on
|
|
124
|
+
the system.
|
|
122
125
|
user_data_dir: Path to a user data directory, which stores browser session data like cookies
|
|
123
126
|
and local storage.
|
|
124
127
|
browser_launch_options: Keyword arguments to pass to the browser launch method. These options are provided
|
|
@@ -216,7 +216,7 @@ class PlaywrightBrowserController(BrowserController):
|
|
|
216
216
|
browser_new_context_options = dict(browser_new_context_options) if browser_new_context_options else {}
|
|
217
217
|
if proxy_info:
|
|
218
218
|
if browser_new_context_options.get('proxy'):
|
|
219
|
-
logger.warning("browser_new_context_options['proxy']
|
|
219
|
+
logger.warning("browser_new_context_options['proxy'] overridden by explicit `proxy_info` argument.")
|
|
220
220
|
|
|
221
221
|
browser_new_context_options['proxy'] = ProxySettings(
|
|
222
222
|
server=f'{proxy_info.scheme}://{proxy_info.hostname}:{proxy_info.port}',
|
|
@@ -34,8 +34,8 @@ class PlaywrightBrowserPlugin(BrowserPlugin):
|
|
|
34
34
|
|
|
35
35
|
It is a plugin designed to manage browser instances using the Playwright automation library. It acts as a factory
|
|
36
36
|
for creating new browser instances and provides a unified interface for interacting with different browser types
|
|
37
|
-
(chromium, firefox, and
|
|
38
|
-
executable paths, sandboxing, ...). It also manages browser contexts and the number of pages open within each
|
|
37
|
+
(chromium, firefox, webkit and chrome). This class integrates configuration options for browser launches (headless
|
|
38
|
+
mode, executable paths, sandboxing, ...). It also manages browser contexts and the number of pages open within each
|
|
39
39
|
browser instance, ensuring that resource limits are respected.
|
|
40
40
|
"""
|
|
41
41
|
|
|
@@ -55,7 +55,10 @@ class PlaywrightBrowserPlugin(BrowserPlugin):
|
|
|
55
55
|
"""Initialize a new instance.
|
|
56
56
|
|
|
57
57
|
Args:
|
|
58
|
-
browser_type: The type of browser to launch
|
|
58
|
+
browser_type: The type of browser to launch:
|
|
59
|
+
- 'chromium', 'firefox', 'webkit': Use Playwright-managed browsers
|
|
60
|
+
- 'chrome': Use your locally installed Google Chrome browser. Requires Google Chrome to be installed on
|
|
61
|
+
the system.
|
|
59
62
|
user_data_dir: Path to a User Data Directory, which stores browser session data like cookies and local
|
|
60
63
|
storage.
|
|
61
64
|
browser_launch_options: Keyword arguments to pass to the browser launch method. These options are provided
|
|
@@ -80,6 +83,17 @@ class PlaywrightBrowserPlugin(BrowserPlugin):
|
|
|
80
83
|
'chromium_sandbox': not config.disable_browser_sandbox,
|
|
81
84
|
}
|
|
82
85
|
|
|
86
|
+
if browser_type == 'chrome' and default_launch_browser_options['executable_path']:
|
|
87
|
+
raise ValueError(
|
|
88
|
+
'Cannot use browser_type `chrome` with `Configuration.default_browser_path` or `executable_path` set.'
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
# Map 'chrome' to 'chromium' with the 'chrome' channel.
|
|
92
|
+
if browser_type == 'chrome':
|
|
93
|
+
browser_type = 'chromium'
|
|
94
|
+
# Chromium parameter 'channel' set to 'chrome' enables using installed Google Chrome.
|
|
95
|
+
default_launch_browser_options['channel'] = 'chrome'
|
|
96
|
+
|
|
83
97
|
self._browser_type: BrowserType = browser_type
|
|
84
98
|
self._browser_launch_options: dict[str, Any] = default_launch_browser_options | (browser_launch_options or {})
|
|
85
99
|
self._browser_new_context_options = browser_new_context_options or {}
|
|
@@ -315,7 +315,7 @@ class AdaptivePlaywrightCrawler(
|
|
|
315
315
|
),
|
|
316
316
|
logger=self._logger,
|
|
317
317
|
)
|
|
318
|
-
return SubCrawlerRun(result=result)
|
|
318
|
+
return SubCrawlerRun(result=result, run_context=context_linked_to_result)
|
|
319
319
|
except Exception as e:
|
|
320
320
|
return SubCrawlerRun(exception=e)
|
|
321
321
|
|
|
@@ -371,7 +371,8 @@ class AdaptivePlaywrightCrawler(
|
|
|
371
371
|
self.track_http_only_request_handler_runs()
|
|
372
372
|
|
|
373
373
|
static_run = await self._crawl_one(rendering_type='static', context=context)
|
|
374
|
-
if static_run.result and self.result_checker(static_run.result):
|
|
374
|
+
if static_run.result and static_run.run_context and self.result_checker(static_run.result):
|
|
375
|
+
self._update_context_from_copy(context, static_run.run_context)
|
|
375
376
|
self._context_result_map[context] = static_run.result
|
|
376
377
|
return
|
|
377
378
|
if static_run.exception:
|
|
@@ -402,13 +403,10 @@ class AdaptivePlaywrightCrawler(
|
|
|
402
403
|
if pw_run.exception is not None:
|
|
403
404
|
raise pw_run.exception
|
|
404
405
|
|
|
405
|
-
if pw_run.result:
|
|
406
|
-
self._context_result_map[context] = pw_run.result
|
|
407
|
-
|
|
406
|
+
if pw_run.result and pw_run.run_context:
|
|
408
407
|
if should_detect_rendering_type:
|
|
409
408
|
detection_result: RenderingType
|
|
410
409
|
static_run = await self._crawl_one('static', context=context, state=old_state_copy)
|
|
411
|
-
|
|
412
410
|
if static_run.result and self.result_comparator(static_run.result, pw_run.result):
|
|
413
411
|
detection_result = 'static'
|
|
414
412
|
else:
|
|
@@ -417,6 +415,9 @@ class AdaptivePlaywrightCrawler(
|
|
|
417
415
|
context.log.debug(f'Detected rendering type {detection_result} for {context.request.url}')
|
|
418
416
|
self.rendering_type_predictor.store_result(context.request, detection_result)
|
|
419
417
|
|
|
418
|
+
self._update_context_from_copy(context, pw_run.run_context)
|
|
419
|
+
self._context_result_map[context] = pw_run.result
|
|
420
|
+
|
|
420
421
|
def pre_navigation_hook(
|
|
421
422
|
self,
|
|
422
423
|
hook: Callable[[AdaptivePlaywrightPreNavCrawlingContext], Awaitable[None]] | None = None,
|
|
@@ -451,8 +452,32 @@ class AdaptivePlaywrightCrawler(
|
|
|
451
452
|
def track_rendering_type_mispredictions(self) -> None:
|
|
452
453
|
self.statistics.state.rendering_type_mispredictions += 1
|
|
453
454
|
|
|
455
|
+
def _update_context_from_copy(self, context: BasicCrawlingContext, context_copy: BasicCrawlingContext) -> None:
|
|
456
|
+
"""Update mutable fields of `context` from `context_copy`.
|
|
457
|
+
|
|
458
|
+
Uses object.__setattr__ to bypass frozen dataclass restrictions,
|
|
459
|
+
allowing state synchronization after isolated crawler execution.
|
|
460
|
+
"""
|
|
461
|
+
updating_attributes = {
|
|
462
|
+
'request': ('headers', 'user_data'),
|
|
463
|
+
'session': ('_user_data', '_usage_count', '_error_score', '_cookies'),
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
for attr, sub_attrs in updating_attributes.items():
|
|
467
|
+
original_sub_obj = getattr(context, attr)
|
|
468
|
+
copy_sub_obj = getattr(context_copy, attr)
|
|
469
|
+
|
|
470
|
+
# Check that both sub objects are not None
|
|
471
|
+
if original_sub_obj is None or copy_sub_obj is None:
|
|
472
|
+
continue
|
|
473
|
+
|
|
474
|
+
for sub_attr in sub_attrs:
|
|
475
|
+
new_value = getattr(copy_sub_obj, sub_attr)
|
|
476
|
+
object.__setattr__(original_sub_obj, sub_attr, new_value)
|
|
477
|
+
|
|
454
478
|
|
|
455
479
|
@dataclass(frozen=True)
|
|
456
480
|
class SubCrawlerRun:
|
|
457
481
|
result: RequestHandlerRunResult | None = None
|
|
458
482
|
exception: Exception | None = None
|
|
483
|
+
run_context: BasicCrawlingContext | None = None
|
|
@@ -114,7 +114,10 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
|
|
|
114
114
|
browser_pool: A `BrowserPool` instance to be used for launching the browsers and getting pages.
|
|
115
115
|
user_data_dir: Path to a user data directory, which stores browser session data like cookies
|
|
116
116
|
and local storage.
|
|
117
|
-
browser_type: The type of browser to launch
|
|
117
|
+
browser_type: The type of browser to launch:
|
|
118
|
+
- 'chromium', 'firefox', 'webkit': Use Playwright-managed browsers
|
|
119
|
+
- 'chrome': Use your locally installed Google Chrome browser. Requires Google Chrome to be installed on
|
|
120
|
+
the system.
|
|
118
121
|
This option should not be used if `browser_pool` is provided.
|
|
119
122
|
browser_launch_options: Keyword arguments to pass to the browser launch method. These options are provided
|
|
120
123
|
directly to Playwright's `browser_type.launch` method. For more details, refer to the
|
|
@@ -153,7 +156,7 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
|
|
|
153
156
|
):
|
|
154
157
|
raise ValueError(
|
|
155
158
|
'You cannot provide `headless`, `browser_type`, `browser_launch_options`, '
|
|
156
|
-
'`browser_new_context_options`, `use_incognito_pages`, `user_data_dir`
|
|
159
|
+
'`browser_new_context_options`, `use_incognito_pages`, `user_data_dir` or '
|
|
157
160
|
'`fingerprint_generator` arguments when `browser_pool` is provided.'
|
|
158
161
|
)
|
|
159
162
|
|
|
@@ -496,7 +499,9 @@ class _PlaywrightCrawlerAdditionalOptions(TypedDict):
|
|
|
496
499
|
"""A `BrowserPool` instance to be used for launching the browsers and getting pages."""
|
|
497
500
|
|
|
498
501
|
browser_type: NotRequired[BrowserType]
|
|
499
|
-
"""The type of browser to launch
|
|
502
|
+
"""The type of browser to launch:
|
|
503
|
+
- 'chromium', 'firefox', 'webkit': Use Playwright-managed browsers
|
|
504
|
+
- 'chrome': Use your locally installed Google Chrome browser. Requires Google Chrome to be installed on the system.
|
|
500
505
|
This option should not be used if `browser_pool` is provided."""
|
|
501
506
|
|
|
502
507
|
browser_launch_options: NotRequired[Mapping[str, Any]]
|
|
@@ -11,9 +11,9 @@ if TYPE_CHECKING:
|
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
def fingerprint_browser_type_from_playwright_browser_type(
|
|
14
|
-
playwright_browser_type: Literal['chromium', 'firefox', 'webkit'],
|
|
14
|
+
playwright_browser_type: Literal['chromium', 'firefox', 'webkit', 'chrome'],
|
|
15
15
|
) -> SupportedBrowserType:
|
|
16
|
-
if playwright_browser_type
|
|
16
|
+
if playwright_browser_type in {'chromium', 'chrome'}:
|
|
17
17
|
return 'chrome'
|
|
18
18
|
if playwright_browser_type == 'firefox':
|
|
19
19
|
return 'firefox'
|
|
@@ -90,6 +90,11 @@ class SitemapRequestLoaderState(BaseModel):
|
|
|
90
90
|
class SitemapRequestLoader(RequestLoader):
|
|
91
91
|
"""A request loader that reads URLs from sitemap(s).
|
|
92
92
|
|
|
93
|
+
The loader is designed to handle sitemaps that follow the format described in the Sitemaps protocol
|
|
94
|
+
(https://www.sitemaps.org/protocol.html). It supports both XML and plain text sitemap formats.
|
|
95
|
+
Note that HTML pages containing links are not supported - those should be handled by regular crawlers
|
|
96
|
+
and the `enqueue_links` functionality.
|
|
97
|
+
|
|
93
98
|
The loader fetches and parses sitemaps in the background, allowing crawling to start
|
|
94
99
|
before all URLs are loaded. It supports filtering URLs using glob and regex patterns.
|
|
95
100
|
|
|
@@ -69,3 +69,13 @@ async def test_methods_raise_error_when_not_active() -> None:
|
|
|
69
69
|
|
|
70
70
|
async with plugin:
|
|
71
71
|
assert plugin.active is True
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
async def raise_error_if_chrome_and_executable_path() -> None:
|
|
75
|
+
with pytest.raises(
|
|
76
|
+
ValueError, match=r'Cannot use `use_chrome` with `Configuration.default_browser_path` or `executable_path` set.'
|
|
77
|
+
):
|
|
78
|
+
PlaywrightBrowserPlugin(
|
|
79
|
+
browser_type='chrome',
|
|
80
|
+
browser_launch_options={'executable_path': '/path/to/chrome'},
|
|
81
|
+
)
|
|
@@ -29,9 +29,10 @@ from crawlee.crawlers._adaptive_playwright._adaptive_playwright_crawler_statisti
|
|
|
29
29
|
from crawlee.crawlers._adaptive_playwright._adaptive_playwright_crawling_context import (
|
|
30
30
|
AdaptiveContextError,
|
|
31
31
|
)
|
|
32
|
+
from crawlee.sessions import SessionPool
|
|
32
33
|
from crawlee.statistics import Statistics
|
|
33
34
|
from crawlee.storage_clients import SqlStorageClient
|
|
34
|
-
from crawlee.storages import KeyValueStore
|
|
35
|
+
from crawlee.storages import KeyValueStore, RequestQueue
|
|
35
36
|
|
|
36
37
|
if TYPE_CHECKING:
|
|
37
38
|
from collections.abc import AsyncGenerator, Iterator
|
|
@@ -730,6 +731,84 @@ async def test_adaptive_context_query_non_existing_element(test_urls: list[str])
|
|
|
730
731
|
mocked_h3_handler.assert_called_once_with(None)
|
|
731
732
|
|
|
732
733
|
|
|
734
|
+
@pytest.mark.parametrize(
|
|
735
|
+
'test_input',
|
|
736
|
+
[
|
|
737
|
+
pytest.param(
|
|
738
|
+
TestInput(
|
|
739
|
+
expected_pw_count=0,
|
|
740
|
+
expected_static_count=2,
|
|
741
|
+
rendering_types=cycle(['static']),
|
|
742
|
+
detection_probability_recommendation=cycle([0]),
|
|
743
|
+
),
|
|
744
|
+
id='Static only',
|
|
745
|
+
),
|
|
746
|
+
pytest.param(
|
|
747
|
+
TestInput(
|
|
748
|
+
expected_pw_count=2,
|
|
749
|
+
expected_static_count=0,
|
|
750
|
+
rendering_types=cycle(['client only']),
|
|
751
|
+
detection_probability_recommendation=cycle([0]),
|
|
752
|
+
),
|
|
753
|
+
id='Client only',
|
|
754
|
+
),
|
|
755
|
+
pytest.param(
|
|
756
|
+
TestInput(
|
|
757
|
+
expected_pw_count=2,
|
|
758
|
+
expected_static_count=2,
|
|
759
|
+
rendering_types=cycle(['static', 'client only']),
|
|
760
|
+
detection_probability_recommendation=cycle([1]),
|
|
761
|
+
),
|
|
762
|
+
id='Enforced rendering type detection',
|
|
763
|
+
),
|
|
764
|
+
],
|
|
765
|
+
)
|
|
766
|
+
async def test_change_context_state_after_handling(test_input: TestInput, server_url: URL) -> None:
|
|
767
|
+
"""Test that context state is saved after handling the request."""
|
|
768
|
+
predictor = _SimpleRenderingTypePredictor(
|
|
769
|
+
rendering_types=test_input.rendering_types,
|
|
770
|
+
detection_probability_recommendation=test_input.detection_probability_recommendation,
|
|
771
|
+
)
|
|
772
|
+
|
|
773
|
+
request_queue = await RequestQueue.open(name='state-test')
|
|
774
|
+
used_session_id = None
|
|
775
|
+
|
|
776
|
+
async with SessionPool() as session_pool:
|
|
777
|
+
crawler = AdaptivePlaywrightCrawler.with_beautifulsoup_static_parser(
|
|
778
|
+
rendering_type_predictor=predictor,
|
|
779
|
+
session_pool=session_pool,
|
|
780
|
+
request_manager=request_queue,
|
|
781
|
+
)
|
|
782
|
+
|
|
783
|
+
@crawler.router.default_handler
|
|
784
|
+
async def request_handler(context: AdaptivePlaywrightCrawlingContext) -> None:
|
|
785
|
+
nonlocal used_session_id
|
|
786
|
+
|
|
787
|
+
if context.session is not None:
|
|
788
|
+
used_session_id = context.session.id
|
|
789
|
+
context.session.user_data['session_state'] = True
|
|
790
|
+
|
|
791
|
+
if isinstance(context.request.user_data['request_state'], list):
|
|
792
|
+
context.request.user_data['request_state'].append('handler')
|
|
793
|
+
|
|
794
|
+
request = Request.from_url(str(server_url), user_data={'request_state': ['initial']})
|
|
795
|
+
|
|
796
|
+
await crawler.run([request])
|
|
797
|
+
|
|
798
|
+
assert used_session_id is not None
|
|
799
|
+
|
|
800
|
+
session = await session_pool.get_session_by_id(used_session_id)
|
|
801
|
+
check_request = await request_queue.get_request(request.unique_key)
|
|
802
|
+
|
|
803
|
+
assert session is not None
|
|
804
|
+
assert check_request is not None
|
|
805
|
+
assert session.user_data.get('session_state') is True
|
|
806
|
+
# Check that request user data was updated in the handler and only once.
|
|
807
|
+
assert check_request.user_data.get('request_state') == ['initial', 'handler']
|
|
808
|
+
|
|
809
|
+
await request_queue.drop()
|
|
810
|
+
|
|
811
|
+
|
|
733
812
|
async def test_adaptive_playwright_crawler_with_sql_storage(test_urls: list[str], tmp_path: Path) -> None:
|
|
734
813
|
"""Tests that AdaptivePlaywrightCrawler can be initialized with SqlStorageClient."""
|
|
735
814
|
storage_dir = tmp_path / 'test_table.db'
|