crawlee 1.0.5b20__tar.gz → 1.0.5b22__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/CHANGELOG.md +2 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/PKG-INFO +1 -1
- crawlee-1.0.5b22/docs/examples/code_examples/using_sitemap_request_loader.py +101 -0
- crawlee-1.0.5b22/docs/examples/using_sitemap_request_loader.mdx +22 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/pyproject.toml +1 -1
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_basic/_basic_crawler.py +1 -4
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/events/_event_manager.py +3 -1
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/request_loaders/_sitemap_request_loader.py +17 -4
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/statistics/_models.py +32 -1
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/statistics/_statistics.py +2 -21
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/crawlers/_basic/test_basic_crawler.py +0 -1
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/events/test_event_manager.py +12 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/request_loaders/test_sitemap_request_loader.py +35 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/uv.lock +1 -1
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/.editorconfig +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/.github/CODEOWNERS +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/.github/pull_request_template.md +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/.github/workflows/build_and_deploy_docs.yaml +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/.github/workflows/check_pr_title.yaml +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/.github/workflows/pre_release.yaml +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/.github/workflows/release.yaml +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/.github/workflows/run_code_checks.yaml +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/.github/workflows/templates_e2e_tests.yaml +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/.github/workflows/update_new_issue.yaml +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/.gitignore +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/.markdownlint.yaml +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/.pre-commit-config.yaml +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/CONTRIBUTING.md +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/LICENSE +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/Makefile +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/README.md +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/deployment/apify_platform.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/deployment/code_examples/apify/crawler_as_actor_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/deployment/code_examples/apify/get_public_url.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/deployment/code_examples/apify/log_with_config_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/deployment/code_examples/apify/proxy_advanced_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/deployment/code_examples/apify/proxy_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/deployment/code_examples/google/cloud_run_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/deployment/code_examples/google/google_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/deployment/google_cloud.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/deployment/google_cloud_run.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/add_data_to_dataset.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/beautifulsoup_crawler.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/capture_screenshot_using_playwright.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/capturing_page_snapshots_with_error_snapshotter.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/adaptive_playwright_crawler.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/add_data_to_dataset_bs.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/add_data_to_dataset_dataset.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/add_data_to_dataset_pw.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/beautifulsoup_crawler.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/beautifulsoup_crawler_keep_alive.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/beautifulsoup_crawler_stop.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/capture_screenshot_using_playwright.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/configure_json_logging.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/crawl_all_links_on_website_bs.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/crawl_all_links_on_website_pw.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/crawl_multiple_urls_bs.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/crawl_multiple_urls_pw.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/crawl_specific_links_on_website_bs.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/crawl_specific_links_on_website_pw.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/crawl_website_with_relative_links_all_links.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/crawl_website_with_relative_links_same_domain.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/crawl_website_with_relative_links_same_hostname.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/crawl_website_with_relative_links_same_origin.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/export_entire_dataset_to_file_csv.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/export_entire_dataset_to_file_json.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/extract_and_add_specific_links_on_website_bs.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/extract_and_add_specific_links_on_website_pw.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/fill_and_submit_web_form_crawler.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/fill_and_submit_web_form_request.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/parsel_crawler.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/parsel_crawler_with_error_snapshotter.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/playwright_block_requests.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/playwright_crawler.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/playwright_crawler_with_camoufox.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/playwright_crawler_with_error_snapshotter.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/playwright_crawler_with_fingerprint_generator.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/respect_robots_on_skipped_request.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/respect_robots_txt_file.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/resuming_paused_crawl.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/using_browser_profiles_chrome.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/using_browser_profiles_firefox.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/crawl_all_links_on_website.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/crawl_multiple_urls.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/crawl_specific_links_on_website.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/crawl_website_with_relative_links.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/crawler_keep_alive.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/crawler_stop.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/export_entire_dataset_to_file.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/fill_and_submit_web_form.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/json_logging.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/parsel_crawler.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/playwright_crawler.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/playwright_crawler_adaptive.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/playwright_crawler_with_block_requests.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/playwright_crawler_with_camoufox.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/playwright_crawler_with_fingerprint_generator.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/respect_robots_txt_file.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/resuming_paused_crawl.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/using_browser_profile.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/architecture_overview.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/avoid_blocking.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/avoid_blocking/default_fingerprint_generator_with_args.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/avoid_blocking/playwright_with_fingerprint_generator.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/creating_web_archive/manual_archiving_parsel_crawler.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/creating_web_archive/manual_archiving_playwright_crawler.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/creating_web_archive/simple_pw_through_proxy_pywb_server.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/error_handling/change_handle_error_status.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/error_handling/disable_retry.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/error_handling/handle_proxy_error.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/http_clients/parsel_curl_impersonate_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/http_clients/parsel_httpx_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/http_clients/parsel_impit_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/http_crawlers/beautifulsoup_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/http_crawlers/custom_crawler_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/http_crawlers/http_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/http_crawlers/parsel_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/login_crawler/http_login.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/login_crawler/playwright_login.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/playwright_crawler/browser_configuration_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/playwright_crawler/multiple_launch_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/playwright_crawler/plugin_browser_configuration_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/playwright_crawler/pre_navigation_hook_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/playwright_crawler_adaptive/handler.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/playwright_crawler_adaptive/init_beautifulsoup.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/playwright_crawler_adaptive/init_parsel.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/playwright_crawler_adaptive/init_prediction.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/playwright_crawler_adaptive/pre_nav_hooks.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/playwright_crawler_stagehand/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/playwright_crawler_stagehand/browser_classes.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/playwright_crawler_stagehand/stagehand_run.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/playwright_crawler_stagehand/support_classes.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/proxy_management/inspecting_bs_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/proxy_management/inspecting_pw_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/proxy_management/integration_bs_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/proxy_management/integration_pw_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/proxy_management/quick_start_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/proxy_management/session_bs_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/proxy_management/session_pw_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/proxy_management/tiers_bs_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/proxy_management/tiers_pw_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_loaders/rl_basic_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_loaders/rl_basic_example_with_persist.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_loaders/rl_tandem_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_loaders/rl_tandem_example_explicit.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_loaders/sitemap_basic_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_loaders/sitemap_example_with_persist.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_loaders/sitemap_tandem_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_loaders/sitemap_tandem_example_explicit.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_router/adaptive_crawler_handlers.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_router/basic_request_handlers.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_router/custom_router_default_only.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_router/error_handler.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_router/failed_request_handler.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_router/http_pre_navigation.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_router/playwright_pre_navigation.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_router/simple_default_handler.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/running_in_web_server/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/running_in_web_server/crawler.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/running_in_web_server/server.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/scaling_crawlers/max_tasks_per_minute_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/scaling_crawlers/min_and_max_concurrency_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/service_locator/service_conflicts.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/service_locator/service_crawler_configuration.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/service_locator/service_crawler_event_manager.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/service_locator/service_crawler_storage_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/service_locator/service_locator_configuration.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/service_locator/service_locator_event_manager.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/service_locator/service_locator_storage_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/service_locator/service_storage_configuration.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/service_locator/service_storage_storage_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/session_management/multi_sessions_http.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/session_management/one_session_http.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/session_management/sm_basic.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/session_management/sm_beautifulsoup.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/session_management/sm_http.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/session_management/sm_parsel.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/session_management/sm_playwright.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/session_management/sm_standalone.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storage_clients/custom_storage_client_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storage_clients/file_system_storage_client_basic_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storage_clients/file_system_storage_client_configuration_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storage_clients/memory_storage_client_basic_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storage_clients/redis_storage_client_basic_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storage_clients/redis_storage_client_configuration_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storage_clients/registering_storage_clients_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storage_clients/sql_storage_client_basic_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storage_clients/sql_storage_client_configuration_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storages/cleaning_do_not_purge_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storages/cleaning_purge_explicitly_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storages/dataset_basic_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storages/dataset_with_crawler_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storages/dataset_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storages/helper_add_requests_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storages/helper_enqueue_links_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storages/kvs_basic_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storages/kvs_with_crawler_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storages/kvs_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storages/opening.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storages/rq_basic_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storages/rq_with_crawler_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storages/rq_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/trace_and_monitor_crawlers/instrument_crawler.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/crawler_login.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/creating_web_archive.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/error_handling.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/http_clients.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/http_crawlers.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/playwright_crawler.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/playwright_crawler_adaptive.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/playwright_crawler_stagehand.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/proxy_management.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/request_loaders.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/request_router.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/running_in_web_server.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/scaling_crawlers.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/service_locator.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/session_management.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/storage_clients.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/storages.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/trace_and_monitor_crawlers.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/01_setting_up.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/02_first_crawler.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/03_adding_more_urls.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/04_real_world_project.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/05_crawling.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/06_scraping.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/07_saving_data.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/08_refactoring.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/09_running_in_cloud.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/02_bs.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/02_bs_better.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/02_request_queue.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/03_enqueue_strategy.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/03_finding_new_links.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/03_globs.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/03_original_code.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/03_transform_request.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/04_sanity_check.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/05_crawling_detail.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/05_crawling_listing.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/06_scraping.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/07_final_code.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/07_first_code.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/08_main.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/08_routes.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/09_apify_sdk.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/routes.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/index.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/pyproject.toml +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/quick-start/code_examples/beautifulsoup_crawler_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/quick-start/code_examples/parsel_crawler_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/quick-start/code_examples/playwright_crawler_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/quick-start/code_examples/playwright_crawler_headful_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/quick-start/index.mdx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/upgrading/upgrading_to_v0x.md +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/upgrading/upgrading_to_v1.md +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/renovate.json +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_autoscaling/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_autoscaling/_types.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_autoscaling/autoscaled_pool.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_autoscaling/py.typed +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_autoscaling/snapshotter.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_autoscaling/system_status.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_browserforge_workaround.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_cli.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_consts.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_log_config.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_request.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_service_locator.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_types.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/blocked.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/byte_size.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/console.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/context.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/crypto.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/docs.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/file.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/globs.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/html_to_text.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/models.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/raise_if_too_many_kwargs.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/recoverable_state.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/recurring_task.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/requests.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/robots.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/sitemap.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/system.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/time.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/try_import.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/urls.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/wait.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/web.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/browsers/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/browsers/_browser_controller.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/browsers/_browser_plugin.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/browsers/_browser_pool.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/browsers/_playwright_browser.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/browsers/_playwright_browser_controller.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/browsers/_playwright_browser_plugin.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/browsers/_types.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/browsers/py.typed +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/configuration.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_abstract_http/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_abstract_http/_abstract_http_parser.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_abstract_http/_http_crawling_context.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_abstract_http/py.typed +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_adaptive_playwright/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_adaptive_playwright/_result_comparator.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_adaptive_playwright/_utils.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_basic/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_basic/_basic_crawling_context.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_basic/_context_pipeline.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_basic/_logging_utils.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_basic/py.typed +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_beautifulsoup/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawler.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawling_context.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_parser.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_beautifulsoup/_utils.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_beautifulsoup/py.typed +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_http/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_http/_http_crawler.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_http/_http_parser.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_parsel/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_parsel/_parsel_crawler.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_parsel/_parsel_crawling_context.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_parsel/_parsel_parser.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_parsel/_utils.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_playwright/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_playwright/_playwright_crawler.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_playwright/_playwright_crawling_context.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_playwright/_playwright_http_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_playwright/_types.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_playwright/_utils.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_types.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/py.typed +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/errors.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/events/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/events/_local_event_manager.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/events/_types.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/events/py.typed +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/fingerprint_suite/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/fingerprint_suite/_browserforge_adapter.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/fingerprint_suite/_consts.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/fingerprint_suite/_fingerprint_generator.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/fingerprint_suite/_header_generator.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/fingerprint_suite/_types.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/fingerprint_suite/py.typed +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/http_clients/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/http_clients/_base.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/http_clients/_curl_impersonate.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/http_clients/_httpx.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/http_clients/_impit.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/otel/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/otel/crawler_instrumentor.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/cookiecutter.json +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/hooks/post_gen_project.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/hooks/pre_gen_project.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/templates/main.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/templates/main_beautifulsoup.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/templates/main_parsel.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/templates/main_playwright.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/templates/main_playwright_camoufox.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/templates/routes_beautifulsoup.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/templates/routes_camoufox.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/templates/routes_parsel.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/templates/routes_playwright.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/templates/routes_playwright_camoufox.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/{{cookiecutter.project_name}}/.dockerignore +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/{{cookiecutter.project_name}}/README.md +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/{{cookiecutter.project_name}}/requirements.txt +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__main__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/main.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/routes.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/proxy_configuration.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/py.typed +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/request_loaders/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/request_loaders/_request_list.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/request_loaders/_request_loader.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/request_loaders/_request_manager.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/request_loaders/_request_manager_tandem.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/router.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/sessions/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/sessions/_cookies.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/sessions/_models.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/sessions/_session.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/sessions/_session_pool.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/sessions/py.typed +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/statistics/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/statistics/_error_snapshotter.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/statistics/_error_tracker.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_base/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_base/_dataset_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_base/_key_value_store_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_base/_request_queue_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_base/_storage_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_base/py.typed +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_file_system/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_file_system/_dataset_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_file_system/_key_value_store_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_file_system/_request_queue_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_file_system/_storage_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_file_system/_utils.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_file_system/py.typed +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_memory/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_memory/_dataset_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_memory/_key_value_store_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_memory/_request_queue_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_memory/_storage_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_memory/py.typed +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_redis/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_redis/_client_mixin.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_redis/_dataset_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_redis/_key_value_store_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_redis/_request_queue_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_redis/_storage_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_redis/_utils.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_redis/lua_scripts/atomic_bloom_add_requests.lua +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_redis/lua_scripts/atomic_fetch_request.lua +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_redis/lua_scripts/atomic_set_add_requests.lua +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_redis/lua_scripts/reclaim_stale_requests.lua +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_redis/py.typed +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_sql/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_sql/_client_mixin.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_sql/_dataset_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_sql/_db_models.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_sql/_key_value_store_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_sql/_request_queue_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_sql/_storage_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_sql/py.typed +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/models.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/py.typed +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storages/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storages/_base.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storages/_dataset.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storages/_key_value_store.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storages/_request_queue.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storages/_storage_instance_manager.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storages/_utils.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storages/py.typed +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/e2e/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/e2e/conftest.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/e2e/project_template/test_static_crawlers_templates.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/e2e/project_template/utils.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/README.md +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/__init__.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_autoscaling/test_autoscaled_pool.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_autoscaling/test_snapshotter.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_autoscaling/test_system_status.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_statistics/test_error_tracker.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_statistics/test_periodic_logging.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_statistics/test_persistence.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_statistics/test_request_processing_record.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_byte_size.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_console.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_crypto.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_file.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_globs.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_html_to_text.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_measure_time.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_raise_if_too_many_kwargs.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_recurring_task.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_requests.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_robots.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_sitemap.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_system.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_timedelata_ms.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_urls.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/browsers/test_browser_pool.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/browsers/test_playwright_browser.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/browsers/test_playwright_browser_controller.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/browsers/test_playwright_browser_plugin.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/conftest.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler_statistics.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawling_context.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/crawlers/_adaptive_playwright/test_predictor.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/crawlers/_basic/test_context_pipeline.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/crawlers/_http/test_http_crawler.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/crawlers/_parsel/test_parsel_crawler.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/crawlers/_playwright/test_playwright_crawler.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/events/test_local_event_manager.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/fingerprint_suite/test_adapters.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/fingerprint_suite/test_header_generator.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/http_clients/test_http_clients.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/http_clients/test_httpx.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/otel/test_crawler_instrumentor.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/proxy_configuration/test_new_proxy_info.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/proxy_configuration/test_tiers.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/request_loaders/test_request_list.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/server.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/server_endpoints.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/sessions/test_cookies.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/sessions/test_models.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/sessions/test_session.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/sessions/test_session_pool.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storage_clients/_file_system/test_fs_rq_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storage_clients/_memory/test_memory_dataset_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storage_clients/_memory/test_memory_kvs_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storage_clients/_memory/test_memory_rq_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storage_clients/_redis/test_redis_dataset_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storage_clients/_redis/test_redis_kvs_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storage_clients/_redis/test_redis_rq_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storage_clients/_sql/test_sql_dataset_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storage_clients/_sql/test_sql_kvs_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storage_clients/_sql/test_sql_rq_client.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storages/conftest.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storages/test_dataset.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storages/test_key_value_store.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storages/test_request_manager_tandem.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storages/test_request_queue.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storages/test_storage_instance_manager.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/test_cli.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/test_configuration.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/test_log_config.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/test_router.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/test_service_locator.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/.eslintrc.json +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/.yarnrc.yml +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/babel.config.js +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/build_api_reference.sh +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/docusaurus.config.js +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/generate_module_shortcuts.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/package.json +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/patches/@docusaurus+core+3.4.0.patch +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/patches/@docusaurus+core+3.5.2.patch +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/roa-loader/index.js +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/roa-loader/package.json +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/sidebars.js +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/ApiLink.jsx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Button.jsx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Button.module.css +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/CopyButton.jsx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/CopyButton.module.css +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Gradients.jsx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Highlights.jsx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Highlights.module.css +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/HomepageCliExample.jsx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/HomepageCliExample.module.css +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/HomepageCtaSection.jsx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/HomepageCtaSection.module.css +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/HomepageHeroSection.jsx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/HomepageHeroSection.module.css +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/LanguageInfoWidget.jsx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/LanguageInfoWidget.module.css +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/LanguageSwitch.jsx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/LanguageSwitch.module.css +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/RiverSection.jsx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/RiverSection.module.css +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/ThreeCardsWithIcon.jsx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/ThreeCardsWithIcon.module.css +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/animated-crawlee-logo-dark.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/animated-crawlee-logo-light.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/RunnableCodeBlock.jsx +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/RunnableCodeBlock.module.css +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/css/custom.css +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/pages/home_page_example.py +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/pages/index.js +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/pages/index.module.css +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/ColorModeToggle/dark-mode-icon.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/ColorModeToggle/index.js +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/ColorModeToggle/light-mode-icon.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/ColorModeToggle/styles.module.css +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/DocItem/Layout/index.js +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/DocItem/Layout/styles.module.css +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/Footer/LinkItem/index.js +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/Footer/LinkItem/index.module.css +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/Footer/index.js +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/Footer/index.module.css +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/MDXComponents/A.js +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/Navbar/Content/index.js +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/Navbar/Content/styles.module.css +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/Navbar/Logo/index.js +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/Navbar/Logo/index.module.css +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/Navbar/MobileSidebar/Header/index.js +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/Navbar/MobileSidebar/Header/index.module.css +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/Navbar/MobileSidebar/Layout/index.js +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/Navbar/MobileSidebar/PrimaryMenu/index.js +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/Navbar/MobileSidebar/index.js +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/NavbarItem/ComponentTypes.js +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/.nojekyll +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/font/lota.woff +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/font/lota.woff2 +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/API.png +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/arrow_right.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/auto-scaling-dark.webp +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/auto-scaling-light.webp +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/check.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/chrome-scrape-dark.gif +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/chrome-scrape-light.gif +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/cloud_icon.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/community-dark-icon.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/community-light-icon.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/crawlee-dark-new.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/crawlee-dark.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/crawlee-javascript-dark.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/crawlee-javascript-light.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/crawlee-light-new.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/crawlee-light.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/crawlee-logo-monocolor.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/crawlee-logo.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/crawlee-python-dark.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/crawlee-python-light.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/crawlee-python-og.png +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/defaults-dark-icon.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/defaults-light-icon.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/discord-brand-dark.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/discord-brand.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/docusaurus.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/external-link.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/favicon.ico +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/favorite-tools-dark.webp +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/favorite-tools-light.webp +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/features/auto-scaling.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/features/automate-everything.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/features/fingerprints.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/features/node-requests.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/features/runs-on-py.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/features/storage.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/features/works-everywhere.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/fill-and-submit-web-form/00.jpg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/fill-and-submit-web-form/01.jpg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/fill-and-submit-web-form/02.jpg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/fill-and-submit-web-form/03.jpg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/getting-started/current-price.jpg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/getting-started/scraping-practice.jpg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/getting-started/select-an-element.jpg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/getting-started/selected-element.jpg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/getting-started/sku.jpg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/getting-started/title.jpg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/github-brand-dark.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/github-brand.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/guides/jaeger_otel_search_view_example.png +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/guides/jaeger_otel_trace_example.png +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/hearth copy.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/hearth.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/javascript_logo.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/js_file.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/logo-big.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/logo-blur.png +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/logo-blur.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/logo-zoom.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/menu-arrows.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/oss_logo.png +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/puppeteer-live-view-dashboard.png +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/puppeteer-live-view-detail.png +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/queue-dark-icon.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/queue-light-icon.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/resuming-paused-crawl/00.webp +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/resuming-paused-crawl/01.webp +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/robot.png +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/routing-dark-icon.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/routing-light-icon.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/scraping-utils-dark-icon.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/scraping-utils-light-icon.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/smart-proxy-dark.webp +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/smart-proxy-light.webp +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/source_code.png +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/system.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/triangles_dark.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/triangles_light.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/workflow.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/zero-setup-dark-icon.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/zero-setup-light-icon.svg +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/js/custom.js +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/robots.txt +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/tools/docs-prettier.config.js +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/tools/utils/externalLink.js +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/tools/website_gif/chrome-scrape-dark.gif +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/tools/website_gif/chrome-scrape-dark.mp4 +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/tools/website_gif/chrome-scrape-light.gif +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/tools/website_gif/chrome-scrape-light.mp4 +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/tools/website_gif/website_gif.mjs +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/tsconfig.eslint.json +0 -0
- {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/yarn.lock +0 -0
|
@@ -10,12 +10,14 @@ All notable changes to this project will be documented in this file.
|
|
|
10
10
|
- Add `chrome` `BrowserType` for `PlaywrightCrawler` to use the Chrome browser ([#1487](https://github.com/apify/crawlee-python/pull/1487)) ([b06937b](https://github.com/apify/crawlee-python/commit/b06937bbc3afe3c936b554bfc503365c1b2c526b)) by [@Mantisus](https://github.com/Mantisus), closes [#1071](https://github.com/apify/crawlee-python/issues/1071)
|
|
11
11
|
- Add `RedisStorageClient` based on Redis v8.0+ ([#1406](https://github.com/apify/crawlee-python/pull/1406)) ([d08d13d](https://github.com/apify/crawlee-python/commit/d08d13d39203c24ab61fe254b0956d6744db3b5f)) by [@Mantisus](https://github.com/Mantisus)
|
|
12
12
|
- Add support for Python 3.14 ([#1553](https://github.com/apify/crawlee-python/pull/1553)) ([89e9130](https://github.com/apify/crawlee-python/commit/89e9130cabee0fbc974b29c26483b7fa0edf627c)) by [@Mantisus](https://github.com/Mantisus)
|
|
13
|
+
- Add `transform_request_function` parameter for `SitemapRequestLoader` ([#1525](https://github.com/apify/crawlee-python/pull/1525)) ([dc90127](https://github.com/apify/crawlee-python/commit/dc901271849b239ba2a947e8ebff8e1815e8c4fb)) by [@Mantisus](https://github.com/Mantisus)
|
|
13
14
|
|
|
14
15
|
### 🐛 Bug Fixes
|
|
15
16
|
|
|
16
17
|
- Improve indexing of the `request_queue_records` table for `SqlRequestQueueClient` ([#1527](https://github.com/apify/crawlee-python/pull/1527)) ([6509534](https://github.com/apify/crawlee-python/commit/65095346a9d8b703b10c91e0510154c3c48a4176)) by [@Mantisus](https://github.com/Mantisus), closes [#1526](https://github.com/apify/crawlee-python/issues/1526)
|
|
17
18
|
- Improve error handling for `RobotsTxtFile.load` ([#1524](https://github.com/apify/crawlee-python/pull/1524)) ([596a311](https://github.com/apify/crawlee-python/commit/596a31184914a254b3e7a81fd2f48ea8eda7db49)) by [@Mantisus](https://github.com/Mantisus)
|
|
18
19
|
- Fix `crawler_runtime` not being updated during run and only in the end ([#1540](https://github.com/apify/crawlee-python/pull/1540)) ([0d6c3f6](https://github.com/apify/crawlee-python/commit/0d6c3f6d3337ddb6cab4873747c28cf95605d550)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1541](https://github.com/apify/crawlee-python/issues/1541)
|
|
20
|
+
- Ensure persist state event emission when exiting `EventManager` context ([#1562](https://github.com/apify/crawlee-python/pull/1562)) ([6a44f17](https://github.com/apify/crawlee-python/commit/6a44f172600cbcacebab899082d6efc9105c4e03)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1560](https://github.com/apify/crawlee-python/issues/1560)
|
|
19
21
|
|
|
20
22
|
|
|
21
23
|
<!-- git-cliff-unreleased-end -->
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from collections.abc import Callable
|
|
3
|
+
|
|
4
|
+
from yarl import URL
|
|
5
|
+
|
|
6
|
+
from crawlee import RequestOptions, RequestTransformAction
|
|
7
|
+
from crawlee.crawlers import BeautifulSoupCrawler, BeautifulSoupCrawlingContext
|
|
8
|
+
from crawlee.http_clients import ImpitHttpClient
|
|
9
|
+
from crawlee.request_loaders import SitemapRequestLoader
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Create a transform_request_function that maps request options based on the host in
|
|
13
|
+
# the URL
|
|
14
|
+
def create_transform_request(
|
|
15
|
+
data_mapper: dict[str, dict],
|
|
16
|
+
) -> Callable[[RequestOptions], RequestOptions | RequestTransformAction]:
|
|
17
|
+
def transform_request(
|
|
18
|
+
request_options: RequestOptions,
|
|
19
|
+
) -> RequestOptions | RequestTransformAction:
|
|
20
|
+
# According to the Sitemap protocol, all URLs in a Sitemap must be from a single
|
|
21
|
+
# host.
|
|
22
|
+
request_host = URL(request_options['url']).host
|
|
23
|
+
|
|
24
|
+
if request_host and (mapping_data := data_mapper.get(request_host)):
|
|
25
|
+
# Set properties from the mapping data
|
|
26
|
+
if 'label' in mapping_data:
|
|
27
|
+
request_options['label'] = mapping_data['label']
|
|
28
|
+
if 'user_data' in mapping_data:
|
|
29
|
+
request_options['user_data'] = mapping_data['user_data']
|
|
30
|
+
|
|
31
|
+
return request_options
|
|
32
|
+
|
|
33
|
+
return 'unchanged'
|
|
34
|
+
|
|
35
|
+
return transform_request
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
async def main() -> None:
|
|
39
|
+
# Prepare data mapping for hosts
|
|
40
|
+
apify_host = URL('https://apify.com/sitemap.xml').host
|
|
41
|
+
crawlee_host = URL('https://crawlee.dev/sitemap.xml').host
|
|
42
|
+
|
|
43
|
+
if not apify_host or not crawlee_host:
|
|
44
|
+
raise ValueError('Unable to extract host from URLs')
|
|
45
|
+
|
|
46
|
+
data_map = {
|
|
47
|
+
apify_host: {
|
|
48
|
+
'label': 'apify',
|
|
49
|
+
'user_data': {'source': 'apify'},
|
|
50
|
+
},
|
|
51
|
+
crawlee_host: {
|
|
52
|
+
'label': 'crawlee',
|
|
53
|
+
'user_data': {'source': 'crawlee'},
|
|
54
|
+
},
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
# Initialize the SitemapRequestLoader with the transform function
|
|
58
|
+
async with SitemapRequestLoader(
|
|
59
|
+
# Set the sitemap URLs and the HTTP client
|
|
60
|
+
sitemap_urls=['https://crawlee.dev/sitemap.xml', 'https://apify.com/sitemap.xml'],
|
|
61
|
+
http_client=ImpitHttpClient(),
|
|
62
|
+
transform_request_function=create_transform_request(data_map),
|
|
63
|
+
) as sitemap_loader:
|
|
64
|
+
# Convert the sitemap loader to a request manager
|
|
65
|
+
request_manager = await sitemap_loader.to_tandem()
|
|
66
|
+
|
|
67
|
+
# Create and configure the crawler
|
|
68
|
+
crawler = BeautifulSoupCrawler(
|
|
69
|
+
request_manager=request_manager,
|
|
70
|
+
max_requests_per_crawl=10,
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
# Create default handler for requests without a specific label
|
|
74
|
+
@crawler.router.default_handler
|
|
75
|
+
async def handler(context: BeautifulSoupCrawlingContext) -> None:
|
|
76
|
+
source = context.request.user_data.get('source', 'unknown')
|
|
77
|
+
context.log.info(
|
|
78
|
+
f'Processing request: {context.request.url} from source: {source}'
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
# Create handler for requests labeled 'apify'
|
|
82
|
+
@crawler.router.handler('apify')
|
|
83
|
+
async def apify_handler(context: BeautifulSoupCrawlingContext) -> None:
|
|
84
|
+
source = context.request.user_data.get('source', 'unknown')
|
|
85
|
+
context.log.info(
|
|
86
|
+
f'Apify handler processing: {context.request.url} from source: {source}'
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
# Create handler for requests labeled 'crawlee'
|
|
90
|
+
@crawler.router.handler('crawlee')
|
|
91
|
+
async def crawlee_handler(context: BeautifulSoupCrawlingContext) -> None:
|
|
92
|
+
source = context.request.user_data.get('source', 'unknown')
|
|
93
|
+
context.log.info(
|
|
94
|
+
f'Crawlee handler processing: {context.request.url} from source: {source}'
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
await crawler.run()
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
if __name__ == '__main__':
|
|
101
|
+
asyncio.run(main())
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
---
|
|
2
|
+
id: using-sitemap-request-loader
|
|
3
|
+
title: Using sitemap request loader
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
import ApiLink from '@site/src/components/ApiLink';
|
|
7
|
+
|
|
8
|
+
import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
|
|
9
|
+
|
|
10
|
+
import SitemapRequestLoaderExample from '!!raw-loader!roa-loader!./code_examples/using_sitemap_request_loader.py';
|
|
11
|
+
|
|
12
|
+
This example demonstrates how to use <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> to crawl websites that provide `sitemap.xml` files following the [Sitemaps protocol](https://www.sitemaps.org/protocol.html). The <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> processes sitemaps in a streaming fashion without loading them entirely into memory, making it suitable for large sitemaps.
|
|
13
|
+
|
|
14
|
+
The example shows how to use the `transform_request_function` parameter to configure request options based on URL patterns. This allows you to modify request properties such as labels and user data based on the source URL, enabling different handling logic for different websites or sections.
|
|
15
|
+
|
|
16
|
+
The following code example implements processing of sitemaps from two different domains (Apify and Crawlee), with different labels assigned to requests based on their host. The `create_transform_request` function maps each host to the corresponding request configuration, while the crawler uses different handlers based on the assigned labels.
|
|
17
|
+
|
|
18
|
+
<RunnableCodeBlock className="language-python" language="python">
|
|
19
|
+
{SitemapRequestLoaderExample}
|
|
20
|
+
</RunnableCodeBlock>
|
|
21
|
+
|
|
22
|
+
For more information about request loaders, see the [Request loaders guide](../guides/request-loaders).
|
|
@@ -56,7 +56,7 @@ from crawlee.errors import (
|
|
|
56
56
|
SessionError,
|
|
57
57
|
UserDefinedErrorHandlerError,
|
|
58
58
|
)
|
|
59
|
-
from crawlee.events._types import Event, EventCrawlerStatusData, EventPersistStateData
|
|
59
|
+
from crawlee.events._types import Event, EventCrawlerStatusData
|
|
60
60
|
from crawlee.http_clients import ImpitHttpClient
|
|
61
61
|
from crawlee.router import Router
|
|
62
62
|
from crawlee.sessions import SessionPool
|
|
@@ -751,9 +751,6 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
|
|
|
751
751
|
|
|
752
752
|
await self._autoscaled_pool.run()
|
|
753
753
|
|
|
754
|
-
# Emit PERSIST_STATE event when crawler is finishing to allow listeners to persist their state if needed
|
|
755
|
-
event_manager.emit(event=Event.PERSIST_STATE, event_data=EventPersistStateData(is_migrating=False))
|
|
756
|
-
|
|
757
754
|
async def add_requests(
|
|
758
755
|
self,
|
|
759
756
|
requests: Sequence[str | Request],
|
|
@@ -130,11 +130,13 @@ class EventManager:
|
|
|
130
130
|
if not self._active:
|
|
131
131
|
raise RuntimeError(f'The {self.__class__.__name__} is not active.')
|
|
132
132
|
|
|
133
|
+
# Stop persist state event periodic emission and manually emit last one to ensure latest state is saved.
|
|
134
|
+
await self._emit_persist_state_event_rec_task.stop()
|
|
135
|
+
await self._emit_persist_state_event()
|
|
133
136
|
await self.wait_for_all_listeners_to_complete(timeout=self._close_timeout)
|
|
134
137
|
self._event_emitter.remove_all_listeners()
|
|
135
138
|
self._listener_tasks.clear()
|
|
136
139
|
self._listeners_to_wrappers.clear()
|
|
137
|
-
await self._emit_persist_state_event_rec_task.stop()
|
|
138
140
|
self._active = False
|
|
139
141
|
|
|
140
142
|
@overload
|
{crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/request_loaders/_sitemap_request_loader.py
RENAMED
|
@@ -9,7 +9,7 @@ from typing import TYPE_CHECKING, Annotated, Any
|
|
|
9
9
|
from pydantic import BaseModel, ConfigDict, Field
|
|
10
10
|
from typing_extensions import override
|
|
11
11
|
|
|
12
|
-
from crawlee import Request
|
|
12
|
+
from crawlee import Request, RequestOptions
|
|
13
13
|
from crawlee._utils.docs import docs_group
|
|
14
14
|
from crawlee._utils.globs import Glob
|
|
15
15
|
from crawlee._utils.recoverable_state import RecoverableState
|
|
@@ -18,9 +18,10 @@ from crawlee.request_loaders._request_loader import RequestLoader
|
|
|
18
18
|
|
|
19
19
|
if TYPE_CHECKING:
|
|
20
20
|
import re
|
|
21
|
-
from collections.abc import Sequence
|
|
21
|
+
from collections.abc import Callable, Sequence
|
|
22
22
|
from types import TracebackType
|
|
23
23
|
|
|
24
|
+
from crawlee import RequestTransformAction
|
|
24
25
|
from crawlee.http_clients import HttpClient
|
|
25
26
|
from crawlee.proxy_configuration import ProxyInfo
|
|
26
27
|
from crawlee.storage_clients.models import ProcessedRequest
|
|
@@ -112,6 +113,7 @@ class SitemapRequestLoader(RequestLoader):
|
|
|
112
113
|
exclude: list[re.Pattern[Any] | Glob] | None = None,
|
|
113
114
|
max_buffer_size: int = 200,
|
|
114
115
|
persist_state_key: str | None = None,
|
|
116
|
+
transform_request_function: Callable[[RequestOptions], RequestOptions | RequestTransformAction] | None = None,
|
|
115
117
|
) -> None:
|
|
116
118
|
"""Initialize the sitemap request loader.
|
|
117
119
|
|
|
@@ -125,6 +127,9 @@ class SitemapRequestLoader(RequestLoader):
|
|
|
125
127
|
persist_state_key: A key for persisting the loader's state in the KeyValueStore.
|
|
126
128
|
When provided, allows resuming from where it left off after interruption.
|
|
127
129
|
If None, no state persistence occurs.
|
|
130
|
+
transform_request_function: An optional function to transform requests
|
|
131
|
+
generated by the loader. It receives `RequestOptions` with `url` and should return either
|
|
132
|
+
modified `RequestOptions` or a `RequestTransformAction`.
|
|
128
133
|
"""
|
|
129
134
|
self._http_client = http_client
|
|
130
135
|
self._sitemap_urls = sitemap_urls
|
|
@@ -132,6 +137,7 @@ class SitemapRequestLoader(RequestLoader):
|
|
|
132
137
|
self._exclude = exclude
|
|
133
138
|
self._proxy_info = proxy_info
|
|
134
139
|
self._max_buffer_size = max_buffer_size
|
|
140
|
+
self._transform_request_function = transform_request_function
|
|
135
141
|
|
|
136
142
|
# Synchronization for queue operations
|
|
137
143
|
self._queue_has_capacity = asyncio.Event()
|
|
@@ -313,8 +319,15 @@ class SitemapRequestLoader(RequestLoader):
|
|
|
313
319
|
|
|
314
320
|
async with self._queue_lock:
|
|
315
321
|
url = state.url_queue.popleft()
|
|
316
|
-
|
|
317
|
-
|
|
322
|
+
request_option = RequestOptions(url=url)
|
|
323
|
+
if self._transform_request_function:
|
|
324
|
+
transform_request_option = self._transform_request_function(request_option)
|
|
325
|
+
if transform_request_option == 'skip':
|
|
326
|
+
state.total_count -= 1
|
|
327
|
+
continue
|
|
328
|
+
if transform_request_option != 'unchanged':
|
|
329
|
+
request_option = transform_request_option
|
|
330
|
+
request = Request.from_url(**request_option)
|
|
318
331
|
state.in_progress.add(request.url)
|
|
319
332
|
if len(state.url_queue) < self._max_buffer_size:
|
|
320
333
|
self._queue_has_capacity.set()
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
|
+
import warnings
|
|
4
5
|
from dataclasses import asdict, dataclass
|
|
5
6
|
from datetime import datetime, timedelta, timezone
|
|
6
7
|
from typing import Annotated, Any
|
|
@@ -76,7 +77,6 @@ class StatisticsState(BaseModel):
|
|
|
76
77
|
crawler_started_at: Annotated[datetime | None, Field(alias='crawlerStartedAt')] = None
|
|
77
78
|
crawler_last_started_at: Annotated[datetime | None, Field(alias='crawlerLastStartTimestamp')] = None
|
|
78
79
|
crawler_finished_at: Annotated[datetime | None, Field(alias='crawlerFinishedAt')] = None
|
|
79
|
-
crawler_runtime: Annotated[timedelta_ms, Field(alias='crawlerRuntimeMillis')] = timedelta()
|
|
80
80
|
errors: dict[str, Any] = Field(default_factory=dict)
|
|
81
81
|
retry_errors: dict[str, Any] = Field(alias='retryErrors', default_factory=dict)
|
|
82
82
|
requests_with_status_code: dict[str, int] = Field(alias='requestsWithStatusCode', default_factory=dict)
|
|
@@ -93,6 +93,37 @@ class StatisticsState(BaseModel):
|
|
|
93
93
|
),
|
|
94
94
|
] = {}
|
|
95
95
|
|
|
96
|
+
# Tracks the crawler runtime that has already been persisted, i.e. the runtime accumulated in previous runs.
|
|
97
|
+
_runtime_offset: Annotated[timedelta, Field(exclude=True)] = timedelta()
|
|
98
|
+
|
|
99
|
+
def model_post_init(self, /, __context: Any) -> None:
|
|
100
|
+
self._runtime_offset = self.crawler_runtime or self._runtime_offset
|
|
101
|
+
|
|
102
|
+
@property
|
|
103
|
+
def crawler_runtime(self) -> timedelta:
|
|
104
|
+
if self.crawler_last_started_at:
|
|
105
|
+
finished_at = self.crawler_finished_at or datetime.now(timezone.utc)
|
|
106
|
+
return self._runtime_offset + finished_at - self.crawler_last_started_at
|
|
107
|
+
return self._runtime_offset
|
|
108
|
+
|
|
109
|
+
@crawler_runtime.setter
|
|
110
|
+
def crawler_runtime(self, value: timedelta) -> None:
|
|
111
|
+
# Setter for backwards compatibility only; `crawler_runtime` is now a computed field and can't be set manually.
|
|
112
|
+
# To be removed in v2 release https://github.com/apify/crawlee-python/issues/1567
|
|
113
|
+
warnings.warn(
|
|
114
|
+
f"Setting 'crawler_runtime' is deprecated and will be removed in a future version."
|
|
115
|
+
f' Value {value} will not be used.',
|
|
116
|
+
DeprecationWarning,
|
|
117
|
+
stacklevel=2,
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
@computed_field(alias='crawlerRuntimeMillis')
|
|
121
|
+
def crawler_runtime_for_serialization(self) -> timedelta:
|
|
122
|
+
if self.crawler_last_started_at:
|
|
123
|
+
finished_at = self.crawler_finished_at or datetime.now(timezone.utc)
|
|
124
|
+
return self._runtime_offset + finished_at - self.crawler_last_started_at
|
|
125
|
+
return self._runtime_offset
|
|
126
|
+
|
|
96
127
|
@computed_field(alias='requestTotalDurationMillis', return_type=timedelta_ms) # type: ignore[prop-decorator]
|
|
97
128
|
@property
|
|
98
129
|
def request_total_duration(self) -> timedelta:
|
|
@@ -110,9 +110,6 @@ class Statistics(Generic[TStatisticsState]):
|
|
|
110
110
|
# Flag to indicate the context state.
|
|
111
111
|
self._active = False
|
|
112
112
|
|
|
113
|
-
# Pre-existing runtime offset, that can be non-zero when restoring serialized state from KVS.
|
|
114
|
-
self._runtime_offset = timedelta(seconds=0)
|
|
115
|
-
|
|
116
113
|
def replace_state_model(self, state_model: type[TNewStatisticsState]) -> Statistics[TNewStatisticsState]:
|
|
117
114
|
"""Create near copy of the `Statistics` with replaced `state_model`."""
|
|
118
115
|
new_statistics: Statistics[TNewStatisticsState] = Statistics(
|
|
@@ -168,8 +165,8 @@ class Statistics(Generic[TStatisticsState]):
|
|
|
168
165
|
raise RuntimeError(f'The {self.__class__.__name__} is already active.')
|
|
169
166
|
|
|
170
167
|
await self._state.initialize()
|
|
171
|
-
|
|
172
|
-
self.
|
|
168
|
+
# Reset `crawler_finished_at` to indicate a new run in progress.
|
|
169
|
+
self.state.crawler_finished_at = None
|
|
173
170
|
|
|
174
171
|
# Start periodic logging and let it print initial state before activation.
|
|
175
172
|
self._periodic_logger.start()
|
|
@@ -200,10 +197,6 @@ class Statistics(Generic[TStatisticsState]):
|
|
|
200
197
|
# Stop logging and deactivate the statistics to prevent further changes to crawler_runtime
|
|
201
198
|
await self._periodic_logger.stop()
|
|
202
199
|
self.state.crawler_finished_at = datetime.now(timezone.utc)
|
|
203
|
-
self.state.crawler_runtime = (
|
|
204
|
-
self._runtime_offset + self.state.crawler_finished_at - self.state.crawler_last_started_at
|
|
205
|
-
)
|
|
206
|
-
|
|
207
200
|
self._active = False
|
|
208
201
|
await self._state.teardown()
|
|
209
202
|
|
|
@@ -262,20 +255,8 @@ class Statistics(Generic[TStatisticsState]):
|
|
|
262
255
|
|
|
263
256
|
del self._requests_in_progress[request_id_or_key]
|
|
264
257
|
|
|
265
|
-
def _update_crawler_runtime(self) -> None:
|
|
266
|
-
current_run_duration = (
|
|
267
|
-
(datetime.now(timezone.utc) - self.state.crawler_last_started_at)
|
|
268
|
-
if self.state.crawler_last_started_at
|
|
269
|
-
else timedelta()
|
|
270
|
-
)
|
|
271
|
-
self.state.crawler_runtime = current_run_duration + self._runtime_offset
|
|
272
|
-
|
|
273
258
|
def calculate(self) -> FinalStatistics:
|
|
274
259
|
"""Calculate the current statistics."""
|
|
275
|
-
if self._active:
|
|
276
|
-
# Only update state when active. If not, just report the last known runtime.
|
|
277
|
-
self._update_crawler_runtime()
|
|
278
|
-
|
|
279
260
|
total_minutes = self.state.crawler_runtime.total_seconds() / 60
|
|
280
261
|
state = self._state.current_value
|
|
281
262
|
serialized_state = state.model_dump(by_alias=False)
|
|
@@ -1673,7 +1673,6 @@ def _process_run_crawler(requests: list[str], storage_dir: str) -> StatisticsSta
|
|
|
1673
1673
|
return asyncio.run(_run_crawler(requests=requests, storage_dir=storage_dir))
|
|
1674
1674
|
|
|
1675
1675
|
|
|
1676
|
-
@pytest.mark.skip(reason='This test is flaky, see https://github.com/apify/crawlee-python/issues/1560.')
|
|
1677
1676
|
async def test_crawler_statistics_persistence(tmp_path: Path) -> None:
|
|
1678
1677
|
"""Test that crawler statistics persist and are loaded correctly.
|
|
1679
1678
|
|
|
@@ -5,6 +5,7 @@ import logging
|
|
|
5
5
|
from datetime import timedelta
|
|
6
6
|
from functools import update_wrapper
|
|
7
7
|
from typing import TYPE_CHECKING, Any
|
|
8
|
+
from unittest import mock
|
|
8
9
|
from unittest.mock import AsyncMock, MagicMock
|
|
9
10
|
|
|
10
11
|
import pytest
|
|
@@ -207,3 +208,14 @@ async def test_methods_raise_error_when_not_active(event_system_info_data: Event
|
|
|
207
208
|
await event_manager.wait_for_all_listeners_to_complete()
|
|
208
209
|
|
|
209
210
|
assert event_manager.active is True
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
async def test_event_manager_in_context_persistence() -> None:
|
|
214
|
+
"""Test that entering the `EventManager` context emits persist state event at least once."""
|
|
215
|
+
event_manager = EventManager()
|
|
216
|
+
|
|
217
|
+
with mock.patch.object(event_manager, '_emit_persist_state_event', AsyncMock()) as mocked_emit_persist_state_event:
|
|
218
|
+
async with event_manager:
|
|
219
|
+
pass
|
|
220
|
+
|
|
221
|
+
assert mocked_emit_persist_state_event.call_count >= 1
|
{crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/request_loaders/test_sitemap_request_loader.py
RENAMED
|
@@ -4,6 +4,7 @@ import gzip
|
|
|
4
4
|
|
|
5
5
|
from yarl import URL
|
|
6
6
|
|
|
7
|
+
from crawlee import RequestOptions, RequestTransformAction
|
|
7
8
|
from crawlee.http_clients._base import HttpClient
|
|
8
9
|
from crawlee.request_loaders._sitemap_request_loader import SitemapRequestLoader
|
|
9
10
|
from crawlee.storages import KeyValueStore
|
|
@@ -172,3 +173,37 @@ async def test_recovery_data_persistence_for_sitemap_loading(
|
|
|
172
173
|
|
|
173
174
|
assert item is not None
|
|
174
175
|
assert item.url == next_item_in_kvs
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
async def test_transform_request_function(server_url: URL, http_client: HttpClient) -> None:
|
|
179
|
+
sitemap_url = (server_url / 'sitemap.xml').with_query(base64=encode_base64(BASIC_SITEMAP.encode()))
|
|
180
|
+
|
|
181
|
+
def transform_request(request_options: RequestOptions) -> RequestOptions | RequestTransformAction:
|
|
182
|
+
request_options['user_data'] = {'transformed': True}
|
|
183
|
+
return request_options
|
|
184
|
+
|
|
185
|
+
sitemap_loader = SitemapRequestLoader(
|
|
186
|
+
[str(sitemap_url)],
|
|
187
|
+
http_client=http_client,
|
|
188
|
+
transform_request_function=transform_request,
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
extracted_urls = set()
|
|
192
|
+
|
|
193
|
+
while not await sitemap_loader.is_finished():
|
|
194
|
+
request = await sitemap_loader.fetch_next_request()
|
|
195
|
+
assert request is not None
|
|
196
|
+
assert request.user_data.get('transformed') is True
|
|
197
|
+
|
|
198
|
+
extracted_urls.add(request.url)
|
|
199
|
+
|
|
200
|
+
await sitemap_loader.mark_request_as_handled(request)
|
|
201
|
+
|
|
202
|
+
assert len(extracted_urls) == 5
|
|
203
|
+
assert extracted_urls == {
|
|
204
|
+
'http://not-exists.com/',
|
|
205
|
+
'http://not-exists.com/catalog?item=12&desc=vacation_hawaii',
|
|
206
|
+
'http://not-exists.com/catalog?item=73&desc=vacation_new_zealand',
|
|
207
|
+
'http://not-exists.com/catalog?item=74&desc=vacation_newfoundland',
|
|
208
|
+
'http://not-exists.com/catalog?item=83&desc=vacation_usa',
|
|
209
|
+
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/deployment/code_examples/apify/log_with_config_example.py
RENAMED
|
File without changes
|
{crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/deployment/code_examples/apify/proxy_advanced_example.py
RENAMED
|
File without changes
|
|
File without changes
|
{crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/deployment/code_examples/google/cloud_run_example.py
RENAMED
|
File without changes
|
{crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/deployment/code_examples/google/google_example.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/adaptive_playwright_crawler.py
RENAMED
|
File without changes
|
|
File without changes
|
{crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/add_data_to_dataset_dataset.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/beautifulsoup_crawler_stop.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/crawl_all_links_on_website_bs.py
RENAMED
|
File without changes
|
{crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/crawl_all_links_on_website_pw.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/playwright_block_requests.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/respect_robots_txt_file.py
RENAMED
|
File without changes
|
|
File without changes
|
{crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/using_browser_profiles_chrome.py
RENAMED
|
File without changes
|