crawlee-1.1.1.tar.gz → crawlee-1.1.1b1.tar.gz
This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
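For readers who want to verify a listing like the one below themselves, here is a minimal sketch that downloads both sdists via the public PyPI JSON API, extracts them, and prints a unified diff. This is an illustrative reconstruction, not the tool that generated this page, and it only diffs text files (binaries such as the `.png` entries are skipped).

```python
"""Sketch: reproduce a crawlee-1.1.1 vs crawlee-1.1.1b1 sdist diff from PyPI."""
import io
import json
import tarfile
import urllib.request
from difflib import unified_diff

def fetch_sdist(name: str, version: str) -> tarfile.TarFile:
    # The PyPI JSON API lists every file of a release; pick the source tarball.
    meta = json.load(urllib.request.urlopen(f"https://pypi.org/pypi/{name}/{version}/json"))
    url = next(f["url"] for f in meta["urls"] if f["packagetype"] == "sdist")
    return tarfile.open(fileobj=io.BytesIO(urllib.request.urlopen(url).read()), mode="r:gz")

def text_files(tar: tarfile.TarFile) -> dict[str, list[str]]:
    # Map archive paths (top-level "crawlee-<version>/" prefix dropped) to their
    # decoded lines; files that fail to decode as UTF-8 are treated as binary.
    out: dict[str, list[str]] = {}
    for member in tar.getmembers():
        if not member.isfile():
            continue
        path = member.name.split("/", 1)[1]
        try:
            out[path] = tar.extractfile(member).read().decode().splitlines(keepends=True)
        except UnicodeDecodeError:
            pass
    return out

old = text_files(fetch_sdist("crawlee", "1.1.1"))
new = text_files(fetch_sdist("crawlee", "1.1.1b1"))
for path in sorted(old.keys() | new.keys()):
    for line in unified_diff(old.get(path, []), new.get(path, []),
                             f"crawlee-1.1.1/{path}", f"crawlee-1.1.1b1/{path}"):
        print(line, end="")
```

Counting the `+`/`-` lines per file in that output yields the per-file change counts shown in the list below.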
- {crawlee-1.1.1 → crawlee-1.1.1b1}/.github/workflows/build_and_deploy_docs.yaml +2 -6
- {crawlee-1.1.1 → crawlee-1.1.1b1}/.github/workflows/templates_e2e_tests.yaml +1 -1
- {crawlee-1.1.1 → crawlee-1.1.1b1}/CHANGELOG.md +3 -5
- {crawlee-1.1.1 → crawlee-1.1.1b1}/PKG-INFO +1 -1
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/deployment/code_examples/google/cloud_run_example.py +1 -1
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/deployment/code_examples/google/google_example.py +5 -2
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/running_in_web_server/server.py +2 -2
- {crawlee-1.1.1 → crawlee-1.1.1b1}/pyproject.toml +2 -2
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_types.py +1 -20
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +2 -8
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_basic/_basic_crawler.py +46 -62
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_playwright/_playwright_crawler.py +3 -6
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_file_system/_dataset_client.py +2 -2
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_file_system/_key_value_store_client.py +3 -3
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_file_system/_request_queue_client.py +3 -3
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/e2e/project_template/test_static_crawlers_templates.py +0 -3
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_autoscaling/test_autoscaled_pool.py +4 -2
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_system.py +0 -3
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/crawlers/_basic/test_basic_crawler.py +3 -44
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py +0 -9
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/crawlers/_parsel/test_parsel_crawler.py +0 -9
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/crawlers/_playwright/test_playwright_crawler.py +0 -6
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/server.py +0 -32
- crawlee-1.1.1b1/tests/unit/server_endpoints.py +71 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storages/test_dataset.py +0 -17
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storages/test_key_value_store.py +0 -18
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storages/test_request_queue.py +0 -19
- {crawlee-1.1.1 → crawlee-1.1.1b1}/uv.lock +292 -370
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/package.json +0 -1
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/yarn.lock +499 -389
- crawlee-1.1.1/before_scroll.png +0 -0
- crawlee-1.1.1/tests/unit/crawlers/_playwright/test_utils.py +0 -157
- crawlee-1.1.1/tests/unit/server_endpoints.py +0 -142
- crawlee-1.1.1/tests/unit/server_static/test.js +0 -0
- crawlee-1.1.1/tests/unit/server_static/test.png +0 -0
- crawlee-1.1.1/website/src/components/LLMButtons.jsx +0 -510
- crawlee-1.1.1/website/src/components/LLMButtons.module.css +0 -151
- crawlee-1.1.1/website/src/theme/DocItem/Content/index.js +0 -35
- crawlee-1.1.1/website/src/theme/DocItem/Content/styles.module.css +0 -22
- {crawlee-1.1.1 → crawlee-1.1.1b1}/.editorconfig +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/.github/CODEOWNERS +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/.github/pull_request_template.md +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/.github/workflows/check_pr_title.yaml +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/.github/workflows/pre_release.yaml +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/.github/workflows/release.yaml +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/.github/workflows/run_code_checks.yaml +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/.github/workflows/update_new_issue.yaml +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/.gitignore +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/.markdownlint.yaml +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/.pre-commit-config.yaml +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/CONTRIBUTING.md +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/LICENSE +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/Makefile +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/README.md +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/deployment/apify_platform.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/deployment/code_examples/apify/crawler_as_actor_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/deployment/code_examples/apify/get_public_url.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/deployment/code_examples/apify/log_with_config_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/deployment/code_examples/apify/proxy_advanced_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/deployment/code_examples/apify/proxy_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/deployment/google_cloud.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/deployment/google_cloud_run.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/add_data_to_dataset.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/beautifulsoup_crawler.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/capture_screenshot_using_playwright.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/capturing_page_snapshots_with_error_snapshotter.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/adaptive_playwright_crawler.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/add_data_to_dataset_bs.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/add_data_to_dataset_dataset.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/add_data_to_dataset_pw.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/beautifulsoup_crawler.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/beautifulsoup_crawler_keep_alive.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/beautifulsoup_crawler_stop.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/capture_screenshot_using_playwright.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/configure_json_logging.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/crawl_all_links_on_website_bs.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/crawl_all_links_on_website_pw.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/crawl_multiple_urls_bs.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/crawl_multiple_urls_pw.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/crawl_specific_links_on_website_bs.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/crawl_specific_links_on_website_pw.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/crawl_website_with_relative_links_all_links.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/crawl_website_with_relative_links_same_domain.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/crawl_website_with_relative_links_same_hostname.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/crawl_website_with_relative_links_same_origin.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/export_entire_dataset_to_file_csv.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/export_entire_dataset_to_file_json.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/extract_and_add_specific_links_on_website_bs.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/extract_and_add_specific_links_on_website_pw.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/fill_and_submit_web_form_crawler.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/fill_and_submit_web_form_request.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/parsel_crawler.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/parsel_crawler_with_error_snapshotter.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/playwright_block_requests.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/playwright_crawler.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/playwright_crawler_with_camoufox.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/playwright_crawler_with_error_snapshotter.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/playwright_crawler_with_fingerprint_generator.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/respect_robots_on_skipped_request.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/respect_robots_txt_file.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/resuming_paused_crawl.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/using_browser_profiles_chrome.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/using_browser_profiles_firefox.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/using_sitemap_request_loader.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/crawl_all_links_on_website.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/crawl_multiple_urls.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/crawl_specific_links_on_website.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/crawl_website_with_relative_links.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/crawler_keep_alive.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/crawler_stop.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/export_entire_dataset_to_file.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/fill_and_submit_web_form.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/json_logging.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/parsel_crawler.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/playwright_crawler.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/playwright_crawler_adaptive.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/playwright_crawler_with_block_requests.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/playwright_crawler_with_camoufox.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/playwright_crawler_with_fingerprint_generator.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/respect_robots_txt_file.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/resuming_paused_crawl.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/using_browser_profile.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/using_sitemap_request_loader.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/architecture_overview.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/avoid_blocking.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/avoid_blocking/default_fingerprint_generator_with_args.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/avoid_blocking/playwright_with_fingerprint_generator.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/creating_web_archive/manual_archiving_parsel_crawler.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/creating_web_archive/manual_archiving_playwright_crawler.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/creating_web_archive/simple_pw_through_proxy_pywb_server.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/error_handling/change_handle_error_status.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/error_handling/disable_retry.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/error_handling/handle_proxy_error.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/http_clients/parsel_curl_impersonate_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/http_clients/parsel_httpx_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/http_clients/parsel_impit_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/http_crawlers/beautifulsoup_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/http_crawlers/custom_crawler_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/http_crawlers/http_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/http_crawlers/parsel_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/login_crawler/http_login.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/login_crawler/playwright_login.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/playwright_crawler/browser_configuration_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/playwright_crawler/multiple_launch_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/playwright_crawler/plugin_browser_configuration_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/playwright_crawler/pre_navigation_hook_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/playwright_crawler_adaptive/handler.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/playwright_crawler_adaptive/init_beautifulsoup.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/playwright_crawler_adaptive/init_parsel.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/playwright_crawler_adaptive/init_prediction.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/playwright_crawler_adaptive/pre_nav_hooks.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/playwright_crawler_stagehand/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/playwright_crawler_stagehand/browser_classes.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/playwright_crawler_stagehand/stagehand_run.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/playwright_crawler_stagehand/support_classes.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/proxy_management/inspecting_bs_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/proxy_management/inspecting_pw_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/proxy_management/integration_bs_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/proxy_management/integration_pw_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/proxy_management/quick_start_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/proxy_management/session_bs_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/proxy_management/session_pw_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/proxy_management/tiers_bs_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/proxy_management/tiers_pw_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_loaders/rl_basic_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_loaders/rl_basic_example_with_persist.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_loaders/rl_tandem_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_loaders/rl_tandem_example_explicit.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_loaders/sitemap_basic_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_loaders/sitemap_example_with_persist.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_loaders/sitemap_tandem_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_loaders/sitemap_tandem_example_explicit.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_router/adaptive_crawler_handlers.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_router/basic_request_handlers.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_router/custom_router_default_only.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_router/error_handler.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_router/failed_request_handler.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_router/http_pre_navigation.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_router/playwright_pre_navigation.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_router/simple_default_handler.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/running_in_web_server/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/running_in_web_server/crawler.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/scaling_crawlers/max_tasks_per_minute_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/scaling_crawlers/min_and_max_concurrency_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/service_locator/service_conflicts.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/service_locator/service_crawler_configuration.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/service_locator/service_crawler_event_manager.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/service_locator/service_crawler_storage_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/service_locator/service_locator_configuration.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/service_locator/service_locator_event_manager.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/service_locator/service_locator_storage_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/service_locator/service_storage_configuration.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/service_locator/service_storage_storage_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/session_management/multi_sessions_http.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/session_management/one_session_http.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/session_management/sm_basic.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/session_management/sm_beautifulsoup.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/session_management/sm_http.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/session_management/sm_parsel.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/session_management/sm_playwright.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/session_management/sm_standalone.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storage_clients/custom_storage_client_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storage_clients/file_system_storage_client_basic_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storage_clients/file_system_storage_client_configuration_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storage_clients/memory_storage_client_basic_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storage_clients/redis_storage_client_basic_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storage_clients/redis_storage_client_configuration_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storage_clients/registering_storage_clients_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storage_clients/sql_storage_client_basic_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storage_clients/sql_storage_client_configuration_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storages/cleaning_do_not_purge_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storages/cleaning_purge_explicitly_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storages/dataset_basic_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storages/dataset_with_crawler_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storages/dataset_with_crawler_explicit_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storages/helper_add_requests_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storages/helper_enqueue_links_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storages/kvs_basic_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storages/kvs_with_crawler_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storages/kvs_with_crawler_explicit_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storages/opening.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storages/rq_basic_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storages/rq_with_crawler_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storages/rq_with_crawler_explicit_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/trace_and_monitor_crawlers/instrument_crawler.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/crawler_login.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/creating_web_archive.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/error_handling.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/http_clients.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/http_crawlers.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/playwright_crawler.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/playwright_crawler_adaptive.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/playwright_crawler_stagehand.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/proxy_management.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/request_loaders.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/request_router.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/running_in_web_server.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/scaling_crawlers.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/service_locator.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/session_management.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/storage_clients.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/storages.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/trace_and_monitor_crawlers.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/01_setting_up.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/02_first_crawler.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/03_adding_more_urls.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/04_real_world_project.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/05_crawling.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/06_scraping.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/07_saving_data.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/08_refactoring.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/09_running_in_cloud.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/02_bs.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/02_bs_better.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/02_request_queue.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/03_enqueue_strategy.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/03_finding_new_links.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/03_globs.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/03_original_code.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/03_transform_request.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/04_sanity_check.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/05_crawling_detail.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/05_crawling_listing.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/06_scraping.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/07_final_code.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/07_first_code.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/08_main.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/08_routes.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/09_apify_sdk.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/routes.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/index.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/pyproject.toml +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/quick-start/code_examples/beautifulsoup_crawler_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/quick-start/code_examples/parsel_crawler_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/quick-start/code_examples/playwright_crawler_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/quick-start/code_examples/playwright_crawler_headful_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/quick-start/index.mdx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/upgrading/upgrading_to_v0x.md +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/upgrading/upgrading_to_v1.md +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/renovate.json +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_autoscaling/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_autoscaling/_types.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_autoscaling/autoscaled_pool.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_autoscaling/py.typed +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_autoscaling/snapshotter.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_autoscaling/system_status.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_browserforge_workaround.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_cli.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_consts.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_log_config.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_request.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_service_locator.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/blocked.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/byte_size.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/console.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/context.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/crypto.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/docs.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/file.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/globs.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/html_to_text.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/models.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/raise_if_too_many_kwargs.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/recoverable_state.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/recurring_task.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/requests.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/robots.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/sitemap.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/system.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/time.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/try_import.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/urls.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/wait.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/web.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/browsers/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/browsers/_browser_controller.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/browsers/_browser_plugin.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/browsers/_browser_pool.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/browsers/_playwright_browser.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/browsers/_playwright_browser_controller.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/browsers/_playwright_browser_plugin.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/browsers/_types.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/browsers/py.typed +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/configuration.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_abstract_http/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_abstract_http/_abstract_http_parser.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_abstract_http/_http_crawling_context.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_abstract_http/py.typed +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_adaptive_playwright/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_adaptive_playwright/_result_comparator.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_adaptive_playwright/_utils.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_basic/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_basic/_basic_crawling_context.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_basic/_context_pipeline.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_basic/_logging_utils.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_basic/py.typed +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_beautifulsoup/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawler.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawling_context.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_parser.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_beautifulsoup/_utils.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_beautifulsoup/py.typed +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_http/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_http/_http_crawler.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_http/_http_parser.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_parsel/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_parsel/_parsel_crawler.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_parsel/_parsel_crawling_context.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_parsel/_parsel_parser.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_parsel/_utils.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_playwright/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_playwright/_playwright_crawling_context.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_playwright/_playwright_http_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_playwright/_types.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_playwright/_utils.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_types.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/py.typed +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/errors.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/events/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/events/_event_manager.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/events/_local_event_manager.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/events/_types.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/events/py.typed +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/fingerprint_suite/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/fingerprint_suite/_browserforge_adapter.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/fingerprint_suite/_consts.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/fingerprint_suite/_fingerprint_generator.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/fingerprint_suite/_header_generator.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/fingerprint_suite/_types.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/fingerprint_suite/py.typed +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/http_clients/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/http_clients/_base.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/http_clients/_curl_impersonate.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/http_clients/_httpx.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/http_clients/_impit.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/otel/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/otel/crawler_instrumentor.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/cookiecutter.json +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/hooks/post_gen_project.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/hooks/pre_gen_project.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/templates/main.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/templates/main_beautifulsoup.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/templates/main_parsel.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/templates/main_playwright.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/templates/main_playwright_camoufox.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/templates/routes_beautifulsoup.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/templates/routes_camoufox.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/templates/routes_parsel.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/templates/routes_playwright.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/templates/routes_playwright_camoufox.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/{{cookiecutter.project_name}}/.dockerignore +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/{{cookiecutter.project_name}}/README.md +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/{{cookiecutter.project_name}}/requirements.txt +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__main__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/main.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/routes.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/proxy_configuration.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/py.typed +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/request_loaders/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/request_loaders/_request_list.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/request_loaders/_request_loader.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/request_loaders/_request_manager.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/request_loaders/_request_manager_tandem.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/request_loaders/_sitemap_request_loader.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/router.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/sessions/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/sessions/_cookies.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/sessions/_models.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/sessions/_session.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/sessions/_session_pool.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/sessions/py.typed +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/statistics/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/statistics/_error_snapshotter.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/statistics/_error_tracker.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/statistics/_models.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/statistics/_statistics.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_base/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_base/_dataset_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_base/_key_value_store_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_base/_request_queue_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_base/_storage_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_base/py.typed +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_file_system/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_file_system/_storage_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_file_system/_utils.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_file_system/py.typed +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_memory/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_memory/_dataset_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_memory/_key_value_store_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_memory/_request_queue_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_memory/_storage_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_memory/py.typed +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_redis/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_redis/_client_mixin.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_redis/_dataset_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_redis/_key_value_store_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_redis/_request_queue_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_redis/_storage_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_redis/_utils.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_redis/lua_scripts/atomic_bloom_add_requests.lua +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_redis/lua_scripts/atomic_fetch_request.lua +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_redis/lua_scripts/atomic_set_add_requests.lua +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_redis/lua_scripts/reclaim_stale_requests.lua +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_redis/py.typed +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_sql/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_sql/_client_mixin.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_sql/_dataset_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_sql/_db_models.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_sql/_key_value_store_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_sql/_request_queue_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_sql/_storage_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_sql/py.typed +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/models.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/py.typed +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storages/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storages/_base.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storages/_dataset.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storages/_key_value_store.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storages/_request_queue.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storages/_storage_instance_manager.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storages/_utils.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storages/py.typed +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/e2e/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/e2e/conftest.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/e2e/project_template/utils.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/README.md +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/__init__.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_autoscaling/test_snapshotter.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_autoscaling/test_system_status.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_statistics/test_error_tracker.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_statistics/test_periodic_logging.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_statistics/test_persistence.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_statistics/test_request_processing_record.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_byte_size.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_console.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_crypto.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_file.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_globs.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_html_to_text.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_measure_time.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_raise_if_too_many_kwargs.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_recurring_task.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_requests.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_robots.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_sitemap.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_timedelata_ms.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_urls.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/browsers/test_browser_pool.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/browsers/test_playwright_browser.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/browsers/test_playwright_browser_controller.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/browsers/test_playwright_browser_plugin.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/conftest.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler_statistics.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawling_context.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/crawlers/_adaptive_playwright/test_predictor.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/crawlers/_basic/test_context_pipeline.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/crawlers/_http/test_http_crawler.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/events/test_event_manager.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/events/test_local_event_manager.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/fingerprint_suite/test_adapters.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/fingerprint_suite/test_header_generator.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/http_clients/test_http_clients.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/http_clients/test_httpx.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/otel/test_crawler_instrumentor.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/proxy_configuration/test_new_proxy_info.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/proxy_configuration/test_tiers.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/request_loaders/test_request_list.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/request_loaders/test_sitemap_request_loader.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/sessions/test_cookies.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/sessions/test_models.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/sessions/test_session.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/sessions/test_session_pool.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storage_clients/_file_system/test_fs_rq_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storage_clients/_memory/test_memory_dataset_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storage_clients/_memory/test_memory_kvs_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storage_clients/_memory/test_memory_rq_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storage_clients/_redis/test_redis_dataset_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storage_clients/_redis/test_redis_kvs_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storage_clients/_redis/test_redis_rq_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storage_clients/_sql/test_sql_dataset_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storage_clients/_sql/test_sql_kvs_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storage_clients/_sql/test_sql_rq_client.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storages/conftest.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storages/test_request_manager_tandem.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storages/test_storage_instance_manager.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/test_cli.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/test_configuration.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/test_log_config.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/test_router.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/test_service_locator.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/.eslintrc.json +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/.yarnrc.yml +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/babel.config.js +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/build_api_reference.sh +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/docusaurus.config.js +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/generate_module_shortcuts.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/patches/@docusaurus+core+3.4.0.patch +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/patches/@docusaurus+core+3.5.2.patch +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/roa-loader/index.js +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/roa-loader/package.json +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/sidebars.js +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/ApiLink.jsx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Button.jsx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Button.module.css +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/CopyButton.jsx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/CopyButton.module.css +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Gradients.jsx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Highlights.jsx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Highlights.module.css +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/HomepageCliExample.jsx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/HomepageCliExample.module.css +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/HomepageCtaSection.jsx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/HomepageCtaSection.module.css +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/HomepageHeroSection.jsx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/HomepageHeroSection.module.css +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/LanguageInfoWidget.jsx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/LanguageInfoWidget.module.css +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/LanguageSwitch.jsx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/LanguageSwitch.module.css +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/RiverSection.jsx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/RiverSection.module.css +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/ThreeCardsWithIcon.jsx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/ThreeCardsWithIcon.module.css +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/animated-crawlee-logo-dark.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/animated-crawlee-logo-light.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/RunnableCodeBlock.jsx +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/RunnableCodeBlock.module.css +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/css/custom.css +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/pages/home_page_example.py +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/pages/index.js +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/pages/index.module.css +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/ColorModeToggle/dark-mode-icon.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/ColorModeToggle/index.js +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/ColorModeToggle/light-mode-icon.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/ColorModeToggle/styles.module.css +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/DocItem/Layout/index.js +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/DocItem/Layout/styles.module.css +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/Footer/LinkItem/index.js +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/Footer/LinkItem/index.module.css +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/Footer/index.js +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/Footer/index.module.css +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/MDXComponents/A.js +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/Navbar/Content/index.js +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/Navbar/Content/styles.module.css +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/Navbar/Logo/index.js +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/Navbar/Logo/index.module.css +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/Navbar/MobileSidebar/Header/index.js +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/Navbar/MobileSidebar/Header/index.module.css +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/Navbar/MobileSidebar/Layout/index.js +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/Navbar/MobileSidebar/PrimaryMenu/index.js +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/Navbar/MobileSidebar/index.js +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/NavbarItem/ComponentTypes.js +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/.nojekyll +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/font/lota.woff +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/font/lota.woff2 +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/API.png +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/arrow_right.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/auto-scaling-dark.webp +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/auto-scaling-light.webp +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/check.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/chrome-scrape-dark.gif +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/chrome-scrape-light.gif +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/cloud_icon.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/community-dark-icon.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/community-light-icon.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/crawlee-dark-new.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/crawlee-dark.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/crawlee-javascript-dark.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/crawlee-javascript-light.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/crawlee-light-new.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/crawlee-light.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/crawlee-logo-monocolor.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/crawlee-logo.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/crawlee-python-dark.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/crawlee-python-light.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/crawlee-python-og.png +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/defaults-dark-icon.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/defaults-light-icon.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/discord-brand-dark.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/discord-brand.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/docusaurus.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/external-link.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/favicon.ico +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/favorite-tools-dark.webp +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/favorite-tools-light.webp +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/features/auto-scaling.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/features/automate-everything.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/features/fingerprints.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/features/node-requests.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/features/runs-on-py.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/features/storage.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/features/works-everywhere.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/fill-and-submit-web-form/00.jpg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/fill-and-submit-web-form/01.jpg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/fill-and-submit-web-form/02.jpg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/fill-and-submit-web-form/03.jpg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/getting-started/current-price.jpg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/getting-started/scraping-practice.jpg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/getting-started/select-an-element.jpg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/getting-started/selected-element.jpg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/getting-started/sku.jpg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/getting-started/title.jpg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/github-brand-dark.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/github-brand.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/guides/jaeger_otel_search_view_example.png +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/guides/jaeger_otel_trace_example.png +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/hearth copy.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/hearth.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/javascript_logo.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/js_file.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/logo-big.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/logo-blur.png +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/logo-blur.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/logo-zoom.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/menu-arrows.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/oss_logo.png +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/puppeteer-live-view-dashboard.png +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/puppeteer-live-view-detail.png +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/queue-dark-icon.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/queue-light-icon.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/resuming-paused-crawl/00.webp +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/resuming-paused-crawl/01.webp +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/robot.png +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/routing-dark-icon.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/routing-light-icon.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/scraping-utils-dark-icon.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/scraping-utils-light-icon.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/smart-proxy-dark.webp +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/smart-proxy-light.webp +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/source_code.png +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/system.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/triangles_dark.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/triangles_light.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/workflow.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/zero-setup-dark-icon.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/zero-setup-light-icon.svg +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/js/custom.js +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/robots.txt +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/tools/docs-prettier.config.js +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/tools/utils/externalLink.js +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/tools/website_gif/chrome-scrape-dark.gif +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/tools/website_gif/chrome-scrape-dark.mp4 +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/tools/website_gif/chrome-scrape-light.gif +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/tools/website_gif/chrome-scrape-light.mp4 +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/tools/website_gif/website_gif.mjs +0 -0
- {crawlee-1.1.1 → crawlee-1.1.1b1}/website/tsconfig.eslint.json +0 -0
{crawlee-1.1.1 → crawlee-1.1.1b1}/.github/workflows/build_and_deploy_docs.yaml
RENAMED
@@ -24,7 +24,7 @@ jobs:

     steps:
       - name: Checkout repository
-        uses: actions/checkout@
+        uses: actions/checkout@v5
         with:
           token: ${{ secrets.APIFY_SERVICE_ACCOUNT_GITHUB_TOKEN }}
           ref: ${{ github.event_name == 'workflow_call' && inputs.ref || github.ref }}
@@ -67,10 +67,6 @@ jobs:
         uses: actions/deploy-pages@v4

       - name: Invalidate CloudFront cache
-        run:
-          gh workflow run invalidate-cloudfront.yml \
-            --repo apify/apify-docs-private \
-            --field deployment=crawlee-web
-          echo "✅ CloudFront cache invalidation workflow triggered successfully"
+        run: gh workflow run invalidate.yaml --repo apify/apify-docs-private
         env:
           GITHUB_TOKEN: ${{ secrets.APIFY_SERVICE_ACCOUNT_GITHUB_TOKEN }}
{crawlee-1.1.1 → crawlee-1.1.1b1}/CHANGELOG.md
RENAMED
@@ -2,17 +2,15 @@

 All notable changes to this project will be documented in this file.

-
+<!-- git-cliff-unreleased-start -->
+## 1.1.1 - **not yet released**

 ### 🐛 Bug Fixes

 - Unify separators in `unique_key` construction ([#1569](https://github.com/apify/crawlee-python/pull/1569)) ([af46a37](https://github.com/apify/crawlee-python/commit/af46a3733b059a8052489296e172f005def953f7)) by [@vdusek](https://github.com/vdusek), closes [#1512](https://github.com/apify/crawlee-python/issues/1512)
-- Fix `same-domain` strategy ignoring public suffix ([#1572](https://github.com/apify/crawlee-python/pull/1572)) ([3d018b2](https://github.com/apify/crawlee-python/commit/3d018b21a28a4bee493829783057188d6106a69b)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1571](https://github.com/apify/crawlee-python/issues/1571)
-- Make context helpers work in `FailedRequestHandler` and `ErrorHandler` ([#1570](https://github.com/apify/crawlee-python/pull/1570)) ([b830019](https://github.com/apify/crawlee-python/commit/b830019350830ac33075316061659e2854f7f4a5)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1532](https://github.com/apify/crawlee-python/issues/1532)
-- Fix non-ASCII character corruption in `FileSystemStorageClient` on systems without UTF-8 default encoding ([#1580](https://github.com/apify/crawlee-python/pull/1580)) ([f179f86](https://github.com/apify/crawlee-python/commit/f179f8671b0b6af9264450e4fef7e49d1cecd2bd)) by [@Mantisus](https://github.com/Mantisus), closes [#1579](https://github.com/apify/crawlee-python/issues/1579)
-- Respect `<base>` when enqueuing ([#1590](https://github.com/apify/crawlee-python/pull/1590)) ([de517a1](https://github.com/apify/crawlee-python/commit/de517a1629cc29b20568143eb64018f216d4ba33)) by [@Mantisus](https://github.com/Mantisus), closes [#1589](https://github.com/apify/crawlee-python/issues/1589)


+<!-- git-cliff-unreleased-end -->
 ## [1.1.0](https://github.com/apify/crawlee-python/releases/tag/v1.1.0) (2025-11-18)

 ### 🚀 Features
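A side note on the non-ASCII corruption entry above: a minimal sketch, not the library's actual code, of why an explicit encoding matters when JSON metadata is written and read back on systems whose default locale encoding is not UTF-8.

import json
from pathlib import Path

path = Path('metadata.json')

# Writing with an explicit encoding keeps non-ASCII payloads intact
# regardless of the system locale.
path.write_text(json.dumps({'name': 'café'}, ensure_ascii=False), encoding='utf-8')

# Reading relies on the same explicit encoding; without `encoding='utf-8'`,
# Python falls back to the locale's preferred encoding (e.g. cp1250 on some
# Windows setups), which can mangle or reject the bytes written above.
data = json.loads(path.read_text(encoding='utf-8'))
assert data['name'] == 'café'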
{crawlee-1.1.1 → crawlee-1.1.1b1}/docs/deployment/code_examples/google/cloud_run_example.py
RENAMED
@@ -9,7 +9,7 @@ from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext
 from crawlee.storage_clients import MemoryStorageClient


-@get('/')
+@get('/')
 async def main() -> str:
     """The crawler entry point that will be called when the HTTP endpoint is accessed."""
     # highlight-start
{crawlee-1.1.1 → crawlee-1.1.1b1}/docs/deployment/code_examples/google/google_example.py
RENAMED
@@ -6,7 +6,10 @@ from datetime import timedelta
 import functions_framework
 from flask import Request, Response

-from crawlee.crawlers import
+from crawlee.crawlers import (
+    BeautifulSoupCrawler,
+    BeautifulSoupCrawlingContext,
+)
 from crawlee.storage_clients import MemoryStorageClient


@@ -48,7 +51,7 @@ async def main() -> str:
     # highlight-end


-@functions_framework.http
+@functions_framework.http
 def crawlee_run(request: Request) -> Response:
     # You can pass data to your crawler using `request`
     function_id = request.headers['Function-Execution-Id']
{crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/running_in_web_server/server.py
RENAMED
@@ -14,7 +14,7 @@ from .crawler import lifespan
 app = FastAPI(lifespan=lifespan, title='Crawler app')


-@app.get('/', response_class=HTMLResponse)
+@app.get('/', response_class=HTMLResponse)
 def index() -> str:
     return """
     <!DOCTYPE html>
@@ -32,7 +32,7 @@ def index() -> str:
     """


-@app.get('/scrape')
+@app.get('/scrape')
 async def scrape_url(request: Request, url: str | None = None) -> dict:
     if not url:
         return {'url': 'missing', 'scrape result': 'no results'}
{crawlee-1.1.1 → crawlee-1.1.1b1}/pyproject.toml
RENAMED
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"

 [project]
 name = "crawlee"
-version = "1.1.1"
+version = "1.1.1b1"
 description = "Crawlee for Python"
 authors = [{ name = "Apify Technologies s.r.o.", email = "support@apify.com" }]
 license = { file = "LICENSE" }
@@ -101,7 +101,7 @@ dev = [
     "build<2.0.0", # For e2e tests.
     "dycw-pytest-only<3.0.0",
     "fakeredis[probabilistic,json,lua]<3.0.0",
-    "mypy~=1.
+    "mypy~=1.18.0",
     "pre-commit<5.0.0",
     "proxy-py<3.0.0",
     "pydoc-markdown<5.0.0",
{crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_types.py
RENAMED
@@ -15,7 +15,7 @@ if TYPE_CHECKING:
     import re
     from collections.abc import Callable, Coroutine, Sequence

-    from typing_extensions import NotRequired, Required,
+    from typing_extensions import NotRequired, Required, Unpack

     from crawlee import Glob, Request
     from crawlee._request import RequestOptions
@@ -643,25 +643,6 @@ class BasicCrawlingContext:
         """Return hash of the context. Each context is considered unique."""
         return id(self)

-    def create_modified_copy(
-        self,
-        push_data: PushDataFunction | None = None,
-        add_requests: AddRequestsFunction | None = None,
-        get_key_value_store: GetKeyValueStoreFromRequestHandlerFunction | None = None,
-    ) -> Self:
-        """Create a modified copy of the crawling context with specified changes."""
-        original_fields = {field.name: getattr(self, field.name) for field in dataclasses.fields(self)}
-        modified_fields = {
-            key: value
-            for key, value in {
-                'push_data': push_data,
-                'add_requests': add_requests,
-                'get_key_value_store': get_key_value_store,
-            }.items()
-            if value
-        }
-        return self.__class__(**{**original_fields, **modified_fields})
-

 class GetDataKwargs(TypedDict):
     """Keyword arguments for dataset's `get_data` method."""
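The removed `create_modified_copy` rebuilds the dataclass field by field. For readers unfamiliar with the pattern, `dataclasses.replace` from the standard library does the same copy-with-overrides job; a minimal sketch, independent of Crawlee's types:

import dataclasses

@dataclasses.dataclass(frozen=True)
class Ctx:
    url: str
    label: str | None = None

ctx = Ctx(url='https://example.com')
# Copies every field, overriding only the ones passed in, which is the same
# idea as building `{**original_fields, **modified_fields}` above.
modified = dataclasses.replace(ctx, label='detail')
assert modified == Ctx(url='https://example.com', label='detail')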
{crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py
RENAMED
@@ -167,15 +167,9 @@ class AbstractHttpCrawler(
             kwargs.setdefault('strategy', 'same-hostname')

             links_iterator: Iterator[str] = iter(self._parser.find_links(parsed_content, selector=selector))
-
-
-            extracted_base_urls = list(self._parser.find_links(parsed_content, 'base[href]'))
-            base_url: str = (
-                str(extracted_base_urls[0])
-                if extracted_base_urls
-                else context.request.loaded_url or context.request.url
+            links_iterator = to_absolute_url_iterator(
+                context.request.loaded_url or context.request.url, links_iterator, logger=context.log
             )
-            links_iterator = to_absolute_url_iterator(base_url, links_iterator, logger=context.log)

             if robots_txt_file:
                 skipped, links_iterator = partition(lambda url: robots_txt_file.is_allowed(url), links_iterator)
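Context for the change above (and the matching Playwright change below): the 1.1.1 side resolves extracted links against a `<base href>` when the page declares one, per the "Respect `<base>` when enqueuing" changelog entry, while the 1.1.1b1 side always uses the loaded URL. A minimal sketch of the difference using only the standard library (the `to_absolute_url_iterator` helper is internal to Crawlee, so this illustrates just the base-URL semantics):

from urllib.parse import urljoin

page_url = 'https://example.com/articles/index.html'

# Without a <base> tag, relative links resolve against the page URL.
assert urljoin(page_url, 'page_1') == 'https://example.com/articles/page_1'
assert urljoin(page_url, '/page_2') == 'https://example.com/page_2'

# With <base href="https://example.com/base_subpath/">, that URL becomes the
# reference point instead, which is the behavior the 1.1.1 side implements.
base_href = 'https://example.com/base_subpath/'
assert urljoin(base_href, 'page_5') == 'https://example.com/base_subpath/page_5'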
{crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_basic/_basic_crawler.py
RENAMED
@@ -2,7 +2,6 @@
 from __future__ import annotations

 import asyncio
-import functools
 import logging
 import signal
 import sys
@@ -15,7 +14,7 @@ from contextlib import AsyncExitStack, suppress
 from datetime import timedelta
 from functools import partial
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Generic, Literal,
+from typing import TYPE_CHECKING, Any, Generic, Literal, cast
 from urllib.parse import ParseResult, urlparse
 from weakref import WeakKeyDictionary

@@ -97,9 +96,6 @@ if TYPE_CHECKING:
 TCrawlingContext = TypeVar('TCrawlingContext', bound=BasicCrawlingContext, default=BasicCrawlingContext)
 TStatisticsState = TypeVar('TStatisticsState', bound=StatisticsState, default=StatisticsState)
 TRequestIterator = TypeVar('TRequestIterator', str, Request)
-TParams = ParamSpec('TParams')
-T = TypeVar('T')
-
 ErrorHandler = Callable[[TCrawlingContext, Exception], Awaitable[Request | None]]
 FailedRequestHandler = Callable[[TCrawlingContext, Exception], Awaitable[None]]
 SkippedRequestCallback = Callable[[str, SkippedReason], Awaitable[None]]
@@ -524,24 +520,6 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
         self._logger.info(f'Crawler.stop() was called with following reason: {reason}.')
         self._unexpected_stop = True

-    def _wrap_handler_with_error_context(
-        self, handler: Callable[[TCrawlingContext | BasicCrawlingContext, Exception], Awaitable[T]]
-    ) -> Callable[[TCrawlingContext | BasicCrawlingContext, Exception], Awaitable[T]]:
-        """Decorate error handlers to make their context helpers usable."""
-
-        @functools.wraps(handler)
-        async def wrapped_handler(context: TCrawlingContext | BasicCrawlingContext, exception: Exception) -> T:
-            # Original context helpers that are from `RequestHandlerRunResult` will not be commited as the request
-            # failed. Modified context provides context helpers with direct access to the storages.
-            error_context = context.create_modified_copy(
-                push_data=self._push_data,
-                get_key_value_store=self.get_key_value_store,
-                add_requests=functools.partial(self._add_requests, context),
-            )
-            return await handler(error_context, exception)
-
-        return wrapped_handler
-
     def _stop_if_max_requests_count_exceeded(self) -> None:
         """Call `stop` when the maximum number of requests to crawl has been reached."""
         if self._max_requests_per_crawl is None:
@@ -640,7 +618,7 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):

         The error handler is invoked after a request handler error occurs and before a retry attempt.
         """
-        self._error_handler =
+        self._error_handler = handler
         return handler

     def failed_request_handler(
@@ -650,7 +628,7 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):

         The failed request handler is invoked when a request has failed all retry attempts.
         """
-        self._failed_request_handler =
+        self._failed_request_handler = handler
         return handler

     def on_skipped_request(self, callback: SkippedRequestCallback) -> SkippedRequestCallback:
@@ -1065,8 +1043,8 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
             return target_url.hostname == origin_url.hostname

         if strategy == 'same-domain':
-            origin_domain = self._tld_extractor.extract_str(origin_url.hostname).
-            target_domain = self._tld_extractor.extract_str(target_url.hostname).
+            origin_domain = self._tld_extractor.extract_str(origin_url.hostname).domain
+            target_domain = self._tld_extractor.extract_str(target_url.hostname).domain
             return origin_domain == target_domain

         if strategy == 'same-origin':
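The attribute on the removed (1.1.1) lines is cut off in this view; per the changelog entry "Fix `same-domain` strategy ignoring public suffix", it is presumably a suffix-aware attribute replacing the bare `.domain` comparison kept on the 1.1.1b1 side. A minimal sketch of why that comparison is too loose, assuming the `tldextract` package the crawler already uses:

import tldextract

a = tldextract.extract('https://shop.example.co.uk')
b = tldextract.extract('https://example.com')

# `.domain` strips both the subdomain and the public suffix, so two
# unrelated sites compare as equal:
assert a.domain == b.domain == 'example'

# A suffix-aware comparison keeps them apart:
assert a.registered_domain == 'example.co.uk'
assert b.registered_domain == 'example.com'
assert a.registered_domain != b.registered_domain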
@@ -1278,46 +1256,52 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
         else:
             yield Request.from_url(url)

-    async def _add_requests(
-        self,
-        context: BasicCrawlingContext,
-        requests: Sequence[str | Request],
-        rq_id: str | None = None,
-        rq_name: str | None = None,
-        rq_alias: str | None = None,
-        **kwargs: Unpack[EnqueueLinksKwargs],
-    ) -> None:
-        """Add requests method aware of the crawling context."""
-        if rq_id or rq_name or rq_alias:
-            request_manager: RequestManager = await RequestQueue.open(
-                id=rq_id,
-                name=rq_name,
-                alias=rq_alias,
-                storage_client=self._service_locator.get_storage_client(),
-                configuration=self._service_locator.get_configuration(),
-            )
-        else:
-            request_manager = await self.get_request_manager()
-
-        context_aware_requests = list[Request]()
-        base_url = kwargs.get('base_url') or context.request.loaded_url or context.request.url
-        requests_iterator = self._convert_url_to_request_iterator(requests, base_url)
-        filter_requests_iterator = self._enqueue_links_filter_iterator(requests_iterator, context.request.url, **kwargs)
-        for dst_request in filter_requests_iterator:
-            # Update the crawl depth of the request.
-            dst_request.crawl_depth = context.request.crawl_depth + 1
-
-            if self._max_crawl_depth is None or dst_request.crawl_depth <= self._max_crawl_depth:
-                context_aware_requests.append(dst_request)
-
-        return await request_manager.add_requests(context_aware_requests)
-
     async def _commit_request_handler_result(self, context: BasicCrawlingContext) -> None:
         """Commit request handler result for the input `context`. Result is taken from `_context_result_map`."""
         result = self._context_result_map[context]

+        base_request_manager = await self.get_request_manager()
+
+        origin = context.request.loaded_url or context.request.url
+
         for add_requests_call in result.add_requests_calls:
-
+            rq_id = add_requests_call.get('rq_id')
+            rq_name = add_requests_call.get('rq_name')
+            rq_alias = add_requests_call.get('rq_alias')
+            specified_params = sum(1 for param in [rq_id, rq_name, rq_alias] if param is not None)
+            if specified_params > 1:
+                raise ValueError('You can only provide one of `rq_id`, `rq_name` or `rq_alias` arguments.')
+            if rq_id or rq_name or rq_alias:
+                request_manager: RequestManager | RequestQueue = await RequestQueue.open(
+                    id=rq_id,
+                    name=rq_name,
+                    alias=rq_alias,
+                    storage_client=self._service_locator.get_storage_client(),
+                    configuration=self._service_locator.get_configuration(),
+                )
+            else:
+                request_manager = base_request_manager
+
+            requests = list[Request]()
+
+            base_url = url if (url := add_requests_call.get('base_url')) else origin
+
+            requests_iterator = self._convert_url_to_request_iterator(add_requests_call['requests'], base_url)
+
+            enqueue_links_kwargs: EnqueueLinksKwargs = {k: v for k, v in add_requests_call.items() if k != 'requests'}  # type: ignore[assignment]
+
+            filter_requests_iterator = self._enqueue_links_filter_iterator(
+                requests_iterator, context.request.url, **enqueue_links_kwargs
+            )
+
+            for dst_request in filter_requests_iterator:
+                # Update the crawl depth of the request.
+                dst_request.crawl_depth = context.request.crawl_depth + 1
+
+                if self._max_crawl_depth is None or dst_request.crawl_depth <= self._max_crawl_depth:
+                    requests.append(dst_request)
+
+            await request_manager.add_requests(requests)

         for push_data_call in result.push_data_calls:
             await self._push_data(**push_data_call)
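The reworked commit path on the 1.1.1b1 side reads the queue selectors (`rq_id`, `rq_name`, `rq_alias`) out of each `add_requests` call and rejects combinations. A standalone sketch of the same guard, using a hypothetical `resolve_queue_selector` helper name:

def resolve_queue_selector(
    rq_id: str | None = None,
    rq_name: str | None = None,
    rq_alias: str | None = None,
) -> str | None:
    # Hypothetical standalone version of the guard in
    # `_commit_request_handler_result` above: at most one selector may be set.
    specified = [param for param in (rq_id, rq_name, rq_alias) if param is not None]
    if len(specified) > 1:
        raise ValueError('You can only provide one of `rq_id`, `rq_name` or `rq_alias` arguments.')
    return specified[0] if specified else None

assert resolve_queue_selector(rq_alias='other') == 'other'
assert resolve_queue_selector() is None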
{crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_playwright/_playwright_crawler.py
RENAMED
@@ -369,12 +369,9 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
             links_iterator: Iterator[str] = iter(
                 [url for element in elements if (url := await element.get_attribute('href')) is not None]
             )
-
-
-
-            base_url: str = extracted_base_url or context.request.loaded_url or context.request.url
-
-            links_iterator = to_absolute_url_iterator(base_url, links_iterator, logger=context.log)
+            links_iterator = to_absolute_url_iterator(
+                context.request.loaded_url or context.request.url, links_iterator, logger=context.log
+            )

             if robots_txt_file:
                 skipped, links_iterator = partition(lambda url: robots_txt_file.is_allowed(url), links_iterator)
{crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_file_system/_dataset_client.py
RENAMED
@@ -134,7 +134,7 @@ class FileSystemDatasetClient(DatasetClient):
                 continue

             try:
-                file = await asyncio.to_thread(path_to_metadata.open
+                file = await asyncio.to_thread(path_to_metadata.open)
                 try:
                     file_content = json.load(file)
                     metadata = DatasetMetadata(**file_content)
@@ -163,7 +163,7 @@ class FileSystemDatasetClient(DatasetClient):

         # If the dataset directory exists, reconstruct the client from the metadata file.
         if path_to_dataset.exists() and path_to_metadata.exists():
-            file = await asyncio.to_thread(open, path_to_metadata
+            file = await asyncio.to_thread(open, path_to_metadata)
             try:
                 file_content = json.load(file)
             finally:
{crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_file_system/_key_value_store_client.py
RENAMED
@@ -133,7 +133,7 @@ class FileSystemKeyValueStoreClient(KeyValueStoreClient):
                 continue

             try:
-                file = await asyncio.to_thread(path_to_metadata.open
+                file = await asyncio.to_thread(path_to_metadata.open)
                 try:
                     file_content = json.load(file)
                     metadata = KeyValueStoreMetadata(**file_content)
@@ -162,7 +162,7 @@ class FileSystemKeyValueStoreClient(KeyValueStoreClient):

         # If the key-value store directory exists, reconstruct the client from the metadata file.
         if path_to_kvs.exists() and path_to_metadata.exists():
-            file = await asyncio.to_thread(open, path_to_metadata
+            file = await asyncio.to_thread(open, path_to_metadata)
             try:
                 file_content = json.load(file)
             finally:
@@ -239,7 +239,7 @@ class FileSystemKeyValueStoreClient(KeyValueStoreClient):
         # Read the metadata file
         async with self._lock:
             try:
-                file = await asyncio.to_thread(open, record_metadata_filepath
+                file = await asyncio.to_thread(open, record_metadata_filepath)
             except FileNotFoundError:
                 logger.warning(f'Metadata file disappeared for key "{key}", aborting get_value')
                 return None
{crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_file_system/_request_queue_client.py
RENAMED
@@ -197,7 +197,7 @@ class FileSystemRequestQueueClient(RequestQueueClient):
                 continue

             try:
-                file = await asyncio.to_thread(path_to_metadata.open
+                file = await asyncio.to_thread(path_to_metadata.open)
                 try:
                     file_content = json.load(file)
                     metadata = RequestQueueMetadata(**file_content)
@@ -232,7 +232,7 @@ class FileSystemRequestQueueClient(RequestQueueClient):

         # If the RQ directory exists, reconstruct the client from the metadata file.
         if path_to_rq.exists() and path_to_metadata.exists():
-            file = await asyncio.to_thread(open, path_to_metadata
+            file = await asyncio.to_thread(open, path_to_metadata)
             try:
                 file_content = json.load(file)
             finally:
@@ -775,7 +775,7 @@ class FileSystemRequestQueueClient(RequestQueueClient):
         """
         # Open the request file.
         try:
-            file = await asyncio.to_thread(open, file_path
+            file = await asyncio.to_thread(open, file_path)
         except FileNotFoundError:
             logger.warning(f'Request file "{file_path}" not found.')
             return None
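All of the storage-client hunks above share one pattern: the blocking `open()` call is pushed onto a worker thread via `asyncio.to_thread` so the event loop is never stalled by file I/O (the removed lines presumably passed extra arguments to `open`, which this view cuts off). A minimal standalone sketch of the pattern:

import asyncio
import json
from pathlib import Path

async def read_metadata(path: Path) -> dict | None:
    # Offload the blocking `open` call to a worker thread.
    try:
        file = await asyncio.to_thread(open, path)
    except FileNotFoundError:
        return None
    try:
        # Mirrors the clients above: parse inline, always close the handle.
        return json.load(file)
    finally:
        file.close()

# Example usage (assumes a metadata.json next to the script):
# asyncio.run(read_metadata(Path('metadata.json')))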
{crawlee-1.1.1 → crawlee-1.1.1b1}/tests/e2e/project_template/test_static_crawlers_templates.py
RENAMED
@@ -71,9 +71,6 @@ async def test_static_crawler_actor_at_apify(
         project_path=tmp_path / actor_name, wheel_path=crawlee_wheel_path, package_manager=package_manager
     )

-    # Print apify version for debugging purposes in rare cases of CLI failures
-    subprocess.run(['apify', '--version'], check=True)  # noqa: ASYNC221, S607
-
     # Build actor using sequence of cli commands as the user would
     subprocess.run(  # noqa: ASYNC221, S603
         ['apify', 'login', '-t', os.environ['APIFY_TEST_USER_API_TOKEN']],  # noqa: S607
{crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_autoscaling/test_autoscaled_pool.py
RENAMED
@@ -310,14 +310,14 @@ async def test_allows_multiple_run_calls(system_status: SystemStatus | Mock) ->
     done_count = 0

     async def run() -> None:
+        await asyncio.sleep(0.1)
         nonlocal done_count
         done_count += 1
-        await asyncio.sleep(0.1)

     pool = AutoscaledPool(
         system_status=system_status,
         run_task_function=run,
-        is_task_ready_function=lambda: future(
+        is_task_ready_function=lambda: future(True),
         is_finished_function=lambda: future(done_count >= 4),
         concurrency_settings=ConcurrencySettings(
             min_concurrency=4,
@@ -330,6 +330,8 @@ async def test_allows_multiple_run_calls(system_status: SystemStatus | Mock) ->
     assert done_count == 4

     done_count = 0
+    await asyncio.sleep(0.2)  # Allow any lingering callbacks to complete
+    done_count = 0  # Reset again to ensure clean state

     await pool.run()
     assert done_count == 4
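`future(...)` here is presumably a local test helper that wraps a plain value in an already-resolved awaitable, since the pool's `is_task_ready_function` and `is_finished_function` callbacks return awaitables. A hedged sketch of such a helper (the actual helper in the test suite may differ):

import asyncio
from typing import TypeVar

T = TypeVar('T')

def future(value: T) -> asyncio.Future[T]:
    # Wrap a plain value in an already-resolved Future, so a synchronous
    # lambda can stand in for an async readiness check.
    f: asyncio.Future[T] = asyncio.get_running_loop().create_future()
    f.set_result(value)
    return f

async def main() -> None:
    is_task_ready = lambda: future(True)  # noqa: E731 - mirrors the test's style
    assert await is_task_ready() is True

asyncio.run(main())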
{crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_system.py
RENAMED
@@ -54,7 +54,6 @@ def test_memory_estimation_does_not_overestimate_due_to_shared_memory() -> None:

     def extra_memory_child(ready: synchronize.Barrier, measured: synchronize.Barrier) -> None:
         memory = SharedMemory(size=extra_memory_size, create=True)
-        assert memory.buf is not None
         memory.buf[:] = bytearray([255 for _ in range(extra_memory_size)])
         print(f'Using the memory... {memory.buf[-1]}')
         ready.wait()
@@ -65,7 +64,6 @@ def test_memory_estimation_does_not_overestimate_due_to_shared_memory() -> None:
     def shared_extra_memory_child(
         ready: synchronize.Barrier, measured: synchronize.Barrier, memory: SharedMemory
     ) -> None:
-        assert memory.buf is not None
         print(f'Using the memory... {memory.buf[-1]}')
         ready.wait()
         measured.wait()
@@ -81,7 +79,6 @@ def test_memory_estimation_does_not_overestimate_due_to_shared_memory() -> None:

     if use_shared_memory:
         shared_memory = SharedMemory(size=extra_memory_size, create=True)
-        assert shared_memory.buf is not None
         shared_memory.buf[:] = bytearray([255 for _ in range(extra_memory_size)])
         extra_args = [shared_memory]
     else:
{crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/crawlers/_basic/test_basic_crawler.py
RENAMED
@@ -284,46 +284,6 @@ async def test_calls_failed_request_handler() -> None:
     assert isinstance(calls[0][1], RuntimeError)


-@pytest.mark.parametrize('handler', ['failed_request_handler', 'error_handler'])
-async def test_handlers_use_context_helpers(tmp_path: Path, handler: str) -> None:
-    """Test that context helpers used in `failed_request_handler` and in `error_handler` have effect."""
-    # Prepare crawler
-    storage_client = FileSystemStorageClient()
-    crawler = BasicCrawler(
-        max_request_retries=1, storage_client=storage_client, configuration=Configuration(storage_dir=str(tmp_path))
-    )
-    # Test data
-    rq_alias = 'other'
-    test_data = {'some': 'data'}
-    test_key = 'key'
-    test_value = 'value'
-    test_request = Request.from_url('https://d.placeholder.com')
-
-    # Request handler with injected error
-    @crawler.router.default_handler
-    async def request_handler(context: BasicCrawlingContext) -> None:
-        raise RuntimeError('Arbitrary crash for testing purposes')
-
-    # Apply one of the handlers
-    @getattr(crawler, handler)  # type: ignore[untyped-decorator]
-    async def handler_implementation(context: BasicCrawlingContext, error: Exception) -> None:
-        await context.push_data(test_data)
-        await context.add_requests(requests=[test_request], rq_alias=rq_alias)
-        kvs = await context.get_key_value_store()
-        await kvs.set_value(test_key, test_value)
-
-    await crawler.run(['https://b.placeholder.com'])
-
-    # Verify that the context helpers used in handlers had effect on used storages
-    dataset = await Dataset.open(storage_client=storage_client)
-    kvs = await KeyValueStore.open(storage_client=storage_client)
-    rq = await RequestQueue.open(alias=rq_alias, storage_client=storage_client)
-
-    assert test_value == await kvs.get_value(test_key)
-    assert [test_data] == (await dataset.get_data()).items
-    assert test_request == await rq.fetch_next_request()
-
-
 async def test_handles_error_in_failed_request_handler() -> None:
     crawler = BasicCrawler(max_request_retries=3)

@@ -387,7 +347,6 @@ STRATEGY_TEST_URLS = (
     'https://blog.someplace.com/index.html',
     'https://redirect.someplace.com',
     'https://other.place.com/index.html',
-    'https://someplace.jp/',
 )

 INCLUDE_TEST_URLS = (
@@ -442,7 +401,7 @@ INCLUDE_TEST_URLS = (
     AddRequestsTestInput(
         start_url=STRATEGY_TEST_URLS[0],
         loaded_url=STRATEGY_TEST_URLS[0],
-        requests=STRATEGY_TEST_URLS,
+        requests=STRATEGY_TEST_URLS[:4],
         kwargs=EnqueueLinksKwargs(strategy='same-domain'),
         expected_urls=STRATEGY_TEST_URLS[1:4],
     ),
@@ -452,7 +411,7 @@ INCLUDE_TEST_URLS = (
     AddRequestsTestInput(
         start_url=STRATEGY_TEST_URLS[0],
         loaded_url=STRATEGY_TEST_URLS[0],
-        requests=STRATEGY_TEST_URLS,
+        requests=STRATEGY_TEST_URLS[:4],
         kwargs=EnqueueLinksKwargs(strategy='same-hostname'),
         expected_urls=[STRATEGY_TEST_URLS[1]],
     ),
@@ -462,7 +421,7 @@ INCLUDE_TEST_URLS = (
     AddRequestsTestInput(
         start_url=STRATEGY_TEST_URLS[0],
         loaded_url=STRATEGY_TEST_URLS[0],
-        requests=STRATEGY_TEST_URLS,
+        requests=STRATEGY_TEST_URLS[:4],
         kwargs=EnqueueLinksKwargs(strategy='same-origin'),
         expected_urls=[],
     ),
{crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py
RENAMED
@@ -58,9 +58,6 @@ async def test_enqueue_links(redirect_server_url: URL, server_url: URL, http_cli
         str(server_url / 'page_1'),
         str(server_url / 'page_2'),
         str(server_url / 'page_3'),
-        str(server_url / 'page_4'),
-        str(server_url / 'base_page'),
-        str(server_url / 'base_subpath/page_5'),
     }


@@ -134,9 +131,6 @@ async def test_enqueue_links_with_transform_request_function(server_url: URL, ht
         str(server_url / 'sub_index'),
         str(server_url / 'page_1'),
         str(server_url / 'page_2'),
-        str(server_url / 'base_page'),
-        str(server_url / 'page_4'),
-        str(server_url / 'base_subpath/page_5'),
     }

     # # all urls added to `enqueue_links` must have a custom header
@@ -170,8 +164,6 @@ async def test_respect_robots_txt(server_url: URL, http_client: HttpClient) -> N
     assert visited == {
         str(server_url / 'start_enqueue'),
         str(server_url / 'sub_index'),
-        str(server_url / 'base_page'),
-        str(server_url / 'base_subpath/page_5'),
     }


@@ -229,7 +221,6 @@ async def test_on_skipped_request(server_url: URL, http_client: HttpClient) -> N
         str(server_url / 'page_1'),
         str(server_url / 'page_2'),
         str(server_url / 'page_3'),
-        str(server_url / 'page_4'),
     }
