crawlee 1.0.5b16__tar.gz → 1.2.1b5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only. It reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crawlee might be problematic. See the package's registry page for more details.
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/.github/workflows/build_and_deploy_docs.yaml +6 -2
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/.github/workflows/run_code_checks.yaml +1 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/.github/workflows/templates_e2e_tests.yaml +1 -1
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/.gitignore +1 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/CHANGELOG.md +37 -2
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/PKG-INFO +4 -3
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/deployment/code_examples/google/cloud_run_example.py +1 -1
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/deployment/code_examples/google/google_example.py +2 -5
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/export_entire_dataset_to_file_csv.py +2 -1
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/export_entire_dataset_to_file_json.py +2 -1
- crawlee-1.2.1b5/docs/examples/code_examples/using_sitemap_request_loader.py +101 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/export_entire_dataset_to_file.mdx +1 -1
- crawlee-1.2.1b5/docs/examples/using_sitemap_request_loader.mdx +22 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/running_in_web_server/server.py +2 -2
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/pyproject.toml +12 -4
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/__init__.py +2 -1
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_request.py +30 -11
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_types.py +20 -1
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/context.py +2 -2
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/file.py +7 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/recurring_task.py +2 -1
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/time.py +41 -1
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/__init__.py +2 -1
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_abstract_http/__init__.py +2 -1
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +48 -13
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py +6 -2
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_basic/_basic_crawler.py +115 -112
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_basic/_logging_utils.py +23 -4
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawler.py +2 -2
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_parsel/_parsel_crawler.py +2 -2
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_playwright/_playwright_crawler.py +49 -11
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_playwright/_playwright_http_client.py +7 -1
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py +4 -1
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_playwright/_types.py +12 -2
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/errors.py +4 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/events/_event_manager.py +4 -4
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/http_clients/_base.py +4 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/http_clients/_curl_impersonate.py +12 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/http_clients/_httpx.py +16 -6
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/http_clients/_impit.py +25 -10
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/request_loaders/_sitemap_request_loader.py +17 -4
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/router.py +13 -3
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/statistics/_models.py +32 -1
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/statistics/_statistics.py +2 -21
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_file_system/_dataset_client.py +2 -2
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_file_system/_key_value_store_client.py +3 -3
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_file_system/_request_queue_client.py +3 -3
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_sql/_storage_client.py +0 -9
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/e2e/project_template/test_static_crawlers_templates.py +3 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_autoscaling/test_autoscaled_pool.py +2 -4
- crawlee-1.2.1b5/tests/unit/_utils/test_shared_timeout.py +57 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_utils/test_system.py +3 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/browsers/test_playwright_browser_controller.py +5 -1
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/conftest.py +1 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/crawlers/_basic/test_basic_crawler.py +138 -6
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py +68 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/crawlers/_http/test_http_crawler.py +56 -1
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/crawlers/_parsel/test_parsel_crawler.py +9 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/crawlers/_playwright/test_playwright_crawler.py +122 -1
- crawlee-1.2.1b5/tests/unit/crawlers/_playwright/test_utils.py +157 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/events/test_event_manager.py +12 -0
- crawlee-1.2.1b5/tests/unit/events/test_local_event_manager.py +25 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/request_loaders/test_sitemap_request_loader.py +35 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/server.py +44 -1
- crawlee-1.2.1b5/tests/unit/server_endpoints.py +142 -0
- crawlee-1.2.1b5/tests/unit/server_static/test.png +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storages/test_dataset.py +17 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storages/test_key_value_store.py +30 -9
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storages/test_request_queue.py +19 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/uv.lock +770 -692
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/docusaurus.config.js +2 -2
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/package.json +2 -1
- crawlee-1.2.1b5/website/src/components/LLMButtons.jsx +510 -0
- crawlee-1.2.1b5/website/src/components/LLMButtons.module.css +151 -0
- crawlee-1.2.1b5/website/src/theme/DocItem/Content/index.js +35 -0
- crawlee-1.2.1b5/website/src/theme/DocItem/Content/styles.module.css +22 -0
- crawlee-1.2.1b5/website/static/.nojekyll +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/yarn.lock +734 -822
- crawlee-1.0.5b16/tests/unit/events/test_local_event_manager.py +0 -31
- crawlee-1.0.5b16/tests/unit/server_endpoints.py +0 -71
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/.editorconfig +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/.github/CODEOWNERS +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/.github/pull_request_template.md +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/.github/workflows/check_pr_title.yaml +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/.github/workflows/pre_release.yaml +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/.github/workflows/release.yaml +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/.github/workflows/update_new_issue.yaml +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/.markdownlint.yaml +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/.pre-commit-config.yaml +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/CONTRIBUTING.md +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/LICENSE +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/Makefile +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/README.md +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/deployment/apify_platform.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/deployment/code_examples/apify/crawler_as_actor_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/deployment/code_examples/apify/get_public_url.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/deployment/code_examples/apify/log_with_config_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/deployment/code_examples/apify/proxy_advanced_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/deployment/code_examples/apify/proxy_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/deployment/google_cloud.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/deployment/google_cloud_run.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/add_data_to_dataset.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/beautifulsoup_crawler.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/capture_screenshot_using_playwright.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/capturing_page_snapshots_with_error_snapshotter.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/adaptive_playwright_crawler.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/add_data_to_dataset_bs.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/add_data_to_dataset_dataset.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/add_data_to_dataset_pw.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/beautifulsoup_crawler.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/beautifulsoup_crawler_keep_alive.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/beautifulsoup_crawler_stop.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/capture_screenshot_using_playwright.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/configure_json_logging.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_all_links_on_website_bs.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_all_links_on_website_pw.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_multiple_urls_bs.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_multiple_urls_pw.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_specific_links_on_website_bs.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_specific_links_on_website_pw.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_website_with_relative_links_all_links.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_website_with_relative_links_same_domain.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_website_with_relative_links_same_hostname.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_website_with_relative_links_same_origin.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/extract_and_add_specific_links_on_website_bs.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/extract_and_add_specific_links_on_website_pw.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/fill_and_submit_web_form_crawler.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/fill_and_submit_web_form_request.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/parsel_crawler.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/parsel_crawler_with_error_snapshotter.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/playwright_block_requests.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/playwright_crawler.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/playwright_crawler_with_camoufox.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/playwright_crawler_with_error_snapshotter.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/playwright_crawler_with_fingerprint_generator.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/respect_robots_on_skipped_request.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/respect_robots_txt_file.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/resuming_paused_crawl.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/using_browser_profiles_chrome.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/using_browser_profiles_firefox.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/crawl_all_links_on_website.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/crawl_multiple_urls.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/crawl_specific_links_on_website.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/crawl_website_with_relative_links.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/crawler_keep_alive.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/crawler_stop.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/fill_and_submit_web_form.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/json_logging.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/parsel_crawler.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/playwright_crawler.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/playwright_crawler_adaptive.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/playwright_crawler_with_block_requests.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/playwright_crawler_with_camoufox.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/playwright_crawler_with_fingerprint_generator.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/respect_robots_txt_file.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/resuming_paused_crawl.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/using_browser_profile.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/architecture_overview.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/avoid_blocking.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/avoid_blocking/default_fingerprint_generator_with_args.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/avoid_blocking/playwright_with_fingerprint_generator.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/creating_web_archive/manual_archiving_parsel_crawler.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/creating_web_archive/manual_archiving_playwright_crawler.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/creating_web_archive/simple_pw_through_proxy_pywb_server.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/error_handling/change_handle_error_status.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/error_handling/disable_retry.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/error_handling/handle_proxy_error.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/http_clients/parsel_curl_impersonate_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/http_clients/parsel_httpx_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/http_clients/parsel_impit_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/http_crawlers/beautifulsoup_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/http_crawlers/custom_crawler_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/http_crawlers/http_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/http_crawlers/parsel_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/login_crawler/http_login.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/login_crawler/playwright_login.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler/browser_configuration_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler/multiple_launch_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler/plugin_browser_configuration_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler/pre_navigation_hook_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler_adaptive/handler.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler_adaptive/init_beautifulsoup.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler_adaptive/init_parsel.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler_adaptive/init_prediction.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler_adaptive/pre_nav_hooks.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler_stagehand/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler_stagehand/browser_classes.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler_stagehand/stagehand_run.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler_stagehand/support_classes.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/proxy_management/inspecting_bs_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/proxy_management/inspecting_pw_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/proxy_management/integration_bs_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/proxy_management/integration_pw_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/proxy_management/quick_start_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/proxy_management/session_bs_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/proxy_management/session_pw_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/proxy_management/tiers_bs_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/proxy_management/tiers_pw_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_loaders/rl_basic_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_loaders/rl_basic_example_with_persist.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_loaders/rl_tandem_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_loaders/rl_tandem_example_explicit.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_loaders/sitemap_basic_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_loaders/sitemap_example_with_persist.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_loaders/sitemap_tandem_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_loaders/sitemap_tandem_example_explicit.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_router/adaptive_crawler_handlers.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_router/basic_request_handlers.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_router/custom_router_default_only.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_router/error_handler.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_router/failed_request_handler.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_router/http_pre_navigation.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_router/playwright_pre_navigation.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_router/simple_default_handler.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/running_in_web_server/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/running_in_web_server/crawler.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/scaling_crawlers/max_tasks_per_minute_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/scaling_crawlers/min_and_max_concurrency_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/service_locator/service_conflicts.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/service_locator/service_crawler_configuration.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/service_locator/service_crawler_event_manager.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/service_locator/service_crawler_storage_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/service_locator/service_locator_configuration.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/service_locator/service_locator_event_manager.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/service_locator/service_locator_storage_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/service_locator/service_storage_configuration.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/service_locator/service_storage_storage_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/session_management/multi_sessions_http.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/session_management/one_session_http.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/session_management/sm_basic.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/session_management/sm_beautifulsoup.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/session_management/sm_http.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/session_management/sm_parsel.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/session_management/sm_playwright.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/session_management/sm_standalone.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storage_clients/custom_storage_client_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storage_clients/file_system_storage_client_basic_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storage_clients/file_system_storage_client_configuration_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storage_clients/memory_storage_client_basic_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storage_clients/redis_storage_client_basic_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storage_clients/redis_storage_client_configuration_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storage_clients/registering_storage_clients_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storage_clients/sql_storage_client_basic_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storage_clients/sql_storage_client_configuration_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/cleaning_do_not_purge_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/cleaning_purge_explicitly_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/dataset_basic_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/dataset_with_crawler_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/dataset_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/helper_add_requests_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/helper_enqueue_links_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/kvs_basic_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/kvs_with_crawler_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/kvs_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/opening.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/rq_basic_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/rq_with_crawler_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/rq_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/trace_and_monitor_crawlers/instrument_crawler.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/crawler_login.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/creating_web_archive.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/error_handling.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/http_clients.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/http_crawlers.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/playwright_crawler.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/playwright_crawler_adaptive.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/playwright_crawler_stagehand.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/proxy_management.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/request_loaders.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/request_router.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/running_in_web_server.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/scaling_crawlers.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/service_locator.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/session_management.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/storage_clients.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/storages.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/trace_and_monitor_crawlers.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/01_setting_up.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/02_first_crawler.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/03_adding_more_urls.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/04_real_world_project.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/05_crawling.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/06_scraping.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/07_saving_data.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/08_refactoring.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/09_running_in_cloud.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/02_bs.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/02_bs_better.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/02_request_queue.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/03_enqueue_strategy.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/03_finding_new_links.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/03_globs.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/03_original_code.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/03_transform_request.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/04_sanity_check.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/05_crawling_detail.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/05_crawling_listing.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/06_scraping.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/07_final_code.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/07_first_code.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/08_main.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/08_routes.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/09_apify_sdk.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/routes.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/index.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/pyproject.toml +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/quick-start/code_examples/beautifulsoup_crawler_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/quick-start/code_examples/parsel_crawler_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/quick-start/code_examples/playwright_crawler_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/quick-start/code_examples/playwright_crawler_headful_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/quick-start/index.mdx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/upgrading/upgrading_to_v0x.md +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/upgrading/upgrading_to_v1.md +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/renovate.json +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_autoscaling/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_autoscaling/_types.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_autoscaling/autoscaled_pool.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_autoscaling/py.typed +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_autoscaling/snapshotter.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_autoscaling/system_status.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_browserforge_workaround.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_cli.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_consts.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_log_config.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_service_locator.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/blocked.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/byte_size.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/console.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/crypto.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/docs.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/globs.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/html_to_text.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/models.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/raise_if_too_many_kwargs.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/recoverable_state.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/requests.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/robots.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/sitemap.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/system.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/try_import.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/urls.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/wait.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/web.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/browsers/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/browsers/_browser_controller.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/browsers/_browser_plugin.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/browsers/_browser_pool.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/browsers/_playwright_browser.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/browsers/_playwright_browser_controller.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/browsers/_playwright_browser_plugin.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/browsers/_types.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/browsers/py.typed +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/configuration.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_abstract_http/_abstract_http_parser.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_abstract_http/_http_crawling_context.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_abstract_http/py.typed +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_adaptive_playwright/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_adaptive_playwright/_result_comparator.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_adaptive_playwright/_utils.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_basic/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_basic/_basic_crawling_context.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_basic/_context_pipeline.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_basic/py.typed +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_beautifulsoup/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawling_context.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_parser.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_beautifulsoup/_utils.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_beautifulsoup/py.typed +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_http/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_http/_http_crawler.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_http/_http_parser.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_parsel/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_parsel/_parsel_crawling_context.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_parsel/_parsel_parser.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_parsel/_utils.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_playwright/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_playwright/_playwright_crawling_context.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_playwright/_utils.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_types.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/py.typed +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/events/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/events/_local_event_manager.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/events/_types.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/events/py.typed +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/fingerprint_suite/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/fingerprint_suite/_browserforge_adapter.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/fingerprint_suite/_consts.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/fingerprint_suite/_fingerprint_generator.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/fingerprint_suite/_header_generator.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/fingerprint_suite/_types.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/fingerprint_suite/py.typed +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/http_clients/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/otel/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/otel/crawler_instrumentor.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/cookiecutter.json +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/hooks/post_gen_project.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/hooks/pre_gen_project.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/main.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/main_beautifulsoup.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/main_parsel.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/main_playwright.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/main_playwright_camoufox.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/routes_beautifulsoup.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/routes_camoufox.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/routes_parsel.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/routes_playwright.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/routes_playwright_camoufox.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/.dockerignore +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/README.md +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/requirements.txt +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__main__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/main.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/routes.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/proxy_configuration.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/py.typed +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/request_loaders/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/request_loaders/_request_list.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/request_loaders/_request_loader.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/request_loaders/_request_manager.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/request_loaders/_request_manager_tandem.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/sessions/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/sessions/_cookies.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/sessions/_models.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/sessions/_session.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/sessions/_session_pool.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/sessions/py.typed +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/statistics/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/statistics/_error_snapshotter.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/statistics/_error_tracker.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_base/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_base/_dataset_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_base/_key_value_store_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_base/_request_queue_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_base/_storage_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_base/py.typed +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_file_system/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_file_system/_storage_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_file_system/_utils.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_file_system/py.typed +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_memory/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_memory/_dataset_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_memory/_key_value_store_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_memory/_request_queue_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_memory/_storage_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_memory/py.typed +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_redis/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_redis/_client_mixin.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_redis/_dataset_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_redis/_key_value_store_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_redis/_request_queue_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_redis/_storage_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_redis/_utils.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_redis/lua_scripts/atomic_bloom_add_requests.lua +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_redis/lua_scripts/atomic_fetch_request.lua +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_redis/lua_scripts/atomic_set_add_requests.lua +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_redis/lua_scripts/reclaim_stale_requests.lua +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_redis/py.typed +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_sql/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_sql/_client_mixin.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_sql/_dataset_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_sql/_db_models.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_sql/_key_value_store_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_sql/_request_queue_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_sql/py.typed +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/models.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/py.typed +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storages/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storages/_base.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storages/_dataset.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storages/_key_value_store.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storages/_request_queue.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storages/_storage_instance_manager.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storages/_utils.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storages/py.typed +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/e2e/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/e2e/conftest.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/e2e/project_template/utils.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/README.md +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/__init__.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_autoscaling/test_snapshotter.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_autoscaling/test_system_status.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_statistics/test_error_tracker.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_statistics/test_periodic_logging.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_statistics/test_persistence.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_statistics/test_request_processing_record.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_utils/test_byte_size.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_utils/test_console.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_utils/test_crypto.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_utils/test_file.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_utils/test_globs.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_utils/test_html_to_text.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_utils/test_measure_time.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_utils/test_raise_if_too_many_kwargs.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_utils/test_recurring_task.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_utils/test_requests.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_utils/test_robots.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_utils/test_sitemap.py +0 -0
- /crawlee-1.0.5b16/tests/unit/_utils/test_timedelata_ms.py → /crawlee-1.2.1b5/tests/unit/_utils/test_timedelta_ms.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_utils/test_urls.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/browsers/test_browser_pool.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/browsers/test_playwright_browser.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/browsers/test_playwright_browser_plugin.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler_statistics.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawling_context.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/crawlers/_adaptive_playwright/test_predictor.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/crawlers/_basic/test_context_pipeline.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/fingerprint_suite/test_adapters.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/fingerprint_suite/test_header_generator.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/http_clients/test_http_clients.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/http_clients/test_httpx.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/otel/test_crawler_instrumentor.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/proxy_configuration/test_new_proxy_info.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/proxy_configuration/test_tiers.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/request_loaders/test_request_list.py +0 -0
- /crawlee-1.0.5b16/website/static/.nojekyll → /crawlee-1.2.1b5/tests/unit/server_static/test.js +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/sessions/test_cookies.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/sessions/test_models.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/sessions/test_session.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/sessions/test_session_pool.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storage_clients/_file_system/test_fs_rq_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storage_clients/_memory/test_memory_dataset_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storage_clients/_memory/test_memory_kvs_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storage_clients/_memory/test_memory_rq_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storage_clients/_redis/test_redis_dataset_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storage_clients/_redis/test_redis_kvs_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storage_clients/_redis/test_redis_rq_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storage_clients/_sql/test_sql_dataset_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storage_clients/_sql/test_sql_kvs_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storage_clients/_sql/test_sql_rq_client.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storages/conftest.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storages/test_request_manager_tandem.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storages/test_storage_instance_manager.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/test_cli.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/test_configuration.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/test_log_config.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/test_router.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/test_service_locator.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/.eslintrc.json +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/.yarnrc.yml +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/babel.config.js +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/build_api_reference.sh +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/generate_module_shortcuts.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/patches/@docusaurus+core+3.4.0.patch +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/patches/@docusaurus+core+3.5.2.patch +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/roa-loader/index.js +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/roa-loader/package.json +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/sidebars.js +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/ApiLink.jsx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Button.jsx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Button.module.css +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/CopyButton.jsx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/CopyButton.module.css +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Gradients.jsx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Highlights.jsx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Highlights.module.css +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/HomepageCliExample.jsx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/HomepageCliExample.module.css +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/HomepageCtaSection.jsx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/HomepageCtaSection.module.css +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/HomepageHeroSection.jsx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/HomepageHeroSection.module.css +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/LanguageInfoWidget.jsx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/LanguageInfoWidget.module.css +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/LanguageSwitch.jsx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/LanguageSwitch.module.css +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/RiverSection.jsx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/RiverSection.module.css +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/ThreeCardsWithIcon.jsx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/ThreeCardsWithIcon.module.css +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/animated-crawlee-logo-dark.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/animated-crawlee-logo-light.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/RunnableCodeBlock.jsx +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/RunnableCodeBlock.module.css +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/css/custom.css +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/pages/home_page_example.py +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/pages/index.js +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/pages/index.module.css +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/ColorModeToggle/dark-mode-icon.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/ColorModeToggle/index.js +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/ColorModeToggle/light-mode-icon.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/ColorModeToggle/styles.module.css +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/DocItem/Layout/index.js +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/DocItem/Layout/styles.module.css +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/Footer/LinkItem/index.js +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/Footer/LinkItem/index.module.css +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/Footer/index.js +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/Footer/index.module.css +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/MDXComponents/A.js +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/Navbar/Content/index.js +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/Navbar/Content/styles.module.css +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/Navbar/Logo/index.js +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/Navbar/Logo/index.module.css +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/Navbar/MobileSidebar/Header/index.js +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/Navbar/MobileSidebar/Header/index.module.css +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/Navbar/MobileSidebar/Layout/index.js +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/Navbar/MobileSidebar/PrimaryMenu/index.js +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/Navbar/MobileSidebar/index.js +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/NavbarItem/ComponentTypes.js +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/font/lota.woff +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/font/lota.woff2 +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/API.png +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/arrow_right.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/auto-scaling-dark.webp +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/auto-scaling-light.webp +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/check.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/chrome-scrape-dark.gif +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/chrome-scrape-light.gif +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/cloud_icon.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/community-dark-icon.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/community-light-icon.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/crawlee-dark-new.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/crawlee-dark.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/crawlee-javascript-dark.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/crawlee-javascript-light.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/crawlee-light-new.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/crawlee-light.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/crawlee-logo-monocolor.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/crawlee-logo.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/crawlee-python-dark.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/crawlee-python-light.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/crawlee-python-og.png +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/defaults-dark-icon.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/defaults-light-icon.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/discord-brand-dark.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/discord-brand.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/docusaurus.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/external-link.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/favicon.ico +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/favorite-tools-dark.webp +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/favorite-tools-light.webp +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/features/auto-scaling.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/features/automate-everything.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/features/fingerprints.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/features/node-requests.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/features/runs-on-py.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/features/storage.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/features/works-everywhere.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/fill-and-submit-web-form/00.jpg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/fill-and-submit-web-form/01.jpg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/fill-and-submit-web-form/02.jpg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/fill-and-submit-web-form/03.jpg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/getting-started/current-price.jpg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/getting-started/scraping-practice.jpg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/getting-started/select-an-element.jpg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/getting-started/selected-element.jpg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/getting-started/sku.jpg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/getting-started/title.jpg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/github-brand-dark.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/github-brand.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/guides/jaeger_otel_search_view_example.png +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/guides/jaeger_otel_trace_example.png +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/hearth copy.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/hearth.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/javascript_logo.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/js_file.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/logo-big.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/logo-blur.png +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/logo-blur.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/logo-zoom.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/menu-arrows.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/oss_logo.png +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/puppeteer-live-view-dashboard.png +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/puppeteer-live-view-detail.png +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/queue-dark-icon.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/queue-light-icon.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/resuming-paused-crawl/00.webp +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/resuming-paused-crawl/01.webp +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/robot.png +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/routing-dark-icon.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/routing-light-icon.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/scraping-utils-dark-icon.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/scraping-utils-light-icon.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/smart-proxy-dark.webp +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/smart-proxy-light.webp +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/source_code.png +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/system.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/triangles_dark.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/triangles_light.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/workflow.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/zero-setup-dark-icon.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/zero-setup-light-icon.svg +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/js/custom.js +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/robots.txt +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/tools/docs-prettier.config.js +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/tools/utils/externalLink.js +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/tools/website_gif/chrome-scrape-dark.gif +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/tools/website_gif/chrome-scrape-dark.mp4 +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/tools/website_gif/chrome-scrape-light.gif +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/tools/website_gif/chrome-scrape-light.mp4 +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/tools/website_gif/website_gif.mjs +0 -0
- {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/tsconfig.eslint.json +0 -0
|
@@ -24,7 +24,7 @@ jobs:
|
|
|
24
24
|
|
|
25
25
|
steps:
|
|
26
26
|
- name: Checkout repository
|
|
27
|
-
uses: actions/checkout@
|
|
27
|
+
uses: actions/checkout@v6
|
|
28
28
|
with:
|
|
29
29
|
token: ${{ secrets.APIFY_SERVICE_ACCOUNT_GITHUB_TOKEN }}
|
|
30
30
|
ref: ${{ github.event_name == 'workflow_call' && inputs.ref || github.ref }}
|
|
@@ -67,6 +67,10 @@ jobs:
|
|
|
67
67
|
uses: actions/deploy-pages@v4
|
|
68
68
|
|
|
69
69
|
- name: Invalidate CloudFront cache
|
|
70
|
-
run:
|
|
70
|
+
run: |
|
|
71
|
+
gh workflow run invalidate-cloudfront.yml \
|
|
72
|
+
--repo apify/apify-docs-private \
|
|
73
|
+
--field deployment=crawlee-web
|
|
74
|
+
echo "✅ CloudFront cache invalidation workflow triggered successfully"
|
|
71
75
|
env:
|
|
72
76
|
GITHUB_TOKEN: ${{ secrets.APIFY_SERVICE_ACCOUNT_GITHUB_TOKEN }}
|
|
@@ -36,6 +36,7 @@ jobs:
|
|
|
36
36
|
httpbin_url: ${{ secrets.APIFY_HTTPBIN_TOKEN && format('https://httpbin.apify.actor?token={0}', secrets.APIFY_HTTPBIN_TOKEN) || 'https://httpbin.org'}}
|
|
37
37
|
with:
|
|
38
38
|
python-versions: '["3.10", "3.11", "3.12", "3.13", "3.14"]'
|
|
39
|
+
os: '["ubuntu-latest", "windows-latest", "macos-latest"]'
|
|
39
40
|
|
|
40
41
|
docs_check:
|
|
41
42
|
name: Docs check
|
|
@@ -3,22 +3,57 @@
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
5
|
<!-- git-cliff-unreleased-start -->
|
|
6
|
-
## 1.
|
|
6
|
+
## 1.2.1 - **not yet released**
|
|
7
|
+
|
|
8
|
+
### 🐛 Bug Fixes
|
|
9
|
+
|
|
10
|
+
- Fix short error summary ([#1605](https://github.com/apify/crawlee-python/pull/1605)) ([b751208](https://github.com/apify/crawlee-python/commit/b751208d9a56e9d923e4559baeba35e2eede0450)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1602](https://github.com/apify/crawlee-python/issues/1602)
|
|
11
|
+
- Freeze core `Request` fields ([#1603](https://github.com/apify/crawlee-python/pull/1603)) ([ae6d86b](https://github.com/apify/crawlee-python/commit/ae6d86b8c82900116032596201d94cd7875aaadc)) by [@Mantisus](https://github.com/Mantisus)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
<!-- git-cliff-unreleased-end -->
|
|
15
|
+
## [1.2.0](https://github.com/apify/crawlee-python/releases/tag/v1.2.0) (2025-12-08)
|
|
16
|
+
|
|
17
|
+
### 🚀 Features
|
|
18
|
+
|
|
19
|
+
- Add additional kwargs to Crawler's export_data ([#1597](https://github.com/apify/crawlee-python/pull/1597)) ([5977f37](https://github.com/apify/crawlee-python/commit/5977f376b93a7c0d4dd53f0d331a4b04fedba2c6)) by [@vdusek](https://github.com/vdusek), closes [#526](https://github.com/apify/crawlee-python/issues/526)
|
|
20
|
+
- Add `goto_options` for `PlaywrightCrawler` ([#1599](https://github.com/apify/crawlee-python/pull/1599)) ([0b82f3b](https://github.com/apify/crawlee-python/commit/0b82f3b6fb175223ea2aa5b348afcd5fdb767972)) by [@Mantisus](https://github.com/Mantisus), closes [#1576](https://github.com/apify/crawlee-python/issues/1576)
|
|
21
|
+
|
|
22
|
+
### 🐛 Bug Fixes
|
|
23
|
+
|
|
24
|
+
- Only apply requestHandlerTimeout to request handler ([#1474](https://github.com/apify/crawlee-python/pull/1474)) ([0dfb6c2](https://github.com/apify/crawlee-python/commit/0dfb6c2a13b6650736245fa39b3fbff397644df7)) by [@janbuchar](https://github.com/janbuchar)
|
|
25
|
+
- Handle the case when `error_handler` returns `Request` ([#1595](https://github.com/apify/crawlee-python/pull/1595)) ([8a961a2](https://github.com/apify/crawlee-python/commit/8a961a2b07d0d33a7302dbb13c17f3d90999d390)) by [@Mantisus](https://github.com/Mantisus)
|
|
26
|
+
- Align `Request.state` transitions with `Request` lifecycle ([#1601](https://github.com/apify/crawlee-python/pull/1601)) ([383225f](https://github.com/apify/crawlee-python/commit/383225f9f055d95ffb1302b8cf96f42ec264f1fc)) by [@Mantisus](https://github.com/Mantisus)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
## [1.1.1](https://github.com/apify/crawlee-python/releases/tag/v1.1.1) (2025-12-02)
|
|
30
|
+
|
|
31
|
+
### 🐛 Bug Fixes
|
|
32
|
+
|
|
33
|
+
- Unify separators in `unique_key` construction ([#1569](https://github.com/apify/crawlee-python/pull/1569)) ([af46a37](https://github.com/apify/crawlee-python/commit/af46a3733b059a8052489296e172f005def953f7)) by [@vdusek](https://github.com/vdusek), closes [#1512](https://github.com/apify/crawlee-python/issues/1512)
|
|
34
|
+
- Fix `same-domain` strategy ignoring public suffix ([#1572](https://github.com/apify/crawlee-python/pull/1572)) ([3d018b2](https://github.com/apify/crawlee-python/commit/3d018b21a28a4bee493829783057188d6106a69b)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1571](https://github.com/apify/crawlee-python/issues/1571)
|
|
35
|
+
- Make context helpers work in `FailedRequestHandler` and `ErrorHandler` ([#1570](https://github.com/apify/crawlee-python/pull/1570)) ([b830019](https://github.com/apify/crawlee-python/commit/b830019350830ac33075316061659e2854f7f4a5)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1532](https://github.com/apify/crawlee-python/issues/1532)
|
|
36
|
+
- Fix non-ASCII character corruption in `FileSystemStorageClient` on systems without UTF-8 default encoding ([#1580](https://github.com/apify/crawlee-python/pull/1580)) ([f179f86](https://github.com/apify/crawlee-python/commit/f179f8671b0b6af9264450e4fef7e49d1cecd2bd)) by [@Mantisus](https://github.com/Mantisus), closes [#1579](https://github.com/apify/crawlee-python/issues/1579)
|
|
37
|
+
- Respect `<base>` when enqueuing ([#1590](https://github.com/apify/crawlee-python/pull/1590)) ([de517a1](https://github.com/apify/crawlee-python/commit/de517a1629cc29b20568143eb64018f216d4ba33)) by [@Mantisus](https://github.com/Mantisus), closes [#1589](https://github.com/apify/crawlee-python/issues/1589)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
## [1.1.0](https://github.com/apify/crawlee-python/releases/tag/v1.1.0) (2025-11-18)
|
|
7
41
|
|
|
8
42
|
### 🚀 Features
|
|
9
43
|
|
|
10
44
|
- Add `chrome` `BrowserType` for `PlaywrightCrawler` to use the Chrome browser ([#1487](https://github.com/apify/crawlee-python/pull/1487)) ([b06937b](https://github.com/apify/crawlee-python/commit/b06937bbc3afe3c936b554bfc503365c1b2c526b)) by [@Mantisus](https://github.com/Mantisus), closes [#1071](https://github.com/apify/crawlee-python/issues/1071)
|
|
11
45
|
- Add `RedisStorageClient` based on Redis v8.0+ ([#1406](https://github.com/apify/crawlee-python/pull/1406)) ([d08d13d](https://github.com/apify/crawlee-python/commit/d08d13d39203c24ab61fe254b0956d6744db3b5f)) by [@Mantisus](https://github.com/Mantisus)
|
|
12
46
|
- Add support for Python 3.14 ([#1553](https://github.com/apify/crawlee-python/pull/1553)) ([89e9130](https://github.com/apify/crawlee-python/commit/89e9130cabee0fbc974b29c26483b7fa0edf627c)) by [@Mantisus](https://github.com/Mantisus)
|
|
47
|
+
- Add `transform_request_function` parameter for `SitemapRequestLoader` ([#1525](https://github.com/apify/crawlee-python/pull/1525)) ([dc90127](https://github.com/apify/crawlee-python/commit/dc901271849b239ba2a947e8ebff8e1815e8c4fb)) by [@Mantisus](https://github.com/Mantisus)
|
|
13
48
|
|
|
14
49
|
### 🐛 Bug Fixes
|
|
15
50
|
|
|
16
51
|
- Improve indexing of the `request_queue_records` table for `SqlRequestQueueClient` ([#1527](https://github.com/apify/crawlee-python/pull/1527)) ([6509534](https://github.com/apify/crawlee-python/commit/65095346a9d8b703b10c91e0510154c3c48a4176)) by [@Mantisus](https://github.com/Mantisus), closes [#1526](https://github.com/apify/crawlee-python/issues/1526)
|
|
17
52
|
- Improve error handling for `RobotsTxtFile.load` ([#1524](https://github.com/apify/crawlee-python/pull/1524)) ([596a311](https://github.com/apify/crawlee-python/commit/596a31184914a254b3e7a81fd2f48ea8eda7db49)) by [@Mantisus](https://github.com/Mantisus)
|
|
18
53
|
- Fix `crawler_runtime` not being updated during run and only in the end ([#1540](https://github.com/apify/crawlee-python/pull/1540)) ([0d6c3f6](https://github.com/apify/crawlee-python/commit/0d6c3f6d3337ddb6cab4873747c28cf95605d550)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1541](https://github.com/apify/crawlee-python/issues/1541)
|
|
54
|
+
- Ensure persist state event emission when exiting `EventManager` context ([#1562](https://github.com/apify/crawlee-python/pull/1562)) ([6a44f17](https://github.com/apify/crawlee-python/commit/6a44f172600cbcacebab899082d6efc9105c4e03)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1560](https://github.com/apify/crawlee-python/issues/1560)
|
|
19
55
|
|
|
20
56
|
|
|
21
|
-
<!-- git-cliff-unreleased-end -->
|
|
22
57
|
## [1.0.4](https://github.com/apify/crawlee-python/releases/tag/v1.0.4) (2025-10-24)
|
|
23
58
|
|
|
24
59
|
### 🐛 Bug Fixes
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: crawlee
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.2.1b5
|
|
4
4
|
Summary: Crawlee for Python
|
|
5
5
|
Project-URL: Apify Homepage, https://apify.com
|
|
6
6
|
Project-URL: Changelog, https://crawlee.dev/python/docs/changelog
|
|
@@ -226,6 +226,7 @@ Classifier: Programming Language :: Python :: 3.13
|
|
|
226
226
|
Classifier: Programming Language :: Python :: 3.14
|
|
227
227
|
Classifier: Topic :: Software Development :: Libraries
|
|
228
228
|
Requires-Python: >=3.10
|
|
229
|
+
Requires-Dist: async-timeout>=5.0.1
|
|
229
230
|
Requires-Dist: cachetools>=5.5.0
|
|
230
231
|
Requires-Dist: colorama>=0.4.0
|
|
231
232
|
Requires-Dist: impit>=0.8.0
|
|
@@ -247,7 +248,7 @@ Requires-Dist: scikit-learn>=1.6.0; extra == 'adaptive-crawler'
|
|
|
247
248
|
Provides-Extra: all
|
|
248
249
|
Requires-Dist: aiosqlite>=0.21.0; extra == 'all'
|
|
249
250
|
Requires-Dist: apify-fingerprint-datapoints>=0.0.2; extra == 'all'
|
|
250
|
-
Requires-Dist: asyncpg>=0.24.0;
|
|
251
|
+
Requires-Dist: asyncpg>=0.24.0; extra == 'all'
|
|
251
252
|
Requires-Dist: beautifulsoup4[lxml]>=4.12.0; extra == 'all'
|
|
252
253
|
Requires-Dist: browserforge>=1.2.3; extra == 'all'
|
|
253
254
|
Requires-Dist: cookiecutter>=2.6.0; extra == 'all'
|
|
@@ -301,7 +302,7 @@ Requires-Dist: playwright>=1.27.0; extra == 'playwright'
|
|
|
301
302
|
Provides-Extra: redis
|
|
302
303
|
Requires-Dist: redis[hiredis]>=7.0.0; extra == 'redis'
|
|
303
304
|
Provides-Extra: sql-postgres
|
|
304
|
-
Requires-Dist: asyncpg>=0.24.0;
|
|
305
|
+
Requires-Dist: asyncpg>=0.24.0; extra == 'sql-postgres'
|
|
305
306
|
Requires-Dist: sqlalchemy[asyncio]<3.0.0,>=2.0.0; extra == 'sql-postgres'
|
|
306
307
|
Provides-Extra: sql-sqlite
|
|
307
308
|
Requires-Dist: aiosqlite>=0.21.0; extra == 'sql-sqlite'
|
{crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/deployment/code_examples/google/cloud_run_example.py
RENAMED
|
@@ -9,7 +9,7 @@ from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext
|
|
|
9
9
|
from crawlee.storage_clients import MemoryStorageClient
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
@get('/')
|
|
12
|
+
@get('/') # type: ignore[untyped-decorator]
|
|
13
13
|
async def main() -> str:
|
|
14
14
|
"""The crawler entry point that will be called when the HTTP endpoint is accessed."""
|
|
15
15
|
# highlight-start
|
|
@@ -6,10 +6,7 @@ from datetime import timedelta
|
|
|
6
6
|
import functions_framework
|
|
7
7
|
from flask import Request, Response
|
|
8
8
|
|
|
9
|
-
from crawlee.crawlers import
|
|
10
|
-
BeautifulSoupCrawler,
|
|
11
|
-
BeautifulSoupCrawlingContext,
|
|
12
|
-
)
|
|
9
|
+
from crawlee.crawlers import BeautifulSoupCrawler, BeautifulSoupCrawlingContext
|
|
13
10
|
from crawlee.storage_clients import MemoryStorageClient
|
|
14
11
|
|
|
15
12
|
|
|
@@ -51,7 +48,7 @@ async def main() -> str:
|
|
|
51
48
|
# highlight-end
|
|
52
49
|
|
|
53
50
|
|
|
54
|
-
@functions_framework.http
|
|
51
|
+
@functions_framework.http # type: ignore[untyped-decorator]
|
|
55
52
|
def crawlee_run(request: Request) -> Response:
|
|
56
53
|
# You can pass data to your crawler using `request`
|
|
57
54
|
function_id = request.headers['Function-Execution-Id']
|
|
@@ -30,7 +30,8 @@ async def main() -> None:
|
|
|
30
30
|
await crawler.run(['https://crawlee.dev'])
|
|
31
31
|
|
|
32
32
|
# Export the entire dataset to a CSV file.
|
|
33
|
-
|
|
33
|
+
# Use semicolon as delimiter and always quote strings.
|
|
34
|
+
await crawler.export_data(path='results.csv', delimiter=';', quoting='all')
|
|
34
35
|
|
|
35
36
|
|
|
36
37
|
if __name__ == '__main__':
|
|
@@ -30,7 +30,8 @@ async def main() -> None:
|
|
|
30
30
|
await crawler.run(['https://crawlee.dev'])
|
|
31
31
|
|
|
32
32
|
# Export the entire dataset to a JSON file.
|
|
33
|
-
|
|
33
|
+
# Set ensure_ascii=False to allow Unicode characters in the output.
|
|
34
|
+
await crawler.export_data(path='results.json', ensure_ascii=False)
|
|
34
35
|
|
|
35
36
|
|
|
36
37
|
if __name__ == '__main__':
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from collections.abc import Callable
|
|
3
|
+
|
|
4
|
+
from yarl import URL
|
|
5
|
+
|
|
6
|
+
from crawlee import RequestOptions, RequestTransformAction
|
|
7
|
+
from crawlee.crawlers import BeautifulSoupCrawler, BeautifulSoupCrawlingContext
|
|
8
|
+
from crawlee.http_clients import ImpitHttpClient
|
|
9
|
+
from crawlee.request_loaders import SitemapRequestLoader
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Create a transform_request_function that maps request options based on the host in
|
|
13
|
+
# the URL
|
|
14
|
+
def create_transform_request(
|
|
15
|
+
data_mapper: dict[str, dict],
|
|
16
|
+
) -> Callable[[RequestOptions], RequestOptions | RequestTransformAction]:
|
|
17
|
+
def transform_request(
|
|
18
|
+
request_options: RequestOptions,
|
|
19
|
+
) -> RequestOptions | RequestTransformAction:
|
|
20
|
+
# According to the Sitemap protocol, all URLs in a Sitemap must be from a single
|
|
21
|
+
# host.
|
|
22
|
+
request_host = URL(request_options['url']).host
|
|
23
|
+
|
|
24
|
+
if request_host and (mapping_data := data_mapper.get(request_host)):
|
|
25
|
+
# Set properties from the mapping data
|
|
26
|
+
if 'label' in mapping_data:
|
|
27
|
+
request_options['label'] = mapping_data['label']
|
|
28
|
+
if 'user_data' in mapping_data:
|
|
29
|
+
request_options['user_data'] = mapping_data['user_data']
|
|
30
|
+
|
|
31
|
+
return request_options
|
|
32
|
+
|
|
33
|
+
return 'unchanged'
|
|
34
|
+
|
|
35
|
+
return transform_request
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
async def main() -> None:
|
|
39
|
+
# Prepare data mapping for hosts
|
|
40
|
+
apify_host = URL('https://apify.com/sitemap.xml').host
|
|
41
|
+
crawlee_host = URL('https://crawlee.dev/sitemap.xml').host
|
|
42
|
+
|
|
43
|
+
if not apify_host or not crawlee_host:
|
|
44
|
+
raise ValueError('Unable to extract host from URLs')
|
|
45
|
+
|
|
46
|
+
data_map = {
|
|
47
|
+
apify_host: {
|
|
48
|
+
'label': 'apify',
|
|
49
|
+
'user_data': {'source': 'apify'},
|
|
50
|
+
},
|
|
51
|
+
crawlee_host: {
|
|
52
|
+
'label': 'crawlee',
|
|
53
|
+
'user_data': {'source': 'crawlee'},
|
|
54
|
+
},
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
# Initialize the SitemapRequestLoader with the transform function
|
|
58
|
+
async with SitemapRequestLoader(
|
|
59
|
+
# Set the sitemap URLs and the HTTP client
|
|
60
|
+
sitemap_urls=['https://crawlee.dev/sitemap.xml', 'https://apify.com/sitemap.xml'],
|
|
61
|
+
http_client=ImpitHttpClient(),
|
|
62
|
+
transform_request_function=create_transform_request(data_map),
|
|
63
|
+
) as sitemap_loader:
|
|
64
|
+
# Convert the sitemap loader to a request manager
|
|
65
|
+
request_manager = await sitemap_loader.to_tandem()
|
|
66
|
+
|
|
67
|
+
# Create and configure the crawler
|
|
68
|
+
crawler = BeautifulSoupCrawler(
|
|
69
|
+
request_manager=request_manager,
|
|
70
|
+
max_requests_per_crawl=10,
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
# Create default handler for requests without a specific label
|
|
74
|
+
@crawler.router.default_handler
|
|
75
|
+
async def handler(context: BeautifulSoupCrawlingContext) -> None:
|
|
76
|
+
source = context.request.user_data.get('source', 'unknown')
|
|
77
|
+
context.log.info(
|
|
78
|
+
f'Processing request: {context.request.url} from source: {source}'
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
# Create handler for requests labeled 'apify'
|
|
82
|
+
@crawler.router.handler('apify')
|
|
83
|
+
async def apify_handler(context: BeautifulSoupCrawlingContext) -> None:
|
|
84
|
+
source = context.request.user_data.get('source', 'unknown')
|
|
85
|
+
context.log.info(
|
|
86
|
+
f'Apify handler processing: {context.request.url} from source: {source}'
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
# Create handler for requests labeled 'crawlee'
|
|
90
|
+
@crawler.router.handler('crawlee')
|
|
91
|
+
async def crawlee_handler(context: BeautifulSoupCrawlingContext) -> None:
|
|
92
|
+
source = context.request.user_data.get('source', 'unknown')
|
|
93
|
+
context.log.info(
|
|
94
|
+
f'Crawlee handler processing: {context.request.url} from source: {source}'
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
await crawler.run()
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
if __name__ == '__main__':
|
|
101
|
+
asyncio.run(main())
|
|
@@ -11,7 +11,7 @@ import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
|
|
|
11
11
|
import JsonExample from '!!raw-loader!roa-loader!./code_examples/export_entire_dataset_to_file_json.py';
|
|
12
12
|
import CsvExample from '!!raw-loader!roa-loader!./code_examples/export_entire_dataset_to_file_csv.py';
|
|
13
13
|
|
|
14
|
-
This example demonstrates how to use the <ApiLink to="class/BasicCrawler#export_data">`BasicCrawler.export_data`</ApiLink> method of the crawler to export the entire default dataset to a single file. This method supports exporting data in either CSV or JSON format.
|
|
14
|
+
This example demonstrates how to use the <ApiLink to="class/BasicCrawler#export_data">`BasicCrawler.export_data`</ApiLink> method of the crawler to export the entire default dataset to a single file. This method supports exporting data in either CSV or JSON format and also accepts additional keyword arguments so you can fine-tune the underlying `json.dump` or `csv.writer` behavior.
|
|
15
15
|
|
|
16
16
|
:::note
|
|
17
17
|
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
---
|
|
2
|
+
id: using-sitemap-request-loader
|
|
3
|
+
title: Using sitemap request loader
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
import ApiLink from '@site/src/components/ApiLink';
|
|
7
|
+
|
|
8
|
+
import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
|
|
9
|
+
|
|
10
|
+
import SitemapRequestLoaderExample from '!!raw-loader!roa-loader!./code_examples/using_sitemap_request_loader.py';
|
|
11
|
+
|
|
12
|
+
This example demonstrates how to use <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> to crawl websites that provide `sitemap.xml` files following the [Sitemaps protocol](https://www.sitemaps.org/protocol.html). The <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> processes sitemaps in a streaming fashion without loading them entirely into memory, making it suitable for large sitemaps.
|
|
13
|
+
|
|
14
|
+
The example shows how to use the `transform_request_function` parameter to configure request options based on URL patterns. This allows you to modify request properties such as labels and user data based on the source URL, enabling different handling logic for different websites or sections.
|
|
15
|
+
|
|
16
|
+
The following code example implements processing of sitemaps from two different domains (Apify and Crawlee), with different labels assigned to requests based on their host. The `create_transform_request` function maps each host to the corresponding request configuration, while the crawler uses different handlers based on the assigned labels.
|
|
17
|
+
|
|
18
|
+
<RunnableCodeBlock className="language-python" language="python">
|
|
19
|
+
{SitemapRequestLoaderExample}
|
|
20
|
+
</RunnableCodeBlock>
|
|
21
|
+
|
|
22
|
+
For more information about request loaders, see the [Request loaders guide](../guides/request-loaders).
|
{crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/running_in_web_server/server.py
RENAMED
|
@@ -14,7 +14,7 @@ from .crawler import lifespan
|
|
|
14
14
|
app = FastAPI(lifespan=lifespan, title='Crawler app')
|
|
15
15
|
|
|
16
16
|
|
|
17
|
-
@app.get('/', response_class=HTMLResponse)
|
|
17
|
+
@app.get('/', response_class=HTMLResponse) # type: ignore[untyped-decorator]
|
|
18
18
|
def index() -> str:
|
|
19
19
|
return """
|
|
20
20
|
<!DOCTYPE html>
|
|
@@ -32,7 +32,7 @@ def index() -> str:
|
|
|
32
32
|
"""
|
|
33
33
|
|
|
34
34
|
|
|
35
|
-
@app.get('/scrape')
|
|
35
|
+
@app.get('/scrape') # type: ignore[untyped-decorator]
|
|
36
36
|
async def scrape_url(request: Request, url: str | None = None) -> dict:
|
|
37
37
|
if not url:
|
|
38
38
|
return {'url': 'missing', 'scrape result': 'no results'}
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "crawlee"
|
|
7
|
-
version = "1.
|
|
7
|
+
version = "1.2.1b5"
|
|
8
8
|
description = "Crawlee for Python"
|
|
9
9
|
authors = [{ name = "Apify Technologies s.r.o.", email = "support@apify.com" }]
|
|
10
10
|
license = { file = "LICENSE" }
|
|
@@ -34,6 +34,7 @@ keywords = [
|
|
|
34
34
|
"scraping",
|
|
35
35
|
]
|
|
36
36
|
dependencies = [
|
|
37
|
+
"async-timeout>=5.0.1",
|
|
37
38
|
"cachetools>=5.5.0",
|
|
38
39
|
"colorama>=0.4.0",
|
|
39
40
|
"impit>=0.8.0",
|
|
@@ -74,7 +75,7 @@ otel = [
|
|
|
74
75
|
]
|
|
75
76
|
sql_postgres = [
|
|
76
77
|
"sqlalchemy[asyncio]>=2.0.0,<3.0.0",
|
|
77
|
-
"asyncpg>=0.24.0
|
|
78
|
+
"asyncpg>=0.24.0"
|
|
78
79
|
]
|
|
79
80
|
sql_sqlite = [
|
|
80
81
|
"sqlalchemy[asyncio]>=2.0.0,<3.0.0",
|
|
@@ -101,7 +102,7 @@ dev = [
|
|
|
101
102
|
"build<2.0.0", # For e2e tests.
|
|
102
103
|
"dycw-pytest-only<3.0.0",
|
|
103
104
|
"fakeredis[probabilistic,json,lua]<3.0.0",
|
|
104
|
-
"mypy~=1.
|
|
105
|
+
"mypy~=1.19.0",
|
|
105
106
|
"pre-commit<5.0.0",
|
|
106
107
|
"proxy-py<3.0.0",
|
|
107
108
|
"pydoc-markdown<5.0.0",
|
|
@@ -117,7 +118,7 @@ dev = [
|
|
|
117
118
|
"types-colorama<1.0.0",
|
|
118
119
|
"types-psutil<8.0.0",
|
|
119
120
|
"types-python-dateutil<3.0.0",
|
|
120
|
-
"uvicorn[standard]
|
|
121
|
+
"uvicorn[standard]<1.0.0",
|
|
121
122
|
]
|
|
122
123
|
|
|
123
124
|
[tool.hatch.build.targets.wheel]
|
|
@@ -221,6 +222,13 @@ timeout = 300
|
|
|
221
222
|
markers = [
|
|
222
223
|
"run_alone: marks tests that must run in isolation",
|
|
223
224
|
]
|
|
225
|
+
# Ignore DeprecationWarnings coming from Uvicorn's internal imports. Uvicorn relies on deprecated
|
|
226
|
+
# modules from `websockets`, which triggers warnings during tests. These are safe to ignore until
|
|
227
|
+
# Uvicorn updates its internals.
|
|
228
|
+
filterwarnings = [
|
|
229
|
+
"ignore:websockets.legacy is deprecated:DeprecationWarning",
|
|
230
|
+
"ignore:websockets.server.WebSocketServerProtocol is deprecated:DeprecationWarning",
|
|
231
|
+
]
|
|
224
232
|
|
|
225
233
|
[tool.mypy]
|
|
226
234
|
python_version = "3.10"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from importlib import metadata
|
|
2
2
|
|
|
3
|
-
from ._request import Request, RequestOptions
|
|
3
|
+
from ._request import Request, RequestOptions, RequestState
|
|
4
4
|
from ._service_locator import service_locator
|
|
5
5
|
from ._types import ConcurrencySettings, EnqueueStrategy, HttpHeaders, RequestTransformAction, SkippedReason
|
|
6
6
|
from ._utils.globs import Glob
|
|
@@ -14,6 +14,7 @@ __all__ = [
|
|
|
14
14
|
'HttpHeaders',
|
|
15
15
|
'Request',
|
|
16
16
|
'RequestOptions',
|
|
17
|
+
'RequestState',
|
|
17
18
|
'RequestTransformAction',
|
|
18
19
|
'SkippedReason',
|
|
19
20
|
'service_locator',
|
|
@@ -34,14 +34,14 @@ class RequestState(IntEnum):
|
|
|
34
34
|
class CrawleeRequestData(BaseModel):
|
|
35
35
|
"""Crawlee-specific configuration stored in the `user_data`."""
|
|
36
36
|
|
|
37
|
-
max_retries: Annotated[int | None, Field(alias='maxRetries')] = None
|
|
37
|
+
max_retries: Annotated[int | None, Field(alias='maxRetries', frozen=True)] = None
|
|
38
38
|
"""Maximum number of retries for this request. Allows to override the global `max_request_retries` option of
|
|
39
39
|
`BasicCrawler`."""
|
|
40
40
|
|
|
41
41
|
enqueue_strategy: Annotated[EnqueueStrategy | None, Field(alias='enqueueStrategy')] = None
|
|
42
42
|
"""The strategy that was used for enqueuing the request."""
|
|
43
43
|
|
|
44
|
-
state: RequestState
|
|
44
|
+
state: RequestState = RequestState.UNPROCESSED
|
|
45
45
|
"""Describes the request's current lifecycle state."""
|
|
46
46
|
|
|
47
47
|
session_rotation_count: Annotated[int | None, Field(alias='sessionRotationCount')] = None
|
|
@@ -137,6 +137,8 @@ class RequestOptions(TypedDict):
|
|
|
137
137
|
always_enqueue: NotRequired[bool]
|
|
138
138
|
user_data: NotRequired[dict[str, JsonSerializable]]
|
|
139
139
|
no_retry: NotRequired[bool]
|
|
140
|
+
enqueue_strategy: NotRequired[EnqueueStrategy]
|
|
141
|
+
max_retries: NotRequired[int | None]
|
|
140
142
|
|
|
141
143
|
|
|
142
144
|
@docs_group('Storage data')
|
|
@@ -166,7 +168,7 @@ class Request(BaseModel):
|
|
|
166
168
|
|
|
167
169
|
model_config = ConfigDict(validate_by_name=True, validate_by_alias=True)
|
|
168
170
|
|
|
169
|
-
unique_key: Annotated[str, Field(alias='uniqueKey')]
|
|
171
|
+
unique_key: Annotated[str, Field(alias='uniqueKey', frozen=True)]
|
|
170
172
|
"""A unique key identifying the request. Two requests with the same `unique_key` are considered as pointing
|
|
171
173
|
to the same URL.
|
|
172
174
|
|
|
@@ -178,17 +180,18 @@ class Request(BaseModel):
|
|
|
178
180
|
and specify which URLs shall be considered equal.
|
|
179
181
|
"""
|
|
180
182
|
|
|
181
|
-
url: Annotated[str, BeforeValidator(validate_http_url), Field()]
|
|
183
|
+
url: Annotated[str, BeforeValidator(validate_http_url), Field(frozen=True)]
|
|
182
184
|
"""The URL of the web page to crawl. Must be a valid HTTP or HTTPS URL, and may include query parameters
|
|
183
185
|
and fragments."""
|
|
184
186
|
|
|
185
|
-
method: HttpMethod = 'GET'
|
|
187
|
+
method: Annotated[HttpMethod, Field(frozen=True)] = 'GET'
|
|
186
188
|
"""HTTP request method."""
|
|
187
189
|
|
|
188
190
|
payload: Annotated[
|
|
189
191
|
HttpPayload | None,
|
|
190
192
|
BeforeValidator(lambda v: v.encode() if isinstance(v, str) else v),
|
|
191
193
|
PlainSerializer(lambda v: v.decode() if isinstance(v, bytes) else v),
|
|
194
|
+
Field(frozen=True),
|
|
192
195
|
] = None
|
|
193
196
|
"""HTTP request payload."""
|
|
194
197
|
|
|
@@ -250,6 +253,8 @@ class Request(BaseModel):
|
|
|
250
253
|
keep_url_fragment: bool = False,
|
|
251
254
|
use_extended_unique_key: bool = False,
|
|
252
255
|
always_enqueue: bool = False,
|
|
256
|
+
enqueue_strategy: EnqueueStrategy | None = None,
|
|
257
|
+
max_retries: int | None = None,
|
|
253
258
|
**kwargs: Any,
|
|
254
259
|
) -> Self:
|
|
255
260
|
"""Create a new `Request` instance from a URL.
|
|
@@ -277,6 +282,9 @@ class Request(BaseModel):
|
|
|
277
282
|
`unique_key` computation. This is only relevant when `unique_key` is not provided.
|
|
278
283
|
always_enqueue: If set to `True`, the request will be enqueued even if it is already present in the queue.
|
|
279
284
|
Using this is not allowed when a custom `unique_key` is also provided and will result in a `ValueError`.
|
|
285
|
+
enqueue_strategy: The strategy that will be used for enqueuing the request.
|
|
286
|
+
max_retries: Maximum number of retries for this request. Allows to override the global `max_request_retries`
|
|
287
|
+
option of `BasicCrawler`.
|
|
280
288
|
**kwargs: Additional request properties.
|
|
281
289
|
"""
|
|
282
290
|
if unique_key is not None and always_enqueue:
|
|
@@ -299,7 +307,21 @@ class Request(BaseModel):
|
|
|
299
307
|
)
|
|
300
308
|
|
|
301
309
|
if always_enqueue:
|
|
302
|
-
unique_key = f'{
|
|
310
|
+
unique_key = f'{crypto_random_object_id()}|{unique_key}'
|
|
311
|
+
|
|
312
|
+
user_data_dict = kwargs.pop('user_data', {}) or {}
|
|
313
|
+
crawlee_data_dict = user_data_dict.get('__crawlee', {})
|
|
314
|
+
|
|
315
|
+
if max_retries is not None:
|
|
316
|
+
crawlee_data_dict['maxRetries'] = max_retries
|
|
317
|
+
|
|
318
|
+
if enqueue_strategy is not None:
|
|
319
|
+
crawlee_data_dict['enqueueStrategy'] = enqueue_strategy
|
|
320
|
+
|
|
321
|
+
crawlee_data = CrawleeRequestData(**crawlee_data_dict)
|
|
322
|
+
|
|
323
|
+
if crawlee_data:
|
|
324
|
+
user_data_dict['__crawlee'] = crawlee_data
|
|
303
325
|
|
|
304
326
|
request = cls(
|
|
305
327
|
url=url,
|
|
@@ -307,6 +329,7 @@ class Request(BaseModel):
|
|
|
307
329
|
method=method,
|
|
308
330
|
headers=headers,
|
|
309
331
|
payload=payload,
|
|
332
|
+
user_data=user_data_dict,
|
|
310
333
|
**kwargs,
|
|
311
334
|
)
|
|
312
335
|
|
|
@@ -352,7 +375,7 @@ class Request(BaseModel):
|
|
|
352
375
|
self.crawlee_data.crawl_depth = new_value
|
|
353
376
|
|
|
354
377
|
@property
|
|
355
|
-
def state(self) -> RequestState
|
|
378
|
+
def state(self) -> RequestState:
|
|
356
379
|
"""Crawlee-specific request handling state."""
|
|
357
380
|
return self.crawlee_data.state
|
|
358
381
|
|
|
@@ -365,10 +388,6 @@ class Request(BaseModel):
|
|
|
365
388
|
"""Crawlee-specific limit on the number of retries of the request."""
|
|
366
389
|
return self.crawlee_data.max_retries
|
|
367
390
|
|
|
368
|
-
@max_retries.setter
|
|
369
|
-
def max_retries(self, new_max_retries: int) -> None:
|
|
370
|
-
self.crawlee_data.max_retries = new_max_retries
|
|
371
|
-
|
|
372
391
|
@property
|
|
373
392
|
def session_rotation_count(self) -> int | None:
|
|
374
393
|
"""Crawlee-specific number of finished session rotations for the request."""
|
|
@@ -15,7 +15,7 @@ if TYPE_CHECKING:
|
|
|
15
15
|
import re
|
|
16
16
|
from collections.abc import Callable, Coroutine, Sequence
|
|
17
17
|
|
|
18
|
-
from typing_extensions import NotRequired, Required, Unpack
|
|
18
|
+
from typing_extensions import NotRequired, Required, Self, Unpack
|
|
19
19
|
|
|
20
20
|
from crawlee import Glob, Request
|
|
21
21
|
from crawlee._request import RequestOptions
|
|
@@ -643,6 +643,25 @@ class BasicCrawlingContext:
|
|
|
643
643
|
"""Return hash of the context. Each context is considered unique."""
|
|
644
644
|
return id(self)
|
|
645
645
|
|
|
646
|
+
def create_modified_copy(
|
|
647
|
+
self,
|
|
648
|
+
push_data: PushDataFunction | None = None,
|
|
649
|
+
add_requests: AddRequestsFunction | None = None,
|
|
650
|
+
get_key_value_store: GetKeyValueStoreFromRequestHandlerFunction | None = None,
|
|
651
|
+
) -> Self:
|
|
652
|
+
"""Create a modified copy of the crawling context with specified changes."""
|
|
653
|
+
original_fields = {field.name: getattr(self, field.name) for field in dataclasses.fields(self)}
|
|
654
|
+
modified_fields = {
|
|
655
|
+
key: value
|
|
656
|
+
for key, value in {
|
|
657
|
+
'push_data': push_data,
|
|
658
|
+
'add_requests': add_requests,
|
|
659
|
+
'get_key_value_store': get_key_value_store,
|
|
660
|
+
}.items()
|
|
661
|
+
if value
|
|
662
|
+
}
|
|
663
|
+
return self.__class__(**{**original_fields, **modified_fields})
|
|
664
|
+
|
|
646
665
|
|
|
647
666
|
class GetDataKwargs(TypedDict):
|
|
648
667
|
"""Keyword arguments for dataset's `get_data` method."""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import
|
|
3
|
+
import inspect
|
|
4
4
|
from collections.abc import Callable
|
|
5
5
|
from functools import wraps
|
|
6
6
|
from typing import Any, TypeVar
|
|
@@ -44,4 +44,4 @@ def ensure_context(method: T) -> T:
|
|
|
44
44
|
|
|
45
45
|
return await method(self, *args, **kwargs)
|
|
46
46
|
|
|
47
|
-
return async_wrapper if
|
|
47
|
+
return async_wrapper if inspect.iscoroutinefunction(method) else sync_wrapper # type: ignore[return-value]
|
|
@@ -163,6 +163,13 @@ async def export_csv_to_stream(
|
|
|
163
163
|
dst: TextIO,
|
|
164
164
|
**kwargs: Unpack[ExportDataCsvKwargs],
|
|
165
165
|
) -> None:
|
|
166
|
+
# Set lineterminator to '\n' if not explicitly provided. This prevents double line endings on Windows.
|
|
167
|
+
# The csv.writer default is '\r\n', which when written to a file in text mode on Windows gets converted
|
|
168
|
+
# to '\r\r\n' due to newline translation. By using '\n', we let the platform handle the line ending
|
|
169
|
+
# conversion: '\n' stays as '\n' on Unix, and becomes '\r\n' on Windows.
|
|
170
|
+
if 'lineterminator' not in kwargs:
|
|
171
|
+
kwargs['lineterminator'] = '\n'
|
|
172
|
+
|
|
166
173
|
writer = csv.writer(dst, **kwargs) # type: ignore[arg-type]
|
|
167
174
|
write_header = True
|
|
168
175
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
|
+
import inspect
|
|
4
5
|
from logging import getLogger
|
|
5
6
|
from typing import TYPE_CHECKING
|
|
6
7
|
|
|
@@ -49,7 +50,7 @@ class RecurringTask:
|
|
|
49
50
|
"""
|
|
50
51
|
sleep_time_secs = self.delay.total_seconds()
|
|
51
52
|
while True:
|
|
52
|
-
await self.func() if
|
|
53
|
+
await self.func() if inspect.iscoroutinefunction(self.func) else self.func()
|
|
53
54
|
await asyncio.sleep(sleep_time_secs)
|
|
54
55
|
|
|
55
56
|
def start(self) -> None:
|