crawlee 1.0.3__tar.gz → 1.1.1b7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crawlee might be problematic. See the package's registry page for more details.
- {crawlee-1.0.3 → crawlee-1.1.1b7}/.github/workflows/build_and_deploy_docs.yaml +2 -2
- {crawlee-1.0.3 → crawlee-1.1.1b7}/.github/workflows/release.yaml +3 -3
- {crawlee-1.0.3 → crawlee-1.1.1b7}/.github/workflows/run_code_checks.yaml +4 -3
- {crawlee-1.0.3 → crawlee-1.1.1b7}/.github/workflows/templates_e2e_tests.yaml +2 -2
- {crawlee-1.0.3 → crawlee-1.1.1b7}/.gitignore +1 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/CHANGELOG.md +41 -1
- {crawlee-1.0.3 → crawlee-1.1.1b7}/CONTRIBUTING.md +1 -1
- {crawlee-1.0.3 → crawlee-1.1.1b7}/Makefile +1 -1
- {crawlee-1.0.3 → crawlee-1.1.1b7}/PKG-INFO +9 -5
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/deployment/apify_platform.mdx +1 -1
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/using_browser_profiles_chrome.py +2 -4
- crawlee-1.1.1b7/docs/examples/code_examples/using_sitemap_request_loader.py +101 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/playwright_crawler_with_fingerprint_generator.mdx +1 -1
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/using_browser_profile.mdx +0 -2
- crawlee-1.1.1b7/docs/examples/using_sitemap_request_loader.mdx +22 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/architecture_overview.mdx +1 -1
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/avoid_blocking.mdx +1 -1
- crawlee-1.1.1b7/docs/guides/code_examples/storage_clients/redis_storage_client_basic_example.py +10 -0
- crawlee-1.1.1b7/docs/guides/code_examples/storage_clients/redis_storage_client_configuration_example.py +27 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/request_loaders.mdx +8 -2
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/storage_clients.mdx +175 -3
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/trace_and_monitor_crawlers.mdx +1 -1
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/09_running_in_cloud.mdx +1 -1
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/pyproject.toml +1 -1
- {crawlee-1.0.3 → crawlee-1.1.1b7}/pyproject.toml +25 -22
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_request.py +1 -1
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_types.py +20 -1
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_utils/recurring_task.py +15 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_utils/robots.py +17 -5
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_utils/urls.py +9 -2
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/browsers/_browser_pool.py +4 -1
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/browsers/_playwright_browser_controller.py +1 -1
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/browsers/_playwright_browser_plugin.py +17 -3
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/browsers/_types.py +1 -1
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/configuration.py +3 -1
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +3 -1
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +33 -13
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_basic/_basic_crawler.py +81 -57
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_playwright/_playwright_crawler.py +11 -4
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/events/_event_manager.py +3 -1
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/fingerprint_suite/_header_generator.py +2 -2
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/otel/crawler_instrumentor.py +3 -3
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/request_loaders/_sitemap_request_loader.py +22 -4
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/sessions/_session_pool.py +1 -1
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/statistics/_error_snapshotter.py +1 -1
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/statistics/_models.py +32 -1
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/statistics/_statistics.py +18 -32
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storage_clients/__init__.py +4 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storage_clients/_file_system/_dataset_client.py +2 -2
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storage_clients/_file_system/_key_value_store_client.py +3 -3
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storage_clients/_file_system/_request_queue_client.py +3 -3
- crawlee-1.1.1b7/src/crawlee/storage_clients/_redis/__init__.py +6 -0
- crawlee-1.1.1b7/src/crawlee/storage_clients/_redis/_client_mixin.py +295 -0
- crawlee-1.1.1b7/src/crawlee/storage_clients/_redis/_dataset_client.py +325 -0
- crawlee-1.1.1b7/src/crawlee/storage_clients/_redis/_key_value_store_client.py +264 -0
- crawlee-1.1.1b7/src/crawlee/storage_clients/_redis/_request_queue_client.py +586 -0
- crawlee-1.1.1b7/src/crawlee/storage_clients/_redis/_storage_client.py +146 -0
- crawlee-1.1.1b7/src/crawlee/storage_clients/_redis/_utils.py +23 -0
- crawlee-1.1.1b7/src/crawlee/storage_clients/_redis/lua_scripts/atomic_bloom_add_requests.lua +36 -0
- crawlee-1.1.1b7/src/crawlee/storage_clients/_redis/lua_scripts/atomic_fetch_request.lua +49 -0
- crawlee-1.1.1b7/src/crawlee/storage_clients/_redis/lua_scripts/atomic_set_add_requests.lua +37 -0
- crawlee-1.1.1b7/src/crawlee/storage_clients/_redis/lua_scripts/reclaim_stale_requests.lua +34 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storage_clients/_sql/_db_models.py +1 -2
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storage_clients/_sql/_storage_client.py +9 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/e2e/project_template/utils.py +3 -2
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/_autoscaling/test_autoscaled_pool.py +2 -4
- crawlee-1.1.1b7/tests/unit/_autoscaling/test_snapshotter.py +353 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/_utils/test_sitemap.py +0 -4
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/_utils/test_system.py +8 -6
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/browsers/test_playwright_browser_plugin.py +10 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/conftest.py +19 -9
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler.py +107 -6
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/crawlers/_basic/test_basic_crawler.py +134 -9
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py +37 -3
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/crawlers/_http/test_http_crawler.py +2 -2
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/crawlers/_parsel/test_parsel_crawler.py +37 -3
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/crawlers/_playwright/test_playwright_crawler.py +34 -1
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/events/test_event_manager.py +12 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/http_clients/test_http_clients.py +0 -5
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/request_loaders/test_sitemap_request_loader.py +35 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/server.py +12 -1
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/server_endpoints.py +11 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py +1 -1
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py +1 -1
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/storage_clients/_file_system/test_fs_rq_client.py +1 -1
- crawlee-1.1.1b7/tests/unit/storage_clients/_redis/test_redis_dataset_client.py +146 -0
- crawlee-1.1.1b7/tests/unit/storage_clients/_redis/test_redis_kvs_client.py +217 -0
- crawlee-1.1.1b7/tests/unit/storage_clients/_redis/test_redis_rq_client.py +257 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/storage_clients/_sql/test_sql_dataset_client.py +1 -1
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/storage_clients/_sql/test_sql_kvs_client.py +1 -1
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/storage_clients/_sql/test_sql_rq_client.py +1 -1
- crawlee-1.1.1b7/tests/unit/storages/conftest.py +39 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/storages/test_dataset.py +18 -4
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/storages/test_key_value_store.py +19 -4
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/storages/test_request_queue.py +23 -9
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/storages/test_storage_instance_manager.py +7 -20
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/test_configuration.py +30 -13
- {crawlee-1.0.3 → crawlee-1.1.1b7}/uv.lock +1136 -834
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/docusaurus.config.js +2 -2
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/package.json +5 -1
- crawlee-1.1.1b7/website/src/components/RunnableCodeBlock.jsx +42 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/pages/home_page_example.py +14 -9
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/pages/index.js +1 -1
- crawlee-1.1.1b7/website/static/.nojekyll +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/yarn.lock +742 -820
- crawlee-1.0.3/tests/unit/_autoscaling/test_snapshotter.py +0 -333
- crawlee-1.0.3/tests/unit/storages/conftest.py +0 -18
- crawlee-1.0.3/website/src/components/RunnableCodeBlock.jsx +0 -40
- crawlee-1.0.3/website/static/img/apify_logo.svg +0 -5
- crawlee-1.0.3/website/static/img/apify_og_SDK.png +0 -0
- crawlee-1.0.3/website/static/img/apify_sdk.svg +0 -13
- crawlee-1.0.3/website/static/img/apify_sdk_white.svg +0 -13
- {crawlee-1.0.3 → crawlee-1.1.1b7}/.editorconfig +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/.github/CODEOWNERS +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/.github/pull_request_template.md +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/.github/workflows/check_pr_title.yaml +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/.github/workflows/pre_release.yaml +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/.github/workflows/update_new_issue.yaml +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/.markdownlint.yaml +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/.pre-commit-config.yaml +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/LICENSE +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/README.md +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/deployment/code_examples/apify/crawler_as_actor_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/deployment/code_examples/apify/get_public_url.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/deployment/code_examples/apify/log_with_config_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/deployment/code_examples/apify/proxy_advanced_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/deployment/code_examples/apify/proxy_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/deployment/code_examples/google/cloud_run_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/deployment/code_examples/google/google_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/deployment/google_cloud.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/deployment/google_cloud_run.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/add_data_to_dataset.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/beautifulsoup_crawler.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/capture_screenshot_using_playwright.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/capturing_page_snapshots_with_error_snapshotter.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/adaptive_playwright_crawler.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/add_data_to_dataset_bs.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/add_data_to_dataset_dataset.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/add_data_to_dataset_pw.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/beautifulsoup_crawler.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/beautifulsoup_crawler_keep_alive.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/beautifulsoup_crawler_stop.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/capture_screenshot_using_playwright.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/configure_json_logging.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/crawl_all_links_on_website_bs.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/crawl_all_links_on_website_pw.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/crawl_multiple_urls_bs.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/crawl_multiple_urls_pw.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/crawl_specific_links_on_website_bs.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/crawl_specific_links_on_website_pw.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/crawl_website_with_relative_links_all_links.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/crawl_website_with_relative_links_same_domain.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/crawl_website_with_relative_links_same_hostname.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/crawl_website_with_relative_links_same_origin.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/export_entire_dataset_to_file_csv.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/export_entire_dataset_to_file_json.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/extract_and_add_specific_links_on_website_bs.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/extract_and_add_specific_links_on_website_pw.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/fill_and_submit_web_form_crawler.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/fill_and_submit_web_form_request.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/parsel_crawler.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/parsel_crawler_with_error_snapshotter.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/playwright_block_requests.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/playwright_crawler.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/playwright_crawler_with_camoufox.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/playwright_crawler_with_error_snapshotter.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/playwright_crawler_with_fingerprint_generator.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/respect_robots_on_skipped_request.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/respect_robots_txt_file.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/resuming_paused_crawl.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/using_browser_profiles_firefox.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/crawl_all_links_on_website.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/crawl_multiple_urls.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/crawl_specific_links_on_website.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/crawl_website_with_relative_links.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/crawler_keep_alive.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/crawler_stop.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/export_entire_dataset_to_file.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/fill_and_submit_web_form.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/json_logging.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/parsel_crawler.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/playwright_crawler.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/playwright_crawler_adaptive.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/playwright_crawler_with_block_requests.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/playwright_crawler_with_camoufox.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/respect_robots_txt_file.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/resuming_paused_crawl.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/avoid_blocking/default_fingerprint_generator_with_args.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/avoid_blocking/playwright_with_fingerprint_generator.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/creating_web_archive/manual_archiving_parsel_crawler.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/creating_web_archive/manual_archiving_playwright_crawler.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/creating_web_archive/simple_pw_through_proxy_pywb_server.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/error_handling/change_handle_error_status.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/error_handling/disable_retry.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/error_handling/handle_proxy_error.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/http_clients/parsel_curl_impersonate_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/http_clients/parsel_httpx_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/http_clients/parsel_impit_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/http_crawlers/beautifulsoup_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/http_crawlers/custom_crawler_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/http_crawlers/http_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/http_crawlers/parsel_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/login_crawler/http_login.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/login_crawler/playwright_login.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/playwright_crawler/browser_configuration_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/playwright_crawler/multiple_launch_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/playwright_crawler/plugin_browser_configuration_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/playwright_crawler/pre_navigation_hook_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/playwright_crawler_adaptive/handler.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/playwright_crawler_adaptive/init_beautifulsoup.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/playwright_crawler_adaptive/init_parsel.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/playwright_crawler_adaptive/init_prediction.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/playwright_crawler_adaptive/pre_nav_hooks.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/playwright_crawler_stagehand/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/playwright_crawler_stagehand/browser_classes.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/playwright_crawler_stagehand/stagehand_run.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/playwright_crawler_stagehand/support_classes.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/proxy_management/inspecting_bs_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/proxy_management/inspecting_pw_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/proxy_management/integration_bs_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/proxy_management/integration_pw_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/proxy_management/quick_start_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/proxy_management/session_bs_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/proxy_management/session_pw_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/proxy_management/tiers_bs_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/proxy_management/tiers_pw_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/request_loaders/rl_basic_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/request_loaders/rl_basic_example_with_persist.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/request_loaders/rl_tandem_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/request_loaders/rl_tandem_example_explicit.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/request_loaders/sitemap_basic_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/request_loaders/sitemap_example_with_persist.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/request_loaders/sitemap_tandem_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/request_loaders/sitemap_tandem_example_explicit.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/request_router/adaptive_crawler_handlers.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/request_router/basic_request_handlers.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/request_router/custom_router_default_only.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/request_router/error_handler.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/request_router/failed_request_handler.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/request_router/http_pre_navigation.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/request_router/playwright_pre_navigation.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/request_router/simple_default_handler.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/running_in_web_server/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/running_in_web_server/crawler.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/running_in_web_server/server.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/scaling_crawlers/max_tasks_per_minute_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/scaling_crawlers/min_and_max_concurrency_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/service_locator/service_conflicts.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/service_locator/service_crawler_configuration.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/service_locator/service_crawler_event_manager.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/service_locator/service_crawler_storage_client.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/service_locator/service_locator_configuration.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/service_locator/service_locator_event_manager.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/service_locator/service_locator_storage_client.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/service_locator/service_storage_configuration.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/service_locator/service_storage_storage_client.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/session_management/multi_sessions_http.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/session_management/one_session_http.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/session_management/sm_basic.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/session_management/sm_beautifulsoup.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/session_management/sm_http.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/session_management/sm_parsel.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/session_management/sm_playwright.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/session_management/sm_standalone.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/storage_clients/custom_storage_client_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/storage_clients/file_system_storage_client_basic_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/storage_clients/file_system_storage_client_configuration_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/storage_clients/memory_storage_client_basic_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/storage_clients/registering_storage_clients_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/storage_clients/sql_storage_client_basic_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/storage_clients/sql_storage_client_configuration_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/storages/cleaning_do_not_purge_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/storages/cleaning_purge_explicitly_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/storages/dataset_basic_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/storages/dataset_with_crawler_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/storages/dataset_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/storages/helper_add_requests_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/storages/helper_enqueue_links_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/storages/kvs_basic_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/storages/kvs_with_crawler_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/storages/kvs_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/storages/opening.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/storages/rq_basic_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/storages/rq_with_crawler_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/storages/rq_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/code_examples/trace_and_monitor_crawlers/instrument_crawler.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/crawler_login.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/creating_web_archive.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/error_handling.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/http_clients.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/http_crawlers.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/playwright_crawler.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/playwright_crawler_adaptive.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/playwright_crawler_stagehand.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/proxy_management.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/request_router.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/running_in_web_server.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/scaling_crawlers.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/service_locator.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/session_management.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/guides/storages.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/01_setting_up.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/02_first_crawler.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/03_adding_more_urls.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/04_real_world_project.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/05_crawling.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/06_scraping.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/07_saving_data.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/08_refactoring.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/code_examples/02_bs.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/code_examples/02_bs_better.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/code_examples/02_request_queue.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/code_examples/03_enqueue_strategy.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/code_examples/03_finding_new_links.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/code_examples/03_globs.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/code_examples/03_original_code.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/code_examples/03_transform_request.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/code_examples/04_sanity_check.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/code_examples/05_crawling_detail.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/code_examples/05_crawling_listing.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/code_examples/06_scraping.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/code_examples/07_final_code.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/code_examples/07_first_code.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/code_examples/08_main.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/code_examples/08_routes.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/code_examples/09_apify_sdk.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/code_examples/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/code_examples/routes.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/introduction/index.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/quick-start/code_examples/beautifulsoup_crawler_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/quick-start/code_examples/parsel_crawler_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/quick-start/code_examples/playwright_crawler_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/quick-start/code_examples/playwright_crawler_headful_example.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/quick-start/index.mdx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/upgrading/upgrading_to_v0x.md +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/docs/upgrading/upgrading_to_v1.md +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/renovate.json +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_autoscaling/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_autoscaling/_types.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_autoscaling/autoscaled_pool.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_autoscaling/py.typed +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_autoscaling/snapshotter.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_autoscaling/system_status.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_browserforge_workaround.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_cli.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_consts.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_log_config.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_service_locator.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_utils/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_utils/blocked.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_utils/byte_size.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_utils/console.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_utils/context.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_utils/crypto.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_utils/docs.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_utils/file.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_utils/globs.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_utils/html_to_text.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_utils/models.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_utils/raise_if_too_many_kwargs.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_utils/recoverable_state.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_utils/requests.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_utils/sitemap.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_utils/system.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_utils/time.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_utils/try_import.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_utils/wait.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/_utils/web.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/browsers/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/browsers/_browser_controller.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/browsers/_browser_plugin.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/browsers/_playwright_browser.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/browsers/py.typed +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_abstract_http/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_abstract_http/_abstract_http_parser.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_abstract_http/_http_crawling_context.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_abstract_http/py.typed +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_adaptive_playwright/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_adaptive_playwright/_result_comparator.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_adaptive_playwright/_utils.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_basic/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_basic/_basic_crawling_context.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_basic/_context_pipeline.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_basic/_logging_utils.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_basic/py.typed +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_beautifulsoup/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawler.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawling_context.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_parser.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_beautifulsoup/_utils.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_beautifulsoup/py.typed +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_http/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_http/_http_crawler.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_http/_http_parser.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_parsel/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_parsel/_parsel_crawler.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_parsel/_parsel_crawling_context.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_parsel/_parsel_parser.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_parsel/_utils.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_playwright/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_playwright/_playwright_crawling_context.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_playwright/_playwright_http_client.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_playwright/_types.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_playwright/_utils.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/_types.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/crawlers/py.typed +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/errors.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/events/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/events/_local_event_manager.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/events/_types.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/events/py.typed +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/fingerprint_suite/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/fingerprint_suite/_browserforge_adapter.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/fingerprint_suite/_consts.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/fingerprint_suite/_fingerprint_generator.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/fingerprint_suite/_types.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/fingerprint_suite/py.typed +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/http_clients/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/http_clients/_base.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/http_clients/_curl_impersonate.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/http_clients/_httpx.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/http_clients/_impit.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/otel/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/project_template/cookiecutter.json +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/project_template/hooks/post_gen_project.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/project_template/hooks/pre_gen_project.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/project_template/templates/main.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/project_template/templates/main_beautifulsoup.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/project_template/templates/main_parsel.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/project_template/templates/main_playwright.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/project_template/templates/main_playwright_camoufox.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/project_template/templates/routes_beautifulsoup.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/project_template/templates/routes_camoufox.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/project_template/templates/routes_parsel.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/project_template/templates/routes_playwright.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/project_template/templates/routes_playwright_camoufox.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/project_template/{{cookiecutter.project_name}}/.dockerignore +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/project_template/{{cookiecutter.project_name}}/README.md +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/project_template/{{cookiecutter.project_name}}/requirements.txt +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__main__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/main.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/routes.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/proxy_configuration.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/py.typed +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/request_loaders/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/request_loaders/_request_list.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/request_loaders/_request_loader.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/request_loaders/_request_manager.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/request_loaders/_request_manager_tandem.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/router.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/sessions/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/sessions/_cookies.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/sessions/_models.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/sessions/_session.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/sessions/py.typed +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/statistics/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/statistics/_error_tracker.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storage_clients/_base/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storage_clients/_base/_dataset_client.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storage_clients/_base/_key_value_store_client.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storage_clients/_base/_request_queue_client.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storage_clients/_base/_storage_client.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storage_clients/_base/py.typed +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storage_clients/_file_system/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storage_clients/_file_system/_storage_client.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storage_clients/_file_system/_utils.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storage_clients/_file_system/py.typed +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storage_clients/_memory/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storage_clients/_memory/_dataset_client.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storage_clients/_memory/_key_value_store_client.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storage_clients/_memory/_request_queue_client.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storage_clients/_memory/_storage_client.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storage_clients/_memory/py.typed +0 -0
- {crawlee-1.0.3/src/crawlee/storage_clients/_sql → crawlee-1.1.1b7/src/crawlee/storage_clients/_redis}/py.typed +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storage_clients/_sql/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storage_clients/_sql/_client_mixin.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storage_clients/_sql/_dataset_client.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storage_clients/_sql/_key_value_store_client.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storage_clients/_sql/_request_queue_client.py +0 -0
- {crawlee-1.0.3/src/crawlee/storage_clients → crawlee-1.1.1b7/src/crawlee/storage_clients/_sql}/py.typed +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storage_clients/models.py +0 -0
- {crawlee-1.0.3/src/crawlee/storages → crawlee-1.1.1b7/src/crawlee/storage_clients}/py.typed +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storages/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storages/_base.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storages/_dataset.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storages/_key_value_store.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storages/_request_queue.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storages/_storage_instance_manager.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/src/crawlee/storages/_utils.py +0 -0
- /crawlee-1.0.3/tests/__init__.py → /crawlee-1.1.1b7/src/crawlee/storages/py.typed +0 -0
- {crawlee-1.0.3/tests/e2e → crawlee-1.1.1b7/tests}/__init__.py +0 -0
- {crawlee-1.0.3/tests/unit → crawlee-1.1.1b7/tests/e2e}/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/e2e/conftest.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/e2e/project_template/test_static_crawlers_templates.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/README.md +0 -0
- /crawlee-1.0.3/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawling_context.py → /crawlee-1.1.1b7/tests/unit/__init__.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/_autoscaling/test_system_status.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/_statistics/test_error_tracker.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/_statistics/test_periodic_logging.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/_statistics/test_persistence.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/_statistics/test_request_processing_record.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/_utils/test_byte_size.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/_utils/test_console.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/_utils/test_crypto.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/_utils/test_file.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/_utils/test_globs.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/_utils/test_html_to_text.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/_utils/test_measure_time.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/_utils/test_raise_if_too_many_kwargs.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/_utils/test_recurring_task.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/_utils/test_requests.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/_utils/test_robots.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/_utils/test_timedelata_ms.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/_utils/test_urls.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/browsers/test_browser_pool.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/browsers/test_playwright_browser.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/browsers/test_playwright_browser_controller.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler_statistics.py +0 -0
- /crawlee-1.0.3/website/static/.nojekyll → /crawlee-1.1.1b7/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawling_context.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/crawlers/_adaptive_playwright/test_predictor.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/crawlers/_basic/test_context_pipeline.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/events/test_local_event_manager.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/fingerprint_suite/test_adapters.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/fingerprint_suite/test_header_generator.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/http_clients/test_httpx.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/otel/test_crawler_instrumentor.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/proxy_configuration/test_new_proxy_info.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/proxy_configuration/test_tiers.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/request_loaders/test_request_list.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/sessions/test_cookies.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/sessions/test_models.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/sessions/test_session.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/sessions/test_session_pool.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/storage_clients/_memory/test_memory_dataset_client.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/storage_clients/_memory/test_memory_kvs_client.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/storage_clients/_memory/test_memory_rq_client.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/storages/test_request_manager_tandem.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/test_cli.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/test_log_config.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/test_router.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/tests/unit/test_service_locator.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/.eslintrc.json +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/.yarnrc.yml +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/babel.config.js +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/build_api_reference.sh +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/generate_module_shortcuts.py +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/patches/@docusaurus+core+3.4.0.patch +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/patches/@docusaurus+core+3.5.2.patch +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/roa-loader/index.js +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/roa-loader/package.json +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/sidebars.js +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/components/ApiLink.jsx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/components/Button.jsx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/components/Button.module.css +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/components/CopyButton.jsx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/components/CopyButton.module.css +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/components/Gradients.jsx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/components/Highlights.jsx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/components/Highlights.module.css +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/components/Homepage/HomepageCliExample.jsx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/components/Homepage/HomepageCliExample.module.css +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/components/Homepage/HomepageCtaSection.jsx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/components/Homepage/HomepageCtaSection.module.css +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/components/Homepage/HomepageHeroSection.jsx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/components/Homepage/HomepageHeroSection.module.css +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/components/Homepage/LanguageInfoWidget.jsx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/components/Homepage/LanguageInfoWidget.module.css +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/components/Homepage/LanguageSwitch.jsx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/components/Homepage/LanguageSwitch.module.css +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/components/Homepage/RiverSection.jsx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/components/Homepage/RiverSection.module.css +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/components/Homepage/ThreeCardsWithIcon.jsx +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/components/Homepage/ThreeCardsWithIcon.module.css +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/components/Homepage/animated-crawlee-logo-dark.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/components/Homepage/animated-crawlee-logo-light.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/components/RunnableCodeBlock.module.css +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/css/custom.css +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/pages/index.module.css +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/theme/ColorModeToggle/dark-mode-icon.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/theme/ColorModeToggle/index.js +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/theme/ColorModeToggle/light-mode-icon.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/theme/ColorModeToggle/styles.module.css +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/theme/DocItem/Layout/index.js +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/theme/DocItem/Layout/styles.module.css +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/theme/Footer/LinkItem/index.js +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/theme/Footer/LinkItem/index.module.css +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/theme/Footer/index.js +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/theme/Footer/index.module.css +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/theme/MDXComponents/A.js +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/theme/Navbar/Content/index.js +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/theme/Navbar/Content/styles.module.css +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/theme/Navbar/Logo/index.js +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/theme/Navbar/Logo/index.module.css +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/theme/Navbar/MobileSidebar/Header/index.js +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/theme/Navbar/MobileSidebar/Header/index.module.css +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/theme/Navbar/MobileSidebar/Layout/index.js +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/theme/Navbar/MobileSidebar/PrimaryMenu/index.js +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/theme/Navbar/MobileSidebar/index.js +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/src/theme/NavbarItem/ComponentTypes.js +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/font/lota.woff +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/font/lota.woff2 +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/API.png +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/arrow_right.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/auto-scaling-dark.webp +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/auto-scaling-light.webp +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/check.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/chrome-scrape-dark.gif +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/chrome-scrape-light.gif +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/cloud_icon.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/community-dark-icon.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/community-light-icon.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/crawlee-dark-new.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/crawlee-dark.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/crawlee-javascript-dark.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/crawlee-javascript-light.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/crawlee-light-new.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/crawlee-light.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/crawlee-logo-monocolor.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/crawlee-logo.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/crawlee-python-dark.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/crawlee-python-light.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/crawlee-python-og.png +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/defaults-dark-icon.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/defaults-light-icon.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/discord-brand-dark.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/discord-brand.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/docusaurus.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/external-link.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/favicon.ico +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/favorite-tools-dark.webp +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/favorite-tools-light.webp +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/features/auto-scaling.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/features/automate-everything.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/features/fingerprints.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/features/node-requests.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/features/runs-on-py.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/features/storage.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/features/works-everywhere.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/fill-and-submit-web-form/00.jpg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/fill-and-submit-web-form/01.jpg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/fill-and-submit-web-form/02.jpg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/fill-and-submit-web-form/03.jpg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/getting-started/current-price.jpg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/getting-started/scraping-practice.jpg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/getting-started/select-an-element.jpg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/getting-started/selected-element.jpg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/getting-started/sku.jpg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/getting-started/title.jpg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/github-brand-dark.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/github-brand.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/guides/jaeger_otel_search_view_example.png +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/guides/jaeger_otel_trace_example.png +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/hearth copy.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/hearth.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/javascript_logo.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/js_file.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/logo-big.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/logo-blur.png +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/logo-blur.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/logo-zoom.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/menu-arrows.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/oss_logo.png +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/puppeteer-live-view-dashboard.png +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/puppeteer-live-view-detail.png +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/queue-dark-icon.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/queue-light-icon.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/resuming-paused-crawl/00.webp +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/resuming-paused-crawl/01.webp +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/robot.png +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/routing-dark-icon.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/routing-light-icon.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/scraping-utils-dark-icon.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/scraping-utils-light-icon.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/smart-proxy-dark.webp +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/smart-proxy-light.webp +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/source_code.png +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/system.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/triangles_dark.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/triangles_light.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/workflow.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/zero-setup-dark-icon.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/img/zero-setup-light-icon.svg +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/js/custom.js +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/static/robots.txt +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/tools/docs-prettier.config.js +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/tools/utils/externalLink.js +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/tools/website_gif/chrome-scrape-dark.gif +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/tools/website_gif/chrome-scrape-dark.mp4 +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/tools/website_gif/chrome-scrape-light.gif +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/tools/website_gif/chrome-scrape-light.mp4 +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/tools/website_gif/website_gif.mjs +0 -0
- {crawlee-1.0.3 → crawlee-1.1.1b7}/website/tsconfig.eslint.json +0 -0
|
@@ -10,7 +10,7 @@ on:
|
|
|
10
10
|
|
|
11
11
|
env:
|
|
12
12
|
NODE_VERSION: 20
|
|
13
|
-
PYTHON_VERSION: 3.
|
|
13
|
+
PYTHON_VERSION: 3.14
|
|
14
14
|
|
|
15
15
|
jobs:
|
|
16
16
|
build_and_deploy_docs:
|
|
@@ -24,7 +24,7 @@ jobs:
|
|
|
24
24
|
|
|
25
25
|
steps:
|
|
26
26
|
- name: Checkout repository
|
|
27
|
-
uses: actions/checkout@
|
|
27
|
+
uses: actions/checkout@v6
|
|
28
28
|
with:
|
|
29
29
|
token: ${{ secrets.APIFY_SERVICE_ACCOUNT_GITHUB_TOKEN }}
|
|
30
30
|
ref: ${{ github.event_name == 'workflow_call' && inputs.ref || github.ref }}
|
|
@@ -47,13 +47,13 @@ jobs:
|
|
|
47
47
|
name: Lint check
|
|
48
48
|
uses: apify/workflows/.github/workflows/python_lint_check.yaml@main
|
|
49
49
|
with:
|
|
50
|
-
python-versions: '["3.10", "3.11", "3.12", "3.13"]'
|
|
50
|
+
python-versions: '["3.10", "3.11", "3.12", "3.13", "3.14"]'
|
|
51
51
|
|
|
52
52
|
type_check:
|
|
53
53
|
name: Type check
|
|
54
54
|
uses: apify/workflows/.github/workflows/python_type_check.yaml@main
|
|
55
55
|
with:
|
|
56
|
-
python-versions: '["3.10", "3.11", "3.12", "3.13"]'
|
|
56
|
+
python-versions: '["3.10", "3.11", "3.12", "3.13", "3.14"]'
|
|
57
57
|
|
|
58
58
|
unit_tests:
|
|
59
59
|
name: Unit tests
|
|
@@ -61,7 +61,7 @@ jobs:
|
|
|
61
61
|
secrets:
|
|
62
62
|
httpbin_url: ${{ secrets.APIFY_HTTPBIN_TOKEN && format('https://httpbin.apify.actor?token={0}', secrets.APIFY_HTTPBIN_TOKEN) || 'https://httpbin.org'}}
|
|
63
63
|
with:
|
|
64
|
-
python-versions: '["3.10", "3.11", "3.12", "3.13"]'
|
|
64
|
+
python-versions: '["3.10", "3.11", "3.12", "3.13", "3.14"]'
|
|
65
65
|
|
|
66
66
|
update_changelog:
|
|
67
67
|
name: Update changelog
|
|
@@ -21,13 +21,13 @@ jobs:
|
|
|
21
21
|
name: Lint check
|
|
22
22
|
uses: apify/workflows/.github/workflows/python_lint_check.yaml@main
|
|
23
23
|
with:
|
|
24
|
-
python-versions: '["3.10", "3.11", "3.12", "3.13"]'
|
|
24
|
+
python-versions: '["3.10", "3.11", "3.12", "3.13", "3.14"]'
|
|
25
25
|
|
|
26
26
|
type_check:
|
|
27
27
|
name: Type check
|
|
28
28
|
uses: apify/workflows/.github/workflows/python_type_check.yaml@main
|
|
29
29
|
with:
|
|
30
|
-
python-versions: '["3.10", "3.11", "3.12", "3.13"]'
|
|
30
|
+
python-versions: '["3.10", "3.11", "3.12", "3.13", "3.14"]'
|
|
31
31
|
|
|
32
32
|
unit_tests:
|
|
33
33
|
name: Unit tests
|
|
@@ -35,8 +35,9 @@ jobs:
|
|
|
35
35
|
secrets:
|
|
36
36
|
httpbin_url: ${{ secrets.APIFY_HTTPBIN_TOKEN && format('https://httpbin.apify.actor?token={0}', secrets.APIFY_HTTPBIN_TOKEN) || 'https://httpbin.org'}}
|
|
37
37
|
with:
|
|
38
|
-
python-versions: '["3.10", "3.11", "3.12", "3.13"]'
|
|
38
|
+
python-versions: '["3.10", "3.11", "3.12", "3.13", "3.14"]'
|
|
39
39
|
|
|
40
40
|
docs_check:
|
|
41
41
|
name: Docs check
|
|
42
42
|
uses: apify/workflows/.github/workflows/python_docs_check.yaml@main
|
|
43
|
+
secrets: inherit
|
|
@@ -7,7 +7,7 @@ on:
|
|
|
7
7
|
|
|
8
8
|
env:
|
|
9
9
|
NODE_VERSION: 22
|
|
10
|
-
PYTHON_VERSION: 3.
|
|
10
|
+
PYTHON_VERSION: 3.14
|
|
11
11
|
|
|
12
12
|
jobs:
|
|
13
13
|
end_to_end_tests:
|
|
@@ -24,7 +24,7 @@ jobs:
|
|
|
24
24
|
|
|
25
25
|
steps:
|
|
26
26
|
- name: Checkout repository
|
|
27
|
-
uses: actions/checkout@
|
|
27
|
+
uses: actions/checkout@v6
|
|
28
28
|
|
|
29
29
|
- name: Setup node
|
|
30
30
|
uses: actions/setup-node@v6
|
|
@@ -2,6 +2,46 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
+
<!-- git-cliff-unreleased-start -->
|
|
6
|
+
## 1.1.1 - **not yet released**
|
|
7
|
+
|
|
8
|
+
### 🐛 Bug Fixes
|
|
9
|
+
|
|
10
|
+
- Unify separators in `unique_key` construction ([#1569](https://github.com/apify/crawlee-python/pull/1569)) ([af46a37](https://github.com/apify/crawlee-python/commit/af46a3733b059a8052489296e172f005def953f7)) by [@vdusek](https://github.com/vdusek), closes [#1512](https://github.com/apify/crawlee-python/issues/1512)
|
|
11
|
+
- Fix `same-domain` strategy ignoring public suffix ([#1572](https://github.com/apify/crawlee-python/pull/1572)) ([3d018b2](https://github.com/apify/crawlee-python/commit/3d018b21a28a4bee493829783057188d6106a69b)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1571](https://github.com/apify/crawlee-python/issues/1571)
|
|
12
|
+
- Make context helpers work in `FailedRequestHandler` and `ErrorHandler` ([#1570](https://github.com/apify/crawlee-python/pull/1570)) ([b830019](https://github.com/apify/crawlee-python/commit/b830019350830ac33075316061659e2854f7f4a5)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1532](https://github.com/apify/crawlee-python/issues/1532)
|
|
13
|
+
- Fix non-ASCII character corruption in `FileSystemStorageClient` on systems without UTF-8 default encoding ([#1580](https://github.com/apify/crawlee-python/pull/1580)) ([f179f86](https://github.com/apify/crawlee-python/commit/f179f8671b0b6af9264450e4fef7e49d1cecd2bd)) by [@Mantisus](https://github.com/Mantisus), closes [#1579](https://github.com/apify/crawlee-python/issues/1579)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
<!-- git-cliff-unreleased-end -->
|
|
17
|
+
## [1.1.0](https://github.com/apify/crawlee-python/releases/tag/v1.1.0) (2025-11-18)
|
|
18
|
+
|
|
19
|
+
### 🚀 Features
|
|
20
|
+
|
|
21
|
+
- Add `chrome` `BrowserType` for `PlaywrightCrawler` to use the Chrome browser ([#1487](https://github.com/apify/crawlee-python/pull/1487)) ([b06937b](https://github.com/apify/crawlee-python/commit/b06937bbc3afe3c936b554bfc503365c1b2c526b)) by [@Mantisus](https://github.com/Mantisus), closes [#1071](https://github.com/apify/crawlee-python/issues/1071)
|
|
22
|
+
- Add `RedisStorageClient` based on Redis v8.0+ ([#1406](https://github.com/apify/crawlee-python/pull/1406)) ([d08d13d](https://github.com/apify/crawlee-python/commit/d08d13d39203c24ab61fe254b0956d6744db3b5f)) by [@Mantisus](https://github.com/Mantisus)
|
|
23
|
+
- Add support for Python 3.14 ([#1553](https://github.com/apify/crawlee-python/pull/1553)) ([89e9130](https://github.com/apify/crawlee-python/commit/89e9130cabee0fbc974b29c26483b7fa0edf627c)) by [@Mantisus](https://github.com/Mantisus)
|
|
24
|
+
- Add `transform_request_function` parameter for `SitemapRequestLoader` ([#1525](https://github.com/apify/crawlee-python/pull/1525)) ([dc90127](https://github.com/apify/crawlee-python/commit/dc901271849b239ba2a947e8ebff8e1815e8c4fb)) by [@Mantisus](https://github.com/Mantisus)
|
|
25
|
+
|
|
26
|
+
### 🐛 Bug Fixes
|
|
27
|
+
|
|
28
|
+
- Improve indexing of the `request_queue_records` table for `SqlRequestQueueClient` ([#1527](https://github.com/apify/crawlee-python/pull/1527)) ([6509534](https://github.com/apify/crawlee-python/commit/65095346a9d8b703b10c91e0510154c3c48a4176)) by [@Mantisus](https://github.com/Mantisus), closes [#1526](https://github.com/apify/crawlee-python/issues/1526)
|
|
29
|
+
- Improve error handling for `RobotsTxtFile.load` ([#1524](https://github.com/apify/crawlee-python/pull/1524)) ([596a311](https://github.com/apify/crawlee-python/commit/596a31184914a254b3e7a81fd2f48ea8eda7db49)) by [@Mantisus](https://github.com/Mantisus)
|
|
30
|
+
- Fix `crawler_runtime` not being updated during run and only in the end ([#1540](https://github.com/apify/crawlee-python/pull/1540)) ([0d6c3f6](https://github.com/apify/crawlee-python/commit/0d6c3f6d3337ddb6cab4873747c28cf95605d550)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1541](https://github.com/apify/crawlee-python/issues/1541)
|
|
31
|
+
- Ensure persist state event emission when exiting `EventManager` context ([#1562](https://github.com/apify/crawlee-python/pull/1562)) ([6a44f17](https://github.com/apify/crawlee-python/commit/6a44f172600cbcacebab899082d6efc9105c4e03)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1560](https://github.com/apify/crawlee-python/issues/1560)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
## [1.0.4](https://github.com/apify/crawlee-python/releases/tag/v1.0.4) (2025-10-24)
|
|
35
|
+
|
|
36
|
+
### 🐛 Bug Fixes
|
|
37
|
+
|
|
38
|
+
- Respect `enqueue_strategy` in `enqueue_links` ([#1505](https://github.com/apify/crawlee-python/pull/1505)) ([6ee04bc](https://github.com/apify/crawlee-python/commit/6ee04bc08c50a70f2e956a79d4ce5072a726c3a8)) by [@Mantisus](https://github.com/Mantisus), closes [#1504](https://github.com/apify/crawlee-python/issues/1504)
|
|
39
|
+
- Exclude incorrect links before checking `robots.txt` ([#1502](https://github.com/apify/crawlee-python/pull/1502)) ([3273da5](https://github.com/apify/crawlee-python/commit/3273da5fee62ec9254666b376f382474c3532a56)) by [@Mantisus](https://github.com/Mantisus), closes [#1499](https://github.com/apify/crawlee-python/issues/1499)
|
|
40
|
+
- Resolve compatibility issue between `SqlStorageClient` and `AdaptivePlaywrightCrawler` ([#1496](https://github.com/apify/crawlee-python/pull/1496)) ([ce172c4](https://github.com/apify/crawlee-python/commit/ce172c425a8643a1d4c919db4f5e5a6e47e91deb)) by [@Mantisus](https://github.com/Mantisus), closes [#1495](https://github.com/apify/crawlee-python/issues/1495)
|
|
41
|
+
- Fix `BasicCrawler` statistics persistence ([#1490](https://github.com/apify/crawlee-python/pull/1490)) ([1eb1c19](https://github.com/apify/crawlee-python/commit/1eb1c19aa6f9dda4a0e3f7eda23f77a554f95076)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1501](https://github.com/apify/crawlee-python/issues/1501)
|
|
42
|
+
- Save context state in result for `AdaptivePlaywrightCrawler` after isolated processing in `SubCrawler` ([#1488](https://github.com/apify/crawlee-python/pull/1488)) ([62b7c70](https://github.com/apify/crawlee-python/commit/62b7c70b54085fc65a660062028014f4502beba9)) by [@Mantisus](https://github.com/Mantisus), closes [#1483](https://github.com/apify/crawlee-python/issues/1483)
|
|
43
|
+
|
|
44
|
+
|
|
5
45
|
## [1.0.3](https://github.com/apify/crawlee-python/releases/tag/v1.0.3) (2025-10-17)
|
|
6
46
|
|
|
7
47
|
### 🐛 Bug Fixes
|
|
@@ -257,7 +297,7 @@ All notable changes to this project will be documented in this file.
|
|
|
257
297
|
|
|
258
298
|
### 🐛 Bug Fixes
|
|
259
299
|
|
|
260
|
-
- Fix session
|
|
300
|
+
- Fix session management with retire ([#947](https://github.com/apify/crawlee-python/pull/947)) ([caee03f](https://github.com/apify/crawlee-python/commit/caee03fe3a43cc1d7a8d3f9e19b42df1bdb1c0aa)) by [@Mantisus](https://github.com/Mantisus)
|
|
261
301
|
- Fix templates - poetry-plugin-export version and camoufox template name ([#952](https://github.com/apify/crawlee-python/pull/952)) ([7addea6](https://github.com/apify/crawlee-python/commit/7addea6605359cceba208e16ec9131724bdb3e9b)) by [@Pijukatel](https://github.com/Pijukatel), closes [#951](https://github.com/apify/crawlee-python/issues/951)
|
|
262
302
|
- Fix convert relative link to absolute in `enqueue_links` for response with redirect ([#956](https://github.com/apify/crawlee-python/pull/956)) ([694102e](https://github.com/apify/crawlee-python/commit/694102e163bb9021a4830d2545d153f6f8f3de90)) by [@Mantisus](https://github.com/Mantisus), closes [#955](https://github.com/apify/crawlee-python/issues/955)
|
|
263
303
|
- Fix `CurlImpersonateHttpClient` cookies handler ([#946](https://github.com/apify/crawlee-python/pull/946)) ([ed415c4](https://github.com/apify/crawlee-python/commit/ed415c433da2a40b0ee62534f0730d0737e991b8)) by [@Mantisus](https://github.com/Mantisus)
|
|
@@ -103,7 +103,7 @@ make run-docs
|
|
|
103
103
|
Publishing new versions to [PyPI](https://pypi.org/project/crawlee) is automated through GitHub Actions.
|
|
104
104
|
|
|
105
105
|
- **Beta releases**: On each commit to the master branch, a new beta release is automatically published. The version number is determined based on the latest release and conventional commits. The beta version suffix is incremented by 1 from the last beta release on PyPI.
|
|
106
|
-
- **Stable releases**: A stable version release may be created by triggering the `release` GitHub Actions workflow. The version number is determined based on the latest release and conventional commits (`auto` release type), or it may be
|
|
106
|
+
- **Stable releases**: A stable version release may be created by triggering the `release` GitHub Actions workflow. The version number is determined based on the latest release and conventional commits (`auto` release type), or it may be overridden using the `custom` release type.
|
|
107
107
|
|
|
108
108
|
### Publishing to PyPI manually
|
|
109
109
|
|
|
@@ -38,7 +38,7 @@ unit-tests-cov:
|
|
|
38
38
|
uv run pytest --numprocesses=auto -vv --cov=src/crawlee --cov-append --cov-report=html tests/unit -m "not run_alone"
|
|
39
39
|
|
|
40
40
|
e2e-templates-tests $(args):
|
|
41
|
-
uv run pytest --numprocesses=$(E2E_TESTS_CONCURRENCY) -vv tests/e2e/project_template "$(args)"
|
|
41
|
+
uv run pytest --numprocesses=$(E2E_TESTS_CONCURRENCY) -vv tests/e2e/project_template "$(args)" --timeout=600
|
|
42
42
|
|
|
43
43
|
format:
|
|
44
44
|
uv run ruff check --fix
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: crawlee
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.1.1b7
|
|
4
4
|
Summary: Crawlee for Python
|
|
5
5
|
Project-URL: Apify Homepage, https://apify.com
|
|
6
6
|
Project-URL: Changelog, https://crawlee.dev/python/docs/changelog
|
|
@@ -223,15 +223,16 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
223
223
|
Classifier: Programming Language :: Python :: 3.11
|
|
224
224
|
Classifier: Programming Language :: Python :: 3.12
|
|
225
225
|
Classifier: Programming Language :: Python :: 3.13
|
|
226
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
226
227
|
Classifier: Topic :: Software Development :: Libraries
|
|
227
228
|
Requires-Python: >=3.10
|
|
228
229
|
Requires-Dist: cachetools>=5.5.0
|
|
229
230
|
Requires-Dist: colorama>=0.4.0
|
|
230
|
-
Requires-Dist: impit>=0.
|
|
231
|
+
Requires-Dist: impit>=0.8.0
|
|
231
232
|
Requires-Dist: more-itertools>=10.2.0
|
|
232
233
|
Requires-Dist: protego>=0.5.0
|
|
233
234
|
Requires-Dist: psutil>=6.0.0
|
|
234
|
-
Requires-Dist: pydantic-settings
|
|
235
|
+
Requires-Dist: pydantic-settings>=2.12.0
|
|
235
236
|
Requires-Dist: pydantic>=2.11.0
|
|
236
237
|
Requires-Dist: pyee>=9.0.0
|
|
237
238
|
Requires-Dist: tldextract>=5.1.0
|
|
@@ -246,7 +247,7 @@ Requires-Dist: scikit-learn>=1.6.0; extra == 'adaptive-crawler'
|
|
|
246
247
|
Provides-Extra: all
|
|
247
248
|
Requires-Dist: aiosqlite>=0.21.0; extra == 'all'
|
|
248
249
|
Requires-Dist: apify-fingerprint-datapoints>=0.0.2; extra == 'all'
|
|
249
|
-
Requires-Dist: asyncpg>=0.24.0; extra == 'all'
|
|
250
|
+
Requires-Dist: asyncpg>=0.24.0; (python_version < '3.14') and extra == 'all'
|
|
250
251
|
Requires-Dist: beautifulsoup4[lxml]>=4.12.0; extra == 'all'
|
|
251
252
|
Requires-Dist: browserforge>=1.2.3; extra == 'all'
|
|
252
253
|
Requires-Dist: cookiecutter>=2.6.0; extra == 'all'
|
|
@@ -263,6 +264,7 @@ Requires-Dist: opentelemetry-sdk>=1.34.1; extra == 'all'
|
|
|
263
264
|
Requires-Dist: opentelemetry-semantic-conventions>=0.54; extra == 'all'
|
|
264
265
|
Requires-Dist: parsel>=1.10.0; extra == 'all'
|
|
265
266
|
Requires-Dist: playwright>=1.27.0; extra == 'all'
|
|
267
|
+
Requires-Dist: redis[hiredis]>=7.0.0; extra == 'all'
|
|
266
268
|
Requires-Dist: rich>=13.9.0; extra == 'all'
|
|
267
269
|
Requires-Dist: scikit-learn>=1.6.0; extra == 'all'
|
|
268
270
|
Requires-Dist: sqlalchemy[asyncio]<3.0.0,>=2.0.0; extra == 'all'
|
|
@@ -296,8 +298,10 @@ Provides-Extra: playwright
|
|
|
296
298
|
Requires-Dist: apify-fingerprint-datapoints>=0.0.2; extra == 'playwright'
|
|
297
299
|
Requires-Dist: browserforge>=1.2.3; extra == 'playwright'
|
|
298
300
|
Requires-Dist: playwright>=1.27.0; extra == 'playwright'
|
|
301
|
+
Provides-Extra: redis
|
|
302
|
+
Requires-Dist: redis[hiredis]>=7.0.0; extra == 'redis'
|
|
299
303
|
Provides-Extra: sql-postgres
|
|
300
|
-
Requires-Dist: asyncpg>=0.24.0; extra == 'sql-postgres'
|
|
304
|
+
Requires-Dist: asyncpg>=0.24.0; (python_version < '3.14') and extra == 'sql-postgres'
|
|
301
305
|
Requires-Dist: sqlalchemy[asyncio]<3.0.0,>=2.0.0; extra == 'sql-postgres'
|
|
302
306
|
Provides-Extra: sql-sqlite
|
|
303
307
|
Requires-Dist: aiosqlite>=0.21.0; extra == 'sql-sqlite'
|
|
@@ -99,7 +99,7 @@ apify run
|
|
|
99
99
|
For running Crawlee code as an Actor on [Apify platform](https://apify.com/actors) you need to wrap the body of the main function of your crawler with `async with Actor`.
|
|
100
100
|
|
|
101
101
|
:::info NOTE
|
|
102
|
-
Adding `async with Actor` is the only important thing needed to run it on Apify platform as an Actor. It is needed to initialize your Actor (e.g. to set the correct storage implementation) and to correctly handle
|
|
102
|
+
Adding `async with Actor` is the only important thing needed to run it on Apify platform as an Actor. It is needed to initialize your Actor (e.g. to set the correct storage implementation) and to correctly handle exiting the process.
|
|
103
103
|
:::
|
|
104
104
|
|
|
105
105
|
Let's look at the `BeautifulSoupCrawler` example from the [Quick start](../quick-start) guide:
|
{crawlee-1.0.3 → crawlee-1.1.1b7}/docs/examples/code_examples/using_browser_profiles_chrome.py
RENAMED
|
@@ -27,15 +27,13 @@ async def main() -> None:
|
|
|
27
27
|
|
|
28
28
|
crawler = PlaywrightCrawler(
|
|
29
29
|
headless=False,
|
|
30
|
-
# Use
|
|
31
|
-
browser_type='
|
|
30
|
+
# Use the installed Chrome browser
|
|
31
|
+
browser_type='chrome',
|
|
32
32
|
# Disable fingerprints to preserve profile identity
|
|
33
33
|
fingerprint_generator=None,
|
|
34
34
|
# Set user data directory to temp folder
|
|
35
35
|
user_data_dir=tmp_profile_dir,
|
|
36
36
|
browser_launch_options={
|
|
37
|
-
# Use installed Chrome browser
|
|
38
|
-
'channel': 'chrome',
|
|
39
37
|
# Slow down actions to mimic human behavior
|
|
40
38
|
'slow_mo': 200,
|
|
41
39
|
'args': [
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from collections.abc import Callable
|
|
3
|
+
|
|
4
|
+
from yarl import URL
|
|
5
|
+
|
|
6
|
+
from crawlee import RequestOptions, RequestTransformAction
|
|
7
|
+
from crawlee.crawlers import BeautifulSoupCrawler, BeautifulSoupCrawlingContext
|
|
8
|
+
from crawlee.http_clients import ImpitHttpClient
|
|
9
|
+
from crawlee.request_loaders import SitemapRequestLoader
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Create a transform_request_function that maps request options based on the host in
|
|
13
|
+
# the URL
|
|
14
|
+
def create_transform_request(
|
|
15
|
+
data_mapper: dict[str, dict],
|
|
16
|
+
) -> Callable[[RequestOptions], RequestOptions | RequestTransformAction]:
|
|
17
|
+
def transform_request(
|
|
18
|
+
request_options: RequestOptions,
|
|
19
|
+
) -> RequestOptions | RequestTransformAction:
|
|
20
|
+
# According to the Sitemap protocol, all URLs in a Sitemap must be from a single
|
|
21
|
+
# host.
|
|
22
|
+
request_host = URL(request_options['url']).host
|
|
23
|
+
|
|
24
|
+
if request_host and (mapping_data := data_mapper.get(request_host)):
|
|
25
|
+
# Set properties from the mapping data
|
|
26
|
+
if 'label' in mapping_data:
|
|
27
|
+
request_options['label'] = mapping_data['label']
|
|
28
|
+
if 'user_data' in mapping_data:
|
|
29
|
+
request_options['user_data'] = mapping_data['user_data']
|
|
30
|
+
|
|
31
|
+
return request_options
|
|
32
|
+
|
|
33
|
+
return 'unchanged'
|
|
34
|
+
|
|
35
|
+
return transform_request
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
async def main() -> None:
|
|
39
|
+
# Prepare data mapping for hosts
|
|
40
|
+
apify_host = URL('https://apify.com/sitemap.xml').host
|
|
41
|
+
crawlee_host = URL('https://crawlee.dev/sitemap.xml').host
|
|
42
|
+
|
|
43
|
+
if not apify_host or not crawlee_host:
|
|
44
|
+
raise ValueError('Unable to extract host from URLs')
|
|
45
|
+
|
|
46
|
+
data_map = {
|
|
47
|
+
apify_host: {
|
|
48
|
+
'label': 'apify',
|
|
49
|
+
'user_data': {'source': 'apify'},
|
|
50
|
+
},
|
|
51
|
+
crawlee_host: {
|
|
52
|
+
'label': 'crawlee',
|
|
53
|
+
'user_data': {'source': 'crawlee'},
|
|
54
|
+
},
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
# Initialize the SitemapRequestLoader with the transform function
|
|
58
|
+
async with SitemapRequestLoader(
|
|
59
|
+
# Set the sitemap URLs and the HTTP client
|
|
60
|
+
sitemap_urls=['https://crawlee.dev/sitemap.xml', 'https://apify.com/sitemap.xml'],
|
|
61
|
+
http_client=ImpitHttpClient(),
|
|
62
|
+
transform_request_function=create_transform_request(data_map),
|
|
63
|
+
) as sitemap_loader:
|
|
64
|
+
# Convert the sitemap loader to a request manager
|
|
65
|
+
request_manager = await sitemap_loader.to_tandem()
|
|
66
|
+
|
|
67
|
+
# Create and configure the crawler
|
|
68
|
+
crawler = BeautifulSoupCrawler(
|
|
69
|
+
request_manager=request_manager,
|
|
70
|
+
max_requests_per_crawl=10,
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
# Create default handler for requests without a specific label
|
|
74
|
+
@crawler.router.default_handler
|
|
75
|
+
async def handler(context: BeautifulSoupCrawlingContext) -> None:
|
|
76
|
+
source = context.request.user_data.get('source', 'unknown')
|
|
77
|
+
context.log.info(
|
|
78
|
+
f'Processing request: {context.request.url} from source: {source}'
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
# Create handler for requests labeled 'apify'
|
|
82
|
+
@crawler.router.handler('apify')
|
|
83
|
+
async def apify_handler(context: BeautifulSoupCrawlingContext) -> None:
|
|
84
|
+
source = context.request.user_data.get('source', 'unknown')
|
|
85
|
+
context.log.info(
|
|
86
|
+
f'Apify handler processing: {context.request.url} from source: {source}'
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
# Create handler for requests labeled 'crawlee'
|
|
90
|
+
@crawler.router.handler('crawlee')
|
|
91
|
+
async def crawlee_handler(context: BeautifulSoupCrawlingContext) -> None:
|
|
92
|
+
source = context.request.user_data.get('source', 'unknown')
|
|
93
|
+
context.log.info(
|
|
94
|
+
f'Crawlee handler processing: {context.request.url} from source: {source}'
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
await crawler.run()
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
if __name__ == '__main__':
|
|
101
|
+
asyncio.run(main())
|
|
@@ -18,8 +18,6 @@ Using browser profiles allows you to leverage existing login sessions, saved pas
|
|
|
18
18
|
|
|
19
19
|
To run <ApiLink to="class/PlaywrightCrawler">`PlaywrightCrawler`</ApiLink> with your Chrome profile, you need to know the path to your profile files. You can find this information by entering `chrome://version/` as a URL in your Chrome browser. If you have multiple profiles, pay attention to the profile name - if you only have one profile, it's always `Default`.
|
|
20
20
|
|
|
21
|
-
You also need to use the [`channel`](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-option-channel) parameter in `browser_launch_options` to use the Chrome browser installed on your system instead of Playwright's Chromium.
|
|
22
|
-
|
|
23
21
|
:::warning Profile access limitation
|
|
24
22
|
Due to [Chrome's security policies](https://developer.chrome.com/blog/remote-debugging-port), automation cannot use your main browsing profile directly. The example copies your profile to a temporary location as a workaround.
|
|
25
23
|
:::
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
---
|
|
2
|
+
id: using-sitemap-request-loader
|
|
3
|
+
title: Using sitemap request loader
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
import ApiLink from '@site/src/components/ApiLink';
|
|
7
|
+
|
|
8
|
+
import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
|
|
9
|
+
|
|
10
|
+
import SitemapRequestLoaderExample from '!!raw-loader!roa-loader!./code_examples/using_sitemap_request_loader.py';
|
|
11
|
+
|
|
12
|
+
This example demonstrates how to use <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> to crawl websites that provide `sitemap.xml` files following the [Sitemaps protocol](https://www.sitemaps.org/protocol.html). The <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> processes sitemaps in a streaming fashion without loading them entirely into memory, making it suitable for large sitemaps.
|
|
13
|
+
|
|
14
|
+
The example shows how to use the `transform_request_function` parameter to configure request options based on URL patterns. This allows you to modify request properties such as labels and user data based on the source URL, enabling different handling logic for different websites or sections.
|
|
15
|
+
|
|
16
|
+
The following code example implements processing of sitemaps from two different domains (Apify and Crawlee), with different labels assigned to requests based on their host. The `create_transform_request` function maps each host to the corresponding request configuration, while the crawler uses different handlers based on the assigned labels.
|
|
17
|
+
|
|
18
|
+
<RunnableCodeBlock className="language-python" language="python">
|
|
19
|
+
{SitemapRequestLoaderExample}
|
|
20
|
+
</RunnableCodeBlock>
|
|
21
|
+
|
|
22
|
+
For more information about request loaders, see the [Request loaders guide](../guides/request-loaders).
|
|
@@ -291,7 +291,7 @@ Request loaders provide a subset of <ApiLink to="class/RequestQueue">`RequestQue
|
|
|
291
291
|
|
|
292
292
|
- <ApiLink to="class/RequestLoader">`RequestLoader`</ApiLink> - Base interface for read-only access to a stream of requests, with capabilities like fetching the next request, marking as handled, and status checking.
|
|
293
293
|
- <ApiLink to="class/RequestList">`RequestList`</ApiLink> - Lightweight in-memory implementation of `RequestLoader` for managing static lists of URLs.
|
|
294
|
-
- <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> -
|
|
294
|
+
- <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> - A specialized loader that reads URLs from XML and plain-text sitemaps following the [Sitemaps protocol](https://www.sitemaps.org/protocol.html) with filtering capabilities.
|
|
295
295
|
|
|
296
296
|
### Request managers
|
|
297
297
|
|
|
@@ -25,7 +25,7 @@ Changing browser fingerprints can be a tedious job. Luckily, Crawlee provides th
|
|
|
25
25
|
{PlaywrightDefaultFingerprintGenerator}
|
|
26
26
|
</RunnableCodeBlock>
|
|
27
27
|
|
|
28
|
-
In certain cases we want to narrow down the fingerprints used - e.g. specify a certain operating system, locale or browser. This is also possible with Crawlee - the crawler can have the generation algorithm customized to reflect the particular browser version and many more. For description of fingerprint generation options please see <ApiLink to="class/HeaderGeneratorOptions">`HeaderGeneratorOptions`</ApiLink>, <ApiLink to="class/ScreenOptions">`ScreenOptions`</ApiLink> and <ApiLink to="class/BrowserforgeFingerprintGenerator#__init__">`DefaultFingerprintGenerator.__init__`</ApiLink> See the example
|
|
28
|
+
In certain cases we want to narrow down the fingerprints used - e.g. specify a certain operating system, locale or browser. This is also possible with Crawlee - the crawler can have the generation algorithm customized to reflect the particular browser version and many more. For description of fingerprint generation options please see <ApiLink to="class/HeaderGeneratorOptions">`HeaderGeneratorOptions`</ApiLink>, <ApiLink to="class/ScreenOptions">`ScreenOptions`</ApiLink> and <ApiLink to="class/BrowserforgeFingerprintGenerator#__init__">`DefaultFingerprintGenerator.__init__`</ApiLink> See the example below:
|
|
29
29
|
|
|
30
30
|
<CodeBlock className="language-python">
|
|
31
31
|
{PlaywrightDefaultFingerprintGeneratorWithArgs}
|
crawlee-1.1.1b7/docs/guides/code_examples/storage_clients/redis_storage_client_basic_example.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
from crawlee.crawlers import ParselCrawler
|
|
2
|
+
from crawlee.storage_clients import RedisStorageClient
|
|
3
|
+
|
|
4
|
+
# Create a new instance of storage client using connection string.
|
|
5
|
+
# 'redis://localhost:6379' is the just placeholder, replace it with your actual
|
|
6
|
+
# connection string.
|
|
7
|
+
storage_client = RedisStorageClient(connection_string='redis://localhost:6379')
|
|
8
|
+
|
|
9
|
+
# And pass it to the crawler.
|
|
10
|
+
crawler = ParselCrawler(storage_client=storage_client)
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from redis.asyncio import Redis
|
|
2
|
+
|
|
3
|
+
from crawlee.configuration import Configuration
|
|
4
|
+
from crawlee.crawlers import ParselCrawler
|
|
5
|
+
from crawlee.storage_clients import RedisStorageClient
|
|
6
|
+
|
|
7
|
+
# Create a new instance of storage client using a Redis client with custom settings.
|
|
8
|
+
# Replace host and port with your actual Redis server configuration.
|
|
9
|
+
# Other Redis client settings can be adjusted as needed.
|
|
10
|
+
storage_client = RedisStorageClient(
|
|
11
|
+
redis=Redis(
|
|
12
|
+
host='localhost',
|
|
13
|
+
port=6379,
|
|
14
|
+
retry_on_timeout=True,
|
|
15
|
+
socket_keepalive=True,
|
|
16
|
+
socket_connect_timeout=10,
|
|
17
|
+
)
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
# Create a configuration with custom settings.
|
|
21
|
+
configuration = Configuration(purge_on_start=False)
|
|
22
|
+
|
|
23
|
+
# And pass them to the crawler.
|
|
24
|
+
crawler = ParselCrawler(
|
|
25
|
+
storage_client=storage_client,
|
|
26
|
+
configuration=configuration,
|
|
27
|
+
)
|
|
@@ -31,7 +31,7 @@ The [`request_loaders`](https://github.com/apify/crawlee-python/tree/master/src/
|
|
|
31
31
|
And specific request loader implementations:
|
|
32
32
|
|
|
33
33
|
- <ApiLink to="class/RequestList">`RequestList`</ApiLink>: A lightweight implementation for managing a static list of URLs.
|
|
34
|
-
- <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink>: A specialized loader that reads URLs from XML sitemaps with filtering capabilities.
|
|
34
|
+
- <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink>: A specialized loader that reads URLs from XML and plain-text sitemaps following the [Sitemaps protocol](https://www.sitemaps.org/protocol.html) with filtering capabilities.
|
|
35
35
|
|
|
36
36
|
Below is a class diagram that illustrates the relationships between these components and the <ApiLink to="class/RequestQueue">`RequestQueue`</ApiLink>:
|
|
37
37
|
|
|
@@ -130,7 +130,13 @@ To enable persistence, provide `persist_state_key` and optionally `persist_reque
|
|
|
130
130
|
|
|
131
131
|
### Sitemap request loader
|
|
132
132
|
|
|
133
|
-
The <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> is a specialized request loader that reads URLs from
|
|
133
|
+
The <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> is a specialized request loader that reads URLs from sitemaps following the [Sitemaps protocol](https://www.sitemaps.org/protocol.html). It supports both XML and plain text sitemap formats. It's particularly useful when you want to crawl a website systematically by following its sitemap structure.
|
|
134
|
+
|
|
135
|
+
:::note
|
|
136
|
+
The `SitemapRequestLoader` is designed specifically for sitemaps that follow the standard Sitemaps protocol. HTML pages containing links are not supported by this loader - those should be handled by regular crawlers using the `enqueue_links` functionality.
|
|
137
|
+
:::
|
|
138
|
+
|
|
139
|
+
The loader supports filtering URLs using glob patterns and regular expressions, allowing you to include or exclude specific types of URLs. The <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> provides streaming processing of sitemaps, ensuring efficient memory usage without loading the entire sitemap into memory.
|
|
134
140
|
|
|
135
141
|
<RunnableCodeBlock className="language-python" language="python">
|
|
136
142
|
{SitemapExample}
|