crawlee 1.0.5b3__tar.gz → 1.0.5b16__tar.gz
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crawlee might be problematic. Click here for more details.
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/.github/workflows/build_and_deploy_docs.yaml +1 -1
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/.github/workflows/release.yaml +3 -3
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/.github/workflows/run_code_checks.yaml +4 -3
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/.github/workflows/templates_e2e_tests.yaml +1 -1
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/CHANGELOG.md +5 -1
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/CONTRIBUTING.md +1 -1
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/Makefile +1 -1
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/PKG-INFO +9 -5
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/deployment/apify_platform.mdx +1 -1
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/playwright_crawler_with_fingerprint_generator.mdx +1 -1
- crawlee-1.0.5b16/docs/guides/code_examples/storage_clients/redis_storage_client_basic_example.py +10 -0
- crawlee-1.0.5b16/docs/guides/code_examples/storage_clients/redis_storage_client_configuration_example.py +27 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/storage_clients.mdx +175 -3
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/trace_and_monitor_crawlers.mdx +1 -1
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/pyproject.toml +1 -1
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/pyproject.toml +24 -21
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_utils/robots.py +17 -5
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/configuration.py +3 -1
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +0 -1
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/otel/crawler_instrumentor.py +3 -3
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/sessions/_session_pool.py +1 -1
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/statistics/_error_snapshotter.py +1 -1
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/statistics/_statistics.py +28 -27
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storage_clients/__init__.py +4 -0
- crawlee-1.0.5b16/src/crawlee/storage_clients/_redis/__init__.py +6 -0
- crawlee-1.0.5b16/src/crawlee/storage_clients/_redis/_client_mixin.py +295 -0
- crawlee-1.0.5b16/src/crawlee/storage_clients/_redis/_dataset_client.py +325 -0
- crawlee-1.0.5b16/src/crawlee/storage_clients/_redis/_key_value_store_client.py +264 -0
- crawlee-1.0.5b16/src/crawlee/storage_clients/_redis/_request_queue_client.py +586 -0
- crawlee-1.0.5b16/src/crawlee/storage_clients/_redis/_storage_client.py +146 -0
- crawlee-1.0.5b16/src/crawlee/storage_clients/_redis/_utils.py +23 -0
- crawlee-1.0.5b16/src/crawlee/storage_clients/_redis/lua_scripts/atomic_bloom_add_requests.lua +36 -0
- crawlee-1.0.5b16/src/crawlee/storage_clients/_redis/lua_scripts/atomic_fetch_request.lua +49 -0
- crawlee-1.0.5b16/src/crawlee/storage_clients/_redis/lua_scripts/atomic_set_add_requests.lua +37 -0
- crawlee-1.0.5b16/src/crawlee/storage_clients/_redis/lua_scripts/reclaim_stale_requests.lua +34 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storage_clients/_sql/_storage_client.py +9 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/e2e/project_template/utils.py +3 -2
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/_utils/test_system.py +8 -6
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/conftest.py +6 -3
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler.py +5 -5
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/crawlers/_basic/test_basic_crawler.py +33 -7
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py +35 -1
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/crawlers/_http/test_http_crawler.py +2 -2
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/crawlers/_parsel/test_parsel_crawler.py +35 -1
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/crawlers/_playwright/test_playwright_crawler.py +34 -1
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/server.py +10 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/server_endpoints.py +10 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py +1 -1
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py +1 -1
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/storage_clients/_file_system/test_fs_rq_client.py +1 -1
- crawlee-1.0.5b16/tests/unit/storage_clients/_redis/test_redis_dataset_client.py +146 -0
- crawlee-1.0.5b16/tests/unit/storage_clients/_redis/test_redis_kvs_client.py +217 -0
- crawlee-1.0.5b16/tests/unit/storage_clients/_redis/test_redis_rq_client.py +257 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/storage_clients/_sql/test_sql_dataset_client.py +1 -1
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/storage_clients/_sql/test_sql_kvs_client.py +1 -1
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/storage_clients/_sql/test_sql_rq_client.py +1 -1
- crawlee-1.0.5b16/tests/unit/storages/conftest.py +39 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/storages/test_dataset.py +1 -4
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/storages/test_key_value_store.py +1 -4
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/storages/test_request_queue.py +0 -3
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/storages/test_storage_instance_manager.py +7 -20
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/test_configuration.py +3 -12
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/uv.lock +405 -147
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/package.json +4 -0
- crawlee-1.0.5b16/website/src/components/RunnableCodeBlock.jsx +42 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/pages/home_page_example.py +14 -9
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/pages/index.js +1 -1
- crawlee-1.0.5b16/website/static/.nojekyll +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/yarn.lock +345 -325
- crawlee-1.0.5b3/tests/unit/storages/conftest.py +0 -18
- crawlee-1.0.5b3/website/src/components/RunnableCodeBlock.jsx +0 -40
- crawlee-1.0.5b3/website/static/img/apify_logo.svg +0 -5
- crawlee-1.0.5b3/website/static/img/apify_og_SDK.png +0 -0
- crawlee-1.0.5b3/website/static/img/apify_sdk.svg +0 -13
- crawlee-1.0.5b3/website/static/img/apify_sdk_white.svg +0 -13
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/.editorconfig +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/.github/CODEOWNERS +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/.github/pull_request_template.md +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/.github/workflows/check_pr_title.yaml +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/.github/workflows/pre_release.yaml +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/.github/workflows/update_new_issue.yaml +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/.gitignore +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/.markdownlint.yaml +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/.pre-commit-config.yaml +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/LICENSE +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/README.md +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/deployment/code_examples/apify/crawler_as_actor_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/deployment/code_examples/apify/get_public_url.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/deployment/code_examples/apify/log_with_config_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/deployment/code_examples/apify/proxy_advanced_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/deployment/code_examples/apify/proxy_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/deployment/code_examples/google/cloud_run_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/deployment/code_examples/google/google_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/deployment/google_cloud.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/deployment/google_cloud_run.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/add_data_to_dataset.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/beautifulsoup_crawler.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/capture_screenshot_using_playwright.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/capturing_page_snapshots_with_error_snapshotter.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/adaptive_playwright_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/add_data_to_dataset_bs.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/add_data_to_dataset_dataset.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/add_data_to_dataset_pw.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/beautifulsoup_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/beautifulsoup_crawler_keep_alive.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/beautifulsoup_crawler_stop.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/capture_screenshot_using_playwright.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/configure_json_logging.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/crawl_all_links_on_website_bs.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/crawl_all_links_on_website_pw.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/crawl_multiple_urls_bs.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/crawl_multiple_urls_pw.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/crawl_specific_links_on_website_bs.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/crawl_specific_links_on_website_pw.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/crawl_website_with_relative_links_all_links.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/crawl_website_with_relative_links_same_domain.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/crawl_website_with_relative_links_same_hostname.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/crawl_website_with_relative_links_same_origin.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/export_entire_dataset_to_file_csv.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/export_entire_dataset_to_file_json.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/extract_and_add_specific_links_on_website_bs.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/extract_and_add_specific_links_on_website_pw.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/fill_and_submit_web_form_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/fill_and_submit_web_form_request.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/parsel_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/parsel_crawler_with_error_snapshotter.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/playwright_block_requests.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/playwright_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/playwright_crawler_with_camoufox.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/playwright_crawler_with_error_snapshotter.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/playwright_crawler_with_fingerprint_generator.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/respect_robots_on_skipped_request.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/respect_robots_txt_file.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/resuming_paused_crawl.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/using_browser_profiles_chrome.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/code_examples/using_browser_profiles_firefox.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/crawl_all_links_on_website.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/crawl_multiple_urls.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/crawl_specific_links_on_website.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/crawl_website_with_relative_links.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/crawler_keep_alive.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/crawler_stop.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/export_entire_dataset_to_file.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/fill_and_submit_web_form.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/json_logging.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/parsel_crawler.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/playwright_crawler.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/playwright_crawler_adaptive.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/playwright_crawler_with_block_requests.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/playwright_crawler_with_camoufox.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/respect_robots_txt_file.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/resuming_paused_crawl.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/examples/using_browser_profile.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/architecture_overview.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/avoid_blocking.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/avoid_blocking/default_fingerprint_generator_with_args.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/avoid_blocking/playwright_with_fingerprint_generator.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/creating_web_archive/manual_archiving_parsel_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/creating_web_archive/manual_archiving_playwright_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/creating_web_archive/simple_pw_through_proxy_pywb_server.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/error_handling/change_handle_error_status.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/error_handling/disable_retry.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/error_handling/handle_proxy_error.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/http_clients/parsel_curl_impersonate_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/http_clients/parsel_httpx_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/http_clients/parsel_impit_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/http_crawlers/beautifulsoup_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/http_crawlers/custom_crawler_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/http_crawlers/http_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/http_crawlers/parsel_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/login_crawler/http_login.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/login_crawler/playwright_login.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/playwright_crawler/browser_configuration_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/playwright_crawler/multiple_launch_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/playwright_crawler/plugin_browser_configuration_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/playwright_crawler/pre_navigation_hook_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/playwright_crawler_adaptive/handler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/playwright_crawler_adaptive/init_beautifulsoup.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/playwright_crawler_adaptive/init_parsel.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/playwright_crawler_adaptive/init_prediction.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/playwright_crawler_adaptive/pre_nav_hooks.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/playwright_crawler_stagehand/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/playwright_crawler_stagehand/browser_classes.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/playwright_crawler_stagehand/stagehand_run.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/playwright_crawler_stagehand/support_classes.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/proxy_management/inspecting_bs_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/proxy_management/inspecting_pw_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/proxy_management/integration_bs_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/proxy_management/integration_pw_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/proxy_management/quick_start_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/proxy_management/session_bs_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/proxy_management/session_pw_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/proxy_management/tiers_bs_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/proxy_management/tiers_pw_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/request_loaders/rl_basic_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/request_loaders/rl_basic_example_with_persist.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/request_loaders/rl_tandem_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/request_loaders/rl_tandem_example_explicit.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/request_loaders/sitemap_basic_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/request_loaders/sitemap_example_with_persist.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/request_loaders/sitemap_tandem_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/request_loaders/sitemap_tandem_example_explicit.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/request_router/adaptive_crawler_handlers.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/request_router/basic_request_handlers.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/request_router/custom_router_default_only.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/request_router/error_handler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/request_router/failed_request_handler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/request_router/http_pre_navigation.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/request_router/playwright_pre_navigation.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/request_router/simple_default_handler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/running_in_web_server/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/running_in_web_server/crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/running_in_web_server/server.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/scaling_crawlers/max_tasks_per_minute_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/scaling_crawlers/min_and_max_concurrency_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/service_locator/service_conflicts.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/service_locator/service_crawler_configuration.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/service_locator/service_crawler_event_manager.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/service_locator/service_crawler_storage_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/service_locator/service_locator_configuration.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/service_locator/service_locator_event_manager.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/service_locator/service_locator_storage_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/service_locator/service_storage_configuration.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/service_locator/service_storage_storage_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/session_management/multi_sessions_http.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/session_management/one_session_http.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/session_management/sm_basic.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/session_management/sm_beautifulsoup.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/session_management/sm_http.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/session_management/sm_parsel.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/session_management/sm_playwright.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/session_management/sm_standalone.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/storage_clients/custom_storage_client_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/storage_clients/file_system_storage_client_basic_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/storage_clients/file_system_storage_client_configuration_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/storage_clients/memory_storage_client_basic_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/storage_clients/registering_storage_clients_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/storage_clients/sql_storage_client_basic_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/storage_clients/sql_storage_client_configuration_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/storages/cleaning_do_not_purge_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/storages/cleaning_purge_explicitly_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/storages/dataset_basic_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/storages/dataset_with_crawler_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/storages/dataset_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/storages/helper_add_requests_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/storages/helper_enqueue_links_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/storages/kvs_basic_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/storages/kvs_with_crawler_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/storages/kvs_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/storages/opening.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/storages/rq_basic_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/storages/rq_with_crawler_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/storages/rq_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/code_examples/trace_and_monitor_crawlers/instrument_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/crawler_login.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/creating_web_archive.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/error_handling.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/http_clients.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/http_crawlers.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/playwright_crawler.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/playwright_crawler_adaptive.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/playwright_crawler_stagehand.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/proxy_management.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/request_loaders.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/request_router.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/running_in_web_server.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/scaling_crawlers.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/service_locator.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/session_management.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/guides/storages.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/01_setting_up.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/02_first_crawler.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/03_adding_more_urls.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/04_real_world_project.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/05_crawling.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/06_scraping.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/07_saving_data.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/08_refactoring.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/09_running_in_cloud.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/code_examples/02_bs.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/code_examples/02_bs_better.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/code_examples/02_request_queue.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/code_examples/03_enqueue_strategy.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/code_examples/03_finding_new_links.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/code_examples/03_globs.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/code_examples/03_original_code.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/code_examples/03_transform_request.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/code_examples/04_sanity_check.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/code_examples/05_crawling_detail.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/code_examples/05_crawling_listing.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/code_examples/06_scraping.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/code_examples/07_final_code.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/code_examples/07_first_code.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/code_examples/08_main.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/code_examples/08_routes.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/code_examples/09_apify_sdk.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/code_examples/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/code_examples/routes.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/introduction/index.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/quick-start/code_examples/beautifulsoup_crawler_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/quick-start/code_examples/parsel_crawler_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/quick-start/code_examples/playwright_crawler_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/quick-start/code_examples/playwright_crawler_headful_example.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/quick-start/index.mdx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/upgrading/upgrading_to_v0x.md +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/docs/upgrading/upgrading_to_v1.md +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/renovate.json +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_autoscaling/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_autoscaling/_types.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_autoscaling/autoscaled_pool.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_autoscaling/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_autoscaling/snapshotter.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_autoscaling/system_status.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_browserforge_workaround.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_cli.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_consts.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_log_config.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_request.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_service_locator.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_types.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_utils/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_utils/blocked.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_utils/byte_size.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_utils/console.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_utils/context.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_utils/crypto.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_utils/docs.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_utils/file.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_utils/globs.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_utils/html_to_text.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_utils/models.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_utils/raise_if_too_many_kwargs.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_utils/recoverable_state.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_utils/recurring_task.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_utils/requests.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_utils/sitemap.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_utils/system.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_utils/time.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_utils/try_import.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_utils/urls.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_utils/wait.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/_utils/web.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/browsers/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/browsers/_browser_controller.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/browsers/_browser_plugin.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/browsers/_browser_pool.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/browsers/_playwright_browser.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/browsers/_playwright_browser_controller.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/browsers/_playwright_browser_plugin.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/browsers/_types.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/browsers/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_abstract_http/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_abstract_http/_abstract_http_parser.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_abstract_http/_http_crawling_context.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_abstract_http/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_adaptive_playwright/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_adaptive_playwright/_result_comparator.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_adaptive_playwright/_utils.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_basic/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_basic/_basic_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_basic/_basic_crawling_context.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_basic/_context_pipeline.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_basic/_logging_utils.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_basic/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_beautifulsoup/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawling_context.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_parser.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_beautifulsoup/_utils.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_beautifulsoup/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_http/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_http/_http_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_http/_http_parser.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_parsel/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_parsel/_parsel_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_parsel/_parsel_crawling_context.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_parsel/_parsel_parser.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_parsel/_utils.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_playwright/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_playwright/_playwright_crawler.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_playwright/_playwright_crawling_context.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_playwright/_playwright_http_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_playwright/_types.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_playwright/_utils.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/_types.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/crawlers/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/errors.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/events/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/events/_event_manager.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/events/_local_event_manager.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/events/_types.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/events/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/fingerprint_suite/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/fingerprint_suite/_browserforge_adapter.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/fingerprint_suite/_consts.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/fingerprint_suite/_fingerprint_generator.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/fingerprint_suite/_header_generator.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/fingerprint_suite/_types.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/fingerprint_suite/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/http_clients/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/http_clients/_base.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/http_clients/_curl_impersonate.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/http_clients/_httpx.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/http_clients/_impit.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/otel/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/project_template/cookiecutter.json +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/project_template/hooks/post_gen_project.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/project_template/hooks/pre_gen_project.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/project_template/templates/main.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/project_template/templates/main_beautifulsoup.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/project_template/templates/main_parsel.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/project_template/templates/main_playwright.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/project_template/templates/main_playwright_camoufox.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/project_template/templates/routes_beautifulsoup.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/project_template/templates/routes_camoufox.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/project_template/templates/routes_parsel.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/project_template/templates/routes_playwright.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/project_template/templates/routes_playwright_camoufox.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/project_template/{{cookiecutter.project_name}}/.dockerignore +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/project_template/{{cookiecutter.project_name}}/README.md +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/project_template/{{cookiecutter.project_name}}/requirements.txt +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__main__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/main.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/routes.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/proxy_configuration.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/request_loaders/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/request_loaders/_request_list.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/request_loaders/_request_loader.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/request_loaders/_request_manager.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/request_loaders/_request_manager_tandem.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/request_loaders/_sitemap_request_loader.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/router.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/sessions/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/sessions/_cookies.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/sessions/_models.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/sessions/_session.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/sessions/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/statistics/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/statistics/_error_tracker.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/statistics/_models.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storage_clients/_base/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storage_clients/_base/_dataset_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storage_clients/_base/_key_value_store_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storage_clients/_base/_request_queue_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storage_clients/_base/_storage_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storage_clients/_base/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storage_clients/_file_system/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storage_clients/_file_system/_dataset_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storage_clients/_file_system/_key_value_store_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storage_clients/_file_system/_request_queue_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storage_clients/_file_system/_storage_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storage_clients/_file_system/_utils.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storage_clients/_file_system/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storage_clients/_memory/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storage_clients/_memory/_dataset_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storage_clients/_memory/_key_value_store_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storage_clients/_memory/_request_queue_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storage_clients/_memory/_storage_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storage_clients/_memory/py.typed +0 -0
- {crawlee-1.0.5b3/src/crawlee/storage_clients/_sql → crawlee-1.0.5b16/src/crawlee/storage_clients/_redis}/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storage_clients/_sql/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storage_clients/_sql/_client_mixin.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storage_clients/_sql/_dataset_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storage_clients/_sql/_db_models.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storage_clients/_sql/_key_value_store_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storage_clients/_sql/_request_queue_client.py +0 -0
- {crawlee-1.0.5b3/src/crawlee/storage_clients → crawlee-1.0.5b16/src/crawlee/storage_clients/_sql}/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storage_clients/models.py +0 -0
- {crawlee-1.0.5b3/src/crawlee/storages → crawlee-1.0.5b16/src/crawlee/storage_clients}/py.typed +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storages/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storages/_base.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storages/_dataset.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storages/_key_value_store.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storages/_request_queue.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storages/_storage_instance_manager.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/src/crawlee/storages/_utils.py +0 -0
- /crawlee-1.0.5b3/tests/__init__.py → /crawlee-1.0.5b16/src/crawlee/storages/py.typed +0 -0
- {crawlee-1.0.5b3/tests/e2e → crawlee-1.0.5b16/tests}/__init__.py +0 -0
- {crawlee-1.0.5b3/tests/unit → crawlee-1.0.5b16/tests/e2e}/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/e2e/conftest.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/e2e/project_template/test_static_crawlers_templates.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/README.md +0 -0
- /crawlee-1.0.5b3/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawling_context.py → /crawlee-1.0.5b16/tests/unit/__init__.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/_autoscaling/test_autoscaled_pool.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/_autoscaling/test_snapshotter.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/_autoscaling/test_system_status.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/_statistics/test_error_tracker.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/_statistics/test_periodic_logging.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/_statistics/test_persistence.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/_statistics/test_request_processing_record.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/_utils/test_byte_size.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/_utils/test_console.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/_utils/test_crypto.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/_utils/test_file.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/_utils/test_globs.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/_utils/test_html_to_text.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/_utils/test_measure_time.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/_utils/test_raise_if_too_many_kwargs.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/_utils/test_recurring_task.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/_utils/test_requests.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/_utils/test_robots.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/_utils/test_sitemap.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/_utils/test_timedelata_ms.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/_utils/test_urls.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/browsers/test_browser_pool.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/browsers/test_playwright_browser.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/browsers/test_playwright_browser_controller.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/browsers/test_playwright_browser_plugin.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler_statistics.py +0 -0
- /crawlee-1.0.5b3/website/static/.nojekyll → /crawlee-1.0.5b16/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawling_context.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/crawlers/_adaptive_playwright/test_predictor.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/crawlers/_basic/test_context_pipeline.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/events/test_event_manager.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/events/test_local_event_manager.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/fingerprint_suite/test_adapters.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/fingerprint_suite/test_header_generator.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/http_clients/test_http_clients.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/http_clients/test_httpx.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/otel/test_crawler_instrumentor.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/proxy_configuration/test_new_proxy_info.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/proxy_configuration/test_tiers.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/request_loaders/test_request_list.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/request_loaders/test_sitemap_request_loader.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/sessions/test_cookies.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/sessions/test_models.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/sessions/test_session.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/sessions/test_session_pool.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/storage_clients/_memory/test_memory_dataset_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/storage_clients/_memory/test_memory_kvs_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/storage_clients/_memory/test_memory_rq_client.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/storages/test_request_manager_tandem.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/test_cli.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/test_log_config.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/test_router.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/tests/unit/test_service_locator.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/.eslintrc.json +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/.yarnrc.yml +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/babel.config.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/build_api_reference.sh +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/docusaurus.config.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/generate_module_shortcuts.py +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/patches/@docusaurus+core+3.4.0.patch +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/patches/@docusaurus+core+3.5.2.patch +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/roa-loader/index.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/roa-loader/package.json +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/sidebars.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/components/ApiLink.jsx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/components/Button.jsx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/components/Button.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/components/CopyButton.jsx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/components/CopyButton.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/components/Gradients.jsx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/components/Highlights.jsx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/components/Highlights.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/components/Homepage/HomepageCliExample.jsx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/components/Homepage/HomepageCliExample.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/components/Homepage/HomepageCtaSection.jsx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/components/Homepage/HomepageCtaSection.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/components/Homepage/HomepageHeroSection.jsx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/components/Homepage/HomepageHeroSection.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/components/Homepage/LanguageInfoWidget.jsx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/components/Homepage/LanguageInfoWidget.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/components/Homepage/LanguageSwitch.jsx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/components/Homepage/LanguageSwitch.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/components/Homepage/RiverSection.jsx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/components/Homepage/RiverSection.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/components/Homepage/ThreeCardsWithIcon.jsx +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/components/Homepage/ThreeCardsWithIcon.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/components/Homepage/animated-crawlee-logo-dark.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/components/Homepage/animated-crawlee-logo-light.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/components/RunnableCodeBlock.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/css/custom.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/pages/index.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/theme/ColorModeToggle/dark-mode-icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/theme/ColorModeToggle/index.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/theme/ColorModeToggle/light-mode-icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/theme/ColorModeToggle/styles.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/theme/DocItem/Layout/index.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/theme/DocItem/Layout/styles.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/theme/Footer/LinkItem/index.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/theme/Footer/LinkItem/index.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/theme/Footer/index.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/theme/Footer/index.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/theme/MDXComponents/A.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/theme/Navbar/Content/index.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/theme/Navbar/Content/styles.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/theme/Navbar/Logo/index.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/theme/Navbar/Logo/index.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/theme/Navbar/MobileSidebar/Header/index.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/theme/Navbar/MobileSidebar/Header/index.module.css +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/theme/Navbar/MobileSidebar/Layout/index.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/theme/Navbar/MobileSidebar/PrimaryMenu/index.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/theme/Navbar/MobileSidebar/index.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/src/theme/NavbarItem/ComponentTypes.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/font/lota.woff +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/font/lota.woff2 +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/API.png +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/arrow_right.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/auto-scaling-dark.webp +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/auto-scaling-light.webp +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/check.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/chrome-scrape-dark.gif +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/chrome-scrape-light.gif +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/cloud_icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/community-dark-icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/community-light-icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/crawlee-dark-new.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/crawlee-dark.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/crawlee-javascript-dark.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/crawlee-javascript-light.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/crawlee-light-new.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/crawlee-light.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/crawlee-logo-monocolor.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/crawlee-logo.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/crawlee-python-dark.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/crawlee-python-light.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/crawlee-python-og.png +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/defaults-dark-icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/defaults-light-icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/discord-brand-dark.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/discord-brand.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/docusaurus.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/external-link.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/favicon.ico +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/favorite-tools-dark.webp +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/favorite-tools-light.webp +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/features/auto-scaling.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/features/automate-everything.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/features/fingerprints.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/features/node-requests.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/features/runs-on-py.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/features/storage.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/features/works-everywhere.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/fill-and-submit-web-form/00.jpg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/fill-and-submit-web-form/01.jpg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/fill-and-submit-web-form/02.jpg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/fill-and-submit-web-form/03.jpg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/getting-started/current-price.jpg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/getting-started/scraping-practice.jpg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/getting-started/select-an-element.jpg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/getting-started/selected-element.jpg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/getting-started/sku.jpg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/getting-started/title.jpg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/github-brand-dark.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/github-brand.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/guides/jaeger_otel_search_view_example.png +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/guides/jaeger_otel_trace_example.png +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/hearth copy.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/hearth.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/javascript_logo.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/js_file.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/logo-big.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/logo-blur.png +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/logo-blur.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/logo-zoom.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/menu-arrows.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/oss_logo.png +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/puppeteer-live-view-dashboard.png +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/puppeteer-live-view-detail.png +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/queue-dark-icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/queue-light-icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/resuming-paused-crawl/00.webp +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/resuming-paused-crawl/01.webp +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/robot.png +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/routing-dark-icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/routing-light-icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/scraping-utils-dark-icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/scraping-utils-light-icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/smart-proxy-dark.webp +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/smart-proxy-light.webp +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/source_code.png +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/system.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/triangles_dark.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/triangles_light.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/workflow.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/zero-setup-dark-icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/img/zero-setup-light-icon.svg +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/js/custom.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/static/robots.txt +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/tools/docs-prettier.config.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/tools/utils/externalLink.js +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/tools/website_gif/chrome-scrape-dark.gif +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/tools/website_gif/chrome-scrape-dark.mp4 +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/tools/website_gif/chrome-scrape-light.gif +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/tools/website_gif/chrome-scrape-light.mp4 +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/tools/website_gif/website_gif.mjs +0 -0
- {crawlee-1.0.5b3 → crawlee-1.0.5b16}/website/tsconfig.eslint.json +0 -0
|
@@ -47,13 +47,13 @@ jobs:
|
|
|
47
47
|
name: Lint check
|
|
48
48
|
uses: apify/workflows/.github/workflows/python_lint_check.yaml@main
|
|
49
49
|
with:
|
|
50
|
-
python-versions: '["3.10", "3.11", "3.12", "3.13"]'
|
|
50
|
+
python-versions: '["3.10", "3.11", "3.12", "3.13", "3.14"]'
|
|
51
51
|
|
|
52
52
|
type_check:
|
|
53
53
|
name: Type check
|
|
54
54
|
uses: apify/workflows/.github/workflows/python_type_check.yaml@main
|
|
55
55
|
with:
|
|
56
|
-
python-versions: '["3.10", "3.11", "3.12", "3.13"]'
|
|
56
|
+
python-versions: '["3.10", "3.11", "3.12", "3.13", "3.14"]'
|
|
57
57
|
|
|
58
58
|
unit_tests:
|
|
59
59
|
name: Unit tests
|
|
@@ -61,7 +61,7 @@ jobs:
|
|
|
61
61
|
secrets:
|
|
62
62
|
httpbin_url: ${{ secrets.APIFY_HTTPBIN_TOKEN && format('https://httpbin.apify.actor?token={0}', secrets.APIFY_HTTPBIN_TOKEN) || 'https://httpbin.org'}}
|
|
63
63
|
with:
|
|
64
|
-
python-versions: '["3.10", "3.11", "3.12", "3.13"]'
|
|
64
|
+
python-versions: '["3.10", "3.11", "3.12", "3.13", "3.14"]'
|
|
65
65
|
|
|
66
66
|
update_changelog:
|
|
67
67
|
name: Update changelog
|
|
@@ -21,13 +21,13 @@ jobs:
|
|
|
21
21
|
name: Lint check
|
|
22
22
|
uses: apify/workflows/.github/workflows/python_lint_check.yaml@main
|
|
23
23
|
with:
|
|
24
|
-
python-versions: '["3.10", "3.11", "3.12", "3.13"]'
|
|
24
|
+
python-versions: '["3.10", "3.11", "3.12", "3.13", "3.14"]'
|
|
25
25
|
|
|
26
26
|
type_check:
|
|
27
27
|
name: Type check
|
|
28
28
|
uses: apify/workflows/.github/workflows/python_type_check.yaml@main
|
|
29
29
|
with:
|
|
30
|
-
python-versions: '["3.10", "3.11", "3.12", "3.13"]'
|
|
30
|
+
python-versions: '["3.10", "3.11", "3.12", "3.13", "3.14"]'
|
|
31
31
|
|
|
32
32
|
unit_tests:
|
|
33
33
|
name: Unit tests
|
|
@@ -35,8 +35,9 @@ jobs:
|
|
|
35
35
|
secrets:
|
|
36
36
|
httpbin_url: ${{ secrets.APIFY_HTTPBIN_TOKEN && format('https://httpbin.apify.actor?token={0}', secrets.APIFY_HTTPBIN_TOKEN) || 'https://httpbin.org'}}
|
|
37
37
|
with:
|
|
38
|
-
python-versions: '["3.10", "3.11", "3.12", "3.13"]'
|
|
38
|
+
python-versions: '["3.10", "3.11", "3.12", "3.13", "3.14"]'
|
|
39
39
|
|
|
40
40
|
docs_check:
|
|
41
41
|
name: Docs check
|
|
42
42
|
uses: apify/workflows/.github/workflows/python_docs_check.yaml@main
|
|
43
|
+
secrets: inherit
|
|
@@ -8,10 +8,14 @@ All notable changes to this project will be documented in this file.
|
|
|
8
8
|
### 🚀 Features
|
|
9
9
|
|
|
10
10
|
- Add `chrome` `BrowserType` for `PlaywrightCrawler` to use the Chrome browser ([#1487](https://github.com/apify/crawlee-python/pull/1487)) ([b06937b](https://github.com/apify/crawlee-python/commit/b06937bbc3afe3c936b554bfc503365c1b2c526b)) by [@Mantisus](https://github.com/Mantisus), closes [#1071](https://github.com/apify/crawlee-python/issues/1071)
|
|
11
|
+
- Add `RedisStorageClient` based on Redis v8.0+ ([#1406](https://github.com/apify/crawlee-python/pull/1406)) ([d08d13d](https://github.com/apify/crawlee-python/commit/d08d13d39203c24ab61fe254b0956d6744db3b5f)) by [@Mantisus](https://github.com/Mantisus)
|
|
12
|
+
- Add support for Python 3.14 ([#1553](https://github.com/apify/crawlee-python/pull/1553)) ([89e9130](https://github.com/apify/crawlee-python/commit/89e9130cabee0fbc974b29c26483b7fa0edf627c)) by [@Mantisus](https://github.com/Mantisus)
|
|
11
13
|
|
|
12
14
|
### 🐛 Bug Fixes
|
|
13
15
|
|
|
14
16
|
- Improve indexing of the `request_queue_records` table for `SqlRequestQueueClient` ([#1527](https://github.com/apify/crawlee-python/pull/1527)) ([6509534](https://github.com/apify/crawlee-python/commit/65095346a9d8b703b10c91e0510154c3c48a4176)) by [@Mantisus](https://github.com/Mantisus), closes [#1526](https://github.com/apify/crawlee-python/issues/1526)
|
|
17
|
+
- Improve error handling for `RobotsTxtFile.load` ([#1524](https://github.com/apify/crawlee-python/pull/1524)) ([596a311](https://github.com/apify/crawlee-python/commit/596a31184914a254b3e7a81fd2f48ea8eda7db49)) by [@Mantisus](https://github.com/Mantisus)
|
|
18
|
+
- Fix `crawler_runtime` not being updated during run and only in the end ([#1540](https://github.com/apify/crawlee-python/pull/1540)) ([0d6c3f6](https://github.com/apify/crawlee-python/commit/0d6c3f6d3337ddb6cab4873747c28cf95605d550)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1541](https://github.com/apify/crawlee-python/issues/1541)
|
|
15
19
|
|
|
16
20
|
|
|
17
21
|
<!-- git-cliff-unreleased-end -->
|
|
@@ -281,7 +285,7 @@ All notable changes to this project will be documented in this file.
|
|
|
281
285
|
|
|
282
286
|
### 🐛 Bug Fixes
|
|
283
287
|
|
|
284
|
-
- Fix session
|
|
288
|
+
- Fix session management with retire ([#947](https://github.com/apify/crawlee-python/pull/947)) ([caee03f](https://github.com/apify/crawlee-python/commit/caee03fe3a43cc1d7a8d3f9e19b42df1bdb1c0aa)) by [@Mantisus](https://github.com/Mantisus)
|
|
285
289
|
- Fix templates - poetry-plugin-export version and camoufox template name ([#952](https://github.com/apify/crawlee-python/pull/952)) ([7addea6](https://github.com/apify/crawlee-python/commit/7addea6605359cceba208e16ec9131724bdb3e9b)) by [@Pijukatel](https://github.com/Pijukatel), closes [#951](https://github.com/apify/crawlee-python/issues/951)
|
|
286
290
|
- Fix convert relative link to absolute in `enqueue_links` for response with redirect ([#956](https://github.com/apify/crawlee-python/pull/956)) ([694102e](https://github.com/apify/crawlee-python/commit/694102e163bb9021a4830d2545d153f6f8f3de90)) by [@Mantisus](https://github.com/Mantisus), closes [#955](https://github.com/apify/crawlee-python/issues/955)
|
|
287
291
|
- Fix `CurlImpersonateHttpClient` cookies handler ([#946](https://github.com/apify/crawlee-python/pull/946)) ([ed415c4](https://github.com/apify/crawlee-python/commit/ed415c433da2a40b0ee62534f0730d0737e991b8)) by [@Mantisus](https://github.com/Mantisus)
|
|
@@ -103,7 +103,7 @@ make run-docs
|
|
|
103
103
|
Publishing new versions to [PyPI](https://pypi.org/project/crawlee) is automated through GitHub Actions.
|
|
104
104
|
|
|
105
105
|
- **Beta releases**: On each commit to the master branch, a new beta release is automatically published. The version number is determined based on the latest release and conventional commits. The beta version suffix is incremented by 1 from the last beta release on PyPI.
|
|
106
|
-
- **Stable releases**: A stable version release may be created by triggering the `release` GitHub Actions workflow. The version number is determined based on the latest release and conventional commits (`auto` release type), or it may be
|
|
106
|
+
- **Stable releases**: A stable version release may be created by triggering the `release` GitHub Actions workflow. The version number is determined based on the latest release and conventional commits (`auto` release type), or it may be overridden using the `custom` release type.
|
|
107
107
|
|
|
108
108
|
### Publishing to PyPI manually
|
|
109
109
|
|
|
@@ -38,7 +38,7 @@ unit-tests-cov:
|
|
|
38
38
|
uv run pytest --numprocesses=auto -vv --cov=src/crawlee --cov-append --cov-report=html tests/unit -m "not run_alone"
|
|
39
39
|
|
|
40
40
|
e2e-templates-tests $(args):
|
|
41
|
-
uv run pytest --numprocesses=$(E2E_TESTS_CONCURRENCY) -vv tests/e2e/project_template "$(args)"
|
|
41
|
+
uv run pytest --numprocesses=$(E2E_TESTS_CONCURRENCY) -vv tests/e2e/project_template "$(args)" --timeout=600
|
|
42
42
|
|
|
43
43
|
format:
|
|
44
44
|
uv run ruff check --fix
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: crawlee
|
|
3
|
-
Version: 1.0.5b3
|
|
3
|
+
Version: 1.0.5b16
|
|
4
4
|
Summary: Crawlee for Python
|
|
5
5
|
Project-URL: Apify Homepage, https://apify.com
|
|
6
6
|
Project-URL: Changelog, https://crawlee.dev/python/docs/changelog
|
|
@@ -223,15 +223,16 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
223
223
|
Classifier: Programming Language :: Python :: 3.11
|
|
224
224
|
Classifier: Programming Language :: Python :: 3.12
|
|
225
225
|
Classifier: Programming Language :: Python :: 3.13
|
|
226
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
226
227
|
Classifier: Topic :: Software Development :: Libraries
|
|
227
228
|
Requires-Python: >=3.10
|
|
228
229
|
Requires-Dist: cachetools>=5.5.0
|
|
229
230
|
Requires-Dist: colorama>=0.4.0
|
|
230
|
-
Requires-Dist: impit>=0.
|
|
231
|
+
Requires-Dist: impit>=0.8.0
|
|
231
232
|
Requires-Dist: more-itertools>=10.2.0
|
|
232
233
|
Requires-Dist: protego>=0.5.0
|
|
233
234
|
Requires-Dist: psutil>=6.0.0
|
|
234
|
-
Requires-Dist: pydantic-settings
|
|
235
|
+
Requires-Dist: pydantic-settings>=2.12.0
|
|
235
236
|
Requires-Dist: pydantic>=2.11.0
|
|
236
237
|
Requires-Dist: pyee>=9.0.0
|
|
237
238
|
Requires-Dist: tldextract>=5.1.0
|
|
@@ -246,7 +247,7 @@ Requires-Dist: scikit-learn>=1.6.0; extra == 'adaptive-crawler'
|
|
|
246
247
|
Provides-Extra: all
|
|
247
248
|
Requires-Dist: aiosqlite>=0.21.0; extra == 'all'
|
|
248
249
|
Requires-Dist: apify-fingerprint-datapoints>=0.0.2; extra == 'all'
|
|
249
|
-
Requires-Dist: asyncpg>=0.24.0; extra == 'all'
|
|
250
|
+
Requires-Dist: asyncpg>=0.24.0; (python_version < '3.14') and extra == 'all'
|
|
250
251
|
Requires-Dist: beautifulsoup4[lxml]>=4.12.0; extra == 'all'
|
|
251
252
|
Requires-Dist: browserforge>=1.2.3; extra == 'all'
|
|
252
253
|
Requires-Dist: cookiecutter>=2.6.0; extra == 'all'
|
|
@@ -263,6 +264,7 @@ Requires-Dist: opentelemetry-sdk>=1.34.1; extra == 'all'
|
|
|
263
264
|
Requires-Dist: opentelemetry-semantic-conventions>=0.54; extra == 'all'
|
|
264
265
|
Requires-Dist: parsel>=1.10.0; extra == 'all'
|
|
265
266
|
Requires-Dist: playwright>=1.27.0; extra == 'all'
|
|
267
|
+
Requires-Dist: redis[hiredis]>=7.0.0; extra == 'all'
|
|
266
268
|
Requires-Dist: rich>=13.9.0; extra == 'all'
|
|
267
269
|
Requires-Dist: scikit-learn>=1.6.0; extra == 'all'
|
|
268
270
|
Requires-Dist: sqlalchemy[asyncio]<3.0.0,>=2.0.0; extra == 'all'
|
|
@@ -296,8 +298,10 @@ Provides-Extra: playwright
|
|
|
296
298
|
Requires-Dist: apify-fingerprint-datapoints>=0.0.2; extra == 'playwright'
|
|
297
299
|
Requires-Dist: browserforge>=1.2.3; extra == 'playwright'
|
|
298
300
|
Requires-Dist: playwright>=1.27.0; extra == 'playwright'
|
|
301
|
+
Provides-Extra: redis
|
|
302
|
+
Requires-Dist: redis[hiredis]>=7.0.0; extra == 'redis'
|
|
299
303
|
Provides-Extra: sql-postgres
|
|
300
|
-
Requires-Dist: asyncpg>=0.24.0; extra == 'sql-postgres'
|
|
304
|
+
Requires-Dist: asyncpg>=0.24.0; (python_version < '3.14') and extra == 'sql-postgres'
|
|
301
305
|
Requires-Dist: sqlalchemy[asyncio]<3.0.0,>=2.0.0; extra == 'sql-postgres'
|
|
302
306
|
Provides-Extra: sql-sqlite
|
|
303
307
|
Requires-Dist: aiosqlite>=0.21.0; extra == 'sql-sqlite'
|
|
@@ -99,7 +99,7 @@ apify run
|
|
|
99
99
|
For running Crawlee code as an Actor on [Apify platform](https://apify.com/actors) you need to wrap the body of the main function of your crawler with `async with Actor`.
|
|
100
100
|
|
|
101
101
|
:::info NOTE
|
|
102
|
-
Adding `async with Actor` is the only important thing needed to run it on Apify platform as an Actor. It is needed to initialize your Actor (e.g. to set the correct storage implementation) and to correctly handle
|
|
102
|
+
Adding `async with Actor` is the only important thing needed to run it on Apify platform as an Actor. It is needed to initialize your Actor (e.g. to set the correct storage implementation) and to correctly handle exiting the process.
|
|
103
103
|
:::
|
|
104
104
|
|
|
105
105
|
Let's look at the `BeautifulSoupCrawler` example from the [Quick start](../quick-start) guide:
|
crawlee-1.0.5b16/docs/guides/code_examples/storage_clients/redis_storage_client_basic_example.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
from crawlee.crawlers import ParselCrawler
|
|
2
|
+
from crawlee.storage_clients import RedisStorageClient
|
|
3
|
+
|
|
4
|
+
# Create a new instance of storage client using connection string.
|
|
5
|
+
# 'redis://localhost:6379' is just a placeholder, replace it with your actual
|
|
6
|
+
# connection string.
|
|
7
|
+
storage_client = RedisStorageClient(connection_string='redis://localhost:6379')
|
|
8
|
+
|
|
9
|
+
# And pass it to the crawler.
|
|
10
|
+
crawler = ParselCrawler(storage_client=storage_client)
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from redis.asyncio import Redis
|
|
2
|
+
|
|
3
|
+
from crawlee.configuration import Configuration
|
|
4
|
+
from crawlee.crawlers import ParselCrawler
|
|
5
|
+
from crawlee.storage_clients import RedisStorageClient
|
|
6
|
+
|
|
7
|
+
# Create a new instance of storage client using a Redis client with custom settings.
|
|
8
|
+
# Replace host and port with your actual Redis server configuration.
|
|
9
|
+
# Other Redis client settings can be adjusted as needed.
|
|
10
|
+
storage_client = RedisStorageClient(
|
|
11
|
+
redis=Redis(
|
|
12
|
+
host='localhost',
|
|
13
|
+
port=6379,
|
|
14
|
+
retry_on_timeout=True,
|
|
15
|
+
socket_keepalive=True,
|
|
16
|
+
socket_connect_timeout=10,
|
|
17
|
+
)
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
# Create a configuration with custom settings.
|
|
21
|
+
configuration = Configuration(purge_on_start=False)
|
|
22
|
+
|
|
23
|
+
# And pass them to the crawler.
|
|
24
|
+
crawler = ParselCrawler(
|
|
25
|
+
storage_client=storage_client,
|
|
26
|
+
configuration=configuration,
|
|
27
|
+
)
|
|
@@ -17,6 +17,8 @@ import CustomStorageClientExample from '!!raw-loader!roa-loader!./code_examples/
|
|
|
17
17
|
import RegisteringStorageClientsExample from '!!raw-loader!roa-loader!./code_examples/storage_clients/registering_storage_clients_example.py';
|
|
18
18
|
import SQLStorageClientBasicExample from '!!raw-loader!roa-loader!./code_examples/storage_clients/sql_storage_client_basic_example.py';
|
|
19
19
|
import SQLStorageClientConfigurationExample from '!!raw-loader!./code_examples/storage_clients/sql_storage_client_configuration_example.py';
|
|
20
|
+
import RedisStorageClientBasicExample from '!!raw-loader!./code_examples/storage_clients/redis_storage_client_basic_example.py';
|
|
21
|
+
import RedisStorageClientConfigurationExample from '!!raw-loader!./code_examples/storage_clients/redis_storage_client_configuration_example.py';
|
|
20
22
|
|
|
21
23
|
Storage clients provide a unified interface for interacting with <ApiLink to="class/Dataset">`Dataset`</ApiLink>, <ApiLink to="class/KeyValueStore">`KeyValueStore`</ApiLink>, and <ApiLink to="class/RequestQueue">`RequestQueue`</ApiLink>, regardless of the underlying implementation. They handle operations like creating, reading, updating, and deleting storage instances, as well as managing data persistence and cleanup. This abstraction makes it easy to switch between different environments, such as local development and cloud production setups.
|
|
22
24
|
|
|
@@ -26,7 +28,8 @@ Crawlee provides three main storage client implementations:
|
|
|
26
28
|
|
|
27
29
|
- <ApiLink to="class/FileSystemStorageClient">`FileSystemStorageClient`</ApiLink> - Provides persistent file system storage with in-memory caching.
|
|
28
30
|
- <ApiLink to="class/MemoryStorageClient">`MemoryStorageClient`</ApiLink> - Stores data in memory with no persistence.
|
|
29
|
-
- <ApiLink to="class/SqlStorageClient">`SqlStorageClient`</ApiLink>
|
|
31
|
+
- <ApiLink to="class/SqlStorageClient">`SqlStorageClient`</ApiLink> - Provides persistent storage using a SQL database ([SQLite](https://sqlite.org/) or [PostgreSQL](https://www.postgresql.org/)). Requires installing the extra dependency: `crawlee[sql_sqlite]` for SQLite or `crawlee[sql_postgres]` for PostgreSQL.
|
|
32
|
+
- <ApiLink to="class/RedisStorageClient">`RedisStorageClient`</ApiLink> - Provides persistent storage using a [Redis](https://redis.io/) database v8.0+. Requires installing the extra dependency `crawlee[redis]`.
|
|
30
33
|
- [`ApifyStorageClient`](https://docs.apify.com/sdk/python/reference/class/ApifyStorageClient) - Manages storage on the [Apify platform](https://apify.com), implemented in the [Apify SDK](https://github.com/apify/apify-sdk-python).
|
|
31
34
|
|
|
32
35
|
```mermaid
|
|
@@ -56,6 +59,8 @@ class MemoryStorageClient
|
|
|
56
59
|
|
|
57
60
|
class SqlStorageClient
|
|
58
61
|
|
|
62
|
+
class RedisStorageClient
|
|
63
|
+
|
|
59
64
|
class ApifyStorageClient
|
|
60
65
|
|
|
61
66
|
%% ========================
|
|
@@ -65,6 +70,7 @@ class ApifyStorageClient
|
|
|
65
70
|
StorageClient --|> FileSystemStorageClient
|
|
66
71
|
StorageClient --|> MemoryStorageClient
|
|
67
72
|
StorageClient --|> SqlStorageClient
|
|
73
|
+
StorageClient --|> RedisStorageClient
|
|
68
74
|
StorageClient --|> ApifyStorageClient
|
|
69
75
|
```
|
|
70
76
|
|
|
@@ -304,8 +310,8 @@ Configuration options for the <ApiLink to="class/SqlStorageClient">`SqlStorageCl
|
|
|
304
310
|
|
|
305
311
|
Configuration options for the <ApiLink to="class/SqlStorageClient">`SqlStorageClient`</ApiLink> can be set via constructor arguments:
|
|
306
312
|
|
|
307
|
-
- **`connection_string`** (default: SQLite in <ApiLink to="class/Configuration">`Configuration`</ApiLink> storage dir)
|
|
308
|
-
- **`engine`**
|
|
313
|
+
- **`connection_string`** (default: SQLite in <ApiLink to="class/Configuration">`Configuration`</ApiLink> storage dir) - SQLAlchemy connection string, e.g. `sqlite+aiosqlite:///my.db` or `postgresql+asyncpg://user:pass@host/db`.
|
|
314
|
+
- **`engine`** - Pre-configured SQLAlchemy AsyncEngine (optional).
|
|
309
315
|
|
|
310
316
|
For advanced scenarios, you can configure <ApiLink to="class/SqlStorageClient">`SqlStorageClient`</ApiLink> with a custom SQLAlchemy engine and additional options via the <ApiLink to="class/Configuration">`Configuration`</ApiLink> class. This is useful, for example, when connecting to an external PostgreSQL database or customizing connection pooling.
|
|
311
317
|
|
|
@@ -313,6 +319,172 @@ For advanced scenarios, you can configure <ApiLink to="class/SqlStorageClient">`
|
|
|
313
319
|
{SQLStorageClientConfigurationExample}
|
|
314
320
|
</CodeBlock>
|
|
315
321
|
|
|
322
|
+
### Redis storage client
|
|
323
|
+
|
|
324
|
+
:::warning Experimental feature
|
|
325
|
+
The <ApiLink to="class/RedisStorageClient">`RedisStorageClient`</ApiLink> is experimental. Its API and behavior may change in future releases.
|
|
326
|
+
:::
|
|
327
|
+
|
|
328
|
+
The <ApiLink to="class/RedisStorageClient">`RedisStorageClient`</ApiLink> provides persistent storage using a [Redis](https://redis.io/) database. It supports concurrent access from multiple independent clients or processes and uses Redis native data structures for efficient operations.
|
|
329
|
+
|
|
330
|
+
:::note dependencies
|
|
331
|
+
The <ApiLink to="class/RedisStorageClient">`RedisStorageClient`</ApiLink> is not included in the core Crawlee package.
|
|
332
|
+
To use it, you need to install Crawlee with the Redis extra dependency:
|
|
333
|
+
|
|
334
|
+
<code>pip install 'crawlee[redis]'</code>
|
|
335
|
+
|
|
336
|
+
Additionally, Redis version 8.0 or higher is required.
|
|
337
|
+
:::
|
|
338
|
+
|
|
339
|
+
:::note Redis persistence
|
|
340
|
+
Data persistence in Redis depends on your [database configuration](https://redis.io/docs/latest/operate/oss_and_stack/management/persistence/).
|
|
341
|
+
:::
|
|
342
|
+
|
|
343
|
+
The client requires either a Redis connection string or a pre-configured Redis client instance. Use a pre-configured client when you need custom Redis settings such as connection pooling, timeouts, or SSL/TLS encryption.
|
|
344
|
+
|
|
345
|
+
<CodeBlock className="language-python" language="python">
|
|
346
|
+
{RedisStorageClientBasicExample}
|
|
347
|
+
</CodeBlock>
|
|
348
|
+
|
|
349
|
+
Data is organized using Redis key patterns. Below are the main data structures used for each storage type:
|
|
350
|
+
|
|
351
|
+
```mermaid
|
|
352
|
+
---
|
|
353
|
+
config:
|
|
354
|
+
class:
|
|
355
|
+
hideEmptyMembersBox: true
|
|
356
|
+
---
|
|
357
|
+
|
|
358
|
+
classDiagram
|
|
359
|
+
|
|
360
|
+
%% ========================
|
|
361
|
+
%% Storage Client
|
|
362
|
+
%% ========================
|
|
363
|
+
|
|
364
|
+
class RedisDatasetClient {
|
|
365
|
+
<<Dataset>>
|
|
366
|
+
}
|
|
367
|
+
|
|
368
|
+
%% ========================
|
|
369
|
+
%% Dataset Keys
|
|
370
|
+
%% ========================
|
|
371
|
+
|
|
372
|
+
class DatasetKeys {
|
|
373
|
+
datasets:[name]:items - JSON Array
|
|
374
|
+
datasets:[name]:metadata - JSON Object
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
class DatasetsIndexes {
|
|
378
|
+
datasets:id_to_name - Hash
|
|
379
|
+
datasets:name_to_id - Hash
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
%% ========================
|
|
383
|
+
%% Client to Keys arrows
|
|
384
|
+
%% ========================
|
|
385
|
+
|
|
386
|
+
RedisDatasetClient --> DatasetKeys
|
|
387
|
+
RedisDatasetClient --> DatasetsIndexes
|
|
388
|
+
```
|
|
389
|
+
|
|
390
|
+
```mermaid
|
|
391
|
+
---
|
|
392
|
+
config:
|
|
393
|
+
class:
|
|
394
|
+
hideEmptyMembersBox: true
|
|
395
|
+
---
|
|
396
|
+
|
|
397
|
+
classDiagram
|
|
398
|
+
|
|
399
|
+
%% ========================
|
|
400
|
+
%% Storage Clients
|
|
401
|
+
%% ========================
|
|
402
|
+
|
|
403
|
+
class RedisKeyValueStoreClient {
|
|
404
|
+
<<Key-value store>>
|
|
405
|
+
}
|
|
406
|
+
|
|
407
|
+
%% ========================
|
|
408
|
+
%% Key-Value Store Keys
|
|
409
|
+
%% ========================
|
|
410
|
+
|
|
411
|
+
class KeyValueStoreKeys {
|
|
412
|
+
key_value_stores:[name]:items - Hash
|
|
413
|
+
key_value_stores:[name]:metadata_items - Hash
|
|
414
|
+
key_value_stores:[name]:metadata - JSON Object
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
class KeyValueStoresIndexes {
|
|
418
|
+
key_value_stores:id_to_name - Hash
|
|
419
|
+
key_value_stores:name_to_id - Hash
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
%% ========================
|
|
423
|
+
%% Client to Keys arrows
|
|
424
|
+
%% ========================
|
|
425
|
+
|
|
426
|
+
RedisKeyValueStoreClient --> KeyValueStoreKeys
|
|
427
|
+
RedisKeyValueStoreClient --> KeyValueStoresIndexes
|
|
428
|
+
```
|
|
429
|
+
|
|
430
|
+
```mermaid
|
|
431
|
+
---
|
|
432
|
+
config:
|
|
433
|
+
class:
|
|
434
|
+
hideEmptyMembersBox: true
|
|
435
|
+
---
|
|
436
|
+
|
|
437
|
+
classDiagram
|
|
438
|
+
|
|
439
|
+
%% ========================
|
|
440
|
+
%% Storage Clients
|
|
441
|
+
%% ========================
|
|
442
|
+
|
|
443
|
+
class RedisRequestQueueClient {
|
|
444
|
+
<<Request queue>>
|
|
445
|
+
}
|
|
446
|
+
|
|
447
|
+
%% ========================
|
|
448
|
+
%% Request Queue Keys
|
|
449
|
+
%% ========================
|
|
450
|
+
|
|
451
|
+
class RequestQueueKeys{
|
|
452
|
+
request_queues:[name]:queue - List
|
|
453
|
+
request_queues:[name]:data - Hash
|
|
454
|
+
request_queues:[name]:in_progress - Hash
|
|
455
|
+
request_queues:[name]:added_bloom_filter - Bloom Filter | bloom queue_dedup_strategy
|
|
456
|
+
request_queues:[name]:handled_bloom_filter - Bloom Filter | bloom queue_dedup_strategy
|
|
457
|
+
request_queues:[name]:pending_set - Set | default queue_dedup_strategy
|
|
458
|
+
request_queues:[name]:handled_set - Set | default queue_dedup_strategy
|
|
459
|
+
request_queues:[name]:metadata - JSON Object
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
class RequestQueuesIndexes {
|
|
463
|
+
request_queues:id_to_name - Hash
|
|
464
|
+
request_queues:name_to_id - Hash
|
|
465
|
+
}
|
|
466
|
+
|
|
467
|
+
%% ========================
|
|
468
|
+
%% Client to Keys arrows
|
|
469
|
+
%% ========================
|
|
470
|
+
|
|
471
|
+
RedisRequestQueueClient --> RequestQueueKeys
|
|
472
|
+
RedisRequestQueueClient --> RequestQueuesIndexes
|
|
473
|
+
```
|
|
474
|
+
|
|
475
|
+
Configuration options for the <ApiLink to="class/RedisStorageClient">`RedisStorageClient`</ApiLink> can be set through environment variables or the <ApiLink to="class/Configuration">`Configuration`</ApiLink> class:
|
|
476
|
+
|
|
477
|
+
- **`purge_on_start`** (env: `CRAWLEE_PURGE_ON_START`, default: `True`) - Whether to purge default storages on start.
|
|
478
|
+
|
|
479
|
+
Configuration options for the <ApiLink to="class/RedisStorageClient">`RedisStorageClient`</ApiLink> can be set via constructor arguments:
|
|
480
|
+
|
|
481
|
+
- **`connection_string`** - Redis connection string, e.g. `redis://localhost:6379/0`.
|
|
482
|
+
- **`redis`** - Pre-configured Redis client instance (optional).
|
|
483
|
+
|
|
484
|
+
<CodeBlock className="language-python" language="python">
|
|
485
|
+
{RedisStorageClientConfigurationExample}
|
|
486
|
+
</CodeBlock>
|
|
487
|
+
|
|
316
488
|
## Creating a custom storage client
|
|
317
489
|
|
|
318
490
|
A storage client consists of two parts: the storage client factory and individual storage type clients. The <ApiLink to="class/StorageClient">`StorageClient`</ApiLink> acts as a factory that creates specific clients (<ApiLink to="class/DatasetClient">`DatasetClient`</ApiLink>, <ApiLink to="class/KeyValueStoreClient">`KeyValueStoreClient`</ApiLink>, <ApiLink to="class/RequestQueueClient">`RequestQueueClient`</ApiLink>) where the actual storage logic is implemented.
|
|
@@ -45,7 +45,7 @@ You can use different tools to consume the OpenTelemetry data that might better
|
|
|
45
45
|
|
|
46
46
|
## Customize the instrumentation
|
|
47
47
|
|
|
48
|
-
You can customize the <ApiLink to="class/CrawlerInstrumentor">`CrawlerInstrumentor`</ApiLink>. Depending on the arguments used during its initialization, the instrumentation will be applied to different parts
|
|
48
|
+
You can customize the <ApiLink to="class/CrawlerInstrumentor">`CrawlerInstrumentor`</ApiLink>. Depending on the arguments used during its initialization, the instrumentation will be applied to different parts of the Crawlee code. By default, it instruments some functions that can give quite a good picture of each individual request handling. To turn this default instrumentation off, you can pass `request_handling_instrumentation=False` during initialization. You can also extend instrumentation by passing `instrument_classes=[...]` initialization argument that contains classes you want to be auto-instrumented. All their public methods will be automatically instrumented. Bear in mind that instrumentation has some runtime costs as well. The more instrumentation is used, the more overhead it will add to the crawler execution.
|
|
49
49
|
|
|
50
50
|
You can also create your instrumentation by selecting only the methods you want to instrument. For more details, see the <ApiLink to="class/CrawlerInstrumentor">`CrawlerInstrumentor`</ApiLink> source code and the [Python documentation for OpenTelemetry](https://opentelemetry.io/docs/languages/python/).
|
|
51
51
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Line
|
|
1
|
+
# Line length different from the rest of the code to make sure that the example codes visualised on the generated
|
|
2
2
|
# documentation webpages are shown without vertical slider to make them more readable.
|
|
3
3
|
|
|
4
4
|
[tool.ruff]
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "crawlee"
|
|
7
|
-
version = "1.0.5b3"
|
|
7
|
+
version = "1.0.5b16"
|
|
8
8
|
description = "Crawlee for Python"
|
|
9
9
|
authors = [{ name = "Apify Technologies s.r.o.", email = "support@apify.com" }]
|
|
10
10
|
license = { file = "LICENSE" }
|
|
@@ -20,6 +20,7 @@ classifiers = [
|
|
|
20
20
|
"Programming Language :: Python :: 3.11",
|
|
21
21
|
"Programming Language :: Python :: 3.12",
|
|
22
22
|
"Programming Language :: Python :: 3.13",
|
|
23
|
+
"Programming Language :: Python :: 3.14",
|
|
23
24
|
"Topic :: Software Development :: Libraries",
|
|
24
25
|
]
|
|
25
26
|
keywords = [
|
|
@@ -35,11 +36,11 @@ keywords = [
|
|
|
35
36
|
dependencies = [
|
|
36
37
|
"cachetools>=5.5.0",
|
|
37
38
|
"colorama>=0.4.0",
|
|
38
|
-
"impit>=0.
|
|
39
|
+
"impit>=0.8.0",
|
|
39
40
|
"more-itertools>=10.2.0",
|
|
40
41
|
"protego>=0.5.0",
|
|
41
42
|
"psutil>=6.0.0",
|
|
42
|
-
"pydantic-settings>=2.
|
|
43
|
+
"pydantic-settings>=2.12.0",
|
|
43
44
|
"pydantic>=2.11.0",
|
|
44
45
|
"pyee>=9.0.0",
|
|
45
46
|
"tldextract>=5.1.0",
|
|
@@ -48,7 +49,7 @@ dependencies = [
|
|
|
48
49
|
]
|
|
49
50
|
|
|
50
51
|
[project.optional-dependencies]
|
|
51
|
-
all = ["crawlee[adaptive-crawler,beautifulsoup,cli,curl-impersonate,httpx,parsel,playwright,otel,sql_sqlite,sql_postgres]"]
|
|
52
|
+
all = ["crawlee[adaptive-crawler,beautifulsoup,cli,curl-impersonate,httpx,parsel,playwright,otel,sql_sqlite,sql_postgres,redis]"]
|
|
52
53
|
adaptive-crawler = [
|
|
53
54
|
"jaro-winkler>=2.0.3",
|
|
54
55
|
"playwright>=1.27.0",
|
|
@@ -73,12 +74,13 @@ otel = [
|
|
|
73
74
|
]
|
|
74
75
|
sql_postgres = [
|
|
75
76
|
"sqlalchemy[asyncio]>=2.0.0,<3.0.0",
|
|
76
|
-
"asyncpg>=0.24.0"
|
|
77
|
+
"asyncpg>=0.24.0; python_version < '3.14'" # TODO: https://github.com/apify/crawlee-python/issues/1555
|
|
77
78
|
]
|
|
78
79
|
sql_sqlite = [
|
|
79
80
|
"sqlalchemy[asyncio]>=2.0.0,<3.0.0",
|
|
80
81
|
"aiosqlite>=0.21.0",
|
|
81
82
|
]
|
|
83
|
+
redis = ["redis[hiredis] >= 7.0.0"]
|
|
82
84
|
|
|
83
85
|
[project.scripts]
|
|
84
86
|
crawlee = "crawlee._cli:cli"
|
|
@@ -96,24 +98,25 @@ crawlee = "crawlee._cli:cli"
|
|
|
96
98
|
[dependency-groups]
|
|
97
99
|
dev = [
|
|
98
100
|
"apify_client", # For e2e tests.
|
|
99
|
-
"build
|
|
100
|
-
"dycw-pytest-only
|
|
101
|
-
"
|
|
102
|
-
"
|
|
103
|
-
"
|
|
104
|
-
"
|
|
105
|
-
"
|
|
106
|
-
"pytest-
|
|
107
|
-
"pytest-
|
|
108
|
-
"pytest-
|
|
109
|
-
"pytest
|
|
101
|
+
"build<2.0.0", # For e2e tests.
|
|
102
|
+
"dycw-pytest-only<3.0.0",
|
|
103
|
+
"fakeredis[probabilistic,json,lua]<3.0.0",
|
|
104
|
+
"mypy~=1.18.0",
|
|
105
|
+
"pre-commit<5.0.0",
|
|
106
|
+
"proxy-py<3.0.0",
|
|
107
|
+
"pydoc-markdown<5.0.0",
|
|
108
|
+
"pytest-asyncio<2.0.0",
|
|
109
|
+
"pytest-cov<8.0.0",
|
|
110
|
+
"pytest-timeout<3.0.0",
|
|
111
|
+
"pytest-xdist<4.0.0",
|
|
112
|
+
"pytest<9.0.0",
|
|
110
113
|
"ruff~=0.14.0",
|
|
111
114
|
"setuptools", # setuptools are used by pytest, but not explicitly required
|
|
112
|
-
"types-beautifulsoup4
|
|
113
|
-
"types-cachetools
|
|
114
|
-
"types-colorama
|
|
115
|
-
"types-psutil
|
|
116
|
-
"types-python-dateutil
|
|
115
|
+
"types-beautifulsoup4<5.0.0",
|
|
116
|
+
"types-cachetools<7.0.0",
|
|
117
|
+
"types-colorama<1.0.0",
|
|
118
|
+
"types-psutil<8.0.0",
|
|
119
|
+
"types-python-dateutil<3.0.0",
|
|
117
120
|
"uvicorn[standard]~=0.35.0", # https://github.com/apify/crawlee-python/issues/1441
|
|
118
121
|
]
|
|
119
122
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
from logging import getLogger
|
|
3
4
|
from typing import TYPE_CHECKING
|
|
4
5
|
|
|
5
6
|
from protego import Protego
|
|
@@ -15,6 +16,9 @@ if TYPE_CHECKING:
|
|
|
15
16
|
from crawlee.proxy_configuration import ProxyInfo
|
|
16
17
|
|
|
17
18
|
|
|
19
|
+
logger = getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
18
22
|
class RobotsTxtFile:
|
|
19
23
|
def __init__(
|
|
20
24
|
self, url: str, robots: Protego, http_client: HttpClient | None = None, proxy_info: ProxyInfo | None = None
|
|
@@ -56,12 +60,20 @@ class RobotsTxtFile:
|
|
|
56
60
|
http_client: The `HttpClient` instance used to perform the network request for fetching the robots.txt file.
|
|
57
61
|
proxy_info: Optional `ProxyInfo` to be used when fetching the robots.txt file. If None, no proxy is used.
|
|
58
62
|
"""
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
+
try:
|
|
64
|
+
response = await http_client.send_request(url, proxy_info=proxy_info)
|
|
65
|
+
|
|
66
|
+
body = (
|
|
67
|
+
b'User-agent: *\nAllow: /'
|
|
68
|
+
if is_status_code_client_error(response.status_code)
|
|
69
|
+
else await response.read()
|
|
70
|
+
)
|
|
71
|
+
robots = Protego.parse(body.decode('utf-8'))
|
|
72
|
+
|
|
73
|
+
except Exception as e:
|
|
74
|
+
logger.warning(f'Failed to fetch from robots.txt from "{url}" with error: "{e}"')
|
|
63
75
|
|
|
64
|
-
|
|
76
|
+
robots = Protego.parse('User-agent: *\nAllow: /')
|
|
65
77
|
|
|
66
78
|
return cls(url, robots, http_client=http_client, proxy_info=proxy_info)
|
|
67
79
|
|
|
@@ -28,7 +28,9 @@ class Configuration(BaseSettings):
|
|
|
28
28
|
Settings can also be configured via environment variables, prefixed with `CRAWLEE_`.
|
|
29
29
|
"""
|
|
30
30
|
|
|
31
|
-
|
|
31
|
+
# TODO: https://github.com/pydantic/pydantic-settings/issues/706
|
|
32
|
+
# Use `SettingsConfigDict(validate_by_name=True, validate_by_alias=True)` when issue is resolved.
|
|
33
|
+
model_config = SettingsConfigDict(populate_by_name=True)
|
|
32
34
|
|
|
33
35
|
internal_timeout: Annotated[timedelta | None, Field(alias='crawlee_internal_timeout')] = None
|
|
34
36
|
"""Timeout for the internal asynchronous operations."""
|