crawlee 1.0.5b12__tar.gz → 1.0.5b14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/.github/workflows/run_code_checks.yaml +1 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/CHANGELOG.md +2 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/Makefile +1 -4
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/PKG-INFO +4 -1
- crawlee-1.0.5b14/docs/guides/code_examples/storage_clients/redis_storage_client_basic_example.py +10 -0
- crawlee-1.0.5b14/docs/guides/code_examples/storage_clients/redis_storage_client_configuration_example.py +27 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/storage_clients.mdx +175 -3
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/pyproject.toml +4 -2
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +0 -1
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/statistics/_statistics.py +28 -27
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storage_clients/__init__.py +4 -0
- crawlee-1.0.5b14/src/crawlee/storage_clients/_redis/__init__.py +6 -0
- crawlee-1.0.5b14/src/crawlee/storage_clients/_redis/_client_mixin.py +295 -0
- crawlee-1.0.5b14/src/crawlee/storage_clients/_redis/_dataset_client.py +325 -0
- crawlee-1.0.5b14/src/crawlee/storage_clients/_redis/_key_value_store_client.py +264 -0
- crawlee-1.0.5b14/src/crawlee/storage_clients/_redis/_request_queue_client.py +586 -0
- crawlee-1.0.5b14/src/crawlee/storage_clients/_redis/_storage_client.py +146 -0
- crawlee-1.0.5b14/src/crawlee/storage_clients/_redis/_utils.py +23 -0
- crawlee-1.0.5b14/src/crawlee/storage_clients/_redis/lua_scripts/atomic_bloom_add_requests.lua +36 -0
- crawlee-1.0.5b14/src/crawlee/storage_clients/_redis/lua_scripts/atomic_fetch_request.lua +49 -0
- crawlee-1.0.5b14/src/crawlee/storage_clients/_redis/lua_scripts/atomic_set_add_requests.lua +37 -0
- crawlee-1.0.5b14/src/crawlee/storage_clients/_redis/lua_scripts/reclaim_stale_requests.lua +34 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/conftest.py +6 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/crawlers/_basic/test_basic_crawler.py +26 -0
- crawlee-1.0.5b14/tests/unit/storage_clients/_redis/test_redis_dataset_client.py +146 -0
- crawlee-1.0.5b14/tests/unit/storage_clients/_redis/test_redis_kvs_client.py +217 -0
- crawlee-1.0.5b14/tests/unit/storage_clients/_redis/test_redis_rq_client.py +252 -0
- crawlee-1.0.5b14/tests/unit/storages/conftest.py +39 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/uv.lock +251 -3
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/pages/home_page_example.py +14 -9
- crawlee-1.0.5b14/website/static/.nojekyll +0 -0
- crawlee-1.0.5b12/tests/unit/storages/conftest.py +0 -18
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/.editorconfig +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/.github/CODEOWNERS +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/.github/pull_request_template.md +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/.github/workflows/build_and_deploy_docs.yaml +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/.github/workflows/check_pr_title.yaml +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/.github/workflows/pre_release.yaml +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/.github/workflows/release.yaml +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/.github/workflows/templates_e2e_tests.yaml +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/.github/workflows/update_new_issue.yaml +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/.gitignore +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/.markdownlint.yaml +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/.pre-commit-config.yaml +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/CONTRIBUTING.md +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/LICENSE +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/README.md +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/deployment/apify_platform.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/deployment/code_examples/apify/crawler_as_actor_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/deployment/code_examples/apify/get_public_url.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/deployment/code_examples/apify/log_with_config_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/deployment/code_examples/apify/proxy_advanced_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/deployment/code_examples/apify/proxy_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/deployment/code_examples/google/cloud_run_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/deployment/code_examples/google/google_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/deployment/google_cloud.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/deployment/google_cloud_run.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/add_data_to_dataset.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/beautifulsoup_crawler.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/capture_screenshot_using_playwright.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/capturing_page_snapshots_with_error_snapshotter.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/adaptive_playwright_crawler.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/add_data_to_dataset_bs.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/add_data_to_dataset_dataset.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/add_data_to_dataset_pw.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/beautifulsoup_crawler.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/beautifulsoup_crawler_keep_alive.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/beautifulsoup_crawler_stop.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/capture_screenshot_using_playwright.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/configure_json_logging.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/crawl_all_links_on_website_bs.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/crawl_all_links_on_website_pw.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/crawl_multiple_urls_bs.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/crawl_multiple_urls_pw.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/crawl_specific_links_on_website_bs.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/crawl_specific_links_on_website_pw.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/crawl_website_with_relative_links_all_links.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/crawl_website_with_relative_links_same_domain.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/crawl_website_with_relative_links_same_hostname.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/crawl_website_with_relative_links_same_origin.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/export_entire_dataset_to_file_csv.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/export_entire_dataset_to_file_json.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/extract_and_add_specific_links_on_website_bs.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/extract_and_add_specific_links_on_website_pw.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/fill_and_submit_web_form_crawler.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/fill_and_submit_web_form_request.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/parsel_crawler.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/parsel_crawler_with_error_snapshotter.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/playwright_block_requests.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/playwright_crawler.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/playwright_crawler_with_camoufox.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/playwright_crawler_with_error_snapshotter.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/playwright_crawler_with_fingerprint_generator.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/respect_robots_on_skipped_request.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/respect_robots_txt_file.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/resuming_paused_crawl.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/using_browser_profiles_chrome.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/code_examples/using_browser_profiles_firefox.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/crawl_all_links_on_website.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/crawl_multiple_urls.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/crawl_specific_links_on_website.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/crawl_website_with_relative_links.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/crawler_keep_alive.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/crawler_stop.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/export_entire_dataset_to_file.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/fill_and_submit_web_form.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/json_logging.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/parsel_crawler.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/playwright_crawler.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/playwright_crawler_adaptive.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/playwright_crawler_with_block_requests.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/playwright_crawler_with_camoufox.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/playwright_crawler_with_fingerprint_generator.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/respect_robots_txt_file.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/resuming_paused_crawl.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/examples/using_browser_profile.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/architecture_overview.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/avoid_blocking.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/avoid_blocking/default_fingerprint_generator_with_args.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/avoid_blocking/playwright_with_fingerprint_generator.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/creating_web_archive/manual_archiving_parsel_crawler.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/creating_web_archive/manual_archiving_playwright_crawler.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/creating_web_archive/simple_pw_through_proxy_pywb_server.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/error_handling/change_handle_error_status.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/error_handling/disable_retry.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/error_handling/handle_proxy_error.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/http_clients/parsel_curl_impersonate_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/http_clients/parsel_httpx_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/http_clients/parsel_impit_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/http_crawlers/beautifulsoup_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/http_crawlers/custom_crawler_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/http_crawlers/http_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/http_crawlers/parsel_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/login_crawler/http_login.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/login_crawler/playwright_login.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/playwright_crawler/browser_configuration_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/playwright_crawler/multiple_launch_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/playwright_crawler/plugin_browser_configuration_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/playwright_crawler/pre_navigation_hook_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/playwright_crawler_adaptive/handler.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/playwright_crawler_adaptive/init_beautifulsoup.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/playwright_crawler_adaptive/init_parsel.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/playwright_crawler_adaptive/init_prediction.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/playwright_crawler_adaptive/pre_nav_hooks.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/playwright_crawler_stagehand/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/playwright_crawler_stagehand/browser_classes.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/playwright_crawler_stagehand/stagehand_run.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/playwright_crawler_stagehand/support_classes.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/proxy_management/inspecting_bs_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/proxy_management/inspecting_pw_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/proxy_management/integration_bs_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/proxy_management/integration_pw_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/proxy_management/quick_start_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/proxy_management/session_bs_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/proxy_management/session_pw_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/proxy_management/tiers_bs_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/proxy_management/tiers_pw_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/request_loaders/rl_basic_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/request_loaders/rl_basic_example_with_persist.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/request_loaders/rl_tandem_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/request_loaders/rl_tandem_example_explicit.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/request_loaders/sitemap_basic_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/request_loaders/sitemap_example_with_persist.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/request_loaders/sitemap_tandem_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/request_loaders/sitemap_tandem_example_explicit.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/request_router/adaptive_crawler_handlers.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/request_router/basic_request_handlers.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/request_router/custom_router_default_only.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/request_router/error_handler.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/request_router/failed_request_handler.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/request_router/http_pre_navigation.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/request_router/playwright_pre_navigation.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/request_router/simple_default_handler.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/running_in_web_server/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/running_in_web_server/crawler.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/running_in_web_server/server.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/scaling_crawlers/max_tasks_per_minute_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/scaling_crawlers/min_and_max_concurrency_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/service_locator/service_conflicts.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/service_locator/service_crawler_configuration.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/service_locator/service_crawler_event_manager.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/service_locator/service_crawler_storage_client.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/service_locator/service_locator_configuration.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/service_locator/service_locator_event_manager.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/service_locator/service_locator_storage_client.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/service_locator/service_storage_configuration.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/service_locator/service_storage_storage_client.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/session_management/multi_sessions_http.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/session_management/one_session_http.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/session_management/sm_basic.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/session_management/sm_beautifulsoup.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/session_management/sm_http.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/session_management/sm_parsel.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/session_management/sm_playwright.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/session_management/sm_standalone.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/storage_clients/custom_storage_client_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/storage_clients/file_system_storage_client_basic_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/storage_clients/file_system_storage_client_configuration_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/storage_clients/memory_storage_client_basic_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/storage_clients/registering_storage_clients_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/storage_clients/sql_storage_client_basic_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/storage_clients/sql_storage_client_configuration_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/storages/cleaning_do_not_purge_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/storages/cleaning_purge_explicitly_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/storages/dataset_basic_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/storages/dataset_with_crawler_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/storages/dataset_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/storages/helper_add_requests_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/storages/helper_enqueue_links_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/storages/kvs_basic_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/storages/kvs_with_crawler_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/storages/kvs_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/storages/opening.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/storages/rq_basic_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/storages/rq_with_crawler_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/storages/rq_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/code_examples/trace_and_monitor_crawlers/instrument_crawler.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/crawler_login.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/creating_web_archive.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/error_handling.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/http_clients.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/http_crawlers.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/playwright_crawler.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/playwright_crawler_adaptive.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/playwright_crawler_stagehand.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/proxy_management.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/request_loaders.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/request_router.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/running_in_web_server.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/scaling_crawlers.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/service_locator.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/session_management.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/storages.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/guides/trace_and_monitor_crawlers.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/01_setting_up.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/02_first_crawler.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/03_adding_more_urls.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/04_real_world_project.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/05_crawling.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/06_scraping.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/07_saving_data.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/08_refactoring.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/09_running_in_cloud.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/code_examples/02_bs.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/code_examples/02_bs_better.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/code_examples/02_request_queue.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/code_examples/03_enqueue_strategy.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/code_examples/03_finding_new_links.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/code_examples/03_globs.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/code_examples/03_original_code.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/code_examples/03_transform_request.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/code_examples/04_sanity_check.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/code_examples/05_crawling_detail.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/code_examples/05_crawling_listing.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/code_examples/06_scraping.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/code_examples/07_final_code.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/code_examples/07_first_code.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/code_examples/08_main.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/code_examples/08_routes.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/code_examples/09_apify_sdk.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/code_examples/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/code_examples/routes.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/introduction/index.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/pyproject.toml +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/quick-start/code_examples/beautifulsoup_crawler_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/quick-start/code_examples/parsel_crawler_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/quick-start/code_examples/playwright_crawler_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/quick-start/code_examples/playwright_crawler_headful_example.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/quick-start/index.mdx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/upgrading/upgrading_to_v0x.md +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/docs/upgrading/upgrading_to_v1.md +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/renovate.json +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_autoscaling/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_autoscaling/_types.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_autoscaling/autoscaled_pool.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_autoscaling/py.typed +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_autoscaling/snapshotter.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_autoscaling/system_status.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_browserforge_workaround.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_cli.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_consts.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_log_config.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_request.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_service_locator.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_types.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_utils/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_utils/blocked.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_utils/byte_size.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_utils/console.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_utils/context.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_utils/crypto.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_utils/docs.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_utils/file.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_utils/globs.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_utils/html_to_text.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_utils/models.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_utils/raise_if_too_many_kwargs.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_utils/recoverable_state.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_utils/recurring_task.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_utils/requests.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_utils/robots.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_utils/sitemap.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_utils/system.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_utils/time.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_utils/try_import.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_utils/urls.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_utils/wait.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/_utils/web.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/browsers/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/browsers/_browser_controller.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/browsers/_browser_plugin.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/browsers/_browser_pool.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/browsers/_playwright_browser.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/browsers/_playwright_browser_controller.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/browsers/_playwright_browser_plugin.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/browsers/_types.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/browsers/py.typed +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/configuration.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_abstract_http/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_abstract_http/_abstract_http_parser.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_abstract_http/_http_crawling_context.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_abstract_http/py.typed +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_adaptive_playwright/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_adaptive_playwright/_result_comparator.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_adaptive_playwright/_utils.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_basic/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_basic/_basic_crawler.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_basic/_basic_crawling_context.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_basic/_context_pipeline.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_basic/_logging_utils.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_basic/py.typed +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_beautifulsoup/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawler.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawling_context.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_parser.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_beautifulsoup/_utils.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_beautifulsoup/py.typed +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_http/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_http/_http_crawler.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_http/_http_parser.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_parsel/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_parsel/_parsel_crawler.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_parsel/_parsel_crawling_context.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_parsel/_parsel_parser.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_parsel/_utils.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_playwright/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_playwright/_playwright_crawler.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_playwright/_playwright_crawling_context.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_playwright/_playwright_http_client.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_playwright/_types.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_playwright/_utils.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/_types.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/crawlers/py.typed +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/errors.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/events/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/events/_event_manager.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/events/_local_event_manager.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/events/_types.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/events/py.typed +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/fingerprint_suite/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/fingerprint_suite/_browserforge_adapter.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/fingerprint_suite/_consts.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/fingerprint_suite/_fingerprint_generator.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/fingerprint_suite/_header_generator.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/fingerprint_suite/_types.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/fingerprint_suite/py.typed +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/http_clients/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/http_clients/_base.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/http_clients/_curl_impersonate.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/http_clients/_httpx.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/http_clients/_impit.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/otel/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/otel/crawler_instrumentor.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/project_template/cookiecutter.json +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/project_template/hooks/post_gen_project.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/project_template/hooks/pre_gen_project.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/project_template/templates/main.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/project_template/templates/main_beautifulsoup.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/project_template/templates/main_parsel.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/project_template/templates/main_playwright.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/project_template/templates/main_playwright_camoufox.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/project_template/templates/routes_beautifulsoup.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/project_template/templates/routes_camoufox.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/project_template/templates/routes_parsel.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/project_template/templates/routes_playwright.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/project_template/templates/routes_playwright_camoufox.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/project_template/{{cookiecutter.project_name}}/.dockerignore +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/project_template/{{cookiecutter.project_name}}/README.md +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/project_template/{{cookiecutter.project_name}}/requirements.txt +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__main__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/main.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/routes.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/proxy_configuration.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/py.typed +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/request_loaders/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/request_loaders/_request_list.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/request_loaders/_request_loader.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/request_loaders/_request_manager.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/request_loaders/_request_manager_tandem.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/request_loaders/_sitemap_request_loader.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/router.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/sessions/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/sessions/_cookies.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/sessions/_models.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/sessions/_session.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/sessions/_session_pool.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/sessions/py.typed +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/statistics/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/statistics/_error_snapshotter.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/statistics/_error_tracker.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/statistics/_models.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storage_clients/_base/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storage_clients/_base/_dataset_client.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storage_clients/_base/_key_value_store_client.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storage_clients/_base/_request_queue_client.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storage_clients/_base/_storage_client.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storage_clients/_base/py.typed +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storage_clients/_file_system/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storage_clients/_file_system/_dataset_client.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storage_clients/_file_system/_key_value_store_client.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storage_clients/_file_system/_request_queue_client.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storage_clients/_file_system/_storage_client.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storage_clients/_file_system/_utils.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storage_clients/_file_system/py.typed +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storage_clients/_memory/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storage_clients/_memory/_dataset_client.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storage_clients/_memory/_key_value_store_client.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storage_clients/_memory/_request_queue_client.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storage_clients/_memory/_storage_client.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storage_clients/_memory/py.typed +0 -0
- {crawlee-1.0.5b12/src/crawlee/storage_clients/_sql → crawlee-1.0.5b14/src/crawlee/storage_clients/_redis}/py.typed +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storage_clients/_sql/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storage_clients/_sql/_client_mixin.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storage_clients/_sql/_dataset_client.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storage_clients/_sql/_db_models.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storage_clients/_sql/_key_value_store_client.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storage_clients/_sql/_request_queue_client.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storage_clients/_sql/_storage_client.py +0 -0
- {crawlee-1.0.5b12/src/crawlee/storage_clients → crawlee-1.0.5b14/src/crawlee/storage_clients/_sql}/py.typed +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storage_clients/models.py +0 -0
- {crawlee-1.0.5b12/src/crawlee/storages → crawlee-1.0.5b14/src/crawlee/storage_clients}/py.typed +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storages/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storages/_base.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storages/_dataset.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storages/_key_value_store.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storages/_request_queue.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storages/_storage_instance_manager.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/src/crawlee/storages/_utils.py +0 -0
- /crawlee-1.0.5b12/tests/__init__.py → /crawlee-1.0.5b14/src/crawlee/storages/py.typed +0 -0
- {crawlee-1.0.5b12/tests/e2e → crawlee-1.0.5b14/tests}/__init__.py +0 -0
- {crawlee-1.0.5b12/tests/unit → crawlee-1.0.5b14/tests/e2e}/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/e2e/conftest.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/e2e/project_template/test_static_crawlers_templates.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/e2e/project_template/utils.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/README.md +0 -0
- /crawlee-1.0.5b12/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawling_context.py → /crawlee-1.0.5b14/tests/unit/__init__.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/_autoscaling/test_autoscaled_pool.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/_autoscaling/test_snapshotter.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/_autoscaling/test_system_status.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/_statistics/test_error_tracker.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/_statistics/test_periodic_logging.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/_statistics/test_persistence.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/_statistics/test_request_processing_record.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/_utils/test_byte_size.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/_utils/test_console.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/_utils/test_crypto.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/_utils/test_file.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/_utils/test_globs.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/_utils/test_html_to_text.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/_utils/test_measure_time.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/_utils/test_raise_if_too_many_kwargs.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/_utils/test_recurring_task.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/_utils/test_requests.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/_utils/test_robots.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/_utils/test_sitemap.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/_utils/test_system.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/_utils/test_timedelata_ms.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/_utils/test_urls.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/browsers/test_browser_pool.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/browsers/test_playwright_browser.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/browsers/test_playwright_browser_controller.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/browsers/test_playwright_browser_plugin.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler_statistics.py +0 -0
- /crawlee-1.0.5b12/website/static/.nojekyll → /crawlee-1.0.5b14/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawling_context.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/crawlers/_adaptive_playwright/test_predictor.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/crawlers/_basic/test_context_pipeline.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/crawlers/_http/test_http_crawler.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/crawlers/_parsel/test_parsel_crawler.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/crawlers/_playwright/test_playwright_crawler.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/events/test_event_manager.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/events/test_local_event_manager.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/fingerprint_suite/test_adapters.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/fingerprint_suite/test_header_generator.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/http_clients/test_http_clients.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/http_clients/test_httpx.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/otel/test_crawler_instrumentor.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/proxy_configuration/test_new_proxy_info.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/proxy_configuration/test_tiers.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/request_loaders/test_request_list.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/request_loaders/test_sitemap_request_loader.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/server.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/server_endpoints.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/sessions/test_cookies.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/sessions/test_models.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/sessions/test_session.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/sessions/test_session_pool.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/storage_clients/_file_system/test_fs_rq_client.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/storage_clients/_memory/test_memory_dataset_client.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/storage_clients/_memory/test_memory_kvs_client.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/storage_clients/_memory/test_memory_rq_client.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/storage_clients/_sql/test_sql_dataset_client.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/storage_clients/_sql/test_sql_kvs_client.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/storage_clients/_sql/test_sql_rq_client.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/storages/test_dataset.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/storages/test_key_value_store.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/storages/test_request_manager_tandem.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/storages/test_request_queue.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/storages/test_storage_instance_manager.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/test_cli.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/test_configuration.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/test_log_config.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/test_router.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/tests/unit/test_service_locator.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/.eslintrc.json +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/.yarnrc.yml +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/babel.config.js +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/build_api_reference.sh +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/docusaurus.config.js +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/generate_module_shortcuts.py +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/package.json +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/patches/@docusaurus+core+3.4.0.patch +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/patches/@docusaurus+core+3.5.2.patch +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/roa-loader/index.js +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/roa-loader/package.json +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/sidebars.js +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/components/ApiLink.jsx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/components/Button.jsx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/components/Button.module.css +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/components/CopyButton.jsx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/components/CopyButton.module.css +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/components/Gradients.jsx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/components/Highlights.jsx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/components/Highlights.module.css +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/components/Homepage/HomepageCliExample.jsx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/components/Homepage/HomepageCliExample.module.css +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/components/Homepage/HomepageCtaSection.jsx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/components/Homepage/HomepageCtaSection.module.css +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/components/Homepage/HomepageHeroSection.jsx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/components/Homepage/HomepageHeroSection.module.css +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/components/Homepage/LanguageInfoWidget.jsx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/components/Homepage/LanguageInfoWidget.module.css +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/components/Homepage/LanguageSwitch.jsx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/components/Homepage/LanguageSwitch.module.css +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/components/Homepage/RiverSection.jsx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/components/Homepage/RiverSection.module.css +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/components/Homepage/ThreeCardsWithIcon.jsx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/components/Homepage/ThreeCardsWithIcon.module.css +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/components/Homepage/animated-crawlee-logo-dark.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/components/Homepage/animated-crawlee-logo-light.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/components/RunnableCodeBlock.jsx +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/components/RunnableCodeBlock.module.css +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/css/custom.css +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/pages/index.js +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/pages/index.module.css +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/theme/ColorModeToggle/dark-mode-icon.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/theme/ColorModeToggle/index.js +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/theme/ColorModeToggle/light-mode-icon.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/theme/ColorModeToggle/styles.module.css +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/theme/DocItem/Layout/index.js +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/theme/DocItem/Layout/styles.module.css +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/theme/Footer/LinkItem/index.js +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/theme/Footer/LinkItem/index.module.css +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/theme/Footer/index.js +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/theme/Footer/index.module.css +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/theme/MDXComponents/A.js +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/theme/Navbar/Content/index.js +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/theme/Navbar/Content/styles.module.css +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/theme/Navbar/Logo/index.js +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/theme/Navbar/Logo/index.module.css +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/theme/Navbar/MobileSidebar/Header/index.js +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/theme/Navbar/MobileSidebar/Header/index.module.css +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/theme/Navbar/MobileSidebar/Layout/index.js +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/theme/Navbar/MobileSidebar/PrimaryMenu/index.js +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/theme/Navbar/MobileSidebar/index.js +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/src/theme/NavbarItem/ComponentTypes.js +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/font/lota.woff +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/font/lota.woff2 +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/API.png +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/arrow_right.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/auto-scaling-dark.webp +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/auto-scaling-light.webp +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/check.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/chrome-scrape-dark.gif +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/chrome-scrape-light.gif +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/cloud_icon.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/community-dark-icon.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/community-light-icon.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/crawlee-dark-new.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/crawlee-dark.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/crawlee-javascript-dark.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/crawlee-javascript-light.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/crawlee-light-new.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/crawlee-light.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/crawlee-logo-monocolor.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/crawlee-logo.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/crawlee-python-dark.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/crawlee-python-light.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/crawlee-python-og.png +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/defaults-dark-icon.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/defaults-light-icon.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/discord-brand-dark.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/discord-brand.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/docusaurus.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/external-link.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/favicon.ico +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/favorite-tools-dark.webp +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/favorite-tools-light.webp +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/features/auto-scaling.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/features/automate-everything.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/features/fingerprints.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/features/node-requests.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/features/runs-on-py.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/features/storage.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/features/works-everywhere.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/fill-and-submit-web-form/00.jpg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/fill-and-submit-web-form/01.jpg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/fill-and-submit-web-form/02.jpg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/fill-and-submit-web-form/03.jpg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/getting-started/current-price.jpg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/getting-started/scraping-practice.jpg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/getting-started/select-an-element.jpg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/getting-started/selected-element.jpg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/getting-started/sku.jpg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/getting-started/title.jpg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/github-brand-dark.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/github-brand.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/guides/jaeger_otel_search_view_example.png +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/guides/jaeger_otel_trace_example.png +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/hearth copy.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/hearth.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/javascript_logo.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/js_file.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/logo-big.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/logo-blur.png +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/logo-blur.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/logo-zoom.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/menu-arrows.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/oss_logo.png +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/puppeteer-live-view-dashboard.png +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/puppeteer-live-view-detail.png +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/queue-dark-icon.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/queue-light-icon.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/resuming-paused-crawl/00.webp +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/resuming-paused-crawl/01.webp +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/robot.png +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/routing-dark-icon.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/routing-light-icon.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/scraping-utils-dark-icon.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/scraping-utils-light-icon.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/smart-proxy-dark.webp +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/smart-proxy-light.webp +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/source_code.png +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/system.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/triangles_dark.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/triangles_light.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/workflow.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/zero-setup-dark-icon.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/img/zero-setup-light-icon.svg +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/js/custom.js +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/static/robots.txt +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/tools/docs-prettier.config.js +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/tools/utils/externalLink.js +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/tools/website_gif/chrome-scrape-dark.gif +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/tools/website_gif/chrome-scrape-dark.mp4 +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/tools/website_gif/chrome-scrape-light.gif +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/tools/website_gif/chrome-scrape-light.mp4 +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/tools/website_gif/website_gif.mjs +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/tsconfig.eslint.json +0 -0
- {crawlee-1.0.5b12 → crawlee-1.0.5b14}/website/yarn.lock +0 -0
|
@@ -8,11 +8,13 @@ All notable changes to this project will be documented in this file.
|
|
|
8
8
|
### 🚀 Features
|
|
9
9
|
|
|
10
10
|
- Add `chrome` `BrowserType` for `PlaywrightCrawler` to use the Chrome browser ([#1487](https://github.com/apify/crawlee-python/pull/1487)) ([b06937b](https://github.com/apify/crawlee-python/commit/b06937bbc3afe3c936b554bfc503365c1b2c526b)) by [@Mantisus](https://github.com/Mantisus), closes [#1071](https://github.com/apify/crawlee-python/issues/1071)
|
|
11
|
+
- Add `RedisStorageClient` based on Redis v8.0+ ([#1406](https://github.com/apify/crawlee-python/pull/1406)) ([d08d13d](https://github.com/apify/crawlee-python/commit/d08d13d39203c24ab61fe254b0956d6744db3b5f)) by [@Mantisus](https://github.com/Mantisus)
|
|
11
12
|
|
|
12
13
|
### 🐛 Bug Fixes
|
|
13
14
|
|
|
14
15
|
- Improve indexing of the `request_queue_records` table for `SqlRequestQueueClient` ([#1527](https://github.com/apify/crawlee-python/pull/1527)) ([6509534](https://github.com/apify/crawlee-python/commit/65095346a9d8b703b10c91e0510154c3c48a4176)) by [@Mantisus](https://github.com/Mantisus), closes [#1526](https://github.com/apify/crawlee-python/issues/1526)
|
|
15
16
|
- Improve error handling for `RobotsTxtFile.load` ([#1524](https://github.com/apify/crawlee-python/pull/1524)) ([596a311](https://github.com/apify/crawlee-python/commit/596a31184914a254b3e7a81fd2f48ea8eda7db49)) by [@Mantisus](https://github.com/Mantisus)
|
|
17
|
+
- Fix `crawler_runtime` not being updated during run and only in the end ([#1540](https://github.com/apify/crawlee-python/pull/1540)) ([0d6c3f6](https://github.com/apify/crawlee-python/commit/0d6c3f6d3337ddb6cab4873747c28cf95605d550)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1541](https://github.com/apify/crawlee-python/issues/1541)
|
|
16
18
|
|
|
17
19
|
|
|
18
20
|
<!-- git-cliff-unreleased-end -->
|
|
@@ -4,9 +4,6 @@
|
|
|
4
4
|
# This is default for local testing, but GitHub workflows override it to a higher value in CI
|
|
5
5
|
E2E_TESTS_CONCURRENCY = 1
|
|
6
6
|
|
|
7
|
-
# Placeholder token; replace with a real one for local docs testing if needed
|
|
8
|
-
APIFY_TOKEN = apify_api_token_placeholder
|
|
9
|
-
|
|
10
7
|
clean:
|
|
11
8
|
rm -rf .mypy_cache .pytest_cache .ruff_cache build dist htmlcov .coverage
|
|
12
9
|
|
|
@@ -58,4 +55,4 @@ build-docs:
|
|
|
58
55
|
cd website && corepack enable && yarn && uv run yarn build
|
|
59
56
|
|
|
60
57
|
run-docs: build-api-reference
|
|
61
|
-
|
|
58
|
+
cd website && corepack enable && yarn && uv run yarn start
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: crawlee
|
|
3
|
-
Version: 1.0.5b12
|
|
3
|
+
Version: 1.0.5b14
|
|
4
4
|
Summary: Crawlee for Python
|
|
5
5
|
Project-URL: Apify Homepage, https://apify.com
|
|
6
6
|
Project-URL: Changelog, https://crawlee.dev/python/docs/changelog
|
|
@@ -263,6 +263,7 @@ Requires-Dist: opentelemetry-sdk>=1.34.1; extra == 'all'
|
|
|
263
263
|
Requires-Dist: opentelemetry-semantic-conventions>=0.54; extra == 'all'
|
|
264
264
|
Requires-Dist: parsel>=1.10.0; extra == 'all'
|
|
265
265
|
Requires-Dist: playwright>=1.27.0; extra == 'all'
|
|
266
|
+
Requires-Dist: redis[hiredis]>=7.0.0; extra == 'all'
|
|
266
267
|
Requires-Dist: rich>=13.9.0; extra == 'all'
|
|
267
268
|
Requires-Dist: scikit-learn>=1.6.0; extra == 'all'
|
|
268
269
|
Requires-Dist: sqlalchemy[asyncio]<3.0.0,>=2.0.0; extra == 'all'
|
|
@@ -296,6 +297,8 @@ Provides-Extra: playwright
|
|
|
296
297
|
Requires-Dist: apify-fingerprint-datapoints>=0.0.2; extra == 'playwright'
|
|
297
298
|
Requires-Dist: browserforge>=1.2.3; extra == 'playwright'
|
|
298
299
|
Requires-Dist: playwright>=1.27.0; extra == 'playwright'
|
|
300
|
+
Provides-Extra: redis
|
|
301
|
+
Requires-Dist: redis[hiredis]>=7.0.0; extra == 'redis'
|
|
299
302
|
Provides-Extra: sql-postgres
|
|
300
303
|
Requires-Dist: asyncpg>=0.24.0; extra == 'sql-postgres'
|
|
301
304
|
Requires-Dist: sqlalchemy[asyncio]<3.0.0,>=2.0.0; extra == 'sql-postgres'
|
crawlee-1.0.5b14/docs/guides/code_examples/storage_clients/redis_storage_client_basic_example.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
from crawlee.crawlers import ParselCrawler
|
|
2
|
+
from crawlee.storage_clients import RedisStorageClient
|
|
3
|
+
|
|
4
|
+
# Create a new instance of storage client using connection string.
|
|
5
|
+
# 'redis://localhost:6379' is just a placeholder; replace it with your actual
|
|
6
|
+
# connection string.
|
|
7
|
+
storage_client = RedisStorageClient(connection_string='redis://localhost:6379')
|
|
8
|
+
|
|
9
|
+
# And pass it to the crawler.
|
|
10
|
+
crawler = ParselCrawler(storage_client=storage_client)
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from redis.asyncio import Redis
|
|
2
|
+
|
|
3
|
+
from crawlee.configuration import Configuration
|
|
4
|
+
from crawlee.crawlers import ParselCrawler
|
|
5
|
+
from crawlee.storage_clients import RedisStorageClient
|
|
6
|
+
|
|
7
|
+
# Create a new instance of storage client using a Redis client with custom settings.
|
|
8
|
+
# Replace host and port with your actual Redis server configuration.
|
|
9
|
+
# Other Redis client settings can be adjusted as needed.
|
|
10
|
+
storage_client = RedisStorageClient(
|
|
11
|
+
redis=Redis(
|
|
12
|
+
host='localhost',
|
|
13
|
+
port=6379,
|
|
14
|
+
retry_on_timeout=True,
|
|
15
|
+
socket_keepalive=True,
|
|
16
|
+
socket_connect_timeout=10,
|
|
17
|
+
)
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
# Create a configuration with custom settings.
|
|
21
|
+
configuration = Configuration(purge_on_start=False)
|
|
22
|
+
|
|
23
|
+
# And pass them to the crawler.
|
|
24
|
+
crawler = ParselCrawler(
|
|
25
|
+
storage_client=storage_client,
|
|
26
|
+
configuration=configuration,
|
|
27
|
+
)
|
|
@@ -17,6 +17,8 @@ import CustomStorageClientExample from '!!raw-loader!roa-loader!./code_examples/
|
|
|
17
17
|
import RegisteringStorageClientsExample from '!!raw-loader!roa-loader!./code_examples/storage_clients/registering_storage_clients_example.py';
|
|
18
18
|
import SQLStorageClientBasicExample from '!!raw-loader!roa-loader!./code_examples/storage_clients/sql_storage_client_basic_example.py';
|
|
19
19
|
import SQLStorageClientConfigurationExample from '!!raw-loader!./code_examples/storage_clients/sql_storage_client_configuration_example.py';
|
|
20
|
+
import RedisStorageClientBasicExample from '!!raw-loader!./code_examples/storage_clients/redis_storage_client_basic_example.py';
|
|
21
|
+
import RedisStorageClientConfigurationExample from '!!raw-loader!./code_examples/storage_clients/redis_storage_client_configuration_example.py';
|
|
20
22
|
|
|
21
23
|
Storage clients provide a unified interface for interacting with <ApiLink to="class/Dataset">`Dataset`</ApiLink>, <ApiLink to="class/KeyValueStore">`KeyValueStore`</ApiLink>, and <ApiLink to="class/RequestQueue">`RequestQueue`</ApiLink>, regardless of the underlying implementation. They handle operations like creating, reading, updating, and deleting storage instances, as well as managing data persistence and cleanup. This abstraction makes it easy to switch between different environments, such as local development and cloud production setups.
|
|
22
24
|
|
|
@@ -26,7 +28,8 @@ Crawlee provides three main storage client implementations:
|
|
|
26
28
|
|
|
27
29
|
- <ApiLink to="class/FileSystemStorageClient">`FileSystemStorageClient`</ApiLink> - Provides persistent file system storage with in-memory caching.
|
|
28
30
|
- <ApiLink to="class/MemoryStorageClient">`MemoryStorageClient`</ApiLink> - Stores data in memory with no persistence.
|
|
29
|
-
- <ApiLink to="class/SqlStorageClient">`SqlStorageClient`</ApiLink>
|
|
31
|
+
- <ApiLink to="class/SqlStorageClient">`SqlStorageClient`</ApiLink> - Provides persistent storage using a SQL database ([SQLite](https://sqlite.org/) or [PostgreSQL](https://www.postgresql.org/)). Requires installing the extra dependency: `crawlee[sql_sqlite]` for SQLite or `crawlee[sql_postgres]` for PostgreSQL.
|
|
32
|
+
- <ApiLink to="class/RedisStorageClient">`RedisStorageClient`</ApiLink> - Provides persistent storage using a [Redis](https://redis.io/) database v8.0+. Requires installing the extra dependency `crawlee[redis]`.
|
|
30
33
|
- [`ApifyStorageClient`](https://docs.apify.com/sdk/python/reference/class/ApifyStorageClient) - Manages storage on the [Apify platform](https://apify.com), implemented in the [Apify SDK](https://github.com/apify/apify-sdk-python).
|
|
31
34
|
|
|
32
35
|
```mermaid
|
|
@@ -56,6 +59,8 @@ class MemoryStorageClient
|
|
|
56
59
|
|
|
57
60
|
class SqlStorageClient
|
|
58
61
|
|
|
62
|
+
class RedisStorageClient
|
|
63
|
+
|
|
59
64
|
class ApifyStorageClient
|
|
60
65
|
|
|
61
66
|
%% ========================
|
|
@@ -65,6 +70,7 @@ class ApifyStorageClient
|
|
|
65
70
|
StorageClient --|> FileSystemStorageClient
|
|
66
71
|
StorageClient --|> MemoryStorageClient
|
|
67
72
|
StorageClient --|> SqlStorageClient
|
|
73
|
+
StorageClient --|> RedisStorageClient
|
|
68
74
|
StorageClient --|> ApifyStorageClient
|
|
69
75
|
```
|
|
70
76
|
|
|
@@ -304,8 +310,8 @@ Configuration options for the <ApiLink to="class/SqlStorageClient">`SqlStorageCl
|
|
|
304
310
|
|
|
305
311
|
Configuration options for the <ApiLink to="class/SqlStorageClient">`SqlStorageClient`</ApiLink> can be set via constructor arguments:
|
|
306
312
|
|
|
307
|
-
- **`connection_string`** (default: SQLite in <ApiLink to="class/Configuration">`Configuration`</ApiLink> storage dir)
|
|
308
|
-
- **`engine`**
|
|
313
|
+
- **`connection_string`** (default: SQLite in <ApiLink to="class/Configuration">`Configuration`</ApiLink> storage dir) - SQLAlchemy connection string, e.g. `sqlite+aiosqlite:///my.db` or `postgresql+asyncpg://user:pass@host/db`.
|
|
314
|
+
- **`engine`** - Pre-configured SQLAlchemy AsyncEngine (optional).
|
|
309
315
|
|
|
310
316
|
For advanced scenarios, you can configure <ApiLink to="class/SqlStorageClient">`SqlStorageClient`</ApiLink> with a custom SQLAlchemy engine and additional options via the <ApiLink to="class/Configuration">`Configuration`</ApiLink> class. This is useful, for example, when connecting to an external PostgreSQL database or customizing connection pooling.
|
|
311
317
|
|
|
@@ -313,6 +319,172 @@ For advanced scenarios, you can configure <ApiLink to="class/SqlStorageClient">`
|
|
|
313
319
|
{SQLStorageClientConfigurationExample}
|
|
314
320
|
</CodeBlock>
|
|
315
321
|
|
|
322
|
+
### Redis storage client
|
|
323
|
+
|
|
324
|
+
:::warning Experimental feature
|
|
325
|
+
The <ApiLink to="class/RedisStorageClient">`RedisStorageClient`</ApiLink> is experimental. Its API and behavior may change in future releases.
|
|
326
|
+
:::
|
|
327
|
+
|
|
328
|
+
The <ApiLink to="class/RedisStorageClient">`RedisStorageClient`</ApiLink> provides persistent storage using a [Redis](https://redis.io/) database. It supports concurrent access from multiple independent clients or processes and uses native Redis data structures for efficient operations.
|
|
329
|
+
|
|
330
|
+
:::note Dependencies
|
|
331
|
+
The <ApiLink to="class/RedisStorageClient">`RedisStorageClient`</ApiLink> is not included in the core Crawlee package.
|
|
332
|
+
To use it, you need to install Crawlee with the Redis extra dependency:
|
|
333
|
+
|
|
334
|
+
<code>pip install 'crawlee[redis]'</code>
|
|
335
|
+
|
|
336
|
+
Additionally, Redis version 8.0 or higher is required.
|
|
337
|
+
:::
|
|
338
|
+
|
|
339
|
+
:::note Redis persistence
|
|
340
|
+
Data persistence in Redis depends on your [database configuration](https://redis.io/docs/latest/operate/oss_and_stack/management/persistence/).
|
|
341
|
+
:::
|
|
342
|
+
|
|
343
|
+
The client requires either a Redis connection string or a pre-configured Redis client instance. Use a pre-configured client when you need custom Redis settings such as connection pooling, timeouts, or SSL/TLS encryption.
|
|
344
|
+
|
|
345
|
+
<CodeBlock className="language-python" language="python">
|
|
346
|
+
{RedisStorageClientBasicExample}
|
|
347
|
+
</CodeBlock>
|
|
348
|
+
|
|
349
|
+
Data is organized using Redis key patterns. Below are the main data structures used for each storage type:
|
|
350
|
+
|
|
351
|
+
```mermaid
|
|
352
|
+
---
|
|
353
|
+
config:
|
|
354
|
+
class:
|
|
355
|
+
hideEmptyMembersBox: true
|
|
356
|
+
---
|
|
357
|
+
|
|
358
|
+
classDiagram
|
|
359
|
+
|
|
360
|
+
%% ========================
|
|
361
|
+
%% Storage Client
|
|
362
|
+
%% ========================
|
|
363
|
+
|
|
364
|
+
class RedisDatasetClient {
|
|
365
|
+
<<Dataset>>
|
|
366
|
+
}
|
|
367
|
+
|
|
368
|
+
%% ========================
|
|
369
|
+
%% Dataset Keys
|
|
370
|
+
%% ========================
|
|
371
|
+
|
|
372
|
+
class DatasetKeys {
|
|
373
|
+
datasets:[name]:items - JSON Array
|
|
374
|
+
datasets:[name]:metadata - JSON Object
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
class DatasetsIndexes {
|
|
378
|
+
datasets:id_to_name - Hash
|
|
379
|
+
datasets:name_to_id - Hash
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
%% ========================
|
|
383
|
+
%% Client to Keys arrows
|
|
384
|
+
%% ========================
|
|
385
|
+
|
|
386
|
+
RedisDatasetClient --> DatasetKeys
|
|
387
|
+
RedisDatasetClient --> DatasetsIndexes
|
|
388
|
+
```
|
|
389
|
+
|
|
390
|
+
```mermaid
|
|
391
|
+
---
|
|
392
|
+
config:
|
|
393
|
+
class:
|
|
394
|
+
hideEmptyMembersBox: true
|
|
395
|
+
---
|
|
396
|
+
|
|
397
|
+
classDiagram
|
|
398
|
+
|
|
399
|
+
%% ========================
|
|
400
|
+
%% Storage Clients
|
|
401
|
+
%% ========================
|
|
402
|
+
|
|
403
|
+
class RedisKeyValueStoreClient {
|
|
404
|
+
<<Key-value store>>
|
|
405
|
+
}
|
|
406
|
+
|
|
407
|
+
%% ========================
|
|
408
|
+
%% Key-Value Store Keys
|
|
409
|
+
%% ========================
|
|
410
|
+
|
|
411
|
+
class KeyValueStoreKeys {
|
|
412
|
+
key_value_stores:[name]:items - Hash
|
|
413
|
+
key_value_stores:[name]:metadata_items - Hash
|
|
414
|
+
key_value_stores:[name]:metadata - JSON Object
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
class KeyValueStoresIndexes {
|
|
418
|
+
key_value_stores:id_to_name - Hash
|
|
419
|
+
key_value_stores:name_to_id - Hash
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
%% ========================
|
|
423
|
+
%% Client to Keys arrows
|
|
424
|
+
%% ========================
|
|
425
|
+
|
|
426
|
+
RedisKeyValueStoreClient --> KeyValueStoreKeys
|
|
427
|
+
RedisKeyValueStoreClient --> KeyValueStoresIndexes
|
|
428
|
+
```
|
|
429
|
+
|
|
430
|
+
```mermaid
|
|
431
|
+
---
|
|
432
|
+
config:
|
|
433
|
+
class:
|
|
434
|
+
hideEmptyMembersBox: true
|
|
435
|
+
---
|
|
436
|
+
|
|
437
|
+
classDiagram
|
|
438
|
+
|
|
439
|
+
%% ========================
|
|
440
|
+
%% Storage Clients
|
|
441
|
+
%% ========================
|
|
442
|
+
|
|
443
|
+
class RedisRequestQueueClient {
|
|
444
|
+
<<Request queue>>
|
|
445
|
+
}
|
|
446
|
+
|
|
447
|
+
%% ========================
|
|
448
|
+
%% Request Queue Keys
|
|
449
|
+
%% ========================
|
|
450
|
+
|
|
451
|
+
class RequestQueueKeys{
|
|
452
|
+
request_queues:[name]:queue - List
|
|
453
|
+
request_queues:[name]:data - Hash
|
|
454
|
+
request_queues:[name]:in_progress - Hash
|
|
455
|
+
request_queues:[name]:added_bloom_filter - Bloom Filter | bloom queue_dedup_strategy
|
|
456
|
+
request_queues:[name]:handled_bloom_filter - Bloom Filter | bloom queue_dedup_strategy
|
|
457
|
+
request_queues:[name]:pending_set - Set | default queue_dedup_strategy
|
|
458
|
+
request_queues:[name]:handled_set - Set | default queue_dedup_strategy
|
|
459
|
+
request_queues:[name]:metadata - JSON Object
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
class RequestQueuesIndexes {
|
|
463
|
+
request_queues:id_to_name - Hash
|
|
464
|
+
request_queues:name_to_id - Hash
|
|
465
|
+
}
|
|
466
|
+
|
|
467
|
+
%% ========================
|
|
468
|
+
%% Client to Keys arrows
|
|
469
|
+
%% ========================
|
|
470
|
+
|
|
471
|
+
RedisRequestQueueClient --> RequestQueueKeys
|
|
472
|
+
RedisRequestQueueClient --> RequestQueuesIndexes
|
|
473
|
+
```
|
|
474
|
+
|
|
475
|
+
Configuration options for the <ApiLink to="class/RedisStorageClient">`RedisStorageClient`</ApiLink> can be set through environment variables or the <ApiLink to="class/Configuration">`Configuration`</ApiLink> class:
|
|
476
|
+
|
|
477
|
+
- **`purge_on_start`** (env: `CRAWLEE_PURGE_ON_START`, default: `True`) - Whether to purge default storages on start.
|
|
478
|
+
|
|
479
|
+
Configuration options for the <ApiLink to="class/RedisStorageClient">`RedisStorageClient`</ApiLink> can be set via constructor arguments:
|
|
480
|
+
|
|
481
|
+
- **`connection_string`** - Redis connection string, e.g. `redis://localhost:6379/0`.
|
|
482
|
+
- **`redis`** - Pre-configured Redis client instance (optional).
|
|
483
|
+
|
|
484
|
+
<CodeBlock className="language-python" language="python">
|
|
485
|
+
{RedisStorageClientConfigurationExample}
|
|
486
|
+
</CodeBlock>
|
|
487
|
+
|
|
316
488
|
## Creating a custom storage client
|
|
317
489
|
|
|
318
490
|
A storage client consists of two parts: the storage client factory and individual storage type clients. The <ApiLink to="class/StorageClient">`StorageClient`</ApiLink> acts as a factory that creates specific clients (<ApiLink to="class/DatasetClient">`DatasetClient`</ApiLink>, <ApiLink to="class/KeyValueStoreClient">`KeyValueStoreClient`</ApiLink>, <ApiLink to="class/RequestQueueClient">`RequestQueueClient`</ApiLink>) where the actual storage logic is implemented.
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "crawlee"
|
|
7
|
-
version = "1.0.5b12"
|
|
7
|
+
version = "1.0.5b14"
|
|
8
8
|
description = "Crawlee for Python"
|
|
9
9
|
authors = [{ name = "Apify Technologies s.r.o.", email = "support@apify.com" }]
|
|
10
10
|
license = { file = "LICENSE" }
|
|
@@ -48,7 +48,7 @@ dependencies = [
|
|
|
48
48
|
]
|
|
49
49
|
|
|
50
50
|
[project.optional-dependencies]
|
|
51
|
-
all = ["crawlee[adaptive-crawler,beautifulsoup,cli,curl-impersonate,httpx,parsel,playwright,otel,sql_sqlite,sql_postgres]"]
|
|
51
|
+
all = ["crawlee[adaptive-crawler,beautifulsoup,cli,curl-impersonate,httpx,parsel,playwright,otel,sql_sqlite,sql_postgres,redis]"]
|
|
52
52
|
adaptive-crawler = [
|
|
53
53
|
"jaro-winkler>=2.0.3",
|
|
54
54
|
"playwright>=1.27.0",
|
|
@@ -79,6 +79,7 @@ sql_sqlite = [
|
|
|
79
79
|
"sqlalchemy[asyncio]>=2.0.0,<3.0.0",
|
|
80
80
|
"aiosqlite>=0.21.0",
|
|
81
81
|
]
|
|
82
|
+
redis = ["redis[hiredis] >= 7.0.0"]
|
|
82
83
|
|
|
83
84
|
[project.scripts]
|
|
84
85
|
crawlee = "crawlee._cli:cli"
|
|
@@ -98,6 +99,7 @@ dev = [
|
|
|
98
99
|
"apify_client", # For e2e tests.
|
|
99
100
|
"build<2.0.0", # For e2e tests.
|
|
100
101
|
"dycw-pytest-only<3.0.0",
|
|
102
|
+
"fakeredis[probabilistic,json,lua]<3.0.0",
|
|
101
103
|
"mypy~=1.18.0",
|
|
102
104
|
"pre-commit<5.0.0",
|
|
103
105
|
"proxy-py<3.0.0",
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# Inspiration: https://github.com/apify/crawlee/blob/v3.9.2/packages/core/src/crawlers/statistics.ts
|
|
2
2
|
from __future__ import annotations
|
|
3
3
|
|
|
4
|
+
import asyncio
|
|
4
5
|
import math
|
|
5
6
|
import time
|
|
6
7
|
from datetime import datetime, timedelta, timezone
|
|
@@ -84,8 +85,6 @@ class Statistics(Generic[TStatisticsState]):
|
|
|
84
85
|
self._id = Statistics.__next_id
|
|
85
86
|
Statistics.__next_id += 1
|
|
86
87
|
|
|
87
|
-
self._instance_start: datetime | None = None
|
|
88
|
-
|
|
89
88
|
self.error_tracker = ErrorTracker(
|
|
90
89
|
save_error_snapshots=save_error_snapshots,
|
|
91
90
|
snapshot_kvs_name=persist_state_kvs_name,
|
|
@@ -111,6 +110,9 @@ class Statistics(Generic[TStatisticsState]):
|
|
|
111
110
|
# Flag to indicate the context state.
|
|
112
111
|
self._active = False
|
|
113
112
|
|
|
113
|
+
# Pre-existing runtime offset; it can be non-zero when serialized state is restored from the KVS.
|
|
114
|
+
self._runtime_offset = timedelta(seconds=0)
|
|
115
|
+
|
|
114
116
|
def replace_state_model(self, state_model: type[TNewStatisticsState]) -> Statistics[TNewStatisticsState]:
|
|
115
117
|
"""Create near copy of the `Statistics` with replaced `state_model`."""
|
|
116
118
|
new_statistics: Statistics[TNewStatisticsState] = Statistics(
|
|
@@ -165,14 +167,17 @@ class Statistics(Generic[TStatisticsState]):
|
|
|
165
167
|
if self._active:
|
|
166
168
|
raise RuntimeError(f'The {self.__class__.__name__} is already active.')
|
|
167
169
|
|
|
168
|
-
self._active = True
|
|
169
|
-
self._instance_start = datetime.now(timezone.utc)
|
|
170
|
-
|
|
171
170
|
await self._state.initialize()
|
|
172
|
-
self._after_initialize()
|
|
173
171
|
|
|
172
|
+
self._runtime_offset = self.state.crawler_runtime
|
|
173
|
+
|
|
174
|
+
# Start periodic logging and let it print initial state before activation.
|
|
174
175
|
self._periodic_logger.start()
|
|
176
|
+
await asyncio.sleep(0.01)
|
|
177
|
+
self._active = True
|
|
175
178
|
|
|
179
|
+
self.state.crawler_last_started_at = datetime.now(timezone.utc)
|
|
180
|
+
self.state.crawler_started_at = self.state.crawler_started_at or self.state.crawler_last_started_at
|
|
176
181
|
return self
|
|
177
182
|
|
|
178
183
|
async def __aexit__(
|
|
@@ -191,14 +196,16 @@ class Statistics(Generic[TStatisticsState]):
|
|
|
191
196
|
|
|
192
197
|
if not self.state.crawler_last_started_at:
|
|
193
198
|
raise RuntimeError('Statistics.state.crawler_last_started_at not set.')
|
|
194
|
-
self.state.crawler_finished_at = datetime.now(timezone.utc)
|
|
195
|
-
self.state.crawler_runtime += self.state.crawler_finished_at - self.state.crawler_last_started_at
|
|
196
|
-
|
|
197
|
-
await self._state.teardown()
|
|
198
199
|
|
|
200
|
+
# Stop logging and deactivate the statistics to prevent further changes to crawler_runtime
|
|
199
201
|
await self._periodic_logger.stop()
|
|
202
|
+
self.state.crawler_finished_at = datetime.now(timezone.utc)
|
|
203
|
+
self.state.crawler_runtime = (
|
|
204
|
+
self._runtime_offset + self.state.crawler_finished_at - self.state.crawler_last_started_at
|
|
205
|
+
)
|
|
200
206
|
|
|
201
207
|
self._active = False
|
|
208
|
+
await self._state.teardown()
|
|
202
209
|
|
|
203
210
|
@property
|
|
204
211
|
def state(self) -> TStatisticsState:
|
|
@@ -255,10 +262,19 @@ class Statistics(Generic[TStatisticsState]):
|
|
|
255
262
|
|
|
256
263
|
del self._requests_in_progress[request_id_or_key]
|
|
257
264
|
|
|
265
|
+
def _update_crawler_runtime(self) -> None:
|
|
266
|
+
current_run_duration = (
|
|
267
|
+
(datetime.now(timezone.utc) - self.state.crawler_last_started_at)
|
|
268
|
+
if self.state.crawler_last_started_at
|
|
269
|
+
else timedelta()
|
|
270
|
+
)
|
|
271
|
+
self.state.crawler_runtime = current_run_duration + self._runtime_offset
|
|
272
|
+
|
|
258
273
|
def calculate(self) -> FinalStatistics:
|
|
259
274
|
"""Calculate the current statistics."""
|
|
260
|
-
if self.
|
|
261
|
-
|
|
275
|
+
if self._active:
|
|
276
|
+
# Only update state when active. If not, just report the last known runtime.
|
|
277
|
+
self._update_crawler_runtime()
|
|
262
278
|
|
|
263
279
|
total_minutes = self.state.crawler_runtime.total_seconds() / 60
|
|
264
280
|
state = self._state.current_value
|
|
@@ -291,21 +307,6 @@ class Statistics(Generic[TStatisticsState]):
|
|
|
291
307
|
else:
|
|
292
308
|
self._periodic_message_logger.info(self._log_message, extra=stats.to_dict())
|
|
293
309
|
|
|
294
|
-
def _after_initialize(self) -> None:
|
|
295
|
-
state = self._state.current_value
|
|
296
|
-
|
|
297
|
-
if state.crawler_started_at is None:
|
|
298
|
-
state.crawler_started_at = datetime.now(timezone.utc)
|
|
299
|
-
|
|
300
|
-
if state.stats_persisted_at is not None and state.crawler_last_started_at:
|
|
301
|
-
self._instance_start = datetime.now(timezone.utc) - (
|
|
302
|
-
state.stats_persisted_at - state.crawler_last_started_at
|
|
303
|
-
)
|
|
304
|
-
elif state.crawler_last_started_at:
|
|
305
|
-
self._instance_start = state.crawler_last_started_at
|
|
306
|
-
|
|
307
|
-
state.crawler_last_started_at = self._instance_start
|
|
308
|
-
|
|
309
310
|
def _save_retry_count_for_request(self, record: RequestProcessingRecord) -> None:
|
|
310
311
|
retry_count = record.retry_count
|
|
311
312
|
state = self._state.current_value
|
|
@@ -13,9 +13,13 @@ _install_import_hook(__name__)
|
|
|
13
13
|
with _try_import(__name__, 'SqlStorageClient'):
|
|
14
14
|
from ._sql import SqlStorageClient
|
|
15
15
|
|
|
16
|
+
with _try_import(__name__, 'RedisStorageClient'):
|
|
17
|
+
from ._redis import RedisStorageClient
|
|
18
|
+
|
|
16
19
|
__all__ = [
|
|
17
20
|
'FileSystemStorageClient',
|
|
18
21
|
'MemoryStorageClient',
|
|
22
|
+
'RedisStorageClient',
|
|
19
23
|
'SqlStorageClient',
|
|
20
24
|
'StorageClient',
|
|
21
25
|
]
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
from ._dataset_client import RedisDatasetClient
|
|
2
|
+
from ._key_value_store_client import RedisKeyValueStoreClient
|
|
3
|
+
from ._request_queue_client import RedisRequestQueueClient
|
|
4
|
+
from ._storage_client import RedisStorageClient
|
|
5
|
+
|
|
6
|
+
__all__ = ['RedisDatasetClient', 'RedisKeyValueStoreClient', 'RedisRequestQueueClient', 'RedisStorageClient']
|