crawlee-1.0.1b11.tar.gz → crawlee-1.0.2.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crawlee might be problematic. Click here for more details.
- {crawlee-1.0.1b11 → crawlee-1.0.2}/.github/workflows/build_and_deploy_docs.yaml +1 -1
- {crawlee-1.0.1b11 → crawlee-1.0.2}/.github/workflows/templates_e2e_tests.yaml +1 -1
- {crawlee-1.0.1b11 → crawlee-1.0.2}/CHANGELOG.md +23 -3
- {crawlee-1.0.1b11 → crawlee-1.0.2}/PKG-INFO +2 -2
- crawlee-1.0.2/docs/examples/code_examples/using_browser_profiles_chrome.py +56 -0
- crawlee-1.0.2/docs/examples/code_examples/using_browser_profiles_firefox.py +42 -0
- crawlee-1.0.2/docs/examples/using_browser_profile.mdx +41 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/upgrading/upgrading_to_v1.md +4 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/pyproject.toml +3 -3
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_file_system/_dataset_client.py +2 -2
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_file_system/_key_value_store_client.py +2 -2
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_file_system/_request_queue_client.py +2 -2
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_memory/_dataset_client.py +2 -2
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_memory/_key_value_store_client.py +2 -2
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_memory/_request_queue_client.py +2 -2
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_sql/_dataset_client.py +2 -2
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_sql/_key_value_store_client.py +2 -2
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_sql/_request_queue_client.py +2 -2
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storages/_base.py +3 -1
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storages/_dataset.py +3 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storages/_key_value_store.py +3 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storages/_request_queue.py +3 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storages/_storage_instance_manager.py +9 -1
- crawlee-1.0.2/src/crawlee/storages/_utils.py +11 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/_utils/test_sitemap.py +6 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler.py +1 -1
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/otel/test_crawler_instrumentor.py +7 -1
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/sessions/test_session_pool.py +1 -1
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py +3 -3
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py +3 -3
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/storage_clients/_file_system/test_fs_rq_client.py +3 -3
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/storage_clients/_memory/test_memory_dataset_client.py +3 -3
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/storage_clients/_memory/test_memory_kvs_client.py +3 -3
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/storage_clients/_memory/test_memory_rq_client.py +3 -3
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/storage_clients/_sql/test_sql_dataset_client.py +6 -6
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/storage_clients/_sql/test_sql_kvs_client.py +6 -6
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/storage_clients/_sql/test_sql_rq_client.py +6 -6
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/storages/test_dataset.py +64 -32
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/storages/test_key_value_store.py +61 -29
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/storages/test_request_queue.py +63 -31
- {crawlee-1.0.1b11 → crawlee-1.0.2}/uv.lock +253 -208
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/package.json +2 -2
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/yarn.lock +136 -119
- {crawlee-1.0.1b11 → crawlee-1.0.2}/.editorconfig +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/.github/CODEOWNERS +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/.github/pull_request_template.md +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/.github/workflows/check_pr_title.yaml +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/.github/workflows/pre_release.yaml +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/.github/workflows/release.yaml +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/.github/workflows/run_code_checks.yaml +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/.github/workflows/update_new_issue.yaml +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/.gitignore +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/.markdownlint.yaml +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/.pre-commit-config.yaml +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/CONTRIBUTING.md +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/LICENSE +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/Makefile +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/README.md +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/deployment/apify_platform.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/deployment/code_examples/apify/crawler_as_actor_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/deployment/code_examples/apify/get_public_url.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/deployment/code_examples/apify/log_with_config_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/deployment/code_examples/apify/proxy_advanced_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/deployment/code_examples/apify/proxy_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/deployment/code_examples/google/cloud_run_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/deployment/code_examples/google/google_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/deployment/google_cloud.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/deployment/google_cloud_run.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/add_data_to_dataset.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/beautifulsoup_crawler.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/capture_screenshot_using_playwright.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/capturing_page_snapshots_with_error_snapshotter.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/adaptive_playwright_crawler.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/add_data_to_dataset_bs.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/add_data_to_dataset_dataset.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/add_data_to_dataset_pw.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/beautifulsoup_crawler.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/beautifulsoup_crawler_keep_alive.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/beautifulsoup_crawler_stop.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/capture_screenshot_using_playwright.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/configure_json_logging.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/crawl_all_links_on_website_bs.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/crawl_all_links_on_website_pw.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/crawl_multiple_urls_bs.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/crawl_multiple_urls_pw.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/crawl_specific_links_on_website_bs.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/crawl_specific_links_on_website_pw.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/crawl_website_with_relative_links_all_links.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/crawl_website_with_relative_links_same_domain.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/crawl_website_with_relative_links_same_hostname.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/crawl_website_with_relative_links_same_origin.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/export_entire_dataset_to_file_csv.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/export_entire_dataset_to_file_json.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/extract_and_add_specific_links_on_website_bs.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/extract_and_add_specific_links_on_website_pw.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/fill_and_submit_web_form_crawler.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/fill_and_submit_web_form_request.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/parsel_crawler.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/parsel_crawler_with_error_snapshotter.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/playwright_block_requests.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/playwright_crawler.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/playwright_crawler_with_camoufox.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/playwright_crawler_with_error_snapshotter.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/playwright_crawler_with_fingerprint_generator.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/respect_robots_on_skipped_request.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/respect_robots_txt_file.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/code_examples/resuming_paused_crawl.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/crawl_all_links_on_website.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/crawl_multiple_urls.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/crawl_specific_links_on_website.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/crawl_website_with_relative_links.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/crawler_keep_alive.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/crawler_stop.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/export_entire_dataset_to_file.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/fill_and_submit_web_form.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/json_logging.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/parsel_crawler.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/playwright_crawler.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/playwright_crawler_adaptive.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/playwright_crawler_with_block_requests.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/playwright_crawler_with_camoufox.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/playwright_crawler_with_fingerprint_generator.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/respect_robots_txt_file.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/examples/resuming_paused_crawl.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/architecture_overview.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/avoid_blocking.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/avoid_blocking/default_fingerprint_generator_with_args.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/avoid_blocking/playwright_with_fingerprint_generator.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/creating_web_archive/manual_archiving_parsel_crawler.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/creating_web_archive/manual_archiving_playwright_crawler.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/creating_web_archive/simple_pw_through_proxy_pywb_server.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/error_handling/change_handle_error_status.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/error_handling/disable_retry.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/error_handling/handle_proxy_error.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/http_clients/parsel_curl_impersonate_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/http_clients/parsel_httpx_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/http_clients/parsel_impit_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/http_crawlers/beautifulsoup_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/http_crawlers/custom_crawler_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/http_crawlers/http_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/http_crawlers/parsel_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/login_crawler/http_login.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/login_crawler/playwright_login.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/playwright_crawler/browser_configuration_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/playwright_crawler/multiple_launch_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/playwright_crawler/plugin_browser_configuration_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/playwright_crawler/pre_navigation_hook_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/playwright_crawler_adaptive/handler.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/playwright_crawler_adaptive/init_beautifulsoup.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/playwright_crawler_adaptive/init_parsel.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/playwright_crawler_adaptive/init_prediction.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/playwright_crawler_adaptive/pre_nav_hooks.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/playwright_crawler_stagehand/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/playwright_crawler_stagehand/browser_classes.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/playwright_crawler_stagehand/stagehand_run.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/playwright_crawler_stagehand/support_classes.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/proxy_management/inspecting_bs_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/proxy_management/inspecting_pw_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/proxy_management/integration_bs_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/proxy_management/integration_pw_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/proxy_management/quick_start_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/proxy_management/session_bs_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/proxy_management/session_pw_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/proxy_management/tiers_bs_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/proxy_management/tiers_pw_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/request_loaders/rl_basic_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/request_loaders/rl_basic_example_with_persist.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/request_loaders/rl_tandem_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/request_loaders/rl_tandem_example_explicit.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/request_loaders/sitemap_basic_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/request_loaders/sitemap_example_with_persist.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/request_loaders/sitemap_tandem_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/request_loaders/sitemap_tandem_example_explicit.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/request_router/adaptive_crawler_handlers.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/request_router/basic_request_handlers.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/request_router/custom_router_default_only.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/request_router/error_handler.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/request_router/failed_request_handler.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/request_router/http_pre_navigation.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/request_router/playwright_pre_navigation.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/request_router/simple_default_handler.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/running_in_web_server/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/running_in_web_server/crawler.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/running_in_web_server/server.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/scaling_crawlers/max_tasks_per_minute_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/scaling_crawlers/min_and_max_concurrency_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/service_locator/service_conflicts.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/service_locator/service_crawler_configuration.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/service_locator/service_crawler_event_manager.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/service_locator/service_crawler_storage_client.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/service_locator/service_locator_configuration.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/service_locator/service_locator_event_manager.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/service_locator/service_locator_storage_client.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/service_locator/service_storage_configuration.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/service_locator/service_storage_storage_client.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/session_management/multi_sessions_http.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/session_management/one_session_http.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/session_management/sm_basic.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/session_management/sm_beautifulsoup.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/session_management/sm_http.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/session_management/sm_parsel.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/session_management/sm_playwright.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/session_management/sm_standalone.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/storage_clients/custom_storage_client_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/storage_clients/file_system_storage_client_basic_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/storage_clients/file_system_storage_client_configuration_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/storage_clients/memory_storage_client_basic_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/storage_clients/registering_storage_clients_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/storage_clients/sql_storage_client_basic_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/storage_clients/sql_storage_client_configuration_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/storages/cleaning_do_not_purge_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/storages/cleaning_purge_explicitly_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/storages/dataset_basic_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/storages/dataset_with_crawler_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/storages/dataset_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/storages/helper_add_requests_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/storages/helper_enqueue_links_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/storages/kvs_basic_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/storages/kvs_with_crawler_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/storages/kvs_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/storages/opening.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/storages/rq_basic_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/storages/rq_with_crawler_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/storages/rq_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/code_examples/trace_and_monitor_crawlers/instrument_crawler.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/crawler_login.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/creating_web_archive.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/error_handling.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/http_clients.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/http_crawlers.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/playwright_crawler.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/playwright_crawler_adaptive.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/playwright_crawler_stagehand.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/proxy_management.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/request_loaders.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/request_router.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/running_in_web_server.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/scaling_crawlers.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/service_locator.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/session_management.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/storage_clients.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/storages.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/guides/trace_and_monitor_crawlers.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/01_setting_up.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/02_first_crawler.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/03_adding_more_urls.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/04_real_world_project.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/05_crawling.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/06_scraping.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/07_saving_data.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/08_refactoring.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/09_running_in_cloud.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/code_examples/02_bs.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/code_examples/02_bs_better.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/code_examples/02_request_queue.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/code_examples/03_enqueue_strategy.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/code_examples/03_finding_new_links.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/code_examples/03_globs.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/code_examples/03_original_code.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/code_examples/03_transform_request.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/code_examples/04_sanity_check.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/code_examples/05_crawling_detail.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/code_examples/05_crawling_listing.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/code_examples/06_scraping.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/code_examples/07_final_code.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/code_examples/07_first_code.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/code_examples/08_main.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/code_examples/08_routes.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/code_examples/09_apify_sdk.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/code_examples/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/code_examples/routes.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/introduction/index.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/pyproject.toml +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/quick-start/code_examples/beautifulsoup_crawler_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/quick-start/code_examples/parsel_crawler_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/quick-start/code_examples/playwright_crawler_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/quick-start/code_examples/playwright_crawler_headful_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/quick-start/index.mdx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/docs/upgrading/upgrading_to_v0x.md +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/renovate.json +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_autoscaling/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_autoscaling/_types.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_autoscaling/autoscaled_pool.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_autoscaling/py.typed +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_autoscaling/snapshotter.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_autoscaling/system_status.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_browserforge_workaround.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_cli.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_consts.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_log_config.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_request.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_service_locator.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_types.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_utils/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_utils/blocked.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_utils/byte_size.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_utils/console.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_utils/context.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_utils/crypto.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_utils/docs.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_utils/file.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_utils/globs.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_utils/html_to_text.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_utils/models.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_utils/raise_if_too_many_kwargs.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_utils/recoverable_state.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_utils/recurring_task.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_utils/requests.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_utils/robots.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_utils/sitemap.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_utils/system.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_utils/time.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_utils/try_import.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_utils/urls.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_utils/wait.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/_utils/web.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/browsers/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/browsers/_browser_controller.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/browsers/_browser_plugin.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/browsers/_browser_pool.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/browsers/_playwright_browser.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/browsers/_playwright_browser_controller.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/browsers/_playwright_browser_plugin.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/browsers/_types.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/browsers/py.typed +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/configuration.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_abstract_http/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_abstract_http/_abstract_http_parser.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_abstract_http/_http_crawling_context.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_abstract_http/py.typed +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_adaptive_playwright/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_adaptive_playwright/_result_comparator.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_adaptive_playwright/_utils.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_basic/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_basic/_basic_crawler.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_basic/_basic_crawling_context.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_basic/_context_pipeline.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_basic/_logging_utils.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_basic/py.typed +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_beautifulsoup/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawler.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawling_context.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_parser.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_beautifulsoup/_utils.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_beautifulsoup/py.typed +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_http/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_http/_http_crawler.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_http/_http_parser.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_parsel/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_parsel/_parsel_crawler.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_parsel/_parsel_crawling_context.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_parsel/_parsel_parser.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_parsel/_utils.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_playwright/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_playwright/_playwright_crawler.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_playwright/_playwright_crawling_context.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_playwright/_playwright_http_client.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_playwright/_types.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_playwright/_utils.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/_types.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/crawlers/py.typed +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/errors.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/events/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/events/_event_manager.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/events/_local_event_manager.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/events/_types.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/events/py.typed +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/fingerprint_suite/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/fingerprint_suite/_browserforge_adapter.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/fingerprint_suite/_consts.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/fingerprint_suite/_fingerprint_generator.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/fingerprint_suite/_header_generator.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/fingerprint_suite/_types.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/fingerprint_suite/py.typed +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/http_clients/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/http_clients/_base.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/http_clients/_curl_impersonate.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/http_clients/_httpx.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/http_clients/_impit.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/otel/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/otel/crawler_instrumentor.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/project_template/cookiecutter.json +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/project_template/hooks/post_gen_project.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/project_template/hooks/pre_gen_project.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/project_template/templates/main.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/project_template/templates/main_beautifulsoup.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/project_template/templates/main_parsel.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/project_template/templates/main_playwright.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/project_template/templates/main_playwright_camoufox.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/project_template/templates/routes_beautifulsoup.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/project_template/templates/routes_camoufox.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/project_template/templates/routes_parsel.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/project_template/templates/routes_playwright.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/project_template/templates/routes_playwright_camoufox.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/project_template/{{cookiecutter.project_name}}/.dockerignore +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/project_template/{{cookiecutter.project_name}}/README.md +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/project_template/{{cookiecutter.project_name}}/requirements.txt +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__main__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/main.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/routes.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/proxy_configuration.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/py.typed +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/request_loaders/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/request_loaders/_request_list.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/request_loaders/_request_loader.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/request_loaders/_request_manager.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/request_loaders/_request_manager_tandem.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/request_loaders/_sitemap_request_loader.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/router.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/sessions/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/sessions/_cookies.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/sessions/_models.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/sessions/_session.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/sessions/_session_pool.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/sessions/py.typed +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/statistics/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/statistics/_error_snapshotter.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/statistics/_error_tracker.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/statistics/_models.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/statistics/_statistics.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_base/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_base/_dataset_client.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_base/_key_value_store_client.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_base/_request_queue_client.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_base/_storage_client.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_base/py.typed +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_file_system/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_file_system/_storage_client.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_file_system/_utils.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_file_system/py.typed +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_memory/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_memory/_storage_client.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_memory/py.typed +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_sql/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_sql/_client_mixin.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_sql/_db_models.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_sql/_storage_client.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_sql/py.typed +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/models.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/py.typed +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storages/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storages/py.typed +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/e2e/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/e2e/conftest.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/e2e/project_template/test_static_crawlers_templates.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/e2e/project_template/utils.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/README.md +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/__init__.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/_autoscaling/test_autoscaled_pool.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/_autoscaling/test_snapshotter.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/_autoscaling/test_system_status.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/_statistics/test_error_tracker.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/_statistics/test_periodic_logging.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/_statistics/test_persistence.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/_statistics/test_request_processing_record.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/_utils/test_byte_size.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/_utils/test_console.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/_utils/test_crypto.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/_utils/test_file.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/_utils/test_globs.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/_utils/test_html_to_text.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/_utils/test_measure_time.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/_utils/test_raise_if_too_many_kwargs.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/_utils/test_recurring_task.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/_utils/test_requests.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/_utils/test_robots.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/_utils/test_system.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/_utils/test_timedelata_ms.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/_utils/test_urls.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/browsers/test_browser_pool.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/browsers/test_playwright_browser.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/browsers/test_playwright_browser_controller.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/browsers/test_playwright_browser_plugin.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/conftest.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler_statistics.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawling_context.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/crawlers/_adaptive_playwright/test_predictor.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/crawlers/_basic/test_basic_crawler.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/crawlers/_basic/test_context_pipeline.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/crawlers/_http/test_http_crawler.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/crawlers/_parsel/test_parsel_crawler.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/crawlers/_playwright/test_playwright_crawler.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/events/test_event_manager.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/events/test_local_event_manager.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/fingerprint_suite/test_adapters.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/fingerprint_suite/test_header_generator.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/http_clients/test_http_clients.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/http_clients/test_httpx.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/proxy_configuration/test_new_proxy_info.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/proxy_configuration/test_tiers.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/request_loaders/test_request_list.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/request_loaders/test_sitemap_request_loader.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/server.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/server_endpoints.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/sessions/test_cookies.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/sessions/test_models.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/sessions/test_session.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/storages/conftest.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/storages/test_request_manager_tandem.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/storages/test_storage_instance_manager.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/test_cli.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/test_configuration.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/test_log_config.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/test_router.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/tests/unit/test_service_locator.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/.eslintrc.json +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/.yarnrc.yml +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/babel.config.js +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/build_api_reference.sh +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/docusaurus.config.js +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/generate_module_shortcuts.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/patches/@docusaurus+core+3.4.0.patch +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/patches/@docusaurus+core+3.5.2.patch +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/roa-loader/index.js +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/roa-loader/package.json +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/sidebars.js +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/components/ApiLink.jsx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/components/Button.jsx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/components/Button.module.css +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/components/CopyButton.jsx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/components/CopyButton.module.css +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/components/Gradients.jsx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/components/Highlights.jsx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/components/Highlights.module.css +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/components/Homepage/HomepageCliExample.jsx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/components/Homepage/HomepageCliExample.module.css +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/components/Homepage/HomepageCtaSection.jsx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/components/Homepage/HomepageCtaSection.module.css +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/components/Homepage/HomepageHeroSection.jsx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/components/Homepage/HomepageHeroSection.module.css +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/components/Homepage/LanguageInfoWidget.jsx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/components/Homepage/LanguageInfoWidget.module.css +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/components/Homepage/LanguageSwitch.jsx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/components/Homepage/LanguageSwitch.module.css +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/components/Homepage/RiverSection.jsx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/components/Homepage/RiverSection.module.css +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/components/Homepage/ThreeCardsWithIcon.jsx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/components/Homepage/ThreeCardsWithIcon.module.css +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/components/Homepage/animated-crawlee-logo-dark.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/components/Homepage/animated-crawlee-logo-light.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/components/RunnableCodeBlock.jsx +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/components/RunnableCodeBlock.module.css +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/css/custom.css +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/pages/home_page_example.py +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/pages/index.js +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/pages/index.module.css +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/theme/ColorModeToggle/dark-mode-icon.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/theme/ColorModeToggle/index.js +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/theme/ColorModeToggle/light-mode-icon.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/theme/ColorModeToggle/styles.module.css +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/theme/DocItem/Layout/index.js +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/theme/DocItem/Layout/styles.module.css +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/theme/Footer/LinkItem/index.js +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/theme/Footer/LinkItem/index.module.css +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/theme/Footer/index.js +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/theme/Footer/index.module.css +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/theme/MDXComponents/A.js +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/theme/Navbar/Content/index.js +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/theme/Navbar/Content/styles.module.css +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/theme/Navbar/Logo/index.js +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/theme/Navbar/Logo/index.module.css +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/theme/Navbar/MobileSidebar/Header/index.js +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/theme/Navbar/MobileSidebar/Header/index.module.css +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/theme/Navbar/MobileSidebar/Layout/index.js +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/theme/Navbar/MobileSidebar/PrimaryMenu/index.js +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/theme/Navbar/MobileSidebar/index.js +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/src/theme/NavbarItem/ComponentTypes.js +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/.nojekyll +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/font/lota.woff +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/font/lota.woff2 +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/API.png +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/apify_logo.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/apify_og_SDK.png +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/apify_sdk.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/apify_sdk_white.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/arrow_right.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/auto-scaling-dark.webp +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/auto-scaling-light.webp +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/check.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/chrome-scrape-dark.gif +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/chrome-scrape-light.gif +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/cloud_icon.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/community-dark-icon.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/community-light-icon.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/crawlee-dark-new.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/crawlee-dark.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/crawlee-javascript-dark.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/crawlee-javascript-light.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/crawlee-light-new.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/crawlee-light.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/crawlee-logo-monocolor.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/crawlee-logo.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/crawlee-python-dark.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/crawlee-python-light.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/crawlee-python-og.png +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/defaults-dark-icon.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/defaults-light-icon.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/discord-brand-dark.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/discord-brand.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/docusaurus.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/external-link.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/favicon.ico +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/favorite-tools-dark.webp +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/favorite-tools-light.webp +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/features/auto-scaling.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/features/automate-everything.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/features/fingerprints.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/features/node-requests.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/features/runs-on-py.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/features/storage.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/features/works-everywhere.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/fill-and-submit-web-form/00.jpg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/fill-and-submit-web-form/01.jpg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/fill-and-submit-web-form/02.jpg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/fill-and-submit-web-form/03.jpg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/getting-started/current-price.jpg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/getting-started/scraping-practice.jpg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/getting-started/select-an-element.jpg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/getting-started/selected-element.jpg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/getting-started/sku.jpg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/getting-started/title.jpg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/github-brand-dark.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/github-brand.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/guides/jaeger_otel_search_view_example.png +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/guides/jaeger_otel_trace_example.png +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/hearth copy.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/hearth.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/javascript_logo.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/js_file.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/logo-big.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/logo-blur.png +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/logo-blur.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/logo-zoom.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/menu-arrows.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/oss_logo.png +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/puppeteer-live-view-dashboard.png +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/puppeteer-live-view-detail.png +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/queue-dark-icon.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/queue-light-icon.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/resuming-paused-crawl/00.webp +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/resuming-paused-crawl/01.webp +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/robot.png +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/routing-dark-icon.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/routing-light-icon.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/scraping-utils-dark-icon.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/scraping-utils-light-icon.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/smart-proxy-dark.webp +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/smart-proxy-light.webp +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/source_code.png +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/system.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/triangles_dark.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/triangles_light.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/workflow.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/zero-setup-dark-icon.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/img/zero-setup-light-icon.svg +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/js/custom.js +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/static/robots.txt +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/tools/docs-prettier.config.js +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/tools/utils/externalLink.js +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/tools/website_gif/chrome-scrape-dark.gif +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/tools/website_gif/chrome-scrape-dark.mp4 +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/tools/website_gif/chrome-scrape-light.gif +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/tools/website_gif/chrome-scrape-light.mp4 +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/tools/website_gif/website_gif.mjs +0 -0
- {crawlee-1.0.1b11 → crawlee-1.0.2}/website/tsconfig.eslint.json +0 -0
|
@@ -2,8 +2,16 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
5
|
+
## [1.0.2](https://github.com/apify/crawlee-python/releases/tag/v1.0.2) (2025-10-08)
|
|
6
|
+
|
|
7
|
+
### 🐛 Bug Fixes
|
|
8
|
+
|
|
9
|
+
- Use Self type in the open() method of storage clients ([#1462](https://github.com/apify/crawlee-python/pull/1462)) ([4ec6f6c](https://github.com/apify/crawlee-python/commit/4ec6f6c08f81632197f602ff99151338b3eba6e7)) by [@janbuchar](https://github.com/janbuchar)
|
|
10
|
+
- Add storages name validation ([#1457](https://github.com/apify/crawlee-python/pull/1457)) ([84de11a](https://github.com/apify/crawlee-python/commit/84de11a3a603503076f5b7df487c9abab68a9015)) by [@Mantisus](https://github.com/Mantisus), closes [#1434](https://github.com/apify/crawlee-python/issues/1434)
|
|
11
|
+
- Pin pydantic version to <2.12.0 to avoid compatibility issues ([#1467](https://github.com/apify/crawlee-python/pull/1467)) ([f11b86f](https://github.com/apify/crawlee-python/commit/f11b86f7ed57f98e83dc1b52f15f2017a919bf59)) by [@vdusek](https://github.com/vdusek)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
## [1.0.1](https://github.com/apify/crawlee-python/releases/tag/v1.0.1) (2025-10-06)
|
|
7
15
|
|
|
8
16
|
### 🐛 Bug Fixes
|
|
9
17
|
|
|
@@ -11,9 +19,11 @@ All notable changes to this project will be documented in this file.
|
|
|
11
19
|
- Update templates to handle optional httpx client ([#1440](https://github.com/apify/crawlee-python/pull/1440)) ([c087efd](https://github.com/apify/crawlee-python/commit/c087efd39baedf46ca3e5cae1ddc1acd6396e6c1)) by [@Pijukatel](https://github.com/Pijukatel)
|
|
12
20
|
|
|
13
21
|
|
|
14
|
-
<!-- git-cliff-unreleased-end -->
|
|
15
22
|
## [1.0.0](https://github.com/apify/crawlee-python/releases/tag/v1.0.0) (2025-09-29)
|
|
16
23
|
|
|
24
|
+
- Check out the [Release blog post](https://crawlee.dev/blog/crawlee-for-python-v1) for more details.
|
|
25
|
+
- Check out the [Upgrading guide](https://crawlee.dev/python/docs/upgrading/upgrading-to-v1) to ensure a smooth update.
|
|
26
|
+
|
|
17
27
|
### 🚀 Features
|
|
18
28
|
|
|
19
29
|
- Add utility for load and parse Sitemap and `SitemapRequestLoader` ([#1169](https://github.com/apify/crawlee-python/pull/1169)) ([66599f8](https://github.com/apify/crawlee-python/commit/66599f8d085f3a8622e130019b6fdce2325737de)) by [@Mantisus](https://github.com/Mantisus), closes [#1161](https://github.com/apify/crawlee-python/issues/1161)
|
|
@@ -196,6 +206,9 @@ All notable changes to this project will be documented in this file.
|
|
|
196
206
|
|
|
197
207
|
## [0.6.0](https://github.com/apify/crawlee-python/releases/tag/v0.6.0) (2025-03-03)
|
|
198
208
|
|
|
209
|
+
- Check out the [Release blog post](https://crawlee.dev/blog/crawlee-for-python-v06) for more details.
|
|
210
|
+
- Check out the [Upgrading guide](https://crawlee.dev/python/docs/upgrading/upgrading-to-v0x#upgrading-to-v06) to ensure a smooth update.
|
|
211
|
+
|
|
199
212
|
### 🚀 Features
|
|
200
213
|
|
|
201
214
|
- Integrate browserforge fingerprints ([#829](https://github.com/apify/crawlee-python/pull/829)) ([2b156b4](https://github.com/apify/crawlee-python/commit/2b156b4ba688f9111195422e6058dff30eb1f782)) by [@Pijukatel](https://github.com/Pijukatel), closes [#549](https://github.com/apify/crawlee-python/issues/549)
|
|
@@ -276,6 +289,9 @@ All notable changes to this project will be documented in this file.
|
|
|
276
289
|
|
|
277
290
|
## [0.5.0](https://github.com/apify/crawlee-python/releases/tag/v0.5.0) (2025-01-02)
|
|
278
291
|
|
|
292
|
+
- Check out the [Release blog post](https://crawlee.dev/blog/crawlee-for-python-v05) for more details.
|
|
293
|
+
- Check out the [Upgrading guide](https://crawlee.dev/python/docs/upgrading/upgrading-to-v0x#upgrading-to-v05) to ensure a smooth update.
|
|
294
|
+
|
|
279
295
|
### 🚀 Features
|
|
280
296
|
|
|
281
297
|
- Add possibility to use None as no proxy in tiered proxies ([#760](https://github.com/apify/crawlee-python/pull/760)) ([0fbd017](https://github.com/apify/crawlee-python/commit/0fbd01723b9fe2e3410e0f358cab2f22848b08d0)) by [@Pijukatel](https://github.com/Pijukatel), closes [#687](https://github.com/apify/crawlee-python/issues/687)
|
|
@@ -367,6 +383,8 @@ All notable changes to this project will be documented in this file.
|
|
|
367
383
|
|
|
368
384
|
## [0.4.0](https://github.com/apify/crawlee-python/releases/tag/v0.4.0) (2024-11-01)
|
|
369
385
|
|
|
386
|
+
- Check out the [Upgrading guide](https://crawlee.dev/python/docs/upgrading/upgrading-to-v0x#upgrading-to-v04) to ensure a smooth update.
|
|
387
|
+
|
|
370
388
|
### 🚀 Features
|
|
371
389
|
|
|
372
390
|
- [**breaking**] Add headers in unique key computation ([#609](https://github.com/apify/crawlee-python/pull/609)) ([6c4746f](https://github.com/apify/crawlee-python/commit/6c4746fa8ff86952a812b32a1d70dc910e76b43e)) by [@Prathamesh010](https://github.com/Prathamesh010), closes [#548](https://github.com/apify/crawlee-python/issues/548)
|
|
@@ -476,6 +494,8 @@ All notable changes to this project will be documented in this file.
|
|
|
476
494
|
|
|
477
495
|
## [0.3.0](https://github.com/apify/crawlee-python/releases/tag/v0.3.0) (2024-08-27)
|
|
478
496
|
|
|
497
|
+
- Check out the [Upgrading guide](https://crawlee.dev/python/docs/upgrading/upgrading-to-v0x#upgrading-to-v03) to ensure a smooth update.
|
|
498
|
+
|
|
479
499
|
### 🚀 Features
|
|
480
500
|
|
|
481
501
|
- Implement ParselCrawler that adds support for Parsel ([#348](https://github.com/apify/crawlee-python/pull/348)) ([a3832e5](https://github.com/apify/crawlee-python/commit/a3832e527f022f32cce4a80055da3b7967b74522)) by [@asymness](https://github.com/asymness), closes [#335](https://github.com/apify/crawlee-python/issues/335)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: crawlee
|
|
3
|
-
Version: 1.0.
|
|
3
|
+
Version: 1.0.2
|
|
4
4
|
Summary: Crawlee for Python
|
|
5
5
|
Project-URL: Apify Homepage, https://apify.com
|
|
6
6
|
Project-URL: Changelog, https://crawlee.dev/python/docs/changelog
|
|
@@ -232,7 +232,7 @@ Requires-Dist: more-itertools>=10.2.0
|
|
|
232
232
|
Requires-Dist: protego>=0.5.0
|
|
233
233
|
Requires-Dist: psutil>=6.0.0
|
|
234
234
|
Requires-Dist: pydantic-settings!=2.7.0,!=2.7.1,!=2.8.0,>=2.2.0
|
|
235
|
-
Requires-Dist: pydantic
|
|
235
|
+
Requires-Dist: pydantic<2.12.0,>=2.11.0
|
|
236
236
|
Requires-Dist: pyee>=9.0.0
|
|
237
237
|
Requires-Dist: tldextract>=5.1.0
|
|
238
238
|
Requires-Dist: typing-extensions>=4.1.0
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import shutil
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from tempfile import TemporaryDirectory
|
|
5
|
+
|
|
6
|
+
from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext
|
|
7
|
+
|
|
8
|
+
# Profile name to use (usually 'Default' for single profile setups)
|
|
9
|
+
PROFILE_NAME = 'Default'
|
|
10
|
+
|
|
11
|
+
# Paths to Chrome profiles in your system (example for Windows)
|
|
12
|
+
# Use `chrome://version/` to find your profile path
|
|
13
|
+
PROFILE_PATH = Path(Path.home(), 'AppData', 'Local', 'Google', 'Chrome', 'User Data')
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
async def main() -> None:
|
|
17
|
+
# Create a temporary folder to copy the profile to
|
|
18
|
+
with TemporaryDirectory(prefix='crawlee-') as tmpdirname:
|
|
19
|
+
tmp_profile_dir = Path(tmpdirname)
|
|
20
|
+
|
|
21
|
+
# Copy the profile to a temporary folder
|
|
22
|
+
shutil.copytree(
|
|
23
|
+
PROFILE_PATH / PROFILE_NAME,
|
|
24
|
+
tmp_profile_dir / PROFILE_NAME,
|
|
25
|
+
dirs_exist_ok=True,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
crawler = PlaywrightCrawler(
|
|
29
|
+
headless=False,
|
|
30
|
+
# Use chromium for Chrome compatibility
|
|
31
|
+
browser_type='chromium',
|
|
32
|
+
# Disable fingerprints to preserve profile identity
|
|
33
|
+
fingerprint_generator=None,
|
|
34
|
+
# Set user data directory to temp folder
|
|
35
|
+
user_data_dir=tmp_profile_dir,
|
|
36
|
+
browser_launch_options={
|
|
37
|
+
# Use installed Chrome browser
|
|
38
|
+
'channel': 'chrome',
|
|
39
|
+
# Slow down actions to mimic human behavior
|
|
40
|
+
'slow_mo': 200,
|
|
41
|
+
'args': [
|
|
42
|
+
# Use the specified profile
|
|
43
|
+
f'--profile-directory={PROFILE_NAME}',
|
|
44
|
+
],
|
|
45
|
+
},
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
@crawler.router.default_handler
|
|
49
|
+
async def default_handler(context: PlaywrightCrawlingContext) -> None:
|
|
50
|
+
context.log.info(f'Visiting {context.request.url}')
|
|
51
|
+
|
|
52
|
+
await crawler.run(['https://crawlee.dev/'])
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
if __name__ == '__main__':
|
|
56
|
+
asyncio.run(main())
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext
|
|
5
|
+
|
|
6
|
+
# Replace this with your actual Firefox profile name
|
|
7
|
+
# Find it at about:profiles in Firefox
|
|
8
|
+
PROFILE_NAME = 'your-profile-name-here'
|
|
9
|
+
|
|
10
|
+
# Paths to Firefox profiles in your system (example for Windows)
|
|
11
|
+
# Use `about:profiles` to find your profile path
|
|
12
|
+
PROFILE_PATH = Path(
|
|
13
|
+
Path.home(), 'AppData', 'Roaming', 'Mozilla', 'Firefox', 'Profiles', PROFILE_NAME
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
async def main() -> None:
|
|
18
|
+
crawler = PlaywrightCrawler(
|
|
19
|
+
# Use Firefox browser type
|
|
20
|
+
browser_type='firefox',
|
|
21
|
+
# Disable fingerprints to use the profile as is
|
|
22
|
+
fingerprint_generator=None,
|
|
23
|
+
headless=False,
|
|
24
|
+
# Path to your Firefox profile
|
|
25
|
+
user_data_dir=PROFILE_PATH,
|
|
26
|
+
browser_launch_options={
|
|
27
|
+
'args': [
|
|
28
|
+
# Required to avoid version conflicts
|
|
29
|
+
'--allow-downgrade'
|
|
30
|
+
]
|
|
31
|
+
},
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
@crawler.router.default_handler
|
|
35
|
+
async def default_handler(context: PlaywrightCrawlingContext) -> None:
|
|
36
|
+
context.log.info(f'Visiting {context.request.url}')
|
|
37
|
+
|
|
38
|
+
await crawler.run(['https://crawlee.dev/'])
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
if __name__ == '__main__':
|
|
42
|
+
asyncio.run(main())
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
---
|
|
2
|
+
id: using_browser_profile
|
|
3
|
+
title: Using browser profile
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
import ApiLink from '@site/src/components/ApiLink';
|
|
7
|
+
|
|
8
|
+
import CodeBlock from '@theme/CodeBlock';
|
|
9
|
+
|
|
10
|
+
import ChromeProfileExample from '!!raw-loader!./code_examples/using_browser_profiles_chrome.py';
|
|
11
|
+
import FirefoxProfileExample from '!!raw-loader!./code_examples/using_browser_profiles_firefox.py';
|
|
12
|
+
|
|
13
|
+
This example demonstrates how to run <ApiLink to="class/PlaywrightCrawler">`PlaywrightCrawler`</ApiLink> using your local browser profile from [Chrome](https://www.google.com/intl/us/chrome/) or [Firefox](https://www.firefox.com/).
|
|
14
|
+
|
|
15
|
+
Using browser profiles allows you to leverage existing login sessions, saved passwords, bookmarks, and other personalized browser data during crawling. This can be particularly useful for testing scenarios or when you need to access content that requires authentication.
|
|
16
|
+
|
|
17
|
+
## Chrome browser
|
|
18
|
+
|
|
19
|
+
To run <ApiLink to="class/PlaywrightCrawler">`PlaywrightCrawler`</ApiLink> with your Chrome profile, you need to know the path to your profile files. You can find this information by entering `chrome://version/` as a URL in your Chrome browser. If you have multiple profiles, pay attention to the profile name - if you only have one profile, it's always `Default`.
|
|
20
|
+
|
|
21
|
+
You also need to use the [`channel`](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-option-channel) parameter in `browser_launch_options` to use the Chrome browser installed on your system instead of Playwright's Chromium.
|
|
22
|
+
|
|
23
|
+
:::warning Profile access limitation
|
|
24
|
+
Due to [Chrome's security policies](https://developer.chrome.com/blog/remote-debugging-port), automation cannot use your main browsing profile directly. The example copies your profile to a temporary location as a workaround.
|
|
25
|
+
:::
|
|
26
|
+
|
|
27
|
+
Make sure you don't have any running Chrome browser processes before running this code:
|
|
28
|
+
|
|
29
|
+
<CodeBlock className="language-python" language="python">
|
|
30
|
+
{ChromeProfileExample}
|
|
31
|
+
</CodeBlock>
|
|
32
|
+
|
|
33
|
+
## Firefox browser
|
|
34
|
+
|
|
35
|
+
To find the path to your Firefox profile, enter `about:profiles` as a URL in your Firefox browser. Unlike Chrome, you can use your standard profile path directly without copying it first.
|
|
36
|
+
|
|
37
|
+
Make sure you don't have any running Firefox browser processes before running this code:
|
|
38
|
+
|
|
39
|
+
<CodeBlock className="language-python" language="python">
|
|
40
|
+
{FirefoxProfileExample}
|
|
41
|
+
</CodeBlock>
|
|
@@ -333,3 +333,7 @@ async def main() -> None:
|
|
|
333
333
|
|
|
334
334
|
await crawler.run(['https://crawlee.dev/'])
|
|
335
335
|
```
|
|
336
|
+
|
|
337
|
+
### New storage naming restrictions
|
|
338
|
+
|
|
339
|
+
We've introduced naming restrictions for storages to ensure compatibility with Apify Platform requirements and prevent potential conflicts. Storage names may include only letters (a–z, A–Z), digits (0–9), and hyphens (-), with hyphens allowed only in the middle of the name (for example, my-storage-1).
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "crawlee"
|
|
7
|
-
version = "1.0.
|
|
7
|
+
version = "1.0.2"
|
|
8
8
|
description = "Crawlee for Python"
|
|
9
9
|
authors = [{ name = "Apify Technologies s.r.o.", email = "support@apify.com" }]
|
|
10
10
|
license = { file = "LICENSE" }
|
|
@@ -40,7 +40,7 @@ dependencies = [
|
|
|
40
40
|
"protego>=0.5.0",
|
|
41
41
|
"psutil>=6.0.0",
|
|
42
42
|
"pydantic-settings>=2.2.0,!=2.7.0,!=2.7.1,!=2.8.0",
|
|
43
|
-
"pydantic>=2.11.0",
|
|
43
|
+
"pydantic>=2.11.0,<2.12.0",
|
|
44
44
|
"pyee>=9.0.0",
|
|
45
45
|
"tldextract>=5.1.0",
|
|
46
46
|
"typing-extensions>=4.1.0",
|
|
@@ -107,7 +107,7 @@ dev = [
|
|
|
107
107
|
"pytest-timeout~=2.4.0",
|
|
108
108
|
"pytest-xdist~=3.8.0",
|
|
109
109
|
"pytest~=8.4.0",
|
|
110
|
-
"ruff~=0.
|
|
110
|
+
"ruff~=0.14.0",
|
|
111
111
|
"setuptools", # setuptools are used by pytest, but not explicitly required
|
|
112
112
|
"types-beautifulsoup4~=4.12.0.20240229",
|
|
113
113
|
"types-cachetools~=6.2.0.20250827",
|
{crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_file_system/_dataset_client.py
RENAMED
|
@@ -9,7 +9,7 @@ from pathlib import Path
|
|
|
9
9
|
from typing import TYPE_CHECKING, Any
|
|
10
10
|
|
|
11
11
|
from pydantic import ValidationError
|
|
12
|
-
from typing_extensions import override
|
|
12
|
+
from typing_extensions import Self, override
|
|
13
13
|
|
|
14
14
|
from crawlee._consts import METADATA_FILENAME
|
|
15
15
|
from crawlee._utils.crypto import crypto_random_object_id
|
|
@@ -94,7 +94,7 @@ class FileSystemDatasetClient(DatasetClient):
|
|
|
94
94
|
name: str | None,
|
|
95
95
|
alias: str | None,
|
|
96
96
|
configuration: Configuration,
|
|
97
|
-
) ->
|
|
97
|
+
) -> Self:
|
|
98
98
|
"""Open or create a file system dataset client.
|
|
99
99
|
|
|
100
100
|
This method attempts to open an existing dataset from the file system. If a dataset with the specified ID
|
|
@@ -10,7 +10,7 @@ from pathlib import Path
|
|
|
10
10
|
from typing import TYPE_CHECKING, Any
|
|
11
11
|
|
|
12
12
|
from pydantic import ValidationError
|
|
13
|
-
from typing_extensions import override
|
|
13
|
+
from typing_extensions import Self, override
|
|
14
14
|
|
|
15
15
|
from crawlee._consts import METADATA_FILENAME
|
|
16
16
|
from crawlee._utils.crypto import crypto_random_object_id
|
|
@@ -93,7 +93,7 @@ class FileSystemKeyValueStoreClient(KeyValueStoreClient):
|
|
|
93
93
|
name: str | None,
|
|
94
94
|
alias: str | None,
|
|
95
95
|
configuration: Configuration,
|
|
96
|
-
) ->
|
|
96
|
+
) -> Self:
|
|
97
97
|
"""Open or create a file system key-value store client.
|
|
98
98
|
|
|
99
99
|
This method attempts to open an existing key-value store from the file system. If a KVS with the specified
|
{crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_file_system/_request_queue_client.py
RENAMED
|
@@ -11,7 +11,7 @@ from pathlib import Path
|
|
|
11
11
|
from typing import TYPE_CHECKING
|
|
12
12
|
|
|
13
13
|
from pydantic import BaseModel, ValidationError
|
|
14
|
-
from typing_extensions import override
|
|
14
|
+
from typing_extensions import Self, override
|
|
15
15
|
|
|
16
16
|
from crawlee import Request
|
|
17
17
|
from crawlee._consts import METADATA_FILENAME
|
|
@@ -144,7 +144,7 @@ class FileSystemRequestQueueClient(RequestQueueClient):
|
|
|
144
144
|
name: str | None,
|
|
145
145
|
alias: str | None,
|
|
146
146
|
configuration: Configuration,
|
|
147
|
-
) ->
|
|
147
|
+
) -> Self:
|
|
148
148
|
"""Open or create a file system request queue client.
|
|
149
149
|
|
|
150
150
|
This method attempts to open an existing request queue from the file system. If a queue with the specified
|
|
@@ -4,7 +4,7 @@ from datetime import datetime, timezone
|
|
|
4
4
|
from logging import getLogger
|
|
5
5
|
from typing import TYPE_CHECKING, Any
|
|
6
6
|
|
|
7
|
-
from typing_extensions import override
|
|
7
|
+
from typing_extensions import Self, override
|
|
8
8
|
|
|
9
9
|
from crawlee._utils.crypto import crypto_random_object_id
|
|
10
10
|
from crawlee._utils.raise_if_too_many_kwargs import raise_if_too_many_kwargs
|
|
@@ -55,7 +55,7 @@ class MemoryDatasetClient(DatasetClient):
|
|
|
55
55
|
id: str | None,
|
|
56
56
|
name: str | None,
|
|
57
57
|
alias: str | None,
|
|
58
|
-
) ->
|
|
58
|
+
) -> Self:
|
|
59
59
|
"""Open or create a new memory dataset client.
|
|
60
60
|
|
|
61
61
|
This method creates a new in-memory dataset instance. Unlike persistent storage implementations, memory
|
{crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_memory/_key_value_store_client.py
RENAMED
|
@@ -4,7 +4,7 @@ import sys
|
|
|
4
4
|
from datetime import datetime, timezone
|
|
5
5
|
from typing import TYPE_CHECKING, Any
|
|
6
6
|
|
|
7
|
-
from typing_extensions import override
|
|
7
|
+
from typing_extensions import Self, override
|
|
8
8
|
|
|
9
9
|
from crawlee._utils.crypto import crypto_random_object_id
|
|
10
10
|
from crawlee._utils.file import infer_mime_type
|
|
@@ -53,7 +53,7 @@ class MemoryKeyValueStoreClient(KeyValueStoreClient):
|
|
|
53
53
|
id: str | None,
|
|
54
54
|
name: str | None,
|
|
55
55
|
alias: str | None,
|
|
56
|
-
) ->
|
|
56
|
+
) -> Self:
|
|
57
57
|
"""Open or create a new memory key-value store client.
|
|
58
58
|
|
|
59
59
|
This method creates a new in-memory key-value store instance. Unlike persistent storage implementations,
|
{crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_memory/_request_queue_client.py
RENAMED
|
@@ -6,7 +6,7 @@ from datetime import datetime, timezone
|
|
|
6
6
|
from logging import getLogger
|
|
7
7
|
from typing import TYPE_CHECKING
|
|
8
8
|
|
|
9
|
-
from typing_extensions import override
|
|
9
|
+
from typing_extensions import Self, override
|
|
10
10
|
|
|
11
11
|
from crawlee import Request
|
|
12
12
|
from crawlee._utils.crypto import crypto_random_object_id
|
|
@@ -65,7 +65,7 @@ class MemoryRequestQueueClient(RequestQueueClient):
|
|
|
65
65
|
id: str | None,
|
|
66
66
|
name: str | None,
|
|
67
67
|
alias: str | None,
|
|
68
|
-
) ->
|
|
68
|
+
) -> Self:
|
|
69
69
|
"""Open or create a new memory request queue client.
|
|
70
70
|
|
|
71
71
|
This method creates a new in-memory request queue instance. Unlike persistent storage implementations,
|
|
@@ -4,7 +4,7 @@ from logging import getLogger
|
|
|
4
4
|
from typing import TYPE_CHECKING, Any
|
|
5
5
|
|
|
6
6
|
from sqlalchemy import Select, insert, select
|
|
7
|
-
from typing_extensions import override
|
|
7
|
+
from typing_extensions import Self, override
|
|
8
8
|
|
|
9
9
|
from crawlee.storage_clients._base import DatasetClient
|
|
10
10
|
from crawlee.storage_clients.models import DatasetItemsListPage, DatasetMetadata
|
|
@@ -78,7 +78,7 @@ class SqlDatasetClient(DatasetClient, SqlClientMixin):
|
|
|
78
78
|
name: str | None,
|
|
79
79
|
alias: str | None,
|
|
80
80
|
storage_client: SqlStorageClient,
|
|
81
|
-
) ->
|
|
81
|
+
) -> Self:
|
|
82
82
|
"""Open an existing dataset or create a new one.
|
|
83
83
|
|
|
84
84
|
Args:
|
{crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_sql/_key_value_store_client.py
RENAMED
|
@@ -5,7 +5,7 @@ from logging import getLogger
|
|
|
5
5
|
from typing import TYPE_CHECKING, Any
|
|
6
6
|
|
|
7
7
|
from sqlalchemy import delete, select
|
|
8
|
-
from typing_extensions import override
|
|
8
|
+
from typing_extensions import Self, override
|
|
9
9
|
|
|
10
10
|
from crawlee._utils.file import infer_mime_type
|
|
11
11
|
from crawlee.storage_clients._base import KeyValueStoreClient
|
|
@@ -77,7 +77,7 @@ class SqlKeyValueStoreClient(KeyValueStoreClient, SqlClientMixin):
|
|
|
77
77
|
name: str | None,
|
|
78
78
|
alias: str | None,
|
|
79
79
|
storage_client: SqlStorageClient,
|
|
80
|
-
) ->
|
|
80
|
+
) -> Self:
|
|
81
81
|
"""Open or create a SQL key-value store client.
|
|
82
82
|
|
|
83
83
|
This method attempts to open an existing key-value store from the SQL database. If a KVS with the specified
|
{crawlee-1.0.1b11 → crawlee-1.0.2}/src/crawlee/storage_clients/_sql/_request_queue_client.py
RENAMED
|
@@ -10,7 +10,7 @@ from typing import TYPE_CHECKING, Any
|
|
|
10
10
|
from sqlalchemy import func, or_, select, update
|
|
11
11
|
from sqlalchemy.exc import SQLAlchemyError
|
|
12
12
|
from sqlalchemy.orm import load_only
|
|
13
|
-
from typing_extensions import NotRequired, override
|
|
13
|
+
from typing_extensions import NotRequired, Self, override
|
|
14
14
|
|
|
15
15
|
from crawlee import Request
|
|
16
16
|
from crawlee._utils.crypto import crypto_random_object_id
|
|
@@ -119,7 +119,7 @@ class SqlRequestQueueClient(RequestQueueClient, SqlClientMixin):
|
|
|
119
119
|
name: str | None,
|
|
120
120
|
alias: str | None,
|
|
121
121
|
storage_client: SqlStorageClient,
|
|
122
|
-
) ->
|
|
122
|
+
) -> Self:
|
|
123
123
|
"""Open an existing request queue or create a new one.
|
|
124
124
|
|
|
125
125
|
This method first tries to find an existing queue by ID or name.
|
|
@@ -44,7 +44,9 @@ class Storage(ABC):
|
|
|
44
44
|
|
|
45
45
|
Args:
|
|
46
46
|
id: The storage ID.
|
|
47
|
-
name: The storage name (global scope, persists across runs).
|
|
47
|
+
name: The storage name (global scope, persists across runs). Name can only contain letters "a" through "z",
|
|
48
|
+
the digits "0" through "9", and the hyphen ("-") but only in the middle of the string
|
|
49
|
+
(e.g. "my-value-1").
|
|
48
50
|
alias: The storage alias (run scope, creates unnamed storage).
|
|
49
51
|
configuration: Configuration object used during the storage creation or restoration process.
|
|
50
52
|
storage_client: Underlying storage client to use. If not provided, the default global storage client
|
|
@@ -12,6 +12,7 @@ from crawlee._utils.file import export_csv_to_stream, export_json_to_stream
|
|
|
12
12
|
|
|
13
13
|
from ._base import Storage
|
|
14
14
|
from ._key_value_store import KeyValueStore
|
|
15
|
+
from ._utils import validate_storage_name
|
|
15
16
|
|
|
16
17
|
if TYPE_CHECKING:
|
|
17
18
|
from collections.abc import AsyncIterator
|
|
@@ -75,6 +76,8 @@ class Dataset(Storage):
|
|
|
75
76
|
id: The unique identifier of the storage.
|
|
76
77
|
name: The name of the storage, if available.
|
|
77
78
|
"""
|
|
79
|
+
validate_storage_name(name)
|
|
80
|
+
|
|
78
81
|
self._client = client
|
|
79
82
|
self._id = id
|
|
80
83
|
self._name = name
|
|
@@ -15,6 +15,7 @@ from crawlee._utils.recoverable_state import RecoverableState
|
|
|
15
15
|
from crawlee.storage_clients.models import KeyValueStoreMetadata
|
|
16
16
|
|
|
17
17
|
from ._base import Storage
|
|
18
|
+
from ._utils import validate_storage_name
|
|
18
19
|
|
|
19
20
|
if TYPE_CHECKING:
|
|
20
21
|
from collections.abc import AsyncIterator
|
|
@@ -84,6 +85,8 @@ class KeyValueStore(Storage):
|
|
|
84
85
|
id: The unique identifier of the storage.
|
|
85
86
|
name: The name of the storage, if available.
|
|
86
87
|
"""
|
|
88
|
+
validate_storage_name(name)
|
|
89
|
+
|
|
87
90
|
self._client = client
|
|
88
91
|
self._id = id
|
|
89
92
|
self._name = name
|
|
@@ -13,6 +13,7 @@ from crawlee._utils.wait import wait_for_all_tasks_for_finish
|
|
|
13
13
|
from crawlee.request_loaders import RequestManager
|
|
14
14
|
|
|
15
15
|
from ._base import Storage
|
|
16
|
+
from ._utils import validate_storage_name
|
|
16
17
|
|
|
17
18
|
if TYPE_CHECKING:
|
|
18
19
|
from collections.abc import Sequence
|
|
@@ -80,6 +81,8 @@ class RequestQueue(Storage, RequestManager):
|
|
|
80
81
|
id: The unique identifier of the storage.
|
|
81
82
|
name: The name of the storage, if available.
|
|
82
83
|
"""
|
|
84
|
+
validate_storage_name(name)
|
|
85
|
+
|
|
83
86
|
self._client = client
|
|
84
87
|
self._id = id
|
|
85
88
|
self._name = name
|
|
@@ -8,6 +8,8 @@ from typing import TYPE_CHECKING, TypeVar
|
|
|
8
8
|
from crawlee._utils.raise_if_too_many_kwargs import raise_if_too_many_kwargs
|
|
9
9
|
from crawlee.storage_clients._base import DatasetClient, KeyValueStoreClient, RequestQueueClient
|
|
10
10
|
|
|
11
|
+
from ._utils import validate_storage_name
|
|
12
|
+
|
|
11
13
|
if TYPE_CHECKING:
|
|
12
14
|
from ._base import Storage
|
|
13
15
|
|
|
@@ -90,7 +92,9 @@ class StorageInstanceManager:
|
|
|
90
92
|
Args:
|
|
91
93
|
cls: The storage class to instantiate.
|
|
92
94
|
id: Storage ID.
|
|
93
|
-
name: Storage name. (global scope, persists across runs).
|
|
95
|
+
name: Storage name. (global scope, persists across runs). Name can only contain letters "a" through "z",
|
|
96
|
+
the digits "0" through "9", and the hyphen ("-") but only in the middle of the string
|
|
97
|
+
(e.g. "my-value-1").
|
|
94
98
|
alias: Storage alias (run scope, creates unnamed storage).
|
|
95
99
|
client_opener_coro: Coroutine to open the storage client when storage instance not found in cache.
|
|
96
100
|
storage_client_cache_key: Additional optional key from storage client to differentiate cache entries.
|
|
@@ -146,6 +150,10 @@ class StorageInstanceManager:
|
|
|
146
150
|
f'Use a different name or drop the existing alias storage first.'
|
|
147
151
|
)
|
|
148
152
|
|
|
153
|
+
# Validate storage name
|
|
154
|
+
if name is not None:
|
|
155
|
+
validate_storage_name(name)
|
|
156
|
+
|
|
149
157
|
# Create new instance
|
|
150
158
|
client: KeyValueStoreClient | DatasetClient | RequestQueueClient
|
|
151
159
|
client = await client_opener_coro
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
NAME_REGEX = re.compile(r'^([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])$')
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def validate_storage_name(name: str | None) -> None:
|
|
7
|
+
if name and not NAME_REGEX.match(name):
|
|
8
|
+
raise ValueError(
|
|
9
|
+
f'Invalid storage name "{name}". Name can only contain letters "a" through "z", the digits "0" through'
|
|
10
|
+
'"9", and the hyphen ("-") but only in the middle of the string (e.g. "my-value-1")'
|
|
11
|
+
)
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import base64
|
|
2
2
|
import gzip
|
|
3
|
+
import os
|
|
3
4
|
from datetime import datetime
|
|
4
5
|
|
|
6
|
+
import pytest
|
|
5
7
|
from yarl import URL
|
|
6
8
|
|
|
7
9
|
from crawlee._utils.sitemap import Sitemap, SitemapUrl, parse_sitemap
|
|
@@ -104,6 +106,10 @@ async def test_gzipped_sitemap_with_invalid_data(server_url: URL, http_client: H
|
|
|
104
106
|
assert sitemap.urls == []
|
|
105
107
|
|
|
106
108
|
|
|
109
|
+
@pytest.mark.skipif(
|
|
110
|
+
os.name == 'nt',
|
|
111
|
+
reason='This test is flaky on Windows, see https://github.com/apify/crawlee-python/issues/1460.',
|
|
112
|
+
)
|
|
107
113
|
async def test_gz_sitemap_with_non_gzipped(server_url: URL, http_client: HttpClient) -> None:
|
|
108
114
|
"""Test loading a sitemap with gzip type and .xml.gz url, but without gzipped data."""
|
|
109
115
|
sitemap_url = (server_url / 'sitemap.xml.gz').with_query(
|
|
@@ -473,7 +473,7 @@ async def test_adaptive_crawler_exceptions_in_sub_crawlers(*, error_in_pw_crawle
|
|
|
473
473
|
async def test_adaptive_playwright_crawler_statistics_in_init() -> None:
|
|
474
474
|
"""Tests that adaptive crawler uses created AdaptivePlaywrightCrawlerStatistics from inputted Statistics."""
|
|
475
475
|
persistence_enabled = True
|
|
476
|
-
persist_state_kvs_name = 'some
|
|
476
|
+
persist_state_kvs_name = 'some-name'
|
|
477
477
|
persist_state_key = 'come key'
|
|
478
478
|
log_message = 'some message'
|
|
479
479
|
periodic_message_logger = logging.getLogger('some logger')
|
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import io
|
|
2
2
|
import json
|
|
3
|
+
import os
|
|
3
4
|
import re
|
|
4
5
|
from unittest import mock
|
|
5
6
|
|
|
7
|
+
import pytest
|
|
6
8
|
from opentelemetry.sdk.resources import Resource
|
|
7
9
|
from opentelemetry.sdk.trace import TracerProvider
|
|
8
10
|
from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor
|
|
@@ -14,6 +16,10 @@ from crawlee.otel.crawler_instrumentor import CrawlerInstrumentor
|
|
|
14
16
|
from crawlee.storages import Dataset
|
|
15
17
|
|
|
16
18
|
|
|
19
|
+
@pytest.mark.skipif(
|
|
20
|
+
os.name == 'nt',
|
|
21
|
+
reason='This test is flaky on Windows, see https://github.com/apify/crawlee-python/issues/1469.',
|
|
22
|
+
)
|
|
17
23
|
async def test_crawler_instrumentor_capability(server_url: URL) -> None:
|
|
18
24
|
"""Test OpenTelemetry instrumentation capability of the crawler.
|
|
19
25
|
|
|
@@ -38,7 +44,7 @@ async def test_crawler_instrumentor_capability(server_url: URL) -> None:
|
|
|
38
44
|
|
|
39
45
|
# Generate first telemetry data from `Dataset` public methods.
|
|
40
46
|
# `Dataset` is in `instrument_classes` argument, and thus it's public methods are instrumented.
|
|
41
|
-
dataset = await Dataset.open(name='
|
|
47
|
+
dataset = await Dataset.open(name='test-dataset')
|
|
42
48
|
await dataset.drop()
|
|
43
49
|
|
|
44
50
|
# Other traces will be from crawler run.
|