crawlee 1.0.0rc1.tar.gz → 1.0.1.tar.gz
This diff shows the changes between publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only.
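The most substantial addition visible in this release is a new SQL-backed storage client: the new `src/crawlee/storage_clients/_sql/` package and the documentation examples `sql_storage_client_basic_example.py` and `sql_storage_client_configuration_example.py` listed below. The diff summary does not show the API itself, so the following is only a minimal sketch, assuming the client is exported as `SqlStorageClient` from `crawlee.storage_clients` and accepts a SQLAlchemy-style `connection_string`:

```python
import asyncio

from crawlee.crawlers import ParselCrawler, ParselCrawlingContext
# Assumption: the new SQL client is exported under this name.
from crawlee.storage_clients import SqlStorageClient


async def main() -> None:
    # Assumption: a SQLAlchemy-style connection string selects the backing database.
    storage_client = SqlStorageClient(connection_string='sqlite+aiosqlite:///crawlee.db')

    # Crawlers accept a storage client, so the datasets, key-value stores and
    # request queues used by this crawler would live in the SQL database.
    crawler = ParselCrawler(storage_client=storage_client)

    @crawler.router.default_handler
    async def handler(context: ParselCrawlingContext) -> None:
        await context.push_data({'url': context.request.url})

    await crawler.run(['https://crawlee.dev'])


if __name__ == '__main__':
    asyncio.run(main())
```

The file-by-file change listing follows.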
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/.github/workflows/build_and_deploy_docs.yaml +3 -3
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/.github/workflows/check_pr_title.yaml +1 -1
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/.github/workflows/pre_release.yaml +1 -1
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/.github/workflows/templates_e2e_tests.yaml +2 -2
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/.github/workflows/update_new_issue.yaml +1 -1
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/CHANGELOG.md +18 -1
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/PKG-INFO +12 -5
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/README.md +0 -2
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/playwright_crawler_adaptive/handler.py +1 -1
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/request_loaders/rl_basic_example.py +1 -0
- crawlee-1.0.1/docs/guides/code_examples/request_loaders/rl_basic_example_with_persist.py +46 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/request_loaders/rl_tandem_example.py +13 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/request_loaders/rl_tandem_example_explicit.py +11 -0
- crawlee-1.0.1/docs/guides/code_examples/request_loaders/sitemap_basic_example.py +30 -0
- crawlee-1.0.1/docs/guides/code_examples/request_loaders/sitemap_example_with_persist.py +45 -0
- crawlee-1.0.1/docs/guides/code_examples/request_loaders/sitemap_tandem_example.py +53 -0
- crawlee-1.0.1/docs/guides/code_examples/request_loaders/sitemap_tandem_example_explicit.py +54 -0
- crawlee-1.0.1/docs/guides/code_examples/service_locator/service_storage_configuration.py +30 -0
- crawlee-1.0.1/docs/guides/code_examples/storage_clients/sql_storage_client_basic_example.py +12 -0
- crawlee-1.0.1/docs/guides/code_examples/storage_clients/sql_storage_client_configuration_example.py +33 -0
- crawlee-1.0.1/docs/guides/code_examples/storages/opening.py +19 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/request_loaders.mdx +27 -1
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/service_locator.mdx +1 -1
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/storage_clients.mdx +188 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/storages.mdx +22 -9
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/03_adding_more_urls.mdx +1 -1
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/upgrading/upgrading_to_v1.md +91 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/pyproject.toml +19 -10
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_autoscaling/snapshotter.py +1 -1
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_request.py +2 -1
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_service_locator.py +44 -24
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_types.py +76 -17
- crawlee-1.0.1/src/crawlee/_utils/raise_if_too_many_kwargs.py +12 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/sitemap.py +3 -1
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/system.py +3 -3
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/browsers/_playwright_browser_controller.py +20 -14
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/configuration.py +1 -1
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +3 -1
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_abstract_http/_abstract_http_parser.py +1 -1
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_abstract_http/_http_crawling_context.py +1 -1
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +6 -2
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py +1 -1
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py +2 -1
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py +1 -1
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_basic/_basic_crawler.py +107 -27
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_basic/_logging_utils.py +5 -1
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_playwright/_playwright_crawler.py +6 -1
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/events/_types.py +6 -6
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/fingerprint_suite/_fingerprint_generator.py +3 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/fingerprint_suite/_types.py +2 -2
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml +2 -2
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/{{cookiecutter.project_name}}/requirements.txt +3 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/request_loaders/_request_list.py +1 -1
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/request_loaders/_request_loader.py +5 -1
- crawlee-1.0.1/src/crawlee/request_loaders/_sitemap_request_loader.py +357 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/sessions/_models.py +2 -2
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/statistics/_models.py +1 -1
- crawlee-1.0.1/src/crawlee/storage_clients/__init__.py +21 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_base/_storage_client.py +13 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_file_system/_dataset_client.py +27 -25
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_file_system/_key_value_store_client.py +27 -23
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_file_system/_request_queue_client.py +84 -98
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_file_system/_storage_client.py +16 -3
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_memory/_dataset_client.py +14 -2
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_memory/_key_value_store_client.py +14 -2
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_memory/_request_queue_client.py +43 -12
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_memory/_storage_client.py +6 -3
- crawlee-1.0.1/src/crawlee/storage_clients/_sql/__init__.py +6 -0
- crawlee-1.0.1/src/crawlee/storage_clients/_sql/_client_mixin.py +385 -0
- crawlee-1.0.1/src/crawlee/storage_clients/_sql/_dataset_client.py +310 -0
- crawlee-1.0.1/src/crawlee/storage_clients/_sql/_db_models.py +269 -0
- crawlee-1.0.1/src/crawlee/storage_clients/_sql/_key_value_store_client.py +299 -0
- crawlee-1.0.1/src/crawlee/storage_clients/_sql/_request_queue_client.py +706 -0
- crawlee-1.0.1/src/crawlee/storage_clients/_sql/_storage_client.py +282 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/models.py +10 -10
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storages/_base.py +3 -1
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storages/_dataset.py +9 -2
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storages/_key_value_store.py +9 -2
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storages/_request_queue.py +7 -2
- crawlee-1.0.1/src/crawlee/storages/_storage_instance_manager.py +187 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_autoscaling/test_autoscaled_pool.py +4 -2
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_autoscaling/test_snapshotter.py +6 -6
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_utils/test_byte_size.py +2 -2
- crawlee-1.0.1/tests/unit/_utils/test_raise_if_too_many_kwargs.py +38 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/browsers/test_browser_pool.py +5 -5
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/browsers/test_playwright_browser_controller.py +30 -4
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/browsers/test_playwright_browser_plugin.py +2 -2
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/conftest.py +13 -15
- crawlee-1.0.1/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawling_context.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/crawlers/_adaptive_playwright/test_predictor.py +1 -1
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/crawlers/_basic/test_basic_crawler.py +225 -14
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py +110 -1
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/crawlers/_http/test_http_crawler.py +7 -3
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/crawlers/_parsel/test_parsel_crawler.py +108 -1
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/crawlers/_playwright/test_playwright_crawler.py +135 -1
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/events/test_event_manager.py +3 -3
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/fingerprint_suite/test_header_generator.py +2 -2
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/request_loaders/test_sitemap_request_loader.py +69 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/sessions/test_session_pool.py +5 -5
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py +5 -17
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py +3 -13
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/storage_clients/_file_system/test_fs_rq_client.py +4 -10
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/storage_clients/_memory/test_memory_dataset_client.py +0 -5
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/storage_clients/_memory/test_memory_kvs_client.py +0 -4
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/storage_clients/_memory/test_memory_rq_client.py +0 -5
- crawlee-1.0.1/tests/unit/storage_clients/_sql/test_sql_dataset_client.py +236 -0
- crawlee-1.0.1/tests/unit/storage_clients/_sql/test_sql_kvs_client.py +287 -0
- crawlee-1.0.1/tests/unit/storage_clients/_sql/test_sql_rq_client.py +239 -0
- crawlee-1.0.1/tests/unit/storages/conftest.py +18 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/storages/test_dataset.py +511 -40
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/storages/test_key_value_store.py +506 -41
- crawlee-1.0.1/tests/unit/storages/test_request_queue.py +1261 -0
- crawlee-1.0.1/tests/unit/storages/test_storage_instance_manager.py +143 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/test_service_locator.py +12 -16
- crawlee-1.0.1/uv.lock +3966 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/docusaurus.config.js +8 -4
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/package.json +15 -14
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/css/custom.css +4 -1
- crawlee-1.0.1/website/static/.nojekyll +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/yarn.lock +1853 -1447
- crawlee-1.0.0rc1/docs/guides/code_examples/request_loaders/sitemap_example.py +0 -28
- crawlee-1.0.0rc1/docs/guides/code_examples/request_loaders/sitemap_tandem_example.py +0 -40
- crawlee-1.0.0rc1/docs/guides/code_examples/request_loaders/sitemap_tandem_example_explicit.py +0 -43
- crawlee-1.0.0rc1/docs/guides/code_examples/service_locator/service_storage_configuration.py +0 -22
- crawlee-1.0.0rc1/src/crawlee/request_loaders/_sitemap_request_loader.py +0 -177
- crawlee-1.0.0rc1/src/crawlee/storage_clients/__init__.py +0 -9
- crawlee-1.0.0rc1/src/crawlee/storages/_storage_instance_manager.py +0 -133
- crawlee-1.0.0rc1/tests/unit/storages/test_request_queue.py +0 -644
- crawlee-1.0.0rc1/uv.lock +0 -3623
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/.editorconfig +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/.github/CODEOWNERS +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/.github/pull_request_template.md +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/.github/workflows/release.yaml +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/.github/workflows/run_code_checks.yaml +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/.gitignore +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/.markdownlint.yaml +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/.pre-commit-config.yaml +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/CONTRIBUTING.md +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/LICENSE +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/Makefile +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/deployment/apify_platform.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/deployment/code_examples/apify/crawler_as_actor_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/deployment/code_examples/apify/get_public_url.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/deployment/code_examples/apify/log_with_config_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/deployment/code_examples/apify/proxy_advanced_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/deployment/code_examples/apify/proxy_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/deployment/code_examples/google/cloud_run_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/deployment/code_examples/google/google_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/deployment/google_cloud.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/deployment/google_cloud_run.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/add_data_to_dataset.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/beautifulsoup_crawler.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/capture_screenshot_using_playwright.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/capturing_page_snapshots_with_error_snapshotter.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/adaptive_playwright_crawler.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/add_data_to_dataset_bs.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/add_data_to_dataset_dataset.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/add_data_to_dataset_pw.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/beautifulsoup_crawler.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/beautifulsoup_crawler_keep_alive.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/beautifulsoup_crawler_stop.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/capture_screenshot_using_playwright.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/configure_json_logging.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/crawl_all_links_on_website_bs.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/crawl_all_links_on_website_pw.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/crawl_multiple_urls_bs.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/crawl_multiple_urls_pw.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/crawl_specific_links_on_website_bs.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/crawl_specific_links_on_website_pw.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/crawl_website_with_relative_links_all_links.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/crawl_website_with_relative_links_same_domain.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/crawl_website_with_relative_links_same_hostname.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/crawl_website_with_relative_links_same_origin.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/export_entire_dataset_to_file_csv.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/export_entire_dataset_to_file_json.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/extract_and_add_specific_links_on_website_bs.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/extract_and_add_specific_links_on_website_pw.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/fill_and_submit_web_form_crawler.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/fill_and_submit_web_form_request.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/parsel_crawler.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/parsel_crawler_with_error_snapshotter.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/playwright_block_requests.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/playwright_crawler.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/playwright_crawler_with_camoufox.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/playwright_crawler_with_error_snapshotter.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/playwright_crawler_with_fingerprint_generator.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/respect_robots_on_skipped_request.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/respect_robots_txt_file.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/resuming_paused_crawl.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/crawl_all_links_on_website.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/crawl_multiple_urls.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/crawl_specific_links_on_website.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/crawl_website_with_relative_links.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/crawler_keep_alive.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/crawler_stop.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/export_entire_dataset_to_file.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/fill_and_submit_web_form.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/json_logging.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/parsel_crawler.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/playwright_crawler.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/playwright_crawler_adaptive.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/playwright_crawler_with_block_requests.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/playwright_crawler_with_camoufox.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/playwright_crawler_with_fingerprint_generator.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/respect_robots_txt_file.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/resuming_paused_crawl.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/architecture_overview.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/avoid_blocking.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/avoid_blocking/default_fingerprint_generator_with_args.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/avoid_blocking/playwright_with_fingerprint_generator.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/creating_web_archive/manual_archiving_parsel_crawler.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/creating_web_archive/manual_archiving_playwright_crawler.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/creating_web_archive/simple_pw_through_proxy_pywb_server.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/error_handling/change_handle_error_status.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/error_handling/disable_retry.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/error_handling/handle_proxy_error.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/http_clients/parsel_curl_impersonate_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/http_clients/parsel_httpx_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/http_clients/parsel_impit_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/http_crawlers/beautifulsoup_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/http_crawlers/custom_crawler_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/http_crawlers/http_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/http_crawlers/parsel_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/login_crawler/http_login.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/login_crawler/playwright_login.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/playwright_crawler/browser_configuration_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/playwright_crawler/multiple_launch_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/playwright_crawler/plugin_browser_configuration_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/playwright_crawler/pre_navigation_hook_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/playwright_crawler_adaptive/init_beautifulsoup.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/playwright_crawler_adaptive/init_parsel.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/playwright_crawler_adaptive/init_prediction.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/playwright_crawler_adaptive/pre_nav_hooks.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/playwright_crawler_stagehand/__init__.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/playwright_crawler_stagehand/browser_classes.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/playwright_crawler_stagehand/stagehand_run.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/playwright_crawler_stagehand/support_classes.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/proxy_management/inspecting_bs_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/proxy_management/inspecting_pw_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/proxy_management/integration_bs_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/proxy_management/integration_pw_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/proxy_management/quick_start_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/proxy_management/session_bs_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/proxy_management/session_pw_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/proxy_management/tiers_bs_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/proxy_management/tiers_pw_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/request_router/adaptive_crawler_handlers.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/request_router/basic_request_handlers.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/request_router/custom_router_default_only.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/request_router/error_handler.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/request_router/failed_request_handler.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/request_router/http_pre_navigation.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/request_router/playwright_pre_navigation.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/request_router/simple_default_handler.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/running_in_web_server/__init__.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/running_in_web_server/crawler.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/running_in_web_server/server.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/scaling_crawlers/max_tasks_per_minute_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/scaling_crawlers/min_and_max_concurrency_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/service_locator/service_conflicts.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/service_locator/service_crawler_configuration.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/service_locator/service_crawler_event_manager.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/service_locator/service_crawler_storage_client.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/service_locator/service_locator_configuration.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/service_locator/service_locator_event_manager.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/service_locator/service_locator_storage_client.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/service_locator/service_storage_storage_client.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/session_management/multi_sessions_http.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/session_management/one_session_http.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/session_management/sm_basic.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/session_management/sm_beautifulsoup.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/session_management/sm_http.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/session_management/sm_parsel.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/session_management/sm_playwright.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/session_management/sm_standalone.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storage_clients/custom_storage_client_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storage_clients/file_system_storage_client_basic_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storage_clients/file_system_storage_client_configuration_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storage_clients/memory_storage_client_basic_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storage_clients/registering_storage_clients_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storages/cleaning_do_not_purge_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storages/cleaning_purge_explicitly_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storages/dataset_basic_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storages/dataset_with_crawler_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storages/dataset_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storages/helper_add_requests_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storages/helper_enqueue_links_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storages/kvs_basic_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storages/kvs_with_crawler_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storages/kvs_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storages/rq_basic_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storages/rq_with_crawler_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storages/rq_with_crawler_explicit_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/trace_and_monitor_crawlers/instrument_crawler.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/crawler_login.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/creating_web_archive.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/error_handling.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/http_clients.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/http_crawlers.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/playwright_crawler.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/playwright_crawler_adaptive.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/playwright_crawler_stagehand.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/proxy_management.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/request_router.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/running_in_web_server.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/scaling_crawlers.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/session_management.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/trace_and_monitor_crawlers.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/01_setting_up.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/02_first_crawler.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/04_real_world_project.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/05_crawling.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/06_scraping.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/07_saving_data.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/08_refactoring.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/09_running_in_cloud.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/02_bs.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/02_bs_better.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/02_request_queue.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/03_enqueue_strategy.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/03_finding_new_links.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/03_globs.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/03_original_code.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/03_transform_request.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/04_sanity_check.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/05_crawling_detail.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/05_crawling_listing.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/06_scraping.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/07_final_code.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/07_first_code.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/08_main.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/08_routes.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/09_apify_sdk.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/__init__.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/routes.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/index.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/pyproject.toml +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/quick-start/code_examples/beautifulsoup_crawler_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/quick-start/code_examples/parsel_crawler_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/quick-start/code_examples/playwright_crawler_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/quick-start/code_examples/playwright_crawler_headful_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/quick-start/index.mdx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/upgrading/upgrading_to_v0x.md +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/renovate.json +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/__init__.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_autoscaling/__init__.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_autoscaling/_types.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_autoscaling/autoscaled_pool.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_autoscaling/py.typed +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_autoscaling/system_status.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_browserforge_workaround.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_cli.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_consts.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_log_config.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/__init__.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/blocked.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/byte_size.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/console.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/context.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/crypto.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/docs.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/file.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/globs.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/html_to_text.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/models.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/recoverable_state.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/recurring_task.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/requests.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/robots.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/time.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/try_import.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/urls.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/wait.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/web.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/browsers/__init__.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/browsers/_browser_controller.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/browsers/_browser_plugin.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/browsers/_browser_pool.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/browsers/_playwright_browser.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/browsers/_playwright_browser_plugin.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/browsers/_types.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/browsers/py.typed +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/__init__.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_abstract_http/__init__.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_abstract_http/py.typed +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_adaptive_playwright/__init__.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_adaptive_playwright/_result_comparator.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_adaptive_playwright/_utils.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_basic/__init__.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_basic/_basic_crawling_context.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_basic/_context_pipeline.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_basic/py.typed +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_beautifulsoup/__init__.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawler.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawling_context.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_parser.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_beautifulsoup/_utils.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_beautifulsoup/py.typed +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_http/__init__.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_http/_http_crawler.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_http/_http_parser.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_parsel/__init__.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_parsel/_parsel_crawler.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_parsel/_parsel_crawling_context.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_parsel/_parsel_parser.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_parsel/_utils.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_playwright/__init__.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_playwright/_playwright_crawling_context.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_playwright/_playwright_http_client.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_playwright/_types.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_playwright/_utils.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_types.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/py.typed +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/errors.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/events/__init__.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/events/_event_manager.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/events/_local_event_manager.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/events/py.typed +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/fingerprint_suite/__init__.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/fingerprint_suite/_browserforge_adapter.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/fingerprint_suite/_consts.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/fingerprint_suite/_header_generator.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/fingerprint_suite/py.typed +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/http_clients/__init__.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/http_clients/_base.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/http_clients/_curl_impersonate.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/http_clients/_httpx.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/http_clients/_impit.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/otel/__init__.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/otel/crawler_instrumentor.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/cookiecutter.json +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/hooks/post_gen_project.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/hooks/pre_gen_project.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/templates/main.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/templates/main_beautifulsoup.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/templates/main_parsel.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/templates/main_playwright.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/templates/main_playwright_camoufox.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/templates/routes_beautifulsoup.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/templates/routes_camoufox.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/templates/routes_parsel.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/templates/routes_playwright.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/templates/routes_playwright_camoufox.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/{{cookiecutter.project_name}}/.dockerignore +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/{{cookiecutter.project_name}}/README.md +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__init__.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__main__.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/main.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/routes.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/proxy_configuration.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/py.typed +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/request_loaders/__init__.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/request_loaders/_request_manager.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/request_loaders/_request_manager_tandem.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/router.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/sessions/__init__.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/sessions/_cookies.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/sessions/_session.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/sessions/_session_pool.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/sessions/py.typed +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/statistics/__init__.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/statistics/_error_snapshotter.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/statistics/_error_tracker.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/statistics/_statistics.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_base/__init__.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_base/_dataset_client.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_base/_key_value_store_client.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_base/_request_queue_client.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_base/py.typed +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_file_system/__init__.py +0 -0
- /crawlee-1.0.0rc1/src/crawlee/storage_clients/_file_system/py.typed → /crawlee-1.0.1/src/crawlee/storage_clients/_file_system/_utils.py +0 -0
- {crawlee-1.0.0rc1/src/crawlee/storage_clients/_memory → crawlee-1.0.1/src/crawlee/storage_clients/_file_system}/py.typed +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_memory/__init__.py +0 -0
- {crawlee-1.0.0rc1/src/crawlee/storage_clients → crawlee-1.0.1/src/crawlee/storage_clients/_memory}/py.typed +0 -0
- {crawlee-1.0.0rc1/src/crawlee/storages → crawlee-1.0.1/src/crawlee/storage_clients/_sql}/py.typed +0 -0
- /crawlee-1.0.0rc1/tests/__init__.py → /crawlee-1.0.1/src/crawlee/storage_clients/py.typed +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storages/__init__.py +0 -0
- /crawlee-1.0.0rc1/tests/e2e/__init__.py → /crawlee-1.0.1/src/crawlee/storages/py.typed +0 -0
- {crawlee-1.0.0rc1/tests/unit → crawlee-1.0.1/tests}/__init__.py +0 -0
- /crawlee-1.0.0rc1/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawling_context.py → /crawlee-1.0.1/tests/e2e/__init__.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/e2e/conftest.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/e2e/project_template/test_static_crawlers_templates.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/e2e/project_template/utils.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/README.md +0 -0
- /crawlee-1.0.0rc1/website/static/.nojekyll → /crawlee-1.0.1/tests/unit/__init__.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_autoscaling/test_system_status.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_statistics/test_error_tracker.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_statistics/test_periodic_logging.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_statistics/test_persistence.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_statistics/test_request_processing_record.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_utils/test_console.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_utils/test_crypto.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_utils/test_file.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_utils/test_globs.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_utils/test_html_to_text.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_utils/test_measure_time.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_utils/test_recurring_task.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_utils/test_requests.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_utils/test_robots.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_utils/test_sitemap.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_utils/test_system.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_utils/test_timedelata_ms.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_utils/test_urls.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/browsers/test_playwright_browser.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler_statistics.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/crawlers/_basic/test_context_pipeline.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/events/test_local_event_manager.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/fingerprint_suite/test_adapters.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/http_clients/test_http_clients.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/http_clients/test_httpx.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/otel/test_crawler_instrumentor.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/proxy_configuration/test_new_proxy_info.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/proxy_configuration/test_tiers.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/request_loaders/test_request_list.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/server.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/server_endpoints.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/sessions/test_cookies.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/sessions/test_models.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/sessions/test_session.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/storages/test_request_manager_tandem.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/test_cli.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/test_configuration.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/test_log_config.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/test_router.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/.eslintrc.json +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/.yarnrc.yml +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/babel.config.js +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/build_api_reference.sh +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/generate_module_shortcuts.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/patches/@docusaurus+core+3.4.0.patch +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/patches/@docusaurus+core+3.5.2.patch +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/roa-loader/index.js +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/roa-loader/package.json +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/sidebars.js +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/ApiLink.jsx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Button.jsx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Button.module.css +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/CopyButton.jsx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/CopyButton.module.css +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Gradients.jsx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Highlights.jsx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Highlights.module.css +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/HomepageCliExample.jsx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/HomepageCliExample.module.css +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/HomepageCtaSection.jsx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/HomepageCtaSection.module.css +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/HomepageHeroSection.jsx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/HomepageHeroSection.module.css +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/LanguageInfoWidget.jsx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/LanguageInfoWidget.module.css +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/LanguageSwitch.jsx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/LanguageSwitch.module.css +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/RiverSection.jsx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/RiverSection.module.css +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/ThreeCardsWithIcon.jsx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/ThreeCardsWithIcon.module.css +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/animated-crawlee-logo-dark.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/animated-crawlee-logo-light.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/RunnableCodeBlock.jsx +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/RunnableCodeBlock.module.css +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/pages/home_page_example.py +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/pages/index.js +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/pages/index.module.css +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/ColorModeToggle/dark-mode-icon.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/ColorModeToggle/index.js +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/ColorModeToggle/light-mode-icon.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/ColorModeToggle/styles.module.css +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/DocItem/Layout/index.js +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/DocItem/Layout/styles.module.css +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/Footer/LinkItem/index.js +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/Footer/LinkItem/index.module.css +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/Footer/index.js +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/Footer/index.module.css +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/MDXComponents/A.js +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/Navbar/Content/index.js +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/Navbar/Content/styles.module.css +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/Navbar/Logo/index.js +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/Navbar/Logo/index.module.css +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/Navbar/MobileSidebar/Header/index.js +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/Navbar/MobileSidebar/Header/index.module.css +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/Navbar/MobileSidebar/Layout/index.js +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/Navbar/MobileSidebar/PrimaryMenu/index.js +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/Navbar/MobileSidebar/index.js +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/NavbarItem/ComponentTypes.js +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/font/lota.woff +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/font/lota.woff2 +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/API.png +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/apify_logo.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/apify_og_SDK.png +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/apify_sdk.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/apify_sdk_white.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/arrow_right.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/auto-scaling-dark.webp +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/auto-scaling-light.webp +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/check.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/chrome-scrape-dark.gif +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/chrome-scrape-light.gif +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/cloud_icon.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/community-dark-icon.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/community-light-icon.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/crawlee-dark-new.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/crawlee-dark.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/crawlee-javascript-dark.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/crawlee-javascript-light.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/crawlee-light-new.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/crawlee-light.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/crawlee-logo-monocolor.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/crawlee-logo.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/crawlee-python-dark.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/crawlee-python-light.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/crawlee-python-og.png +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/defaults-dark-icon.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/defaults-light-icon.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/discord-brand-dark.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/discord-brand.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/docusaurus.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/external-link.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/favicon.ico +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/favorite-tools-dark.webp +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/favorite-tools-light.webp +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/features/auto-scaling.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/features/automate-everything.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/features/fingerprints.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/features/node-requests.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/features/runs-on-py.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/features/storage.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/features/works-everywhere.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/fill-and-submit-web-form/00.jpg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/fill-and-submit-web-form/01.jpg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/fill-and-submit-web-form/02.jpg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/fill-and-submit-web-form/03.jpg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/getting-started/current-price.jpg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/getting-started/scraping-practice.jpg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/getting-started/select-an-element.jpg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/getting-started/selected-element.jpg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/getting-started/sku.jpg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/getting-started/title.jpg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/github-brand-dark.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/github-brand.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/guides/jaeger_otel_search_view_example.png +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/guides/jaeger_otel_trace_example.png +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/hearth copy.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/hearth.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/javascript_logo.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/js_file.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/logo-big.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/logo-blur.png +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/logo-blur.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/logo-zoom.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/menu-arrows.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/oss_logo.png +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/puppeteer-live-view-dashboard.png +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/puppeteer-live-view-detail.png +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/queue-dark-icon.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/queue-light-icon.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/resuming-paused-crawl/00.webp +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/resuming-paused-crawl/01.webp +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/robot.png +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/routing-dark-icon.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/routing-light-icon.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/scraping-utils-dark-icon.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/scraping-utils-light-icon.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/smart-proxy-dark.webp +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/smart-proxy-light.webp +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/source_code.png +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/system.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/triangles_dark.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/triangles_light.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/workflow.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/zero-setup-dark-icon.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/zero-setup-light-icon.svg +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/js/custom.js +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/robots.txt +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/tools/docs-prettier.config.js +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/tools/utils/externalLink.js +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/tools/website_gif/chrome-scrape-dark.gif +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/tools/website_gif/chrome-scrape-dark.mp4 +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/tools/website_gif/chrome-scrape-light.gif +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/tools/website_gif/chrome-scrape-light.mp4 +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/tools/website_gif/website_gif.mjs +0 -0
- {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/tsconfig.eslint.json +0 -0
{crawlee-1.0.0rc1 → crawlee-1.0.1}/.github/workflows/build_and_deploy_docs.yaml
RENAMED
@@ -30,12 +30,12 @@ jobs:
           ref: ${{ github.event_name == 'workflow_call' && inputs.ref || github.ref }}

       - name: Set up Node
-        uses: actions/setup-node@
+        uses: actions/setup-node@v5
         with:
           node-version: ${{ env.NODE_VERSION }}

       - name: Set up Python
-        uses: actions/setup-python@
+        uses: actions/setup-python@v6
         with:
           python-version: ${{ env.PYTHON_VERSION }}

@@ -59,7 +59,7 @@
         uses: actions/configure-pages@v5

       - name: Upload GitHub Pages artifact
-        uses: actions/upload-pages-artifact@
+        uses: actions/upload-pages-artifact@v4
         with:
           path: ./website/build
{crawlee-1.0.0rc1 → crawlee-1.0.1}/.github/workflows/templates_e2e_tests.yaml
RENAMED
@@ -27,7 +27,7 @@
         uses: actions/checkout@v5

       - name: Setup node
-        uses: actions/setup-node@
+        uses: actions/setup-node@v5
         with:
           node-version: ${{ env.NODE_VERSION }}

@@ -35,7 +35,7 @@
         run: npm install -g apify-cli

       - name: Set up Python ${{ env.PYTHON_VERSION }}
-        uses: actions/setup-python@
+        uses: actions/setup-python@v6
         with:
           python-version: ${{ env.PYTHON_VERSION }}
{crawlee-1.0.0rc1 → crawlee-1.0.1}/CHANGELOG.md
RENAMED
@@ -2,7 +2,15 @@

 All notable changes to this project will be documented in this file.

-## [1.0.
+## [1.0.1](https://github.com/apify/crawlee-python/releases/tag/v1.0.1) (2025-10-06)
+
+### 🐛 Bug Fixes
+
+- Fix memory leak in `PlaywrightCrawler` on browser context creation ([#1446](https://github.com/apify/crawlee-python/pull/1446)) ([bb181e5](https://github.com/apify/crawlee-python/commit/bb181e58d8070fba38e62d6e57fe981a00e5f035)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1443](https://github.com/apify/crawlee-python/issues/1443)
+- Update templates to handle optional httpx client ([#1440](https://github.com/apify/crawlee-python/pull/1440)) ([c087efd](https://github.com/apify/crawlee-python/commit/c087efd39baedf46ca3e5cae1ddc1acd6396e6c1)) by [@Pijukatel](https://github.com/Pijukatel)
+
+
+## [1.0.0](https://github.com/apify/crawlee-python/releases/tag/v1.0.0) (2025-09-29)

 ### 🚀 Features

@@ -17,6 +25,10 @@ All notable changes to this project will be documented in this file.
 - Add `impit` option for Crawlee CLI ([#1312](https://github.com/apify/crawlee-python/pull/1312)) ([508d7ce](https://github.com/apify/crawlee-python/commit/508d7ce4d998f37ab2adcf9c057c3c635a69f863)) by [@Mantisus](https://github.com/Mantisus)
 - Persist RequestList state ([#1274](https://github.com/apify/crawlee-python/pull/1274)) ([cc68014](https://github.com/apify/crawlee-python/commit/cc680147ba3cc8b35b9da70274e53e6f5dd92434)) by [@janbuchar](https://github.com/janbuchar), closes [#99](https://github.com/apify/crawlee-python/issues/99)
 - Persist `DefaultRenderingTypePredictor` state ([#1340](https://github.com/apify/crawlee-python/pull/1340)) ([fad4c25](https://github.com/apify/crawlee-python/commit/fad4c25fc712915c4a45b24e3290b6f5dbd8a683)) by [@Mantisus](https://github.com/Mantisus), closes [#1272](https://github.com/apify/crawlee-python/issues/1272)
+- Persist the `SitemapRequestLoader` state ([#1347](https://github.com/apify/crawlee-python/pull/1347)) ([27ef9ad](https://github.com/apify/crawlee-python/commit/27ef9ad194552ea9f1321d91a7a52054be9a8a51)) by [@Mantisus](https://github.com/Mantisus), closes [#1269](https://github.com/apify/crawlee-python/issues/1269)
+- Add support for NDU storages ([#1401](https://github.com/apify/crawlee-python/pull/1401)) ([5dbd212](https://github.com/apify/crawlee-python/commit/5dbd212663e7abc37535713f4c6e3a5bbf30a12e)) by [@vdusek](https://github.com/vdusek), closes [#1175](https://github.com/apify/crawlee-python/issues/1175)
+- Add RQ id, name, alias args to `add_requests` and `enqueue_links` methods ([#1413](https://github.com/apify/crawlee-python/pull/1413)) ([1cae2bc](https://github.com/apify/crawlee-python/commit/1cae2bca0b1508fcb3cb419dc239caf33e20a7ef)) by [@Mantisus](https://github.com/Mantisus), closes [#1402](https://github.com/apify/crawlee-python/issues/1402)
+- Add `SqlStorageClient` based on `sqlalchemy` v2+ ([#1339](https://github.com/apify/crawlee-python/pull/1339)) ([07c75a0](https://github.com/apify/crawlee-python/commit/07c75a078b443b58bfaaeb72eb2aa1439458dc47)) by [@Mantisus](https://github.com/Mantisus), closes [#307](https://github.com/apify/crawlee-python/issues/307)

 ### 🐛 Bug Fixes

@@ -27,6 +39,9 @@ All notable changes to this project will be documented in this file.
 - Fix `timeout` for `stream` method in `ImpitHttpClient` ([#1352](https://github.com/apify/crawlee-python/pull/1352)) ([54b693b](https://github.com/apify/crawlee-python/commit/54b693b838f135a596e1e9493b565bc558b19a3a)) by [@Mantisus](https://github.com/Mantisus)
 - Include reason in the session rotation warning logs ([#1363](https://github.com/apify/crawlee-python/pull/1363)) ([d6d7a45](https://github.com/apify/crawlee-python/commit/d6d7a45dd64a906419d9552c45062d726cbb1a0f)) by [@vdusek](https://github.com/vdusek), closes [#1318](https://github.com/apify/crawlee-python/issues/1318)
 - Improve crawler statistics logging ([#1364](https://github.com/apify/crawlee-python/pull/1364)) ([1eb6da5](https://github.com/apify/crawlee-python/commit/1eb6da5dd85870124593dcad877284ccaed9c0ce)) by [@vdusek](https://github.com/vdusek), closes [#1317](https://github.com/apify/crawlee-python/issues/1317)
+- Do not add a request that is already in progress to `MemoryRequestQueueClient` ([#1384](https://github.com/apify/crawlee-python/pull/1384)) ([3af326c](https://github.com/apify/crawlee-python/commit/3af326c9dfa8fffd56a42ca42981374613739e39)) by [@Mantisus](https://github.com/Mantisus), closes [#1383](https://github.com/apify/crawlee-python/issues/1383)
+- Save `RequestQueueState` for `FileSystemRequestQueueClient` in default KVS ([#1411](https://github.com/apify/crawlee-python/pull/1411)) ([6ee60a0](https://github.com/apify/crawlee-python/commit/6ee60a08ac1f9414e1b792f4935cc3799cb5089a)) by [@Mantisus](https://github.com/Mantisus), closes [#1410](https://github.com/apify/crawlee-python/issues/1410)
+- Set default desired concurrency for non-browser crawlers to 10 ([#1419](https://github.com/apify/crawlee-python/pull/1419)) ([1cc9401](https://github.com/apify/crawlee-python/commit/1cc940197600d2539bda967880d7f9d241eb8c3e)) by [@vdusek](https://github.com/vdusek)

 ### Refactor

@@ -36,6 +51,8 @@ All notable changes to this project will be documented in this file.
 - [**breaking**] Replace `HttpxHttpClient` with `ImpitHttpClient` as default HTTP client ([#1307](https://github.com/apify/crawlee-python/pull/1307)) ([c803a97](https://github.com/apify/crawlee-python/commit/c803a976776a76846866d533e3a3ee8144e248c4)) by [@Mantisus](https://github.com/Mantisus), closes [#1079](https://github.com/apify/crawlee-python/issues/1079)
 - [**breaking**] Change Dataset unwind parameter to accept list of strings ([#1357](https://github.com/apify/crawlee-python/pull/1357)) ([862a203](https://github.com/apify/crawlee-python/commit/862a20398f00fe91802fe7a1ccd58b05aee053a1)) by [@vdusek](https://github.com/vdusek)
 - [**breaking**] Remove `Request.id` field ([#1366](https://github.com/apify/crawlee-python/pull/1366)) ([32f3580](https://github.com/apify/crawlee-python/commit/32f3580e9775a871924ab1233085d0c549c4cd52)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1358](https://github.com/apify/crawlee-python/issues/1358)
+- [**breaking**] Refactor storage creation and caching, configuration and services ([#1386](https://github.com/apify/crawlee-python/pull/1386)) ([04649bd](https://github.com/apify/crawlee-python/commit/04649bde60d46b2bc18ae4f6e3fd9667d02a9cef)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1379](https://github.com/apify/crawlee-python/issues/1379)
+


 ## [0.6.12](https://github.com/apify/crawlee-python/releases/tag/v0.6.12) (2025-07-30)
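Review note: of the breaking changes above, the default HTTP client swap is the one most likely to affect existing projects. A minimal sketch of opting back into the previous httpx-based client, assuming the `httpx` extra is installed and that the crawler accepts an `http_client` argument (as the examples later in this diff show for `ImpitHttpClient`):

import asyncio

from crawlee.crawlers import ParselCrawler
from crawlee.http_clients import HttpxHttpClient


async def main() -> None:
    # Pin the previous httpx-based client instead of the new ImpitHttpClient default.
    crawler = ParselCrawler(http_client=HttpxHttpClient())
    await crawler.run(['https://crawlee.dev/'])


if __name__ == '__main__':
    asyncio.run(main())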
{crawlee-1.0.0rc1 → crawlee-1.0.1}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: crawlee
-Version: 1.0.
+Version: 1.0.1
 Summary: Crawlee for Python
 Project-URL: Apify Homepage, https://apify.com
 Project-URL: Changelog, https://crawlee.dev/python/docs/changelog
@@ -227,12 +227,12 @@ Classifier: Topic :: Software Development :: Libraries
 Requires-Python: >=3.10
 Requires-Dist: cachetools>=5.5.0
 Requires-Dist: colorama>=0.4.0
-Requires-Dist: impit>=0.
+Requires-Dist: impit>=0.6.1
 Requires-Dist: more-itertools>=10.2.0
 Requires-Dist: protego>=0.5.0
 Requires-Dist: psutil>=6.0.0
-Requires-Dist: pydantic!=2.10.0,!=2.10.1,!=2.10.2,>=2.8.0
 Requires-Dist: pydantic-settings!=2.7.0,!=2.7.1,!=2.8.0,>=2.2.0
+Requires-Dist: pydantic>=2.11.0
 Requires-Dist: pyee>=9.0.0
 Requires-Dist: tldextract>=5.1.0
 Requires-Dist: typing-extensions>=4.1.0
@@ -244,7 +244,9 @@ Requires-Dist: jaro-winkler>=2.0.3; extra == 'adaptive-crawler'
 Requires-Dist: playwright>=1.27.0; extra == 'adaptive-crawler'
 Requires-Dist: scikit-learn>=1.6.0; extra == 'adaptive-crawler'
 Provides-Extra: all
+Requires-Dist: aiosqlite>=0.21.0; extra == 'all'
 Requires-Dist: apify-fingerprint-datapoints>=0.0.2; extra == 'all'
+Requires-Dist: asyncpg>=0.24.0; extra == 'all'
 Requires-Dist: beautifulsoup4[lxml]>=4.12.0; extra == 'all'
 Requires-Dist: browserforge>=1.2.3; extra == 'all'
 Requires-Dist: cookiecutter>=2.6.0; extra == 'all'
@@ -263,6 +265,7 @@ Requires-Dist: parsel>=1.10.0; extra == 'all'
 Requires-Dist: playwright>=1.27.0; extra == 'all'
 Requires-Dist: rich>=13.9.0; extra == 'all'
 Requires-Dist: scikit-learn>=1.6.0; extra == 'all'
+Requires-Dist: sqlalchemy[asyncio]<3.0.0,>=2.0.0; extra == 'all'
 Requires-Dist: typer>=0.12.0; extra == 'all'
 Requires-Dist: wrapt>=1.17.0; extra == 'all'
 Provides-Extra: beautifulsoup
@@ -293,6 +296,12 @@ Provides-Extra: playwright
 Requires-Dist: apify-fingerprint-datapoints>=0.0.2; extra == 'playwright'
 Requires-Dist: browserforge>=1.2.3; extra == 'playwright'
 Requires-Dist: playwright>=1.27.0; extra == 'playwright'
+Provides-Extra: sql-postgres
+Requires-Dist: asyncpg>=0.24.0; extra == 'sql-postgres'
+Requires-Dist: sqlalchemy[asyncio]<3.0.0,>=2.0.0; extra == 'sql-postgres'
+Provides-Extra: sql-sqlite
+Requires-Dist: aiosqlite>=0.21.0; extra == 'sql-sqlite'
+Requires-Dist: sqlalchemy[asyncio]<3.0.0,>=2.0.0; extra == 'sql-sqlite'
 Description-Content-Type: text/markdown

 <h1 align="center">
@@ -327,8 +336,6 @@ Description-Content-Type: text/markdown

 Crawlee covers your crawling and scraping end-to-end and **helps you build reliable scrapers. Fast.**

-> 🚀 Crawlee for Python is open to early adopters!
-
 Your crawlers will appear almost human-like and fly under the radar of modern bot protections even with the default configuration. Crawlee gives you the tools to crawl the web for links, scrape data and persistently store it in machine-readable formats, without having to worry about the technical details. And thanks to rich configuration options, you can tweak almost any aspect of Crawlee to suit your project's needs if the default settings don't cut it.

 > 👉 **View full documentation, guides and examples on the [Crawlee project website](https://crawlee.dev/python/)** 👈
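Review note: the two new `sql-postgres` and `sql-sqlite` extras above correspond to the `SqlStorageClient` feature from the changelog. Assuming a standard pip setup, they would be installed as:

pip install 'crawlee[sql-sqlite]'    # SQLite backend via aiosqlite + sqlalchemy
pip install 'crawlee[sql-postgres]'  # PostgreSQL backend via asyncpg + sqlalchemy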
{crawlee-1.0.0rc1 → crawlee-1.0.1}/README.md
RENAMED
@@ -30,8 +30,6 @@

 Crawlee covers your crawling and scraping end-to-end and **helps you build reliable scrapers. Fast.**

-> 🚀 Crawlee for Python is open to early adopters!
-
 Your crawlers will appear almost human-like and fly under the radar of modern bot protections even with the default configuration. Crawlee gives you the tools to crawl the web for links, scrape data and persistently store it in machine-readable formats, without having to worry about the technical details. And thanks to rich configuration options, you can tweak almost any aspect of Crawlee to suit your project's needs if the default settings don't cut it.

 > 👉 **View full documentation, guides and examples on the [Crawlee project website](https://crawlee.dev/python/)** 👈
{crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/playwright_crawler_adaptive/handler.py
RENAMED
@@ -5,7 +5,7 @@ from crawlee.crawlers import AdaptivePlaywrightCrawler, AdaptivePlaywrightCrawlingContext


 async def main() -> None:
-    crawler = AdaptivePlaywrightCrawler.
+    crawler = AdaptivePlaywrightCrawler.with_parsel_static_parser()

     @crawler.router.default_handler
     async def request_handler(context: AdaptivePlaywrightCrawlingContext) -> None:
{crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/request_loaders/rl_basic_example.py
RENAMED
@@ -18,6 +18,7 @@ async def main() -> None:
     # Fetch and process requests from the queue.
     while request := await request_list.fetch_next_request():
         # Do something with it...
+        print(f'Processing {request.url}')

         # And mark it as handled.
         await request_list.mark_request_as_handled(request)
crawlee-1.0.1/docs/guides/code_examples/request_loaders/rl_basic_example_with_persist.py
ADDED
@@ -0,0 +1,46 @@
+import asyncio
+import logging
+
+from crawlee import service_locator
+from crawlee.request_loaders import RequestList
+
+logging.basicConfig(level=logging.INFO, format='%(asctime)s-%(levelname)s-%(message)s')
+logger = logging.getLogger(__name__)
+
+
+# Disable clearing the `KeyValueStore` on each run.
+# This is necessary so that the state keys are not cleared at startup.
+# The recommended way to achieve this behavior is setting the environment variable
+# `CRAWLEE_PURGE_ON_START=0`
+configuration = service_locator.get_configuration()
+configuration.purge_on_start = False
+
+
+async def main() -> None:
+    # Open the request list, if it does not exist, it will be created.
+    # Leave name empty to use the default request list.
+    request_list = RequestList(
+        name='my-request-list',
+        requests=[
+            'https://apify.com/',
+            'https://crawlee.dev/',
+            'https://crawlee.dev/python/',
+        ],
+        # Enable persistence
+        persist_state_key='my-persist-state',
+        persist_requests_key='my-persist-requests',
+    )
+
+    # We receive only one request.
+    # Each time you run it, it will be a new request until you exhaust the `RequestList`.
+    request = await request_list.fetch_next_request()
+    if request:
+        logger.info(f'Processing request: {request.url}')
+        # Do something with it...
+
+        # And mark it as handled.
+        await request_list.mark_request_as_handled(request)
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
{crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/request_loaders/rl_tandem_example.py
RENAMED
@@ -8,9 +8,11 @@ async def main() -> None:
     # Create a static request list.
     request_list = RequestList(['https://crawlee.dev', 'https://apify.com'])

+    # highlight-start
     # Convert the request list to a request manager using the to_tandem method.
     # It is a tandem with the default request queue.
     request_manager = await request_list.to_tandem()
+    # highlight-end

     # Create a crawler and pass the request manager to it.
     crawler = ParselCrawler(
@@ -20,9 +22,20 @@ async def main() -> None:

     @crawler.router.default_handler
     async def handler(context: ParselCrawlingContext) -> None:
+        context.log.info(f'Processing {context.request.url}')
+
         # New links will be enqueued directly to the queue.
         await context.enqueue_links()

+        # Extract data using Parsel's XPath and CSS selectors.
+        data = {
+            'url': context.request.url,
+            'title': context.selector.xpath('//title/text()').get(),
+        }
+
+        # Push extracted data to the dataset.
+        await context.push_data(data)
+
     await crawler.run()
{crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/request_loaders/rl_tandem_example_explicit.py
RENAMED
@@ -23,9 +23,20 @@ async def main() -> None:

     @crawler.router.default_handler
     async def handler(context: ParselCrawlingContext) -> None:
+        context.log.info(f'Processing {context.request.url}')
+
         # New links will be enqueued directly to the queue.
         await context.enqueue_links()

+        # Extract data using Parsel's XPath and CSS selectors.
+        data = {
+            'url': context.request.url,
+            'title': context.selector.xpath('//title/text()').get(),
+        }
+
+        # Push extracted data to the dataset.
+        await context.push_data(data)
+
     await crawler.run()
crawlee-1.0.1/docs/guides/code_examples/request_loaders/sitemap_basic_example.py
ADDED
@@ -0,0 +1,30 @@
+import asyncio
+import re
+
+from crawlee.http_clients import ImpitHttpClient
+from crawlee.request_loaders import SitemapRequestLoader
+
+
+async def main() -> None:
+    # Create an HTTP client for fetching the sitemap.
+    http_client = ImpitHttpClient()
+
+    # Create a sitemap request loader with filtering rules.
+    sitemap_loader = SitemapRequestLoader(
+        sitemap_urls=['https://crawlee.dev/sitemap.xml'],
+        http_client=http_client,
+        include=[re.compile(r'.*docs.*')],  # Only include URLs containing 'docs'.
+        max_buffer_size=500,  # Keep up to 500 URLs in memory before processing.
+    )
+
+    # We work with the loader until we process all relevant links from the sitemap.
+    while request := await sitemap_loader.fetch_next_request():
+        # Do something with it...
+        print(f'Processing {request.url}')
+
+        # And mark it as handled.
+        await sitemap_loader.mark_request_as_handled(request)
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
crawlee-1.0.1/docs/guides/code_examples/request_loaders/sitemap_example_with_persist.py
ADDED
@@ -0,0 +1,45 @@
+import asyncio
+import logging
+
+from crawlee import service_locator
+from crawlee.http_clients import ImpitHttpClient
+from crawlee.request_loaders import SitemapRequestLoader
+
+logging.basicConfig(level=logging.INFO, format='%(asctime)s-%(levelname)s-%(message)s')
+logger = logging.getLogger(__name__)
+
+
+# Disable clearing the `KeyValueStore` on each run.
+# This is necessary so that the state keys are not cleared at startup.
+# The recommended way to achieve this behavior is setting the environment variable
+# `CRAWLEE_PURGE_ON_START=0`
+configuration = service_locator.get_configuration()
+configuration.purge_on_start = False
+
+
+async def main() -> None:
+    # Create an HTTP client for fetching sitemaps
+    # Use the context manager for `SitemapRequestLoader` to correctly save the state when
+    # the work is completed.
+    async with (
+        ImpitHttpClient() as http_client,
+        SitemapRequestLoader(
+            sitemap_urls=['https://crawlee.dev/sitemap.xml'],
+            http_client=http_client,
+            # Enable persistence
+            persist_state_key='my-persist-state',
+        ) as sitemap_loader,
+    ):
+        # We receive only one request.
+        # Each time you run it, it will be a new request until you exhaust the sitemap.
+        request = await sitemap_loader.fetch_next_request()
+        if request:
+            logger.info(f'Processing request: {request.url}')
+            # Do something with it...
+
+            # And mark it as handled.
+            await sitemap_loader.mark_request_as_handled(request)
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
crawlee-1.0.1/docs/guides/code_examples/request_loaders/sitemap_tandem_example.py
ADDED
@@ -0,0 +1,53 @@
+import asyncio
+import re
+
+from crawlee.crawlers import ParselCrawler, ParselCrawlingContext
+from crawlee.http_clients import ImpitHttpClient
+from crawlee.request_loaders import SitemapRequestLoader
+
+
+async def main() -> None:
+    # Create an HTTP client for fetching the sitemap.
+    http_client = ImpitHttpClient()
+
+    # Create a sitemap request loader with filtering rules.
+    sitemap_loader = SitemapRequestLoader(
+        sitemap_urls=['https://crawlee.dev/sitemap.xml'],
+        http_client=http_client,
+        include=[re.compile(r'.*docs.*')],  # Only include URLs containing 'docs'.
+        max_buffer_size=500,  # Keep up to 500 URLs in memory before processing.
+    )
+
+    # highlight-start
+    # Convert the sitemap loader into a request manager linked
+    # to the default request queue.
+    request_manager = await sitemap_loader.to_tandem()
+    # highlight-end
+
+    # Create a crawler and pass the request manager to it.
+    crawler = ParselCrawler(
+        request_manager=request_manager,
+        max_requests_per_crawl=10,  # Limit the max requests per crawl.
+    )
+
+    @crawler.router.default_handler
+    async def handler(context: ParselCrawlingContext) -> None:
+        context.log.info(f'Processing {context.request.url}')
+
+        # New links will be enqueued directly to the queue.
+        await context.enqueue_links()
+
+        # Extract data using Parsel's XPath and CSS selectors.
+        data = {
+            'url': context.request.url,
+            'title': context.selector.xpath('//title/text()').get(),
+        }
+
+        # Push extracted data to the dataset.
+        await context.push_data(data)
+
+    await crawler.run()
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
crawlee-1.0.1/docs/guides/code_examples/request_loaders/sitemap_tandem_example_explicit.py
ADDED
@@ -0,0 +1,54 @@
+import asyncio
+import re
+
+from crawlee.crawlers import ParselCrawler, ParselCrawlingContext
+from crawlee.http_clients import ImpitHttpClient
+from crawlee.request_loaders import RequestManagerTandem, SitemapRequestLoader
+from crawlee.storages import RequestQueue
+
+
+async def main() -> None:
+    # Create an HTTP client for fetching the sitemap.
+    http_client = ImpitHttpClient()
+
+    # Create a sitemap request loader with filtering rules.
+    sitemap_loader = SitemapRequestLoader(
+        sitemap_urls=['https://crawlee.dev/sitemap.xml'],
+        http_client=http_client,
+        include=[re.compile(r'.*docs.*')],  # Only include URLs containing 'docs'.
+        max_buffer_size=500,  # Keep up to 500 URLs in memory before processing.
+    )
+
+    # Open the default request queue.
+    request_queue = await RequestQueue.open()
+
+    # And combine them together to a single request manager.
+    request_manager = RequestManagerTandem(sitemap_loader, request_queue)
+
+    # Create a crawler and pass the request manager to it.
+    crawler = ParselCrawler(
+        request_manager=request_manager,
+        max_requests_per_crawl=10,  # Limit the max requests per crawl.
+    )
+
+    @crawler.router.default_handler
+    async def handler(context: ParselCrawlingContext) -> None:
+        context.log.info(f'Processing {context.request.url}')
+
+        # New links will be enqueued directly to the queue.
+        await context.enqueue_links()
+
+        # Extract data using Parsel's XPath and CSS selectors.
+        data = {
+            'url': context.request.url,
+            'title': context.selector.xpath('//title/text()').get(),
+        }
+
+        # Push extracted data to the dataset.
+        await context.push_data(data)
+
+    await crawler.run()
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
crawlee-1.0.1/docs/guides/code_examples/service_locator/service_storage_configuration.py
ADDED
@@ -0,0 +1,30 @@
+import asyncio
+from datetime import timedelta
+
+from crawlee import service_locator
+from crawlee.configuration import Configuration
+from crawlee.storage_clients import MemoryStorageClient
+from crawlee.storages import Dataset
+
+
+async def main() -> None:
+    configuration = Configuration(
+        log_level='DEBUG',
+        headless=False,
+        persist_state_interval=timedelta(seconds=30),
+    )
+    # Set the custom configuration as the global default configuration.
+    service_locator.set_configuration(configuration)
+
+    # Use the global defaults when creating the dataset (or other storage).
+    dataset_1 = await Dataset.open()
+
+    # Or set explicitly specific configuration if
+    # you do not want to rely on global defaults.
+    dataset_2 = await Dataset.open(
+        storage_client=MemoryStorageClient(), configuration=configuration
+    )
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
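Review note on ordering in the example above: the custom configuration is registered via `service_locator.set_configuration()` before any storage is opened. Given the service-locator refactor in this release, setting global services after they have already been retrieved and used may be rejected, so configuring global defaults first appears to be the intended pattern.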
crawlee-1.0.1/docs/guides/code_examples/storage_clients/sql_storage_client_basic_example.py
ADDED
@@ -0,0 +1,12 @@
+from crawlee.crawlers import ParselCrawler
+from crawlee.storage_clients import SqlStorageClient
+
+
+async def main() -> None:
+    # Create a new instance of storage client.
+    # This will create an SQLite database file crawlee.db or created tables in your
+    # database if you pass `connection_string` or `engine`
+    # Use the context manager to ensure that connections are properly cleaned up.
+    async with SqlStorageClient() as storage_client:
+        # And pass it to the crawler.
+        crawler = ParselCrawler(storage_client=storage_client)
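Review note: the comment in the file above mentions passing `connection_string` as an alternative to the default crawlee.db file. A minimal sketch under that assumption, using SQLAlchemy's async SQLite driver URL format:

from crawlee.crawlers import ParselCrawler
from crawlee.storage_clients import SqlStorageClient


async def main() -> None:
    # Point the client at an explicit database instead of the default crawlee.db.
    async with SqlStorageClient(
        connection_string='sqlite+aiosqlite:///my_database.db',
    ) as storage_client:
        crawler = ParselCrawler(storage_client=storage_client)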
crawlee-1.0.1/docs/guides/code_examples/storage_clients/sql_storage_client_configuration_example.py
ADDED
@@ -0,0 +1,33 @@
+from sqlalchemy.ext.asyncio import create_async_engine
+
+from crawlee.configuration import Configuration
+from crawlee.crawlers import ParselCrawler
+from crawlee.storage_clients import SqlStorageClient
+
+
+async def main() -> None:
+    # Create a new instance of storage client.
+    # On first run, also creates tables in your PostgreSQL database.
+    # Use the context manager to ensure that connections are properly cleaned up.
+    async with SqlStorageClient(
+        # Create an `engine` with the desired configuration
+        engine=create_async_engine(
+            'postgresql+asyncpg://myuser:mypassword@localhost:5432/postgres',
+            future=True,
+            pool_size=5,
+            max_overflow=10,
+            pool_recycle=3600,
+            pool_pre_ping=True,
+            echo=False,
+        )
+    ) as storage_client:
+        # Create a configuration with custom settings.
+        configuration = Configuration(
+            purge_on_start=False,
+        )
+
+        # And pass them to the crawler.
+        crawler = ParselCrawler(
+            storage_client=storage_client,
+            configuration=configuration,
+        )
crawlee-1.0.1/docs/guides/code_examples/storages/opening.py
ADDED
@@ -0,0 +1,19 @@
+import asyncio
+
+from crawlee.storages import Dataset
+
+
+async def main() -> None:
+    # Named storage (persists across runs)
+    dataset_named = await Dataset.open(name='my-persistent-dataset')
+
+    # Unnamed storage with alias (purged on start)
+    dataset_unnamed = await Dataset.open(alias='temporary-results')
+
+    # Default unnamed storage (both are equivalent and purged on start)
+    dataset_default = await Dataset.open()
+    dataset_default = await Dataset.open(alias='default')
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
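Review note: a short sketch of why the named/alias distinction above matters. Items pushed to a named dataset should accumulate across runs, while alias storages start empty whenever purge-on-start is enabled (`push_data` and `get_data` are the standard `Dataset` accessors; the `count` field on the returned page object is an assumption):

import asyncio

from crawlee.storages import Dataset


async def main() -> None:
    # Named storages are never purged, so this dataset grows by one item per run.
    dataset = await Dataset.open(name='my-persistent-dataset')
    await dataset.push_data({'note': 'one item per run'})

    page = await dataset.get_data()
    print(f'Items so far: {page.count}')


if __name__ == '__main__':
    asyncio.run(main())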