crawlee 1.0.2b3__tar.gz → 1.0.5b7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crawlee might be problematic. Click here for more details.

Files changed (680)
  1. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/.github/workflows/build_and_deploy_docs.yaml +2 -2
  2. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/.github/workflows/templates_e2e_tests.yaml +2 -2
  3. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/.gitignore +1 -0
  4. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/CHANGELOG.md +36 -2
  5. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/PKG-INFO +1 -1
  6. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/using_browser_profiles_chrome.py +2 -4
  7. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/using_browser_profile.mdx +0 -2
  8. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/architecture_overview.mdx +1 -1
  9. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/avoid_blocking.mdx +1 -1
  10. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/request_loaders.mdx +8 -2
  11. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/09_running_in_cloud.mdx +1 -1
  12. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/pyproject.toml +1 -1
  13. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/pyproject.toml +2 -2
  14. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_request.py +31 -20
  15. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_service_locator.py +4 -4
  16. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_types.py +10 -16
  17. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_utils/recoverable_state.py +32 -8
  18. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_utils/recurring_task.py +15 -0
  19. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_utils/robots.py +17 -5
  20. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_utils/sitemap.py +1 -1
  21. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_utils/urls.py +9 -2
  22. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/browsers/_browser_pool.py +4 -1
  23. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/browsers/_playwright_browser_controller.py +1 -1
  24. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/browsers/_playwright_browser_plugin.py +17 -3
  25. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/browsers/_types.py +1 -1
  26. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +3 -1
  27. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +33 -12
  28. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_basic/_basic_crawler.py +23 -12
  29. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_playwright/_playwright_crawler.py +11 -4
  30. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/fingerprint_suite/_header_generator.py +2 -2
  31. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/request_loaders/_sitemap_request_loader.py +5 -0
  32. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/statistics/_error_snapshotter.py +1 -1
  33. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/statistics/_statistics.py +15 -6
  34. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/_file_system/_request_queue_client.py +24 -6
  35. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/_sql/_db_models.py +1 -2
  36. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/_sql/_key_value_store_client.py +3 -2
  37. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/_sql/_request_queue_client.py +18 -4
  38. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/_sql/_storage_client.py +1 -1
  39. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storages/_key_value_store.py +5 -2
  40. crawlee-1.0.5b7/tests/unit/_autoscaling/test_snapshotter.py +353 -0
  41. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/browsers/test_playwright_browser_plugin.py +10 -0
  42. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/conftest.py +19 -7
  43. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler.py +105 -5
  44. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/crawlers/_basic/test_basic_crawler.py +58 -0
  45. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py +37 -3
  46. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/crawlers/_http/test_http_crawler.py +2 -2
  47. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/crawlers/_parsel/test_parsel_crawler.py +37 -3
  48. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/crawlers/_playwright/test_playwright_crawler.py +34 -1
  49. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/otel/test_crawler_instrumentor.py +8 -2
  50. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/server.py +10 -0
  51. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/server_endpoints.py +11 -0
  52. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/storage_clients/_file_system/test_fs_rq_client.py +10 -2
  53. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/storages/test_dataset.py +2 -2
  54. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/storages/test_key_value_store.py +44 -4
  55. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/storages/test_request_queue.py +63 -3
  56. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/test_configuration.py +32 -6
  57. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/uv.lock +1319 -1155
  58. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/package.json +5 -1
  59. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/css/custom.css +0 -1
  60. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/yarn.lock +653 -636
  61. crawlee-1.0.2b3/tests/unit/_autoscaling/test_snapshotter.py +0 -333
  62. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/.editorconfig +0 -0
  63. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/.github/CODEOWNERS +0 -0
  64. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/.github/pull_request_template.md +0 -0
  65. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/.github/workflows/check_pr_title.yaml +0 -0
  66. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/.github/workflows/pre_release.yaml +0 -0
  67. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/.github/workflows/release.yaml +0 -0
  68. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/.github/workflows/run_code_checks.yaml +0 -0
  69. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/.github/workflows/update_new_issue.yaml +0 -0
  70. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/.markdownlint.yaml +0 -0
  71. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/.pre-commit-config.yaml +0 -0
  72. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/CONTRIBUTING.md +0 -0
  73. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/LICENSE +0 -0
  74. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/Makefile +0 -0
  75. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/README.md +0 -0
  76. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/deployment/apify_platform.mdx +0 -0
  77. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/deployment/code_examples/apify/crawler_as_actor_example.py +0 -0
  78. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/deployment/code_examples/apify/get_public_url.py +0 -0
  79. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/deployment/code_examples/apify/log_with_config_example.py +0 -0
  80. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/deployment/code_examples/apify/proxy_advanced_example.py +0 -0
  81. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/deployment/code_examples/apify/proxy_example.py +0 -0
  82. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/deployment/code_examples/google/cloud_run_example.py +0 -0
  83. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/deployment/code_examples/google/google_example.py +0 -0
  84. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/deployment/google_cloud.mdx +0 -0
  85. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/deployment/google_cloud_run.mdx +0 -0
  86. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/add_data_to_dataset.mdx +0 -0
  87. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/beautifulsoup_crawler.mdx +0 -0
  88. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/capture_screenshot_using_playwright.mdx +0 -0
  89. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/capturing_page_snapshots_with_error_snapshotter.mdx +0 -0
  90. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/adaptive_playwright_crawler.py +0 -0
  91. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/add_data_to_dataset_bs.py +0 -0
  92. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/add_data_to_dataset_dataset.py +0 -0
  93. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/add_data_to_dataset_pw.py +0 -0
  94. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/beautifulsoup_crawler.py +0 -0
  95. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/beautifulsoup_crawler_keep_alive.py +0 -0
  96. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/beautifulsoup_crawler_stop.py +0 -0
  97. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/capture_screenshot_using_playwright.py +0 -0
  98. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/configure_json_logging.py +0 -0
  99. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/crawl_all_links_on_website_bs.py +0 -0
  100. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/crawl_all_links_on_website_pw.py +0 -0
  101. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/crawl_multiple_urls_bs.py +0 -0
  102. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/crawl_multiple_urls_pw.py +0 -0
  103. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/crawl_specific_links_on_website_bs.py +0 -0
  104. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/crawl_specific_links_on_website_pw.py +0 -0
  105. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/crawl_website_with_relative_links_all_links.py +0 -0
  106. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/crawl_website_with_relative_links_same_domain.py +0 -0
  107. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/crawl_website_with_relative_links_same_hostname.py +0 -0
  108. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/crawl_website_with_relative_links_same_origin.py +0 -0
  109. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/export_entire_dataset_to_file_csv.py +0 -0
  110. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/export_entire_dataset_to_file_json.py +0 -0
  111. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/extract_and_add_specific_links_on_website_bs.py +0 -0
  112. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/extract_and_add_specific_links_on_website_pw.py +0 -0
  113. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/fill_and_submit_web_form_crawler.py +0 -0
  114. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/fill_and_submit_web_form_request.py +0 -0
  115. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/parsel_crawler.py +0 -0
  116. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/parsel_crawler_with_error_snapshotter.py +0 -0
  117. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/playwright_block_requests.py +0 -0
  118. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/playwright_crawler.py +0 -0
  119. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/playwright_crawler_with_camoufox.py +0 -0
  120. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/playwright_crawler_with_error_snapshotter.py +0 -0
  121. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/playwright_crawler_with_fingerprint_generator.py +0 -0
  122. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/respect_robots_on_skipped_request.py +0 -0
  123. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/respect_robots_txt_file.py +0 -0
  124. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/resuming_paused_crawl.py +0 -0
  125. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/code_examples/using_browser_profiles_firefox.py +0 -0
  126. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/crawl_all_links_on_website.mdx +0 -0
  127. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/crawl_multiple_urls.mdx +0 -0
  128. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/crawl_specific_links_on_website.mdx +0 -0
  129. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/crawl_website_with_relative_links.mdx +0 -0
  130. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/crawler_keep_alive.mdx +0 -0
  131. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/crawler_stop.mdx +0 -0
  132. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/export_entire_dataset_to_file.mdx +0 -0
  133. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/fill_and_submit_web_form.mdx +0 -0
  134. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/json_logging.mdx +0 -0
  135. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/parsel_crawler.mdx +0 -0
  136. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/playwright_crawler.mdx +0 -0
  137. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/playwright_crawler_adaptive.mdx +0 -0
  138. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/playwright_crawler_with_block_requests.mdx +0 -0
  139. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/playwright_crawler_with_camoufox.mdx +0 -0
  140. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/playwright_crawler_with_fingerprint_generator.mdx +0 -0
  141. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/respect_robots_txt_file.mdx +0 -0
  142. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/examples/resuming_paused_crawl.mdx +0 -0
  143. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/avoid_blocking/default_fingerprint_generator_with_args.py +0 -0
  144. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/avoid_blocking/playwright_with_fingerprint_generator.py +0 -0
  145. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/creating_web_archive/manual_archiving_parsel_crawler.py +0 -0
  146. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/creating_web_archive/manual_archiving_playwright_crawler.py +0 -0
  147. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/creating_web_archive/simple_pw_through_proxy_pywb_server.py +0 -0
  148. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/error_handling/change_handle_error_status.py +0 -0
  149. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/error_handling/disable_retry.py +0 -0
  150. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/error_handling/handle_proxy_error.py +0 -0
  151. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/http_clients/parsel_curl_impersonate_example.py +0 -0
  152. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/http_clients/parsel_httpx_example.py +0 -0
  153. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/http_clients/parsel_impit_example.py +0 -0
  154. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/http_crawlers/beautifulsoup_example.py +0 -0
  155. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/http_crawlers/custom_crawler_example.py +0 -0
  156. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/http_crawlers/http_example.py +0 -0
  157. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/http_crawlers/parsel_example.py +0 -0
  158. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/login_crawler/http_login.py +0 -0
  159. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/login_crawler/playwright_login.py +0 -0
  160. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/playwright_crawler/browser_configuration_example.py +0 -0
  161. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/playwright_crawler/multiple_launch_example.py +0 -0
  162. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/playwright_crawler/plugin_browser_configuration_example.py +0 -0
  163. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/playwright_crawler/pre_navigation_hook_example.py +0 -0
  164. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/playwright_crawler_adaptive/handler.py +0 -0
  165. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/playwright_crawler_adaptive/init_beautifulsoup.py +0 -0
  166. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/playwright_crawler_adaptive/init_parsel.py +0 -0
  167. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/playwright_crawler_adaptive/init_prediction.py +0 -0
  168. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/playwright_crawler_adaptive/pre_nav_hooks.py +0 -0
  169. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/playwright_crawler_stagehand/__init__.py +0 -0
  170. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/playwright_crawler_stagehand/browser_classes.py +0 -0
  171. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/playwright_crawler_stagehand/stagehand_run.py +0 -0
  172. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/playwright_crawler_stagehand/support_classes.py +0 -0
  173. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/proxy_management/inspecting_bs_example.py +0 -0
  174. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/proxy_management/inspecting_pw_example.py +0 -0
  175. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/proxy_management/integration_bs_example.py +0 -0
  176. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/proxy_management/integration_pw_example.py +0 -0
  177. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/proxy_management/quick_start_example.py +0 -0
  178. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/proxy_management/session_bs_example.py +0 -0
  179. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/proxy_management/session_pw_example.py +0 -0
  180. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/proxy_management/tiers_bs_example.py +0 -0
  181. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/proxy_management/tiers_pw_example.py +0 -0
  182. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/request_loaders/rl_basic_example.py +0 -0
  183. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/request_loaders/rl_basic_example_with_persist.py +0 -0
  184. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/request_loaders/rl_tandem_example.py +0 -0
  185. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/request_loaders/rl_tandem_example_explicit.py +0 -0
  186. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/request_loaders/sitemap_basic_example.py +0 -0
  187. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/request_loaders/sitemap_example_with_persist.py +0 -0
  188. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/request_loaders/sitemap_tandem_example.py +0 -0
  189. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/request_loaders/sitemap_tandem_example_explicit.py +0 -0
  190. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/request_router/adaptive_crawler_handlers.py +0 -0
  191. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/request_router/basic_request_handlers.py +0 -0
  192. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/request_router/custom_router_default_only.py +0 -0
  193. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/request_router/error_handler.py +0 -0
  194. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/request_router/failed_request_handler.py +0 -0
  195. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/request_router/http_pre_navigation.py +0 -0
  196. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/request_router/playwright_pre_navigation.py +0 -0
  197. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/request_router/simple_default_handler.py +0 -0
  198. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/running_in_web_server/__init__.py +0 -0
  199. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/running_in_web_server/crawler.py +0 -0
  200. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/running_in_web_server/server.py +0 -0
  201. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/scaling_crawlers/max_tasks_per_minute_example.py +0 -0
  202. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/scaling_crawlers/min_and_max_concurrency_example.py +0 -0
  203. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/service_locator/service_conflicts.py +0 -0
  204. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/service_locator/service_crawler_configuration.py +0 -0
  205. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/service_locator/service_crawler_event_manager.py +0 -0
  206. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/service_locator/service_crawler_storage_client.py +0 -0
  207. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/service_locator/service_locator_configuration.py +0 -0
  208. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/service_locator/service_locator_event_manager.py +0 -0
  209. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/service_locator/service_locator_storage_client.py +0 -0
  210. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/service_locator/service_storage_configuration.py +0 -0
  211. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/service_locator/service_storage_storage_client.py +0 -0
  212. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/session_management/multi_sessions_http.py +0 -0
  213. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/session_management/one_session_http.py +0 -0
  214. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/session_management/sm_basic.py +0 -0
  215. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/session_management/sm_beautifulsoup.py +0 -0
  216. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/session_management/sm_http.py +0 -0
  217. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/session_management/sm_parsel.py +0 -0
  218. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/session_management/sm_playwright.py +0 -0
  219. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/session_management/sm_standalone.py +0 -0
  220. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/storage_clients/custom_storage_client_example.py +0 -0
  221. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/storage_clients/file_system_storage_client_basic_example.py +0 -0
  222. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/storage_clients/file_system_storage_client_configuration_example.py +0 -0
  223. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/storage_clients/memory_storage_client_basic_example.py +0 -0
  224. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/storage_clients/registering_storage_clients_example.py +0 -0
  225. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/storage_clients/sql_storage_client_basic_example.py +0 -0
  226. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/storage_clients/sql_storage_client_configuration_example.py +0 -0
  227. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/storages/cleaning_do_not_purge_example.py +0 -0
  228. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/storages/cleaning_purge_explicitly_example.py +0 -0
  229. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/storages/dataset_basic_example.py +0 -0
  230. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/storages/dataset_with_crawler_example.py +0 -0
  231. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/storages/dataset_with_crawler_explicit_example.py +0 -0
  232. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/storages/helper_add_requests_example.py +0 -0
  233. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/storages/helper_enqueue_links_example.py +0 -0
  234. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/storages/kvs_basic_example.py +0 -0
  235. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/storages/kvs_with_crawler_example.py +0 -0
  236. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/storages/kvs_with_crawler_explicit_example.py +0 -0
  237. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/storages/opening.py +0 -0
  238. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/storages/rq_basic_example.py +0 -0
  239. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/storages/rq_with_crawler_example.py +0 -0
  240. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/storages/rq_with_crawler_explicit_example.py +0 -0
  241. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/code_examples/trace_and_monitor_crawlers/instrument_crawler.py +0 -0
  242. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/crawler_login.mdx +0 -0
  243. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/creating_web_archive.mdx +0 -0
  244. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/error_handling.mdx +0 -0
  245. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/http_clients.mdx +0 -0
  246. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/http_crawlers.mdx +0 -0
  247. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/playwright_crawler.mdx +0 -0
  248. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/playwright_crawler_adaptive.mdx +0 -0
  249. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/playwright_crawler_stagehand.mdx +0 -0
  250. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/proxy_management.mdx +0 -0
  251. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/request_router.mdx +0 -0
  252. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/running_in_web_server.mdx +0 -0
  253. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/scaling_crawlers.mdx +0 -0
  254. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/service_locator.mdx +0 -0
  255. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/session_management.mdx +0 -0
  256. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/storage_clients.mdx +0 -0
  257. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/storages.mdx +0 -0
  258. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/guides/trace_and_monitor_crawlers.mdx +0 -0
  259. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/01_setting_up.mdx +0 -0
  260. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/02_first_crawler.mdx +0 -0
  261. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/03_adding_more_urls.mdx +0 -0
  262. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/04_real_world_project.mdx +0 -0
  263. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/05_crawling.mdx +0 -0
  264. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/06_scraping.mdx +0 -0
  265. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/07_saving_data.mdx +0 -0
  266. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/08_refactoring.mdx +0 -0
  267. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/code_examples/02_bs.py +0 -0
  268. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/code_examples/02_bs_better.py +0 -0
  269. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/code_examples/02_request_queue.py +0 -0
  270. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/code_examples/03_enqueue_strategy.py +0 -0
  271. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/code_examples/03_finding_new_links.py +0 -0
  272. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/code_examples/03_globs.py +0 -0
  273. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/code_examples/03_original_code.py +0 -0
  274. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/code_examples/03_transform_request.py +0 -0
  275. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/code_examples/04_sanity_check.py +0 -0
  276. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/code_examples/05_crawling_detail.py +0 -0
  277. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/code_examples/05_crawling_listing.py +0 -0
  278. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/code_examples/06_scraping.py +0 -0
  279. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/code_examples/07_final_code.py +0 -0
  280. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/code_examples/07_first_code.py +0 -0
  281. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/code_examples/08_main.py +0 -0
  282. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/code_examples/08_routes.py +0 -0
  283. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/code_examples/09_apify_sdk.py +0 -0
  284. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/code_examples/__init__.py +0 -0
  285. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/code_examples/routes.py +0 -0
  286. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/introduction/index.mdx +0 -0
  287. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/quick-start/code_examples/beautifulsoup_crawler_example.py +0 -0
  288. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/quick-start/code_examples/parsel_crawler_example.py +0 -0
  289. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/quick-start/code_examples/playwright_crawler_example.py +0 -0
  290. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/quick-start/code_examples/playwright_crawler_headful_example.py +0 -0
  291. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/quick-start/index.mdx +0 -0
  292. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/upgrading/upgrading_to_v0x.md +0 -0
  293. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/docs/upgrading/upgrading_to_v1.md +0 -0
  294. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/renovate.json +0 -0
  295. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/__init__.py +0 -0
  296. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_autoscaling/__init__.py +0 -0
  297. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_autoscaling/_types.py +0 -0
  298. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_autoscaling/autoscaled_pool.py +0 -0
  299. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_autoscaling/py.typed +0 -0
  300. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_autoscaling/snapshotter.py +0 -0
  301. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_autoscaling/system_status.py +0 -0
  302. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_browserforge_workaround.py +0 -0
  303. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_cli.py +0 -0
  304. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_consts.py +0 -0
  305. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_log_config.py +0 -0
  306. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_utils/__init__.py +0 -0
  307. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_utils/blocked.py +0 -0
  308. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_utils/byte_size.py +0 -0
  309. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_utils/console.py +0 -0
  310. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_utils/context.py +0 -0
  311. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_utils/crypto.py +0 -0
  312. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_utils/docs.py +0 -0
  313. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_utils/file.py +0 -0
  314. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_utils/globs.py +0 -0
  315. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_utils/html_to_text.py +0 -0
  316. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_utils/models.py +0 -0
  317. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_utils/raise_if_too_many_kwargs.py +0 -0
  318. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_utils/requests.py +0 -0
  319. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_utils/system.py +0 -0
  320. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_utils/time.py +0 -0
  321. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_utils/try_import.py +0 -0
  322. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_utils/wait.py +0 -0
  323. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/_utils/web.py +0 -0
  324. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/browsers/__init__.py +0 -0
  325. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/browsers/_browser_controller.py +0 -0
  326. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/browsers/_browser_plugin.py +0 -0
  327. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/browsers/_playwright_browser.py +0 -0
  328. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/browsers/py.typed +0 -0
  329. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/configuration.py +0 -0
  330. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/__init__.py +0 -0
  331. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_abstract_http/__init__.py +0 -0
  332. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_abstract_http/_abstract_http_parser.py +0 -0
  333. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_abstract_http/_http_crawling_context.py +0 -0
  334. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_abstract_http/py.typed +0 -0
  335. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_adaptive_playwright/__init__.py +0 -0
  336. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py +0 -0
  337. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py +0 -0
  338. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py +0 -0
  339. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_adaptive_playwright/_result_comparator.py +0 -0
  340. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_adaptive_playwright/_utils.py +0 -0
  341. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_basic/__init__.py +0 -0
  342. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_basic/_basic_crawling_context.py +0 -0
  343. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_basic/_context_pipeline.py +0 -0
  344. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_basic/_logging_utils.py +0 -0
  345. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_basic/py.typed +0 -0
  346. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_beautifulsoup/__init__.py +0 -0
  347. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawler.py +0 -0
  348. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawling_context.py +0 -0
  349. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_parser.py +0 -0
  350. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_beautifulsoup/_utils.py +0 -0
  351. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_beautifulsoup/py.typed +0 -0
  352. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_http/__init__.py +0 -0
  353. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_http/_http_crawler.py +0 -0
  354. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_http/_http_parser.py +0 -0
  355. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_parsel/__init__.py +0 -0
  356. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_parsel/_parsel_crawler.py +0 -0
  357. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_parsel/_parsel_crawling_context.py +0 -0
  358. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_parsel/_parsel_parser.py +0 -0
  359. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_parsel/_utils.py +0 -0
  360. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_playwright/__init__.py +0 -0
  361. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_playwright/_playwright_crawling_context.py +0 -0
  362. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_playwright/_playwright_http_client.py +0 -0
  363. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py +0 -0
  364. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_playwright/_types.py +0 -0
  365. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_playwright/_utils.py +0 -0
  366. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/_types.py +0 -0
  367. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/crawlers/py.typed +0 -0
  368. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/errors.py +0 -0
  369. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/events/__init__.py +0 -0
  370. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/events/_event_manager.py +0 -0
  371. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/events/_local_event_manager.py +0 -0
  372. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/events/_types.py +0 -0
  373. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/events/py.typed +0 -0
  374. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/fingerprint_suite/__init__.py +0 -0
  375. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/fingerprint_suite/_browserforge_adapter.py +0 -0
  376. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/fingerprint_suite/_consts.py +0 -0
  377. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/fingerprint_suite/_fingerprint_generator.py +0 -0
  378. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/fingerprint_suite/_types.py +0 -0
  379. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/fingerprint_suite/py.typed +0 -0
  380. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/http_clients/__init__.py +0 -0
  381. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/http_clients/_base.py +0 -0
  382. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/http_clients/_curl_impersonate.py +0 -0
  383. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/http_clients/_httpx.py +0 -0
  384. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/http_clients/_impit.py +0 -0
  385. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/otel/__init__.py +0 -0
  386. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/otel/crawler_instrumentor.py +0 -0
  387. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/project_template/cookiecutter.json +0 -0
  388. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/project_template/hooks/post_gen_project.py +0 -0
  389. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/project_template/hooks/pre_gen_project.py +0 -0
  390. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/project_template/templates/main.py +0 -0
  391. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/project_template/templates/main_beautifulsoup.py +0 -0
  392. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/project_template/templates/main_parsel.py +0 -0
  393. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/project_template/templates/main_playwright.py +0 -0
  394. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/project_template/templates/main_playwright_camoufox.py +0 -0
  395. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/project_template/templates/routes_beautifulsoup.py +0 -0
  396. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/project_template/templates/routes_camoufox.py +0 -0
  397. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/project_template/templates/routes_parsel.py +0 -0
  398. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/project_template/templates/routes_playwright.py +0 -0
  399. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/project_template/templates/routes_playwright_camoufox.py +0 -0
  400. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/project_template/{{cookiecutter.project_name}}/.dockerignore +0 -0
  401. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile +0 -0
  402. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/project_template/{{cookiecutter.project_name}}/README.md +0 -0
  403. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml +0 -0
  404. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/project_template/{{cookiecutter.project_name}}/requirements.txt +0 -0
  405. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__init__.py +0 -0
  406. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__main__.py +0 -0
  407. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/main.py +0 -0
  408. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/routes.py +0 -0
  409. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/proxy_configuration.py +0 -0
  410. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/py.typed +0 -0
  411. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/request_loaders/__init__.py +0 -0
  412. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/request_loaders/_request_list.py +0 -0
  413. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/request_loaders/_request_loader.py +0 -0
  414. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/request_loaders/_request_manager.py +0 -0
  415. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/request_loaders/_request_manager_tandem.py +0 -0
  416. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/router.py +0 -0
  417. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/sessions/__init__.py +0 -0
  418. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/sessions/_cookies.py +0 -0
  419. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/sessions/_models.py +0 -0
  420. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/sessions/_session.py +0 -0
  421. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/sessions/_session_pool.py +0 -0
  422. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/sessions/py.typed +0 -0
  423. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/statistics/__init__.py +0 -0
  424. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/statistics/_error_tracker.py +0 -0
  425. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/statistics/_models.py +0 -0
  426. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/__init__.py +0 -0
  427. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/_base/__init__.py +0 -0
  428. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/_base/_dataset_client.py +0 -0
  429. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/_base/_key_value_store_client.py +0 -0
  430. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/_base/_request_queue_client.py +0 -0
  431. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/_base/_storage_client.py +0 -0
  432. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/_base/py.typed +0 -0
  433. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/_file_system/__init__.py +0 -0
  434. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/_file_system/_dataset_client.py +0 -0
  435. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/_file_system/_key_value_store_client.py +0 -0
  436. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/_file_system/_storage_client.py +0 -0
  437. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/_file_system/_utils.py +0 -0
  438. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/_file_system/py.typed +0 -0
  439. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/_memory/__init__.py +0 -0
  440. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/_memory/_dataset_client.py +0 -0
  441. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/_memory/_key_value_store_client.py +0 -0
  442. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/_memory/_request_queue_client.py +0 -0
  443. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/_memory/_storage_client.py +0 -0
  444. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/_memory/py.typed +0 -0
  445. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/_sql/__init__.py +0 -0
  446. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/_sql/_client_mixin.py +0 -0
  447. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/_sql/_dataset_client.py +0 -0
  448. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/_sql/py.typed +0 -0
  449. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/models.py +0 -0
  450. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storage_clients/py.typed +0 -0
  451. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storages/__init__.py +0 -0
  452. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storages/_base.py +0 -0
  453. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storages/_dataset.py +0 -0
  454. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storages/_request_queue.py +0 -0
  455. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storages/_storage_instance_manager.py +0 -0
  456. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storages/_utils.py +0 -0
  457. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/src/crawlee/storages/py.typed +0 -0
  458. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/__init__.py +0 -0
  459. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/e2e/__init__.py +0 -0
  460. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/e2e/conftest.py +0 -0
  461. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/e2e/project_template/test_static_crawlers_templates.py +0 -0
  462. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/e2e/project_template/utils.py +0 -0
  463. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/README.md +0 -0
  464. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/__init__.py +0 -0
  465. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/_autoscaling/test_autoscaled_pool.py +0 -0
  466. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/_autoscaling/test_system_status.py +0 -0
  467. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/_statistics/test_error_tracker.py +0 -0
  468. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/_statistics/test_periodic_logging.py +0 -0
  469. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/_statistics/test_persistence.py +0 -0
  470. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/_statistics/test_request_processing_record.py +0 -0
  471. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/_utils/test_byte_size.py +0 -0
  472. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/_utils/test_console.py +0 -0
  473. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/_utils/test_crypto.py +0 -0
  474. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/_utils/test_file.py +0 -0
  475. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/_utils/test_globs.py +0 -0
  476. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/_utils/test_html_to_text.py +0 -0
  477. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/_utils/test_measure_time.py +0 -0
  478. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/_utils/test_raise_if_too_many_kwargs.py +0 -0
  479. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/_utils/test_recurring_task.py +0 -0
  480. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/_utils/test_requests.py +0 -0
  481. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/_utils/test_robots.py +0 -0
  482. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/_utils/test_sitemap.py +0 -0
  483. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/_utils/test_system.py +0 -0
  484. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/_utils/test_timedelata_ms.py +0 -0
  485. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/_utils/test_urls.py +0 -0
  486. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/browsers/test_browser_pool.py +0 -0
  487. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/browsers/test_playwright_browser.py +0 -0
  488. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/browsers/test_playwright_browser_controller.py +0 -0
  489. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler_statistics.py +0 -0
  490. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawling_context.py +0 -0
  491. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/crawlers/_adaptive_playwright/test_predictor.py +0 -0
  492. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/crawlers/_basic/test_context_pipeline.py +0 -0
  493. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/events/test_event_manager.py +0 -0
  494. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/events/test_local_event_manager.py +0 -0
  495. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/fingerprint_suite/test_adapters.py +0 -0
  496. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/fingerprint_suite/test_header_generator.py +0 -0
  497. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/http_clients/test_http_clients.py +0 -0
  498. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/http_clients/test_httpx.py +0 -0
  499. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/proxy_configuration/test_new_proxy_info.py +0 -0
  500. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/proxy_configuration/test_tiers.py +0 -0
  501. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/request_loaders/test_request_list.py +0 -0
  502. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/request_loaders/test_sitemap_request_loader.py +0 -0
  503. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/sessions/test_cookies.py +0 -0
  504. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/sessions/test_models.py +0 -0
  505. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/sessions/test_session.py +0 -0
  506. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/sessions/test_session_pool.py +0 -0
  507. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py +0 -0
  508. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py +0 -0
  509. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/storage_clients/_memory/test_memory_dataset_client.py +0 -0
  510. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/storage_clients/_memory/test_memory_kvs_client.py +0 -0
  511. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/storage_clients/_memory/test_memory_rq_client.py +0 -0
  512. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/storage_clients/_sql/test_sql_dataset_client.py +0 -0
  513. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/storage_clients/_sql/test_sql_kvs_client.py +0 -0
  514. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/storage_clients/_sql/test_sql_rq_client.py +0 -0
  515. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/storages/conftest.py +0 -0
  516. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/storages/test_request_manager_tandem.py +0 -0
  517. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/storages/test_storage_instance_manager.py +0 -0
  518. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/test_cli.py +0 -0
  519. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/test_log_config.py +0 -0
  520. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/test_router.py +0 -0
  521. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/tests/unit/test_service_locator.py +0 -0
  522. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/.eslintrc.json +0 -0
  523. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/.yarnrc.yml +0 -0
  524. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/babel.config.js +0 -0
  525. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/build_api_reference.sh +0 -0
  526. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/docusaurus.config.js +0 -0
  527. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/generate_module_shortcuts.py +0 -0
  528. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/patches/@docusaurus+core+3.4.0.patch +0 -0
  529. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/patches/@docusaurus+core+3.5.2.patch +0 -0
  530. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/roa-loader/index.js +0 -0
  531. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/roa-loader/package.json +0 -0
  532. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/sidebars.js +0 -0
  533. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/components/ApiLink.jsx +0 -0
  534. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/components/Button.jsx +0 -0
  535. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/components/Button.module.css +0 -0
  536. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/components/CopyButton.jsx +0 -0
  537. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/components/CopyButton.module.css +0 -0
  538. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/components/Gradients.jsx +0 -0
  539. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/components/Highlights.jsx +0 -0
  540. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/components/Highlights.module.css +0 -0
  541. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/components/Homepage/HomepageCliExample.jsx +0 -0
  542. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/components/Homepage/HomepageCliExample.module.css +0 -0
  543. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/components/Homepage/HomepageCtaSection.jsx +0 -0
  544. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/components/Homepage/HomepageCtaSection.module.css +0 -0
  545. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/components/Homepage/HomepageHeroSection.jsx +0 -0
  546. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/components/Homepage/HomepageHeroSection.module.css +0 -0
  547. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/components/Homepage/LanguageInfoWidget.jsx +0 -0
  548. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/components/Homepage/LanguageInfoWidget.module.css +0 -0
  549. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/components/Homepage/LanguageSwitch.jsx +0 -0
  550. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/components/Homepage/LanguageSwitch.module.css +0 -0
  551. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/components/Homepage/RiverSection.jsx +0 -0
  552. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/components/Homepage/RiverSection.module.css +0 -0
  553. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/components/Homepage/ThreeCardsWithIcon.jsx +0 -0
  554. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/components/Homepage/ThreeCardsWithIcon.module.css +0 -0
  555. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/components/Homepage/animated-crawlee-logo-dark.svg +0 -0
  556. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/components/Homepage/animated-crawlee-logo-light.svg +0 -0
  557. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/components/RunnableCodeBlock.jsx +0 -0
  558. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/components/RunnableCodeBlock.module.css +0 -0
  559. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/pages/home_page_example.py +0 -0
  560. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/pages/index.js +0 -0
  561. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/pages/index.module.css +0 -0
  562. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/theme/ColorModeToggle/dark-mode-icon.svg +0 -0
  563. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/theme/ColorModeToggle/index.js +0 -0
  564. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/theme/ColorModeToggle/light-mode-icon.svg +0 -0
  565. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/theme/ColorModeToggle/styles.module.css +0 -0
  566. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/theme/DocItem/Layout/index.js +0 -0
  567. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/theme/DocItem/Layout/styles.module.css +0 -0
  568. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/theme/Footer/LinkItem/index.js +0 -0
  569. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/theme/Footer/LinkItem/index.module.css +0 -0
  570. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/theme/Footer/index.js +0 -0
  571. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/theme/Footer/index.module.css +0 -0
  572. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/theme/MDXComponents/A.js +0 -0
  573. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/theme/Navbar/Content/index.js +0 -0
  574. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/theme/Navbar/Content/styles.module.css +0 -0
  575. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/theme/Navbar/Logo/index.js +0 -0
  576. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/theme/Navbar/Logo/index.module.css +0 -0
  577. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/theme/Navbar/MobileSidebar/Header/index.js +0 -0
  578. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/theme/Navbar/MobileSidebar/Header/index.module.css +0 -0
  579. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/theme/Navbar/MobileSidebar/Layout/index.js +0 -0
  580. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/theme/Navbar/MobileSidebar/PrimaryMenu/index.js +0 -0
  581. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/theme/Navbar/MobileSidebar/index.js +0 -0
  582. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/src/theme/NavbarItem/ComponentTypes.js +0 -0
  583. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/.nojekyll +0 -0
  584. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/font/lota.woff +0 -0
  585. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/font/lota.woff2 +0 -0
  586. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/API.png +0 -0
  587. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/apify_logo.svg +0 -0
  588. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/apify_og_SDK.png +0 -0
  589. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/apify_sdk.svg +0 -0
  590. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/apify_sdk_white.svg +0 -0
  591. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/arrow_right.svg +0 -0
  592. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/auto-scaling-dark.webp +0 -0
  593. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/auto-scaling-light.webp +0 -0
  594. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/check.svg +0 -0
  595. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/chrome-scrape-dark.gif +0 -0
  596. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/chrome-scrape-light.gif +0 -0
  597. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/cloud_icon.svg +0 -0
  598. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/community-dark-icon.svg +0 -0
  599. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/community-light-icon.svg +0 -0
  600. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/crawlee-dark-new.svg +0 -0
  601. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/crawlee-dark.svg +0 -0
  602. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/crawlee-javascript-dark.svg +0 -0
  603. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/crawlee-javascript-light.svg +0 -0
  604. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/crawlee-light-new.svg +0 -0
  605. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/crawlee-light.svg +0 -0
  606. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/crawlee-logo-monocolor.svg +0 -0
  607. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/crawlee-logo.svg +0 -0
  608. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/crawlee-python-dark.svg +0 -0
  609. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/crawlee-python-light.svg +0 -0
  610. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/crawlee-python-og.png +0 -0
  611. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/defaults-dark-icon.svg +0 -0
  612. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/defaults-light-icon.svg +0 -0
  613. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/discord-brand-dark.svg +0 -0
  614. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/discord-brand.svg +0 -0
  615. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/docusaurus.svg +0 -0
  616. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/external-link.svg +0 -0
  617. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/favicon.ico +0 -0
  618. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/favorite-tools-dark.webp +0 -0
  619. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/favorite-tools-light.webp +0 -0
  620. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/features/auto-scaling.svg +0 -0
  621. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/features/automate-everything.svg +0 -0
  622. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/features/fingerprints.svg +0 -0
  623. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/features/node-requests.svg +0 -0
  624. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/features/runs-on-py.svg +0 -0
  625. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/features/storage.svg +0 -0
  626. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/features/works-everywhere.svg +0 -0
  627. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/fill-and-submit-web-form/00.jpg +0 -0
  628. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/fill-and-submit-web-form/01.jpg +0 -0
  629. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/fill-and-submit-web-form/02.jpg +0 -0
  630. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/fill-and-submit-web-form/03.jpg +0 -0
  631. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/getting-started/current-price.jpg +0 -0
  632. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/getting-started/scraping-practice.jpg +0 -0
  633. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/getting-started/select-an-element.jpg +0 -0
  634. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/getting-started/selected-element.jpg +0 -0
  635. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/getting-started/sku.jpg +0 -0
  636. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/getting-started/title.jpg +0 -0
  637. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/github-brand-dark.svg +0 -0
  638. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/github-brand.svg +0 -0
  639. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/guides/jaeger_otel_search_view_example.png +0 -0
  640. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/guides/jaeger_otel_trace_example.png +0 -0
  641. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/hearth copy.svg +0 -0
  642. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/hearth.svg +0 -0
  643. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/javascript_logo.svg +0 -0
  644. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/js_file.svg +0 -0
  645. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/logo-big.svg +0 -0
  646. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/logo-blur.png +0 -0
  647. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/logo-blur.svg +0 -0
  648. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/logo-zoom.svg +0 -0
  649. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/menu-arrows.svg +0 -0
  650. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/oss_logo.png +0 -0
  651. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/puppeteer-live-view-dashboard.png +0 -0
  652. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/puppeteer-live-view-detail.png +0 -0
  653. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/queue-dark-icon.svg +0 -0
  654. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/queue-light-icon.svg +0 -0
  655. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/resuming-paused-crawl/00.webp +0 -0
  656. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/resuming-paused-crawl/01.webp +0 -0
  657. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/robot.png +0 -0
  658. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/routing-dark-icon.svg +0 -0
  659. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/routing-light-icon.svg +0 -0
  660. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/scraping-utils-dark-icon.svg +0 -0
  661. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/scraping-utils-light-icon.svg +0 -0
  662. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/smart-proxy-dark.webp +0 -0
  663. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/smart-proxy-light.webp +0 -0
  664. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/source_code.png +0 -0
  665. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/system.svg +0 -0
  666. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/triangles_dark.svg +0 -0
  667. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/triangles_light.svg +0 -0
  668. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/workflow.svg +0 -0
  669. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/zero-setup-dark-icon.svg +0 -0
  670. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/img/zero-setup-light-icon.svg +0 -0
  671. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/js/custom.js +0 -0
  672. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/static/robots.txt +0 -0
  673. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/tools/docs-prettier.config.js +0 -0
  674. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/tools/utils/externalLink.js +0 -0
  675. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/tools/website_gif/chrome-scrape-dark.gif +0 -0
  676. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/tools/website_gif/chrome-scrape-dark.mp4 +0 -0
  677. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/tools/website_gif/chrome-scrape-light.gif +0 -0
  678. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/tools/website_gif/chrome-scrape-light.mp4 +0 -0
  679. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/tools/website_gif/website_gif.mjs +0 -0
  680. {crawlee-1.0.2b3 → crawlee-1.0.5b7}/website/tsconfig.eslint.json +0 -0
@@ -30,7 +30,7 @@ jobs:
30
30
  ref: ${{ github.event_name == 'workflow_call' && inputs.ref || github.ref }}
31
31
 
32
32
  - name: Set up Node
33
- uses: actions/setup-node@v5
33
+ uses: actions/setup-node@v6
34
34
  with:
35
35
  node-version: ${{ env.NODE_VERSION }}
36
36
 
@@ -40,7 +40,7 @@ jobs:
40
40
  python-version: ${{ env.PYTHON_VERSION }}
41
41
 
42
42
  - name: Set up uv package manager
43
- uses: astral-sh/setup-uv@v6
43
+ uses: astral-sh/setup-uv@v7
44
44
  with:
45
45
  python-version: ${{ env.PYTHON_VERSION }}
46
46
 
@@ -27,7 +27,7 @@ jobs:
27
27
  uses: actions/checkout@v5
28
28
 
29
29
  - name: Setup node
30
- uses: actions/setup-node@v5
30
+ uses: actions/setup-node@v6
31
31
  with:
32
32
  node-version: ${{ env.NODE_VERSION }}
33
33
 
@@ -44,7 +44,7 @@ jobs:
44
44
  run: pipx install poetry
45
45
 
46
46
  - name: Set up uv package manager
47
- uses: astral-sh/setup-uv@v6
47
+ uses: astral-sh/setup-uv@v7
48
48
  with:
49
49
  python-version: ${{ env.PYTHON_VERSION }}
50
50
 
@@ -30,6 +30,7 @@ htmlcov
30
30
  # IDE, editors
31
31
  .vscode
32
32
  .idea
33
+ *~
33
34
  .DS_Store
34
35
  .nvim.lua
35
36
  Session.vim
@@ -3,15 +3,49 @@
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
5
  <!-- git-cliff-unreleased-start -->
6
- ## 1.0.2 - **not yet released**
6
+ ## 1.0.5 - **not yet released**
7
+
8
+ ### 🚀 Features
9
+
10
+ - Add `chrome` `BrowserType` for `PlaywrightCrawler` to use the Chrome browser ([#1487](https://github.com/apify/crawlee-python/pull/1487)) ([b06937b](https://github.com/apify/crawlee-python/commit/b06937bbc3afe3c936b554bfc503365c1b2c526b)) by [@Mantisus](https://github.com/Mantisus), closes [#1071](https://github.com/apify/crawlee-python/issues/1071)
11
+
12
+ ### 🐛 Bug Fixes
13
+
14
+ - Improve indexing of the `request_queue_records` table for `SqlRequestQueueClient` ([#1527](https://github.com/apify/crawlee-python/pull/1527)) ([6509534](https://github.com/apify/crawlee-python/commit/65095346a9d8b703b10c91e0510154c3c48a4176)) by [@Mantisus](https://github.com/Mantisus), closes [#1526](https://github.com/apify/crawlee-python/issues/1526)
15
+ - Improve error handling for `RobotsTxtFile.load` ([#1524](https://github.com/apify/crawlee-python/pull/1524)) ([596a311](https://github.com/apify/crawlee-python/commit/596a31184914a254b3e7a81fd2f48ea8eda7db49)) by [@Mantisus](https://github.com/Mantisus)
16
+
17
+
18
+ <!-- git-cliff-unreleased-end -->
19
+ ## [1.0.4](https://github.com/apify/crawlee-python/releases/tag/v1.0.4) (2025-10-24)
20
+
21
+ ### 🐛 Bug Fixes
22
+
23
+ - Respect `enqueue_strategy` in `enqueue_links` ([#1505](https://github.com/apify/crawlee-python/pull/1505)) ([6ee04bc](https://github.com/apify/crawlee-python/commit/6ee04bc08c50a70f2e956a79d4ce5072a726c3a8)) by [@Mantisus](https://github.com/Mantisus), closes [#1504](https://github.com/apify/crawlee-python/issues/1504)
24
+ - Exclude incorrect links before checking `robots.txt` ([#1502](https://github.com/apify/crawlee-python/pull/1502)) ([3273da5](https://github.com/apify/crawlee-python/commit/3273da5fee62ec9254666b376f382474c3532a56)) by [@Mantisus](https://github.com/Mantisus), closes [#1499](https://github.com/apify/crawlee-python/issues/1499)
25
+ - Resolve compatibility issue between `SqlStorageClient` and `AdaptivePlaywrightCrawler` ([#1496](https://github.com/apify/crawlee-python/pull/1496)) ([ce172c4](https://github.com/apify/crawlee-python/commit/ce172c425a8643a1d4c919db4f5e5a6e47e91deb)) by [@Mantisus](https://github.com/Mantisus), closes [#1495](https://github.com/apify/crawlee-python/issues/1495)
26
+ - Fix `BasicCrawler` statistics persistence ([#1490](https://github.com/apify/crawlee-python/pull/1490)) ([1eb1c19](https://github.com/apify/crawlee-python/commit/1eb1c19aa6f9dda4a0e3f7eda23f77a554f95076)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1501](https://github.com/apify/crawlee-python/issues/1501)
27
+ - Save context state in result for `AdaptivePlaywrightCrawler` after isolated processing in `SubCrawler` ([#1488](https://github.com/apify/crawlee-python/pull/1488)) ([62b7c70](https://github.com/apify/crawlee-python/commit/62b7c70b54085fc65a660062028014f4502beba9)) by [@Mantisus](https://github.com/Mantisus), closes [#1483](https://github.com/apify/crawlee-python/issues/1483)
28
+
29
+
30
+ ## [1.0.3](https://github.com/apify/crawlee-python/releases/tag/v1.0.3) (2025-10-17)
31
+
32
+ ### 🐛 Bug Fixes
33
+
34
+ - Add support for Pydantic v2.12 ([#1471](https://github.com/apify/crawlee-python/pull/1471)) ([35c1108](https://github.com/apify/crawlee-python/commit/35c110878c2f445a2866be2522ea8703e9b371dd)) by [@Mantisus](https://github.com/Mantisus), closes [#1464](https://github.com/apify/crawlee-python/issues/1464)
35
+ - Fix database version warning message ([#1485](https://github.com/apify/crawlee-python/pull/1485)) ([18a545e](https://github.com/apify/crawlee-python/commit/18a545ee8add92e844acd0068f9cb8580a82e1c9)) by [@Mantisus](https://github.com/Mantisus)
36
+ - Fix `reclaim_request` in `SqlRequestQueueClient` to correctly update the request state ([#1486](https://github.com/apify/crawlee-python/pull/1486)) ([1502469](https://github.com/apify/crawlee-python/commit/150246957f8f7f1ceb77bb77e3a02a903c50cae1)) by [@Mantisus](https://github.com/Mantisus), closes [#1484](https://github.com/apify/crawlee-python/issues/1484)
37
+ - Fix `KeyValueStore.auto_saved_value` failing in some scenarios ([#1438](https://github.com/apify/crawlee-python/pull/1438)) ([b35dee7](https://github.com/apify/crawlee-python/commit/b35dee78180e57161b826641d45a61b8d8f6ef51)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1354](https://github.com/apify/crawlee-python/issues/1354)
38
+
39
+
40
+ ## [1.0.2](https://github.com/apify/crawlee-python/releases/tag/v1.0.2) (2025-10-08)
7
41
 
8
42
  ### 🐛 Bug Fixes
9
43
 
10
44
  - Use Self type in the open() method of storage clients ([#1462](https://github.com/apify/crawlee-python/pull/1462)) ([4ec6f6c](https://github.com/apify/crawlee-python/commit/4ec6f6c08f81632197f602ff99151338b3eba6e7)) by [@janbuchar](https://github.com/janbuchar)
11
45
  - Add storages name validation ([#1457](https://github.com/apify/crawlee-python/pull/1457)) ([84de11a](https://github.com/apify/crawlee-python/commit/84de11a3a603503076f5b7df487c9abab68a9015)) by [@Mantisus](https://github.com/Mantisus), closes [#1434](https://github.com/apify/crawlee-python/issues/1434)
46
+ - Pin pydantic version to &lt;2.12.0 to avoid compatibility issues ([#1467](https://github.com/apify/crawlee-python/pull/1467)) ([f11b86f](https://github.com/apify/crawlee-python/commit/f11b86f7ed57f98e83dc1b52f15f2017a919bf59)) by [@vdusek](https://github.com/vdusek)
12
47
 
13
48
 
14
- <!-- git-cliff-unreleased-end -->
15
49
  ## [1.0.1](https://github.com/apify/crawlee-python/releases/tag/v1.0.1) (2025-10-06)
16
50
 
17
51
  ### 🐛 Bug Fixes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: crawlee
3
- Version: 1.0.2b3
3
+ Version: 1.0.5b7
4
4
  Summary: Crawlee for Python
5
5
  Project-URL: Apify Homepage, https://apify.com
6
6
  Project-URL: Changelog, https://crawlee.dev/python/docs/changelog
@@ -27,15 +27,13 @@ async def main() -> None:
27
27
 
28
28
  crawler = PlaywrightCrawler(
29
29
  headless=False,
30
- # Use chromium for Chrome compatibility
31
- browser_type='chromium',
30
+ # Use the installed Chrome browser
31
+ browser_type='chrome',
32
32
  # Disable fingerprints to preserve profile identity
33
33
  fingerprint_generator=None,
34
34
  # Set user data directory to temp folder
35
35
  user_data_dir=tmp_profile_dir,
36
36
  browser_launch_options={
37
- # Use installed Chrome browser
38
- 'channel': 'chrome',
39
37
  # Slow down actions to mimic human behavior
40
38
  'slow_mo': 200,
41
39
  'args': [
@@ -18,8 +18,6 @@ Using browser profiles allows you to leverage existing login sessions, saved pas
18
18
 
19
19
  To run <ApiLink to="class/PlaywrightCrawler">`PlaywrightCrawler`</ApiLink> with your Chrome profile, you need to know the path to your profile files. You can find this information by entering `chrome://version/` as a URL in your Chrome browser. If you have multiple profiles, pay attention to the profile name - if you only have one profile, it's always `Default`.
20
20
 
21
- You also need to use the [`channel`](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-option-channel) parameter in `browser_launch_options` to use the Chrome browser installed on your system instead of Playwright's Chromium.
22
-
23
21
  :::warning Profile access limitation
24
22
  Due to [Chrome's security policies](https://developer.chrome.com/blog/remote-debugging-port), automation cannot use your main browsing profile directly. The example copies your profile to a temporary location as a workaround.
25
23
  :::
@@ -291,7 +291,7 @@ Request loaders provide a subset of <ApiLink to="class/RequestQueue">`RequestQue
291
291
 
292
292
  - <ApiLink to="class/RequestLoader">`RequestLoader`</ApiLink> - Base interface for read-only access to a stream of requests, with capabilities like fetching the next request, marking as handled, and status checking.
293
293
  - <ApiLink to="class/RequestList">`RequestList`</ApiLink> - Lightweight in-memory implementation of `RequestLoader` for managing static lists of URLs.
294
- - <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> - Specialized loader for reading URLs from XML sitemaps with filtering capabilities.
294
+ - <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> - A specialized loader that reads URLs from XML and plain-text sitemaps following the [Sitemaps protocol](https://www.sitemaps.org/protocol.html) with filtering capabilities.
295
295
 
296
296
  ### Request managers
297
297
 
@@ -25,7 +25,7 @@ Changing browser fingerprints can be a tedious job. Luckily, Crawlee provides th
25
25
  {PlaywrightDefaultFingerprintGenerator}
26
26
  </RunnableCodeBlock>
27
27
 
28
- In certain cases we want to narrow down the fingerprints used - e.g. specify a certain operating system, locale or browser. This is also possible with Crawlee - the crawler can have the generation algorithm customized to reflect the particular browser version and many more. For description of fingerprint generation options please see <ApiLink to="class/HeaderGeneratorOptions">`HeaderGeneratorOptions`</ApiLink>, <ApiLink to="class/ScreenOptions">`ScreenOptions`</ApiLink> and <ApiLink to="class/BrowserforgeFingerprintGenerator#__init__">`DefaultFingerprintGenerator.__init__`</ApiLink> See the example bellow:
28
+ In certain cases we want to narrow down the fingerprints used - e.g. specify a certain operating system, locale or browser. This is also possible with Crawlee - the crawler can have the generation algorithm customized to reflect the particular browser version and many more. For description of fingerprint generation options please see <ApiLink to="class/HeaderGeneratorOptions">`HeaderGeneratorOptions`</ApiLink>, <ApiLink to="class/ScreenOptions">`ScreenOptions`</ApiLink> and <ApiLink to="class/BrowserforgeFingerprintGenerator#__init__">`DefaultFingerprintGenerator.__init__`</ApiLink> See the example below:
29
29
 
30
30
  <CodeBlock className="language-python">
31
31
  {PlaywrightDefaultFingerprintGeneratorWithArgs}
@@ -31,7 +31,7 @@ The [`request_loaders`](https://github.com/apify/crawlee-python/tree/master/src/
31
31
  And specific request loader implementations:
32
32
 
33
33
  - <ApiLink to="class/RequestList">`RequestList`</ApiLink>: A lightweight implementation for managing a static list of URLs.
34
- - <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink>: A specialized loader that reads URLs from XML sitemaps with filtering capabilities.
34
+ - <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink>: A specialized loader that reads URLs from XML and plain-text sitemaps following the [Sitemaps protocol](https://www.sitemaps.org/protocol.html) with filtering capabilities.
35
35
 
36
36
  Below is a class diagram that illustrates the relationships between these components and the <ApiLink to="class/RequestQueue">`RequestQueue`</ApiLink>:
37
37
 
@@ -130,7 +130,13 @@ To enable persistence, provide `persist_state_key` and optionally `persist_reque
130
130
 
131
131
  ### Sitemap request loader
132
132
 
133
- The <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> is a specialized request loader that reads URLs from XML sitemaps. It's particularly useful when you want to crawl a website systematically by following its sitemap structure. The loader supports filtering URLs using glob patterns and regular expressions, allowing you to include or exclude specific types of URLs. The <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> provides streaming processing of sitemaps, ensuring efficient memory usage without loading the entire sitemap into memory.
133
+ The <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> is a specialized request loader that reads URLs from sitemaps following the [Sitemaps protocol](https://www.sitemaps.org/protocol.html). It supports both XML and plain text sitemap formats. It's particularly useful when you want to crawl a website systematically by following its sitemap structure.
134
+
135
+ :::note
136
+ The `SitemapRequestLoader` is designed specifically for sitemaps that follow the standard Sitemaps protocol. HTML pages containing links are not supported by this loader - those should be handled by regular crawlers using the `enqueue_links` functionality.
137
+ :::
138
+
139
+ The loader supports filtering URLs using glob patterns and regular expressions, allowing you to include or exclude specific types of URLs. The <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> provides streaming processing of sitemaps, ensuring efficient memory usage without loading the entire sitemap into memory.
134
140
 
135
141
  <RunnableCodeBlock className="language-python" language="python">
136
142
  {SitemapExample}
@@ -50,7 +50,7 @@ apify login
50
50
 
51
51
  Now that you have your account set up, you will need to adjust the code a tiny bit. We will use the [Apify SDK](https://docs.apify.com/sdk/python/), which will help us to wire the Crawlee storages (like the [`RequestQueue`](https://docs.apify.com/sdk/python/reference/class/RequestQueue)) to their Apify platform counterparts - otherwise Crawlee would keep things only in memory.
52
52
 
53
- Open your `src/main.py` file, and wrap everyting in your `main` function with the [`Actor`](https://docs.apify.com/sdk/python/reference/class/Actor) context manager. Your code should look like this:
53
+ Open your `src/main.py` file, and wrap everything in your `main` function with the [`Actor`](https://docs.apify.com/sdk/python/reference/class/Actor) context manager. Your code should look like this:
54
54
 
55
55
  <CodeBlock className="language-python" title="src/main.py">
56
56
  {MainExample}
@@ -1,4 +1,4 @@
1
- # Line lenght different from the rest of the code to make sure that the example codes visualised on the generated
1
+ # Line length different from the rest of the code to make sure that the example codes visualised on the generated
2
2
  # documentation webpages are shown without vertical slider to make them more readable.
3
3
 
4
4
  [tool.ruff]
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "crawlee"
7
- version = "1.0.2b3"
7
+ version = "1.0.5b7"
8
8
  description = "Crawlee for Python"
9
9
  authors = [{ name = "Apify Technologies s.r.o.", email = "support@apify.com" }]
10
10
  license = { file = "LICENSE" }
@@ -107,7 +107,7 @@ dev = [
107
107
  "pytest-timeout~=2.4.0",
108
108
  "pytest-xdist~=3.8.0",
109
109
  "pytest~=8.4.0",
110
- "ruff~=0.13.0",
110
+ "ruff~=0.14.0",
111
111
  "setuptools", # setuptools are used by pytest, but not explicitly required
112
112
  "types-beautifulsoup4~=4.12.0.20240229",
113
113
  "types-cachetools~=6.2.0.20250827",
@@ -185,9 +185,6 @@ class Request(BaseModel):
185
185
  method: HttpMethod = 'GET'
186
186
  """HTTP request method."""
187
187
 
188
- headers: Annotated[HttpHeaders, Field(default_factory=HttpHeaders)] = HttpHeaders()
189
- """HTTP request headers."""
190
-
191
188
  payload: Annotated[
192
189
  HttpPayload | None,
193
190
  BeforeValidator(lambda v: v.encode() if isinstance(v, str) else v),
@@ -195,23 +192,37 @@ class Request(BaseModel):
195
192
  ] = None
196
193
  """HTTP request payload."""
197
194
 
198
- user_data: Annotated[
199
- dict[str, JsonSerializable], # Internally, the model contains `UserData`, this is just for convenience
200
- Field(alias='userData', default_factory=lambda: UserData()),
201
- PlainValidator(user_data_adapter.validate_python),
202
- PlainSerializer(
203
- lambda instance: user_data_adapter.dump_python(
204
- instance,
205
- by_alias=True,
206
- exclude_none=True,
207
- exclude_unset=True,
208
- exclude_defaults=True,
209
- )
210
- ),
211
- ] = {}
212
- """Custom user data assigned to the request. Use this to save any request related data to the
213
- request's scope, keeping them accessible on retries, failures etc.
214
- """
195
+ # Workaround for pydantic 2.12 and mypy type checking issue for Annotated with default_factory
196
+ if TYPE_CHECKING:
197
+ headers: HttpHeaders = HttpHeaders()
198
+ """HTTP request headers."""
199
+
200
+ user_data: dict[str, JsonSerializable] = {}
201
+ """Custom user data assigned to the request. Use this to save any request related data to the
202
+ request's scope, keeping them accessible on retries, failures etc.
203
+ """
204
+
205
+ else:
206
+ headers: Annotated[HttpHeaders, Field(default_factory=HttpHeaders)]
207
+ """HTTP request headers."""
208
+
209
+ user_data: Annotated[
210
+ dict[str, JsonSerializable], # Internally, the model contains `UserData`, this is just for convenience
211
+ Field(alias='userData', default_factory=lambda: UserData()),
212
+ PlainValidator(user_data_adapter.validate_python),
213
+ PlainSerializer(
214
+ lambda instance: user_data_adapter.dump_python(
215
+ instance,
216
+ by_alias=True,
217
+ exclude_none=True,
218
+ exclude_unset=True,
219
+ exclude_defaults=True,
220
+ )
221
+ ),
222
+ ]
223
+ """Custom user data assigned to the request. Use this to save any request related data to the
224
+ request's scope, keeping them accessible on retries, failures etc.
225
+ """
215
226
 
216
227
  retry_count: Annotated[int, Field(alias='retryCount')] = 0
217
228
  """Number of times the request has been retried."""
@@ -38,7 +38,7 @@ class ServiceLocator:
38
38
  def get_configuration(self) -> Configuration:
39
39
  """Get the configuration."""
40
40
  if self._configuration is None:
41
- logger.warning('No configuration set, implicitly creating and using default Configuration.')
41
+ logger.debug('No configuration set, implicitly creating and using default Configuration.')
42
42
  self._configuration = Configuration()
43
43
 
44
44
  return self._configuration
@@ -63,9 +63,9 @@ class ServiceLocator:
63
63
  def get_event_manager(self) -> EventManager:
64
64
  """Get the event manager."""
65
65
  if self._event_manager is None:
66
- logger.warning('No event manager set, implicitly creating and using default LocalEventManager.')
66
+ logger.debug('No event manager set, implicitly creating and using default LocalEventManager.')
67
67
  if self._configuration is None:
68
- logger.warning(
68
+ logger.debug(
69
69
  'Implicit creation of event manager will implicitly set configuration as side effect. '
70
70
  'It is advised to explicitly first set the configuration instead.'
71
71
  )
@@ -93,7 +93,7 @@ class ServiceLocator:
93
93
  def get_storage_client(self) -> StorageClient:
94
94
  """Get the storage client."""
95
95
  if self._storage_client is None:
96
- logger.warning('No storage client set, implicitly creating and using default FileSystemStorageClient.')
96
+ logger.debug('No storage client set, implicitly creating and using default FileSystemStorageClient.')
97
97
  if self._configuration is None:
98
98
  logger.warning(
99
99
  'Implicit creation of storage client will implicitly set configuration as side effect. '
@@ -3,17 +3,7 @@ from __future__ import annotations
3
3
  import dataclasses
4
4
  from collections.abc import Callable, Iterator, Mapping
5
5
  from dataclasses import dataclass
6
- from typing import (
7
- TYPE_CHECKING,
8
- Annotated,
9
- Any,
10
- Literal,
11
- Protocol,
12
- TypedDict,
13
- TypeVar,
14
- cast,
15
- overload,
16
- )
6
+ from typing import TYPE_CHECKING, Annotated, Any, Literal, Protocol, TypedDict, TypeVar, cast, overload
17
7
 
18
8
  from pydantic import ConfigDict, Field, PlainValidator, RootModel
19
9
 
@@ -71,11 +61,15 @@ class HttpHeaders(RootModel, Mapping[str, str]):
71
61
 
72
62
  model_config = ConfigDict(validate_by_name=True, validate_by_alias=True)
73
63
 
74
- root: Annotated[
75
- dict[str, str],
76
- PlainValidator(lambda value: _normalize_headers(value)),
77
- Field(default_factory=dict),
78
- ] = {}
64
+ # Workaround for pydantic 2.12 and mypy type checking issue for Annotated with default_factory
65
+ if TYPE_CHECKING:
66
+ root: dict[str, str] = {}
67
+ else:
68
+ root: Annotated[
69
+ dict[str, str],
70
+ PlainValidator(lambda value: _normalize_headers(value)),
71
+ Field(default_factory=dict),
72
+ ]
79
73
 
80
74
  def __getitem__(self, key: str) -> str:
81
75
  return self.root[key.lower()]
@@ -4,12 +4,14 @@ from typing import TYPE_CHECKING, Generic, Literal, TypeVar
4
4
 
5
5
  from pydantic import BaseModel
6
6
 
7
+ from crawlee._utils.raise_if_too_many_kwargs import raise_if_too_many_kwargs
7
8
  from crawlee.events._types import Event, EventPersistStateData
8
9
 
9
10
  if TYPE_CHECKING:
10
11
  import logging
12
+ from collections.abc import Callable, Coroutine
11
13
 
12
- from crawlee.storages._key_value_store import KeyValueStore
14
+ from crawlee.storages import KeyValueStore
13
15
 
14
16
  TStateModel = TypeVar('TStateModel', bound=BaseModel)
15
17
 
@@ -37,6 +39,7 @@ class RecoverableState(Generic[TStateModel]):
37
39
  persistence_enabled: Literal[True, False, 'explicit_only'] = False,
38
40
  persist_state_kvs_name: str | None = None,
39
41
  persist_state_kvs_id: str | None = None,
42
+ persist_state_kvs_factory: Callable[[], Coroutine[None, None, KeyValueStore]] | None = None,
40
43
  logger: logging.Logger,
41
44
  ) -> None:
42
45
  """Initialize a new recoverable state object.
@@ -51,16 +54,40 @@ class RecoverableState(Generic[TStateModel]):
51
54
  If neither a name nor an id is supplied, the default store will be used.
52
55
  persist_state_kvs_id: The identifier of the KeyValueStore to use for persistence.
53
56
  If neither a name nor an id is supplied, the default store will be used.
57
+ persist_state_kvs_factory: Factory that can be awaited to create KeyValueStore to use for persistence. If
58
+ not provided, a system-wide KeyValueStore will be used, based on service locator configuration.
54
59
  logger: A logger instance for logging operations related to state persistence
55
60
  """
61
+ raise_if_too_many_kwargs(
62
+ persist_state_kvs_name=persist_state_kvs_name,
63
+ persist_state_kvs_id=persist_state_kvs_id,
64
+ persist_state_kvs_factory=persist_state_kvs_factory,
65
+ )
66
+ if not persist_state_kvs_factory:
67
+ logger.debug(
68
+ 'No explicit key_value_store set for recoverable state. Recovery will use a system-wide KeyValueStore '
69
+ 'based on service_locator configuration, potentially calling service_locator.set_storage_client in the '
70
+ 'process. It is recommended to initialize RecoverableState with explicit key_value_store to avoid '
71
+ 'global side effects.'
72
+ )
73
+
56
74
  self._default_state = default_state
57
75
  self._state_type: type[TStateModel] = self._default_state.__class__
58
76
  self._state: TStateModel | None = None
59
77
  self._persistence_enabled = persistence_enabled
60
78
  self._persist_state_key = persist_state_key
61
- self._persist_state_kvs_name = persist_state_kvs_name
62
- self._persist_state_kvs_id = persist_state_kvs_id
63
- self._key_value_store: 'KeyValueStore | None' = None # noqa: UP037
79
+ if persist_state_kvs_factory is None:
80
+
81
+ async def kvs_factory() -> KeyValueStore:
82
+ from crawlee.storages import KeyValueStore # noqa: PLC0415 avoid circular import
83
+
84
+ return await KeyValueStore.open(name=persist_state_kvs_name, id=persist_state_kvs_id)
85
+
86
+ self._persist_state_kvs_factory = kvs_factory
87
+ else:
88
+ self._persist_state_kvs_factory = persist_state_kvs_factory
89
+
90
+ self._key_value_store: KeyValueStore | None = None
64
91
  self._log = logger
65
92
 
66
93
  async def initialize(self) -> TStateModel:
@@ -77,11 +104,8 @@ class RecoverableState(Generic[TStateModel]):
77
104
  return self.current_value
78
105
 
79
106
  # Import here to avoid circular imports.
80
- from crawlee.storages._key_value_store import KeyValueStore # noqa: PLC0415
81
107
 
82
- self._key_value_store = await KeyValueStore.open(
83
- name=self._persist_state_kvs_name, id=self._persist_state_kvs_id
84
- )
108
+ self._key_value_store = await self._persist_state_kvs_factory()
85
109
 
86
110
  await self._load_saved_state()
87
111
 
@@ -7,6 +7,9 @@ from typing import TYPE_CHECKING
7
7
  if TYPE_CHECKING:
8
8
  from collections.abc import Callable
9
9
  from datetime import timedelta
10
+ from types import TracebackType
11
+
12
+ from typing_extensions import Self
10
13
 
11
14
  logger = getLogger(__name__)
12
15
 
@@ -26,6 +29,18 @@ class RecurringTask:
26
29
  self.delay = delay
27
30
  self.task: asyncio.Task | None = None
28
31
 
32
+ async def __aenter__(self) -> Self:
33
+ self.start()
34
+ return self
35
+
36
+ async def __aexit__(
37
+ self,
38
+ exc_type: type[BaseException] | None,
39
+ exc_value: BaseException | None,
40
+ exc_traceback: TracebackType | None,
41
+ ) -> None:
42
+ await self.stop()
43
+
29
44
  async def _wrapper(self) -> None:
30
45
  """Continuously execute the provided function with the specified delay.
31
46
 
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from logging import getLogger
3
4
  from typing import TYPE_CHECKING
4
5
 
5
6
  from protego import Protego
@@ -15,6 +16,9 @@ if TYPE_CHECKING:
15
16
  from crawlee.proxy_configuration import ProxyInfo
16
17
 
17
18
 
19
+ logger = getLogger(__name__)
20
+
21
+
18
22
  class RobotsTxtFile:
19
23
  def __init__(
20
24
  self, url: str, robots: Protego, http_client: HttpClient | None = None, proxy_info: ProxyInfo | None = None
@@ -56,12 +60,20 @@ class RobotsTxtFile:
56
60
  http_client: The `HttpClient` instance used to perform the network request for fetching the robots.txt file.
57
61
  proxy_info: Optional `ProxyInfo` to be used when fetching the robots.txt file. If None, no proxy is used.
58
62
  """
59
- response = await http_client.send_request(url, proxy_info=proxy_info)
60
- body = (
61
- b'User-agent: *\nAllow: /' if is_status_code_client_error(response.status_code) else await response.read()
62
- )
63
+ try:
64
+ response = await http_client.send_request(url, proxy_info=proxy_info)
65
+
66
+ body = (
67
+ b'User-agent: *\nAllow: /'
68
+ if is_status_code_client_error(response.status_code)
69
+ else await response.read()
70
+ )
71
+ robots = Protego.parse(body.decode('utf-8'))
72
+
73
+ except Exception as e:
74
+ logger.warning(f'Failed to fetch from robots.txt from "{url}" with error: "{e}"')
63
75
 
64
- robots = Protego.parse(body.decode('utf-8'))
76
+ robots = Protego.parse('User-agent: *\nAllow: /')
65
77
 
66
78
  return cls(url, robots, http_client=http_client, proxy_info=proxy_info)
67
79
 
@@ -335,7 +335,7 @@ async def _fetch_and_process_sitemap(
335
335
  # Check if the first chunk is a valid gzip header
336
336
  if first_chunk and raw_chunk.startswith(b'\x1f\x8b'):
337
337
  decompressor = zlib.decompressobj(zlib.MAX_WBITS | 16)
338
- first_chunk = False
338
+ first_chunk = False
339
339
 
340
340
  chunk = decompressor.decompress(raw_chunk) if decompressor else raw_chunk
341
341
  text_chunk = decoder.decode(chunk)
@@ -7,6 +7,7 @@ from yarl import URL
7
7
 
8
8
  if TYPE_CHECKING:
9
9
  from collections.abc import Iterator
10
+ from logging import Logger
10
11
 
11
12
 
12
13
  def is_url_absolute(url: str) -> bool:
@@ -22,13 +23,19 @@ def convert_to_absolute_url(base_url: str, relative_url: str) -> str:
22
23
  return str(URL(base_url).join(URL(relative_url)))
23
24
 
24
25
 
25
- def to_absolute_url_iterator(base_url: str, urls: Iterator[str]) -> Iterator[str]:
26
+ def to_absolute_url_iterator(base_url: str, urls: Iterator[str], logger: Logger | None = None) -> Iterator[str]:
26
27
  """Convert an iterator of relative URLs to absolute URLs using a base URL."""
27
28
  for url in urls:
28
29
  if is_url_absolute(url):
29
30
  yield url
30
31
  else:
31
- yield convert_to_absolute_url(base_url, url)
32
+ converted_url = convert_to_absolute_url(base_url, url)
33
+ # Skip the URL if conversion fails, probably due to an incorrect format, such as 'mailto:'.
34
+ if not is_url_absolute(converted_url):
35
+ if logger:
36
+ logger.debug(f'Could not convert URL "{url}" to absolute using base URL "{base_url}". Skipping it.')
37
+ continue
38
+ yield converted_url
32
39
 
33
40
 
34
41
  _http_url_adapter = TypeAdapter(AnyHttpUrl)
@@ -118,7 +118,10 @@ class BrowserPool:
118
118
  """Initialize a new instance with a single `PlaywrightBrowserPlugin` configured with the provided options.
119
119
 
120
120
  Args:
121
- browser_type: The type of browser to launch ('chromium', 'firefox', or 'webkit').
121
+ browser_type: The type of browser to launch:
122
+ - 'chromium', 'firefox', 'webkit': Use Playwright-managed browsers
123
+ - 'chrome': Use your locally installed Google Chrome browser. Requires Google Chrome to be installed on
124
+ the system.
122
125
  user_data_dir: Path to a user data directory, which stores browser session data like cookies
123
126
  and local storage.
124
127
  browser_launch_options: Keyword arguments to pass to the browser launch method. These options are provided
@@ -216,7 +216,7 @@ class PlaywrightBrowserController(BrowserController):
216
216
  browser_new_context_options = dict(browser_new_context_options) if browser_new_context_options else {}
217
217
  if proxy_info:
218
218
  if browser_new_context_options.get('proxy'):
219
- logger.warning("browser_new_context_options['proxy'] overriden by explicit `proxy_info` argument.")
219
+ logger.warning("browser_new_context_options['proxy'] overridden by explicit `proxy_info` argument.")
220
220
 
221
221
  browser_new_context_options['proxy'] = ProxySettings(
222
222
  server=f'{proxy_info.scheme}://{proxy_info.hostname}:{proxy_info.port}',
@@ -34,8 +34,8 @@ class PlaywrightBrowserPlugin(BrowserPlugin):
34
34
 
35
35
  It is a plugin designed to manage browser instances using the Playwright automation library. It acts as a factory
36
36
  for creating new browser instances and provides a unified interface for interacting with different browser types
37
- (chromium, firefox, and webkit). This class integrates configuration options for browser launches (headless mode,
38
- executable paths, sandboxing, ...). It also manages browser contexts and the number of pages open within each
37
+ (chromium, firefox, webkit and chrome). This class integrates configuration options for browser launches (headless
38
+ mode, executable paths, sandboxing, ...). It also manages browser contexts and the number of pages open within each
39
39
  browser instance, ensuring that resource limits are respected.
40
40
  """
41
41
 
@@ -55,7 +55,10 @@ class PlaywrightBrowserPlugin(BrowserPlugin):
55
55
  """Initialize a new instance.
56
56
 
57
57
  Args:
58
- browser_type: The type of browser to launch ('chromium', 'firefox', or 'webkit').
58
+ browser_type: The type of browser to launch:
59
+ - 'chromium', 'firefox', 'webkit': Use Playwright-managed browsers
60
+ - 'chrome': Use your locally installed Google Chrome browser. Requires Google Chrome to be installed on
61
+ the system.
59
62
  user_data_dir: Path to a User Data Directory, which stores browser session data like cookies and local
60
63
  storage.
61
64
  browser_launch_options: Keyword arguments to pass to the browser launch method. These options are provided
@@ -80,6 +83,17 @@ class PlaywrightBrowserPlugin(BrowserPlugin):
80
83
  'chromium_sandbox': not config.disable_browser_sandbox,
81
84
  }
82
85
 
86
+ if browser_type == 'chrome' and default_launch_browser_options['executable_path']:
87
+ raise ValueError(
88
+ 'Cannot use browser_type `chrome` with `Configuration.default_browser_path` or `executable_path` set.'
89
+ )
90
+
91
+ # Map 'chrome' to 'chromium' with the 'chrome' channel.
92
+ if browser_type == 'chrome':
93
+ browser_type = 'chromium'
94
+ # Chromium parameter 'channel' set to 'chrome' enables using installed Google Chrome.
95
+ default_launch_browser_options['channel'] = 'chrome'
96
+
83
97
  self._browser_type: BrowserType = browser_type
84
98
  self._browser_launch_options: dict[str, Any] = default_launch_browser_options | (browser_launch_options or {})
85
99
  self._browser_new_context_options = browser_new_context_options or {}
@@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, Literal
6
6
  if TYPE_CHECKING:
7
7
  from playwright.async_api import Page
8
8
 
9
- BrowserType = Literal['chromium', 'firefox', 'webkit']
9
+ BrowserType = Literal['chromium', 'firefox', 'webkit', 'chrome']
10
10
 
11
11
 
12
12
  @dataclass