crawlee 1.0.4b4__tar.gz → 1.0.5b8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (680)
  1. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/.gitignore +1 -0
  2. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/CHANGELOG.md +17 -2
  3. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/PKG-INFO +1 -1
  4. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/using_browser_profiles_chrome.py +2 -4
  5. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/using_browser_profile.mdx +0 -2
  6. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/architecture_overview.mdx +1 -1
  7. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/avoid_blocking.mdx +1 -1
  8. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/request_loaders.mdx +8 -2
  9. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/09_running_in_cloud.mdx +1 -1
  10. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/pyproject.toml +1 -1
  11. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/pyproject.toml +1 -1
  12. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_utils/recurring_task.py +15 -0
  13. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_utils/robots.py +17 -5
  14. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/browsers/_browser_pool.py +4 -1
  15. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/browsers/_playwright_browser_controller.py +1 -1
  16. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/browsers/_playwright_browser_plugin.py +17 -3
  17. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/browsers/_types.py +1 -1
  18. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +33 -12
  19. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_basic/_basic_crawler.py +22 -12
  20. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_playwright/_playwright_crawler.py +8 -3
  21. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/fingerprint_suite/_header_generator.py +2 -2
  22. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/request_loaders/_sitemap_request_loader.py +5 -0
  23. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/statistics/_error_snapshotter.py +1 -1
  24. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/statistics/_statistics.py +9 -5
  25. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/_sql/_db_models.py +1 -2
  26. crawlee-1.0.5b8/tests/unit/_autoscaling/test_snapshotter.py +353 -0
  27. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/_utils/test_sitemap.py +0 -4
  28. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/browsers/test_playwright_browser_plugin.py +10 -0
  29. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/conftest.py +13 -6
  30. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler.py +105 -4
  31. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/crawlers/_basic/test_basic_crawler.py +58 -0
  32. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py +35 -1
  33. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/crawlers/_http/test_http_crawler.py +2 -2
  34. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/crawlers/_parsel/test_parsel_crawler.py +35 -1
  35. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/crawlers/_playwright/test_playwright_crawler.py +34 -1
  36. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/http_clients/test_http_clients.py +0 -5
  37. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/server.py +10 -0
  38. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/server_endpoints.py +10 -0
  39. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/storages/test_request_queue.py +3 -2
  40. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/test_configuration.py +32 -6
  41. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/uv.lock +735 -683
  42. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/package.json +4 -0
  43. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/pages/index.js +1 -1
  44. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/yarn.lock +571 -550
  45. crawlee-1.0.4b4/tests/unit/_autoscaling/test_snapshotter.py +0 -333
  46. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/.editorconfig +0 -0
  47. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/.github/CODEOWNERS +0 -0
  48. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/.github/pull_request_template.md +0 -0
  49. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/.github/workflows/build_and_deploy_docs.yaml +0 -0
  50. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/.github/workflows/check_pr_title.yaml +0 -0
  51. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/.github/workflows/pre_release.yaml +0 -0
  52. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/.github/workflows/release.yaml +0 -0
  53. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/.github/workflows/run_code_checks.yaml +0 -0
  54. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/.github/workflows/templates_e2e_tests.yaml +0 -0
  55. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/.github/workflows/update_new_issue.yaml +0 -0
  56. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/.markdownlint.yaml +0 -0
  57. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/.pre-commit-config.yaml +0 -0
  58. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/CONTRIBUTING.md +0 -0
  59. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/LICENSE +0 -0
  60. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/Makefile +0 -0
  61. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/README.md +0 -0
  62. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/deployment/apify_platform.mdx +0 -0
  63. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/deployment/code_examples/apify/crawler_as_actor_example.py +0 -0
  64. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/deployment/code_examples/apify/get_public_url.py +0 -0
  65. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/deployment/code_examples/apify/log_with_config_example.py +0 -0
  66. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/deployment/code_examples/apify/proxy_advanced_example.py +0 -0
  67. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/deployment/code_examples/apify/proxy_example.py +0 -0
  68. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/deployment/code_examples/google/cloud_run_example.py +0 -0
  69. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/deployment/code_examples/google/google_example.py +0 -0
  70. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/deployment/google_cloud.mdx +0 -0
  71. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/deployment/google_cloud_run.mdx +0 -0
  72. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/add_data_to_dataset.mdx +0 -0
  73. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/beautifulsoup_crawler.mdx +0 -0
  74. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/capture_screenshot_using_playwright.mdx +0 -0
  75. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/capturing_page_snapshots_with_error_snapshotter.mdx +0 -0
  76. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/adaptive_playwright_crawler.py +0 -0
  77. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/add_data_to_dataset_bs.py +0 -0
  78. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/add_data_to_dataset_dataset.py +0 -0
  79. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/add_data_to_dataset_pw.py +0 -0
  80. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/beautifulsoup_crawler.py +0 -0
  81. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/beautifulsoup_crawler_keep_alive.py +0 -0
  82. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/beautifulsoup_crawler_stop.py +0 -0
  83. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/capture_screenshot_using_playwright.py +0 -0
  84. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/configure_json_logging.py +0 -0
  85. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/crawl_all_links_on_website_bs.py +0 -0
  86. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/crawl_all_links_on_website_pw.py +0 -0
  87. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/crawl_multiple_urls_bs.py +0 -0
  88. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/crawl_multiple_urls_pw.py +0 -0
  89. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/crawl_specific_links_on_website_bs.py +0 -0
  90. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/crawl_specific_links_on_website_pw.py +0 -0
  91. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/crawl_website_with_relative_links_all_links.py +0 -0
  92. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/crawl_website_with_relative_links_same_domain.py +0 -0
  93. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/crawl_website_with_relative_links_same_hostname.py +0 -0
  94. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/crawl_website_with_relative_links_same_origin.py +0 -0
  95. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/export_entire_dataset_to_file_csv.py +0 -0
  96. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/export_entire_dataset_to_file_json.py +0 -0
  97. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/extract_and_add_specific_links_on_website_bs.py +0 -0
  98. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/extract_and_add_specific_links_on_website_pw.py +0 -0
  99. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/fill_and_submit_web_form_crawler.py +0 -0
  100. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/fill_and_submit_web_form_request.py +0 -0
  101. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/parsel_crawler.py +0 -0
  102. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/parsel_crawler_with_error_snapshotter.py +0 -0
  103. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/playwright_block_requests.py +0 -0
  104. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/playwright_crawler.py +0 -0
  105. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/playwright_crawler_with_camoufox.py +0 -0
  106. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/playwright_crawler_with_error_snapshotter.py +0 -0
  107. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/playwright_crawler_with_fingerprint_generator.py +0 -0
  108. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/respect_robots_on_skipped_request.py +0 -0
  109. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/respect_robots_txt_file.py +0 -0
  110. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/resuming_paused_crawl.py +0 -0
  111. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/code_examples/using_browser_profiles_firefox.py +0 -0
  112. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/crawl_all_links_on_website.mdx +0 -0
  113. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/crawl_multiple_urls.mdx +0 -0
  114. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/crawl_specific_links_on_website.mdx +0 -0
  115. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/crawl_website_with_relative_links.mdx +0 -0
  116. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/crawler_keep_alive.mdx +0 -0
  117. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/crawler_stop.mdx +0 -0
  118. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/export_entire_dataset_to_file.mdx +0 -0
  119. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/fill_and_submit_web_form.mdx +0 -0
  120. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/json_logging.mdx +0 -0
  121. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/parsel_crawler.mdx +0 -0
  122. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/playwright_crawler.mdx +0 -0
  123. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/playwright_crawler_adaptive.mdx +0 -0
  124. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/playwright_crawler_with_block_requests.mdx +0 -0
  125. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/playwright_crawler_with_camoufox.mdx +0 -0
  126. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/playwright_crawler_with_fingerprint_generator.mdx +0 -0
  127. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/respect_robots_txt_file.mdx +0 -0
  128. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/examples/resuming_paused_crawl.mdx +0 -0
  129. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/avoid_blocking/default_fingerprint_generator_with_args.py +0 -0
  130. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/avoid_blocking/playwright_with_fingerprint_generator.py +0 -0
  131. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/creating_web_archive/manual_archiving_parsel_crawler.py +0 -0
  132. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/creating_web_archive/manual_archiving_playwright_crawler.py +0 -0
  133. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/creating_web_archive/simple_pw_through_proxy_pywb_server.py +0 -0
  134. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/error_handling/change_handle_error_status.py +0 -0
  135. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/error_handling/disable_retry.py +0 -0
  136. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/error_handling/handle_proxy_error.py +0 -0
  137. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/http_clients/parsel_curl_impersonate_example.py +0 -0
  138. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/http_clients/parsel_httpx_example.py +0 -0
  139. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/http_clients/parsel_impit_example.py +0 -0
  140. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/http_crawlers/beautifulsoup_example.py +0 -0
  141. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/http_crawlers/custom_crawler_example.py +0 -0
  142. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/http_crawlers/http_example.py +0 -0
  143. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/http_crawlers/parsel_example.py +0 -0
  144. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/login_crawler/http_login.py +0 -0
  145. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/login_crawler/playwright_login.py +0 -0
  146. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/playwright_crawler/browser_configuration_example.py +0 -0
  147. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/playwright_crawler/multiple_launch_example.py +0 -0
  148. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/playwright_crawler/plugin_browser_configuration_example.py +0 -0
  149. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/playwright_crawler/pre_navigation_hook_example.py +0 -0
  150. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/playwright_crawler_adaptive/handler.py +0 -0
  151. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/playwright_crawler_adaptive/init_beautifulsoup.py +0 -0
  152. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/playwright_crawler_adaptive/init_parsel.py +0 -0
  153. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/playwright_crawler_adaptive/init_prediction.py +0 -0
  154. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/playwright_crawler_adaptive/pre_nav_hooks.py +0 -0
  155. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/playwright_crawler_stagehand/__init__.py +0 -0
  156. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/playwright_crawler_stagehand/browser_classes.py +0 -0
  157. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/playwright_crawler_stagehand/stagehand_run.py +0 -0
  158. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/playwright_crawler_stagehand/support_classes.py +0 -0
  159. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/proxy_management/inspecting_bs_example.py +0 -0
  160. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/proxy_management/inspecting_pw_example.py +0 -0
  161. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/proxy_management/integration_bs_example.py +0 -0
  162. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/proxy_management/integration_pw_example.py +0 -0
  163. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/proxy_management/quick_start_example.py +0 -0
  164. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/proxy_management/session_bs_example.py +0 -0
  165. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/proxy_management/session_pw_example.py +0 -0
  166. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/proxy_management/tiers_bs_example.py +0 -0
  167. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/proxy_management/tiers_pw_example.py +0 -0
  168. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/request_loaders/rl_basic_example.py +0 -0
  169. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/request_loaders/rl_basic_example_with_persist.py +0 -0
  170. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/request_loaders/rl_tandem_example.py +0 -0
  171. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/request_loaders/rl_tandem_example_explicit.py +0 -0
  172. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/request_loaders/sitemap_basic_example.py +0 -0
  173. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/request_loaders/sitemap_example_with_persist.py +0 -0
  174. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/request_loaders/sitemap_tandem_example.py +0 -0
  175. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/request_loaders/sitemap_tandem_example_explicit.py +0 -0
  176. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/request_router/adaptive_crawler_handlers.py +0 -0
  177. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/request_router/basic_request_handlers.py +0 -0
  178. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/request_router/custom_router_default_only.py +0 -0
  179. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/request_router/error_handler.py +0 -0
  180. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/request_router/failed_request_handler.py +0 -0
  181. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/request_router/http_pre_navigation.py +0 -0
  182. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/request_router/playwright_pre_navigation.py +0 -0
  183. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/request_router/simple_default_handler.py +0 -0
  184. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/running_in_web_server/__init__.py +0 -0
  185. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/running_in_web_server/crawler.py +0 -0
  186. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/running_in_web_server/server.py +0 -0
  187. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/scaling_crawlers/max_tasks_per_minute_example.py +0 -0
  188. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/scaling_crawlers/min_and_max_concurrency_example.py +0 -0
  189. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/service_locator/service_conflicts.py +0 -0
  190. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/service_locator/service_crawler_configuration.py +0 -0
  191. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/service_locator/service_crawler_event_manager.py +0 -0
  192. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/service_locator/service_crawler_storage_client.py +0 -0
  193. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/service_locator/service_locator_configuration.py +0 -0
  194. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/service_locator/service_locator_event_manager.py +0 -0
  195. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/service_locator/service_locator_storage_client.py +0 -0
  196. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/service_locator/service_storage_configuration.py +0 -0
  197. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/service_locator/service_storage_storage_client.py +0 -0
  198. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/session_management/multi_sessions_http.py +0 -0
  199. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/session_management/one_session_http.py +0 -0
  200. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/session_management/sm_basic.py +0 -0
  201. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/session_management/sm_beautifulsoup.py +0 -0
  202. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/session_management/sm_http.py +0 -0
  203. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/session_management/sm_parsel.py +0 -0
  204. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/session_management/sm_playwright.py +0 -0
  205. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/session_management/sm_standalone.py +0 -0
  206. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/storage_clients/custom_storage_client_example.py +0 -0
  207. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/storage_clients/file_system_storage_client_basic_example.py +0 -0
  208. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/storage_clients/file_system_storage_client_configuration_example.py +0 -0
  209. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/storage_clients/memory_storage_client_basic_example.py +0 -0
  210. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/storage_clients/registering_storage_clients_example.py +0 -0
  211. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/storage_clients/sql_storage_client_basic_example.py +0 -0
  212. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/storage_clients/sql_storage_client_configuration_example.py +0 -0
  213. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/storages/cleaning_do_not_purge_example.py +0 -0
  214. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/storages/cleaning_purge_explicitly_example.py +0 -0
  215. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/storages/dataset_basic_example.py +0 -0
  216. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/storages/dataset_with_crawler_example.py +0 -0
  217. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/storages/dataset_with_crawler_explicit_example.py +0 -0
  218. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/storages/helper_add_requests_example.py +0 -0
  219. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/storages/helper_enqueue_links_example.py +0 -0
  220. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/storages/kvs_basic_example.py +0 -0
  221. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/storages/kvs_with_crawler_example.py +0 -0
  222. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/storages/kvs_with_crawler_explicit_example.py +0 -0
  223. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/storages/opening.py +0 -0
  224. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/storages/rq_basic_example.py +0 -0
  225. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/storages/rq_with_crawler_example.py +0 -0
  226. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/storages/rq_with_crawler_explicit_example.py +0 -0
  227. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/code_examples/trace_and_monitor_crawlers/instrument_crawler.py +0 -0
  228. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/crawler_login.mdx +0 -0
  229. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/creating_web_archive.mdx +0 -0
  230. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/error_handling.mdx +0 -0
  231. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/http_clients.mdx +0 -0
  232. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/http_crawlers.mdx +0 -0
  233. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/playwright_crawler.mdx +0 -0
  234. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/playwright_crawler_adaptive.mdx +0 -0
  235. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/playwright_crawler_stagehand.mdx +0 -0
  236. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/proxy_management.mdx +0 -0
  237. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/request_router.mdx +0 -0
  238. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/running_in_web_server.mdx +0 -0
  239. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/scaling_crawlers.mdx +0 -0
  240. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/service_locator.mdx +0 -0
  241. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/session_management.mdx +0 -0
  242. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/storage_clients.mdx +0 -0
  243. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/storages.mdx +0 -0
  244. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/guides/trace_and_monitor_crawlers.mdx +0 -0
  245. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/01_setting_up.mdx +0 -0
  246. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/02_first_crawler.mdx +0 -0
  247. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/03_adding_more_urls.mdx +0 -0
  248. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/04_real_world_project.mdx +0 -0
  249. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/05_crawling.mdx +0 -0
  250. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/06_scraping.mdx +0 -0
  251. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/07_saving_data.mdx +0 -0
  252. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/08_refactoring.mdx +0 -0
  253. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/code_examples/02_bs.py +0 -0
  254. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/code_examples/02_bs_better.py +0 -0
  255. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/code_examples/02_request_queue.py +0 -0
  256. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/code_examples/03_enqueue_strategy.py +0 -0
  257. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/code_examples/03_finding_new_links.py +0 -0
  258. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/code_examples/03_globs.py +0 -0
  259. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/code_examples/03_original_code.py +0 -0
  260. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/code_examples/03_transform_request.py +0 -0
  261. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/code_examples/04_sanity_check.py +0 -0
  262. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/code_examples/05_crawling_detail.py +0 -0
  263. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/code_examples/05_crawling_listing.py +0 -0
  264. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/code_examples/06_scraping.py +0 -0
  265. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/code_examples/07_final_code.py +0 -0
  266. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/code_examples/07_first_code.py +0 -0
  267. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/code_examples/08_main.py +0 -0
  268. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/code_examples/08_routes.py +0 -0
  269. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/code_examples/09_apify_sdk.py +0 -0
  270. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/code_examples/__init__.py +0 -0
  271. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/code_examples/routes.py +0 -0
  272. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/introduction/index.mdx +0 -0
  273. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/quick-start/code_examples/beautifulsoup_crawler_example.py +0 -0
  274. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/quick-start/code_examples/parsel_crawler_example.py +0 -0
  275. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/quick-start/code_examples/playwright_crawler_example.py +0 -0
  276. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/quick-start/code_examples/playwright_crawler_headful_example.py +0 -0
  277. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/quick-start/index.mdx +0 -0
  278. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/upgrading/upgrading_to_v0x.md +0 -0
  279. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/docs/upgrading/upgrading_to_v1.md +0 -0
  280. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/renovate.json +0 -0
  281. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/__init__.py +0 -0
  282. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_autoscaling/__init__.py +0 -0
  283. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_autoscaling/_types.py +0 -0
  284. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_autoscaling/autoscaled_pool.py +0 -0
  285. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_autoscaling/py.typed +0 -0
  286. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_autoscaling/snapshotter.py +0 -0
  287. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_autoscaling/system_status.py +0 -0
  288. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_browserforge_workaround.py +0 -0
  289. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_cli.py +0 -0
  290. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_consts.py +0 -0
  291. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_log_config.py +0 -0
  292. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_request.py +0 -0
  293. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_service_locator.py +0 -0
  294. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_types.py +0 -0
  295. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_utils/__init__.py +0 -0
  296. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_utils/blocked.py +0 -0
  297. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_utils/byte_size.py +0 -0
  298. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_utils/console.py +0 -0
  299. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_utils/context.py +0 -0
  300. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_utils/crypto.py +0 -0
  301. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_utils/docs.py +0 -0
  302. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_utils/file.py +0 -0
  303. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_utils/globs.py +0 -0
  304. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_utils/html_to_text.py +0 -0
  305. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_utils/models.py +0 -0
  306. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_utils/raise_if_too_many_kwargs.py +0 -0
  307. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_utils/recoverable_state.py +0 -0
  308. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_utils/requests.py +0 -0
  309. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_utils/sitemap.py +0 -0
  310. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_utils/system.py +0 -0
  311. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_utils/time.py +0 -0
  312. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_utils/try_import.py +0 -0
  313. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_utils/urls.py +0 -0
  314. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_utils/wait.py +0 -0
  315. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/_utils/web.py +0 -0
  316. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/browsers/__init__.py +0 -0
  317. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/browsers/_browser_controller.py +0 -0
  318. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/browsers/_browser_plugin.py +0 -0
  319. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/browsers/_playwright_browser.py +0 -0
  320. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/browsers/py.typed +0 -0
  321. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/configuration.py +0 -0
  322. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/__init__.py +0 -0
  323. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_abstract_http/__init__.py +0 -0
  324. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +0 -0
  325. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_abstract_http/_abstract_http_parser.py +0 -0
  326. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_abstract_http/_http_crawling_context.py +0 -0
  327. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_abstract_http/py.typed +0 -0
  328. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_adaptive_playwright/__init__.py +0 -0
  329. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py +0 -0
  330. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py +0 -0
  331. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py +0 -0
  332. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_adaptive_playwright/_result_comparator.py +0 -0
  333. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_adaptive_playwright/_utils.py +0 -0
  334. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_basic/__init__.py +0 -0
  335. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_basic/_basic_crawling_context.py +0 -0
  336. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_basic/_context_pipeline.py +0 -0
  337. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_basic/_logging_utils.py +0 -0
  338. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_basic/py.typed +0 -0
  339. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_beautifulsoup/__init__.py +0 -0
  340. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawler.py +0 -0
  341. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawling_context.py +0 -0
  342. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_parser.py +0 -0
  343. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_beautifulsoup/_utils.py +0 -0
  344. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_beautifulsoup/py.typed +0 -0
  345. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_http/__init__.py +0 -0
  346. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_http/_http_crawler.py +0 -0
  347. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_http/_http_parser.py +0 -0
  348. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_parsel/__init__.py +0 -0
  349. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_parsel/_parsel_crawler.py +0 -0
  350. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_parsel/_parsel_crawling_context.py +0 -0
  351. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_parsel/_parsel_parser.py +0 -0
  352. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_parsel/_utils.py +0 -0
  353. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_playwright/__init__.py +0 -0
  354. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_playwright/_playwright_crawling_context.py +0 -0
  355. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_playwright/_playwright_http_client.py +0 -0
  356. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py +0 -0
  357. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_playwright/_types.py +0 -0
  358. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_playwright/_utils.py +0 -0
  359. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/_types.py +0 -0
  360. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/crawlers/py.typed +0 -0
  361. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/errors.py +0 -0
  362. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/events/__init__.py +0 -0
  363. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/events/_event_manager.py +0 -0
  364. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/events/_local_event_manager.py +0 -0
  365. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/events/_types.py +0 -0
  366. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/events/py.typed +0 -0
  367. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/fingerprint_suite/__init__.py +0 -0
  368. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/fingerprint_suite/_browserforge_adapter.py +0 -0
  369. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/fingerprint_suite/_consts.py +0 -0
  370. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/fingerprint_suite/_fingerprint_generator.py +0 -0
  371. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/fingerprint_suite/_types.py +0 -0
  372. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/fingerprint_suite/py.typed +0 -0
  373. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/http_clients/__init__.py +0 -0
  374. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/http_clients/_base.py +0 -0
  375. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/http_clients/_curl_impersonate.py +0 -0
  376. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/http_clients/_httpx.py +0 -0
  377. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/http_clients/_impit.py +0 -0
  378. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/otel/__init__.py +0 -0
  379. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/otel/crawler_instrumentor.py +0 -0
  380. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/project_template/cookiecutter.json +0 -0
  381. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/project_template/hooks/post_gen_project.py +0 -0
  382. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/project_template/hooks/pre_gen_project.py +0 -0
  383. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/project_template/templates/main.py +0 -0
  384. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/project_template/templates/main_beautifulsoup.py +0 -0
  385. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/project_template/templates/main_parsel.py +0 -0
  386. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/project_template/templates/main_playwright.py +0 -0
  387. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/project_template/templates/main_playwright_camoufox.py +0 -0
  388. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/project_template/templates/routes_beautifulsoup.py +0 -0
  389. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/project_template/templates/routes_camoufox.py +0 -0
  390. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/project_template/templates/routes_parsel.py +0 -0
  391. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/project_template/templates/routes_playwright.py +0 -0
  392. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/project_template/templates/routes_playwright_camoufox.py +0 -0
  393. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/project_template/{{cookiecutter.project_name}}/.dockerignore +0 -0
  394. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile +0 -0
  395. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/project_template/{{cookiecutter.project_name}}/README.md +0 -0
  396. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml +0 -0
  397. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/project_template/{{cookiecutter.project_name}}/requirements.txt +0 -0
  398. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__init__.py +0 -0
  399. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__main__.py +0 -0
  400. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/main.py +0 -0
  401. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/routes.py +0 -0
  402. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/proxy_configuration.py +0 -0
  403. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/py.typed +0 -0
  404. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/request_loaders/__init__.py +0 -0
  405. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/request_loaders/_request_list.py +0 -0
  406. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/request_loaders/_request_loader.py +0 -0
  407. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/request_loaders/_request_manager.py +0 -0
  408. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/request_loaders/_request_manager_tandem.py +0 -0
  409. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/router.py +0 -0
  410. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/sessions/__init__.py +0 -0
  411. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/sessions/_cookies.py +0 -0
  412. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/sessions/_models.py +0 -0
  413. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/sessions/_session.py +0 -0
  414. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/sessions/_session_pool.py +0 -0
  415. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/sessions/py.typed +0 -0
  416. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/statistics/__init__.py +0 -0
  417. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/statistics/_error_tracker.py +0 -0
  418. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/statistics/_models.py +0 -0
  419. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/__init__.py +0 -0
  420. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/_base/__init__.py +0 -0
  421. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/_base/_dataset_client.py +0 -0
  422. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/_base/_key_value_store_client.py +0 -0
  423. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/_base/_request_queue_client.py +0 -0
  424. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/_base/_storage_client.py +0 -0
  425. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/_base/py.typed +0 -0
  426. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/_file_system/__init__.py +0 -0
  427. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/_file_system/_dataset_client.py +0 -0
  428. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/_file_system/_key_value_store_client.py +0 -0
  429. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/_file_system/_request_queue_client.py +0 -0
  430. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/_file_system/_storage_client.py +0 -0
  431. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/_file_system/_utils.py +0 -0
  432. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/_file_system/py.typed +0 -0
  433. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/_memory/__init__.py +0 -0
  434. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/_memory/_dataset_client.py +0 -0
  435. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/_memory/_key_value_store_client.py +0 -0
  436. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/_memory/_request_queue_client.py +0 -0
  437. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/_memory/_storage_client.py +0 -0
  438. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/_memory/py.typed +0 -0
  439. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/_sql/__init__.py +0 -0
  440. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/_sql/_client_mixin.py +0 -0
  441. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/_sql/_dataset_client.py +0 -0
  442. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/_sql/_key_value_store_client.py +0 -0
  443. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/_sql/_request_queue_client.py +0 -0
  444. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/_sql/_storage_client.py +0 -0
  445. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/_sql/py.typed +0 -0
  446. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/models.py +0 -0
  447. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storage_clients/py.typed +0 -0
  448. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storages/__init__.py +0 -0
  449. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storages/_base.py +0 -0
  450. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storages/_dataset.py +0 -0
  451. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storages/_key_value_store.py +0 -0
  452. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storages/_request_queue.py +0 -0
  453. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storages/_storage_instance_manager.py +0 -0
  454. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storages/_utils.py +0 -0
  455. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/src/crawlee/storages/py.typed +0 -0
  456. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/__init__.py +0 -0
  457. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/e2e/__init__.py +0 -0
  458. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/e2e/conftest.py +0 -0
  459. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/e2e/project_template/test_static_crawlers_templates.py +0 -0
  460. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/e2e/project_template/utils.py +0 -0
  461. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/README.md +0 -0
  462. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/__init__.py +0 -0
  463. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/_autoscaling/test_autoscaled_pool.py +0 -0
  464. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/_autoscaling/test_system_status.py +0 -0
  465. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/_statistics/test_error_tracker.py +0 -0
  466. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/_statistics/test_periodic_logging.py +0 -0
  467. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/_statistics/test_persistence.py +0 -0
  468. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/_statistics/test_request_processing_record.py +0 -0
  469. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/_utils/test_byte_size.py +0 -0
  470. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/_utils/test_console.py +0 -0
  471. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/_utils/test_crypto.py +0 -0
  472. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/_utils/test_file.py +0 -0
  473. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/_utils/test_globs.py +0 -0
  474. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/_utils/test_html_to_text.py +0 -0
  475. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/_utils/test_measure_time.py +0 -0
  476. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/_utils/test_raise_if_too_many_kwargs.py +0 -0
  477. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/_utils/test_recurring_task.py +0 -0
  478. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/_utils/test_requests.py +0 -0
  479. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/_utils/test_robots.py +0 -0
  480. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/_utils/test_system.py +0 -0
  481. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/_utils/test_timedelata_ms.py +0 -0
  482. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/_utils/test_urls.py +0 -0
  483. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/browsers/test_browser_pool.py +0 -0
  484. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/browsers/test_playwright_browser.py +0 -0
  485. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/browsers/test_playwright_browser_controller.py +0 -0
  486. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler_statistics.py +0 -0
  487. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawling_context.py +0 -0
  488. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/crawlers/_adaptive_playwright/test_predictor.py +0 -0
  489. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/crawlers/_basic/test_context_pipeline.py +0 -0
  490. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/events/test_event_manager.py +0 -0
  491. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/events/test_local_event_manager.py +0 -0
  492. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/fingerprint_suite/test_adapters.py +0 -0
  493. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/fingerprint_suite/test_header_generator.py +0 -0
  494. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/http_clients/test_httpx.py +0 -0
  495. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/otel/test_crawler_instrumentor.py +0 -0
  496. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/proxy_configuration/test_new_proxy_info.py +0 -0
  497. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/proxy_configuration/test_tiers.py +0 -0
  498. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/request_loaders/test_request_list.py +0 -0
  499. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/request_loaders/test_sitemap_request_loader.py +0 -0
  500. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/sessions/test_cookies.py +0 -0
  501. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/sessions/test_models.py +0 -0
  502. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/sessions/test_session.py +0 -0
  503. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/sessions/test_session_pool.py +0 -0
  504. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py +0 -0
  505. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py +0 -0
  506. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/storage_clients/_file_system/test_fs_rq_client.py +0 -0
  507. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/storage_clients/_memory/test_memory_dataset_client.py +0 -0
  508. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/storage_clients/_memory/test_memory_kvs_client.py +0 -0
  509. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/storage_clients/_memory/test_memory_rq_client.py +0 -0
  510. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/storage_clients/_sql/test_sql_dataset_client.py +0 -0
  511. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/storage_clients/_sql/test_sql_kvs_client.py +0 -0
  512. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/storage_clients/_sql/test_sql_rq_client.py +0 -0
  513. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/storages/conftest.py +0 -0
  514. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/storages/test_dataset.py +0 -0
  515. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/storages/test_key_value_store.py +0 -0
  516. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/storages/test_request_manager_tandem.py +0 -0
  517. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/storages/test_storage_instance_manager.py +0 -0
  518. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/test_cli.py +0 -0
  519. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/test_log_config.py +0 -0
  520. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/test_router.py +0 -0
  521. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/tests/unit/test_service_locator.py +0 -0
  522. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/.eslintrc.json +0 -0
  523. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/.yarnrc.yml +0 -0
  524. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/babel.config.js +0 -0
  525. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/build_api_reference.sh +0 -0
  526. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/docusaurus.config.js +0 -0
  527. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/generate_module_shortcuts.py +0 -0
  528. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/patches/@docusaurus+core+3.4.0.patch +0 -0
  529. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/patches/@docusaurus+core+3.5.2.patch +0 -0
  530. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/roa-loader/index.js +0 -0
  531. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/roa-loader/package.json +0 -0
  532. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/sidebars.js +0 -0
  533. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/components/ApiLink.jsx +0 -0
  534. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/components/Button.jsx +0 -0
  535. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/components/Button.module.css +0 -0
  536. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/components/CopyButton.jsx +0 -0
  537. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/components/CopyButton.module.css +0 -0
  538. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/components/Gradients.jsx +0 -0
  539. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/components/Highlights.jsx +0 -0
  540. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/components/Highlights.module.css +0 -0
  541. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/components/Homepage/HomepageCliExample.jsx +0 -0
  542. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/components/Homepage/HomepageCliExample.module.css +0 -0
  543. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/components/Homepage/HomepageCtaSection.jsx +0 -0
  544. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/components/Homepage/HomepageCtaSection.module.css +0 -0
  545. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/components/Homepage/HomepageHeroSection.jsx +0 -0
  546. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/components/Homepage/HomepageHeroSection.module.css +0 -0
  547. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/components/Homepage/LanguageInfoWidget.jsx +0 -0
  548. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/components/Homepage/LanguageInfoWidget.module.css +0 -0
  549. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/components/Homepage/LanguageSwitch.jsx +0 -0
  550. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/components/Homepage/LanguageSwitch.module.css +0 -0
  551. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/components/Homepage/RiverSection.jsx +0 -0
  552. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/components/Homepage/RiverSection.module.css +0 -0
  553. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/components/Homepage/ThreeCardsWithIcon.jsx +0 -0
  554. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/components/Homepage/ThreeCardsWithIcon.module.css +0 -0
  555. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/components/Homepage/animated-crawlee-logo-dark.svg +0 -0
  556. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/components/Homepage/animated-crawlee-logo-light.svg +0 -0
  557. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/components/RunnableCodeBlock.jsx +0 -0
  558. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/components/RunnableCodeBlock.module.css +0 -0
  559. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/css/custom.css +0 -0
  560. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/pages/home_page_example.py +0 -0
  561. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/pages/index.module.css +0 -0
  562. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/theme/ColorModeToggle/dark-mode-icon.svg +0 -0
  563. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/theme/ColorModeToggle/index.js +0 -0
  564. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/theme/ColorModeToggle/light-mode-icon.svg +0 -0
  565. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/theme/ColorModeToggle/styles.module.css +0 -0
  566. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/theme/DocItem/Layout/index.js +0 -0
  567. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/theme/DocItem/Layout/styles.module.css +0 -0
  568. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/theme/Footer/LinkItem/index.js +0 -0
  569. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/theme/Footer/LinkItem/index.module.css +0 -0
  570. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/theme/Footer/index.js +0 -0
  571. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/theme/Footer/index.module.css +0 -0
  572. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/theme/MDXComponents/A.js +0 -0
  573. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/theme/Navbar/Content/index.js +0 -0
  574. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/theme/Navbar/Content/styles.module.css +0 -0
  575. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/theme/Navbar/Logo/index.js +0 -0
  576. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/theme/Navbar/Logo/index.module.css +0 -0
  577. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/theme/Navbar/MobileSidebar/Header/index.js +0 -0
  578. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/theme/Navbar/MobileSidebar/Header/index.module.css +0 -0
  579. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/theme/Navbar/MobileSidebar/Layout/index.js +0 -0
  580. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/theme/Navbar/MobileSidebar/PrimaryMenu/index.js +0 -0
  581. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/theme/Navbar/MobileSidebar/index.js +0 -0
  582. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/src/theme/NavbarItem/ComponentTypes.js +0 -0
  583. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/.nojekyll +0 -0
  584. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/font/lota.woff +0 -0
  585. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/font/lota.woff2 +0 -0
  586. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/API.png +0 -0
  587. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/apify_logo.svg +0 -0
  588. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/apify_og_SDK.png +0 -0
  589. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/apify_sdk.svg +0 -0
  590. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/apify_sdk_white.svg +0 -0
  591. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/arrow_right.svg +0 -0
  592. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/auto-scaling-dark.webp +0 -0
  593. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/auto-scaling-light.webp +0 -0
  594. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/check.svg +0 -0
  595. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/chrome-scrape-dark.gif +0 -0
  596. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/chrome-scrape-light.gif +0 -0
  597. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/cloud_icon.svg +0 -0
  598. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/community-dark-icon.svg +0 -0
  599. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/community-light-icon.svg +0 -0
  600. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/crawlee-dark-new.svg +0 -0
  601. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/crawlee-dark.svg +0 -0
  602. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/crawlee-javascript-dark.svg +0 -0
  603. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/crawlee-javascript-light.svg +0 -0
  604. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/crawlee-light-new.svg +0 -0
  605. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/crawlee-light.svg +0 -0
  606. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/crawlee-logo-monocolor.svg +0 -0
  607. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/crawlee-logo.svg +0 -0
  608. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/crawlee-python-dark.svg +0 -0
  609. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/crawlee-python-light.svg +0 -0
  610. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/crawlee-python-og.png +0 -0
  611. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/defaults-dark-icon.svg +0 -0
  612. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/defaults-light-icon.svg +0 -0
  613. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/discord-brand-dark.svg +0 -0
  614. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/discord-brand.svg +0 -0
  615. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/docusaurus.svg +0 -0
  616. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/external-link.svg +0 -0
  617. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/favicon.ico +0 -0
  618. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/favorite-tools-dark.webp +0 -0
  619. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/favorite-tools-light.webp +0 -0
  620. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/features/auto-scaling.svg +0 -0
  621. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/features/automate-everything.svg +0 -0
  622. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/features/fingerprints.svg +0 -0
  623. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/features/node-requests.svg +0 -0
  624. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/features/runs-on-py.svg +0 -0
  625. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/features/storage.svg +0 -0
  626. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/features/works-everywhere.svg +0 -0
  627. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/fill-and-submit-web-form/00.jpg +0 -0
  628. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/fill-and-submit-web-form/01.jpg +0 -0
  629. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/fill-and-submit-web-form/02.jpg +0 -0
  630. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/fill-and-submit-web-form/03.jpg +0 -0
  631. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/getting-started/current-price.jpg +0 -0
  632. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/getting-started/scraping-practice.jpg +0 -0
  633. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/getting-started/select-an-element.jpg +0 -0
  634. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/getting-started/selected-element.jpg +0 -0
  635. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/getting-started/sku.jpg +0 -0
  636. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/getting-started/title.jpg +0 -0
  637. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/github-brand-dark.svg +0 -0
  638. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/github-brand.svg +0 -0
  639. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/guides/jaeger_otel_search_view_example.png +0 -0
  640. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/guides/jaeger_otel_trace_example.png +0 -0
  641. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/hearth copy.svg +0 -0
  642. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/hearth.svg +0 -0
  643. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/javascript_logo.svg +0 -0
  644. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/js_file.svg +0 -0
  645. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/logo-big.svg +0 -0
  646. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/logo-blur.png +0 -0
  647. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/logo-blur.svg +0 -0
  648. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/logo-zoom.svg +0 -0
  649. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/menu-arrows.svg +0 -0
  650. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/oss_logo.png +0 -0
  651. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/puppeteer-live-view-dashboard.png +0 -0
  652. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/puppeteer-live-view-detail.png +0 -0
  653. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/queue-dark-icon.svg +0 -0
  654. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/queue-light-icon.svg +0 -0
  655. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/resuming-paused-crawl/00.webp +0 -0
  656. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/resuming-paused-crawl/01.webp +0 -0
  657. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/robot.png +0 -0
  658. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/routing-dark-icon.svg +0 -0
  659. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/routing-light-icon.svg +0 -0
  660. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/scraping-utils-dark-icon.svg +0 -0
  661. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/scraping-utils-light-icon.svg +0 -0
  662. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/smart-proxy-dark.webp +0 -0
  663. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/smart-proxy-light.webp +0 -0
  664. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/source_code.png +0 -0
  665. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/system.svg +0 -0
  666. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/triangles_dark.svg +0 -0
  667. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/triangles_light.svg +0 -0
  668. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/workflow.svg +0 -0
  669. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/zero-setup-dark-icon.svg +0 -0
  670. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/img/zero-setup-light-icon.svg +0 -0
  671. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/js/custom.js +0 -0
  672. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/static/robots.txt +0 -0
  673. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/tools/docs-prettier.config.js +0 -0
  674. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/tools/utils/externalLink.js +0 -0
  675. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/tools/website_gif/chrome-scrape-dark.gif +0 -0
  676. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/tools/website_gif/chrome-scrape-dark.mp4 +0 -0
  677. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/tools/website_gif/chrome-scrape-light.gif +0 -0
  678. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/tools/website_gif/chrome-scrape-light.mp4 +0 -0
  679. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/tools/website_gif/website_gif.mjs +0 -0
  680. {crawlee-1.0.4b4 → crawlee-1.0.5b8}/website/tsconfig.eslint.json +0 -0
@@ -30,6 +30,7 @@ htmlcov
30
30
  # IDE, editors
31
31
  .vscode
32
32
  .idea
33
+ *~
33
34
  .DS_Store
34
35
  .nvim.lua
35
36
  Session.vim
@@ -3,15 +3,30 @@
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
5
  <!-- git-cliff-unreleased-start -->
6
- ## 1.0.4 - **not yet released**
6
+ ## 1.0.5 - **not yet released**
7
+
8
+ ### 🚀 Features
9
+
10
+ - Add `chrome` `BrowserType` for `PlaywrightCrawler` to use the Chrome browser ([#1487](https://github.com/apify/crawlee-python/pull/1487)) ([b06937b](https://github.com/apify/crawlee-python/commit/b06937bbc3afe3c936b554bfc503365c1b2c526b)) by [@Mantisus](https://github.com/Mantisus), closes [#1071](https://github.com/apify/crawlee-python/issues/1071)
11
+
12
+ ### 🐛 Bug Fixes
13
+
14
+ - Improve indexing of the `request_queue_records` table for `SqlRequestQueueClient` ([#1527](https://github.com/apify/crawlee-python/pull/1527)) ([6509534](https://github.com/apify/crawlee-python/commit/65095346a9d8b703b10c91e0510154c3c48a4176)) by [@Mantisus](https://github.com/Mantisus), closes [#1526](https://github.com/apify/crawlee-python/issues/1526)
15
+ - Improve error handling for `RobotsTxtFile.load` ([#1524](https://github.com/apify/crawlee-python/pull/1524)) ([596a311](https://github.com/apify/crawlee-python/commit/596a31184914a254b3e7a81fd2f48ea8eda7db49)) by [@Mantisus](https://github.com/Mantisus)
16
+
17
+
18
+ <!-- git-cliff-unreleased-end -->
19
+ ## [1.0.4](https://github.com/apify/crawlee-python/releases/tag/v1.0.4) (2025-10-24)
7
20
 
8
21
  ### 🐛 Bug Fixes
9
22
 
10
23
  - Respect `enqueue_strategy` in `enqueue_links` ([#1505](https://github.com/apify/crawlee-python/pull/1505)) ([6ee04bc](https://github.com/apify/crawlee-python/commit/6ee04bc08c50a70f2e956a79d4ce5072a726c3a8)) by [@Mantisus](https://github.com/Mantisus), closes [#1504](https://github.com/apify/crawlee-python/issues/1504)
11
24
  - Exclude incorrect links before checking `robots.txt` ([#1502](https://github.com/apify/crawlee-python/pull/1502)) ([3273da5](https://github.com/apify/crawlee-python/commit/3273da5fee62ec9254666b376f382474c3532a56)) by [@Mantisus](https://github.com/Mantisus), closes [#1499](https://github.com/apify/crawlee-python/issues/1499)
25
+ - Resolve compatibility issue between `SqlStorageClient` and `AdaptivePlaywrightCrawler` ([#1496](https://github.com/apify/crawlee-python/pull/1496)) ([ce172c4](https://github.com/apify/crawlee-python/commit/ce172c425a8643a1d4c919db4f5e5a6e47e91deb)) by [@Mantisus](https://github.com/Mantisus), closes [#1495](https://github.com/apify/crawlee-python/issues/1495)
26
+ - Fix `BasicCrawler` statistics persistence ([#1490](https://github.com/apify/crawlee-python/pull/1490)) ([1eb1c19](https://github.com/apify/crawlee-python/commit/1eb1c19aa6f9dda4a0e3f7eda23f77a554f95076)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1501](https://github.com/apify/crawlee-python/issues/1501)
27
+ - Save context state in result for `AdaptivePlaywrightCrawler` after isolated processing in `SubCrawler` ([#1488](https://github.com/apify/crawlee-python/pull/1488)) ([62b7c70](https://github.com/apify/crawlee-python/commit/62b7c70b54085fc65a660062028014f4502beba9)) by [@Mantisus](https://github.com/Mantisus), closes [#1483](https://github.com/apify/crawlee-python/issues/1483)
12
28
 
13
29
 
14
- <!-- git-cliff-unreleased-end -->
15
30
  ## [1.0.3](https://github.com/apify/crawlee-python/releases/tag/v1.0.3) (2025-10-17)
16
31
 
17
32
  ### 🐛 Bug Fixes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: crawlee
3
- Version: 1.0.4b4
3
+ Version: 1.0.5b8
4
4
  Summary: Crawlee for Python
5
5
  Project-URL: Apify Homepage, https://apify.com
6
6
  Project-URL: Changelog, https://crawlee.dev/python/docs/changelog
@@ -27,15 +27,13 @@ async def main() -> None:
27
27
 
28
28
  crawler = PlaywrightCrawler(
29
29
  headless=False,
30
- # Use chromium for Chrome compatibility
31
- browser_type='chromium',
30
+ # Use the installed Chrome browser
31
+ browser_type='chrome',
32
32
  # Disable fingerprints to preserve profile identity
33
33
  fingerprint_generator=None,
34
34
  # Set user data directory to temp folder
35
35
  user_data_dir=tmp_profile_dir,
36
36
  browser_launch_options={
37
- # Use installed Chrome browser
38
- 'channel': 'chrome',
39
37
  # Slow down actions to mimic human behavior
40
38
  'slow_mo': 200,
41
39
  'args': [
@@ -18,8 +18,6 @@ Using browser profiles allows you to leverage existing login sessions, saved pas
18
18
 
19
19
  To run <ApiLink to="class/PlaywrightCrawler">`PlaywrightCrawler`</ApiLink> with your Chrome profile, you need to know the path to your profile files. You can find this information by entering `chrome://version/` as a URL in your Chrome browser. If you have multiple profiles, pay attention to the profile name - if you only have one profile, it's always `Default`.
20
20
 
21
- You also need to use the [`channel`](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-option-channel) parameter in `browser_launch_options` to use the Chrome browser installed on your system instead of Playwright's Chromium.
22
-
23
21
  :::warning Profile access limitation
24
22
  Due to [Chrome's security policies](https://developer.chrome.com/blog/remote-debugging-port), automation cannot use your main browsing profile directly. The example copies your profile to a temporary location as a workaround.
25
23
  :::
@@ -291,7 +291,7 @@ Request loaders provide a subset of <ApiLink to="class/RequestQueue">`RequestQue
291
291
 
292
292
  - <ApiLink to="class/RequestLoader">`RequestLoader`</ApiLink> - Base interface for read-only access to a stream of requests, with capabilities like fetching the next request, marking as handled, and status checking.
293
293
  - <ApiLink to="class/RequestList">`RequestList`</ApiLink> - Lightweight in-memory implementation of `RequestLoader` for managing static lists of URLs.
294
- - <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> - Specialized loader for reading URLs from XML sitemaps with filtering capabilities.
294
+ - <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> - A specialized loader that reads URLs from XML and plain-text sitemaps following the [Sitemaps protocol](https://www.sitemaps.org/protocol.html) with filtering capabilities.
295
295
 
296
296
  ### Request managers
297
297
 
@@ -25,7 +25,7 @@ Changing browser fingerprints can be a tedious job. Luckily, Crawlee provides th
25
25
  {PlaywrightDefaultFingerprintGenerator}
26
26
  </RunnableCodeBlock>
27
27
 
28
- In certain cases we want to narrow down the fingerprints used - e.g. specify a certain operating system, locale or browser. This is also possible with Crawlee - the crawler can have the generation algorithm customized to reflect the particular browser version and many more. For description of fingerprint generation options please see <ApiLink to="class/HeaderGeneratorOptions">`HeaderGeneratorOptions`</ApiLink>, <ApiLink to="class/ScreenOptions">`ScreenOptions`</ApiLink> and <ApiLink to="class/BrowserforgeFingerprintGenerator#__init__">`DefaultFingerprintGenerator.__init__`</ApiLink> See the example bellow:
28
+ In certain cases we want to narrow down the fingerprints used - e.g. specify a certain operating system, locale or browser. This is also possible with Crawlee - the crawler can have the generation algorithm customized to reflect the particular browser version and many more. For description of fingerprint generation options please see <ApiLink to="class/HeaderGeneratorOptions">`HeaderGeneratorOptions`</ApiLink>, <ApiLink to="class/ScreenOptions">`ScreenOptions`</ApiLink> and <ApiLink to="class/BrowserforgeFingerprintGenerator#__init__">`DefaultFingerprintGenerator.__init__`</ApiLink> See the example below:
29
29
 
30
30
  <CodeBlock className="language-python">
31
31
  {PlaywrightDefaultFingerprintGeneratorWithArgs}
@@ -31,7 +31,7 @@ The [`request_loaders`](https://github.com/apify/crawlee-python/tree/master/src/
31
31
  And specific request loader implementations:
32
32
 
33
33
  - <ApiLink to="class/RequestList">`RequestList`</ApiLink>: A lightweight implementation for managing a static list of URLs.
34
- - <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink>: A specialized loader that reads URLs from XML sitemaps with filtering capabilities.
34
+ - <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink>: A specialized loader that reads URLs from XML and plain-text sitemaps following the [Sitemaps protocol](https://www.sitemaps.org/protocol.html) with filtering capabilities.
35
35
 
36
36
  Below is a class diagram that illustrates the relationships between these components and the <ApiLink to="class/RequestQueue">`RequestQueue`</ApiLink>:
37
37
 
@@ -130,7 +130,13 @@ To enable persistence, provide `persist_state_key` and optionally `persist_reque
130
130
 
131
131
  ### Sitemap request loader
132
132
 
133
- The <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> is a specialized request loader that reads URLs from XML sitemaps. It's particularly useful when you want to crawl a website systematically by following its sitemap structure. The loader supports filtering URLs using glob patterns and regular expressions, allowing you to include or exclude specific types of URLs. The <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> provides streaming processing of sitemaps, ensuring efficient memory usage without loading the entire sitemap into memory.
133
+ The <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> is a specialized request loader that reads URLs from sitemaps following the [Sitemaps protocol](https://www.sitemaps.org/protocol.html). It supports both XML and plain text sitemap formats. It's particularly useful when you want to crawl a website systematically by following its sitemap structure.
134
+
135
+ :::note
136
+ The `SitemapRequestLoader` is designed specifically for sitemaps that follow the standard Sitemaps protocol. HTML pages containing links are not supported by this loader - those should be handled by regular crawlers using the `enqueue_links` functionality.
137
+ :::
138
+
139
+ The loader supports filtering URLs using glob patterns and regular expressions, allowing you to include or exclude specific types of URLs. The <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> provides streaming processing of sitemaps, ensuring efficient memory usage without loading the entire sitemap into memory.
134
140
 
135
141
  <RunnableCodeBlock className="language-python" language="python">
136
142
  {SitemapExample}
@@ -50,7 +50,7 @@ apify login
50
50
 
51
51
  Now that you have your account set up, you will need to adjust the code a tiny bit. We will use the [Apify SDK](https://docs.apify.com/sdk/python/), which will help us to wire the Crawlee storages (like the [`RequestQueue`](https://docs.apify.com/sdk/python/reference/class/RequestQueue)) to their Apify platform counterparts - otherwise Crawlee would keep things only in memory.
52
52
 
53
- Open your `src/main.py` file, and wrap everyting in your `main` function with the [`Actor`](https://docs.apify.com/sdk/python/reference/class/Actor) context manager. Your code should look like this:
53
+ Open your `src/main.py` file, and wrap everything in your `main` function with the [`Actor`](https://docs.apify.com/sdk/python/reference/class/Actor) context manager. Your code should look like this:
54
54
 
55
55
  <CodeBlock className="language-python" title="src/main.py">
56
56
  {MainExample}
@@ -1,4 +1,4 @@
1
- # Line lenght different from the rest of the code to make sure that the example codes visualised on the generated
1
+ # Line length different from the rest of the code to make sure that the example codes visualised on the generated
2
2
  # documentation webpages are shown without vertical slider to make them more readable.
3
3
 
4
4
  [tool.ruff]
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "crawlee"
7
- version = "1.0.4b4"
7
+ version = "1.0.5b8"
8
8
  description = "Crawlee for Python"
9
9
  authors = [{ name = "Apify Technologies s.r.o.", email = "support@apify.com" }]
10
10
  license = { file = "LICENSE" }
@@ -7,6 +7,9 @@ from typing import TYPE_CHECKING
7
7
  if TYPE_CHECKING:
8
8
  from collections.abc import Callable
9
9
  from datetime import timedelta
10
+ from types import TracebackType
11
+
12
+ from typing_extensions import Self
10
13
 
11
14
  logger = getLogger(__name__)
12
15
 
@@ -26,6 +29,18 @@ class RecurringTask:
26
29
  self.delay = delay
27
30
  self.task: asyncio.Task | None = None
28
31
 
32
+ async def __aenter__(self) -> Self:
33
+ self.start()
34
+ return self
35
+
36
+ async def __aexit__(
37
+ self,
38
+ exc_type: type[BaseException] | None,
39
+ exc_value: BaseException | None,
40
+ exc_traceback: TracebackType | None,
41
+ ) -> None:
42
+ await self.stop()
43
+
29
44
  async def _wrapper(self) -> None:
30
45
  """Continuously execute the provided function with the specified delay.
31
46
 
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from logging import getLogger
3
4
  from typing import TYPE_CHECKING
4
5
 
5
6
  from protego import Protego
@@ -15,6 +16,9 @@ if TYPE_CHECKING:
15
16
  from crawlee.proxy_configuration import ProxyInfo
16
17
 
17
18
 
19
+ logger = getLogger(__name__)
20
+
21
+
18
22
  class RobotsTxtFile:
19
23
  def __init__(
20
24
  self, url: str, robots: Protego, http_client: HttpClient | None = None, proxy_info: ProxyInfo | None = None
@@ -56,12 +60,20 @@ class RobotsTxtFile:
56
60
  http_client: The `HttpClient` instance used to perform the network request for fetching the robots.txt file.
57
61
  proxy_info: Optional `ProxyInfo` to be used when fetching the robots.txt file. If None, no proxy is used.
58
62
  """
59
- response = await http_client.send_request(url, proxy_info=proxy_info)
60
- body = (
61
- b'User-agent: *\nAllow: /' if is_status_code_client_error(response.status_code) else await response.read()
62
- )
63
+ try:
64
+ response = await http_client.send_request(url, proxy_info=proxy_info)
65
+
66
+ body = (
67
+ b'User-agent: *\nAllow: /'
68
+ if is_status_code_client_error(response.status_code)
69
+ else await response.read()
70
+ )
71
+ robots = Protego.parse(body.decode('utf-8'))
72
+
73
+ except Exception as e:
74
+ logger.warning(f'Failed to fetch robots.txt from "{url}" with error: "{e}"')
63
75
 
64
- robots = Protego.parse(body.decode('utf-8'))
76
+ robots = Protego.parse('User-agent: *\nAllow: /')
65
77
 
66
78
  return cls(url, robots, http_client=http_client, proxy_info=proxy_info)
67
79
 
@@ -118,7 +118,10 @@ class BrowserPool:
118
118
  """Initialize a new instance with a single `PlaywrightBrowserPlugin` configured with the provided options.
119
119
 
120
120
  Args:
121
- browser_type: The type of browser to launch ('chromium', 'firefox', or 'webkit').
121
+ browser_type: The type of browser to launch:
122
+ - 'chromium', 'firefox', 'webkit': Use Playwright-managed browsers
123
+ - 'chrome': Use your locally installed Google Chrome browser. Requires Google Chrome to be installed on
124
+ the system.
122
125
  user_data_dir: Path to a user data directory, which stores browser session data like cookies
123
126
  and local storage.
124
127
  browser_launch_options: Keyword arguments to pass to the browser launch method. These options are provided
@@ -216,7 +216,7 @@ class PlaywrightBrowserController(BrowserController):
216
216
  browser_new_context_options = dict(browser_new_context_options) if browser_new_context_options else {}
217
217
  if proxy_info:
218
218
  if browser_new_context_options.get('proxy'):
219
- logger.warning("browser_new_context_options['proxy'] overriden by explicit `proxy_info` argument.")
219
+ logger.warning("browser_new_context_options['proxy'] overridden by explicit `proxy_info` argument.")
220
220
 
221
221
  browser_new_context_options['proxy'] = ProxySettings(
222
222
  server=f'{proxy_info.scheme}://{proxy_info.hostname}:{proxy_info.port}',
@@ -34,8 +34,8 @@ class PlaywrightBrowserPlugin(BrowserPlugin):
34
34
 
35
35
  It is a plugin designed to manage browser instances using the Playwright automation library. It acts as a factory
36
36
  for creating new browser instances and provides a unified interface for interacting with different browser types
37
- (chromium, firefox, and webkit). This class integrates configuration options for browser launches (headless mode,
38
- executable paths, sandboxing, ...). It also manages browser contexts and the number of pages open within each
37
+ (chromium, firefox, webkit and chrome). This class integrates configuration options for browser launches (headless
38
+ mode, executable paths, sandboxing, ...). It also manages browser contexts and the number of pages open within each
39
39
  browser instance, ensuring that resource limits are respected.
40
40
  """
41
41
 
@@ -55,7 +55,10 @@ class PlaywrightBrowserPlugin(BrowserPlugin):
55
55
  """Initialize a new instance.
56
56
 
57
57
  Args:
58
- browser_type: The type of browser to launch ('chromium', 'firefox', or 'webkit').
58
+ browser_type: The type of browser to launch:
59
+ - 'chromium', 'firefox', 'webkit': Use Playwright-managed browsers
60
+ - 'chrome': Use your locally installed Google Chrome browser. Requires Google Chrome to be installed on
61
+ the system.
59
62
  user_data_dir: Path to a User Data Directory, which stores browser session data like cookies and local
60
63
  storage.
61
64
  browser_launch_options: Keyword arguments to pass to the browser launch method. These options are provided
@@ -80,6 +83,17 @@ class PlaywrightBrowserPlugin(BrowserPlugin):
80
83
  'chromium_sandbox': not config.disable_browser_sandbox,
81
84
  }
82
85
 
86
+ if browser_type == 'chrome' and default_launch_browser_options['executable_path']:
87
+ raise ValueError(
88
+ 'Cannot use browser_type `chrome` with `Configuration.default_browser_path` or `executable_path` set.'
89
+ )
90
+
91
+ # Map 'chrome' to 'chromium' with the 'chrome' channel.
92
+ if browser_type == 'chrome':
93
+ browser_type = 'chromium'
94
+ # Chromium parameter 'channel' set to 'chrome' enables using installed Google Chrome.
95
+ default_launch_browser_options['channel'] = 'chrome'
96
+
83
97
  self._browser_type: BrowserType = browser_type
84
98
  self._browser_launch_options: dict[str, Any] = default_launch_browser_options | (browser_launch_options or {})
85
99
  self._browser_new_context_options = browser_new_context_options or {}
@@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, Literal
6
6
  if TYPE_CHECKING:
7
7
  from playwright.async_api import Page
8
8
 
9
- BrowserType = Literal['chromium', 'firefox', 'webkit']
9
+ BrowserType = Literal['chromium', 'firefox', 'webkit', 'chrome']
10
10
 
11
11
 
12
12
  @dataclass
@@ -149,10 +149,6 @@ class AdaptivePlaywrightCrawler(
149
149
  non-default configuration.
150
150
  kwargs: Additional keyword arguments to pass to the underlying `BasicCrawler`.
151
151
  """
152
- # Some sub crawler kwargs are internally modified. Prepare copies.
153
- basic_crawler_kwargs_for_static_crawler = deepcopy(kwargs)
154
- basic_crawler_kwargs_for_pw_crawler = deepcopy(kwargs)
155
-
156
152
  # Adaptive crawling related.
157
153
  self.rendering_type_predictor = rendering_type_predictor or DefaultRenderingTypePredictor()
158
154
  self.result_checker = result_checker or (lambda _: True)
@@ -170,11 +166,11 @@ class AdaptivePlaywrightCrawler(
170
166
  # Each sub crawler will use custom logger .
171
167
  static_logger = getLogger('Subcrawler_static')
172
168
  static_logger.setLevel(logging.ERROR)
173
- basic_crawler_kwargs_for_static_crawler['_logger'] = static_logger
169
+ basic_crawler_kwargs_for_static_crawler: _BasicCrawlerOptions = {'_logger': static_logger, **kwargs}
174
170
 
175
171
  pw_logger = getLogger('Subcrawler_playwright')
176
172
  pw_logger.setLevel(logging.ERROR)
177
- basic_crawler_kwargs_for_pw_crawler['_logger'] = pw_logger
173
+ basic_crawler_kwargs_for_pw_crawler: _BasicCrawlerOptions = {'_logger': pw_logger, **kwargs}
178
174
 
179
175
  # Initialize sub crawlers to create their pipelines.
180
176
  static_crawler_class = AbstractHttpCrawler.create_parsed_http_crawler_class(static_parser=static_parser)
@@ -319,7 +315,7 @@ class AdaptivePlaywrightCrawler(
319
315
  ),
320
316
  logger=self._logger,
321
317
  )
322
- return SubCrawlerRun(result=result)
318
+ return SubCrawlerRun(result=result, run_context=context_linked_to_result)
323
319
  except Exception as e:
324
320
  return SubCrawlerRun(exception=e)
325
321
 
@@ -375,7 +371,8 @@ class AdaptivePlaywrightCrawler(
375
371
  self.track_http_only_request_handler_runs()
376
372
 
377
373
  static_run = await self._crawl_one(rendering_type='static', context=context)
378
- if static_run.result and self.result_checker(static_run.result):
374
+ if static_run.result and static_run.run_context and self.result_checker(static_run.result):
375
+ self._update_context_from_copy(context, static_run.run_context)
379
376
  self._context_result_map[context] = static_run.result
380
377
  return
381
378
  if static_run.exception:
@@ -406,13 +403,10 @@ class AdaptivePlaywrightCrawler(
406
403
  if pw_run.exception is not None:
407
404
  raise pw_run.exception
408
405
 
409
- if pw_run.result:
410
- self._context_result_map[context] = pw_run.result
411
-
406
+ if pw_run.result and pw_run.run_context:
412
407
  if should_detect_rendering_type:
413
408
  detection_result: RenderingType
414
409
  static_run = await self._crawl_one('static', context=context, state=old_state_copy)
415
-
416
410
  if static_run.result and self.result_comparator(static_run.result, pw_run.result):
417
411
  detection_result = 'static'
418
412
  else:
@@ -421,6 +415,9 @@ class AdaptivePlaywrightCrawler(
421
415
  context.log.debug(f'Detected rendering type {detection_result} for {context.request.url}')
422
416
  self.rendering_type_predictor.store_result(context.request, detection_result)
423
417
 
418
+ self._update_context_from_copy(context, pw_run.run_context)
419
+ self._context_result_map[context] = pw_run.result
420
+
424
421
  def pre_navigation_hook(
425
422
  self,
426
423
  hook: Callable[[AdaptivePlaywrightPreNavCrawlingContext], Awaitable[None]] | None = None,
@@ -455,8 +452,32 @@ class AdaptivePlaywrightCrawler(
455
452
  def track_rendering_type_mispredictions(self) -> None:
456
453
  self.statistics.state.rendering_type_mispredictions += 1
457
454
 
455
+ def _update_context_from_copy(self, context: BasicCrawlingContext, context_copy: BasicCrawlingContext) -> None:
456
+ """Update mutable fields of `context` from `context_copy`.
457
+
458
+ Uses object.__setattr__ to bypass frozen dataclass restrictions,
459
+ allowing state synchronization after isolated crawler execution.
460
+ """
461
+ updating_attributes = {
462
+ 'request': ('headers', 'user_data'),
463
+ 'session': ('_user_data', '_usage_count', '_error_score', '_cookies'),
464
+ }
465
+
466
+ for attr, sub_attrs in updating_attributes.items():
467
+ original_sub_obj = getattr(context, attr)
468
+ copy_sub_obj = getattr(context_copy, attr)
469
+
470
+ # Check that both sub objects are not None
471
+ if original_sub_obj is None or copy_sub_obj is None:
472
+ continue
473
+
474
+ for sub_attr in sub_attrs:
475
+ new_value = getattr(copy_sub_obj, sub_attr)
476
+ object.__setattr__(original_sub_obj, sub_attr, new_value)
477
+
458
478
 
459
479
  @dataclass(frozen=True)
460
480
  class SubCrawlerRun:
461
481
  result: RequestHandlerRunResult | None = None
462
482
  exception: Exception | None = None
483
+ run_context: BasicCrawlingContext | None = None
@@ -56,7 +56,7 @@ from crawlee.errors import (
56
56
  SessionError,
57
57
  UserDefinedErrorHandlerError,
58
58
  )
59
- from crawlee.events._types import Event, EventCrawlerStatusData
59
+ from crawlee.events._types import Event, EventCrawlerStatusData, EventPersistStateData
60
60
  from crawlee.http_clients import ImpitHttpClient
61
61
  from crawlee.router import Router
62
62
  from crawlee.sessions import SessionPool
@@ -437,14 +437,23 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
437
437
  self._statistics_log_format = statistics_log_format
438
438
 
439
439
  # Statistics
440
- self._statistics = statistics or cast(
441
- 'Statistics[TStatisticsState]',
442
- Statistics.with_default_state(
443
- periodic_message_logger=self._logger,
444
- statistics_log_format=self._statistics_log_format,
445
- log_message='Current request statistics:',
446
- ),
447
- )
440
+ if statistics:
441
+ self._statistics = statistics
442
+ else:
443
+
444
+ async def persist_state_factory() -> KeyValueStore:
445
+ return await self.get_key_value_store()
446
+
447
+ self._statistics = cast(
448
+ 'Statistics[TStatisticsState]',
449
+ Statistics.with_default_state(
450
+ persistence_enabled=True,
451
+ periodic_message_logger=self._logger,
452
+ statistics_log_format=self._statistics_log_format,
453
+ log_message='Current request statistics:',
454
+ persist_state_kvs_factory=persist_state_factory,
455
+ ),
456
+ )
448
457
 
449
458
  # Additional context managers to enter and exit
450
459
  self._additional_context_managers = _additional_context_managers or []
@@ -689,7 +698,6 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
689
698
  except CancelledError:
690
699
  pass
691
700
  finally:
692
- await self._crawler_state_rec_task.stop()
693
701
  if threading.current_thread() is threading.main_thread():
694
702
  with suppress(NotImplementedError):
695
703
  asyncio.get_running_loop().remove_signal_handler(signal.SIGINT)
@@ -721,8 +729,6 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
721
729
  async def _run_crawler(self) -> None:
722
730
  event_manager = self._service_locator.get_event_manager()
723
731
 
724
- self._crawler_state_rec_task.start()
725
-
726
732
  # Collect the context managers to be entered. Context managers that are already active are excluded,
727
733
  # as they were likely entered by the caller, who will also be responsible for exiting them.
728
734
  contexts_to_enter = [
@@ -733,6 +739,7 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
733
739
  self._statistics,
734
740
  self._session_pool if self._use_session_pool else None,
735
741
  self._http_client,
742
+ self._crawler_state_rec_task,
736
743
  *self._additional_context_managers,
737
744
  )
738
745
  if cm and getattr(cm, 'active', False) is False
@@ -744,6 +751,9 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
744
751
 
745
752
  await self._autoscaled_pool.run()
746
753
 
754
+ # Emit PERSIST_STATE event when crawler is finishing to allow listeners to persist their state if needed
755
+ event_manager.emit(event=Event.PERSIST_STATE, event_data=EventPersistStateData(is_migrating=False))
756
+
747
757
  async def add_requests(
748
758
  self,
749
759
  requests: Sequence[str | Request],
@@ -114,7 +114,10 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
114
114
  browser_pool: A `BrowserPool` instance to be used for launching the browsers and getting pages.
115
115
  user_data_dir: Path to a user data directory, which stores browser session data like cookies
116
116
  and local storage.
117
- browser_type: The type of browser to launch ('chromium', 'firefox', or 'webkit').
117
+ browser_type: The type of browser to launch:
118
+ - 'chromium', 'firefox', 'webkit': Use Playwright-managed browsers
119
+ - 'chrome': Use your locally installed Google Chrome browser. Requires Google Chrome to be installed on
120
+ the system.
118
121
  This option should not be used if `browser_pool` is provided.
119
122
  browser_launch_options: Keyword arguments to pass to the browser launch method. These options are provided
120
123
  directly to Playwright's `browser_type.launch` method. For more details, refer to the
@@ -153,7 +156,7 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
153
156
  ):
154
157
  raise ValueError(
155
158
  'You cannot provide `headless`, `browser_type`, `browser_launch_options`, '
156
- '`browser_new_context_options`, `use_incognito_pages`, `user_data_dir` or'
159
+ '`browser_new_context_options`, `use_incognito_pages`, `user_data_dir` or '
157
160
  '`fingerprint_generator` arguments when `browser_pool` is provided.'
158
161
  )
159
162
 
@@ -496,7 +499,9 @@ class _PlaywrightCrawlerAdditionalOptions(TypedDict):
496
499
  """A `BrowserPool` instance to be used for launching the browsers and getting pages."""
497
500
 
498
501
  browser_type: NotRequired[BrowserType]
499
- """The type of browser to launch ('chromium', 'firefox', or 'webkit').
502
+ """The type of browser to launch:
503
+ - 'chromium', 'firefox', 'webkit': Use Playwright-managed browsers
504
+ - 'chrome': Use your locally installed Google Chrome browser. Requires Google Chrome to be installed on the system.
500
505
  This option should not be used if `browser_pool` is provided."""
501
506
 
502
507
  browser_launch_options: NotRequired[Mapping[str, Any]]
@@ -11,9 +11,9 @@ if TYPE_CHECKING:
11
11
 
12
12
 
13
13
  def fingerprint_browser_type_from_playwright_browser_type(
14
- playwright_browser_type: Literal['chromium', 'firefox', 'webkit'],
14
+ playwright_browser_type: Literal['chromium', 'firefox', 'webkit', 'chrome'],
15
15
  ) -> SupportedBrowserType:
16
- if playwright_browser_type == 'chromium':
16
+ if playwright_browser_type in {'chromium', 'chrome'}:
17
17
  return 'chrome'
18
18
  if playwright_browser_type == 'firefox':
19
19
  return 'firefox'
@@ -90,6 +90,11 @@ class SitemapRequestLoaderState(BaseModel):
90
90
  class SitemapRequestLoader(RequestLoader):
91
91
  """A request loader that reads URLs from sitemap(s).
92
92
 
93
+ The loader is designed to handle sitemaps that follow the format described in the Sitemaps protocol
94
+ (https://www.sitemaps.org/protocol.html). It supports both XML and plain text sitemap formats.
95
+ Note that HTML pages containing links are not supported - those should be handled by regular crawlers
96
+ and the `enqueue_links` functionality.
97
+
93
98
  The loader fetches and parses sitemaps in the background, allowing crawling to start
94
99
  before all URLs are loaded. It supports filtering URLs using glob and regex patterns.
95
100
 
@@ -32,7 +32,7 @@ class ErrorSnapshotter:
32
32
  """Capture error snapshot and save it to key value store.
33
33
 
34
34
  It saves the error snapshot directly to a key value store. It can't use `context.get_key_value_store` because
35
- it returns `KeyValueStoreChangeRecords` which is commited to the key value store only if the `RequestHandler`
35
+ it returns `KeyValueStoreChangeRecords` which is committed to the key value store only if the `RequestHandler`
36
36
  returned without an exception. ErrorSnapshotter is on the contrary active only when `RequestHandler` fails with
37
37
  an exception.
38
38
 
@@ -96,7 +96,7 @@ class Statistics(Generic[TStatisticsState]):
96
96
 
97
97
  self._state = RecoverableState(
98
98
  default_state=state_model(stats_id=self._id),
99
- persist_state_key=persist_state_key or f'SDK_CRAWLER_STATISTICS_{self._id}',
99
+ persist_state_key=persist_state_key or f'__CRAWLER_STATISTICS_{self._id}',
100
100
  persistence_enabled=persistence_enabled,
101
101
  persist_state_kvs_name=persist_state_kvs_name,
102
102
  persist_state_kvs_factory=persist_state_kvs_factory,
@@ -130,6 +130,7 @@ class Statistics(Generic[TStatisticsState]):
130
130
  persistence_enabled: bool = False,
131
131
  persist_state_kvs_name: str | None = None,
132
132
  persist_state_key: str | None = None,
133
+ persist_state_kvs_factory: Callable[[], Coroutine[None, None, KeyValueStore]] | None = None,
133
134
  log_message: str = 'Statistics',
134
135
  periodic_message_logger: Logger | None = None,
135
136
  log_interval: timedelta = timedelta(minutes=1),
@@ -141,6 +142,7 @@ class Statistics(Generic[TStatisticsState]):
141
142
  persistence_enabled=persistence_enabled,
142
143
  persist_state_kvs_name=persist_state_kvs_name,
143
144
  persist_state_key=persist_state_key,
145
+ persist_state_kvs_factory=persist_state_kvs_factory,
144
146
  log_message=log_message,
145
147
  periodic_message_logger=periodic_message_logger,
146
148
  log_interval=log_interval,
@@ -187,7 +189,10 @@ class Statistics(Generic[TStatisticsState]):
187
189
  if not self._active:
188
190
  raise RuntimeError(f'The {self.__class__.__name__} is not active.')
189
191
 
190
- self._state.current_value.crawler_finished_at = datetime.now(timezone.utc)
192
+ if not self.state.crawler_last_started_at:
193
+ raise RuntimeError('Statistics.state.crawler_last_started_at not set.')
194
+ self.state.crawler_finished_at = datetime.now(timezone.utc)
195
+ self.state.crawler_runtime += self.state.crawler_finished_at - self.state.crawler_last_started_at
191
196
 
192
197
  await self._state.teardown()
193
198
 
@@ -255,8 +260,7 @@ class Statistics(Generic[TStatisticsState]):
255
260
  if self._instance_start is None:
256
261
  raise RuntimeError('The Statistics object is not initialized')
257
262
 
258
- crawler_runtime = datetime.now(timezone.utc) - self._instance_start
259
- total_minutes = crawler_runtime.total_seconds() / 60
263
+ total_minutes = self.state.crawler_runtime.total_seconds() / 60
260
264
  state = self._state.current_value
261
265
  serialized_state = state.model_dump(by_alias=False)
262
266
 
@@ -267,7 +271,7 @@ class Statistics(Generic[TStatisticsState]):
267
271
  requests_failed_per_minute=math.floor(state.requests_failed / total_minutes) if total_minutes else 0,
268
272
  request_total_duration=state.request_total_finished_duration + state.request_total_failed_duration,
269
273
  requests_total=state.requests_failed + state.requests_finished,
270
- crawler_runtime=crawler_runtime,
274
+ crawler_runtime=state.crawler_runtime,
271
275
  requests_finished=state.requests_finished,
272
276
  requests_failed=state.requests_failed,
273
277
  retry_histogram=serialized_state['request_retry_histogram'],
@@ -205,9 +205,8 @@ class RequestDb(Base):
205
205
  'idx_fetch_available',
206
206
  'request_queue_id',
207
207
  'is_handled',
208
- 'time_blocked_until',
209
208
  'sequence_number',
210
- postgresql_where=text('is_handled = false'),
209
+ postgresql_where=text('is_handled is false'),
211
210
  ),
212
211
  )
213
212