crawlee 1.0.4b3__tar.gz → 1.0.4b5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crawlee might be problematic. Click here for more details.

Files changed (680)
  1. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/CHANGELOG.md +1 -0
  2. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/PKG-INFO +1 -1
  3. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/pyproject.toml +1 -1
  4. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_utils/urls.py +9 -2
  5. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +3 -1
  6. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_playwright/_playwright_crawler.py +3 -1
  7. crawlee-1.0.4b5/tests/unit/_autoscaling/test_snapshotter.py +353 -0
  8. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py +2 -2
  9. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/crawlers/_parsel/test_parsel_crawler.py +2 -2
  10. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/server_endpoints.py +1 -0
  11. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/uv.lock +1 -1
  12. crawlee-1.0.4b3/tests/unit/_autoscaling/test_snapshotter.py +0 -333
  13. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/.editorconfig +0 -0
  14. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/.github/CODEOWNERS +0 -0
  15. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/.github/pull_request_template.md +0 -0
  16. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/.github/workflows/build_and_deploy_docs.yaml +0 -0
  17. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/.github/workflows/check_pr_title.yaml +0 -0
  18. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/.github/workflows/pre_release.yaml +0 -0
  19. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/.github/workflows/release.yaml +0 -0
  20. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/.github/workflows/run_code_checks.yaml +0 -0
  21. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/.github/workflows/templates_e2e_tests.yaml +0 -0
  22. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/.github/workflows/update_new_issue.yaml +0 -0
  23. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/.gitignore +0 -0
  24. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/.markdownlint.yaml +0 -0
  25. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/.pre-commit-config.yaml +0 -0
  26. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/CONTRIBUTING.md +0 -0
  27. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/LICENSE +0 -0
  28. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/Makefile +0 -0
  29. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/README.md +0 -0
  30. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/deployment/apify_platform.mdx +0 -0
  31. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/deployment/code_examples/apify/crawler_as_actor_example.py +0 -0
  32. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/deployment/code_examples/apify/get_public_url.py +0 -0
  33. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/deployment/code_examples/apify/log_with_config_example.py +0 -0
  34. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/deployment/code_examples/apify/proxy_advanced_example.py +0 -0
  35. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/deployment/code_examples/apify/proxy_example.py +0 -0
  36. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/deployment/code_examples/google/cloud_run_example.py +0 -0
  37. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/deployment/code_examples/google/google_example.py +0 -0
  38. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/deployment/google_cloud.mdx +0 -0
  39. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/deployment/google_cloud_run.mdx +0 -0
  40. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/add_data_to_dataset.mdx +0 -0
  41. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/beautifulsoup_crawler.mdx +0 -0
  42. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/capture_screenshot_using_playwright.mdx +0 -0
  43. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/capturing_page_snapshots_with_error_snapshotter.mdx +0 -0
  44. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/adaptive_playwright_crawler.py +0 -0
  45. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/add_data_to_dataset_bs.py +0 -0
  46. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/add_data_to_dataset_dataset.py +0 -0
  47. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/add_data_to_dataset_pw.py +0 -0
  48. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/beautifulsoup_crawler.py +0 -0
  49. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/beautifulsoup_crawler_keep_alive.py +0 -0
  50. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/beautifulsoup_crawler_stop.py +0 -0
  51. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/capture_screenshot_using_playwright.py +0 -0
  52. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/configure_json_logging.py +0 -0
  53. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/crawl_all_links_on_website_bs.py +0 -0
  54. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/crawl_all_links_on_website_pw.py +0 -0
  55. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/crawl_multiple_urls_bs.py +0 -0
  56. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/crawl_multiple_urls_pw.py +0 -0
  57. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/crawl_specific_links_on_website_bs.py +0 -0
  58. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/crawl_specific_links_on_website_pw.py +0 -0
  59. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/crawl_website_with_relative_links_all_links.py +0 -0
  60. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/crawl_website_with_relative_links_same_domain.py +0 -0
  61. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/crawl_website_with_relative_links_same_hostname.py +0 -0
  62. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/crawl_website_with_relative_links_same_origin.py +0 -0
  63. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/export_entire_dataset_to_file_csv.py +0 -0
  64. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/export_entire_dataset_to_file_json.py +0 -0
  65. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/extract_and_add_specific_links_on_website_bs.py +0 -0
  66. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/extract_and_add_specific_links_on_website_pw.py +0 -0
  67. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/fill_and_submit_web_form_crawler.py +0 -0
  68. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/fill_and_submit_web_form_request.py +0 -0
  69. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/parsel_crawler.py +0 -0
  70. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/parsel_crawler_with_error_snapshotter.py +0 -0
  71. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/playwright_block_requests.py +0 -0
  72. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/playwright_crawler.py +0 -0
  73. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/playwright_crawler_with_camoufox.py +0 -0
  74. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/playwright_crawler_with_error_snapshotter.py +0 -0
  75. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/playwright_crawler_with_fingerprint_generator.py +0 -0
  76. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/respect_robots_on_skipped_request.py +0 -0
  77. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/respect_robots_txt_file.py +0 -0
  78. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/resuming_paused_crawl.py +0 -0
  79. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/using_browser_profiles_chrome.py +0 -0
  80. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/code_examples/using_browser_profiles_firefox.py +0 -0
  81. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/crawl_all_links_on_website.mdx +0 -0
  82. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/crawl_multiple_urls.mdx +0 -0
  83. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/crawl_specific_links_on_website.mdx +0 -0
  84. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/crawl_website_with_relative_links.mdx +0 -0
  85. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/crawler_keep_alive.mdx +0 -0
  86. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/crawler_stop.mdx +0 -0
  87. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/export_entire_dataset_to_file.mdx +0 -0
  88. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/fill_and_submit_web_form.mdx +0 -0
  89. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/json_logging.mdx +0 -0
  90. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/parsel_crawler.mdx +0 -0
  91. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/playwright_crawler.mdx +0 -0
  92. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/playwright_crawler_adaptive.mdx +0 -0
  93. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/playwright_crawler_with_block_requests.mdx +0 -0
  94. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/playwright_crawler_with_camoufox.mdx +0 -0
  95. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/playwright_crawler_with_fingerprint_generator.mdx +0 -0
  96. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/respect_robots_txt_file.mdx +0 -0
  97. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/resuming_paused_crawl.mdx +0 -0
  98. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/examples/using_browser_profile.mdx +0 -0
  99. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/architecture_overview.mdx +0 -0
  100. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/avoid_blocking.mdx +0 -0
  101. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/avoid_blocking/default_fingerprint_generator_with_args.py +0 -0
  102. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/avoid_blocking/playwright_with_fingerprint_generator.py +0 -0
  103. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/creating_web_archive/manual_archiving_parsel_crawler.py +0 -0
  104. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/creating_web_archive/manual_archiving_playwright_crawler.py +0 -0
  105. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/creating_web_archive/simple_pw_through_proxy_pywb_server.py +0 -0
  106. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/error_handling/change_handle_error_status.py +0 -0
  107. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/error_handling/disable_retry.py +0 -0
  108. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/error_handling/handle_proxy_error.py +0 -0
  109. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/http_clients/parsel_curl_impersonate_example.py +0 -0
  110. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/http_clients/parsel_httpx_example.py +0 -0
  111. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/http_clients/parsel_impit_example.py +0 -0
  112. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/http_crawlers/beautifulsoup_example.py +0 -0
  113. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/http_crawlers/custom_crawler_example.py +0 -0
  114. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/http_crawlers/http_example.py +0 -0
  115. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/http_crawlers/parsel_example.py +0 -0
  116. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/login_crawler/http_login.py +0 -0
  117. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/login_crawler/playwright_login.py +0 -0
  118. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/playwright_crawler/browser_configuration_example.py +0 -0
  119. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/playwright_crawler/multiple_launch_example.py +0 -0
  120. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/playwright_crawler/plugin_browser_configuration_example.py +0 -0
  121. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/playwright_crawler/pre_navigation_hook_example.py +0 -0
  122. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/playwright_crawler_adaptive/handler.py +0 -0
  123. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/playwright_crawler_adaptive/init_beautifulsoup.py +0 -0
  124. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/playwright_crawler_adaptive/init_parsel.py +0 -0
  125. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/playwright_crawler_adaptive/init_prediction.py +0 -0
  126. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/playwright_crawler_adaptive/pre_nav_hooks.py +0 -0
  127. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/playwright_crawler_stagehand/__init__.py +0 -0
  128. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/playwright_crawler_stagehand/browser_classes.py +0 -0
  129. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/playwright_crawler_stagehand/stagehand_run.py +0 -0
  130. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/playwright_crawler_stagehand/support_classes.py +0 -0
  131. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/proxy_management/inspecting_bs_example.py +0 -0
  132. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/proxy_management/inspecting_pw_example.py +0 -0
  133. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/proxy_management/integration_bs_example.py +0 -0
  134. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/proxy_management/integration_pw_example.py +0 -0
  135. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/proxy_management/quick_start_example.py +0 -0
  136. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/proxy_management/session_bs_example.py +0 -0
  137. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/proxy_management/session_pw_example.py +0 -0
  138. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/proxy_management/tiers_bs_example.py +0 -0
  139. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/proxy_management/tiers_pw_example.py +0 -0
  140. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/request_loaders/rl_basic_example.py +0 -0
  141. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/request_loaders/rl_basic_example_with_persist.py +0 -0
  142. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/request_loaders/rl_tandem_example.py +0 -0
  143. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/request_loaders/rl_tandem_example_explicit.py +0 -0
  144. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/request_loaders/sitemap_basic_example.py +0 -0
  145. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/request_loaders/sitemap_example_with_persist.py +0 -0
  146. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/request_loaders/sitemap_tandem_example.py +0 -0
  147. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/request_loaders/sitemap_tandem_example_explicit.py +0 -0
  148. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/request_router/adaptive_crawler_handlers.py +0 -0
  149. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/request_router/basic_request_handlers.py +0 -0
  150. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/request_router/custom_router_default_only.py +0 -0
  151. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/request_router/error_handler.py +0 -0
  152. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/request_router/failed_request_handler.py +0 -0
  153. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/request_router/http_pre_navigation.py +0 -0
  154. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/request_router/playwright_pre_navigation.py +0 -0
  155. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/request_router/simple_default_handler.py +0 -0
  156. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/running_in_web_server/__init__.py +0 -0
  157. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/running_in_web_server/crawler.py +0 -0
  158. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/running_in_web_server/server.py +0 -0
  159. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/scaling_crawlers/max_tasks_per_minute_example.py +0 -0
  160. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/scaling_crawlers/min_and_max_concurrency_example.py +0 -0
  161. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/service_locator/service_conflicts.py +0 -0
  162. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/service_locator/service_crawler_configuration.py +0 -0
  163. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/service_locator/service_crawler_event_manager.py +0 -0
  164. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/service_locator/service_crawler_storage_client.py +0 -0
  165. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/service_locator/service_locator_configuration.py +0 -0
  166. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/service_locator/service_locator_event_manager.py +0 -0
  167. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/service_locator/service_locator_storage_client.py +0 -0
  168. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/service_locator/service_storage_configuration.py +0 -0
  169. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/service_locator/service_storage_storage_client.py +0 -0
  170. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/session_management/multi_sessions_http.py +0 -0
  171. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/session_management/one_session_http.py +0 -0
  172. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/session_management/sm_basic.py +0 -0
  173. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/session_management/sm_beautifulsoup.py +0 -0
  174. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/session_management/sm_http.py +0 -0
  175. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/session_management/sm_parsel.py +0 -0
  176. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/session_management/sm_playwright.py +0 -0
  177. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/session_management/sm_standalone.py +0 -0
  178. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/storage_clients/custom_storage_client_example.py +0 -0
  179. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/storage_clients/file_system_storage_client_basic_example.py +0 -0
  180. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/storage_clients/file_system_storage_client_configuration_example.py +0 -0
  181. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/storage_clients/memory_storage_client_basic_example.py +0 -0
  182. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/storage_clients/registering_storage_clients_example.py +0 -0
  183. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/storage_clients/sql_storage_client_basic_example.py +0 -0
  184. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/storage_clients/sql_storage_client_configuration_example.py +0 -0
  185. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/storages/cleaning_do_not_purge_example.py +0 -0
  186. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/storages/cleaning_purge_explicitly_example.py +0 -0
  187. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/storages/dataset_basic_example.py +0 -0
  188. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/storages/dataset_with_crawler_example.py +0 -0
  189. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/storages/dataset_with_crawler_explicit_example.py +0 -0
  190. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/storages/helper_add_requests_example.py +0 -0
  191. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/storages/helper_enqueue_links_example.py +0 -0
  192. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/storages/kvs_basic_example.py +0 -0
  193. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/storages/kvs_with_crawler_example.py +0 -0
  194. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/storages/kvs_with_crawler_explicit_example.py +0 -0
  195. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/storages/opening.py +0 -0
  196. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/storages/rq_basic_example.py +0 -0
  197. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/storages/rq_with_crawler_example.py +0 -0
  198. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/storages/rq_with_crawler_explicit_example.py +0 -0
  199. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/code_examples/trace_and_monitor_crawlers/instrument_crawler.py +0 -0
  200. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/crawler_login.mdx +0 -0
  201. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/creating_web_archive.mdx +0 -0
  202. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/error_handling.mdx +0 -0
  203. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/http_clients.mdx +0 -0
  204. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/http_crawlers.mdx +0 -0
  205. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/playwright_crawler.mdx +0 -0
  206. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/playwright_crawler_adaptive.mdx +0 -0
  207. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/playwright_crawler_stagehand.mdx +0 -0
  208. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/proxy_management.mdx +0 -0
  209. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/request_loaders.mdx +0 -0
  210. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/request_router.mdx +0 -0
  211. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/running_in_web_server.mdx +0 -0
  212. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/scaling_crawlers.mdx +0 -0
  213. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/service_locator.mdx +0 -0
  214. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/session_management.mdx +0 -0
  215. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/storage_clients.mdx +0 -0
  216. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/storages.mdx +0 -0
  217. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/guides/trace_and_monitor_crawlers.mdx +0 -0
  218. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/01_setting_up.mdx +0 -0
  219. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/02_first_crawler.mdx +0 -0
  220. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/03_adding_more_urls.mdx +0 -0
  221. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/04_real_world_project.mdx +0 -0
  222. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/05_crawling.mdx +0 -0
  223. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/06_scraping.mdx +0 -0
  224. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/07_saving_data.mdx +0 -0
  225. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/08_refactoring.mdx +0 -0
  226. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/09_running_in_cloud.mdx +0 -0
  227. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/code_examples/02_bs.py +0 -0
  228. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/code_examples/02_bs_better.py +0 -0
  229. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/code_examples/02_request_queue.py +0 -0
  230. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/code_examples/03_enqueue_strategy.py +0 -0
  231. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/code_examples/03_finding_new_links.py +0 -0
  232. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/code_examples/03_globs.py +0 -0
  233. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/code_examples/03_original_code.py +0 -0
  234. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/code_examples/03_transform_request.py +0 -0
  235. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/code_examples/04_sanity_check.py +0 -0
  236. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/code_examples/05_crawling_detail.py +0 -0
  237. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/code_examples/05_crawling_listing.py +0 -0
  238. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/code_examples/06_scraping.py +0 -0
  239. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/code_examples/07_final_code.py +0 -0
  240. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/code_examples/07_first_code.py +0 -0
  241. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/code_examples/08_main.py +0 -0
  242. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/code_examples/08_routes.py +0 -0
  243. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/code_examples/09_apify_sdk.py +0 -0
  244. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/code_examples/__init__.py +0 -0
  245. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/code_examples/routes.py +0 -0
  246. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/introduction/index.mdx +0 -0
  247. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/pyproject.toml +0 -0
  248. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/quick-start/code_examples/beautifulsoup_crawler_example.py +0 -0
  249. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/quick-start/code_examples/parsel_crawler_example.py +0 -0
  250. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/quick-start/code_examples/playwright_crawler_example.py +0 -0
  251. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/quick-start/code_examples/playwright_crawler_headful_example.py +0 -0
  252. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/quick-start/index.mdx +0 -0
  253. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/upgrading/upgrading_to_v0x.md +0 -0
  254. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/docs/upgrading/upgrading_to_v1.md +0 -0
  255. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/renovate.json +0 -0
  256. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/__init__.py +0 -0
  257. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_autoscaling/__init__.py +0 -0
  258. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_autoscaling/_types.py +0 -0
  259. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_autoscaling/autoscaled_pool.py +0 -0
  260. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_autoscaling/py.typed +0 -0
  261. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_autoscaling/snapshotter.py +0 -0
  262. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_autoscaling/system_status.py +0 -0
  263. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_browserforge_workaround.py +0 -0
  264. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_cli.py +0 -0
  265. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_consts.py +0 -0
  266. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_log_config.py +0 -0
  267. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_request.py +0 -0
  268. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_service_locator.py +0 -0
  269. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_types.py +0 -0
  270. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_utils/__init__.py +0 -0
  271. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_utils/blocked.py +0 -0
  272. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_utils/byte_size.py +0 -0
  273. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_utils/console.py +0 -0
  274. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_utils/context.py +0 -0
  275. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_utils/crypto.py +0 -0
  276. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_utils/docs.py +0 -0
  277. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_utils/file.py +0 -0
  278. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_utils/globs.py +0 -0
  279. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_utils/html_to_text.py +0 -0
  280. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_utils/models.py +0 -0
  281. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_utils/raise_if_too_many_kwargs.py +0 -0
  282. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_utils/recoverable_state.py +0 -0
  283. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_utils/recurring_task.py +0 -0
  284. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_utils/requests.py +0 -0
  285. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_utils/robots.py +0 -0
  286. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_utils/sitemap.py +0 -0
  287. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_utils/system.py +0 -0
  288. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_utils/time.py +0 -0
  289. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_utils/try_import.py +0 -0
  290. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_utils/wait.py +0 -0
  291. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/_utils/web.py +0 -0
  292. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/browsers/__init__.py +0 -0
  293. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/browsers/_browser_controller.py +0 -0
  294. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/browsers/_browser_plugin.py +0 -0
  295. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/browsers/_browser_pool.py +0 -0
  296. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/browsers/_playwright_browser.py +0 -0
  297. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/browsers/_playwright_browser_controller.py +0 -0
  298. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/browsers/_playwright_browser_plugin.py +0 -0
  299. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/browsers/_types.py +0 -0
  300. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/browsers/py.typed +0 -0
  301. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/configuration.py +0 -0
  302. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/__init__.py +0 -0
  303. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_abstract_http/__init__.py +0 -0
  304. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_abstract_http/_abstract_http_parser.py +0 -0
  305. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_abstract_http/_http_crawling_context.py +0 -0
  306. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_abstract_http/py.typed +0 -0
  307. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_adaptive_playwright/__init__.py +0 -0
  308. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +0 -0
  309. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py +0 -0
  310. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py +0 -0
  311. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py +0 -0
  312. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_adaptive_playwright/_result_comparator.py +0 -0
  313. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_adaptive_playwright/_utils.py +0 -0
  314. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_basic/__init__.py +0 -0
  315. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_basic/_basic_crawler.py +0 -0
  316. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_basic/_basic_crawling_context.py +0 -0
  317. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_basic/_context_pipeline.py +0 -0
  318. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_basic/_logging_utils.py +0 -0
  319. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_basic/py.typed +0 -0
  320. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_beautifulsoup/__init__.py +0 -0
  321. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawler.py +0 -0
  322. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawling_context.py +0 -0
  323. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_parser.py +0 -0
  324. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_beautifulsoup/_utils.py +0 -0
  325. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_beautifulsoup/py.typed +0 -0
  326. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_http/__init__.py +0 -0
  327. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_http/_http_crawler.py +0 -0
  328. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_http/_http_parser.py +0 -0
  329. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_parsel/__init__.py +0 -0
  330. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_parsel/_parsel_crawler.py +0 -0
  331. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_parsel/_parsel_crawling_context.py +0 -0
  332. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_parsel/_parsel_parser.py +0 -0
  333. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_parsel/_utils.py +0 -0
  334. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_playwright/__init__.py +0 -0
  335. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_playwright/_playwright_crawling_context.py +0 -0
  336. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_playwright/_playwright_http_client.py +0 -0
  337. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py +0 -0
  338. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_playwright/_types.py +0 -0
  339. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_playwright/_utils.py +0 -0
  340. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/_types.py +0 -0
  341. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/crawlers/py.typed +0 -0
  342. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/errors.py +0 -0
  343. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/events/__init__.py +0 -0
  344. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/events/_event_manager.py +0 -0
  345. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/events/_local_event_manager.py +0 -0
  346. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/events/_types.py +0 -0
  347. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/events/py.typed +0 -0
  348. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/fingerprint_suite/__init__.py +0 -0
  349. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/fingerprint_suite/_browserforge_adapter.py +0 -0
  350. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/fingerprint_suite/_consts.py +0 -0
  351. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/fingerprint_suite/_fingerprint_generator.py +0 -0
  352. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/fingerprint_suite/_header_generator.py +0 -0
  353. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/fingerprint_suite/_types.py +0 -0
  354. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/fingerprint_suite/py.typed +0 -0
  355. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/http_clients/__init__.py +0 -0
  356. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/http_clients/_base.py +0 -0
  357. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/http_clients/_curl_impersonate.py +0 -0
  358. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/http_clients/_httpx.py +0 -0
  359. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/http_clients/_impit.py +0 -0
  360. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/otel/__init__.py +0 -0
  361. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/otel/crawler_instrumentor.py +0 -0
  362. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/project_template/cookiecutter.json +0 -0
  363. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/project_template/hooks/post_gen_project.py +0 -0
  364. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/project_template/hooks/pre_gen_project.py +0 -0
  365. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/project_template/templates/main.py +0 -0
  366. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/project_template/templates/main_beautifulsoup.py +0 -0
  367. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/project_template/templates/main_parsel.py +0 -0
  368. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/project_template/templates/main_playwright.py +0 -0
  369. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/project_template/templates/main_playwright_camoufox.py +0 -0
  370. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/project_template/templates/routes_beautifulsoup.py +0 -0
  371. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/project_template/templates/routes_camoufox.py +0 -0
  372. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/project_template/templates/routes_parsel.py +0 -0
  373. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/project_template/templates/routes_playwright.py +0 -0
  374. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/project_template/templates/routes_playwright_camoufox.py +0 -0
  375. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/.dockerignore +0 -0
  376. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile +0 -0
  377. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/README.md +0 -0
  378. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml +0 -0
  379. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/requirements.txt +0 -0
  380. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__init__.py +0 -0
  381. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__main__.py +0 -0
  382. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/main.py +0 -0
  383. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/routes.py +0 -0
  384. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/proxy_configuration.py +0 -0
  385. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/py.typed +0 -0
  386. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/request_loaders/__init__.py +0 -0
  387. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/request_loaders/_request_list.py +0 -0
  388. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/request_loaders/_request_loader.py +0 -0
  389. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/request_loaders/_request_manager.py +0 -0
  390. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/request_loaders/_request_manager_tandem.py +0 -0
  391. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/request_loaders/_sitemap_request_loader.py +0 -0
  392. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/router.py +0 -0
  393. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/sessions/__init__.py +0 -0
  394. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/sessions/_cookies.py +0 -0
  395. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/sessions/_models.py +0 -0
  396. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/sessions/_session.py +0 -0
  397. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/sessions/_session_pool.py +0 -0
  398. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/sessions/py.typed +0 -0
  399. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/statistics/__init__.py +0 -0
  400. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/statistics/_error_snapshotter.py +0 -0
  401. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/statistics/_error_tracker.py +0 -0
  402. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/statistics/_models.py +0 -0
  403. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/statistics/_statistics.py +0 -0
  404. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/__init__.py +0 -0
  405. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/_base/__init__.py +0 -0
  406. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/_base/_dataset_client.py +0 -0
  407. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/_base/_key_value_store_client.py +0 -0
  408. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/_base/_request_queue_client.py +0 -0
  409. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/_base/_storage_client.py +0 -0
  410. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/_base/py.typed +0 -0
  411. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/_file_system/__init__.py +0 -0
  412. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/_file_system/_dataset_client.py +0 -0
  413. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/_file_system/_key_value_store_client.py +0 -0
  414. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/_file_system/_request_queue_client.py +0 -0
  415. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/_file_system/_storage_client.py +0 -0
  416. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/_file_system/_utils.py +0 -0
  417. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/_file_system/py.typed +0 -0
  418. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/_memory/__init__.py +0 -0
  419. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/_memory/_dataset_client.py +0 -0
  420. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/_memory/_key_value_store_client.py +0 -0
  421. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/_memory/_request_queue_client.py +0 -0
  422. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/_memory/_storage_client.py +0 -0
  423. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/_memory/py.typed +0 -0
  424. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/_sql/__init__.py +0 -0
  425. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/_sql/_client_mixin.py +0 -0
  426. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/_sql/_dataset_client.py +0 -0
  427. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/_sql/_db_models.py +0 -0
  428. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/_sql/_key_value_store_client.py +0 -0
  429. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/_sql/_request_queue_client.py +0 -0
  430. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/_sql/_storage_client.py +0 -0
  431. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/_sql/py.typed +0 -0
  432. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/models.py +0 -0
  433. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storage_clients/py.typed +0 -0
  434. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storages/__init__.py +0 -0
  435. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storages/_base.py +0 -0
  436. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storages/_dataset.py +0 -0
  437. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storages/_key_value_store.py +0 -0
  438. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storages/_request_queue.py +0 -0
  439. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storages/_storage_instance_manager.py +0 -0
  440. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storages/_utils.py +0 -0
  441. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/src/crawlee/storages/py.typed +0 -0
  442. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/__init__.py +0 -0
  443. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/e2e/__init__.py +0 -0
  444. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/e2e/conftest.py +0 -0
  445. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/e2e/project_template/test_static_crawlers_templates.py +0 -0
  446. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/e2e/project_template/utils.py +0 -0
  447. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/README.md +0 -0
  448. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/__init__.py +0 -0
  449. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/_autoscaling/test_autoscaled_pool.py +0 -0
  450. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/_autoscaling/test_system_status.py +0 -0
  451. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/_statistics/test_error_tracker.py +0 -0
  452. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/_statistics/test_periodic_logging.py +0 -0
  453. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/_statistics/test_persistence.py +0 -0
  454. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/_statistics/test_request_processing_record.py +0 -0
  455. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/_utils/test_byte_size.py +0 -0
  456. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/_utils/test_console.py +0 -0
  457. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/_utils/test_crypto.py +0 -0
  458. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/_utils/test_file.py +0 -0
  459. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/_utils/test_globs.py +0 -0
  460. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/_utils/test_html_to_text.py +0 -0
  461. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/_utils/test_measure_time.py +0 -0
  462. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/_utils/test_raise_if_too_many_kwargs.py +0 -0
  463. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/_utils/test_recurring_task.py +0 -0
  464. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/_utils/test_requests.py +0 -0
  465. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/_utils/test_robots.py +0 -0
  466. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/_utils/test_sitemap.py +0 -0
  467. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/_utils/test_system.py +0 -0
  468. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/_utils/test_timedelata_ms.py +0 -0
  469. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/_utils/test_urls.py +0 -0
  470. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/browsers/test_browser_pool.py +0 -0
  471. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/browsers/test_playwright_browser.py +0 -0
  472. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/browsers/test_playwright_browser_controller.py +0 -0
  473. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/browsers/test_playwright_browser_plugin.py +0 -0
  474. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/conftest.py +0 -0
  475. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler.py +0 -0
  476. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler_statistics.py +0 -0
  477. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawling_context.py +0 -0
  478. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/crawlers/_adaptive_playwright/test_predictor.py +0 -0
  479. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/crawlers/_basic/test_basic_crawler.py +0 -0
  480. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/crawlers/_basic/test_context_pipeline.py +0 -0
  481. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/crawlers/_http/test_http_crawler.py +0 -0
  482. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/crawlers/_playwright/test_playwright_crawler.py +0 -0
  483. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/events/test_event_manager.py +0 -0
  484. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/events/test_local_event_manager.py +0 -0
  485. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/fingerprint_suite/test_adapters.py +0 -0
  486. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/fingerprint_suite/test_header_generator.py +0 -0
  487. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/http_clients/test_http_clients.py +0 -0
  488. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/http_clients/test_httpx.py +0 -0
  489. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/otel/test_crawler_instrumentor.py +0 -0
  490. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/proxy_configuration/test_new_proxy_info.py +0 -0
  491. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/proxy_configuration/test_tiers.py +0 -0
  492. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/request_loaders/test_request_list.py +0 -0
  493. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/request_loaders/test_sitemap_request_loader.py +0 -0
  494. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/server.py +0 -0
  495. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/sessions/test_cookies.py +0 -0
  496. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/sessions/test_models.py +0 -0
  497. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/sessions/test_session.py +0 -0
  498. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/sessions/test_session_pool.py +0 -0
  499. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py +0 -0
  500. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py +0 -0
  501. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/storage_clients/_file_system/test_fs_rq_client.py +0 -0
  502. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/storage_clients/_memory/test_memory_dataset_client.py +0 -0
  503. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/storage_clients/_memory/test_memory_kvs_client.py +0 -0
  504. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/storage_clients/_memory/test_memory_rq_client.py +0 -0
  505. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/storage_clients/_sql/test_sql_dataset_client.py +0 -0
  506. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/storage_clients/_sql/test_sql_kvs_client.py +0 -0
  507. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/storage_clients/_sql/test_sql_rq_client.py +0 -0
  508. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/storages/conftest.py +0 -0
  509. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/storages/test_dataset.py +0 -0
  510. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/storages/test_key_value_store.py +0 -0
  511. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/storages/test_request_manager_tandem.py +0 -0
  512. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/storages/test_request_queue.py +0 -0
  513. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/storages/test_storage_instance_manager.py +0 -0
  514. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/test_cli.py +0 -0
  515. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/test_configuration.py +0 -0
  516. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/test_log_config.py +0 -0
  517. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/test_router.py +0 -0
  518. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/tests/unit/test_service_locator.py +0 -0
  519. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/.eslintrc.json +0 -0
  520. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/.yarnrc.yml +0 -0
  521. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/babel.config.js +0 -0
  522. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/build_api_reference.sh +0 -0
  523. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/docusaurus.config.js +0 -0
  524. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/generate_module_shortcuts.py +0 -0
  525. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/package.json +0 -0
  526. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/patches/@docusaurus+core+3.4.0.patch +0 -0
  527. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/patches/@docusaurus+core+3.5.2.patch +0 -0
  528. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/roa-loader/index.js +0 -0
  529. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/roa-loader/package.json +0 -0
  530. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/sidebars.js +0 -0
  531. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/components/ApiLink.jsx +0 -0
  532. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/components/Button.jsx +0 -0
  533. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/components/Button.module.css +0 -0
  534. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/components/CopyButton.jsx +0 -0
  535. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/components/CopyButton.module.css +0 -0
  536. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/components/Gradients.jsx +0 -0
  537. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/components/Highlights.jsx +0 -0
  538. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/components/Highlights.module.css +0 -0
  539. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/components/Homepage/HomepageCliExample.jsx +0 -0
  540. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/components/Homepage/HomepageCliExample.module.css +0 -0
  541. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/components/Homepage/HomepageCtaSection.jsx +0 -0
  542. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/components/Homepage/HomepageCtaSection.module.css +0 -0
  543. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/components/Homepage/HomepageHeroSection.jsx +0 -0
  544. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/components/Homepage/HomepageHeroSection.module.css +0 -0
  545. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/components/Homepage/LanguageInfoWidget.jsx +0 -0
  546. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/components/Homepage/LanguageInfoWidget.module.css +0 -0
  547. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/components/Homepage/LanguageSwitch.jsx +0 -0
  548. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/components/Homepage/LanguageSwitch.module.css +0 -0
  549. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/components/Homepage/RiverSection.jsx +0 -0
  550. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/components/Homepage/RiverSection.module.css +0 -0
  551. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/components/Homepage/ThreeCardsWithIcon.jsx +0 -0
  552. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/components/Homepage/ThreeCardsWithIcon.module.css +0 -0
  553. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/components/Homepage/animated-crawlee-logo-dark.svg +0 -0
  554. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/components/Homepage/animated-crawlee-logo-light.svg +0 -0
  555. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/components/RunnableCodeBlock.jsx +0 -0
  556. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/components/RunnableCodeBlock.module.css +0 -0
  557. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/css/custom.css +0 -0
  558. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/pages/home_page_example.py +0 -0
  559. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/pages/index.js +0 -0
  560. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/pages/index.module.css +0 -0
  561. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/theme/ColorModeToggle/dark-mode-icon.svg +0 -0
  562. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/theme/ColorModeToggle/index.js +0 -0
  563. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/theme/ColorModeToggle/light-mode-icon.svg +0 -0
  564. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/theme/ColorModeToggle/styles.module.css +0 -0
  565. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/theme/DocItem/Layout/index.js +0 -0
  566. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/theme/DocItem/Layout/styles.module.css +0 -0
  567. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/theme/Footer/LinkItem/index.js +0 -0
  568. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/theme/Footer/LinkItem/index.module.css +0 -0
  569. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/theme/Footer/index.js +0 -0
  570. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/theme/Footer/index.module.css +0 -0
  571. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/theme/MDXComponents/A.js +0 -0
  572. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/theme/Navbar/Content/index.js +0 -0
  573. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/theme/Navbar/Content/styles.module.css +0 -0
  574. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/theme/Navbar/Logo/index.js +0 -0
  575. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/theme/Navbar/Logo/index.module.css +0 -0
  576. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/theme/Navbar/MobileSidebar/Header/index.js +0 -0
  577. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/theme/Navbar/MobileSidebar/Header/index.module.css +0 -0
  578. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/theme/Navbar/MobileSidebar/Layout/index.js +0 -0
  579. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/theme/Navbar/MobileSidebar/PrimaryMenu/index.js +0 -0
  580. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/theme/Navbar/MobileSidebar/index.js +0 -0
  581. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/src/theme/NavbarItem/ComponentTypes.js +0 -0
  582. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/.nojekyll +0 -0
  583. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/font/lota.woff +0 -0
  584. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/font/lota.woff2 +0 -0
  585. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/API.png +0 -0
  586. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/apify_logo.svg +0 -0
  587. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/apify_og_SDK.png +0 -0
  588. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/apify_sdk.svg +0 -0
  589. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/apify_sdk_white.svg +0 -0
  590. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/arrow_right.svg +0 -0
  591. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/auto-scaling-dark.webp +0 -0
  592. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/auto-scaling-light.webp +0 -0
  593. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/check.svg +0 -0
  594. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/chrome-scrape-dark.gif +0 -0
  595. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/chrome-scrape-light.gif +0 -0
  596. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/cloud_icon.svg +0 -0
  597. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/community-dark-icon.svg +0 -0
  598. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/community-light-icon.svg +0 -0
  599. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/crawlee-dark-new.svg +0 -0
  600. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/crawlee-dark.svg +0 -0
  601. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/crawlee-javascript-dark.svg +0 -0
  602. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/crawlee-javascript-light.svg +0 -0
  603. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/crawlee-light-new.svg +0 -0
  604. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/crawlee-light.svg +0 -0
  605. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/crawlee-logo-monocolor.svg +0 -0
  606. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/crawlee-logo.svg +0 -0
  607. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/crawlee-python-dark.svg +0 -0
  608. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/crawlee-python-light.svg +0 -0
  609. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/crawlee-python-og.png +0 -0
  610. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/defaults-dark-icon.svg +0 -0
  611. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/defaults-light-icon.svg +0 -0
  612. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/discord-brand-dark.svg +0 -0
  613. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/discord-brand.svg +0 -0
  614. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/docusaurus.svg +0 -0
  615. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/external-link.svg +0 -0
  616. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/favicon.ico +0 -0
  617. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/favorite-tools-dark.webp +0 -0
  618. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/favorite-tools-light.webp +0 -0
  619. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/features/auto-scaling.svg +0 -0
  620. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/features/automate-everything.svg +0 -0
  621. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/features/fingerprints.svg +0 -0
  622. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/features/node-requests.svg +0 -0
  623. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/features/runs-on-py.svg +0 -0
  624. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/features/storage.svg +0 -0
  625. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/features/works-everywhere.svg +0 -0
  626. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/fill-and-submit-web-form/00.jpg +0 -0
  627. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/fill-and-submit-web-form/01.jpg +0 -0
  628. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/fill-and-submit-web-form/02.jpg +0 -0
  629. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/fill-and-submit-web-form/03.jpg +0 -0
  630. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/getting-started/current-price.jpg +0 -0
  631. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/getting-started/scraping-practice.jpg +0 -0
  632. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/getting-started/select-an-element.jpg +0 -0
  633. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/getting-started/selected-element.jpg +0 -0
  634. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/getting-started/sku.jpg +0 -0
  635. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/getting-started/title.jpg +0 -0
  636. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/github-brand-dark.svg +0 -0
  637. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/github-brand.svg +0 -0
  638. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/guides/jaeger_otel_search_view_example.png +0 -0
  639. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/guides/jaeger_otel_trace_example.png +0 -0
  640. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/hearth copy.svg +0 -0
  641. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/hearth.svg +0 -0
  642. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/javascript_logo.svg +0 -0
  643. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/js_file.svg +0 -0
  644. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/logo-big.svg +0 -0
  645. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/logo-blur.png +0 -0
  646. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/logo-blur.svg +0 -0
  647. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/logo-zoom.svg +0 -0
  648. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/menu-arrows.svg +0 -0
  649. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/oss_logo.png +0 -0
  650. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/puppeteer-live-view-dashboard.png +0 -0
  651. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/puppeteer-live-view-detail.png +0 -0
  652. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/queue-dark-icon.svg +0 -0
  653. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/queue-light-icon.svg +0 -0
  654. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/resuming-paused-crawl/00.webp +0 -0
  655. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/resuming-paused-crawl/01.webp +0 -0
  656. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/robot.png +0 -0
  657. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/routing-dark-icon.svg +0 -0
  658. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/routing-light-icon.svg +0 -0
  659. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/scraping-utils-dark-icon.svg +0 -0
  660. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/scraping-utils-light-icon.svg +0 -0
  661. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/smart-proxy-dark.webp +0 -0
  662. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/smart-proxy-light.webp +0 -0
  663. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/source_code.png +0 -0
  664. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/system.svg +0 -0
  665. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/triangles_dark.svg +0 -0
  666. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/triangles_light.svg +0 -0
  667. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/workflow.svg +0 -0
  668. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/zero-setup-dark-icon.svg +0 -0
  669. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/img/zero-setup-light-icon.svg +0 -0
  670. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/js/custom.js +0 -0
  671. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/static/robots.txt +0 -0
  672. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/tools/docs-prettier.config.js +0 -0
  673. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/tools/utils/externalLink.js +0 -0
  674. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/tools/website_gif/chrome-scrape-dark.gif +0 -0
  675. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/tools/website_gif/chrome-scrape-dark.mp4 +0 -0
  676. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/tools/website_gif/chrome-scrape-light.gif +0 -0
  677. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/tools/website_gif/chrome-scrape-light.mp4 +0 -0
  678. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/tools/website_gif/website_gif.mjs +0 -0
  679. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/tsconfig.eslint.json +0 -0
  680. {crawlee-1.0.4b3 → crawlee-1.0.4b5}/website/yarn.lock +0 -0
@@ -8,6 +8,7 @@ All notable changes to this project will be documented in this file.
8
8
  ### 🐛 Bug Fixes
9
9
 
10
10
  - Respect `enqueue_strategy` in `enqueue_links` ([#1505](https://github.com/apify/crawlee-python/pull/1505)) ([6ee04bc](https://github.com/apify/crawlee-python/commit/6ee04bc08c50a70f2e956a79d4ce5072a726c3a8)) by [@Mantisus](https://github.com/Mantisus), closes [#1504](https://github.com/apify/crawlee-python/issues/1504)
11
+ - Exclude incorrect links before checking `robots.txt` ([#1502](https://github.com/apify/crawlee-python/pull/1502)) ([3273da5](https://github.com/apify/crawlee-python/commit/3273da5fee62ec9254666b376f382474c3532a56)) by [@Mantisus](https://github.com/Mantisus), closes [#1499](https://github.com/apify/crawlee-python/issues/1499)
11
12
 
12
13
 
13
14
  <!-- git-cliff-unreleased-end -->
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: crawlee
3
- Version: 1.0.4b3
3
+ Version: 1.0.4b5
4
4
  Summary: Crawlee for Python
5
5
  Project-URL: Apify Homepage, https://apify.com
6
6
  Project-URL: Changelog, https://crawlee.dev/python/docs/changelog
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "crawlee"
7
- version = "1.0.4b3"
7
+ version = "1.0.4b5"
8
8
  description = "Crawlee for Python"
9
9
  authors = [{ name = "Apify Technologies s.r.o.", email = "support@apify.com" }]
10
10
  license = { file = "LICENSE" }
@@ -7,6 +7,7 @@ from yarl import URL
7
7
 
8
8
  if TYPE_CHECKING:
9
9
  from collections.abc import Iterator
10
+ from logging import Logger
10
11
 
11
12
 
12
13
  def is_url_absolute(url: str) -> bool:
@@ -22,13 +23,19 @@ def convert_to_absolute_url(base_url: str, relative_url: str) -> str:
22
23
  return str(URL(base_url).join(URL(relative_url)))
23
24
 
24
25
 
25
- def to_absolute_url_iterator(base_url: str, urls: Iterator[str]) -> Iterator[str]:
26
+ def to_absolute_url_iterator(base_url: str, urls: Iterator[str], logger: Logger | None = None) -> Iterator[str]:
26
27
  """Convert an iterator of relative URLs to absolute URLs using a base URL."""
27
28
  for url in urls:
28
29
  if is_url_absolute(url):
29
30
  yield url
30
31
  else:
31
- yield convert_to_absolute_url(base_url, url)
32
+ converted_url = convert_to_absolute_url(base_url, url)
33
+ # Skip the URL if conversion fails, probably due to an incorrect format, such as 'mailto:'.
34
+ if not is_url_absolute(converted_url):
35
+ if logger:
36
+ logger.debug(f'Could not convert URL "{url}" to absolute using base URL "{base_url}". Skipping it.')
37
+ continue
38
+ yield converted_url
32
39
 
33
40
 
34
41
  _http_url_adapter = TypeAdapter(AnyHttpUrl)
@@ -167,7 +167,9 @@ class AbstractHttpCrawler(
167
167
  kwargs.setdefault('strategy', 'same-hostname')
168
168
 
169
169
  links_iterator: Iterator[str] = iter(self._parser.find_links(parsed_content, selector=selector))
170
- links_iterator = to_absolute_url_iterator(context.request.loaded_url or context.request.url, links_iterator)
170
+ links_iterator = to_absolute_url_iterator(
171
+ context.request.loaded_url or context.request.url, links_iterator, logger=context.log
172
+ )
171
173
 
172
174
  if robots_txt_file:
173
175
  skipped, links_iterator = partition(lambda url: robots_txt_file.is_allowed(url), links_iterator)
@@ -366,7 +366,9 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
366
366
  links_iterator: Iterator[str] = iter(
367
367
  [url for element in elements if (url := await element.get_attribute('href')) is not None]
368
368
  )
369
- links_iterator = to_absolute_url_iterator(context.request.loaded_url or context.request.url, links_iterator)
369
+ links_iterator = to_absolute_url_iterator(
370
+ context.request.loaded_url or context.request.url, links_iterator, logger=context.log
371
+ )
370
372
 
371
373
  if robots_txt_file:
372
374
  skipped, links_iterator = partition(lambda url: robots_txt_file.is_allowed(url), links_iterator)
@@ -0,0 +1,353 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import datetime, timedelta, timezone
4
+ from logging import getLogger
5
+ from typing import TYPE_CHECKING, cast
6
+ from unittest.mock import MagicMock
7
+
8
+ import pytest
9
+
10
+ from crawlee import service_locator
11
+ from crawlee._autoscaling import Snapshotter
12
+ from crawlee._autoscaling._types import ClientSnapshot, CpuSnapshot, MemorySnapshot
13
+ from crawlee._autoscaling.snapshotter import SortedSnapshotList
14
+ from crawlee._utils.byte_size import ByteSize
15
+ from crawlee._utils.system import CpuInfo, MemoryInfo
16
+ from crawlee.configuration import Configuration
17
+ from crawlee.events import LocalEventManager
18
+ from crawlee.events._types import Event, EventSystemInfoData
19
+
20
+ if TYPE_CHECKING:
21
+ from collections.abc import AsyncGenerator
22
+
23
+
24
+ @pytest.fixture
25
+ async def event_manager() -> AsyncGenerator[LocalEventManager, None]:
26
+ # Use a long interval to avoid interference from periodic system info events during tests
27
+ async with LocalEventManager(system_info_interval=timedelta(hours=9999)) as event_manager:
28
+ yield event_manager
29
+
30
+
31
+ @pytest.fixture
32
+ async def snapshotter(event_manager: LocalEventManager) -> AsyncGenerator[Snapshotter, None]:
33
+ config = Configuration(available_memory_ratio=0.25)
34
+ service_locator.set_event_manager(event_manager)
35
+ async with Snapshotter.from_config(config) as snapshotter:
36
+ yield snapshotter
37
+
38
+
39
+ @pytest.fixture
40
+ def default_cpu_info() -> CpuInfo:
41
+ return CpuInfo(used_ratio=0.5)
42
+
43
+
44
+ @pytest.fixture
45
+ def default_memory_info() -> MemoryInfo:
46
+ return MemoryInfo(
47
+ total_size=ByteSize.from_gb(8),
48
+ current_size=ByteSize.from_gb(4),
49
+ system_wide_used_size=ByteSize.from_gb(5),
50
+ )
51
+
52
+
53
+ @pytest.fixture
54
+ def event_system_data_info(default_cpu_info: CpuInfo, default_memory_info: MemoryInfo) -> EventSystemInfoData:
55
+ return EventSystemInfoData(
56
+ cpu_info=default_cpu_info,
57
+ memory_info=default_memory_info,
58
+ )
59
+
60
+
61
+ async def test_start_stop_lifecycle() -> None:
62
+ config = Configuration(available_memory_ratio=0.25)
63
+
64
+ async with Snapshotter.from_config(config):
65
+ pass
66
+
67
+
68
+ async def test_snapshot_cpu(
69
+ snapshotter: Snapshotter, event_system_data_info: EventSystemInfoData, event_manager: LocalEventManager
70
+ ) -> None:
71
+ event_manager.emit(event=Event.SYSTEM_INFO, event_data=event_system_data_info)
72
+ await event_manager.wait_for_all_listeners_to_complete()
73
+ cpu_snapshots = cast('list[CpuSnapshot]', snapshotter.get_cpu_sample())
74
+ assert len(cpu_snapshots) == 1
75
+ assert cpu_snapshots[0].used_ratio == event_system_data_info.cpu_info.used_ratio
76
+
77
+
78
+ async def test_snapshot_memory(
79
+ snapshotter: Snapshotter, event_system_data_info: EventSystemInfoData, event_manager: LocalEventManager
80
+ ) -> None:
81
+ event_manager.emit(event=Event.SYSTEM_INFO, event_data=event_system_data_info)
82
+ await event_manager.wait_for_all_listeners_to_complete()
83
+ memory_snapshots = cast('list[MemorySnapshot]', snapshotter.get_memory_sample())
84
+ assert len(memory_snapshots) == 1
85
+ assert memory_snapshots[0].current_size == event_system_data_info.memory_info.current_size
86
+
87
+
88
+ async def test_snapshot_memory_with_memory_info_sets_system_wide_fields(
89
+ snapshotter: Snapshotter, event_manager: LocalEventManager
90
+ ) -> None:
91
+ memory_info = MemoryInfo(
92
+ total_size=ByteSize.from_gb(16),
93
+ current_size=ByteSize.from_gb(4),
94
+ system_wide_used_size=ByteSize.from_gb(12),
95
+ )
96
+
97
+ event_data = EventSystemInfoData(
98
+ cpu_info=CpuInfo(used_ratio=0.5),
99
+ memory_info=memory_info,
100
+ )
101
+
102
+ event_manager.emit(event=Event.SYSTEM_INFO, event_data=event_data)
103
+ await event_manager.wait_for_all_listeners_to_complete()
104
+
105
+ memory_snapshots = cast('list[MemorySnapshot]', snapshotter.get_memory_sample())
106
+
107
+ assert len(memory_snapshots) == 1
108
+ memory_snapshot = memory_snapshots[0]
109
+
110
+ # Test that system-wide fields are properly set
111
+ assert memory_snapshot.system_wide_used_size == memory_info.system_wide_used_size
112
+ assert memory_snapshot.system_wide_memory_size == memory_info.total_size
113
+
114
+
115
+ def test_snapshot_event_loop(snapshotter: Snapshotter) -> None:
116
+ # A first event loop snapshot is created when an instance is created.
117
+ event_loop_snapshots = snapshotter.get_event_loop_sample()
118
+ assert len(event_loop_snapshots) == 1
119
+
120
+
121
+ def test_snapshot_client(snapshotter: Snapshotter) -> None:
122
+ # A first client snapshot is created when an instance is created.
123
+ client_snapshots = snapshotter.get_client_sample()
124
+ assert len(client_snapshots) == 1
125
+
126
+
127
+ def test_snapshot_client_overloaded() -> None:
128
+ assert not ClientSnapshot(error_count=1, new_error_count=1, max_error_count=2).is_overloaded
129
+ assert not ClientSnapshot(error_count=2, new_error_count=1, max_error_count=2).is_overloaded
130
+ assert not ClientSnapshot(error_count=4, new_error_count=2, max_error_count=2).is_overloaded
131
+ assert ClientSnapshot(error_count=7, new_error_count=3, max_error_count=2).is_overloaded
132
+
133
+
134
+ async def test_get_cpu_sample(
135
+ snapshotter: Snapshotter, event_manager: LocalEventManager, default_memory_info: MemoryInfo
136
+ ) -> None:
137
+ now = datetime.now(timezone.utc)
138
+ snapshotter._SNAPSHOT_HISTORY = timedelta(hours=10) # Extend history for testing
139
+
140
+ events_data = [
141
+ EventSystemInfoData(
142
+ cpu_info=CpuInfo(used_ratio=0.5, created_at=now - timedelta(hours=delta)),
143
+ memory_info=default_memory_info,
144
+ )
145
+ for delta in range(5, 0, -1)
146
+ ]
147
+ for event_data in events_data:
148
+ event_manager.emit(event=Event.SYSTEM_INFO, event_data=event_data)
149
+ await event_manager.wait_for_all_listeners_to_complete()
150
+
151
+ # When no sample duration is provided it should return all snapshots
152
+ samples = snapshotter.get_cpu_sample()
153
+ assert len(samples) == len(events_data)
154
+
155
+ duration = timedelta(hours=0.5)
156
+ samples = snapshotter.get_cpu_sample(duration)
157
+ assert len(samples) == 1
158
+
159
+ duration = timedelta(hours=2.5)
160
+ samples = snapshotter.get_cpu_sample(duration)
161
+ assert len(samples) == 3
162
+
163
+ duration = timedelta(hours=10)
164
+ samples = snapshotter.get_cpu_sample(duration)
165
+ assert len(samples) == len(events_data)
166
+
167
+
168
+ async def test_methods_raise_error_when_not_active() -> None:
169
+ snapshotter = Snapshotter.from_config(Configuration(available_memory_ratio=0.25))
170
+ assert snapshotter.active is False
171
+
172
+ with pytest.raises(RuntimeError, match=r'Snapshotter is not active.'):
173
+ snapshotter.get_cpu_sample()
174
+
175
+ with pytest.raises(RuntimeError, match=r'Snapshotter is not active.'):
176
+ snapshotter.get_memory_sample()
177
+
178
+ with pytest.raises(RuntimeError, match=r'Snapshotter is not active.'):
179
+ snapshotter.get_event_loop_sample()
180
+
181
+ with pytest.raises(RuntimeError, match=r'Snapshotter is not active.'):
182
+ snapshotter.get_client_sample()
183
+
184
+ with pytest.raises(RuntimeError, match=r'Snapshotter is already active.'):
185
+ async with snapshotter, snapshotter:
186
+ pass
187
+
188
+ async with snapshotter:
189
+ snapshotter.get_cpu_sample()
190
+ snapshotter.get_memory_sample()
191
+ snapshotter.get_event_loop_sample()
192
+ snapshotter.get_client_sample()
193
+
194
+ assert snapshotter.active is True
195
+
196
+
197
+ async def test_snapshot_pruning_removes_outdated_records(
198
+ snapshotter: Snapshotter, event_manager: LocalEventManager, default_memory_info: MemoryInfo
199
+ ) -> None:
200
+ # Set the snapshot history to 2 hours
201
+ snapshotter._SNAPSHOT_HISTORY = timedelta(hours=2)
202
+
203
+ # Create timestamps for testing
204
+ now = datetime.now(timezone.utc)
205
+
206
+ events_data = [
207
+ EventSystemInfoData(
208
+ cpu_info=CpuInfo(used_ratio=0.5, created_at=now - timedelta(hours=delta)),
209
+ memory_info=default_memory_info,
210
+ )
211
+ for delta in [5, 3, 2, 0]
212
+ ]
213
+
214
+ for event_data in events_data:
215
+ event_manager.emit(event=Event.SYSTEM_INFO, event_data=event_data)
216
+ await event_manager.wait_for_all_listeners_to_complete()
217
+
218
+ cpu_snapshots = cast('list[CpuSnapshot]', snapshotter.get_cpu_sample())
219
+
220
+ # Check that only the last two snapshots remain
221
+ assert len(cpu_snapshots) == 2
222
+ assert cpu_snapshots[0].created_at == now - timedelta(hours=2)
223
+ assert cpu_snapshots[1].created_at == now
224
+
225
+
226
+ async def test_memory_load_evaluation_logs_warning_on_high_usage(
227
+ caplog: pytest.LogCaptureFixture,
228
+ event_manager: LocalEventManager,
229
+ default_cpu_info: CpuInfo,
230
+ ) -> None:
231
+ config = Configuration(memory_mbytes=8192)
232
+
233
+ service_locator.set_event_manager(event_manager)
234
+ snapshotter = Snapshotter.from_config(config)
235
+
236
+ high_memory_usage = ByteSize.from_gb(8) * 0.95 # 95% of 8 GB
237
+
238
+ event_data = EventSystemInfoData(
239
+ cpu_info=default_cpu_info,
240
+ memory_info=MemoryInfo(
241
+ total_size=ByteSize.from_gb(8),
242
+ current_size=high_memory_usage,
243
+ system_wide_used_size=ByteSize.from_gb(7),
244
+ ),
245
+ )
246
+
247
+ async with snapshotter:
248
+ event_manager.emit(event=Event.SYSTEM_INFO, event_data=event_data)
249
+ await event_manager.wait_for_all_listeners_to_complete()
250
+
251
+ # Filter log records to only include those from snapshotter
252
+ log_records = [record for record in caplog.records if 'snapshotter' in record.pathname.lower()]
253
+
254
+ assert len(log_records) == 1
255
+ assert log_records[0].levelname.lower() == 'warning'
256
+ assert 'Memory is critically overloaded' in log_records[0].msg
257
+
258
+ event_manager.emit(event=Event.SYSTEM_INFO, event_data=event_data)
259
+ await event_manager.wait_for_all_listeners_to_complete()
260
+
261
+ log_records = [record for record in caplog.records if 'snapshotter' in record.pathname.lower()]
262
+
263
+ assert len(log_records) == 1
264
+
265
+
266
+ async def test_memory_load_evaluation_silent_on_acceptable_usage(
267
+ monkeypatch: pytest.MonkeyPatch,
268
+ event_manager: LocalEventManager,
269
+ default_cpu_info: CpuInfo,
270
+ ) -> None:
271
+ mock_logger_warn = MagicMock()
272
+ monkeypatch.setattr(getLogger('crawlee.autoscaling.snapshotter'), 'warning', mock_logger_warn)
273
+
274
+ service_locator.set_event_manager(event_manager)
275
+ snapshotter = Snapshotter.from_config(Configuration(memory_mbytes=8192))
276
+
277
+ low_memory_usage = ByteSize.from_gb(8) * 0.8 # 80% of 8 GB
278
+
279
+ event_data = EventSystemInfoData(
280
+ cpu_info=default_cpu_info,
281
+ memory_info=MemoryInfo(
282
+ total_size=ByteSize.from_gb(8),
283
+ current_size=low_memory_usage,
284
+ system_wide_used_size=ByteSize.from_gb(7),
285
+ ),
286
+ )
287
+
288
+ async with snapshotter:
289
+ event_manager.emit(event=Event.SYSTEM_INFO, event_data=event_data)
290
+ await event_manager.wait_for_all_listeners_to_complete()
291
+
292
+ assert mock_logger_warn.call_count == 0
293
+
294
+
295
+ async def test_snapshots_time_ordered(snapshotter: Snapshotter, event_manager: LocalEventManager) -> None:
296
+ # All internal snapshot list should be ordered by creation time in ascending order.
297
+ # Scenario where older emitted event arrives after newer event.
298
+ # Snapshotter should not trust the event order and check events' times.
299
+ time_new = datetime.now(tz=timezone.utc)
300
+ time_old = datetime.now(tz=timezone.utc) - timedelta(milliseconds=50)
301
+
302
+ def create_event_data(creation_time: datetime) -> EventSystemInfoData:
303
+ return EventSystemInfoData(
304
+ cpu_info=CpuInfo(used_ratio=0.5, created_at=creation_time),
305
+ memory_info=MemoryInfo(
306
+ current_size=ByteSize(bytes=1),
307
+ created_at=creation_time,
308
+ total_size=ByteSize(bytes=2),
309
+ system_wide_used_size=ByteSize.from_gb(5),
310
+ ),
311
+ )
312
+
313
+ event_manager.emit(event=Event.SYSTEM_INFO, event_data=create_event_data(time_new))
314
+ event_manager.emit(event=Event.SYSTEM_INFO, event_data=create_event_data(time_old))
315
+ await event_manager.wait_for_all_listeners_to_complete()
316
+
317
+ memory_samples = snapshotter.get_memory_sample()
318
+ cpu_samples = snapshotter.get_cpu_sample()
319
+ assert memory_samples[0].created_at == time_old
320
+ assert cpu_samples[0].created_at == time_old
321
+ assert memory_samples[1].created_at == time_new
322
+ assert cpu_samples[1].created_at == time_new
323
+
324
+
325
+ def test_sorted_snapshot_list_add_maintains_order() -> None:
326
+ """Test that SortedSnapshotList.add method maintains sorted order by created_at with multiple items."""
327
+ sorted_list = SortedSnapshotList[CpuSnapshot]()
328
+
329
+ # Create snapshots with different timestamps (more items to test binary search better)
330
+ now = datetime.now(timezone.utc)
331
+ snapshots = [
332
+ CpuSnapshot(used_ratio=0.1, max_used_ratio=0.95, created_at=now - timedelta(seconds=50)), # oldest
333
+ CpuSnapshot(used_ratio=0.2, max_used_ratio=0.95, created_at=now - timedelta(seconds=40)),
334
+ CpuSnapshot(used_ratio=0.3, max_used_ratio=0.95, created_at=now - timedelta(seconds=30)),
335
+ CpuSnapshot(used_ratio=0.4, max_used_ratio=0.95, created_at=now - timedelta(seconds=20)),
336
+ CpuSnapshot(used_ratio=0.5, max_used_ratio=0.95, created_at=now - timedelta(seconds=10)),
337
+ CpuSnapshot(used_ratio=0.6, max_used_ratio=0.95, created_at=now - timedelta(seconds=5)),
338
+ CpuSnapshot(used_ratio=0.7, max_used_ratio=0.95, created_at=now), # newest
339
+ ]
340
+
341
+ # Add snapshots in random order to test binary search insertion
342
+ add_order = [3, 0, 5, 1, 6, 2, 4] # indices in random order
343
+ for i in add_order:
344
+ sorted_list.add(snapshots[i])
345
+
346
+ # Verify the list is sorted by created_at (should be in original order)
347
+ assert len(sorted_list) == 7
348
+ for i, snapshot in enumerate(sorted_list):
349
+ assert snapshot == snapshots[i], f'Item at index {i} is not correctly sorted'
350
+ if i > 0:
351
+ prev_time = sorted_list[i - 1].created_at
352
+ curr_time = snapshot.created_at
353
+ assert prev_time <= curr_time, f'Items at indices {i - 1} and {i} are not in chronological order'
@@ -29,8 +29,8 @@ async def test_basic(server_url: URL, http_client: HttpClient) -> None:
29
29
 
30
30
  assert handler.called
31
31
 
32
- # The handler should find two links
33
- assert len(handler.call_args[0][0]) == 2
32
+ # The handler should find three links
33
+ assert len(handler.call_args[0][0]) == 3
34
34
 
35
35
 
36
36
  async def test_enqueue_links(redirect_server_url: URL, server_url: URL, http_client: HttpClient) -> None:
@@ -31,8 +31,8 @@ async def test_basic(server_url: URL, http_client: HttpClient) -> None:
31
31
 
32
32
  assert handler.called
33
33
 
34
- # The handler should find two links
35
- assert len(handler.call_args[0][0]) == 2
34
+ # The handler should find three links
35
+ assert len(handler.call_args[0][0]) == 3
36
36
 
37
37
 
38
38
  async def test_enqueue_links(redirect_server_url: URL, server_url: URL, http_client: HttpClient) -> None:
@@ -14,6 +14,7 @@ START_ENQUEUE = b"""\
14
14
  <body>
15
15
  <a href="/sub_index" class="foo">Link 1</a>
16
16
  <a href="/page_1">Link 2</a>
17
+ <a href="mailto:test@test.com">test@test.com</a>
17
18
  </body></html>"""
18
19
 
19
20
  SECONDARY_INDEX = b"""\
@@ -705,7 +705,7 @@ toml = [
705
705
 
706
706
  [[package]]
707
707
  name = "crawlee"
708
- version = "1.0.4b3"
708
+ version = "1.0.4b5"
709
709
  source = { editable = "." }
710
710
  dependencies = [
711
711
  { name = "cachetools" },