crawlee 1.1.1.tar.gz → 1.1.1b1.tar.gz

This diff compares the contents of two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
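To reproduce a comparison like this locally, one possible approach (a minimal sketch, assuming the public PyPI JSON API at https://pypi.org/pypi/&lt;name&gt;/&lt;version&gt;/json and only Python's standard urllib, json, and tarfile modules) is to download both sdists and compare their member paths. This reveals which files were added or removed between the versions, though not the per-file line counts shown in the listing below.

    import json
    import tarfile
    import urllib.request
    from io import BytesIO

    def sdist_members(package: str, version: str) -> set[str]:
        """Fetch the sdist for a release from PyPI and return its member paths."""
        meta_url = f"https://pypi.org/pypi/{package}/{version}/json"
        with urllib.request.urlopen(meta_url) as resp:
            meta = json.load(resp)
        # Pick the source distribution (.tar.gz) among the release files.
        sdist = next(f for f in meta["urls"] if f["packagetype"] == "sdist")
        with urllib.request.urlopen(sdist["url"]) as resp:
            data = resp.read()
        with tarfile.open(fileobj=BytesIO(data), mode="r:gz") as tar:
            # Drop the leading "<package>-<version>/" directory so paths
            # from the two versions can be compared directly.
            return {name.split("/", 1)[1] for name in tar.getnames() if "/" in name}

    old = sdist_members("crawlee", "1.1.1")
    new = sdist_members("crawlee", "1.1.1b1")

    print("only in 1.1.1:", sorted(old - new))
    print("only in 1.1.1b1:", sorted(new - old))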
Files changed (703)
  1. {crawlee-1.1.1 → crawlee-1.1.1b1}/.github/workflows/build_and_deploy_docs.yaml +2 -6
  2. {crawlee-1.1.1 → crawlee-1.1.1b1}/.github/workflows/templates_e2e_tests.yaml +1 -1
  3. {crawlee-1.1.1 → crawlee-1.1.1b1}/CHANGELOG.md +3 -5
  4. {crawlee-1.1.1 → crawlee-1.1.1b1}/PKG-INFO +1 -1
  5. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/deployment/code_examples/google/cloud_run_example.py +1 -1
  6. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/deployment/code_examples/google/google_example.py +5 -2
  7. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/running_in_web_server/server.py +2 -2
  8. {crawlee-1.1.1 → crawlee-1.1.1b1}/pyproject.toml +2 -2
  9. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_types.py +1 -20
  10. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +2 -8
  11. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_basic/_basic_crawler.py +46 -62
  12. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_playwright/_playwright_crawler.py +3 -6
  13. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_file_system/_dataset_client.py +2 -2
  14. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_file_system/_key_value_store_client.py +3 -3
  15. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_file_system/_request_queue_client.py +3 -3
  16. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/e2e/project_template/test_static_crawlers_templates.py +0 -3
  17. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_autoscaling/test_autoscaled_pool.py +4 -2
  18. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_system.py +0 -3
  19. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/crawlers/_basic/test_basic_crawler.py +3 -44
  20. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py +0 -9
  21. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/crawlers/_parsel/test_parsel_crawler.py +0 -9
  22. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/crawlers/_playwright/test_playwright_crawler.py +0 -6
  23. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/server.py +0 -32
  24. crawlee-1.1.1b1/tests/unit/server_endpoints.py +71 -0
  25. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storages/test_dataset.py +0 -17
  26. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storages/test_key_value_store.py +0 -18
  27. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storages/test_request_queue.py +0 -19
  28. {crawlee-1.1.1 → crawlee-1.1.1b1}/uv.lock +292 -370
  29. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/package.json +0 -1
  30. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/yarn.lock +499 -389
  31. crawlee-1.1.1/before_scroll.png +0 -0
  32. crawlee-1.1.1/tests/unit/crawlers/_playwright/test_utils.py +0 -157
  33. crawlee-1.1.1/tests/unit/server_endpoints.py +0 -142
  34. crawlee-1.1.1/tests/unit/server_static/test.js +0 -0
  35. crawlee-1.1.1/tests/unit/server_static/test.png +0 -0
  36. crawlee-1.1.1/website/src/components/LLMButtons.jsx +0 -510
  37. crawlee-1.1.1/website/src/components/LLMButtons.module.css +0 -151
  38. crawlee-1.1.1/website/src/theme/DocItem/Content/index.js +0 -35
  39. crawlee-1.1.1/website/src/theme/DocItem/Content/styles.module.css +0 -22
  40. {crawlee-1.1.1 → crawlee-1.1.1b1}/.editorconfig +0 -0
  41. {crawlee-1.1.1 → crawlee-1.1.1b1}/.github/CODEOWNERS +0 -0
  42. {crawlee-1.1.1 → crawlee-1.1.1b1}/.github/pull_request_template.md +0 -0
  43. {crawlee-1.1.1 → crawlee-1.1.1b1}/.github/workflows/check_pr_title.yaml +0 -0
  44. {crawlee-1.1.1 → crawlee-1.1.1b1}/.github/workflows/pre_release.yaml +0 -0
  45. {crawlee-1.1.1 → crawlee-1.1.1b1}/.github/workflows/release.yaml +0 -0
  46. {crawlee-1.1.1 → crawlee-1.1.1b1}/.github/workflows/run_code_checks.yaml +0 -0
  47. {crawlee-1.1.1 → crawlee-1.1.1b1}/.github/workflows/update_new_issue.yaml +0 -0
  48. {crawlee-1.1.1 → crawlee-1.1.1b1}/.gitignore +0 -0
  49. {crawlee-1.1.1 → crawlee-1.1.1b1}/.markdownlint.yaml +0 -0
  50. {crawlee-1.1.1 → crawlee-1.1.1b1}/.pre-commit-config.yaml +0 -0
  51. {crawlee-1.1.1 → crawlee-1.1.1b1}/CONTRIBUTING.md +0 -0
  52. {crawlee-1.1.1 → crawlee-1.1.1b1}/LICENSE +0 -0
  53. {crawlee-1.1.1 → crawlee-1.1.1b1}/Makefile +0 -0
  54. {crawlee-1.1.1 → crawlee-1.1.1b1}/README.md +0 -0
  55. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/deployment/apify_platform.mdx +0 -0
  56. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/deployment/code_examples/apify/crawler_as_actor_example.py +0 -0
  57. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/deployment/code_examples/apify/get_public_url.py +0 -0
  58. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/deployment/code_examples/apify/log_with_config_example.py +0 -0
  59. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/deployment/code_examples/apify/proxy_advanced_example.py +0 -0
  60. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/deployment/code_examples/apify/proxy_example.py +0 -0
  61. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/deployment/google_cloud.mdx +0 -0
  62. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/deployment/google_cloud_run.mdx +0 -0
  63. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/add_data_to_dataset.mdx +0 -0
  64. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/beautifulsoup_crawler.mdx +0 -0
  65. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/capture_screenshot_using_playwright.mdx +0 -0
  66. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/capturing_page_snapshots_with_error_snapshotter.mdx +0 -0
  67. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/adaptive_playwright_crawler.py +0 -0
  68. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/add_data_to_dataset_bs.py +0 -0
  69. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/add_data_to_dataset_dataset.py +0 -0
  70. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/add_data_to_dataset_pw.py +0 -0
  71. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/beautifulsoup_crawler.py +0 -0
  72. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/beautifulsoup_crawler_keep_alive.py +0 -0
  73. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/beautifulsoup_crawler_stop.py +0 -0
  74. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/capture_screenshot_using_playwright.py +0 -0
  75. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/configure_json_logging.py +0 -0
  76. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/crawl_all_links_on_website_bs.py +0 -0
  77. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/crawl_all_links_on_website_pw.py +0 -0
  78. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/crawl_multiple_urls_bs.py +0 -0
  79. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/crawl_multiple_urls_pw.py +0 -0
  80. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/crawl_specific_links_on_website_bs.py +0 -0
  81. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/crawl_specific_links_on_website_pw.py +0 -0
  82. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/crawl_website_with_relative_links_all_links.py +0 -0
  83. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/crawl_website_with_relative_links_same_domain.py +0 -0
  84. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/crawl_website_with_relative_links_same_hostname.py +0 -0
  85. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/crawl_website_with_relative_links_same_origin.py +0 -0
  86. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/export_entire_dataset_to_file_csv.py +0 -0
  87. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/export_entire_dataset_to_file_json.py +0 -0
  88. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/extract_and_add_specific_links_on_website_bs.py +0 -0
  89. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/extract_and_add_specific_links_on_website_pw.py +0 -0
  90. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/fill_and_submit_web_form_crawler.py +0 -0
  91. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/fill_and_submit_web_form_request.py +0 -0
  92. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/parsel_crawler.py +0 -0
  93. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/parsel_crawler_with_error_snapshotter.py +0 -0
  94. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/playwright_block_requests.py +0 -0
  95. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/playwright_crawler.py +0 -0
  96. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/playwright_crawler_with_camoufox.py +0 -0
  97. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/playwright_crawler_with_error_snapshotter.py +0 -0
  98. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/playwright_crawler_with_fingerprint_generator.py +0 -0
  99. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/respect_robots_on_skipped_request.py +0 -0
  100. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/respect_robots_txt_file.py +0 -0
  101. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/resuming_paused_crawl.py +0 -0
  102. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/using_browser_profiles_chrome.py +0 -0
  103. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/using_browser_profiles_firefox.py +0 -0
  104. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/code_examples/using_sitemap_request_loader.py +0 -0
  105. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/crawl_all_links_on_website.mdx +0 -0
  106. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/crawl_multiple_urls.mdx +0 -0
  107. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/crawl_specific_links_on_website.mdx +0 -0
  108. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/crawl_website_with_relative_links.mdx +0 -0
  109. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/crawler_keep_alive.mdx +0 -0
  110. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/crawler_stop.mdx +0 -0
  111. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/export_entire_dataset_to_file.mdx +0 -0
  112. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/fill_and_submit_web_form.mdx +0 -0
  113. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/json_logging.mdx +0 -0
  114. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/parsel_crawler.mdx +0 -0
  115. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/playwright_crawler.mdx +0 -0
  116. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/playwright_crawler_adaptive.mdx +0 -0
  117. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/playwright_crawler_with_block_requests.mdx +0 -0
  118. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/playwright_crawler_with_camoufox.mdx +0 -0
  119. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/playwright_crawler_with_fingerprint_generator.mdx +0 -0
  120. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/respect_robots_txt_file.mdx +0 -0
  121. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/resuming_paused_crawl.mdx +0 -0
  122. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/using_browser_profile.mdx +0 -0
  123. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/examples/using_sitemap_request_loader.mdx +0 -0
  124. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/architecture_overview.mdx +0 -0
  125. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/avoid_blocking.mdx +0 -0
  126. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/avoid_blocking/default_fingerprint_generator_with_args.py +0 -0
  127. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/avoid_blocking/playwright_with_fingerprint_generator.py +0 -0
  128. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/creating_web_archive/manual_archiving_parsel_crawler.py +0 -0
  129. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/creating_web_archive/manual_archiving_playwright_crawler.py +0 -0
  130. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/creating_web_archive/simple_pw_through_proxy_pywb_server.py +0 -0
  131. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/error_handling/change_handle_error_status.py +0 -0
  132. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/error_handling/disable_retry.py +0 -0
  133. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/error_handling/handle_proxy_error.py +0 -0
  134. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/http_clients/parsel_curl_impersonate_example.py +0 -0
  135. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/http_clients/parsel_httpx_example.py +0 -0
  136. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/http_clients/parsel_impit_example.py +0 -0
  137. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/http_crawlers/beautifulsoup_example.py +0 -0
  138. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/http_crawlers/custom_crawler_example.py +0 -0
  139. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/http_crawlers/http_example.py +0 -0
  140. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/http_crawlers/parsel_example.py +0 -0
  141. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/login_crawler/http_login.py +0 -0
  142. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/login_crawler/playwright_login.py +0 -0
  143. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/playwright_crawler/browser_configuration_example.py +0 -0
  144. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/playwright_crawler/multiple_launch_example.py +0 -0
  145. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/playwright_crawler/plugin_browser_configuration_example.py +0 -0
  146. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/playwright_crawler/pre_navigation_hook_example.py +0 -0
  147. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/playwright_crawler_adaptive/handler.py +0 -0
  148. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/playwright_crawler_adaptive/init_beautifulsoup.py +0 -0
  149. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/playwright_crawler_adaptive/init_parsel.py +0 -0
  150. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/playwright_crawler_adaptive/init_prediction.py +0 -0
  151. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/playwright_crawler_adaptive/pre_nav_hooks.py +0 -0
  152. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/playwright_crawler_stagehand/__init__.py +0 -0
  153. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/playwright_crawler_stagehand/browser_classes.py +0 -0
  154. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/playwright_crawler_stagehand/stagehand_run.py +0 -0
  155. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/playwright_crawler_stagehand/support_classes.py +0 -0
  156. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/proxy_management/inspecting_bs_example.py +0 -0
  157. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/proxy_management/inspecting_pw_example.py +0 -0
  158. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/proxy_management/integration_bs_example.py +0 -0
  159. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/proxy_management/integration_pw_example.py +0 -0
  160. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/proxy_management/quick_start_example.py +0 -0
  161. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/proxy_management/session_bs_example.py +0 -0
  162. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/proxy_management/session_pw_example.py +0 -0
  163. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/proxy_management/tiers_bs_example.py +0 -0
  164. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/proxy_management/tiers_pw_example.py +0 -0
  165. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_loaders/rl_basic_example.py +0 -0
  166. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_loaders/rl_basic_example_with_persist.py +0 -0
  167. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_loaders/rl_tandem_example.py +0 -0
  168. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_loaders/rl_tandem_example_explicit.py +0 -0
  169. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_loaders/sitemap_basic_example.py +0 -0
  170. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_loaders/sitemap_example_with_persist.py +0 -0
  171. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_loaders/sitemap_tandem_example.py +0 -0
  172. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_loaders/sitemap_tandem_example_explicit.py +0 -0
  173. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_router/adaptive_crawler_handlers.py +0 -0
  174. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_router/basic_request_handlers.py +0 -0
  175. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_router/custom_router_default_only.py +0 -0
  176. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_router/error_handler.py +0 -0
  177. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_router/failed_request_handler.py +0 -0
  178. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_router/http_pre_navigation.py +0 -0
  179. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_router/playwright_pre_navigation.py +0 -0
  180. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/request_router/simple_default_handler.py +0 -0
  181. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/running_in_web_server/__init__.py +0 -0
  182. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/running_in_web_server/crawler.py +0 -0
  183. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/scaling_crawlers/max_tasks_per_minute_example.py +0 -0
  184. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/scaling_crawlers/min_and_max_concurrency_example.py +0 -0
  185. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/service_locator/service_conflicts.py +0 -0
  186. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/service_locator/service_crawler_configuration.py +0 -0
  187. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/service_locator/service_crawler_event_manager.py +0 -0
  188. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/service_locator/service_crawler_storage_client.py +0 -0
  189. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/service_locator/service_locator_configuration.py +0 -0
  190. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/service_locator/service_locator_event_manager.py +0 -0
  191. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/service_locator/service_locator_storage_client.py +0 -0
  192. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/service_locator/service_storage_configuration.py +0 -0
  193. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/service_locator/service_storage_storage_client.py +0 -0
  194. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/session_management/multi_sessions_http.py +0 -0
  195. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/session_management/one_session_http.py +0 -0
  196. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/session_management/sm_basic.py +0 -0
  197. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/session_management/sm_beautifulsoup.py +0 -0
  198. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/session_management/sm_http.py +0 -0
  199. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/session_management/sm_parsel.py +0 -0
  200. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/session_management/sm_playwright.py +0 -0
  201. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/session_management/sm_standalone.py +0 -0
  202. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storage_clients/custom_storage_client_example.py +0 -0
  203. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storage_clients/file_system_storage_client_basic_example.py +0 -0
  204. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storage_clients/file_system_storage_client_configuration_example.py +0 -0
  205. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storage_clients/memory_storage_client_basic_example.py +0 -0
  206. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storage_clients/redis_storage_client_basic_example.py +0 -0
  207. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storage_clients/redis_storage_client_configuration_example.py +0 -0
  208. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storage_clients/registering_storage_clients_example.py +0 -0
  209. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storage_clients/sql_storage_client_basic_example.py +0 -0
  210. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storage_clients/sql_storage_client_configuration_example.py +0 -0
  211. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storages/cleaning_do_not_purge_example.py +0 -0
  212. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storages/cleaning_purge_explicitly_example.py +0 -0
  213. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storages/dataset_basic_example.py +0 -0
  214. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storages/dataset_with_crawler_example.py +0 -0
  215. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storages/dataset_with_crawler_explicit_example.py +0 -0
  216. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storages/helper_add_requests_example.py +0 -0
  217. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storages/helper_enqueue_links_example.py +0 -0
  218. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storages/kvs_basic_example.py +0 -0
  219. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storages/kvs_with_crawler_example.py +0 -0
  220. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storages/kvs_with_crawler_explicit_example.py +0 -0
  221. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storages/opening.py +0 -0
  222. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storages/rq_basic_example.py +0 -0
  223. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storages/rq_with_crawler_example.py +0 -0
  224. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/storages/rq_with_crawler_explicit_example.py +0 -0
  225. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/code_examples/trace_and_monitor_crawlers/instrument_crawler.py +0 -0
  226. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/crawler_login.mdx +0 -0
  227. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/creating_web_archive.mdx +0 -0
  228. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/error_handling.mdx +0 -0
  229. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/http_clients.mdx +0 -0
  230. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/http_crawlers.mdx +0 -0
  231. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/playwright_crawler.mdx +0 -0
  232. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/playwright_crawler_adaptive.mdx +0 -0
  233. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/playwright_crawler_stagehand.mdx +0 -0
  234. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/proxy_management.mdx +0 -0
  235. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/request_loaders.mdx +0 -0
  236. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/request_router.mdx +0 -0
  237. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/running_in_web_server.mdx +0 -0
  238. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/scaling_crawlers.mdx +0 -0
  239. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/service_locator.mdx +0 -0
  240. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/session_management.mdx +0 -0
  241. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/storage_clients.mdx +0 -0
  242. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/storages.mdx +0 -0
  243. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/guides/trace_and_monitor_crawlers.mdx +0 -0
  244. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/01_setting_up.mdx +0 -0
  245. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/02_first_crawler.mdx +0 -0
  246. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/03_adding_more_urls.mdx +0 -0
  247. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/04_real_world_project.mdx +0 -0
  248. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/05_crawling.mdx +0 -0
  249. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/06_scraping.mdx +0 -0
  250. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/07_saving_data.mdx +0 -0
  251. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/08_refactoring.mdx +0 -0
  252. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/09_running_in_cloud.mdx +0 -0
  253. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/02_bs.py +0 -0
  254. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/02_bs_better.py +0 -0
  255. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/02_request_queue.py +0 -0
  256. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/03_enqueue_strategy.py +0 -0
  257. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/03_finding_new_links.py +0 -0
  258. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/03_globs.py +0 -0
  259. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/03_original_code.py +0 -0
  260. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/03_transform_request.py +0 -0
  261. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/04_sanity_check.py +0 -0
  262. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/05_crawling_detail.py +0 -0
  263. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/05_crawling_listing.py +0 -0
  264. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/06_scraping.py +0 -0
  265. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/07_final_code.py +0 -0
  266. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/07_first_code.py +0 -0
  267. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/08_main.py +0 -0
  268. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/08_routes.py +0 -0
  269. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/09_apify_sdk.py +0 -0
  270. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/__init__.py +0 -0
  271. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/code_examples/routes.py +0 -0
  272. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/introduction/index.mdx +0 -0
  273. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/pyproject.toml +0 -0
  274. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/quick-start/code_examples/beautifulsoup_crawler_example.py +0 -0
  275. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/quick-start/code_examples/parsel_crawler_example.py +0 -0
  276. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/quick-start/code_examples/playwright_crawler_example.py +0 -0
  277. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/quick-start/code_examples/playwright_crawler_headful_example.py +0 -0
  278. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/quick-start/index.mdx +0 -0
  279. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/upgrading/upgrading_to_v0x.md +0 -0
  280. {crawlee-1.1.1 → crawlee-1.1.1b1}/docs/upgrading/upgrading_to_v1.md +0 -0
  281. {crawlee-1.1.1 → crawlee-1.1.1b1}/renovate.json +0 -0
  282. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/__init__.py +0 -0
  283. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_autoscaling/__init__.py +0 -0
  284. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_autoscaling/_types.py +0 -0
  285. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_autoscaling/autoscaled_pool.py +0 -0
  286. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_autoscaling/py.typed +0 -0
  287. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_autoscaling/snapshotter.py +0 -0
  288. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_autoscaling/system_status.py +0 -0
  289. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_browserforge_workaround.py +0 -0
  290. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_cli.py +0 -0
  291. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_consts.py +0 -0
  292. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_log_config.py +0 -0
  293. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_request.py +0 -0
  294. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_service_locator.py +0 -0
  295. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/__init__.py +0 -0
  296. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/blocked.py +0 -0
  297. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/byte_size.py +0 -0
  298. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/console.py +0 -0
  299. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/context.py +0 -0
  300. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/crypto.py +0 -0
  301. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/docs.py +0 -0
  302. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/file.py +0 -0
  303. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/globs.py +0 -0
  304. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/html_to_text.py +0 -0
  305. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/models.py +0 -0
  306. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/raise_if_too_many_kwargs.py +0 -0
  307. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/recoverable_state.py +0 -0
  308. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/recurring_task.py +0 -0
  309. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/requests.py +0 -0
  310. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/robots.py +0 -0
  311. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/sitemap.py +0 -0
  312. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/system.py +0 -0
  313. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/time.py +0 -0
  314. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/try_import.py +0 -0
  315. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/urls.py +0 -0
  316. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/wait.py +0 -0
  317. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/_utils/web.py +0 -0
  318. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/browsers/__init__.py +0 -0
  319. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/browsers/_browser_controller.py +0 -0
  320. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/browsers/_browser_plugin.py +0 -0
  321. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/browsers/_browser_pool.py +0 -0
  322. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/browsers/_playwright_browser.py +0 -0
  323. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/browsers/_playwright_browser_controller.py +0 -0
  324. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/browsers/_playwright_browser_plugin.py +0 -0
  325. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/browsers/_types.py +0 -0
  326. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/browsers/py.typed +0 -0
  327. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/configuration.py +0 -0
  328. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/__init__.py +0 -0
  329. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_abstract_http/__init__.py +0 -0
  330. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_abstract_http/_abstract_http_parser.py +0 -0
  331. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_abstract_http/_http_crawling_context.py +0 -0
  332. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_abstract_http/py.typed +0 -0
  333. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_adaptive_playwright/__init__.py +0 -0
  334. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +0 -0
  335. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py +0 -0
  336. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py +0 -0
  337. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py +0 -0
  338. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_adaptive_playwright/_result_comparator.py +0 -0
  339. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_adaptive_playwright/_utils.py +0 -0
  340. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_basic/__init__.py +0 -0
  341. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_basic/_basic_crawling_context.py +0 -0
  342. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_basic/_context_pipeline.py +0 -0
  343. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_basic/_logging_utils.py +0 -0
  344. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_basic/py.typed +0 -0
  345. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_beautifulsoup/__init__.py +0 -0
  346. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawler.py +0 -0
  347. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawling_context.py +0 -0
  348. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_parser.py +0 -0
  349. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_beautifulsoup/_utils.py +0 -0
  350. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_beautifulsoup/py.typed +0 -0
  351. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_http/__init__.py +0 -0
  352. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_http/_http_crawler.py +0 -0
  353. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_http/_http_parser.py +0 -0
  354. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_parsel/__init__.py +0 -0
  355. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_parsel/_parsel_crawler.py +0 -0
  356. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_parsel/_parsel_crawling_context.py +0 -0
  357. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_parsel/_parsel_parser.py +0 -0
  358. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_parsel/_utils.py +0 -0
  359. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_playwright/__init__.py +0 -0
  360. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_playwright/_playwright_crawling_context.py +0 -0
  361. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_playwright/_playwright_http_client.py +0 -0
  362. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py +0 -0
  363. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_playwright/_types.py +0 -0
  364. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_playwright/_utils.py +0 -0
  365. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/_types.py +0 -0
  366. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/crawlers/py.typed +0 -0
  367. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/errors.py +0 -0
  368. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/events/__init__.py +0 -0
  369. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/events/_event_manager.py +0 -0
  370. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/events/_local_event_manager.py +0 -0
  371. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/events/_types.py +0 -0
  372. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/events/py.typed +0 -0
  373. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/fingerprint_suite/__init__.py +0 -0
  374. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/fingerprint_suite/_browserforge_adapter.py +0 -0
  375. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/fingerprint_suite/_consts.py +0 -0
  376. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/fingerprint_suite/_fingerprint_generator.py +0 -0
  377. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/fingerprint_suite/_header_generator.py +0 -0
  378. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/fingerprint_suite/_types.py +0 -0
  379. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/fingerprint_suite/py.typed +0 -0
  380. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/http_clients/__init__.py +0 -0
  381. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/http_clients/_base.py +0 -0
  382. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/http_clients/_curl_impersonate.py +0 -0
  383. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/http_clients/_httpx.py +0 -0
  384. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/http_clients/_impit.py +0 -0
  385. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/otel/__init__.py +0 -0
  386. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/otel/crawler_instrumentor.py +0 -0
  387. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/cookiecutter.json +0 -0
  388. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/hooks/post_gen_project.py +0 -0
  389. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/hooks/pre_gen_project.py +0 -0
  390. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/templates/main.py +0 -0
  391. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/templates/main_beautifulsoup.py +0 -0
  392. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/templates/main_parsel.py +0 -0
  393. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/templates/main_playwright.py +0 -0
  394. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/templates/main_playwright_camoufox.py +0 -0
  395. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/templates/routes_beautifulsoup.py +0 -0
  396. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/templates/routes_camoufox.py +0 -0
  397. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/templates/routes_parsel.py +0 -0
  398. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/templates/routes_playwright.py +0 -0
  399. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/templates/routes_playwright_camoufox.py +0 -0
  400. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/{{cookiecutter.project_name}}/.dockerignore +0 -0
  401. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile +0 -0
  402. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/{{cookiecutter.project_name}}/README.md +0 -0
  403. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml +0 -0
  404. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/{{cookiecutter.project_name}}/requirements.txt +0 -0
  405. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__init__.py +0 -0
  406. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__main__.py +0 -0
  407. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/main.py +0 -0
  408. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/routes.py +0 -0
  409. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/proxy_configuration.py +0 -0
  410. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/py.typed +0 -0
  411. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/request_loaders/__init__.py +0 -0
  412. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/request_loaders/_request_list.py +0 -0
  413. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/request_loaders/_request_loader.py +0 -0
  414. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/request_loaders/_request_manager.py +0 -0
  415. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/request_loaders/_request_manager_tandem.py +0 -0
  416. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/request_loaders/_sitemap_request_loader.py +0 -0
  417. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/router.py +0 -0
  418. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/sessions/__init__.py +0 -0
  419. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/sessions/_cookies.py +0 -0
  420. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/sessions/_models.py +0 -0
  421. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/sessions/_session.py +0 -0
  422. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/sessions/_session_pool.py +0 -0
  423. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/sessions/py.typed +0 -0
  424. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/statistics/__init__.py +0 -0
  425. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/statistics/_error_snapshotter.py +0 -0
  426. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/statistics/_error_tracker.py +0 -0
  427. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/statistics/_models.py +0 -0
  428. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/statistics/_statistics.py +0 -0
  429. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/__init__.py +0 -0
  430. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_base/__init__.py +0 -0
  431. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_base/_dataset_client.py +0 -0
  432. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_base/_key_value_store_client.py +0 -0
  433. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_base/_request_queue_client.py +0 -0
  434. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_base/_storage_client.py +0 -0
  435. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_base/py.typed +0 -0
  436. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_file_system/__init__.py +0 -0
  437. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_file_system/_storage_client.py +0 -0
  438. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_file_system/_utils.py +0 -0
  439. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_file_system/py.typed +0 -0
  440. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_memory/__init__.py +0 -0
  441. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_memory/_dataset_client.py +0 -0
  442. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_memory/_key_value_store_client.py +0 -0
  443. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_memory/_request_queue_client.py +0 -0
  444. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_memory/_storage_client.py +0 -0
  445. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_memory/py.typed +0 -0
  446. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_redis/__init__.py +0 -0
  447. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_redis/_client_mixin.py +0 -0
  448. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_redis/_dataset_client.py +0 -0
  449. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_redis/_key_value_store_client.py +0 -0
  450. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_redis/_request_queue_client.py +0 -0
  451. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_redis/_storage_client.py +0 -0
  452. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_redis/_utils.py +0 -0
  453. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_redis/lua_scripts/atomic_bloom_add_requests.lua +0 -0
  454. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_redis/lua_scripts/atomic_fetch_request.lua +0 -0
  455. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_redis/lua_scripts/atomic_set_add_requests.lua +0 -0
  456. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_redis/lua_scripts/reclaim_stale_requests.lua +0 -0
  457. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_redis/py.typed +0 -0
  458. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_sql/__init__.py +0 -0
  459. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_sql/_client_mixin.py +0 -0
  460. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_sql/_dataset_client.py +0 -0
  461. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_sql/_db_models.py +0 -0
  462. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_sql/_key_value_store_client.py +0 -0
  463. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_sql/_request_queue_client.py +0 -0
  464. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_sql/_storage_client.py +0 -0
  465. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/_sql/py.typed +0 -0
  466. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/models.py +0 -0
  467. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storage_clients/py.typed +0 -0
  468. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storages/__init__.py +0 -0
  469. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storages/_base.py +0 -0
  470. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storages/_dataset.py +0 -0
  471. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storages/_key_value_store.py +0 -0
  472. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storages/_request_queue.py +0 -0
  473. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storages/_storage_instance_manager.py +0 -0
  474. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storages/_utils.py +0 -0
  475. {crawlee-1.1.1 → crawlee-1.1.1b1}/src/crawlee/storages/py.typed +0 -0
  476. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/__init__.py +0 -0
  477. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/e2e/__init__.py +0 -0
  478. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/e2e/conftest.py +0 -0
  479. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/e2e/project_template/utils.py +0 -0
  480. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/README.md +0 -0
  481. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/__init__.py +0 -0
  482. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_autoscaling/test_snapshotter.py +0 -0
  483. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_autoscaling/test_system_status.py +0 -0
  484. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_statistics/test_error_tracker.py +0 -0
  485. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_statistics/test_periodic_logging.py +0 -0
  486. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_statistics/test_persistence.py +0 -0
  487. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_statistics/test_request_processing_record.py +0 -0
  488. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_byte_size.py +0 -0
  489. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_console.py +0 -0
  490. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_crypto.py +0 -0
  491. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_file.py +0 -0
  492. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_globs.py +0 -0
  493. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_html_to_text.py +0 -0
  494. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_measure_time.py +0 -0
  495. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_raise_if_too_many_kwargs.py +0 -0
  496. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_recurring_task.py +0 -0
  497. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_requests.py +0 -0
  498. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_robots.py +0 -0
  499. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_sitemap.py +0 -0
  500. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_timedelata_ms.py +0 -0
  501. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/_utils/test_urls.py +0 -0
  502. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/browsers/test_browser_pool.py +0 -0
  503. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/browsers/test_playwright_browser.py +0 -0
  504. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/browsers/test_playwright_browser_controller.py +0 -0
  505. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/browsers/test_playwright_browser_plugin.py +0 -0
  506. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/conftest.py +0 -0
  507. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler.py +0 -0
  508. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler_statistics.py +0 -0
  509. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawling_context.py +0 -0
  510. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/crawlers/_adaptive_playwright/test_predictor.py +0 -0
  511. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/crawlers/_basic/test_context_pipeline.py +0 -0
  512. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/crawlers/_http/test_http_crawler.py +0 -0
  513. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/events/test_event_manager.py +0 -0
  514. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/events/test_local_event_manager.py +0 -0
  515. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/fingerprint_suite/test_adapters.py +0 -0
  516. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/fingerprint_suite/test_header_generator.py +0 -0
  517. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/http_clients/test_http_clients.py +0 -0
  518. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/http_clients/test_httpx.py +0 -0
  519. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/otel/test_crawler_instrumentor.py +0 -0
  520. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/proxy_configuration/test_new_proxy_info.py +0 -0
  521. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/proxy_configuration/test_tiers.py +0 -0
  522. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/request_loaders/test_request_list.py +0 -0
  523. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/request_loaders/test_sitemap_request_loader.py +0 -0
  524. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/sessions/test_cookies.py +0 -0
  525. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/sessions/test_models.py +0 -0
  526. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/sessions/test_session.py +0 -0
  527. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/sessions/test_session_pool.py +0 -0
  528. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py +0 -0
  529. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py +0 -0
  530. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storage_clients/_file_system/test_fs_rq_client.py +0 -0
  531. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storage_clients/_memory/test_memory_dataset_client.py +0 -0
  532. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storage_clients/_memory/test_memory_kvs_client.py +0 -0
  533. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storage_clients/_memory/test_memory_rq_client.py +0 -0
  534. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storage_clients/_redis/test_redis_dataset_client.py +0 -0
  535. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storage_clients/_redis/test_redis_kvs_client.py +0 -0
  536. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storage_clients/_redis/test_redis_rq_client.py +0 -0
  537. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storage_clients/_sql/test_sql_dataset_client.py +0 -0
  538. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storage_clients/_sql/test_sql_kvs_client.py +0 -0
  539. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storage_clients/_sql/test_sql_rq_client.py +0 -0
  540. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storages/conftest.py +0 -0
  541. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storages/test_request_manager_tandem.py +0 -0
  542. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/storages/test_storage_instance_manager.py +0 -0
  543. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/test_cli.py +0 -0
  544. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/test_configuration.py +0 -0
  545. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/test_log_config.py +0 -0
  546. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/test_router.py +0 -0
  547. {crawlee-1.1.1 → crawlee-1.1.1b1}/tests/unit/test_service_locator.py +0 -0
  548. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/.eslintrc.json +0 -0
  549. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/.yarnrc.yml +0 -0
  550. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/babel.config.js +0 -0
  551. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/build_api_reference.sh +0 -0
  552. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/docusaurus.config.js +0 -0
  553. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/generate_module_shortcuts.py +0 -0
  554. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/patches/@docusaurus+core+3.4.0.patch +0 -0
  555. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/patches/@docusaurus+core+3.5.2.patch +0 -0
  556. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/roa-loader/index.js +0 -0
  557. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/roa-loader/package.json +0 -0
  558. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/sidebars.js +0 -0
  559. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/ApiLink.jsx +0 -0
  560. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Button.jsx +0 -0
  561. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Button.module.css +0 -0
  562. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/CopyButton.jsx +0 -0
  563. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/CopyButton.module.css +0 -0
  564. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Gradients.jsx +0 -0
  565. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Highlights.jsx +0 -0
  566. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Highlights.module.css +0 -0
  567. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/HomepageCliExample.jsx +0 -0
  568. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/HomepageCliExample.module.css +0 -0
  569. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/HomepageCtaSection.jsx +0 -0
  570. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/HomepageCtaSection.module.css +0 -0
  571. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/HomepageHeroSection.jsx +0 -0
  572. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/HomepageHeroSection.module.css +0 -0
  573. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/LanguageInfoWidget.jsx +0 -0
  574. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/LanguageInfoWidget.module.css +0 -0
  575. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/LanguageSwitch.jsx +0 -0
  576. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/LanguageSwitch.module.css +0 -0
  577. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/RiverSection.jsx +0 -0
  578. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/RiverSection.module.css +0 -0
  579. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/ThreeCardsWithIcon.jsx +0 -0
  580. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/ThreeCardsWithIcon.module.css +0 -0
  581. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/animated-crawlee-logo-dark.svg +0 -0
  582. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/Homepage/animated-crawlee-logo-light.svg +0 -0
  583. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/RunnableCodeBlock.jsx +0 -0
  584. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/components/RunnableCodeBlock.module.css +0 -0
  585. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/css/custom.css +0 -0
  586. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/pages/home_page_example.py +0 -0
  587. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/pages/index.js +0 -0
  588. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/pages/index.module.css +0 -0
  589. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/ColorModeToggle/dark-mode-icon.svg +0 -0
  590. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/ColorModeToggle/index.js +0 -0
  591. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/ColorModeToggle/light-mode-icon.svg +0 -0
  592. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/ColorModeToggle/styles.module.css +0 -0
  593. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/DocItem/Layout/index.js +0 -0
  594. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/DocItem/Layout/styles.module.css +0 -0
  595. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/Footer/LinkItem/index.js +0 -0
  596. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/Footer/LinkItem/index.module.css +0 -0
  597. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/Footer/index.js +0 -0
  598. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/Footer/index.module.css +0 -0
  599. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/MDXComponents/A.js +0 -0
  600. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/Navbar/Content/index.js +0 -0
  601. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/Navbar/Content/styles.module.css +0 -0
  602. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/Navbar/Logo/index.js +0 -0
  603. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/Navbar/Logo/index.module.css +0 -0
  604. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/Navbar/MobileSidebar/Header/index.js +0 -0
  605. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/Navbar/MobileSidebar/Header/index.module.css +0 -0
  606. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/Navbar/MobileSidebar/Layout/index.js +0 -0
  607. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/Navbar/MobileSidebar/PrimaryMenu/index.js +0 -0
  608. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/Navbar/MobileSidebar/index.js +0 -0
  609. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/src/theme/NavbarItem/ComponentTypes.js +0 -0
  610. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/.nojekyll +0 -0
  611. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/font/lota.woff +0 -0
  612. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/font/lota.woff2 +0 -0
  613. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/API.png +0 -0
  614. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/arrow_right.svg +0 -0
  615. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/auto-scaling-dark.webp +0 -0
  616. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/auto-scaling-light.webp +0 -0
  617. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/check.svg +0 -0
  618. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/chrome-scrape-dark.gif +0 -0
  619. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/chrome-scrape-light.gif +0 -0
  620. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/cloud_icon.svg +0 -0
  621. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/community-dark-icon.svg +0 -0
  622. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/community-light-icon.svg +0 -0
  623. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/crawlee-dark-new.svg +0 -0
  624. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/crawlee-dark.svg +0 -0
  625. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/crawlee-javascript-dark.svg +0 -0
  626. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/crawlee-javascript-light.svg +0 -0
  627. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/crawlee-light-new.svg +0 -0
  628. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/crawlee-light.svg +0 -0
  629. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/crawlee-logo-monocolor.svg +0 -0
  630. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/crawlee-logo.svg +0 -0
  631. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/crawlee-python-dark.svg +0 -0
  632. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/crawlee-python-light.svg +0 -0
  633. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/crawlee-python-og.png +0 -0
  634. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/defaults-dark-icon.svg +0 -0
  635. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/defaults-light-icon.svg +0 -0
  636. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/discord-brand-dark.svg +0 -0
  637. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/discord-brand.svg +0 -0
  638. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/docusaurus.svg +0 -0
  639. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/external-link.svg +0 -0
  640. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/favicon.ico +0 -0
  641. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/favorite-tools-dark.webp +0 -0
  642. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/favorite-tools-light.webp +0 -0
  643. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/features/auto-scaling.svg +0 -0
  644. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/features/automate-everything.svg +0 -0
  645. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/features/fingerprints.svg +0 -0
  646. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/features/node-requests.svg +0 -0
  647. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/features/runs-on-py.svg +0 -0
  648. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/features/storage.svg +0 -0
  649. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/features/works-everywhere.svg +0 -0
  650. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/fill-and-submit-web-form/00.jpg +0 -0
  651. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/fill-and-submit-web-form/01.jpg +0 -0
  652. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/fill-and-submit-web-form/02.jpg +0 -0
  653. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/fill-and-submit-web-form/03.jpg +0 -0
  654. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/getting-started/current-price.jpg +0 -0
  655. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/getting-started/scraping-practice.jpg +0 -0
  656. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/getting-started/select-an-element.jpg +0 -0
  657. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/getting-started/selected-element.jpg +0 -0
  658. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/getting-started/sku.jpg +0 -0
  659. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/getting-started/title.jpg +0 -0
  660. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/github-brand-dark.svg +0 -0
  661. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/github-brand.svg +0 -0
  662. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/guides/jaeger_otel_search_view_example.png +0 -0
  663. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/guides/jaeger_otel_trace_example.png +0 -0
  664. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/hearth copy.svg +0 -0
  665. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/hearth.svg +0 -0
  666. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/javascript_logo.svg +0 -0
  667. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/js_file.svg +0 -0
  668. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/logo-big.svg +0 -0
  669. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/logo-blur.png +0 -0
  670. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/logo-blur.svg +0 -0
  671. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/logo-zoom.svg +0 -0
  672. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/menu-arrows.svg +0 -0
  673. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/oss_logo.png +0 -0
  674. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/puppeteer-live-view-dashboard.png +0 -0
  675. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/puppeteer-live-view-detail.png +0 -0
  676. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/queue-dark-icon.svg +0 -0
  677. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/queue-light-icon.svg +0 -0
  678. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/resuming-paused-crawl/00.webp +0 -0
  679. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/resuming-paused-crawl/01.webp +0 -0
  680. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/robot.png +0 -0
  681. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/routing-dark-icon.svg +0 -0
  682. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/routing-light-icon.svg +0 -0
  683. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/scraping-utils-dark-icon.svg +0 -0
  684. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/scraping-utils-light-icon.svg +0 -0
  685. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/smart-proxy-dark.webp +0 -0
  686. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/smart-proxy-light.webp +0 -0
  687. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/source_code.png +0 -0
  688. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/system.svg +0 -0
  689. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/triangles_dark.svg +0 -0
  690. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/triangles_light.svg +0 -0
  691. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/workflow.svg +0 -0
  692. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/zero-setup-dark-icon.svg +0 -0
  693. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/img/zero-setup-light-icon.svg +0 -0
  694. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/js/custom.js +0 -0
  695. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/static/robots.txt +0 -0
  696. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/tools/docs-prettier.config.js +0 -0
  697. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/tools/utils/externalLink.js +0 -0
  698. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/tools/website_gif/chrome-scrape-dark.gif +0 -0
  699. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/tools/website_gif/chrome-scrape-dark.mp4 +0 -0
  700. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/tools/website_gif/chrome-scrape-light.gif +0 -0
  701. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/tools/website_gif/chrome-scrape-light.mp4 +0 -0
  702. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/tools/website_gif/website_gif.mjs +0 -0
  703. {crawlee-1.1.1 → crawlee-1.1.1b1}/website/tsconfig.eslint.json +0 -0
@@ -24,7 +24,7 @@ jobs:
 
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v6
+        uses: actions/checkout@v5
         with:
           token: ${{ secrets.APIFY_SERVICE_ACCOUNT_GITHUB_TOKEN }}
           ref: ${{ github.event_name == 'workflow_call' && inputs.ref || github.ref }}
@@ -67,10 +67,6 @@ jobs:
         uses: actions/deploy-pages@v4
 
       - name: Invalidate CloudFront cache
-        run: |
-          gh workflow run invalidate-cloudfront.yml \
-            --repo apify/apify-docs-private \
-            --field deployment=crawlee-web
-          echo "✅ CloudFront cache invalidation workflow triggered successfully"
+        run: gh workflow run invalidate.yaml --repo apify/apify-docs-private
         env:
           GITHUB_TOKEN: ${{ secrets.APIFY_SERVICE_ACCOUNT_GITHUB_TOKEN }}
@@ -24,7 +24,7 @@ jobs:
 
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v6
+        uses: actions/checkout@v5
 
       - name: Setup node
         uses: actions/setup-node@v6
@@ -2,17 +2,15 @@
 
 All notable changes to this project will be documented in this file.
 
-## [1.1.1](https://github.com/apify/crawlee-python/releases/tag/v1.1.1) (2025-12-02)
+<!-- git-cliff-unreleased-start -->
+## 1.1.1 - **not yet released**
 
 ### 🐛 Bug Fixes
 
 - Unify separators in `unique_key` construction ([#1569](https://github.com/apify/crawlee-python/pull/1569)) ([af46a37](https://github.com/apify/crawlee-python/commit/af46a3733b059a8052489296e172f005def953f7)) by [@vdusek](https://github.com/vdusek), closes [#1512](https://github.com/apify/crawlee-python/issues/1512)
-- Fix `same-domain` strategy ignoring public suffix ([#1572](https://github.com/apify/crawlee-python/pull/1572)) ([3d018b2](https://github.com/apify/crawlee-python/commit/3d018b21a28a4bee493829783057188d6106a69b)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1571](https://github.com/apify/crawlee-python/issues/1571)
-- Make context helpers work in `FailedRequestHandler` and `ErrorHandler` ([#1570](https://github.com/apify/crawlee-python/pull/1570)) ([b830019](https://github.com/apify/crawlee-python/commit/b830019350830ac33075316061659e2854f7f4a5)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1532](https://github.com/apify/crawlee-python/issues/1532)
-- Fix non-ASCII character corruption in `FileSystemStorageClient` on systems without UTF-8 default encoding ([#1580](https://github.com/apify/crawlee-python/pull/1580)) ([f179f86](https://github.com/apify/crawlee-python/commit/f179f8671b0b6af9264450e4fef7e49d1cecd2bd)) by [@Mantisus](https://github.com/Mantisus), closes [#1579](https://github.com/apify/crawlee-python/issues/1579)
-- Respect `<base>` when enqueuing ([#1590](https://github.com/apify/crawlee-python/pull/1590)) ([de517a1](https://github.com/apify/crawlee-python/commit/de517a1629cc29b20568143eb64018f216d4ba33)) by [@Mantisus](https://github.com/Mantisus), closes [#1589](https://github.com/apify/crawlee-python/issues/1589)
 
 
+<!-- git-cliff-unreleased-end -->
 ## [1.1.0](https://github.com/apify/crawlee-python/releases/tag/v1.1.0) (2025-11-18)
 
 ### 🚀 Features
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: crawlee
-Version: 1.1.1
+Version: 1.1.1b1
 Summary: Crawlee for Python
 Project-URL: Apify Homepage, https://apify.com
 Project-URL: Changelog, https://crawlee.dev/python/docs/changelog
@@ -9,7 +9,7 @@ from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext
 from crawlee.storage_clients import MemoryStorageClient
 
 
-@get('/')  # type: ignore[untyped-decorator]
+@get('/')
 async def main() -> str:
     """The crawler entry point that will be called when the HTTP endpoint is accessed."""
     # highlight-start
@@ -6,7 +6,10 @@ from datetime import timedelta
 import functions_framework
 from flask import Request, Response
 
-from crawlee.crawlers import BeautifulSoupCrawler, BeautifulSoupCrawlingContext
+from crawlee.crawlers import (
+    BeautifulSoupCrawler,
+    BeautifulSoupCrawlingContext,
+)
 from crawlee.storage_clients import MemoryStorageClient
 
 
@@ -48,7 +51,7 @@ async def main() -> str:
     # highlight-end
 
 
-@functions_framework.http  # type: ignore[untyped-decorator]
+@functions_framework.http
 def crawlee_run(request: Request) -> Response:
     # You can pass data to your crawler using `request`
     function_id = request.headers['Function-Execution-Id']
@@ -14,7 +14,7 @@ from .crawler import lifespan
 app = FastAPI(lifespan=lifespan, title='Crawler app')
 
 
-@app.get('/', response_class=HTMLResponse)  # type: ignore[untyped-decorator]
+@app.get('/', response_class=HTMLResponse)
 def index() -> str:
     return """
     <!DOCTYPE html>
@@ -32,7 +32,7 @@ def index() -> str:
     """
 
 
-@app.get('/scrape')  # type: ignore[untyped-decorator]
+@app.get('/scrape')
 async def scrape_url(request: Request, url: str | None = None) -> dict:
     if not url:
         return {'url': 'missing', 'scrape result': 'no results'}
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "crawlee"
-version = "1.1.1"
+version = "1.1.1b1"
 description = "Crawlee for Python"
 authors = [{ name = "Apify Technologies s.r.o.", email = "support@apify.com" }]
 license = { file = "LICENSE" }
@@ -101,7 +101,7 @@ dev = [
     "build<2.0.0", # For e2e tests.
     "dycw-pytest-only<3.0.0",
     "fakeredis[probabilistic,json,lua]<3.0.0",
-    "mypy~=1.19.0",
+    "mypy~=1.18.0",
     "pre-commit<5.0.0",
     "proxy-py<3.0.0",
     "pydoc-markdown<5.0.0",
@@ -15,7 +15,7 @@ if TYPE_CHECKING:
     import re
     from collections.abc import Callable, Coroutine, Sequence
 
-    from typing_extensions import NotRequired, Required, Self, Unpack
+    from typing_extensions import NotRequired, Required, Unpack
 
     from crawlee import Glob, Request
     from crawlee._request import RequestOptions
@@ -643,25 +643,6 @@ class BasicCrawlingContext:
         """Return hash of the context. Each context is considered unique."""
         return id(self)
 
-    def create_modified_copy(
-        self,
-        push_data: PushDataFunction | None = None,
-        add_requests: AddRequestsFunction | None = None,
-        get_key_value_store: GetKeyValueStoreFromRequestHandlerFunction | None = None,
-    ) -> Self:
-        """Create a modified copy of the crawling context with specified changes."""
-        original_fields = {field.name: getattr(self, field.name) for field in dataclasses.fields(self)}
-        modified_fields = {
-            key: value
-            for key, value in {
-                'push_data': push_data,
-                'add_requests': add_requests,
-                'get_key_value_store': get_key_value_store,
-            }.items()
-            if value
-        }
-        return self.__class__(**{**original_fields, **modified_fields})
-
 
 
 class GetDataKwargs(TypedDict):
     """Keyword arguments for dataset's `get_data` method."""
@@ -167,15 +167,9 @@ class AbstractHttpCrawler(
             kwargs.setdefault('strategy', 'same-hostname')
 
             links_iterator: Iterator[str] = iter(self._parser.find_links(parsed_content, selector=selector))
-
-            # Get base URL from <base> tag if present
-            extracted_base_urls = list(self._parser.find_links(parsed_content, 'base[href]'))
-            base_url: str = (
-                str(extracted_base_urls[0])
-                if extracted_base_urls
-                else context.request.loaded_url or context.request.url
+            links_iterator = to_absolute_url_iterator(
+                context.request.loaded_url or context.request.url, links_iterator, logger=context.log
             )
-            links_iterator = to_absolute_url_iterator(base_url, links_iterator, logger=context.log)
 
             if robots_txt_file:
                 skipped, links_iterator = partition(lambda url: robots_txt_file.is_allowed(url), links_iterator)
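
Editor's note: the lines removed in this hunk resolve relative links against a `<base href>` element instead of the loaded URL. A short, standard-library-only illustration of why that matters (the HTML base and URLs below are made up for the example):

    from urllib.parse import urljoin

    loaded_url = 'https://example.com/articles/index.html'
    base_href = 'https://example.com/archive/'  # what a <base href="..."> element would declare
    relative_link = 'page_4'

    # Without honouring <base>, the link resolves against the loaded URL...
    print(urljoin(loaded_url, relative_link))  # https://example.com/articles/page_4
    # ...while a browser would resolve it against the declared base URL.
    print(urljoin(base_href, relative_link))   # https://example.com/archive/page_4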
@@ -2,7 +2,6 @@
 from __future__ import annotations
 
 import asyncio
-import functools
 import logging
 import signal
 import sys
@@ -15,7 +14,7 @@ from contextlib import AsyncExitStack, suppress
 from datetime import timedelta
 from functools import partial
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Generic, Literal, ParamSpec, cast
+from typing import TYPE_CHECKING, Any, Generic, Literal, cast
 from urllib.parse import ParseResult, urlparse
 from weakref import WeakKeyDictionary
 
@@ -97,9 +96,6 @@ if TYPE_CHECKING:
     TCrawlingContext = TypeVar('TCrawlingContext', bound=BasicCrawlingContext, default=BasicCrawlingContext)
     TStatisticsState = TypeVar('TStatisticsState', bound=StatisticsState, default=StatisticsState)
     TRequestIterator = TypeVar('TRequestIterator', str, Request)
-    TParams = ParamSpec('TParams')
-    T = TypeVar('T')
-
     ErrorHandler = Callable[[TCrawlingContext, Exception], Awaitable[Request | None]]
     FailedRequestHandler = Callable[[TCrawlingContext, Exception], Awaitable[None]]
     SkippedRequestCallback = Callable[[str, SkippedReason], Awaitable[None]]
@@ -524,24 +520,6 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
         self._logger.info(f'Crawler.stop() was called with following reason: {reason}.')
         self._unexpected_stop = True
 
-    def _wrap_handler_with_error_context(
-        self, handler: Callable[[TCrawlingContext | BasicCrawlingContext, Exception], Awaitable[T]]
-    ) -> Callable[[TCrawlingContext | BasicCrawlingContext, Exception], Awaitable[T]]:
-        """Decorate error handlers to make their context helpers usable."""
-
-        @functools.wraps(handler)
-        async def wrapped_handler(context: TCrawlingContext | BasicCrawlingContext, exception: Exception) -> T:
-            # Original context helpers that are from `RequestHandlerRunResult` will not be commited as the request
-            # failed. Modified context provides context helpers with direct access to the storages.
-            error_context = context.create_modified_copy(
-                push_data=self._push_data,
-                get_key_value_store=self.get_key_value_store,
-                add_requests=functools.partial(self._add_requests, context),
-            )
-            return await handler(error_context, exception)
-
-        return wrapped_handler
-
     def _stop_if_max_requests_count_exceeded(self) -> None:
         """Call `stop` when the maximum number of requests to crawl has been reached."""
         if self._max_requests_per_crawl is None:
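
Editor's note: the removed `_wrap_handler_with_error_context` follows a common pattern — decorate a user-supplied async handler so that it receives a context whose helpers write straight to storage instead of into a result buffer that will never be committed. A stripped-down sketch of that wrapping idea, with plain dicts and hypothetical helpers standing in for Crawlee's types:

    import asyncio
    import functools
    from collections.abc import Awaitable, Callable

    Handler = Callable[[dict, Exception], Awaitable[None]]


    def wrap_with_direct_helpers(handler: Handler, push_data: Callable[[dict], Awaitable[None]]) -> Handler:
        """Return a handler whose context carries a direct-to-storage `push_data`."""

        @functools.wraps(handler)
        async def wrapped(context: dict, exception: Exception) -> None:
            error_context = {**context, 'push_data': push_data}
            await handler(error_context, exception)

        return wrapped


    async def main() -> None:
        async def user_handler(context: dict, exception: Exception) -> None:
            await context['push_data']({'failed': True, 'reason': str(exception)})

        async def direct_push(item: dict) -> None:  # stand-in for writing straight to a dataset
            print('stored:', item)

        handler = wrap_with_direct_helpers(user_handler, direct_push)
        await handler({'push_data': None}, RuntimeError('boom'))


    asyncio.run(main())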
@@ -640,7 +618,7 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
 
         The error handler is invoked after a request handler error occurs and before a retry attempt.
         """
-        self._error_handler = self._wrap_handler_with_error_context(handler)
+        self._error_handler = handler
         return handler
 
     def failed_request_handler(
@@ -650,7 +628,7 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
 
         The failed request handler is invoked when a request has failed all retry attempts.
         """
-        self._failed_request_handler = self._wrap_handler_with_error_context(handler)
+        self._failed_request_handler = handler
        return handler
 
     def on_skipped_request(self, callback: SkippedRequestCallback) -> SkippedRequestCallback:
@@ -1065,8 +1043,8 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
             return target_url.hostname == origin_url.hostname
 
         if strategy == 'same-domain':
-            origin_domain = self._tld_extractor.extract_str(origin_url.hostname).top_domain_under_public_suffix
-            target_domain = self._tld_extractor.extract_str(target_url.hostname).top_domain_under_public_suffix
+            origin_domain = self._tld_extractor.extract_str(origin_url.hostname).domain
+            target_domain = self._tld_extractor.extract_str(target_url.hostname).domain
             return origin_domain == target_domain
 
         if strategy == 'same-origin':
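
Editor's note: this is the `same-domain` fix noted in the changelog above. `.domain` drops the public suffix, so two unrelated hosts can look identical; the registrable domain keeps it. A quick illustration with tldextract, assuming it is installed (the first call may download the public suffix list):

    from tldextract import TLDExtract

    extractor = TLDExtract()

    a = extractor.extract_str('blog.someplace.com')
    b = extractor.extract_str('someplace.jp')

    print(a.domain, b.domain)  # 'someplace' 'someplace' -> the two hosts look like the same domain
    # In recent tldextract releases the registrable domain keeps the public suffix:
    #   a.top_domain_under_public_suffix == 'someplace.com'
    #   b.top_domain_under_public_suffix == 'someplace.jp'  -> correctly treated as different

This is also why `'https://someplace.jp/'` appears in the strategy test URLs of 1.1.1 and is absent from the 1.1.1b1 tests further down.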
@@ -1278,46 +1256,52 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
             else:
                 yield Request.from_url(url)
 
-    async def _add_requests(
-        self,
-        context: BasicCrawlingContext,
-        requests: Sequence[str | Request],
-        rq_id: str | None = None,
-        rq_name: str | None = None,
-        rq_alias: str | None = None,
-        **kwargs: Unpack[EnqueueLinksKwargs],
-    ) -> None:
-        """Add requests method aware of the crawling context."""
-        if rq_id or rq_name or rq_alias:
-            request_manager: RequestManager = await RequestQueue.open(
-                id=rq_id,
-                name=rq_name,
-                alias=rq_alias,
-                storage_client=self._service_locator.get_storage_client(),
-                configuration=self._service_locator.get_configuration(),
-            )
-        else:
-            request_manager = await self.get_request_manager()
-
-        context_aware_requests = list[Request]()
-        base_url = kwargs.get('base_url') or context.request.loaded_url or context.request.url
-        requests_iterator = self._convert_url_to_request_iterator(requests, base_url)
-        filter_requests_iterator = self._enqueue_links_filter_iterator(requests_iterator, context.request.url, **kwargs)
-        for dst_request in filter_requests_iterator:
-            # Update the crawl depth of the request.
-            dst_request.crawl_depth = context.request.crawl_depth + 1
-
-            if self._max_crawl_depth is None or dst_request.crawl_depth <= self._max_crawl_depth:
-                context_aware_requests.append(dst_request)
-
-        return await request_manager.add_requests(context_aware_requests)
-
     async def _commit_request_handler_result(self, context: BasicCrawlingContext) -> None:
         """Commit request handler result for the input `context`. Result is taken from `_context_result_map`."""
         result = self._context_result_map[context]
 
+        base_request_manager = await self.get_request_manager()
+
+        origin = context.request.loaded_url or context.request.url
+
         for add_requests_call in result.add_requests_calls:
-            await self._add_requests(context, **add_requests_call)
+            rq_id = add_requests_call.get('rq_id')
+            rq_name = add_requests_call.get('rq_name')
+            rq_alias = add_requests_call.get('rq_alias')
+            specified_params = sum(1 for param in [rq_id, rq_name, rq_alias] if param is not None)
+            if specified_params > 1:
+                raise ValueError('You can only provide one of `rq_id`, `rq_name` or `rq_alias` arguments.')
+            if rq_id or rq_name or rq_alias:
+                request_manager: RequestManager | RequestQueue = await RequestQueue.open(
+                    id=rq_id,
+                    name=rq_name,
+                    alias=rq_alias,
+                    storage_client=self._service_locator.get_storage_client(),
+                    configuration=self._service_locator.get_configuration(),
+                )
+            else:
+                request_manager = base_request_manager
+
+            requests = list[Request]()
+
+            base_url = url if (url := add_requests_call.get('base_url')) else origin
+
+            requests_iterator = self._convert_url_to_request_iterator(add_requests_call['requests'], base_url)
+
+            enqueue_links_kwargs: EnqueueLinksKwargs = {k: v for k, v in add_requests_call.items() if k != 'requests'}  # type: ignore[assignment]
+
+            filter_requests_iterator = self._enqueue_links_filter_iterator(
+                requests_iterator, context.request.url, **enqueue_links_kwargs
+            )
+
+            for dst_request in filter_requests_iterator:
+                # Update the crawl depth of the request.
+                dst_request.crawl_depth = context.request.crawl_depth + 1
+
+                if self._max_crawl_depth is None or dst_request.crawl_depth <= self._max_crawl_depth:
+                    requests.append(dst_request)
+
+            await request_manager.add_requests(requests)
 
         for push_data_call in result.push_data_calls:
             await self._push_data(**push_data_call)
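
Editor's note: both versions of this method end with the same bookkeeping — each enqueued request gets a crawl depth one greater than its parent, and anything beyond `max_crawl_depth` is dropped. A tiny standalone sketch of that filter, with plain dicts standing in for `Request` objects:

    def limit_depth(candidates: list[dict], parent_depth: int, max_crawl_depth: int | None) -> list[dict]:
        """Bump each candidate's crawl depth and keep only those within the configured limit."""
        kept = []
        for request in candidates:
            request['crawl_depth'] = parent_depth + 1
            if max_crawl_depth is None or request['crawl_depth'] <= max_crawl_depth:
                kept.append(request)
        return kept


    links = [{'url': 'https://example.com/a'}, {'url': 'https://example.com/b'}]
    print(limit_depth(links, parent_depth=2, max_crawl_depth=3))  # both kept, now at depth 3
    print(limit_depth(links, parent_depth=3, max_crawl_depth=3))  # both dropped, depth 4 exceeds the limit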
@@ -369,12 +369,9 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
             links_iterator: Iterator[str] = iter(
                 [url for element in elements if (url := await element.get_attribute('href')) is not None]
             )
-
-            # Get base URL from <base> tag if present
-            extracted_base_url = await context.page.evaluate('document.baseURI')
-            base_url: str = extracted_base_url or context.request.loaded_url or context.request.url
-
-            links_iterator = to_absolute_url_iterator(base_url, links_iterator, logger=context.log)
+            links_iterator = to_absolute_url_iterator(
+                context.request.loaded_url or context.request.url, links_iterator, logger=context.log
+            )
 
             if robots_txt_file:
                 skipped, links_iterator = partition(lambda url: robots_txt_file.is_allowed(url), links_iterator)
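
Editor's note: the removed Playwright branch asks the page itself for its base URI, which already accounts for any `<base href>` element. A minimal sketch of that lookup using Playwright's async API, assuming Playwright and a Chromium build are installed (the target URL is a placeholder):

    import asyncio

    from playwright.async_api import async_playwright


    async def main() -> None:
        async with async_playwright() as p:
            browser = await p.chromium.launch()
            page = await browser.new_page()
            await page.goto('https://example.com')  # placeholder URL
            # document.baseURI reflects <base href>, falling back to the document URL.
            base_url = await page.evaluate('document.baseURI')
            print(base_url)
            await browser.close()


    asyncio.run(main())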
@@ -134,7 +134,7 @@ class FileSystemDatasetClient(DatasetClient):
                 continue
 
             try:
-                file = await asyncio.to_thread(path_to_metadata.open, 'r', encoding='utf-8')
+                file = await asyncio.to_thread(path_to_metadata.open)
                 try:
                     file_content = json.load(file)
                     metadata = DatasetMetadata(**file_content)
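
Editor's note: this and the following storage-client hunks are the other side of the changelog's non-ASCII fix — 1.1.1 opens every metadata file with an explicit `encoding='utf-8'` instead of the platform default, while still pushing the blocking I/O off the event loop with `asyncio.to_thread`. The same one-line change repeats in the key-value store and request queue clients below. A self-contained sketch of the pattern (the file name is made up):

    import asyncio
    import json
    from pathlib import Path


    async def read_metadata(path: Path) -> dict:
        # Open in a worker thread so the event loop is not blocked, and force UTF-8
        # so non-ASCII values survive on systems with a different default encoding.
        file = await asyncio.to_thread(path.open, 'r', encoding='utf-8')
        try:
            return json.load(file)
        finally:
            file.close()


    async def main() -> None:
        path = Path('__metadata__.json')  # illustrative path
        path.write_text(json.dumps({'name': 'Čeština'}), encoding='utf-8')
        print(await read_metadata(path))


    asyncio.run(main())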
@@ -163,7 +163,7 @@ class FileSystemDatasetClient(DatasetClient):
 
         # If the dataset directory exists, reconstruct the client from the metadata file.
         if path_to_dataset.exists() and path_to_metadata.exists():
-            file = await asyncio.to_thread(open, path_to_metadata, 'r', encoding='utf-8')
+            file = await asyncio.to_thread(open, path_to_metadata)
             try:
                 file_content = json.load(file)
             finally:
@@ -133,7 +133,7 @@ class FileSystemKeyValueStoreClient(KeyValueStoreClient):
                 continue
 
             try:
-                file = await asyncio.to_thread(path_to_metadata.open, 'r', encoding='utf-8')
+                file = await asyncio.to_thread(path_to_metadata.open)
                 try:
                     file_content = json.load(file)
                     metadata = KeyValueStoreMetadata(**file_content)
@@ -162,7 +162,7 @@ class FileSystemKeyValueStoreClient(KeyValueStoreClient):
 
         # If the key-value store directory exists, reconstruct the client from the metadata file.
         if path_to_kvs.exists() and path_to_metadata.exists():
-            file = await asyncio.to_thread(open, path_to_metadata, 'r', encoding='utf-8')
+            file = await asyncio.to_thread(open, path_to_metadata)
             try:
                 file_content = json.load(file)
             finally:
@@ -239,7 +239,7 @@ class FileSystemKeyValueStoreClient(KeyValueStoreClient):
         # Read the metadata file
         async with self._lock:
             try:
-                file = await asyncio.to_thread(open, record_metadata_filepath, 'r', encoding='utf-8')
+                file = await asyncio.to_thread(open, record_metadata_filepath)
             except FileNotFoundError:
                 logger.warning(f'Metadata file disappeared for key "{key}", aborting get_value')
                 return None
@@ -197,7 +197,7 @@ class FileSystemRequestQueueClient(RequestQueueClient):
                 continue
 
             try:
-                file = await asyncio.to_thread(path_to_metadata.open, 'r', encoding='utf-8')
+                file = await asyncio.to_thread(path_to_metadata.open)
                 try:
                     file_content = json.load(file)
                     metadata = RequestQueueMetadata(**file_content)
@@ -232,7 +232,7 @@ class FileSystemRequestQueueClient(RequestQueueClient):
 
         # If the RQ directory exists, reconstruct the client from the metadata file.
         if path_to_rq.exists() and path_to_metadata.exists():
-            file = await asyncio.to_thread(open, path_to_metadata, 'r', encoding='utf-8')
+            file = await asyncio.to_thread(open, path_to_metadata)
             try:
                 file_content = json.load(file)
             finally:
@@ -775,7 +775,7 @@ class FileSystemRequestQueueClient(RequestQueueClient):
         """
         # Open the request file.
         try:
-            file = await asyncio.to_thread(open, file_path, 'r', encoding='utf-8')
+            file = await asyncio.to_thread(open, file_path)
         except FileNotFoundError:
             logger.warning(f'Request file "{file_path}" not found.')
             return None
@@ -71,9 +71,6 @@ async def test_static_crawler_actor_at_apify(
         project_path=tmp_path / actor_name, wheel_path=crawlee_wheel_path, package_manager=package_manager
     )
 
-    # Print apify version for debugging purposes in rare cases of CLI failures
-    subprocess.run(['apify', '--version'], check=True)  # noqa: ASYNC221, S607
-
     # Build actor using sequence of cli commands as the user would
     subprocess.run(  # noqa: ASYNC221, S603
         ['apify', 'login', '-t', os.environ['APIFY_TEST_USER_API_TOKEN']],  # noqa: S607
@@ -310,14 +310,14 @@ async def test_allows_multiple_run_calls(system_status: SystemStatus | Mock) ->
     done_count = 0
 
     async def run() -> None:
+        await asyncio.sleep(0.1)
         nonlocal done_count
         done_count += 1
-        await asyncio.sleep(0.1)
 
     pool = AutoscaledPool(
         system_status=system_status,
         run_task_function=run,
-        is_task_ready_function=lambda: future(done_count < 4),
+        is_task_ready_function=lambda: future(True),
         is_finished_function=lambda: future(done_count >= 4),
         concurrency_settings=ConcurrencySettings(
             min_concurrency=4,
@@ -330,6 +330,8 @@ async def test_allows_multiple_run_calls(system_status: SystemStatus | Mock) ->
     assert done_count == 4
 
     done_count = 0
+    await asyncio.sleep(0.2)  # Allow any lingering callbacks to complete
+    done_count = 0  # Reset again to ensure clean state
 
     await pool.run()
     assert done_count == 4
@@ -54,7 +54,6 @@ def test_memory_estimation_does_not_overestimate_due_to_shared_memory() -> None:
 
     def extra_memory_child(ready: synchronize.Barrier, measured: synchronize.Barrier) -> None:
         memory = SharedMemory(size=extra_memory_size, create=True)
-        assert memory.buf is not None
         memory.buf[:] = bytearray([255 for _ in range(extra_memory_size)])
         print(f'Using the memory... {memory.buf[-1]}')
         ready.wait()
@@ -65,7 +64,6 @@ def test_memory_estimation_does_not_overestimate_due_to_shared_memory() -> None:
     def shared_extra_memory_child(
         ready: synchronize.Barrier, measured: synchronize.Barrier, memory: SharedMemory
     ) -> None:
-        assert memory.buf is not None
         print(f'Using the memory... {memory.buf[-1]}')
         ready.wait()
         measured.wait()
@@ -81,7 +79,6 @@ def test_memory_estimation_does_not_overestimate_due_to_shared_memory() -> None:
 
     if use_shared_memory:
         shared_memory = SharedMemory(size=extra_memory_size, create=True)
-        assert shared_memory.buf is not None
         shared_memory.buf[:] = bytearray([255 for _ in range(extra_memory_size)])
         extra_args = [shared_memory]
     else:
@@ -284,46 +284,6 @@ async def test_calls_failed_request_handler() -> None:
     assert isinstance(calls[0][1], RuntimeError)
 
 
-@pytest.mark.parametrize('handler', ['failed_request_handler', 'error_handler'])
-async def test_handlers_use_context_helpers(tmp_path: Path, handler: str) -> None:
-    """Test that context helpers used in `failed_request_handler` and in `error_handler` have effect."""
-    # Prepare crawler
-    storage_client = FileSystemStorageClient()
-    crawler = BasicCrawler(
-        max_request_retries=1, storage_client=storage_client, configuration=Configuration(storage_dir=str(tmp_path))
-    )
-    # Test data
-    rq_alias = 'other'
-    test_data = {'some': 'data'}
-    test_key = 'key'
-    test_value = 'value'
-    test_request = Request.from_url('https://d.placeholder.com')
-
-    # Request handler with injected error
-    @crawler.router.default_handler
-    async def request_handler(context: BasicCrawlingContext) -> None:
-        raise RuntimeError('Arbitrary crash for testing purposes')
-
-    # Apply one of the handlers
-    @getattr(crawler, handler)  # type: ignore[untyped-decorator]
-    async def handler_implementation(context: BasicCrawlingContext, error: Exception) -> None:
-        await context.push_data(test_data)
-        await context.add_requests(requests=[test_request], rq_alias=rq_alias)
-        kvs = await context.get_key_value_store()
-        await kvs.set_value(test_key, test_value)
-
-    await crawler.run(['https://b.placeholder.com'])
-
-    # Verify that the context helpers used in handlers had effect on used storages
-    dataset = await Dataset.open(storage_client=storage_client)
-    kvs = await KeyValueStore.open(storage_client=storage_client)
-    rq = await RequestQueue.open(alias=rq_alias, storage_client=storage_client)
-
-    assert test_value == await kvs.get_value(test_key)
-    assert [test_data] == (await dataset.get_data()).items
-    assert test_request == await rq.fetch_next_request()
-
-
 async def test_handles_error_in_failed_request_handler() -> None:
     crawler = BasicCrawler(max_request_retries=3)
 
@@ -387,7 +347,6 @@ STRATEGY_TEST_URLS = (
     'https://blog.someplace.com/index.html',
     'https://redirect.someplace.com',
     'https://other.place.com/index.html',
-    'https://someplace.jp/',
 )
 
 INCLUDE_TEST_URLS = (
@@ -442,7 +401,7 @@ INCLUDE_TEST_URLS = (
     AddRequestsTestInput(
         start_url=STRATEGY_TEST_URLS[0],
         loaded_url=STRATEGY_TEST_URLS[0],
-        requests=STRATEGY_TEST_URLS,
+        requests=STRATEGY_TEST_URLS[:4],
         kwargs=EnqueueLinksKwargs(strategy='same-domain'),
         expected_urls=STRATEGY_TEST_URLS[1:4],
     ),
@@ -452,7 +411,7 @@ INCLUDE_TEST_URLS = (
     AddRequestsTestInput(
         start_url=STRATEGY_TEST_URLS[0],
         loaded_url=STRATEGY_TEST_URLS[0],
-        requests=STRATEGY_TEST_URLS,
+        requests=STRATEGY_TEST_URLS[:4],
         kwargs=EnqueueLinksKwargs(strategy='same-hostname'),
         expected_urls=[STRATEGY_TEST_URLS[1]],
     ),
@@ -462,7 +421,7 @@ INCLUDE_TEST_URLS = (
     AddRequestsTestInput(
         start_url=STRATEGY_TEST_URLS[0],
        loaded_url=STRATEGY_TEST_URLS[0],
-        requests=STRATEGY_TEST_URLS,
+        requests=STRATEGY_TEST_URLS[:4],
         kwargs=EnqueueLinksKwargs(strategy='same-origin'),
         expected_urls=[],
     ),
@@ -58,9 +58,6 @@ async def test_enqueue_links(redirect_server_url: URL, server_url: URL, http_cli
         str(server_url / 'page_1'),
         str(server_url / 'page_2'),
         str(server_url / 'page_3'),
-        str(server_url / 'page_4'),
-        str(server_url / 'base_page'),
-        str(server_url / 'base_subpath/page_5'),
     }
 
 
@@ -134,9 +131,6 @@ async def test_enqueue_links_with_transform_request_function(server_url: URL, ht
         str(server_url / 'sub_index'),
         str(server_url / 'page_1'),
         str(server_url / 'page_2'),
-        str(server_url / 'base_page'),
-        str(server_url / 'page_4'),
-        str(server_url / 'base_subpath/page_5'),
     }
 
     # # all urls added to `enqueue_links` must have a custom header
@@ -170,8 +164,6 @@ async def test_respect_robots_txt(server_url: URL, http_client: HttpClient) -> N
     assert visited == {
         str(server_url / 'start_enqueue'),
         str(server_url / 'sub_index'),
-        str(server_url / 'base_page'),
-        str(server_url / 'base_subpath/page_5'),
     }
 
 
@@ -229,7 +221,6 @@ async def test_on_skipped_request(server_url: URL, http_client: HttpClient) -> N
         str(server_url / 'page_1'),
         str(server_url / 'page_2'),
         str(server_url / 'page_3'),
-        str(server_url / 'page_4'),
     }
 