crawlee 1.0.5b16 (tar.gz) → 1.2.1b5 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crawlee might be problematic; see the package's registry page for more details.

Files changed (704)
  1. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/.github/workflows/build_and_deploy_docs.yaml +6 -2
  2. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/.github/workflows/run_code_checks.yaml +1 -0
  3. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/.github/workflows/templates_e2e_tests.yaml +1 -1
  4. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/.gitignore +1 -0
  5. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/CHANGELOG.md +37 -2
  6. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/PKG-INFO +4 -3
  7. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/deployment/code_examples/google/cloud_run_example.py +1 -1
  8. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/deployment/code_examples/google/google_example.py +2 -5
  9. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/export_entire_dataset_to_file_csv.py +2 -1
  10. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/export_entire_dataset_to_file_json.py +2 -1
  11. crawlee-1.2.1b5/docs/examples/code_examples/using_sitemap_request_loader.py +101 -0
  12. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/export_entire_dataset_to_file.mdx +1 -1
  13. crawlee-1.2.1b5/docs/examples/using_sitemap_request_loader.mdx +22 -0
  14. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/running_in_web_server/server.py +2 -2
  15. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/pyproject.toml +12 -4
  16. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/__init__.py +2 -1
  17. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_request.py +30 -11
  18. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_types.py +20 -1
  19. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/context.py +2 -2
  20. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/file.py +7 -0
  21. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/recurring_task.py +2 -1
  22. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/time.py +41 -1
  23. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/__init__.py +2 -1
  24. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_abstract_http/__init__.py +2 -1
  25. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +48 -13
  26. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py +6 -2
  27. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_basic/_basic_crawler.py +115 -112
  28. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_basic/_logging_utils.py +23 -4
  29. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawler.py +2 -2
  30. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_parsel/_parsel_crawler.py +2 -2
  31. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_playwright/_playwright_crawler.py +49 -11
  32. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_playwright/_playwright_http_client.py +7 -1
  33. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py +4 -1
  34. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_playwright/_types.py +12 -2
  35. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/errors.py +4 -0
  36. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/events/_event_manager.py +4 -4
  37. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/http_clients/_base.py +4 -0
  38. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/http_clients/_curl_impersonate.py +12 -0
  39. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/http_clients/_httpx.py +16 -6
  40. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/http_clients/_impit.py +25 -10
  41. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/request_loaders/_sitemap_request_loader.py +17 -4
  42. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/router.py +13 -3
  43. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/statistics/_models.py +32 -1
  44. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/statistics/_statistics.py +2 -21
  45. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_file_system/_dataset_client.py +2 -2
  46. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_file_system/_key_value_store_client.py +3 -3
  47. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_file_system/_request_queue_client.py +3 -3
  48. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_sql/_storage_client.py +0 -9
  49. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/e2e/project_template/test_static_crawlers_templates.py +3 -0
  50. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_autoscaling/test_autoscaled_pool.py +2 -4
  51. crawlee-1.2.1b5/tests/unit/_utils/test_shared_timeout.py +57 -0
  52. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_utils/test_system.py +3 -0
  53. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/browsers/test_playwright_browser_controller.py +5 -1
  54. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/conftest.py +1 -0
  55. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/crawlers/_basic/test_basic_crawler.py +138 -6
  56. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py +68 -0
  57. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/crawlers/_http/test_http_crawler.py +56 -1
  58. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/crawlers/_parsel/test_parsel_crawler.py +9 -0
  59. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/crawlers/_playwright/test_playwright_crawler.py +122 -1
  60. crawlee-1.2.1b5/tests/unit/crawlers/_playwright/test_utils.py +157 -0
  61. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/events/test_event_manager.py +12 -0
  62. crawlee-1.2.1b5/tests/unit/events/test_local_event_manager.py +25 -0
  63. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/request_loaders/test_sitemap_request_loader.py +35 -0
  64. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/server.py +44 -1
  65. crawlee-1.2.1b5/tests/unit/server_endpoints.py +142 -0
  66. crawlee-1.2.1b5/tests/unit/server_static/test.png +0 -0
  67. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storages/test_dataset.py +17 -0
  68. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storages/test_key_value_store.py +30 -9
  69. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storages/test_request_queue.py +19 -0
  70. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/uv.lock +770 -692
  71. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/docusaurus.config.js +2 -2
  72. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/package.json +2 -1
  73. crawlee-1.2.1b5/website/src/components/LLMButtons.jsx +510 -0
  74. crawlee-1.2.1b5/website/src/components/LLMButtons.module.css +151 -0
  75. crawlee-1.2.1b5/website/src/theme/DocItem/Content/index.js +35 -0
  76. crawlee-1.2.1b5/website/src/theme/DocItem/Content/styles.module.css +22 -0
  77. crawlee-1.2.1b5/website/static/.nojekyll +0 -0
  78. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/yarn.lock +734 -822
  79. crawlee-1.0.5b16/tests/unit/events/test_local_event_manager.py +0 -31
  80. crawlee-1.0.5b16/tests/unit/server_endpoints.py +0 -71
  81. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/.editorconfig +0 -0
  82. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/.github/CODEOWNERS +0 -0
  83. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/.github/pull_request_template.md +0 -0
  84. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/.github/workflows/check_pr_title.yaml +0 -0
  85. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/.github/workflows/pre_release.yaml +0 -0
  86. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/.github/workflows/release.yaml +0 -0
  87. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/.github/workflows/update_new_issue.yaml +0 -0
  88. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/.markdownlint.yaml +0 -0
  89. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/.pre-commit-config.yaml +0 -0
  90. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/CONTRIBUTING.md +0 -0
  91. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/LICENSE +0 -0
  92. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/Makefile +0 -0
  93. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/README.md +0 -0
  94. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/deployment/apify_platform.mdx +0 -0
  95. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/deployment/code_examples/apify/crawler_as_actor_example.py +0 -0
  96. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/deployment/code_examples/apify/get_public_url.py +0 -0
  97. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/deployment/code_examples/apify/log_with_config_example.py +0 -0
  98. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/deployment/code_examples/apify/proxy_advanced_example.py +0 -0
  99. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/deployment/code_examples/apify/proxy_example.py +0 -0
  100. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/deployment/google_cloud.mdx +0 -0
  101. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/deployment/google_cloud_run.mdx +0 -0
  102. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/add_data_to_dataset.mdx +0 -0
  103. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/beautifulsoup_crawler.mdx +0 -0
  104. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/capture_screenshot_using_playwright.mdx +0 -0
  105. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/capturing_page_snapshots_with_error_snapshotter.mdx +0 -0
  106. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/adaptive_playwright_crawler.py +0 -0
  107. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/add_data_to_dataset_bs.py +0 -0
  108. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/add_data_to_dataset_dataset.py +0 -0
  109. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/add_data_to_dataset_pw.py +0 -0
  110. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/beautifulsoup_crawler.py +0 -0
  111. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/beautifulsoup_crawler_keep_alive.py +0 -0
  112. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/beautifulsoup_crawler_stop.py +0 -0
  113. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/capture_screenshot_using_playwright.py +0 -0
  114. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/configure_json_logging.py +0 -0
  115. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_all_links_on_website_bs.py +0 -0
  116. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_all_links_on_website_pw.py +0 -0
  117. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_multiple_urls_bs.py +0 -0
  118. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_multiple_urls_pw.py +0 -0
  119. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_specific_links_on_website_bs.py +0 -0
  120. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_specific_links_on_website_pw.py +0 -0
  121. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_website_with_relative_links_all_links.py +0 -0
  122. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_website_with_relative_links_same_domain.py +0 -0
  123. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_website_with_relative_links_same_hostname.py +0 -0
  124. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_website_with_relative_links_same_origin.py +0 -0
  125. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/extract_and_add_specific_links_on_website_bs.py +0 -0
  126. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/extract_and_add_specific_links_on_website_pw.py +0 -0
  127. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/fill_and_submit_web_form_crawler.py +0 -0
  128. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/fill_and_submit_web_form_request.py +0 -0
  129. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/parsel_crawler.py +0 -0
  130. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/parsel_crawler_with_error_snapshotter.py +0 -0
  131. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/playwright_block_requests.py +0 -0
  132. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/playwright_crawler.py +0 -0
  133. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/playwright_crawler_with_camoufox.py +0 -0
  134. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/playwright_crawler_with_error_snapshotter.py +0 -0
  135. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/playwright_crawler_with_fingerprint_generator.py +0 -0
  136. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/respect_robots_on_skipped_request.py +0 -0
  137. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/respect_robots_txt_file.py +0 -0
  138. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/resuming_paused_crawl.py +0 -0
  139. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/using_browser_profiles_chrome.py +0 -0
  140. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/code_examples/using_browser_profiles_firefox.py +0 -0
  141. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/crawl_all_links_on_website.mdx +0 -0
  142. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/crawl_multiple_urls.mdx +0 -0
  143. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/crawl_specific_links_on_website.mdx +0 -0
  144. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/crawl_website_with_relative_links.mdx +0 -0
  145. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/crawler_keep_alive.mdx +0 -0
  146. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/crawler_stop.mdx +0 -0
  147. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/fill_and_submit_web_form.mdx +0 -0
  148. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/json_logging.mdx +0 -0
  149. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/parsel_crawler.mdx +0 -0
  150. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/playwright_crawler.mdx +0 -0
  151. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/playwright_crawler_adaptive.mdx +0 -0
  152. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/playwright_crawler_with_block_requests.mdx +0 -0
  153. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/playwright_crawler_with_camoufox.mdx +0 -0
  154. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/playwright_crawler_with_fingerprint_generator.mdx +0 -0
  155. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/respect_robots_txt_file.mdx +0 -0
  156. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/resuming_paused_crawl.mdx +0 -0
  157. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/examples/using_browser_profile.mdx +0 -0
  158. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/architecture_overview.mdx +0 -0
  159. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/avoid_blocking.mdx +0 -0
  160. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/avoid_blocking/default_fingerprint_generator_with_args.py +0 -0
  161. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/avoid_blocking/playwright_with_fingerprint_generator.py +0 -0
  162. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/creating_web_archive/manual_archiving_parsel_crawler.py +0 -0
  163. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/creating_web_archive/manual_archiving_playwright_crawler.py +0 -0
  164. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/creating_web_archive/simple_pw_through_proxy_pywb_server.py +0 -0
  165. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/error_handling/change_handle_error_status.py +0 -0
  166. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/error_handling/disable_retry.py +0 -0
  167. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/error_handling/handle_proxy_error.py +0 -0
  168. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/http_clients/parsel_curl_impersonate_example.py +0 -0
  169. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/http_clients/parsel_httpx_example.py +0 -0
  170. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/http_clients/parsel_impit_example.py +0 -0
  171. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/http_crawlers/beautifulsoup_example.py +0 -0
  172. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/http_crawlers/custom_crawler_example.py +0 -0
  173. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/http_crawlers/http_example.py +0 -0
  174. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/http_crawlers/parsel_example.py +0 -0
  175. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/login_crawler/http_login.py +0 -0
  176. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/login_crawler/playwright_login.py +0 -0
  177. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler/browser_configuration_example.py +0 -0
  178. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler/multiple_launch_example.py +0 -0
  179. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler/plugin_browser_configuration_example.py +0 -0
  180. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler/pre_navigation_hook_example.py +0 -0
  181. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler_adaptive/handler.py +0 -0
  182. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler_adaptive/init_beautifulsoup.py +0 -0
  183. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler_adaptive/init_parsel.py +0 -0
  184. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler_adaptive/init_prediction.py +0 -0
  185. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler_adaptive/pre_nav_hooks.py +0 -0
  186. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler_stagehand/__init__.py +0 -0
  187. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler_stagehand/browser_classes.py +0 -0
  188. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler_stagehand/stagehand_run.py +0 -0
  189. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler_stagehand/support_classes.py +0 -0
  190. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/proxy_management/inspecting_bs_example.py +0 -0
  191. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/proxy_management/inspecting_pw_example.py +0 -0
  192. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/proxy_management/integration_bs_example.py +0 -0
  193. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/proxy_management/integration_pw_example.py +0 -0
  194. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/proxy_management/quick_start_example.py +0 -0
  195. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/proxy_management/session_bs_example.py +0 -0
  196. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/proxy_management/session_pw_example.py +0 -0
  197. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/proxy_management/tiers_bs_example.py +0 -0
  198. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/proxy_management/tiers_pw_example.py +0 -0
  199. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_loaders/rl_basic_example.py +0 -0
  200. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_loaders/rl_basic_example_with_persist.py +0 -0
  201. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_loaders/rl_tandem_example.py +0 -0
  202. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_loaders/rl_tandem_example_explicit.py +0 -0
  203. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_loaders/sitemap_basic_example.py +0 -0
  204. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_loaders/sitemap_example_with_persist.py +0 -0
  205. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_loaders/sitemap_tandem_example.py +0 -0
  206. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_loaders/sitemap_tandem_example_explicit.py +0 -0
  207. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_router/adaptive_crawler_handlers.py +0 -0
  208. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_router/basic_request_handlers.py +0 -0
  209. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_router/custom_router_default_only.py +0 -0
  210. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_router/error_handler.py +0 -0
  211. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_router/failed_request_handler.py +0 -0
  212. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_router/http_pre_navigation.py +0 -0
  213. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_router/playwright_pre_navigation.py +0 -0
  214. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/request_router/simple_default_handler.py +0 -0
  215. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/running_in_web_server/__init__.py +0 -0
  216. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/running_in_web_server/crawler.py +0 -0
  217. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/scaling_crawlers/max_tasks_per_minute_example.py +0 -0
  218. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/scaling_crawlers/min_and_max_concurrency_example.py +0 -0
  219. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/service_locator/service_conflicts.py +0 -0
  220. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/service_locator/service_crawler_configuration.py +0 -0
  221. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/service_locator/service_crawler_event_manager.py +0 -0
  222. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/service_locator/service_crawler_storage_client.py +0 -0
  223. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/service_locator/service_locator_configuration.py +0 -0
  224. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/service_locator/service_locator_event_manager.py +0 -0
  225. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/service_locator/service_locator_storage_client.py +0 -0
  226. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/service_locator/service_storage_configuration.py +0 -0
  227. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/service_locator/service_storage_storage_client.py +0 -0
  228. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/session_management/multi_sessions_http.py +0 -0
  229. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/session_management/one_session_http.py +0 -0
  230. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/session_management/sm_basic.py +0 -0
  231. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/session_management/sm_beautifulsoup.py +0 -0
  232. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/session_management/sm_http.py +0 -0
  233. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/session_management/sm_parsel.py +0 -0
  234. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/session_management/sm_playwright.py +0 -0
  235. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/session_management/sm_standalone.py +0 -0
  236. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storage_clients/custom_storage_client_example.py +0 -0
  237. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storage_clients/file_system_storage_client_basic_example.py +0 -0
  238. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storage_clients/file_system_storage_client_configuration_example.py +0 -0
  239. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storage_clients/memory_storage_client_basic_example.py +0 -0
  240. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storage_clients/redis_storage_client_basic_example.py +0 -0
  241. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storage_clients/redis_storage_client_configuration_example.py +0 -0
  242. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storage_clients/registering_storage_clients_example.py +0 -0
  243. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storage_clients/sql_storage_client_basic_example.py +0 -0
  244. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storage_clients/sql_storage_client_configuration_example.py +0 -0
  245. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/cleaning_do_not_purge_example.py +0 -0
  246. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/cleaning_purge_explicitly_example.py +0 -0
  247. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/dataset_basic_example.py +0 -0
  248. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/dataset_with_crawler_example.py +0 -0
  249. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/dataset_with_crawler_explicit_example.py +0 -0
  250. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/helper_add_requests_example.py +0 -0
  251. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/helper_enqueue_links_example.py +0 -0
  252. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/kvs_basic_example.py +0 -0
  253. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/kvs_with_crawler_example.py +0 -0
  254. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/kvs_with_crawler_explicit_example.py +0 -0
  255. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/opening.py +0 -0
  256. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/rq_basic_example.py +0 -0
  257. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/rq_with_crawler_example.py +0 -0
  258. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/rq_with_crawler_explicit_example.py +0 -0
  259. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/code_examples/trace_and_monitor_crawlers/instrument_crawler.py +0 -0
  260. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/crawler_login.mdx +0 -0
  261. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/creating_web_archive.mdx +0 -0
  262. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/error_handling.mdx +0 -0
  263. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/http_clients.mdx +0 -0
  264. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/http_crawlers.mdx +0 -0
  265. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/playwright_crawler.mdx +0 -0
  266. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/playwright_crawler_adaptive.mdx +0 -0
  267. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/playwright_crawler_stagehand.mdx +0 -0
  268. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/proxy_management.mdx +0 -0
  269. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/request_loaders.mdx +0 -0
  270. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/request_router.mdx +0 -0
  271. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/running_in_web_server.mdx +0 -0
  272. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/scaling_crawlers.mdx +0 -0
  273. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/service_locator.mdx +0 -0
  274. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/session_management.mdx +0 -0
  275. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/storage_clients.mdx +0 -0
  276. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/storages.mdx +0 -0
  277. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/guides/trace_and_monitor_crawlers.mdx +0 -0
  278. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/01_setting_up.mdx +0 -0
  279. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/02_first_crawler.mdx +0 -0
  280. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/03_adding_more_urls.mdx +0 -0
  281. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/04_real_world_project.mdx +0 -0
  282. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/05_crawling.mdx +0 -0
  283. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/06_scraping.mdx +0 -0
  284. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/07_saving_data.mdx +0 -0
  285. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/08_refactoring.mdx +0 -0
  286. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/09_running_in_cloud.mdx +0 -0
  287. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/02_bs.py +0 -0
  288. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/02_bs_better.py +0 -0
  289. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/02_request_queue.py +0 -0
  290. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/03_enqueue_strategy.py +0 -0
  291. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/03_finding_new_links.py +0 -0
  292. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/03_globs.py +0 -0
  293. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/03_original_code.py +0 -0
  294. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/03_transform_request.py +0 -0
  295. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/04_sanity_check.py +0 -0
  296. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/05_crawling_detail.py +0 -0
  297. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/05_crawling_listing.py +0 -0
  298. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/06_scraping.py +0 -0
  299. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/07_final_code.py +0 -0
  300. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/07_first_code.py +0 -0
  301. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/08_main.py +0 -0
  302. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/08_routes.py +0 -0
  303. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/09_apify_sdk.py +0 -0
  304. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/__init__.py +0 -0
  305. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/code_examples/routes.py +0 -0
  306. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/introduction/index.mdx +0 -0
  307. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/pyproject.toml +0 -0
  308. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/quick-start/code_examples/beautifulsoup_crawler_example.py +0 -0
  309. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/quick-start/code_examples/parsel_crawler_example.py +0 -0
  310. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/quick-start/code_examples/playwright_crawler_example.py +0 -0
  311. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/quick-start/code_examples/playwright_crawler_headful_example.py +0 -0
  312. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/quick-start/index.mdx +0 -0
  313. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/upgrading/upgrading_to_v0x.md +0 -0
  314. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/docs/upgrading/upgrading_to_v1.md +0 -0
  315. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/renovate.json +0 -0
  316. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_autoscaling/__init__.py +0 -0
  317. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_autoscaling/_types.py +0 -0
  318. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_autoscaling/autoscaled_pool.py +0 -0
  319. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_autoscaling/py.typed +0 -0
  320. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_autoscaling/snapshotter.py +0 -0
  321. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_autoscaling/system_status.py +0 -0
  322. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_browserforge_workaround.py +0 -0
  323. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_cli.py +0 -0
  324. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_consts.py +0 -0
  325. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_log_config.py +0 -0
  326. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_service_locator.py +0 -0
  327. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/__init__.py +0 -0
  328. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/blocked.py +0 -0
  329. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/byte_size.py +0 -0
  330. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/console.py +0 -0
  331. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/crypto.py +0 -0
  332. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/docs.py +0 -0
  333. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/globs.py +0 -0
  334. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/html_to_text.py +0 -0
  335. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/models.py +0 -0
  336. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/raise_if_too_many_kwargs.py +0 -0
  337. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/recoverable_state.py +0 -0
  338. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/requests.py +0 -0
  339. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/robots.py +0 -0
  340. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/sitemap.py +0 -0
  341. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/system.py +0 -0
  342. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/try_import.py +0 -0
  343. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/urls.py +0 -0
  344. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/wait.py +0 -0
  345. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/_utils/web.py +0 -0
  346. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/browsers/__init__.py +0 -0
  347. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/browsers/_browser_controller.py +0 -0
  348. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/browsers/_browser_plugin.py +0 -0
  349. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/browsers/_browser_pool.py +0 -0
  350. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/browsers/_playwright_browser.py +0 -0
  351. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/browsers/_playwright_browser_controller.py +0 -0
  352. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/browsers/_playwright_browser_plugin.py +0 -0
  353. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/browsers/_types.py +0 -0
  354. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/browsers/py.typed +0 -0
  355. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/configuration.py +0 -0
  356. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_abstract_http/_abstract_http_parser.py +0 -0
  357. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_abstract_http/_http_crawling_context.py +0 -0
  358. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_abstract_http/py.typed +0 -0
  359. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_adaptive_playwright/__init__.py +0 -0
  360. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +0 -0
  361. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py +0 -0
  362. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py +0 -0
  363. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_adaptive_playwright/_result_comparator.py +0 -0
  364. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_adaptive_playwright/_utils.py +0 -0
  365. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_basic/__init__.py +0 -0
  366. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_basic/_basic_crawling_context.py +0 -0
  367. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_basic/_context_pipeline.py +0 -0
  368. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_basic/py.typed +0 -0
  369. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_beautifulsoup/__init__.py +0 -0
  370. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawling_context.py +0 -0
  371. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_parser.py +0 -0
  372. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_beautifulsoup/_utils.py +0 -0
  373. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_beautifulsoup/py.typed +0 -0
  374. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_http/__init__.py +0 -0
  375. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_http/_http_crawler.py +0 -0
  376. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_http/_http_parser.py +0 -0
  377. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_parsel/__init__.py +0 -0
  378. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_parsel/_parsel_crawling_context.py +0 -0
  379. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_parsel/_parsel_parser.py +0 -0
  380. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_parsel/_utils.py +0 -0
  381. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_playwright/__init__.py +0 -0
  382. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_playwright/_playwright_crawling_context.py +0 -0
  383. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_playwright/_utils.py +0 -0
  384. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/_types.py +0 -0
  385. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/crawlers/py.typed +0 -0
  386. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/events/__init__.py +0 -0
  387. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/events/_local_event_manager.py +0 -0
  388. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/events/_types.py +0 -0
  389. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/events/py.typed +0 -0
  390. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/fingerprint_suite/__init__.py +0 -0
  391. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/fingerprint_suite/_browserforge_adapter.py +0 -0
  392. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/fingerprint_suite/_consts.py +0 -0
  393. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/fingerprint_suite/_fingerprint_generator.py +0 -0
  394. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/fingerprint_suite/_header_generator.py +0 -0
  395. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/fingerprint_suite/_types.py +0 -0
  396. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/fingerprint_suite/py.typed +0 -0
  397. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/http_clients/__init__.py +0 -0
  398. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/otel/__init__.py +0 -0
  399. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/otel/crawler_instrumentor.py +0 -0
  400. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/cookiecutter.json +0 -0
  401. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/hooks/post_gen_project.py +0 -0
  402. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/hooks/pre_gen_project.py +0 -0
  403. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/main.py +0 -0
  404. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/main_beautifulsoup.py +0 -0
  405. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/main_parsel.py +0 -0
  406. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/main_playwright.py +0 -0
  407. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/main_playwright_camoufox.py +0 -0
  408. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/routes_beautifulsoup.py +0 -0
  409. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/routes_camoufox.py +0 -0
  410. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/routes_parsel.py +0 -0
  411. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/routes_playwright.py +0 -0
  412. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/routes_playwright_camoufox.py +0 -0
  413. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/.dockerignore +0 -0
  414. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile +0 -0
  415. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/README.md +0 -0
  416. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml +0 -0
  417. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/requirements.txt +0 -0
  418. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__init__.py +0 -0
  419. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__main__.py +0 -0
  420. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/main.py +0 -0
  421. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/routes.py +0 -0
  422. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/proxy_configuration.py +0 -0
  423. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/py.typed +0 -0
  424. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/request_loaders/__init__.py +0 -0
  425. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/request_loaders/_request_list.py +0 -0
  426. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/request_loaders/_request_loader.py +0 -0
  427. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/request_loaders/_request_manager.py +0 -0
  428. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/request_loaders/_request_manager_tandem.py +0 -0
  429. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/sessions/__init__.py +0 -0
  430. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/sessions/_cookies.py +0 -0
  431. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/sessions/_models.py +0 -0
  432. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/sessions/_session.py +0 -0
  433. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/sessions/_session_pool.py +0 -0
  434. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/sessions/py.typed +0 -0
  435. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/statistics/__init__.py +0 -0
  436. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/statistics/_error_snapshotter.py +0 -0
  437. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/statistics/_error_tracker.py +0 -0
  438. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/__init__.py +0 -0
  439. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_base/__init__.py +0 -0
  440. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_base/_dataset_client.py +0 -0
  441. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_base/_key_value_store_client.py +0 -0
  442. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_base/_request_queue_client.py +0 -0
  443. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_base/_storage_client.py +0 -0
  444. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_base/py.typed +0 -0
  445. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_file_system/__init__.py +0 -0
  446. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_file_system/_storage_client.py +0 -0
  447. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_file_system/_utils.py +0 -0
  448. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_file_system/py.typed +0 -0
  449. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_memory/__init__.py +0 -0
  450. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_memory/_dataset_client.py +0 -0
  451. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_memory/_key_value_store_client.py +0 -0
  452. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_memory/_request_queue_client.py +0 -0
  453. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_memory/_storage_client.py +0 -0
  454. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_memory/py.typed +0 -0
  455. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_redis/__init__.py +0 -0
  456. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_redis/_client_mixin.py +0 -0
  457. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_redis/_dataset_client.py +0 -0
  458. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_redis/_key_value_store_client.py +0 -0
  459. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_redis/_request_queue_client.py +0 -0
  460. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_redis/_storage_client.py +0 -0
  461. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_redis/_utils.py +0 -0
  462. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_redis/lua_scripts/atomic_bloom_add_requests.lua +0 -0
  463. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_redis/lua_scripts/atomic_fetch_request.lua +0 -0
  464. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_redis/lua_scripts/atomic_set_add_requests.lua +0 -0
  465. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_redis/lua_scripts/reclaim_stale_requests.lua +0 -0
  466. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_redis/py.typed +0 -0
  467. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_sql/__init__.py +0 -0
  468. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_sql/_client_mixin.py +0 -0
  469. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_sql/_dataset_client.py +0 -0
  470. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_sql/_db_models.py +0 -0
  471. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_sql/_key_value_store_client.py +0 -0
  472. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_sql/_request_queue_client.py +0 -0
  473. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_sql/py.typed +0 -0
  474. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/models.py +0 -0
  475. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storage_clients/py.typed +0 -0
  476. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storages/__init__.py +0 -0
  477. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storages/_base.py +0 -0
  478. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storages/_dataset.py +0 -0
  479. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storages/_key_value_store.py +0 -0
  480. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storages/_request_queue.py +0 -0
  481. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storages/_storage_instance_manager.py +0 -0
  482. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storages/_utils.py +0 -0
  483. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/src/crawlee/storages/py.typed +0 -0
  484. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/__init__.py +0 -0
  485. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/e2e/__init__.py +0 -0
  486. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/e2e/conftest.py +0 -0
  487. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/e2e/project_template/utils.py +0 -0
  488. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/README.md +0 -0
  489. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/__init__.py +0 -0
  490. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_autoscaling/test_snapshotter.py +0 -0
  491. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_autoscaling/test_system_status.py +0 -0
  492. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_statistics/test_error_tracker.py +0 -0
  493. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_statistics/test_periodic_logging.py +0 -0
  494. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_statistics/test_persistence.py +0 -0
  495. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_statistics/test_request_processing_record.py +0 -0
  496. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_utils/test_byte_size.py +0 -0
  497. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_utils/test_console.py +0 -0
  498. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_utils/test_crypto.py +0 -0
  499. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_utils/test_file.py +0 -0
  500. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_utils/test_globs.py +0 -0
  501. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_utils/test_html_to_text.py +0 -0
  502. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_utils/test_measure_time.py +0 -0
  503. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_utils/test_raise_if_too_many_kwargs.py +0 -0
  504. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_utils/test_recurring_task.py +0 -0
  505. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_utils/test_requests.py +0 -0
  506. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_utils/test_robots.py +0 -0
  507. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_utils/test_sitemap.py +0 -0
  508. /crawlee-1.0.5b16/tests/unit/_utils/test_timedelata_ms.py → /crawlee-1.2.1b5/tests/unit/_utils/test_timedelta_ms.py +0 -0
  509. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/_utils/test_urls.py +0 -0
  510. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/browsers/test_browser_pool.py +0 -0
  511. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/browsers/test_playwright_browser.py +0 -0
  512. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/browsers/test_playwright_browser_plugin.py +0 -0
  513. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler.py +0 -0
  514. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler_statistics.py +0 -0
  515. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawling_context.py +0 -0
  516. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/crawlers/_adaptive_playwright/test_predictor.py +0 -0
  517. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/crawlers/_basic/test_context_pipeline.py +0 -0
  518. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/fingerprint_suite/test_adapters.py +0 -0
  519. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/fingerprint_suite/test_header_generator.py +0 -0
  520. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/http_clients/test_http_clients.py +0 -0
  521. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/http_clients/test_httpx.py +0 -0
  522. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/otel/test_crawler_instrumentor.py +0 -0
  523. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/proxy_configuration/test_new_proxy_info.py +0 -0
  524. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/proxy_configuration/test_tiers.py +0 -0
  525. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/request_loaders/test_request_list.py +0 -0
  526. /crawlee-1.0.5b16/website/static/.nojekyll → /crawlee-1.2.1b5/tests/unit/server_static/test.js +0 -0
  527. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/sessions/test_cookies.py +0 -0
  528. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/sessions/test_models.py +0 -0
  529. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/sessions/test_session.py +0 -0
  530. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/sessions/test_session_pool.py +0 -0
  531. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py +0 -0
  532. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py +0 -0
  533. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storage_clients/_file_system/test_fs_rq_client.py +0 -0
  534. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storage_clients/_memory/test_memory_dataset_client.py +0 -0
  535. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storage_clients/_memory/test_memory_kvs_client.py +0 -0
  536. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storage_clients/_memory/test_memory_rq_client.py +0 -0
  537. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storage_clients/_redis/test_redis_dataset_client.py +0 -0
  538. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storage_clients/_redis/test_redis_kvs_client.py +0 -0
  539. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storage_clients/_redis/test_redis_rq_client.py +0 -0
  540. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storage_clients/_sql/test_sql_dataset_client.py +0 -0
  541. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storage_clients/_sql/test_sql_kvs_client.py +0 -0
  542. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storage_clients/_sql/test_sql_rq_client.py +0 -0
  543. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storages/conftest.py +0 -0
  544. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storages/test_request_manager_tandem.py +0 -0
  545. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/storages/test_storage_instance_manager.py +0 -0
  546. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/test_cli.py +0 -0
  547. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/test_configuration.py +0 -0
  548. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/test_log_config.py +0 -0
  549. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/test_router.py +0 -0
  550. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/tests/unit/test_service_locator.py +0 -0
  551. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/.eslintrc.json +0 -0
  552. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/.yarnrc.yml +0 -0
  553. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/babel.config.js +0 -0
  554. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/build_api_reference.sh +0 -0
  555. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/generate_module_shortcuts.py +0 -0
  556. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/patches/@docusaurus+core+3.4.0.patch +0 -0
  557. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/patches/@docusaurus+core+3.5.2.patch +0 -0
  558. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/roa-loader/index.js +0 -0
  559. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/roa-loader/package.json +0 -0
  560. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/sidebars.js +0 -0
  561. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/ApiLink.jsx +0 -0
  562. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Button.jsx +0 -0
  563. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Button.module.css +0 -0
  564. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/CopyButton.jsx +0 -0
  565. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/CopyButton.module.css +0 -0
  566. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Gradients.jsx +0 -0
  567. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Highlights.jsx +0 -0
  568. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Highlights.module.css +0 -0
  569. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/HomepageCliExample.jsx +0 -0
  570. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/HomepageCliExample.module.css +0 -0
  571. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/HomepageCtaSection.jsx +0 -0
  572. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/HomepageCtaSection.module.css +0 -0
  573. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/HomepageHeroSection.jsx +0 -0
  574. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/HomepageHeroSection.module.css +0 -0
  575. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/LanguageInfoWidget.jsx +0 -0
  576. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/LanguageInfoWidget.module.css +0 -0
  577. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/LanguageSwitch.jsx +0 -0
  578. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/LanguageSwitch.module.css +0 -0
  579. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/RiverSection.jsx +0 -0
  580. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/RiverSection.module.css +0 -0
  581. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/ThreeCardsWithIcon.jsx +0 -0
  582. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/ThreeCardsWithIcon.module.css +0 -0
  583. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/animated-crawlee-logo-dark.svg +0 -0
  584. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/Homepage/animated-crawlee-logo-light.svg +0 -0
  585. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/RunnableCodeBlock.jsx +0 -0
  586. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/components/RunnableCodeBlock.module.css +0 -0
  587. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/css/custom.css +0 -0
  588. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/pages/home_page_example.py +0 -0
  589. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/pages/index.js +0 -0
  590. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/pages/index.module.css +0 -0
  591. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/ColorModeToggle/dark-mode-icon.svg +0 -0
  592. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/ColorModeToggle/index.js +0 -0
  593. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/ColorModeToggle/light-mode-icon.svg +0 -0
  594. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/ColorModeToggle/styles.module.css +0 -0
  595. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/DocItem/Layout/index.js +0 -0
  596. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/DocItem/Layout/styles.module.css +0 -0
  597. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/Footer/LinkItem/index.js +0 -0
  598. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/Footer/LinkItem/index.module.css +0 -0
  599. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/Footer/index.js +0 -0
  600. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/Footer/index.module.css +0 -0
  601. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/MDXComponents/A.js +0 -0
  602. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/Navbar/Content/index.js +0 -0
  603. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/Navbar/Content/styles.module.css +0 -0
  604. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/Navbar/Logo/index.js +0 -0
  605. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/Navbar/Logo/index.module.css +0 -0
  606. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/Navbar/MobileSidebar/Header/index.js +0 -0
  607. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/Navbar/MobileSidebar/Header/index.module.css +0 -0
  608. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/Navbar/MobileSidebar/Layout/index.js +0 -0
  609. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/Navbar/MobileSidebar/PrimaryMenu/index.js +0 -0
  610. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/Navbar/MobileSidebar/index.js +0 -0
  611. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/src/theme/NavbarItem/ComponentTypes.js +0 -0
  612. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/font/lota.woff +0 -0
  613. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/font/lota.woff2 +0 -0
  614. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/API.png +0 -0
  615. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/arrow_right.svg +0 -0
  616. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/auto-scaling-dark.webp +0 -0
  617. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/auto-scaling-light.webp +0 -0
  618. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/check.svg +0 -0
  619. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/chrome-scrape-dark.gif +0 -0
  620. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/chrome-scrape-light.gif +0 -0
  621. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/cloud_icon.svg +0 -0
  622. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/community-dark-icon.svg +0 -0
  623. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/community-light-icon.svg +0 -0
  624. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/crawlee-dark-new.svg +0 -0
  625. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/crawlee-dark.svg +0 -0
  626. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/crawlee-javascript-dark.svg +0 -0
  627. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/crawlee-javascript-light.svg +0 -0
  628. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/crawlee-light-new.svg +0 -0
  629. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/crawlee-light.svg +0 -0
  630. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/crawlee-logo-monocolor.svg +0 -0
  631. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/crawlee-logo.svg +0 -0
  632. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/crawlee-python-dark.svg +0 -0
  633. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/crawlee-python-light.svg +0 -0
  634. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/crawlee-python-og.png +0 -0
  635. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/defaults-dark-icon.svg +0 -0
  636. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/defaults-light-icon.svg +0 -0
  637. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/discord-brand-dark.svg +0 -0
  638. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/discord-brand.svg +0 -0
  639. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/docusaurus.svg +0 -0
  640. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/external-link.svg +0 -0
  641. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/favicon.ico +0 -0
  642. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/favorite-tools-dark.webp +0 -0
  643. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/favorite-tools-light.webp +0 -0
  644. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/features/auto-scaling.svg +0 -0
  645. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/features/automate-everything.svg +0 -0
  646. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/features/fingerprints.svg +0 -0
  647. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/features/node-requests.svg +0 -0
  648. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/features/runs-on-py.svg +0 -0
  649. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/features/storage.svg +0 -0
  650. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/features/works-everywhere.svg +0 -0
  651. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/fill-and-submit-web-form/00.jpg +0 -0
  652. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/fill-and-submit-web-form/01.jpg +0 -0
  653. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/fill-and-submit-web-form/02.jpg +0 -0
  654. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/fill-and-submit-web-form/03.jpg +0 -0
  655. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/getting-started/current-price.jpg +0 -0
  656. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/getting-started/scraping-practice.jpg +0 -0
  657. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/getting-started/select-an-element.jpg +0 -0
  658. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/getting-started/selected-element.jpg +0 -0
  659. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/getting-started/sku.jpg +0 -0
  660. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/getting-started/title.jpg +0 -0
  661. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/github-brand-dark.svg +0 -0
  662. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/github-brand.svg +0 -0
  663. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/guides/jaeger_otel_search_view_example.png +0 -0
  664. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/guides/jaeger_otel_trace_example.png +0 -0
  665. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/hearth copy.svg +0 -0
  666. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/hearth.svg +0 -0
  667. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/javascript_logo.svg +0 -0
  668. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/js_file.svg +0 -0
  669. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/logo-big.svg +0 -0
  670. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/logo-blur.png +0 -0
  671. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/logo-blur.svg +0 -0
  672. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/logo-zoom.svg +0 -0
  673. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/menu-arrows.svg +0 -0
  674. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/oss_logo.png +0 -0
  675. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/puppeteer-live-view-dashboard.png +0 -0
  676. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/puppeteer-live-view-detail.png +0 -0
  677. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/queue-dark-icon.svg +0 -0
  678. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/queue-light-icon.svg +0 -0
  679. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/resuming-paused-crawl/00.webp +0 -0
  680. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/resuming-paused-crawl/01.webp +0 -0
  681. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/robot.png +0 -0
  682. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/routing-dark-icon.svg +0 -0
  683. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/routing-light-icon.svg +0 -0
  684. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/scraping-utils-dark-icon.svg +0 -0
  685. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/scraping-utils-light-icon.svg +0 -0
  686. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/smart-proxy-dark.webp +0 -0
  687. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/smart-proxy-light.webp +0 -0
  688. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/source_code.png +0 -0
  689. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/system.svg +0 -0
  690. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/triangles_dark.svg +0 -0
  691. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/triangles_light.svg +0 -0
  692. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/workflow.svg +0 -0
  693. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/zero-setup-dark-icon.svg +0 -0
  694. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/img/zero-setup-light-icon.svg +0 -0
  695. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/js/custom.js +0 -0
  696. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/static/robots.txt +0 -0
  697. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/tools/docs-prettier.config.js +0 -0
  698. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/tools/utils/externalLink.js +0 -0
  699. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/tools/website_gif/chrome-scrape-dark.gif +0 -0
  700. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/tools/website_gif/chrome-scrape-dark.mp4 +0 -0
  701. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/tools/website_gif/chrome-scrape-light.gif +0 -0
  702. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/tools/website_gif/chrome-scrape-light.mp4 +0 -0
  703. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/tools/website_gif/website_gif.mjs +0 -0
  704. {crawlee-1.0.5b16 → crawlee-1.2.1b5}/website/tsconfig.eslint.json +0 -0
@@ -24,7 +24,7 @@ jobs:
24
24
 
25
25
  steps:
26
26
  - name: Checkout repository
27
- uses: actions/checkout@v5
27
+ uses: actions/checkout@v6
28
28
  with:
29
29
  token: ${{ secrets.APIFY_SERVICE_ACCOUNT_GITHUB_TOKEN }}
30
30
  ref: ${{ github.event_name == 'workflow_call' && inputs.ref || github.ref }}
@@ -67,6 +67,10 @@ jobs:
67
67
  uses: actions/deploy-pages@v4
68
68
 
69
69
  - name: Invalidate CloudFront cache
70
- run: gh workflow run invalidate.yaml --repo apify/apify-docs-private
70
+ run: |
71
+ gh workflow run invalidate-cloudfront.yml \
72
+ --repo apify/apify-docs-private \
73
+ --field deployment=crawlee-web
74
+ echo "✅ CloudFront cache invalidation workflow triggered successfully"
71
75
  env:
72
76
  GITHUB_TOKEN: ${{ secrets.APIFY_SERVICE_ACCOUNT_GITHUB_TOKEN }}
@@ -36,6 +36,7 @@ jobs:
36
36
  httpbin_url: ${{ secrets.APIFY_HTTPBIN_TOKEN && format('https://httpbin.apify.actor?token={0}', secrets.APIFY_HTTPBIN_TOKEN) || 'https://httpbin.org'}}
37
37
  with:
38
38
  python-versions: '["3.10", "3.11", "3.12", "3.13", "3.14"]'
39
+ os: '["ubuntu-latest", "windows-latest", "macos-latest"]'
39
40
 
40
41
  docs_check:
41
42
  name: Docs check
@@ -24,7 +24,7 @@ jobs:
24
24
 
25
25
  steps:
26
26
  - name: Checkout repository
27
- uses: actions/checkout@v5
27
+ uses: actions/checkout@v6
28
28
 
29
29
  - name: Setup node
30
30
  uses: actions/setup-node@v6
@@ -3,6 +3,7 @@ __pycache__
3
3
  .mypy_cache
4
4
  .pytest_cache
5
5
  .ruff_cache
6
+ .uv-cache
6
7
 
7
8
  # Virtual envs
8
9
  .venv
@@ -3,22 +3,57 @@
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
5
  <!-- git-cliff-unreleased-start -->
6
- ## 1.0.5 - **not yet released**
6
+ ## 1.2.1 - **not yet released**
7
+
8
+ ### 🐛 Bug Fixes
9
+
10
+ - Fix short error summary ([#1605](https://github.com/apify/crawlee-python/pull/1605)) ([b751208](https://github.com/apify/crawlee-python/commit/b751208d9a56e9d923e4559baeba35e2eede0450)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1602](https://github.com/apify/crawlee-python/issues/1602)
11
+ - Freeze core `Request` fields ([#1603](https://github.com/apify/crawlee-python/pull/1603)) ([ae6d86b](https://github.com/apify/crawlee-python/commit/ae6d86b8c82900116032596201d94cd7875aaadc)) by [@Mantisus](https://github.com/Mantisus)
12
+
13
+
14
+ <!-- git-cliff-unreleased-end -->
15
+ ## [1.2.0](https://github.com/apify/crawlee-python/releases/tag/v1.2.0) (2025-12-08)
16
+
17
+ ### 🚀 Features
18
+
19
+ - Add additional kwargs to Crawler&#x27;s export_data ([#1597](https://github.com/apify/crawlee-python/pull/1597)) ([5977f37](https://github.com/apify/crawlee-python/commit/5977f376b93a7c0d4dd53f0d331a4b04fedba2c6)) by [@vdusek](https://github.com/vdusek), closes [#526](https://github.com/apify/crawlee-python/issues/526)
20
+ - Add `goto_options` for `PlaywrightCrawler` ([#1599](https://github.com/apify/crawlee-python/pull/1599)) ([0b82f3b](https://github.com/apify/crawlee-python/commit/0b82f3b6fb175223ea2aa5b348afcd5fdb767972)) by [@Mantisus](https://github.com/Mantisus), closes [#1576](https://github.com/apify/crawlee-python/issues/1576)
21
+
22
+ ### 🐛 Bug Fixes
23
+
24
+ - Only apply requestHandlerTimeout to request handler ([#1474](https://github.com/apify/crawlee-python/pull/1474)) ([0dfb6c2](https://github.com/apify/crawlee-python/commit/0dfb6c2a13b6650736245fa39b3fbff397644df7)) by [@janbuchar](https://github.com/janbuchar)
25
+ - Handle the case when `error_handler` returns `Request` ([#1595](https://github.com/apify/crawlee-python/pull/1595)) ([8a961a2](https://github.com/apify/crawlee-python/commit/8a961a2b07d0d33a7302dbb13c17f3d90999d390)) by [@Mantisus](https://github.com/Mantisus)
26
+ - Align `Request.state` transitions with `Request` lifecycle ([#1601](https://github.com/apify/crawlee-python/pull/1601)) ([383225f](https://github.com/apify/crawlee-python/commit/383225f9f055d95ffb1302b8cf96f42ec264f1fc)) by [@Mantisus](https://github.com/Mantisus)
27
+
28
+
29
+ ## [1.1.1](https://github.com/apify/crawlee-python/releases/tag/v1.1.1) (2025-12-02)
30
+
31
+ ### 🐛 Bug Fixes
32
+
33
+ - Unify separators in `unique_key` construction ([#1569](https://github.com/apify/crawlee-python/pull/1569)) ([af46a37](https://github.com/apify/crawlee-python/commit/af46a3733b059a8052489296e172f005def953f7)) by [@vdusek](https://github.com/vdusek), closes [#1512](https://github.com/apify/crawlee-python/issues/1512)
34
+ - Fix `same-domain` strategy ignoring public suffix ([#1572](https://github.com/apify/crawlee-python/pull/1572)) ([3d018b2](https://github.com/apify/crawlee-python/commit/3d018b21a28a4bee493829783057188d6106a69b)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1571](https://github.com/apify/crawlee-python/issues/1571)
35
+ - Make context helpers work in `FailedRequestHandler` and `ErrorHandler` ([#1570](https://github.com/apify/crawlee-python/pull/1570)) ([b830019](https://github.com/apify/crawlee-python/commit/b830019350830ac33075316061659e2854f7f4a5)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1532](https://github.com/apify/crawlee-python/issues/1532)
36
+ - Fix non-ASCII character corruption in `FileSystemStorageClient` on systems without UTF-8 default encoding ([#1580](https://github.com/apify/crawlee-python/pull/1580)) ([f179f86](https://github.com/apify/crawlee-python/commit/f179f8671b0b6af9264450e4fef7e49d1cecd2bd)) by [@Mantisus](https://github.com/Mantisus), closes [#1579](https://github.com/apify/crawlee-python/issues/1579)
37
+ - Respect `&lt;base&gt;` when enqueuing ([#1590](https://github.com/apify/crawlee-python/pull/1590)) ([de517a1](https://github.com/apify/crawlee-python/commit/de517a1629cc29b20568143eb64018f216d4ba33)) by [@Mantisus](https://github.com/Mantisus), closes [#1589](https://github.com/apify/crawlee-python/issues/1589)
38
+
39
+
40
+ ## [1.1.0](https://github.com/apify/crawlee-python/releases/tag/v1.1.0) (2025-11-18)
7
41
 
8
42
  ### 🚀 Features
9
43
 
10
44
  - Add `chrome` `BrowserType` for `PlaywrightCrawler` to use the Chrome browser ([#1487](https://github.com/apify/crawlee-python/pull/1487)) ([b06937b](https://github.com/apify/crawlee-python/commit/b06937bbc3afe3c936b554bfc503365c1b2c526b)) by [@Mantisus](https://github.com/Mantisus), closes [#1071](https://github.com/apify/crawlee-python/issues/1071)
11
45
  - Add `RedisStorageClient` based on Redis v8.0+ ([#1406](https://github.com/apify/crawlee-python/pull/1406)) ([d08d13d](https://github.com/apify/crawlee-python/commit/d08d13d39203c24ab61fe254b0956d6744db3b5f)) by [@Mantisus](https://github.com/Mantisus)
12
46
  - Add support for Python 3.14 ([#1553](https://github.com/apify/crawlee-python/pull/1553)) ([89e9130](https://github.com/apify/crawlee-python/commit/89e9130cabee0fbc974b29c26483b7fa0edf627c)) by [@Mantisus](https://github.com/Mantisus)
47
+ - Add `transform_request_function` parameter for `SitemapRequestLoader` ([#1525](https://github.com/apify/crawlee-python/pull/1525)) ([dc90127](https://github.com/apify/crawlee-python/commit/dc901271849b239ba2a947e8ebff8e1815e8c4fb)) by [@Mantisus](https://github.com/Mantisus)
13
48
 
14
49
  ### 🐛 Bug Fixes
15
50
 
16
51
  - Improve indexing of the `request_queue_records` table for `SqlRequestQueueClient` ([#1527](https://github.com/apify/crawlee-python/pull/1527)) ([6509534](https://github.com/apify/crawlee-python/commit/65095346a9d8b703b10c91e0510154c3c48a4176)) by [@Mantisus](https://github.com/Mantisus), closes [#1526](https://github.com/apify/crawlee-python/issues/1526)
17
52
  - Improve error handling for `RobotsTxtFile.load` ([#1524](https://github.com/apify/crawlee-python/pull/1524)) ([596a311](https://github.com/apify/crawlee-python/commit/596a31184914a254b3e7a81fd2f48ea8eda7db49)) by [@Mantisus](https://github.com/Mantisus)
18
53
  - Fix `crawler_runtime` not being updated during run and only in the end ([#1540](https://github.com/apify/crawlee-python/pull/1540)) ([0d6c3f6](https://github.com/apify/crawlee-python/commit/0d6c3f6d3337ddb6cab4873747c28cf95605d550)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1541](https://github.com/apify/crawlee-python/issues/1541)
54
+ - Ensure persist state event emission when exiting `EventManager` context ([#1562](https://github.com/apify/crawlee-python/pull/1562)) ([6a44f17](https://github.com/apify/crawlee-python/commit/6a44f172600cbcacebab899082d6efc9105c4e03)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1560](https://github.com/apify/crawlee-python/issues/1560)
19
55
 
20
56
 
21
- <!-- git-cliff-unreleased-end -->
22
57
  ## [1.0.4](https://github.com/apify/crawlee-python/releases/tag/v1.0.4) (2025-10-24)
23
58
 
24
59
  ### 🐛 Bug Fixes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: crawlee
3
- Version: 1.0.5b16
3
+ Version: 1.2.1b5
4
4
  Summary: Crawlee for Python
5
5
  Project-URL: Apify Homepage, https://apify.com
6
6
  Project-URL: Changelog, https://crawlee.dev/python/docs/changelog
@@ -226,6 +226,7 @@ Classifier: Programming Language :: Python :: 3.13
226
226
  Classifier: Programming Language :: Python :: 3.14
227
227
  Classifier: Topic :: Software Development :: Libraries
228
228
  Requires-Python: >=3.10
229
+ Requires-Dist: async-timeout>=5.0.1
229
230
  Requires-Dist: cachetools>=5.5.0
230
231
  Requires-Dist: colorama>=0.4.0
231
232
  Requires-Dist: impit>=0.8.0
@@ -247,7 +248,7 @@ Requires-Dist: scikit-learn>=1.6.0; extra == 'adaptive-crawler'
247
248
  Provides-Extra: all
248
249
  Requires-Dist: aiosqlite>=0.21.0; extra == 'all'
249
250
  Requires-Dist: apify-fingerprint-datapoints>=0.0.2; extra == 'all'
250
- Requires-Dist: asyncpg>=0.24.0; (python_version < '3.14') and extra == 'all'
251
+ Requires-Dist: asyncpg>=0.24.0; extra == 'all'
251
252
  Requires-Dist: beautifulsoup4[lxml]>=4.12.0; extra == 'all'
252
253
  Requires-Dist: browserforge>=1.2.3; extra == 'all'
253
254
  Requires-Dist: cookiecutter>=2.6.0; extra == 'all'
@@ -301,7 +302,7 @@ Requires-Dist: playwright>=1.27.0; extra == 'playwright'
301
302
  Provides-Extra: redis
302
303
  Requires-Dist: redis[hiredis]>=7.0.0; extra == 'redis'
303
304
  Provides-Extra: sql-postgres
304
- Requires-Dist: asyncpg>=0.24.0; (python_version < '3.14') and extra == 'sql-postgres'
305
+ Requires-Dist: asyncpg>=0.24.0; extra == 'sql-postgres'
305
306
  Requires-Dist: sqlalchemy[asyncio]<3.0.0,>=2.0.0; extra == 'sql-postgres'
306
307
  Provides-Extra: sql-sqlite
307
308
  Requires-Dist: aiosqlite>=0.21.0; extra == 'sql-sqlite'
@@ -9,7 +9,7 @@ from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext
9
9
  from crawlee.storage_clients import MemoryStorageClient
10
10
 
11
11
 
12
- @get('/')
12
+ @get('/') # type: ignore[untyped-decorator]
13
13
  async def main() -> str:
14
14
  """The crawler entry point that will be called when the HTTP endpoint is accessed."""
15
15
  # highlight-start
@@ -6,10 +6,7 @@ from datetime import timedelta
6
6
  import functions_framework
7
7
  from flask import Request, Response
8
8
 
9
- from crawlee.crawlers import (
10
- BeautifulSoupCrawler,
11
- BeautifulSoupCrawlingContext,
12
- )
9
+ from crawlee.crawlers import BeautifulSoupCrawler, BeautifulSoupCrawlingContext
13
10
  from crawlee.storage_clients import MemoryStorageClient
14
11
 
15
12
 
@@ -51,7 +48,7 @@ async def main() -> str:
51
48
  # highlight-end
52
49
 
53
50
 
54
- @functions_framework.http
51
+ @functions_framework.http # type: ignore[untyped-decorator]
55
52
  def crawlee_run(request: Request) -> Response:
56
53
  # You can pass data to your crawler using `request`
57
54
  function_id = request.headers['Function-Execution-Id']
@@ -30,7 +30,8 @@ async def main() -> None:
30
30
  await crawler.run(['https://crawlee.dev'])
31
31
 
32
32
  # Export the entire dataset to a CSV file.
33
- await crawler.export_data(path='results.csv')
33
+ # Use semicolon as delimiter and always quote strings.
34
+ await crawler.export_data(path='results.csv', delimiter=';', quoting='all')
34
35
 
35
36
 
36
37
  if __name__ == '__main__':
@@ -30,7 +30,8 @@ async def main() -> None:
30
30
  await crawler.run(['https://crawlee.dev'])
31
31
 
32
32
  # Export the entire dataset to a JSON file.
33
- await crawler.export_data(path='results.json')
33
+ # Set ensure_ascii=False to allow Unicode characters in the output.
34
+ await crawler.export_data(path='results.json', ensure_ascii=False)
34
35
 
35
36
 
36
37
  if __name__ == '__main__':
@@ -0,0 +1,101 @@
1
+ import asyncio
2
+ from collections.abc import Callable
3
+
4
+ from yarl import URL
5
+
6
+ from crawlee import RequestOptions, RequestTransformAction
7
+ from crawlee.crawlers import BeautifulSoupCrawler, BeautifulSoupCrawlingContext
8
+ from crawlee.http_clients import ImpitHttpClient
9
+ from crawlee.request_loaders import SitemapRequestLoader
10
+
11
+
12
+ # Create a transform_request_function that maps request options based on the host in
13
+ # the URL
14
+ def create_transform_request(
15
+ data_mapper: dict[str, dict],
16
+ ) -> Callable[[RequestOptions], RequestOptions | RequestTransformAction]:
17
+ def transform_request(
18
+ request_options: RequestOptions,
19
+ ) -> RequestOptions | RequestTransformAction:
20
+ # According to the Sitemap protocol, all URLs in a Sitemap must be from a single
21
+ # host.
22
+ request_host = URL(request_options['url']).host
23
+
24
+ if request_host and (mapping_data := data_mapper.get(request_host)):
25
+ # Set properties from the mapping data
26
+ if 'label' in mapping_data:
27
+ request_options['label'] = mapping_data['label']
28
+ if 'user_data' in mapping_data:
29
+ request_options['user_data'] = mapping_data['user_data']
30
+
31
+ return request_options
32
+
33
+ return 'unchanged'
34
+
35
+ return transform_request
36
+
37
+
38
+ async def main() -> None:
39
+ # Prepare data mapping for hosts
40
+ apify_host = URL('https://apify.com/sitemap.xml').host
41
+ crawlee_host = URL('https://crawlee.dev/sitemap.xml').host
42
+
43
+ if not apify_host or not crawlee_host:
44
+ raise ValueError('Unable to extract host from URLs')
45
+
46
+ data_map = {
47
+ apify_host: {
48
+ 'label': 'apify',
49
+ 'user_data': {'source': 'apify'},
50
+ },
51
+ crawlee_host: {
52
+ 'label': 'crawlee',
53
+ 'user_data': {'source': 'crawlee'},
54
+ },
55
+ }
56
+
57
+ # Initialize the SitemapRequestLoader with the transform function
58
+ async with SitemapRequestLoader(
59
+ # Set the sitemap URLs and the HTTP client
60
+ sitemap_urls=['https://crawlee.dev/sitemap.xml', 'https://apify.com/sitemap.xml'],
61
+ http_client=ImpitHttpClient(),
62
+ transform_request_function=create_transform_request(data_map),
63
+ ) as sitemap_loader:
64
+ # Convert the sitemap loader to a request manager
65
+ request_manager = await sitemap_loader.to_tandem()
66
+
67
+ # Create and configure the crawler
68
+ crawler = BeautifulSoupCrawler(
69
+ request_manager=request_manager,
70
+ max_requests_per_crawl=10,
71
+ )
72
+
73
+ # Create default handler for requests without a specific label
74
+ @crawler.router.default_handler
75
+ async def handler(context: BeautifulSoupCrawlingContext) -> None:
76
+ source = context.request.user_data.get('source', 'unknown')
77
+ context.log.info(
78
+ f'Processing request: {context.request.url} from source: {source}'
79
+ )
80
+
81
+ # Create handler for requests labeled 'apify'
82
+ @crawler.router.handler('apify')
83
+ async def apify_handler(context: BeautifulSoupCrawlingContext) -> None:
84
+ source = context.request.user_data.get('source', 'unknown')
85
+ context.log.info(
86
+ f'Apify handler processing: {context.request.url} from source: {source}'
87
+ )
88
+
89
+ # Create handler for requests labeled 'crawlee'
90
+ @crawler.router.handler('crawlee')
91
+ async def crawlee_handler(context: BeautifulSoupCrawlingContext) -> None:
92
+ source = context.request.user_data.get('source', 'unknown')
93
+ context.log.info(
94
+ f'Crawlee handler processing: {context.request.url} from source: {source}'
95
+ )
96
+
97
+ await crawler.run()
98
+
99
+
100
+ if __name__ == '__main__':
101
+ asyncio.run(main())
@@ -11,7 +11,7 @@ import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
11
11
  import JsonExample from '!!raw-loader!roa-loader!./code_examples/export_entire_dataset_to_file_json.py';
12
12
  import CsvExample from '!!raw-loader!roa-loader!./code_examples/export_entire_dataset_to_file_csv.py';
13
13
 
14
- This example demonstrates how to use the <ApiLink to="class/BasicCrawler#export_data">`BasicCrawler.export_data`</ApiLink> method of the crawler to export the entire default dataset to a single file. This method supports exporting data in either CSV or JSON format.
14
+ This example demonstrates how to use the <ApiLink to="class/BasicCrawler#export_data">`BasicCrawler.export_data`</ApiLink> method of the crawler to export the entire default dataset to a single file. This method supports exporting data in either CSV or JSON format and also accepts additional keyword arguments so you can fine-tune the underlying `json.dump` or `csv.writer` behavior.
15
15
 
16
16
  :::note
17
17
 
@@ -0,0 +1,22 @@
1
+ ---
2
+ id: using-sitemap-request-loader
3
+ title: Using sitemap request loader
4
+ ---
5
+
6
+ import ApiLink from '@site/src/components/ApiLink';
7
+
8
+ import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
9
+
10
+ import SitemapRequestLoaderExample from '!!raw-loader!roa-loader!./code_examples/using_sitemap_request_loader.py';
11
+
12
+ This example demonstrates how to use <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> to crawl websites that provide `sitemap.xml` files following the [Sitemaps protocol](https://www.sitemaps.org/protocol.html). The <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> processes sitemaps in a streaming fashion without loading them entirely into memory, making it suitable for large sitemaps.
13
+
14
+ The example shows how to use the `transform_request_function` parameter to configure request options based on URL patterns. This allows you to modify request properties such as labels and user data based on the source URL, enabling different handling logic for different websites or sections.
15
+
16
+ The following code example implements processing of sitemaps from two different domains (Apify and Crawlee), with different labels assigned to requests based on their host. The `create_transform_request` function maps each host to the corresponding request configuration, while the crawler uses different handlers based on the assigned labels.
17
+
18
+ <RunnableCodeBlock className="language-python" language="python">
19
+ {SitemapRequestLoaderExample}
20
+ </RunnableCodeBlock>
21
+
22
+ For more information about request loaders, see the [Request loaders guide](../guides/request-loaders).
@@ -14,7 +14,7 @@ from .crawler import lifespan
14
14
  app = FastAPI(lifespan=lifespan, title='Crawler app')
15
15
 
16
16
 
17
- @app.get('/', response_class=HTMLResponse)
17
+ @app.get('/', response_class=HTMLResponse) # type: ignore[untyped-decorator]
18
18
  def index() -> str:
19
19
  return """
20
20
  <!DOCTYPE html>
@@ -32,7 +32,7 @@ def index() -> str:
32
32
  """
33
33
 
34
34
 
35
- @app.get('/scrape')
35
+ @app.get('/scrape') # type: ignore[untyped-decorator]
36
36
  async def scrape_url(request: Request, url: str | None = None) -> dict:
37
37
  if not url:
38
38
  return {'url': 'missing', 'scrape result': 'no results'}
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "crawlee"
7
- version = "1.0.5b16"
7
+ version = "1.2.1b5"
8
8
  description = "Crawlee for Python"
9
9
  authors = [{ name = "Apify Technologies s.r.o.", email = "support@apify.com" }]
10
10
  license = { file = "LICENSE" }
@@ -34,6 +34,7 @@ keywords = [
34
34
  "scraping",
35
35
  ]
36
36
  dependencies = [
37
+ "async-timeout>=5.0.1",
37
38
  "cachetools>=5.5.0",
38
39
  "colorama>=0.4.0",
39
40
  "impit>=0.8.0",
@@ -74,7 +75,7 @@ otel = [
74
75
  ]
75
76
  sql_postgres = [
76
77
  "sqlalchemy[asyncio]>=2.0.0,<3.0.0",
77
- "asyncpg>=0.24.0; python_version < '3.14'" # TODO: https://github.com/apify/crawlee-python/issues/1555
78
+ "asyncpg>=0.24.0"
78
79
  ]
79
80
  sql_sqlite = [
80
81
  "sqlalchemy[asyncio]>=2.0.0,<3.0.0",
@@ -101,7 +102,7 @@ dev = [
101
102
  "build<2.0.0", # For e2e tests.
102
103
  "dycw-pytest-only<3.0.0",
103
104
  "fakeredis[probabilistic,json,lua]<3.0.0",
104
- "mypy~=1.18.0",
105
+ "mypy~=1.19.0",
105
106
  "pre-commit<5.0.0",
106
107
  "proxy-py<3.0.0",
107
108
  "pydoc-markdown<5.0.0",
@@ -117,7 +118,7 @@ dev = [
117
118
  "types-colorama<1.0.0",
118
119
  "types-psutil<8.0.0",
119
120
  "types-python-dateutil<3.0.0",
120
- "uvicorn[standard]~=0.35.0", # https://github.com/apify/crawlee-python/issues/1441
121
+ "uvicorn[standard]<1.0.0",
121
122
  ]
122
123
 
123
124
  [tool.hatch.build.targets.wheel]
@@ -221,6 +222,13 @@ timeout = 300
221
222
  markers = [
222
223
  "run_alone: marks tests that must run in isolation",
223
224
  ]
225
+ # Ignore DeprecationWarnings coming from Uvicorn's internal imports. Uvicorn relies on deprecated
226
+ # modules from `websockets`, which triggers warnings during tests. These are safe to ignore until
227
+ # Uvicorn updates its internals.
228
+ filterwarnings = [
229
+ "ignore:websockets.legacy is deprecated:DeprecationWarning",
230
+ "ignore:websockets.server.WebSocketServerProtocol is deprecated:DeprecationWarning",
231
+ ]
224
232
 
225
233
  [tool.mypy]
226
234
  python_version = "3.10"
@@ -1,6 +1,6 @@
1
1
  from importlib import metadata
2
2
 
3
- from ._request import Request, RequestOptions
3
+ from ._request import Request, RequestOptions, RequestState
4
4
  from ._service_locator import service_locator
5
5
  from ._types import ConcurrencySettings, EnqueueStrategy, HttpHeaders, RequestTransformAction, SkippedReason
6
6
  from ._utils.globs import Glob
@@ -14,6 +14,7 @@ __all__ = [
14
14
  'HttpHeaders',
15
15
  'Request',
16
16
  'RequestOptions',
17
+ 'RequestState',
17
18
  'RequestTransformAction',
18
19
  'SkippedReason',
19
20
  'service_locator',
@@ -34,14 +34,14 @@ class RequestState(IntEnum):
34
34
  class CrawleeRequestData(BaseModel):
35
35
  """Crawlee-specific configuration stored in the `user_data`."""
36
36
 
37
- max_retries: Annotated[int | None, Field(alias='maxRetries')] = None
37
+ max_retries: Annotated[int | None, Field(alias='maxRetries', frozen=True)] = None
38
38
  """Maximum number of retries for this request. Allows to override the global `max_request_retries` option of
39
39
  `BasicCrawler`."""
40
40
 
41
41
  enqueue_strategy: Annotated[EnqueueStrategy | None, Field(alias='enqueueStrategy')] = None
42
42
  """The strategy that was used for enqueuing the request."""
43
43
 
44
- state: RequestState | None = None
44
+ state: RequestState = RequestState.UNPROCESSED
45
45
  """Describes the request's current lifecycle state."""
46
46
 
47
47
  session_rotation_count: Annotated[int | None, Field(alias='sessionRotationCount')] = None
@@ -137,6 +137,8 @@ class RequestOptions(TypedDict):
137
137
  always_enqueue: NotRequired[bool]
138
138
  user_data: NotRequired[dict[str, JsonSerializable]]
139
139
  no_retry: NotRequired[bool]
140
+ enqueue_strategy: NotRequired[EnqueueStrategy]
141
+ max_retries: NotRequired[int | None]
140
142
 
141
143
 
142
144
  @docs_group('Storage data')
@@ -166,7 +168,7 @@ class Request(BaseModel):
166
168
 
167
169
  model_config = ConfigDict(validate_by_name=True, validate_by_alias=True)
168
170
 
169
- unique_key: Annotated[str, Field(alias='uniqueKey')]
171
+ unique_key: Annotated[str, Field(alias='uniqueKey', frozen=True)]
170
172
  """A unique key identifying the request. Two requests with the same `unique_key` are considered as pointing
171
173
  to the same URL.
172
174
 
@@ -178,17 +180,18 @@ class Request(BaseModel):
178
180
  and specify which URLs shall be considered equal.
179
181
  """
180
182
 
181
- url: Annotated[str, BeforeValidator(validate_http_url), Field()]
183
+ url: Annotated[str, BeforeValidator(validate_http_url), Field(frozen=True)]
182
184
  """The URL of the web page to crawl. Must be a valid HTTP or HTTPS URL, and may include query parameters
183
185
  and fragments."""
184
186
 
185
- method: HttpMethod = 'GET'
187
+ method: Annotated[HttpMethod, Field(frozen=True)] = 'GET'
186
188
  """HTTP request method."""
187
189
 
188
190
  payload: Annotated[
189
191
  HttpPayload | None,
190
192
  BeforeValidator(lambda v: v.encode() if isinstance(v, str) else v),
191
193
  PlainSerializer(lambda v: v.decode() if isinstance(v, bytes) else v),
194
+ Field(frozen=True),
192
195
  ] = None
193
196
  """HTTP request payload."""
194
197
 
@@ -250,6 +253,8 @@ class Request(BaseModel):
250
253
  keep_url_fragment: bool = False,
251
254
  use_extended_unique_key: bool = False,
252
255
  always_enqueue: bool = False,
256
+ enqueue_strategy: EnqueueStrategy | None = None,
257
+ max_retries: int | None = None,
253
258
  **kwargs: Any,
254
259
  ) -> Self:
255
260
  """Create a new `Request` instance from a URL.
@@ -277,6 +282,9 @@ class Request(BaseModel):
277
282
  `unique_key` computation. This is only relevant when `unique_key` is not provided.
278
283
  always_enqueue: If set to `True`, the request will be enqueued even if it is already present in the queue.
279
284
  Using this is not allowed when a custom `unique_key` is also provided and will result in a `ValueError`.
285
+ enqueue_strategy: The strategy that will be used for enqueuing the request.
286
+ max_retries: Maximum number of retries for this request. Allows to override the global `max_request_retries`
287
+ option of `BasicCrawler`.
280
288
  **kwargs: Additional request properties.
281
289
  """
282
290
  if unique_key is not None and always_enqueue:
@@ -299,7 +307,21 @@ class Request(BaseModel):
299
307
  )
300
308
 
301
309
  if always_enqueue:
302
- unique_key = f'{unique_key}_{crypto_random_object_id()}'
310
+ unique_key = f'{crypto_random_object_id()}|{unique_key}'
311
+
312
+ user_data_dict = kwargs.pop('user_data', {}) or {}
313
+ crawlee_data_dict = user_data_dict.get('__crawlee', {})
314
+
315
+ if max_retries is not None:
316
+ crawlee_data_dict['maxRetries'] = max_retries
317
+
318
+ if enqueue_strategy is not None:
319
+ crawlee_data_dict['enqueueStrategy'] = enqueue_strategy
320
+
321
+ crawlee_data = CrawleeRequestData(**crawlee_data_dict)
322
+
323
+ if crawlee_data:
324
+ user_data_dict['__crawlee'] = crawlee_data
303
325
 
304
326
  request = cls(
305
327
  url=url,
@@ -307,6 +329,7 @@ class Request(BaseModel):
307
329
  method=method,
308
330
  headers=headers,
309
331
  payload=payload,
332
+ user_data=user_data_dict,
310
333
  **kwargs,
311
334
  )
312
335
 
@@ -352,7 +375,7 @@ class Request(BaseModel):
352
375
  self.crawlee_data.crawl_depth = new_value
353
376
 
354
377
  @property
355
- def state(self) -> RequestState | None:
378
+ def state(self) -> RequestState:
356
379
  """Crawlee-specific request handling state."""
357
380
  return self.crawlee_data.state
358
381
 
@@ -365,10 +388,6 @@ class Request(BaseModel):
365
388
  """Crawlee-specific limit on the number of retries of the request."""
366
389
  return self.crawlee_data.max_retries
367
390
 
368
- @max_retries.setter
369
- def max_retries(self, new_max_retries: int) -> None:
370
- self.crawlee_data.max_retries = new_max_retries
371
-
372
391
  @property
373
392
  def session_rotation_count(self) -> int | None:
374
393
  """Crawlee-specific number of finished session rotations for the request."""
@@ -15,7 +15,7 @@ if TYPE_CHECKING:
15
15
  import re
16
16
  from collections.abc import Callable, Coroutine, Sequence
17
17
 
18
- from typing_extensions import NotRequired, Required, Unpack
18
+ from typing_extensions import NotRequired, Required, Self, Unpack
19
19
 
20
20
  from crawlee import Glob, Request
21
21
  from crawlee._request import RequestOptions
@@ -643,6 +643,25 @@ class BasicCrawlingContext:
643
643
  """Return hash of the context. Each context is considered unique."""
644
644
  return id(self)
645
645
 
646
+ def create_modified_copy(
647
+ self,
648
+ push_data: PushDataFunction | None = None,
649
+ add_requests: AddRequestsFunction | None = None,
650
+ get_key_value_store: GetKeyValueStoreFromRequestHandlerFunction | None = None,
651
+ ) -> Self:
652
+ """Create a modified copy of the crawling context with specified changes."""
653
+ original_fields = {field.name: getattr(self, field.name) for field in dataclasses.fields(self)}
654
+ modified_fields = {
655
+ key: value
656
+ for key, value in {
657
+ 'push_data': push_data,
658
+ 'add_requests': add_requests,
659
+ 'get_key_value_store': get_key_value_store,
660
+ }.items()
661
+ if value
662
+ }
663
+ return self.__class__(**{**original_fields, **modified_fields})
664
+
646
665
 
647
666
  class GetDataKwargs(TypedDict):
648
667
  """Keyword arguments for dataset's `get_data` method."""
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- import asyncio
3
+ import inspect
4
4
  from collections.abc import Callable
5
5
  from functools import wraps
6
6
  from typing import Any, TypeVar
@@ -44,4 +44,4 @@ def ensure_context(method: T) -> T:
44
44
 
45
45
  return await method(self, *args, **kwargs)
46
46
 
47
- return async_wrapper if asyncio.iscoroutinefunction(method) else sync_wrapper # type: ignore[return-value]
47
+ return async_wrapper if inspect.iscoroutinefunction(method) else sync_wrapper # type: ignore[return-value]
@@ -163,6 +163,13 @@ async def export_csv_to_stream(
163
163
  dst: TextIO,
164
164
  **kwargs: Unpack[ExportDataCsvKwargs],
165
165
  ) -> None:
166
+ # Set lineterminator to '\n' if not explicitly provided. This prevents double line endings on Windows.
167
+ # The csv.writer default is '\r\n', which when written to a file in text mode on Windows gets converted
168
+ # to '\r\r\n' due to newline translation. By using '\n', we let the platform handle the line ending
169
+ # conversion: '\n' stays as '\n' on Unix, and becomes '\r\n' on Windows.
170
+ if 'lineterminator' not in kwargs:
171
+ kwargs['lineterminator'] = '\n'
172
+
166
173
  writer = csv.writer(dst, **kwargs) # type: ignore[arg-type]
167
174
  write_header = True
168
175
 
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import asyncio
4
+ import inspect
4
5
  from logging import getLogger
5
6
  from typing import TYPE_CHECKING
6
7
 
@@ -49,7 +50,7 @@ class RecurringTask:
49
50
  """
50
51
  sleep_time_secs = self.delay.total_seconds()
51
52
  while True:
52
- await self.func() if asyncio.iscoroutinefunction(self.func) else self.func()
53
+ await self.func() if inspect.iscoroutinefunction(self.func) else self.func()
53
54
  await asyncio.sleep(sleep_time_secs)
54
55
 
55
56
  def start(self) -> None: