crawlee 1.0.5b7__tar.gz → 1.2.1b5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crawlee might be problematic. Click here for more details.

Files changed (710):
  1. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/.github/workflows/build_and_deploy_docs.yaml +7 -3
  2. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/.github/workflows/release.yaml +3 -3
  3. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/.github/workflows/run_code_checks.yaml +5 -3
  4. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/.github/workflows/templates_e2e_tests.yaml +2 -2
  5. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/.gitignore +1 -0
  6. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/CHANGELOG.md +41 -3
  7. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/CONTRIBUTING.md +1 -1
  8. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/Makefile +1 -1
  9. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/PKG-INFO +8 -3
  10. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/deployment/apify_platform.mdx +1 -1
  11. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/deployment/code_examples/google/cloud_run_example.py +1 -1
  12. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/deployment/code_examples/google/google_example.py +2 -5
  13. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/export_entire_dataset_to_file_csv.py +2 -1
  14. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/export_entire_dataset_to_file_json.py +2 -1
  15. crawlee-1.2.1b5/docs/examples/code_examples/using_sitemap_request_loader.py +101 -0
  16. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/export_entire_dataset_to_file.mdx +1 -1
  17. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/playwright_crawler_with_fingerprint_generator.mdx +1 -1
  18. crawlee-1.2.1b5/docs/examples/using_sitemap_request_loader.mdx +22 -0
  19. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/running_in_web_server/server.py +2 -2
  20. crawlee-1.2.1b5/docs/guides/code_examples/storage_clients/redis_storage_client_basic_example.py +10 -0
  21. crawlee-1.2.1b5/docs/guides/code_examples/storage_clients/redis_storage_client_configuration_example.py +27 -0
  22. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/storage_clients.mdx +175 -3
  23. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/trace_and_monitor_crawlers.mdx +1 -1
  24. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/pyproject.toml +32 -21
  25. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/__init__.py +2 -1
  26. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_request.py +30 -11
  27. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_types.py +20 -1
  28. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_utils/context.py +2 -2
  29. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_utils/file.py +7 -0
  30. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_utils/recurring_task.py +2 -1
  31. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_utils/time.py +41 -1
  32. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/configuration.py +3 -1
  33. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/__init__.py +2 -1
  34. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_abstract_http/__init__.py +2 -1
  35. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +48 -13
  36. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +0 -1
  37. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py +6 -2
  38. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_basic/_basic_crawler.py +115 -112
  39. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_basic/_logging_utils.py +23 -4
  40. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawler.py +2 -2
  41. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_parsel/_parsel_crawler.py +2 -2
  42. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_playwright/_playwright_crawler.py +49 -11
  43. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_playwright/_playwright_http_client.py +7 -1
  44. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py +4 -1
  45. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_playwright/_types.py +12 -2
  46. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/errors.py +4 -0
  47. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/events/_event_manager.py +4 -4
  48. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/http_clients/_base.py +4 -0
  49. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/http_clients/_curl_impersonate.py +12 -0
  50. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/http_clients/_httpx.py +16 -6
  51. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/http_clients/_impit.py +25 -10
  52. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/otel/crawler_instrumentor.py +3 -3
  53. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/request_loaders/_sitemap_request_loader.py +17 -4
  54. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/router.py +13 -3
  55. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/sessions/_session_pool.py +1 -1
  56. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/statistics/_models.py +32 -1
  57. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/statistics/_statistics.py +11 -29
  58. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storage_clients/__init__.py +4 -0
  59. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_file_system/_dataset_client.py +2 -2
  60. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_file_system/_key_value_store_client.py +3 -3
  61. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_file_system/_request_queue_client.py +3 -3
  62. crawlee-1.2.1b5/src/crawlee/storage_clients/_redis/__init__.py +6 -0
  63. crawlee-1.2.1b5/src/crawlee/storage_clients/_redis/_client_mixin.py +295 -0
  64. crawlee-1.2.1b5/src/crawlee/storage_clients/_redis/_dataset_client.py +325 -0
  65. crawlee-1.2.1b5/src/crawlee/storage_clients/_redis/_key_value_store_client.py +264 -0
  66. crawlee-1.2.1b5/src/crawlee/storage_clients/_redis/_request_queue_client.py +586 -0
  67. crawlee-1.2.1b5/src/crawlee/storage_clients/_redis/_storage_client.py +146 -0
  68. crawlee-1.2.1b5/src/crawlee/storage_clients/_redis/_utils.py +23 -0
  69. crawlee-1.2.1b5/src/crawlee/storage_clients/_redis/lua_scripts/atomic_bloom_add_requests.lua +36 -0
  70. crawlee-1.2.1b5/src/crawlee/storage_clients/_redis/lua_scripts/atomic_fetch_request.lua +49 -0
  71. crawlee-1.2.1b5/src/crawlee/storage_clients/_redis/lua_scripts/atomic_set_add_requests.lua +37 -0
  72. crawlee-1.2.1b5/src/crawlee/storage_clients/_redis/lua_scripts/reclaim_stale_requests.lua +34 -0
  73. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/e2e/project_template/test_static_crawlers_templates.py +3 -0
  74. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/e2e/project_template/utils.py +3 -2
  75. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/_autoscaling/test_autoscaled_pool.py +2 -4
  76. crawlee-1.2.1b5/tests/unit/_utils/test_shared_timeout.py +57 -0
  77. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/_utils/test_system.py +11 -6
  78. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/browsers/test_playwright_browser_controller.py +5 -1
  79. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/conftest.py +7 -3
  80. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler.py +2 -2
  81. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/crawlers/_basic/test_basic_crawler.py +171 -13
  82. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py +68 -0
  83. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/crawlers/_http/test_http_crawler.py +56 -1
  84. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/crawlers/_parsel/test_parsel_crawler.py +9 -0
  85. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/crawlers/_playwright/test_playwright_crawler.py +122 -1
  86. crawlee-1.2.1b5/tests/unit/crawlers/_playwright/test_utils.py +157 -0
  87. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/events/test_event_manager.py +12 -0
  88. crawlee-1.2.1b5/tests/unit/events/test_local_event_manager.py +25 -0
  89. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/request_loaders/test_sitemap_request_loader.py +35 -0
  90. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/server.py +44 -1
  91. crawlee-1.2.1b5/tests/unit/server_endpoints.py +142 -0
  92. crawlee-1.2.1b5/tests/unit/server_static/test.js +0 -0
  93. crawlee-1.2.1b5/tests/unit/server_static/test.png +0 -0
  94. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py +1 -1
  95. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py +1 -1
  96. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/storage_clients/_file_system/test_fs_rq_client.py +1 -1
  97. crawlee-1.2.1b5/tests/unit/storage_clients/_redis/test_redis_dataset_client.py +146 -0
  98. crawlee-1.2.1b5/tests/unit/storage_clients/_redis/test_redis_kvs_client.py +217 -0
  99. crawlee-1.2.1b5/tests/unit/storage_clients/_redis/test_redis_rq_client.py +257 -0
  100. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/storage_clients/_sql/test_sql_dataset_client.py +1 -1
  101. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/storage_clients/_sql/test_sql_kvs_client.py +1 -1
  102. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/storage_clients/_sql/test_sql_rq_client.py +1 -1
  103. crawlee-1.2.1b5/tests/unit/storages/conftest.py +39 -0
  104. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/storages/test_dataset.py +18 -4
  105. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/storages/test_key_value_store.py +31 -13
  106. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/storages/test_request_queue.py +19 -3
  107. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/storages/test_storage_instance_manager.py +7 -20
  108. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/test_configuration.py +3 -12
  109. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/uv.lock +1026 -700
  110. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/docusaurus.config.js +2 -2
  111. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/package.json +2 -1
  112. crawlee-1.2.1b5/website/src/components/LLMButtons.jsx +510 -0
  113. crawlee-1.2.1b5/website/src/components/LLMButtons.module.css +151 -0
  114. crawlee-1.2.1b5/website/src/components/RunnableCodeBlock.jsx +42 -0
  115. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/pages/home_page_example.py +14 -9
  116. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/pages/index.js +1 -1
  117. crawlee-1.2.1b5/website/src/theme/DocItem/Content/index.js +35 -0
  118. crawlee-1.2.1b5/website/src/theme/DocItem/Content/styles.module.css +22 -0
  119. crawlee-1.2.1b5/website/static/.nojekyll +0 -0
  120. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/yarn.lock +734 -822
  121. crawlee-1.0.5b7/tests/unit/events/test_local_event_manager.py +0 -31
  122. crawlee-1.0.5b7/tests/unit/server_endpoints.py +0 -71
  123. crawlee-1.0.5b7/tests/unit/storages/conftest.py +0 -18
  124. crawlee-1.0.5b7/website/src/components/RunnableCodeBlock.jsx +0 -40
  125. crawlee-1.0.5b7/website/static/img/apify_logo.svg +0 -5
  126. crawlee-1.0.5b7/website/static/img/apify_og_SDK.png +0 -0
  127. crawlee-1.0.5b7/website/static/img/apify_sdk.svg +0 -13
  128. crawlee-1.0.5b7/website/static/img/apify_sdk_white.svg +0 -13
  129. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/.editorconfig +0 -0
  130. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/.github/CODEOWNERS +0 -0
  131. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/.github/pull_request_template.md +0 -0
  132. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/.github/workflows/check_pr_title.yaml +0 -0
  133. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/.github/workflows/pre_release.yaml +0 -0
  134. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/.github/workflows/update_new_issue.yaml +0 -0
  135. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/.markdownlint.yaml +0 -0
  136. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/.pre-commit-config.yaml +0 -0
  137. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/LICENSE +0 -0
  138. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/README.md +0 -0
  139. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/deployment/code_examples/apify/crawler_as_actor_example.py +0 -0
  140. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/deployment/code_examples/apify/get_public_url.py +0 -0
  141. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/deployment/code_examples/apify/log_with_config_example.py +0 -0
  142. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/deployment/code_examples/apify/proxy_advanced_example.py +0 -0
  143. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/deployment/code_examples/apify/proxy_example.py +0 -0
  144. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/deployment/google_cloud.mdx +0 -0
  145. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/deployment/google_cloud_run.mdx +0 -0
  146. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/add_data_to_dataset.mdx +0 -0
  147. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/beautifulsoup_crawler.mdx +0 -0
  148. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/capture_screenshot_using_playwright.mdx +0 -0
  149. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/capturing_page_snapshots_with_error_snapshotter.mdx +0 -0
  150. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/adaptive_playwright_crawler.py +0 -0
  151. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/add_data_to_dataset_bs.py +0 -0
  152. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/add_data_to_dataset_dataset.py +0 -0
  153. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/add_data_to_dataset_pw.py +0 -0
  154. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/beautifulsoup_crawler.py +0 -0
  155. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/beautifulsoup_crawler_keep_alive.py +0 -0
  156. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/beautifulsoup_crawler_stop.py +0 -0
  157. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/capture_screenshot_using_playwright.py +0 -0
  158. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/configure_json_logging.py +0 -0
  159. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_all_links_on_website_bs.py +0 -0
  160. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_all_links_on_website_pw.py +0 -0
  161. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_multiple_urls_bs.py +0 -0
  162. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_multiple_urls_pw.py +0 -0
  163. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_specific_links_on_website_bs.py +0 -0
  164. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_specific_links_on_website_pw.py +0 -0
  165. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_website_with_relative_links_all_links.py +0 -0
  166. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_website_with_relative_links_same_domain.py +0 -0
  167. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_website_with_relative_links_same_hostname.py +0 -0
  168. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/crawl_website_with_relative_links_same_origin.py +0 -0
  169. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/extract_and_add_specific_links_on_website_bs.py +0 -0
  170. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/extract_and_add_specific_links_on_website_pw.py +0 -0
  171. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/fill_and_submit_web_form_crawler.py +0 -0
  172. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/fill_and_submit_web_form_request.py +0 -0
  173. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/parsel_crawler.py +0 -0
  174. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/parsel_crawler_with_error_snapshotter.py +0 -0
  175. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/playwright_block_requests.py +0 -0
  176. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/playwright_crawler.py +0 -0
  177. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/playwright_crawler_with_camoufox.py +0 -0
  178. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/playwright_crawler_with_error_snapshotter.py +0 -0
  179. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/playwright_crawler_with_fingerprint_generator.py +0 -0
  180. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/respect_robots_on_skipped_request.py +0 -0
  181. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/respect_robots_txt_file.py +0 -0
  182. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/resuming_paused_crawl.py +0 -0
  183. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/using_browser_profiles_chrome.py +0 -0
  184. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/code_examples/using_browser_profiles_firefox.py +0 -0
  185. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/crawl_all_links_on_website.mdx +0 -0
  186. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/crawl_multiple_urls.mdx +0 -0
  187. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/crawl_specific_links_on_website.mdx +0 -0
  188. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/crawl_website_with_relative_links.mdx +0 -0
  189. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/crawler_keep_alive.mdx +0 -0
  190. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/crawler_stop.mdx +0 -0
  191. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/fill_and_submit_web_form.mdx +0 -0
  192. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/json_logging.mdx +0 -0
  193. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/parsel_crawler.mdx +0 -0
  194. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/playwright_crawler.mdx +0 -0
  195. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/playwright_crawler_adaptive.mdx +0 -0
  196. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/playwright_crawler_with_block_requests.mdx +0 -0
  197. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/playwright_crawler_with_camoufox.mdx +0 -0
  198. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/respect_robots_txt_file.mdx +0 -0
  199. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/resuming_paused_crawl.mdx +0 -0
  200. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/examples/using_browser_profile.mdx +0 -0
  201. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/architecture_overview.mdx +0 -0
  202. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/avoid_blocking.mdx +0 -0
  203. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/avoid_blocking/default_fingerprint_generator_with_args.py +0 -0
  204. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/avoid_blocking/playwright_with_fingerprint_generator.py +0 -0
  205. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/creating_web_archive/manual_archiving_parsel_crawler.py +0 -0
  206. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/creating_web_archive/manual_archiving_playwright_crawler.py +0 -0
  207. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/creating_web_archive/simple_pw_through_proxy_pywb_server.py +0 -0
  208. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/error_handling/change_handle_error_status.py +0 -0
  209. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/error_handling/disable_retry.py +0 -0
  210. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/error_handling/handle_proxy_error.py +0 -0
  211. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/http_clients/parsel_curl_impersonate_example.py +0 -0
  212. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/http_clients/parsel_httpx_example.py +0 -0
  213. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/http_clients/parsel_impit_example.py +0 -0
  214. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/http_crawlers/beautifulsoup_example.py +0 -0
  215. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/http_crawlers/custom_crawler_example.py +0 -0
  216. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/http_crawlers/http_example.py +0 -0
  217. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/http_crawlers/parsel_example.py +0 -0
  218. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/login_crawler/http_login.py +0 -0
  219. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/login_crawler/playwright_login.py +0 -0
  220. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler/browser_configuration_example.py +0 -0
  221. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler/multiple_launch_example.py +0 -0
  222. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler/plugin_browser_configuration_example.py +0 -0
  223. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler/pre_navigation_hook_example.py +0 -0
  224. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler_adaptive/handler.py +0 -0
  225. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler_adaptive/init_beautifulsoup.py +0 -0
  226. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler_adaptive/init_parsel.py +0 -0
  227. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler_adaptive/init_prediction.py +0 -0
  228. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler_adaptive/pre_nav_hooks.py +0 -0
  229. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler_stagehand/__init__.py +0 -0
  230. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler_stagehand/browser_classes.py +0 -0
  231. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler_stagehand/stagehand_run.py +0 -0
  232. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/playwright_crawler_stagehand/support_classes.py +0 -0
  233. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/proxy_management/inspecting_bs_example.py +0 -0
  234. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/proxy_management/inspecting_pw_example.py +0 -0
  235. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/proxy_management/integration_bs_example.py +0 -0
  236. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/proxy_management/integration_pw_example.py +0 -0
  237. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/proxy_management/quick_start_example.py +0 -0
  238. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/proxy_management/session_bs_example.py +0 -0
  239. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/proxy_management/session_pw_example.py +0 -0
  240. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/proxy_management/tiers_bs_example.py +0 -0
  241. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/proxy_management/tiers_pw_example.py +0 -0
  242. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/request_loaders/rl_basic_example.py +0 -0
  243. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/request_loaders/rl_basic_example_with_persist.py +0 -0
  244. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/request_loaders/rl_tandem_example.py +0 -0
  245. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/request_loaders/rl_tandem_example_explicit.py +0 -0
  246. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/request_loaders/sitemap_basic_example.py +0 -0
  247. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/request_loaders/sitemap_example_with_persist.py +0 -0
  248. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/request_loaders/sitemap_tandem_example.py +0 -0
  249. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/request_loaders/sitemap_tandem_example_explicit.py +0 -0
  250. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/request_router/adaptive_crawler_handlers.py +0 -0
  251. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/request_router/basic_request_handlers.py +0 -0
  252. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/request_router/custom_router_default_only.py +0 -0
  253. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/request_router/error_handler.py +0 -0
  254. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/request_router/failed_request_handler.py +0 -0
  255. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/request_router/http_pre_navigation.py +0 -0
  256. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/request_router/playwright_pre_navigation.py +0 -0
  257. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/request_router/simple_default_handler.py +0 -0
  258. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/running_in_web_server/__init__.py +0 -0
  259. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/running_in_web_server/crawler.py +0 -0
  260. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/scaling_crawlers/max_tasks_per_minute_example.py +0 -0
  261. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/scaling_crawlers/min_and_max_concurrency_example.py +0 -0
  262. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/service_locator/service_conflicts.py +0 -0
  263. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/service_locator/service_crawler_configuration.py +0 -0
  264. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/service_locator/service_crawler_event_manager.py +0 -0
  265. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/service_locator/service_crawler_storage_client.py +0 -0
  266. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/service_locator/service_locator_configuration.py +0 -0
  267. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/service_locator/service_locator_event_manager.py +0 -0
  268. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/service_locator/service_locator_storage_client.py +0 -0
  269. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/service_locator/service_storage_configuration.py +0 -0
  270. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/service_locator/service_storage_storage_client.py +0 -0
  271. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/session_management/multi_sessions_http.py +0 -0
  272. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/session_management/one_session_http.py +0 -0
  273. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/session_management/sm_basic.py +0 -0
  274. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/session_management/sm_beautifulsoup.py +0 -0
  275. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/session_management/sm_http.py +0 -0
  276. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/session_management/sm_parsel.py +0 -0
  277. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/session_management/sm_playwright.py +0 -0
  278. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/session_management/sm_standalone.py +0 -0
  279. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/storage_clients/custom_storage_client_example.py +0 -0
  280. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/storage_clients/file_system_storage_client_basic_example.py +0 -0
  281. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/storage_clients/file_system_storage_client_configuration_example.py +0 -0
  282. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/storage_clients/memory_storage_client_basic_example.py +0 -0
  283. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/storage_clients/registering_storage_clients_example.py +0 -0
  284. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/storage_clients/sql_storage_client_basic_example.py +0 -0
  285. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/storage_clients/sql_storage_client_configuration_example.py +0 -0
  286. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/cleaning_do_not_purge_example.py +0 -0
  287. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/cleaning_purge_explicitly_example.py +0 -0
  288. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/dataset_basic_example.py +0 -0
  289. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/dataset_with_crawler_example.py +0 -0
  290. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/dataset_with_crawler_explicit_example.py +0 -0
  291. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/helper_add_requests_example.py +0 -0
  292. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/helper_enqueue_links_example.py +0 -0
  293. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/kvs_basic_example.py +0 -0
  294. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/kvs_with_crawler_example.py +0 -0
  295. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/kvs_with_crawler_explicit_example.py +0 -0
  296. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/opening.py +0 -0
  297. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/rq_basic_example.py +0 -0
  298. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/rq_with_crawler_example.py +0 -0
  299. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/storages/rq_with_crawler_explicit_example.py +0 -0
  300. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/code_examples/trace_and_monitor_crawlers/instrument_crawler.py +0 -0
  301. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/crawler_login.mdx +0 -0
  302. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/creating_web_archive.mdx +0 -0
  303. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/error_handling.mdx +0 -0
  304. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/http_clients.mdx +0 -0
  305. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/http_crawlers.mdx +0 -0
  306. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/playwright_crawler.mdx +0 -0
  307. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/playwright_crawler_adaptive.mdx +0 -0
  308. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/playwright_crawler_stagehand.mdx +0 -0
  309. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/proxy_management.mdx +0 -0
  310. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/request_loaders.mdx +0 -0
  311. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/request_router.mdx +0 -0
  312. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/running_in_web_server.mdx +0 -0
  313. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/scaling_crawlers.mdx +0 -0
  314. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/service_locator.mdx +0 -0
  315. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/session_management.mdx +0 -0
  316. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/guides/storages.mdx +0 -0
  317. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/01_setting_up.mdx +0 -0
  318. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/02_first_crawler.mdx +0 -0
  319. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/03_adding_more_urls.mdx +0 -0
  320. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/04_real_world_project.mdx +0 -0
  321. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/05_crawling.mdx +0 -0
  322. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/06_scraping.mdx +0 -0
  323. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/07_saving_data.mdx +0 -0
  324. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/08_refactoring.mdx +0 -0
  325. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/09_running_in_cloud.mdx +0 -0
  326. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/code_examples/02_bs.py +0 -0
  327. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/code_examples/02_bs_better.py +0 -0
  328. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/code_examples/02_request_queue.py +0 -0
  329. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/code_examples/03_enqueue_strategy.py +0 -0
  330. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/code_examples/03_finding_new_links.py +0 -0
  331. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/code_examples/03_globs.py +0 -0
  332. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/code_examples/03_original_code.py +0 -0
  333. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/code_examples/03_transform_request.py +0 -0
  334. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/code_examples/04_sanity_check.py +0 -0
  335. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/code_examples/05_crawling_detail.py +0 -0
  336. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/code_examples/05_crawling_listing.py +0 -0
  337. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/code_examples/06_scraping.py +0 -0
  338. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/code_examples/07_final_code.py +0 -0
  339. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/code_examples/07_first_code.py +0 -0
  340. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/code_examples/08_main.py +0 -0
  341. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/code_examples/08_routes.py +0 -0
  342. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/code_examples/09_apify_sdk.py +0 -0
  343. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/code_examples/__init__.py +0 -0
  344. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/code_examples/routes.py +0 -0
  345. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/introduction/index.mdx +0 -0
  346. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/pyproject.toml +0 -0
  347. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/quick-start/code_examples/beautifulsoup_crawler_example.py +0 -0
  348. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/quick-start/code_examples/parsel_crawler_example.py +0 -0
  349. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/quick-start/code_examples/playwright_crawler_example.py +0 -0
  350. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/quick-start/code_examples/playwright_crawler_headful_example.py +0 -0
  351. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/quick-start/index.mdx +0 -0
  352. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/upgrading/upgrading_to_v0x.md +0 -0
  353. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/docs/upgrading/upgrading_to_v1.md +0 -0
  354. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/renovate.json +0 -0
  355. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_autoscaling/__init__.py +0 -0
  356. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_autoscaling/_types.py +0 -0
  357. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_autoscaling/autoscaled_pool.py +0 -0
  358. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_autoscaling/py.typed +0 -0
  359. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_autoscaling/snapshotter.py +0 -0
  360. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_autoscaling/system_status.py +0 -0
  361. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_browserforge_workaround.py +0 -0
  362. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_cli.py +0 -0
  363. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_consts.py +0 -0
  364. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_log_config.py +0 -0
  365. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_service_locator.py +0 -0
  366. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_utils/__init__.py +0 -0
  367. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_utils/blocked.py +0 -0
  368. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_utils/byte_size.py +0 -0
  369. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_utils/console.py +0 -0
  370. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_utils/crypto.py +0 -0
  371. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_utils/docs.py +0 -0
  372. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_utils/globs.py +0 -0
  373. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_utils/html_to_text.py +0 -0
  374. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_utils/models.py +0 -0
  375. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_utils/raise_if_too_many_kwargs.py +0 -0
  376. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_utils/recoverable_state.py +0 -0
  377. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_utils/requests.py +0 -0
  378. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_utils/robots.py +0 -0
  379. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_utils/sitemap.py +0 -0
  380. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_utils/system.py +0 -0
  381. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_utils/try_import.py +0 -0
  382. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_utils/urls.py +0 -0
  383. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_utils/wait.py +0 -0
  384. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/_utils/web.py +0 -0
  385. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/browsers/__init__.py +0 -0
  386. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/browsers/_browser_controller.py +0 -0
  387. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/browsers/_browser_plugin.py +0 -0
  388. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/browsers/_browser_pool.py +0 -0
  389. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/browsers/_playwright_browser.py +0 -0
  390. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/browsers/_playwright_browser_controller.py +0 -0
  391. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/browsers/_playwright_browser_plugin.py +0 -0
  392. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/browsers/_types.py +0 -0
  393. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/browsers/py.typed +0 -0
  394. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_abstract_http/_abstract_http_parser.py +0 -0
  395. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_abstract_http/_http_crawling_context.py +0 -0
  396. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_abstract_http/py.typed +0 -0
  397. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_adaptive_playwright/__init__.py +0 -0
  398. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py +0 -0
  399. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py +0 -0
  400. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_adaptive_playwright/_result_comparator.py +0 -0
  401. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_adaptive_playwright/_utils.py +0 -0
  402. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_basic/__init__.py +0 -0
  403. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_basic/_basic_crawling_context.py +0 -0
  404. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_basic/_context_pipeline.py +0 -0
  405. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_basic/py.typed +0 -0
  406. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_beautifulsoup/__init__.py +0 -0
  407. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawling_context.py +0 -0
  408. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_parser.py +0 -0
  409. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_beautifulsoup/_utils.py +0 -0
  410. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_beautifulsoup/py.typed +0 -0
  411. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_http/__init__.py +0 -0
  412. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_http/_http_crawler.py +0 -0
  413. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_http/_http_parser.py +0 -0
  414. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_parsel/__init__.py +0 -0
  415. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_parsel/_parsel_crawling_context.py +0 -0
  416. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_parsel/_parsel_parser.py +0 -0
  417. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_parsel/_utils.py +0 -0
  418. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_playwright/__init__.py +0 -0
  419. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_playwright/_playwright_crawling_context.py +0 -0
  420. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_playwright/_utils.py +0 -0
  421. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/_types.py +0 -0
  422. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/crawlers/py.typed +0 -0
  423. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/events/__init__.py +0 -0
  424. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/events/_local_event_manager.py +0 -0
  425. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/events/_types.py +0 -0
  426. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/events/py.typed +0 -0
  427. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/fingerprint_suite/__init__.py +0 -0
  428. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/fingerprint_suite/_browserforge_adapter.py +0 -0
  429. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/fingerprint_suite/_consts.py +0 -0
  430. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/fingerprint_suite/_fingerprint_generator.py +0 -0
  431. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/fingerprint_suite/_header_generator.py +0 -0
  432. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/fingerprint_suite/_types.py +0 -0
  433. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/fingerprint_suite/py.typed +0 -0
  434. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/http_clients/__init__.py +0 -0
  435. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/otel/__init__.py +0 -0
  436. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/project_template/cookiecutter.json +0 -0
  437. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/project_template/hooks/post_gen_project.py +0 -0
  438. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/project_template/hooks/pre_gen_project.py +0 -0
  439. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/main.py +0 -0
  440. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/main_beautifulsoup.py +0 -0
  441. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/main_parsel.py +0 -0
  442. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/main_playwright.py +0 -0
  443. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/main_playwright_camoufox.py +0 -0
  444. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/routes_beautifulsoup.py +0 -0
  445. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/routes_camoufox.py +0 -0
  446. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/routes_parsel.py +0 -0
  447. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/routes_playwright.py +0 -0
  448. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/project_template/templates/routes_playwright_camoufox.py +0 -0
  449. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/.dockerignore +0 -0
  450. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile +0 -0
  451. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/README.md +0 -0
  452. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml +0 -0
  453. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/requirements.txt +0 -0
  454. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__init__.py +0 -0
  455. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__main__.py +0 -0
  456. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/main.py +0 -0
  457. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/routes.py +0 -0
  458. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/proxy_configuration.py +0 -0
  459. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/py.typed +0 -0
  460. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/request_loaders/__init__.py +0 -0
  461. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/request_loaders/_request_list.py +0 -0
  462. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/request_loaders/_request_loader.py +0 -0
  463. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/request_loaders/_request_manager.py +0 -0
  464. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/request_loaders/_request_manager_tandem.py +0 -0
  465. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/sessions/__init__.py +0 -0
  466. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/sessions/_cookies.py +0 -0
  467. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/sessions/_models.py +0 -0
  468. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/sessions/_session.py +0 -0
  469. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/sessions/py.typed +0 -0
  470. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/statistics/__init__.py +0 -0
  471. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/statistics/_error_snapshotter.py +0 -0
  472. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/statistics/_error_tracker.py +0 -0
  473. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_base/__init__.py +0 -0
  474. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_base/_dataset_client.py +0 -0
  475. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_base/_key_value_store_client.py +0 -0
  476. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_base/_request_queue_client.py +0 -0
  477. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_base/_storage_client.py +0 -0
  478. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_base/py.typed +0 -0
  479. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_file_system/__init__.py +0 -0
  480. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_file_system/_storage_client.py +0 -0
  481. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_file_system/_utils.py +0 -0
  482. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_file_system/py.typed +0 -0
  483. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_memory/__init__.py +0 -0
  484. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_memory/_dataset_client.py +0 -0
  485. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_memory/_key_value_store_client.py +0 -0
  486. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_memory/_request_queue_client.py +0 -0
  487. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_memory/_storage_client.py +0 -0
  488. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_memory/py.typed +0 -0
  489. {crawlee-1.0.5b7/src/crawlee/storage_clients/_sql → crawlee-1.2.1b5/src/crawlee/storage_clients/_redis}/py.typed +0 -0
  490. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_sql/__init__.py +0 -0
  491. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_sql/_client_mixin.py +0 -0
  492. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_sql/_dataset_client.py +0 -0
  493. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_sql/_db_models.py +0 -0
  494. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_sql/_key_value_store_client.py +0 -0
  495. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_sql/_request_queue_client.py +0 -0
  496. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storage_clients/_sql/_storage_client.py +0 -0
  497. {crawlee-1.0.5b7/src/crawlee/storage_clients → crawlee-1.2.1b5/src/crawlee/storage_clients/_sql}/py.typed +0 -0
  498. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storage_clients/models.py +0 -0
  499. {crawlee-1.0.5b7/src/crawlee/storages → crawlee-1.2.1b5/src/crawlee/storage_clients}/py.typed +0 -0
  500. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storages/__init__.py +0 -0
  501. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storages/_base.py +0 -0
  502. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storages/_dataset.py +0 -0
  503. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storages/_key_value_store.py +0 -0
  504. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storages/_request_queue.py +0 -0
  505. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storages/_storage_instance_manager.py +0 -0
  506. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/src/crawlee/storages/_utils.py +0 -0
  507. /crawlee-1.0.5b7/tests/__init__.py → /crawlee-1.2.1b5/src/crawlee/storages/py.typed +0 -0
  508. {crawlee-1.0.5b7/tests/e2e → crawlee-1.2.1b5/tests}/__init__.py +0 -0
  509. {crawlee-1.0.5b7/tests/unit → crawlee-1.2.1b5/tests/e2e}/__init__.py +0 -0
  510. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/e2e/conftest.py +0 -0
  511. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/README.md +0 -0
  512. /crawlee-1.0.5b7/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawling_context.py → /crawlee-1.2.1b5/tests/unit/__init__.py +0 -0
  513. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/_autoscaling/test_snapshotter.py +0 -0
  514. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/_autoscaling/test_system_status.py +0 -0
  515. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/_statistics/test_error_tracker.py +0 -0
  516. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/_statistics/test_periodic_logging.py +0 -0
  517. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/_statistics/test_persistence.py +0 -0
  518. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/_statistics/test_request_processing_record.py +0 -0
  519. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/_utils/test_byte_size.py +0 -0
  520. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/_utils/test_console.py +0 -0
  521. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/_utils/test_crypto.py +0 -0
  522. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/_utils/test_file.py +0 -0
  523. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/_utils/test_globs.py +0 -0
  524. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/_utils/test_html_to_text.py +0 -0
  525. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/_utils/test_measure_time.py +0 -0
  526. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/_utils/test_raise_if_too_many_kwargs.py +0 -0
  527. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/_utils/test_recurring_task.py +0 -0
  528. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/_utils/test_requests.py +0 -0
  529. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/_utils/test_robots.py +0 -0
  530. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/_utils/test_sitemap.py +0 -0
  531. /crawlee-1.0.5b7/tests/unit/_utils/test_timedelata_ms.py → /crawlee-1.2.1b5/tests/unit/_utils/test_timedelta_ms.py +0 -0
  532. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/_utils/test_urls.py +0 -0
  533. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/browsers/test_browser_pool.py +0 -0
  534. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/browsers/test_playwright_browser.py +0 -0
  535. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/browsers/test_playwright_browser_plugin.py +0 -0
  536. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler_statistics.py +0 -0
  537. /crawlee-1.0.5b7/website/static/.nojekyll → /crawlee-1.2.1b5/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawling_context.py +0 -0
  538. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/crawlers/_adaptive_playwright/test_predictor.py +0 -0
  539. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/crawlers/_basic/test_context_pipeline.py +0 -0
  540. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/fingerprint_suite/test_adapters.py +0 -0
  541. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/fingerprint_suite/test_header_generator.py +0 -0
  542. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/http_clients/test_http_clients.py +0 -0
  543. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/http_clients/test_httpx.py +0 -0
  544. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/otel/test_crawler_instrumentor.py +0 -0
  545. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/proxy_configuration/test_new_proxy_info.py +0 -0
  546. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/proxy_configuration/test_tiers.py +0 -0
  547. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/request_loaders/test_request_list.py +0 -0
  548. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/sessions/test_cookies.py +0 -0
  549. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/sessions/test_models.py +0 -0
  550. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/sessions/test_session.py +0 -0
  551. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/sessions/test_session_pool.py +0 -0
  552. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/storage_clients/_memory/test_memory_dataset_client.py +0 -0
  553. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/storage_clients/_memory/test_memory_kvs_client.py +0 -0
  554. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/storage_clients/_memory/test_memory_rq_client.py +0 -0
  555. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/storages/test_request_manager_tandem.py +0 -0
  556. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/test_cli.py +0 -0
  557. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/test_log_config.py +0 -0
  558. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/test_router.py +0 -0
  559. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/tests/unit/test_service_locator.py +0 -0
  560. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/.eslintrc.json +0 -0
  561. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/.yarnrc.yml +0 -0
  562. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/babel.config.js +0 -0
  563. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/build_api_reference.sh +0 -0
  564. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/generate_module_shortcuts.py +0 -0
  565. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/patches/@docusaurus+core+3.4.0.patch +0 -0
  566. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/patches/@docusaurus+core+3.5.2.patch +0 -0
  567. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/roa-loader/index.js +0 -0
  568. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/roa-loader/package.json +0 -0
  569. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/sidebars.js +0 -0
  570. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/components/ApiLink.jsx +0 -0
  571. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/components/Button.jsx +0 -0
  572. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/components/Button.module.css +0 -0
  573. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/components/CopyButton.jsx +0 -0
  574. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/components/CopyButton.module.css +0 -0
  575. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/components/Gradients.jsx +0 -0
  576. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/components/Highlights.jsx +0 -0
  577. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/components/Highlights.module.css +0 -0
  578. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/components/Homepage/HomepageCliExample.jsx +0 -0
  579. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/components/Homepage/HomepageCliExample.module.css +0 -0
  580. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/components/Homepage/HomepageCtaSection.jsx +0 -0
  581. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/components/Homepage/HomepageCtaSection.module.css +0 -0
  582. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/components/Homepage/HomepageHeroSection.jsx +0 -0
  583. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/components/Homepage/HomepageHeroSection.module.css +0 -0
  584. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/components/Homepage/LanguageInfoWidget.jsx +0 -0
  585. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/components/Homepage/LanguageInfoWidget.module.css +0 -0
  586. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/components/Homepage/LanguageSwitch.jsx +0 -0
  587. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/components/Homepage/LanguageSwitch.module.css +0 -0
  588. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/components/Homepage/RiverSection.jsx +0 -0
  589. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/components/Homepage/RiverSection.module.css +0 -0
  590. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/components/Homepage/ThreeCardsWithIcon.jsx +0 -0
  591. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/components/Homepage/ThreeCardsWithIcon.module.css +0 -0
  592. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/components/Homepage/animated-crawlee-logo-dark.svg +0 -0
  593. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/components/Homepage/animated-crawlee-logo-light.svg +0 -0
  594. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/components/RunnableCodeBlock.module.css +0 -0
  595. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/css/custom.css +0 -0
  596. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/pages/index.module.css +0 -0
  597. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/theme/ColorModeToggle/dark-mode-icon.svg +0 -0
  598. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/theme/ColorModeToggle/index.js +0 -0
  599. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/theme/ColorModeToggle/light-mode-icon.svg +0 -0
  600. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/theme/ColorModeToggle/styles.module.css +0 -0
  601. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/theme/DocItem/Layout/index.js +0 -0
  602. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/theme/DocItem/Layout/styles.module.css +0 -0
  603. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/theme/Footer/LinkItem/index.js +0 -0
  604. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/theme/Footer/LinkItem/index.module.css +0 -0
  605. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/theme/Footer/index.js +0 -0
  606. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/theme/Footer/index.module.css +0 -0
  607. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/theme/MDXComponents/A.js +0 -0
  608. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/theme/Navbar/Content/index.js +0 -0
  609. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/theme/Navbar/Content/styles.module.css +0 -0
  610. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/theme/Navbar/Logo/index.js +0 -0
  611. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/theme/Navbar/Logo/index.module.css +0 -0
  612. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/theme/Navbar/MobileSidebar/Header/index.js +0 -0
  613. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/theme/Navbar/MobileSidebar/Header/index.module.css +0 -0
  614. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/theme/Navbar/MobileSidebar/Layout/index.js +0 -0
  615. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/theme/Navbar/MobileSidebar/PrimaryMenu/index.js +0 -0
  616. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/theme/Navbar/MobileSidebar/index.js +0 -0
  617. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/src/theme/NavbarItem/ComponentTypes.js +0 -0
  618. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/font/lota.woff +0 -0
  619. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/font/lota.woff2 +0 -0
  620. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/API.png +0 -0
  621. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/arrow_right.svg +0 -0
  622. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/auto-scaling-dark.webp +0 -0
  623. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/auto-scaling-light.webp +0 -0
  624. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/check.svg +0 -0
  625. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/chrome-scrape-dark.gif +0 -0
  626. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/chrome-scrape-light.gif +0 -0
  627. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/cloud_icon.svg +0 -0
  628. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/community-dark-icon.svg +0 -0
  629. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/community-light-icon.svg +0 -0
  630. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/crawlee-dark-new.svg +0 -0
  631. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/crawlee-dark.svg +0 -0
  632. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/crawlee-javascript-dark.svg +0 -0
  633. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/crawlee-javascript-light.svg +0 -0
  634. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/crawlee-light-new.svg +0 -0
  635. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/crawlee-light.svg +0 -0
  636. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/crawlee-logo-monocolor.svg +0 -0
  637. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/crawlee-logo.svg +0 -0
  638. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/crawlee-python-dark.svg +0 -0
  639. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/crawlee-python-light.svg +0 -0
  640. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/crawlee-python-og.png +0 -0
  641. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/defaults-dark-icon.svg +0 -0
  642. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/defaults-light-icon.svg +0 -0
  643. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/discord-brand-dark.svg +0 -0
  644. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/discord-brand.svg +0 -0
  645. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/docusaurus.svg +0 -0
  646. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/external-link.svg +0 -0
  647. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/favicon.ico +0 -0
  648. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/favorite-tools-dark.webp +0 -0
  649. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/favorite-tools-light.webp +0 -0
  650. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/features/auto-scaling.svg +0 -0
  651. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/features/automate-everything.svg +0 -0
  652. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/features/fingerprints.svg +0 -0
  653. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/features/node-requests.svg +0 -0
  654. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/features/runs-on-py.svg +0 -0
  655. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/features/storage.svg +0 -0
  656. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/features/works-everywhere.svg +0 -0
  657. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/fill-and-submit-web-form/00.jpg +0 -0
  658. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/fill-and-submit-web-form/01.jpg +0 -0
  659. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/fill-and-submit-web-form/02.jpg +0 -0
  660. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/fill-and-submit-web-form/03.jpg +0 -0
  661. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/getting-started/current-price.jpg +0 -0
  662. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/getting-started/scraping-practice.jpg +0 -0
  663. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/getting-started/select-an-element.jpg +0 -0
  664. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/getting-started/selected-element.jpg +0 -0
  665. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/getting-started/sku.jpg +0 -0
  666. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/getting-started/title.jpg +0 -0
  667. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/github-brand-dark.svg +0 -0
  668. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/github-brand.svg +0 -0
  669. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/guides/jaeger_otel_search_view_example.png +0 -0
  670. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/guides/jaeger_otel_trace_example.png +0 -0
  671. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/hearth copy.svg +0 -0
  672. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/hearth.svg +0 -0
  673. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/javascript_logo.svg +0 -0
  674. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/js_file.svg +0 -0
  675. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/logo-big.svg +0 -0
  676. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/logo-blur.png +0 -0
  677. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/logo-blur.svg +0 -0
  678. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/logo-zoom.svg +0 -0
  679. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/menu-arrows.svg +0 -0
  680. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/oss_logo.png +0 -0
  681. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/puppeteer-live-view-dashboard.png +0 -0
  682. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/puppeteer-live-view-detail.png +0 -0
  683. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/queue-dark-icon.svg +0 -0
  684. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/queue-light-icon.svg +0 -0
  685. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/resuming-paused-crawl/00.webp +0 -0
  686. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/resuming-paused-crawl/01.webp +0 -0
  687. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/robot.png +0 -0
  688. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/routing-dark-icon.svg +0 -0
  689. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/routing-light-icon.svg +0 -0
  690. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/scraping-utils-dark-icon.svg +0 -0
  691. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/scraping-utils-light-icon.svg +0 -0
  692. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/smart-proxy-dark.webp +0 -0
  693. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/smart-proxy-light.webp +0 -0
  694. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/source_code.png +0 -0
  695. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/system.svg +0 -0
  696. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/triangles_dark.svg +0 -0
  697. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/triangles_light.svg +0 -0
  698. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/workflow.svg +0 -0
  699. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/zero-setup-dark-icon.svg +0 -0
  700. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/img/zero-setup-light-icon.svg +0 -0
  701. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/js/custom.js +0 -0
  702. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/static/robots.txt +0 -0
  703. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/tools/docs-prettier.config.js +0 -0
  704. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/tools/utils/externalLink.js +0 -0
  705. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/tools/website_gif/chrome-scrape-dark.gif +0 -0
  706. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/tools/website_gif/chrome-scrape-dark.mp4 +0 -0
  707. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/tools/website_gif/chrome-scrape-light.gif +0 -0
  708. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/tools/website_gif/chrome-scrape-light.mp4 +0 -0
  709. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/tools/website_gif/website_gif.mjs +0 -0
  710. {crawlee-1.0.5b7 → crawlee-1.2.1b5}/website/tsconfig.eslint.json +0 -0
@@ -10,7 +10,7 @@ on:
10
10
 
11
11
  env:
12
12
  NODE_VERSION: 20
13
- PYTHON_VERSION: 3.13
13
+ PYTHON_VERSION: 3.14
14
14
 
15
15
  jobs:
16
16
  build_and_deploy_docs:
@@ -24,7 +24,7 @@ jobs:
24
24
 
25
25
  steps:
26
26
  - name: Checkout repository
27
- uses: actions/checkout@v5
27
+ uses: actions/checkout@v6
28
28
  with:
29
29
  token: ${{ secrets.APIFY_SERVICE_ACCOUNT_GITHUB_TOKEN }}
30
30
  ref: ${{ github.event_name == 'workflow_call' && inputs.ref || github.ref }}
@@ -67,6 +67,10 @@ jobs:
67
67
  uses: actions/deploy-pages@v4
68
68
 
69
69
  - name: Invalidate CloudFront cache
70
- run: gh workflow run invalidate.yaml --repo apify/apify-docs-private
70
+ run: |
71
+ gh workflow run invalidate-cloudfront.yml \
72
+ --repo apify/apify-docs-private \
73
+ --field deployment=crawlee-web
74
+ echo "✅ CloudFront cache invalidation workflow triggered successfully"
71
75
  env:
72
76
  GITHUB_TOKEN: ${{ secrets.APIFY_SERVICE_ACCOUNT_GITHUB_TOKEN }}
@@ -47,13 +47,13 @@ jobs:
47
47
  name: Lint check
48
48
  uses: apify/workflows/.github/workflows/python_lint_check.yaml@main
49
49
  with:
50
- python-versions: '["3.10", "3.11", "3.12", "3.13"]'
50
+ python-versions: '["3.10", "3.11", "3.12", "3.13", "3.14"]'
51
51
 
52
52
  type_check:
53
53
  name: Type check
54
54
  uses: apify/workflows/.github/workflows/python_type_check.yaml@main
55
55
  with:
56
- python-versions: '["3.10", "3.11", "3.12", "3.13"]'
56
+ python-versions: '["3.10", "3.11", "3.12", "3.13", "3.14"]'
57
57
 
58
58
  unit_tests:
59
59
  name: Unit tests
@@ -61,7 +61,7 @@ jobs:
61
61
  secrets:
62
62
  httpbin_url: ${{ secrets.APIFY_HTTPBIN_TOKEN && format('https://httpbin.apify.actor?token={0}', secrets.APIFY_HTTPBIN_TOKEN) || 'https://httpbin.org'}}
63
63
  with:
64
- python-versions: '["3.10", "3.11", "3.12", "3.13"]'
64
+ python-versions: '["3.10", "3.11", "3.12", "3.13", "3.14"]'
65
65
 
66
66
  update_changelog:
67
67
  name: Update changelog
@@ -21,13 +21,13 @@ jobs:
21
21
  name: Lint check
22
22
  uses: apify/workflows/.github/workflows/python_lint_check.yaml@main
23
23
  with:
24
- python-versions: '["3.10", "3.11", "3.12", "3.13"]'
24
+ python-versions: '["3.10", "3.11", "3.12", "3.13", "3.14"]'
25
25
 
26
26
  type_check:
27
27
  name: Type check
28
28
  uses: apify/workflows/.github/workflows/python_type_check.yaml@main
29
29
  with:
30
- python-versions: '["3.10", "3.11", "3.12", "3.13"]'
30
+ python-versions: '["3.10", "3.11", "3.12", "3.13", "3.14"]'
31
31
 
32
32
  unit_tests:
33
33
  name: Unit tests
@@ -35,8 +35,10 @@ jobs:
35
35
  secrets:
36
36
  httpbin_url: ${{ secrets.APIFY_HTTPBIN_TOKEN && format('https://httpbin.apify.actor?token={0}', secrets.APIFY_HTTPBIN_TOKEN) || 'https://httpbin.org'}}
37
37
  with:
38
- python-versions: '["3.10", "3.11", "3.12", "3.13"]'
38
+ python-versions: '["3.10", "3.11", "3.12", "3.13", "3.14"]'
39
+ os: '["ubuntu-latest", "windows-latest", "macos-latest"]'
39
40
 
40
41
  docs_check:
41
42
  name: Docs check
42
43
  uses: apify/workflows/.github/workflows/python_docs_check.yaml@main
44
+ secrets: inherit
@@ -7,7 +7,7 @@ on:
7
7
 
8
8
  env:
9
9
  NODE_VERSION: 22
10
- PYTHON_VERSION: 3.13
10
+ PYTHON_VERSION: 3.14
11
11
 
12
12
  jobs:
13
13
  end_to_end_tests:
@@ -24,7 +24,7 @@ jobs:
24
24
 
25
25
  steps:
26
26
  - name: Checkout repository
27
- uses: actions/checkout@v5
27
+ uses: actions/checkout@v6
28
28
 
29
29
  - name: Setup node
30
30
  uses: actions/setup-node@v6
@@ -3,6 +3,7 @@ __pycache__
3
3
  .mypy_cache
4
4
  .pytest_cache
5
5
  .ruff_cache
6
+ .uv-cache
6
7
 
7
8
  # Virtual envs
8
9
  .venv
@@ -3,19 +3,57 @@
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
5
  <!-- git-cliff-unreleased-start -->
6
- ## 1.0.5 - **not yet released**
6
+ ## 1.2.1 - **not yet released**
7
+
8
+ ### 🐛 Bug Fixes
9
+
10
+ - Fix short error summary ([#1605](https://github.com/apify/crawlee-python/pull/1605)) ([b751208](https://github.com/apify/crawlee-python/commit/b751208d9a56e9d923e4559baeba35e2eede0450)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1602](https://github.com/apify/crawlee-python/issues/1602)
11
+ - Freeze core `Request` fields ([#1603](https://github.com/apify/crawlee-python/pull/1603)) ([ae6d86b](https://github.com/apify/crawlee-python/commit/ae6d86b8c82900116032596201d94cd7875aaadc)) by [@Mantisus](https://github.com/Mantisus)
12
+
13
+
14
+ <!-- git-cliff-unreleased-end -->
15
+ ## [1.2.0](https://github.com/apify/crawlee-python/releases/tag/v1.2.0) (2025-12-08)
16
+
17
+ ### 🚀 Features
18
+
19
+ - Add additional kwargs to Crawler&#x27;s export_data ([#1597](https://github.com/apify/crawlee-python/pull/1597)) ([5977f37](https://github.com/apify/crawlee-python/commit/5977f376b93a7c0d4dd53f0d331a4b04fedba2c6)) by [@vdusek](https://github.com/vdusek), closes [#526](https://github.com/apify/crawlee-python/issues/526)
20
+ - Add `goto_options` for `PlaywrightCrawler` ([#1599](https://github.com/apify/crawlee-python/pull/1599)) ([0b82f3b](https://github.com/apify/crawlee-python/commit/0b82f3b6fb175223ea2aa5b348afcd5fdb767972)) by [@Mantisus](https://github.com/Mantisus), closes [#1576](https://github.com/apify/crawlee-python/issues/1576)
21
+
22
+ ### 🐛 Bug Fixes
23
+
24
+ - Only apply requestHandlerTimeout to request handler ([#1474](https://github.com/apify/crawlee-python/pull/1474)) ([0dfb6c2](https://github.com/apify/crawlee-python/commit/0dfb6c2a13b6650736245fa39b3fbff397644df7)) by [@janbuchar](https://github.com/janbuchar)
25
+ - Handle the case when `error_handler` returns `Request` ([#1595](https://github.com/apify/crawlee-python/pull/1595)) ([8a961a2](https://github.com/apify/crawlee-python/commit/8a961a2b07d0d33a7302dbb13c17f3d90999d390)) by [@Mantisus](https://github.com/Mantisus)
26
+ - Align `Request.state` transitions with `Request` lifecycle ([#1601](https://github.com/apify/crawlee-python/pull/1601)) ([383225f](https://github.com/apify/crawlee-python/commit/383225f9f055d95ffb1302b8cf96f42ec264f1fc)) by [@Mantisus](https://github.com/Mantisus)
27
+
28
+
29
+ ## [1.1.1](https://github.com/apify/crawlee-python/releases/tag/v1.1.1) (2025-12-02)
30
+
31
+ ### 🐛 Bug Fixes
32
+
33
+ - Unify separators in `unique_key` construction ([#1569](https://github.com/apify/crawlee-python/pull/1569)) ([af46a37](https://github.com/apify/crawlee-python/commit/af46a3733b059a8052489296e172f005def953f7)) by [@vdusek](https://github.com/vdusek), closes [#1512](https://github.com/apify/crawlee-python/issues/1512)
34
+ - Fix `same-domain` strategy ignoring public suffix ([#1572](https://github.com/apify/crawlee-python/pull/1572)) ([3d018b2](https://github.com/apify/crawlee-python/commit/3d018b21a28a4bee493829783057188d6106a69b)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1571](https://github.com/apify/crawlee-python/issues/1571)
35
+ - Make context helpers work in `FailedRequestHandler` and `ErrorHandler` ([#1570](https://github.com/apify/crawlee-python/pull/1570)) ([b830019](https://github.com/apify/crawlee-python/commit/b830019350830ac33075316061659e2854f7f4a5)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1532](https://github.com/apify/crawlee-python/issues/1532)
36
+ - Fix non-ASCII character corruption in `FileSystemStorageClient` on systems without UTF-8 default encoding ([#1580](https://github.com/apify/crawlee-python/pull/1580)) ([f179f86](https://github.com/apify/crawlee-python/commit/f179f8671b0b6af9264450e4fef7e49d1cecd2bd)) by [@Mantisus](https://github.com/Mantisus), closes [#1579](https://github.com/apify/crawlee-python/issues/1579)
37
+ - Respect `&lt;base&gt;` when enqueuing ([#1590](https://github.com/apify/crawlee-python/pull/1590)) ([de517a1](https://github.com/apify/crawlee-python/commit/de517a1629cc29b20568143eb64018f216d4ba33)) by [@Mantisus](https://github.com/Mantisus), closes [#1589](https://github.com/apify/crawlee-python/issues/1589)
38
+
39
+
40
+ ## [1.1.0](https://github.com/apify/crawlee-python/releases/tag/v1.1.0) (2025-11-18)
7
41
 
8
42
  ### 🚀 Features
9
43
 
10
44
  - Add `chrome` `BrowserType` for `PlaywrightCrawler` to use the Chrome browser ([#1487](https://github.com/apify/crawlee-python/pull/1487)) ([b06937b](https://github.com/apify/crawlee-python/commit/b06937bbc3afe3c936b554bfc503365c1b2c526b)) by [@Mantisus](https://github.com/Mantisus), closes [#1071](https://github.com/apify/crawlee-python/issues/1071)
45
+ - Add `RedisStorageClient` based on Redis v8.0+ ([#1406](https://github.com/apify/crawlee-python/pull/1406)) ([d08d13d](https://github.com/apify/crawlee-python/commit/d08d13d39203c24ab61fe254b0956d6744db3b5f)) by [@Mantisus](https://github.com/Mantisus)
46
+ - Add support for Python 3.14 ([#1553](https://github.com/apify/crawlee-python/pull/1553)) ([89e9130](https://github.com/apify/crawlee-python/commit/89e9130cabee0fbc974b29c26483b7fa0edf627c)) by [@Mantisus](https://github.com/Mantisus)
47
+ - Add `transform_request_function` parameter for `SitemapRequestLoader` ([#1525](https://github.com/apify/crawlee-python/pull/1525)) ([dc90127](https://github.com/apify/crawlee-python/commit/dc901271849b239ba2a947e8ebff8e1815e8c4fb)) by [@Mantisus](https://github.com/Mantisus)
11
48
 
12
49
  ### 🐛 Bug Fixes
13
50
 
14
51
  - Improve indexing of the `request_queue_records` table for `SqlRequestQueueClient` ([#1527](https://github.com/apify/crawlee-python/pull/1527)) ([6509534](https://github.com/apify/crawlee-python/commit/65095346a9d8b703b10c91e0510154c3c48a4176)) by [@Mantisus](https://github.com/Mantisus), closes [#1526](https://github.com/apify/crawlee-python/issues/1526)
15
52
  - Improve error handling for `RobotsTxtFile.load` ([#1524](https://github.com/apify/crawlee-python/pull/1524)) ([596a311](https://github.com/apify/crawlee-python/commit/596a31184914a254b3e7a81fd2f48ea8eda7db49)) by [@Mantisus](https://github.com/Mantisus)
53
+ - Fix `crawler_runtime` not being updated during run and only in the end ([#1540](https://github.com/apify/crawlee-python/pull/1540)) ([0d6c3f6](https://github.com/apify/crawlee-python/commit/0d6c3f6d3337ddb6cab4873747c28cf95605d550)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1541](https://github.com/apify/crawlee-python/issues/1541)
54
+ - Ensure persist state event emission when exiting `EventManager` context ([#1562](https://github.com/apify/crawlee-python/pull/1562)) ([6a44f17](https://github.com/apify/crawlee-python/commit/6a44f172600cbcacebab899082d6efc9105c4e03)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1560](https://github.com/apify/crawlee-python/issues/1560)
16
55
 
17
56
 
18
- <!-- git-cliff-unreleased-end -->
19
57
  ## [1.0.4](https://github.com/apify/crawlee-python/releases/tag/v1.0.4) (2025-10-24)
20
58
 
21
59
  ### 🐛 Bug Fixes
@@ -282,7 +320,7 @@ All notable changes to this project will be documented in this file.
282
320
 
283
321
  ### 🐛 Bug Fixes
284
322
 
285
- - Fix session managment with retire ([#947](https://github.com/apify/crawlee-python/pull/947)) ([caee03f](https://github.com/apify/crawlee-python/commit/caee03fe3a43cc1d7a8d3f9e19b42df1bdb1c0aa)) by [@Mantisus](https://github.com/Mantisus)
323
+ - Fix session management with retire ([#947](https://github.com/apify/crawlee-python/pull/947)) ([caee03f](https://github.com/apify/crawlee-python/commit/caee03fe3a43cc1d7a8d3f9e19b42df1bdb1c0aa)) by [@Mantisus](https://github.com/Mantisus)
286
324
  - Fix templates - poetry-plugin-export version and camoufox template name ([#952](https://github.com/apify/crawlee-python/pull/952)) ([7addea6](https://github.com/apify/crawlee-python/commit/7addea6605359cceba208e16ec9131724bdb3e9b)) by [@Pijukatel](https://github.com/Pijukatel), closes [#951](https://github.com/apify/crawlee-python/issues/951)
287
325
  - Fix convert relative link to absolute in `enqueue_links` for response with redirect ([#956](https://github.com/apify/crawlee-python/pull/956)) ([694102e](https://github.com/apify/crawlee-python/commit/694102e163bb9021a4830d2545d153f6f8f3de90)) by [@Mantisus](https://github.com/Mantisus), closes [#955](https://github.com/apify/crawlee-python/issues/955)
288
326
  - Fix `CurlImpersonateHttpClient` cookies handler ([#946](https://github.com/apify/crawlee-python/pull/946)) ([ed415c4](https://github.com/apify/crawlee-python/commit/ed415c433da2a40b0ee62534f0730d0737e991b8)) by [@Mantisus](https://github.com/Mantisus)
@@ -103,7 +103,7 @@ make run-docs
103
103
  Publishing new versions to [PyPI](https://pypi.org/project/crawlee) is automated through GitHub Actions.
104
104
 
105
105
  - **Beta releases**: On each commit to the master branch, a new beta release is automatically published. The version number is determined based on the latest release and conventional commits. The beta version suffix is incremented by 1 from the last beta release on PyPI.
106
- - **Stable releases**: A stable version release may be created by triggering the `release` GitHub Actions workflow. The version number is determined based on the latest release and conventional commits (`auto` release type), or it may be overriden using the `custom` release type.
106
+ - **Stable releases**: A stable version release may be created by triggering the `release` GitHub Actions workflow. The version number is determined based on the latest release and conventional commits (`auto` release type), or it may be overridden using the `custom` release type.
107
107
 
108
108
  ### Publishing to PyPI manually
109
109
 
@@ -38,7 +38,7 @@ unit-tests-cov:
38
38
  uv run pytest --numprocesses=auto -vv --cov=src/crawlee --cov-append --cov-report=html tests/unit -m "not run_alone"
39
39
 
40
40
  e2e-templates-tests $(args):
41
- uv run pytest --numprocesses=$(E2E_TESTS_CONCURRENCY) -vv tests/e2e/project_template "$(args)"
41
+ uv run pytest --numprocesses=$(E2E_TESTS_CONCURRENCY) -vv tests/e2e/project_template "$(args)" --timeout=600
42
42
 
43
43
  format:
44
44
  uv run ruff check --fix
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: crawlee
3
- Version: 1.0.5b7
3
+ Version: 1.2.1b5
4
4
  Summary: Crawlee for Python
5
5
  Project-URL: Apify Homepage, https://apify.com
6
6
  Project-URL: Changelog, https://crawlee.dev/python/docs/changelog
@@ -223,15 +223,17 @@ Classifier: Programming Language :: Python :: 3.10
223
223
  Classifier: Programming Language :: Python :: 3.11
224
224
  Classifier: Programming Language :: Python :: 3.12
225
225
  Classifier: Programming Language :: Python :: 3.13
226
+ Classifier: Programming Language :: Python :: 3.14
226
227
  Classifier: Topic :: Software Development :: Libraries
227
228
  Requires-Python: >=3.10
229
+ Requires-Dist: async-timeout>=5.0.1
228
230
  Requires-Dist: cachetools>=5.5.0
229
231
  Requires-Dist: colorama>=0.4.0
230
- Requires-Dist: impit>=0.6.1
232
+ Requires-Dist: impit>=0.8.0
231
233
  Requires-Dist: more-itertools>=10.2.0
232
234
  Requires-Dist: protego>=0.5.0
233
235
  Requires-Dist: psutil>=6.0.0
234
- Requires-Dist: pydantic-settings!=2.7.0,!=2.7.1,!=2.8.0,>=2.2.0
236
+ Requires-Dist: pydantic-settings>=2.12.0
235
237
  Requires-Dist: pydantic>=2.11.0
236
238
  Requires-Dist: pyee>=9.0.0
237
239
  Requires-Dist: tldextract>=5.1.0
@@ -263,6 +265,7 @@ Requires-Dist: opentelemetry-sdk>=1.34.1; extra == 'all'
263
265
  Requires-Dist: opentelemetry-semantic-conventions>=0.54; extra == 'all'
264
266
  Requires-Dist: parsel>=1.10.0; extra == 'all'
265
267
  Requires-Dist: playwright>=1.27.0; extra == 'all'
268
+ Requires-Dist: redis[hiredis]>=7.0.0; extra == 'all'
266
269
  Requires-Dist: rich>=13.9.0; extra == 'all'
267
270
  Requires-Dist: scikit-learn>=1.6.0; extra == 'all'
268
271
  Requires-Dist: sqlalchemy[asyncio]<3.0.0,>=2.0.0; extra == 'all'
@@ -296,6 +299,8 @@ Provides-Extra: playwright
296
299
  Requires-Dist: apify-fingerprint-datapoints>=0.0.2; extra == 'playwright'
297
300
  Requires-Dist: browserforge>=1.2.3; extra == 'playwright'
298
301
  Requires-Dist: playwright>=1.27.0; extra == 'playwright'
302
+ Provides-Extra: redis
303
+ Requires-Dist: redis[hiredis]>=7.0.0; extra == 'redis'
299
304
  Provides-Extra: sql-postgres
300
305
  Requires-Dist: asyncpg>=0.24.0; extra == 'sql-postgres'
301
306
  Requires-Dist: sqlalchemy[asyncio]<3.0.0,>=2.0.0; extra == 'sql-postgres'
@@ -99,7 +99,7 @@ apify run
99
99
  For running Crawlee code as an Actor on [Apify platform](https://apify.com/actors) you need to wrap the body of the main function of your crawler with `async with Actor`.
100
100
 
101
101
  :::info NOTE
102
- Adding `async with Actor` is the only important thing needed to run it on Apify platform as an Actor. It is needed to initialize your Actor (e.g. to set the correct storage implementation) and to correctly handle exitting the process.
102
+ Adding `async with Actor` is the only important thing needed to run it on Apify platform as an Actor. It is needed to initialize your Actor (e.g. to set the correct storage implementation) and to correctly handle exiting the process.
103
103
  :::
104
104
 
105
105
  Let's look at the `BeautifulSoupCrawler` example from the [Quick start](../quick-start) guide:
@@ -9,7 +9,7 @@ from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext
9
9
  from crawlee.storage_clients import MemoryStorageClient
10
10
 
11
11
 
12
- @get('/')
12
+ @get('/') # type: ignore[untyped-decorator]
13
13
  async def main() -> str:
14
14
  """The crawler entry point that will be called when the HTTP endpoint is accessed."""
15
15
  # highlight-start
@@ -6,10 +6,7 @@ from datetime import timedelta
6
6
  import functions_framework
7
7
  from flask import Request, Response
8
8
 
9
- from crawlee.crawlers import (
10
- BeautifulSoupCrawler,
11
- BeautifulSoupCrawlingContext,
12
- )
9
+ from crawlee.crawlers import BeautifulSoupCrawler, BeautifulSoupCrawlingContext
13
10
  from crawlee.storage_clients import MemoryStorageClient
14
11
 
15
12
 
@@ -51,7 +48,7 @@ async def main() -> str:
51
48
  # highlight-end
52
49
 
53
50
 
54
- @functions_framework.http
51
+ @functions_framework.http # type: ignore[untyped-decorator]
55
52
  def crawlee_run(request: Request) -> Response:
56
53
  # You can pass data to your crawler using `request`
57
54
  function_id = request.headers['Function-Execution-Id']
@@ -30,7 +30,8 @@ async def main() -> None:
30
30
  await crawler.run(['https://crawlee.dev'])
31
31
 
32
32
  # Export the entire dataset to a CSV file.
33
- await crawler.export_data(path='results.csv')
33
+ # Use semicolon as delimiter and always quote strings.
34
+ await crawler.export_data(path='results.csv', delimiter=';', quoting='all')
34
35
 
35
36
 
36
37
  if __name__ == '__main__':
@@ -30,7 +30,8 @@ async def main() -> None:
30
30
  await crawler.run(['https://crawlee.dev'])
31
31
 
32
32
  # Export the entire dataset to a JSON file.
33
- await crawler.export_data(path='results.json')
33
+ # Set ensure_ascii=False to allow Unicode characters in the output.
34
+ await crawler.export_data(path='results.json', ensure_ascii=False)
34
35
 
35
36
 
36
37
  if __name__ == '__main__':
@@ -0,0 +1,101 @@
1
+ import asyncio
2
+ from collections.abc import Callable
3
+
4
+ from yarl import URL
5
+
6
+ from crawlee import RequestOptions, RequestTransformAction
7
+ from crawlee.crawlers import BeautifulSoupCrawler, BeautifulSoupCrawlingContext
8
+ from crawlee.http_clients import ImpitHttpClient
9
+ from crawlee.request_loaders import SitemapRequestLoader
10
+
11
+
12
+ # Create a transform_request_function that maps request options based on the host in
13
+ # the URL
14
+ def create_transform_request(
15
+ data_mapper: dict[str, dict],
16
+ ) -> Callable[[RequestOptions], RequestOptions | RequestTransformAction]:
17
+ def transform_request(
18
+ request_options: RequestOptions,
19
+ ) -> RequestOptions | RequestTransformAction:
20
+ # According to the Sitemap protocol, all URLs in a Sitemap must be from a single
21
+ # host.
22
+ request_host = URL(request_options['url']).host
23
+
24
+ if request_host and (mapping_data := data_mapper.get(request_host)):
25
+ # Set properties from the mapping data
26
+ if 'label' in mapping_data:
27
+ request_options['label'] = mapping_data['label']
28
+ if 'user_data' in mapping_data:
29
+ request_options['user_data'] = mapping_data['user_data']
30
+
31
+ return request_options
32
+
33
+ return 'unchanged'
34
+
35
+ return transform_request
36
+
37
+
38
+ async def main() -> None:
39
+ # Prepare data mapping for hosts
40
+ apify_host = URL('https://apify.com/sitemap.xml').host
41
+ crawlee_host = URL('https://crawlee.dev/sitemap.xml').host
42
+
43
+ if not apify_host or not crawlee_host:
44
+ raise ValueError('Unable to extract host from URLs')
45
+
46
+ data_map = {
47
+ apify_host: {
48
+ 'label': 'apify',
49
+ 'user_data': {'source': 'apify'},
50
+ },
51
+ crawlee_host: {
52
+ 'label': 'crawlee',
53
+ 'user_data': {'source': 'crawlee'},
54
+ },
55
+ }
56
+
57
+ # Initialize the SitemapRequestLoader with the transform function
58
+ async with SitemapRequestLoader(
59
+ # Set the sitemap URLs and the HTTP client
60
+ sitemap_urls=['https://crawlee.dev/sitemap.xml', 'https://apify.com/sitemap.xml'],
61
+ http_client=ImpitHttpClient(),
62
+ transform_request_function=create_transform_request(data_map),
63
+ ) as sitemap_loader:
64
+ # Convert the sitemap loader to a request manager
65
+ request_manager = await sitemap_loader.to_tandem()
66
+
67
+ # Create and configure the crawler
68
+ crawler = BeautifulSoupCrawler(
69
+ request_manager=request_manager,
70
+ max_requests_per_crawl=10,
71
+ )
72
+
73
+ # Create default handler for requests without a specific label
74
+ @crawler.router.default_handler
75
+ async def handler(context: BeautifulSoupCrawlingContext) -> None:
76
+ source = context.request.user_data.get('source', 'unknown')
77
+ context.log.info(
78
+ f'Processing request: {context.request.url} from source: {source}'
79
+ )
80
+
81
+ # Create handler for requests labeled 'apify'
82
+ @crawler.router.handler('apify')
83
+ async def apify_handler(context: BeautifulSoupCrawlingContext) -> None:
84
+ source = context.request.user_data.get('source', 'unknown')
85
+ context.log.info(
86
+ f'Apify handler processing: {context.request.url} from source: {source}'
87
+ )
88
+
89
+ # Create handler for requests labeled 'crawlee'
90
+ @crawler.router.handler('crawlee')
91
+ async def crawlee_handler(context: BeautifulSoupCrawlingContext) -> None:
92
+ source = context.request.user_data.get('source', 'unknown')
93
+ context.log.info(
94
+ f'Crawlee handler processing: {context.request.url} from source: {source}'
95
+ )
96
+
97
+ await crawler.run()
98
+
99
+
100
+ if __name__ == '__main__':
101
+ asyncio.run(main())
@@ -11,7 +11,7 @@ import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
11
11
  import JsonExample from '!!raw-loader!roa-loader!./code_examples/export_entire_dataset_to_file_json.py';
12
12
  import CsvExample from '!!raw-loader!roa-loader!./code_examples/export_entire_dataset_to_file_csv.py';
13
13
 
14
- This example demonstrates how to use the <ApiLink to="class/BasicCrawler#export_data">`BasicCrawler.export_data`</ApiLink> method of the crawler to export the entire default dataset to a single file. This method supports exporting data in either CSV or JSON format.
14
+ This example demonstrates how to use the <ApiLink to="class/BasicCrawler#export_data">`BasicCrawler.export_data`</ApiLink> method of the crawler to export the entire default dataset to a single file. This method supports exporting data in either CSV or JSON format and also accepts additional keyword arguments so you can fine-tune the underlying `json.dump` or `csv.writer` behavior.
15
15
 
16
16
  :::note
17
17
 
@@ -1,5 +1,5 @@
1
1
  ---
2
- id: playwright-crawler-with-fingeprint-generator
2
+ id: playwright-crawler-with-fingerprint-generator
3
3
  title: Playwright crawler with fingerprint generator
4
4
  ---
5
5
 
@@ -0,0 +1,22 @@
1
+ ---
2
+ id: using-sitemap-request-loader
3
+ title: Using sitemap request loader
4
+ ---
5
+
6
+ import ApiLink from '@site/src/components/ApiLink';
7
+
8
+ import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
9
+
10
+ import SitemapRequestLoaderExample from '!!raw-loader!roa-loader!./code_examples/using_sitemap_request_loader.py';
11
+
12
+ This example demonstrates how to use <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> to crawl websites that provide `sitemap.xml` files following the [Sitemaps protocol](https://www.sitemaps.org/protocol.html). The <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> processes sitemaps in a streaming fashion without loading them entirely into memory, making it suitable for large sitemaps.
13
+
14
+ The example shows how to use the `transform_request_function` parameter to configure request options based on URL patterns. This allows you to modify request properties such as labels and user data based on the source URL, enabling different handling logic for different websites or sections.
15
+
16
+ The following code example implements processing of sitemaps from two different domains (Apify and Crawlee), with different labels assigned to requests based on their host. The `create_transform_request` function maps each host to the corresponding request configuration, while the crawler uses different handlers based on the assigned labels.
17
+
18
+ <RunnableCodeBlock className="language-python" language="python">
19
+ {SitemapRequestLoaderExample}
20
+ </RunnableCodeBlock>
21
+
22
+ For more information about request loaders, see the [Request loaders guide](../guides/request-loaders).
@@ -14,7 +14,7 @@ from .crawler import lifespan
14
14
  app = FastAPI(lifespan=lifespan, title='Crawler app')
15
15
 
16
16
 
17
- @app.get('/', response_class=HTMLResponse)
17
+ @app.get('/', response_class=HTMLResponse) # type: ignore[untyped-decorator]
18
18
  def index() -> str:
19
19
  return """
20
20
  <!DOCTYPE html>
@@ -32,7 +32,7 @@ def index() -> str:
32
32
  """
33
33
 
34
34
 
35
- @app.get('/scrape')
35
+ @app.get('/scrape') # type: ignore[untyped-decorator]
36
36
  async def scrape_url(request: Request, url: str | None = None) -> dict:
37
37
  if not url:
38
38
  return {'url': 'missing', 'scrape result': 'no results'}
@@ -0,0 +1,10 @@
1
+ from crawlee.crawlers import ParselCrawler
2
+ from crawlee.storage_clients import RedisStorageClient
3
+
4
+ # Create a new instance of storage client using connection string.
5
+ # 'redis://localhost:6379' is just a placeholder; replace it with your actual
6
+ # connection string.
7
+ storage_client = RedisStorageClient(connection_string='redis://localhost:6379')
8
+
9
+ # And pass it to the crawler.
10
+ crawler = ParselCrawler(storage_client=storage_client)
@@ -0,0 +1,27 @@
1
+ from redis.asyncio import Redis
2
+
3
+ from crawlee.configuration import Configuration
4
+ from crawlee.crawlers import ParselCrawler
5
+ from crawlee.storage_clients import RedisStorageClient
6
+
7
+ # Create a new instance of storage client using a Redis client with custom settings.
8
+ # Replace host and port with your actual Redis server configuration.
9
+ # Other Redis client settings can be adjusted as needed.
10
+ storage_client = RedisStorageClient(
11
+ redis=Redis(
12
+ host='localhost',
13
+ port=6379,
14
+ retry_on_timeout=True,
15
+ socket_keepalive=True,
16
+ socket_connect_timeout=10,
17
+ )
18
+ )
19
+
20
+ # Create a configuration with custom settings.
21
+ configuration = Configuration(purge_on_start=False)
22
+
23
+ # And pass them to the crawler.
24
+ crawler = ParselCrawler(
25
+ storage_client=storage_client,
26
+ configuration=configuration,
27
+ )