crawlee: 1.0.0rc1.tar.gz → 1.0.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (684)
  1. {crawlee-1.0.0rc1 → crawlee-1.0.1}/.github/workflows/build_and_deploy_docs.yaml +3 -3
  2. {crawlee-1.0.0rc1 → crawlee-1.0.1}/.github/workflows/check_pr_title.yaml +1 -1
  3. {crawlee-1.0.0rc1 → crawlee-1.0.1}/.github/workflows/pre_release.yaml +1 -1
  4. {crawlee-1.0.0rc1 → crawlee-1.0.1}/.github/workflows/templates_e2e_tests.yaml +2 -2
  5. {crawlee-1.0.0rc1 → crawlee-1.0.1}/.github/workflows/update_new_issue.yaml +1 -1
  6. {crawlee-1.0.0rc1 → crawlee-1.0.1}/CHANGELOG.md +18 -1
  7. {crawlee-1.0.0rc1 → crawlee-1.0.1}/PKG-INFO +12 -5
  8. {crawlee-1.0.0rc1 → crawlee-1.0.1}/README.md +0 -2
  9. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/playwright_crawler_adaptive/handler.py +1 -1
  10. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/request_loaders/rl_basic_example.py +1 -0
  11. crawlee-1.0.1/docs/guides/code_examples/request_loaders/rl_basic_example_with_persist.py +46 -0
  12. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/request_loaders/rl_tandem_example.py +13 -0
  13. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/request_loaders/rl_tandem_example_explicit.py +11 -0
  14. crawlee-1.0.1/docs/guides/code_examples/request_loaders/sitemap_basic_example.py +30 -0
  15. crawlee-1.0.1/docs/guides/code_examples/request_loaders/sitemap_example_with_persist.py +45 -0
  16. crawlee-1.0.1/docs/guides/code_examples/request_loaders/sitemap_tandem_example.py +53 -0
  17. crawlee-1.0.1/docs/guides/code_examples/request_loaders/sitemap_tandem_example_explicit.py +54 -0
  18. crawlee-1.0.1/docs/guides/code_examples/service_locator/service_storage_configuration.py +30 -0
  19. crawlee-1.0.1/docs/guides/code_examples/storage_clients/sql_storage_client_basic_example.py +12 -0
  20. crawlee-1.0.1/docs/guides/code_examples/storage_clients/sql_storage_client_configuration_example.py +33 -0
  21. crawlee-1.0.1/docs/guides/code_examples/storages/opening.py +19 -0
  22. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/request_loaders.mdx +27 -1
  23. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/service_locator.mdx +1 -1
  24. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/storage_clients.mdx +188 -0
  25. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/storages.mdx +22 -9
  26. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/03_adding_more_urls.mdx +1 -1
  27. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/upgrading/upgrading_to_v1.md +91 -0
  28. {crawlee-1.0.0rc1 → crawlee-1.0.1}/pyproject.toml +19 -10
  29. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_autoscaling/snapshotter.py +1 -1
  30. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_request.py +2 -1
  31. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_service_locator.py +44 -24
  32. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_types.py +76 -17
  33. crawlee-1.0.1/src/crawlee/_utils/raise_if_too_many_kwargs.py +12 -0
  34. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/sitemap.py +3 -1
  35. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/system.py +3 -3
  36. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/browsers/_playwright_browser_controller.py +20 -14
  37. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/configuration.py +1 -1
  38. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +3 -1
  39. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_abstract_http/_abstract_http_parser.py +1 -1
  40. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_abstract_http/_http_crawling_context.py +1 -1
  41. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +6 -2
  42. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py +1 -1
  43. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py +2 -1
  44. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py +1 -1
  45. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_basic/_basic_crawler.py +107 -27
  46. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_basic/_logging_utils.py +5 -1
  47. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_playwright/_playwright_crawler.py +6 -1
  48. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/events/_types.py +6 -6
  49. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/fingerprint_suite/_fingerprint_generator.py +3 -0
  50. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/fingerprint_suite/_types.py +2 -2
  51. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml +2 -2
  52. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/{{cookiecutter.project_name}}/requirements.txt +3 -0
  53. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/request_loaders/_request_list.py +1 -1
  54. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/request_loaders/_request_loader.py +5 -1
  55. crawlee-1.0.1/src/crawlee/request_loaders/_sitemap_request_loader.py +357 -0
  56. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/sessions/_models.py +2 -2
  57. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/statistics/_models.py +1 -1
  58. crawlee-1.0.1/src/crawlee/storage_clients/__init__.py +21 -0
  59. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_base/_storage_client.py +13 -0
  60. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_file_system/_dataset_client.py +27 -25
  61. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_file_system/_key_value_store_client.py +27 -23
  62. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_file_system/_request_queue_client.py +84 -98
  63. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_file_system/_storage_client.py +16 -3
  64. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_memory/_dataset_client.py +14 -2
  65. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_memory/_key_value_store_client.py +14 -2
  66. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_memory/_request_queue_client.py +43 -12
  67. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_memory/_storage_client.py +6 -3
  68. crawlee-1.0.1/src/crawlee/storage_clients/_sql/__init__.py +6 -0
  69. crawlee-1.0.1/src/crawlee/storage_clients/_sql/_client_mixin.py +385 -0
  70. crawlee-1.0.1/src/crawlee/storage_clients/_sql/_dataset_client.py +310 -0
  71. crawlee-1.0.1/src/crawlee/storage_clients/_sql/_db_models.py +269 -0
  72. crawlee-1.0.1/src/crawlee/storage_clients/_sql/_key_value_store_client.py +299 -0
  73. crawlee-1.0.1/src/crawlee/storage_clients/_sql/_request_queue_client.py +706 -0
  74. crawlee-1.0.1/src/crawlee/storage_clients/_sql/_storage_client.py +282 -0
  75. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/models.py +10 -10
  76. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storages/_base.py +3 -1
  77. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storages/_dataset.py +9 -2
  78. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storages/_key_value_store.py +9 -2
  79. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storages/_request_queue.py +7 -2
  80. crawlee-1.0.1/src/crawlee/storages/_storage_instance_manager.py +187 -0
  81. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_autoscaling/test_autoscaled_pool.py +4 -2
  82. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_autoscaling/test_snapshotter.py +6 -6
  83. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_utils/test_byte_size.py +2 -2
  84. crawlee-1.0.1/tests/unit/_utils/test_raise_if_too_many_kwargs.py +38 -0
  85. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/browsers/test_browser_pool.py +5 -5
  86. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/browsers/test_playwright_browser_controller.py +30 -4
  87. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/browsers/test_playwright_browser_plugin.py +2 -2
  88. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/conftest.py +13 -15
  89. crawlee-1.0.1/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawling_context.py +0 -0
  90. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/crawlers/_adaptive_playwright/test_predictor.py +1 -1
  91. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/crawlers/_basic/test_basic_crawler.py +225 -14
  92. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py +110 -1
  93. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/crawlers/_http/test_http_crawler.py +7 -3
  94. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/crawlers/_parsel/test_parsel_crawler.py +108 -1
  95. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/crawlers/_playwright/test_playwright_crawler.py +135 -1
  96. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/events/test_event_manager.py +3 -3
  97. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/fingerprint_suite/test_header_generator.py +2 -2
  98. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/request_loaders/test_sitemap_request_loader.py +69 -0
  99. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/sessions/test_session_pool.py +5 -5
  100. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py +5 -17
  101. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py +3 -13
  102. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/storage_clients/_file_system/test_fs_rq_client.py +4 -10
  103. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/storage_clients/_memory/test_memory_dataset_client.py +0 -5
  104. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/storage_clients/_memory/test_memory_kvs_client.py +0 -4
  105. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/storage_clients/_memory/test_memory_rq_client.py +0 -5
  106. crawlee-1.0.1/tests/unit/storage_clients/_sql/test_sql_dataset_client.py +236 -0
  107. crawlee-1.0.1/tests/unit/storage_clients/_sql/test_sql_kvs_client.py +287 -0
  108. crawlee-1.0.1/tests/unit/storage_clients/_sql/test_sql_rq_client.py +239 -0
  109. crawlee-1.0.1/tests/unit/storages/conftest.py +18 -0
  110. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/storages/test_dataset.py +511 -40
  111. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/storages/test_key_value_store.py +506 -41
  112. crawlee-1.0.1/tests/unit/storages/test_request_queue.py +1261 -0
  113. crawlee-1.0.1/tests/unit/storages/test_storage_instance_manager.py +143 -0
  114. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/test_service_locator.py +12 -16
  115. crawlee-1.0.1/uv.lock +3966 -0
  116. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/docusaurus.config.js +8 -4
  117. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/package.json +15 -14
  118. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/css/custom.css +4 -1
  119. crawlee-1.0.1/website/static/.nojekyll +0 -0
  120. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/yarn.lock +1853 -1447
  121. crawlee-1.0.0rc1/docs/guides/code_examples/request_loaders/sitemap_example.py +0 -28
  122. crawlee-1.0.0rc1/docs/guides/code_examples/request_loaders/sitemap_tandem_example.py +0 -40
  123. crawlee-1.0.0rc1/docs/guides/code_examples/request_loaders/sitemap_tandem_example_explicit.py +0 -43
  124. crawlee-1.0.0rc1/docs/guides/code_examples/service_locator/service_storage_configuration.py +0 -22
  125. crawlee-1.0.0rc1/src/crawlee/request_loaders/_sitemap_request_loader.py +0 -177
  126. crawlee-1.0.0rc1/src/crawlee/storage_clients/__init__.py +0 -9
  127. crawlee-1.0.0rc1/src/crawlee/storages/_storage_instance_manager.py +0 -133
  128. crawlee-1.0.0rc1/tests/unit/storages/test_request_queue.py +0 -644
  129. crawlee-1.0.0rc1/uv.lock +0 -3623
  130. {crawlee-1.0.0rc1 → crawlee-1.0.1}/.editorconfig +0 -0
  131. {crawlee-1.0.0rc1 → crawlee-1.0.1}/.github/CODEOWNERS +0 -0
  132. {crawlee-1.0.0rc1 → crawlee-1.0.1}/.github/pull_request_template.md +0 -0
  133. {crawlee-1.0.0rc1 → crawlee-1.0.1}/.github/workflows/release.yaml +0 -0
  134. {crawlee-1.0.0rc1 → crawlee-1.0.1}/.github/workflows/run_code_checks.yaml +0 -0
  135. {crawlee-1.0.0rc1 → crawlee-1.0.1}/.gitignore +0 -0
  136. {crawlee-1.0.0rc1 → crawlee-1.0.1}/.markdownlint.yaml +0 -0
  137. {crawlee-1.0.0rc1 → crawlee-1.0.1}/.pre-commit-config.yaml +0 -0
  138. {crawlee-1.0.0rc1 → crawlee-1.0.1}/CONTRIBUTING.md +0 -0
  139. {crawlee-1.0.0rc1 → crawlee-1.0.1}/LICENSE +0 -0
  140. {crawlee-1.0.0rc1 → crawlee-1.0.1}/Makefile +0 -0
  141. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/deployment/apify_platform.mdx +0 -0
  142. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/deployment/code_examples/apify/crawler_as_actor_example.py +0 -0
  143. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/deployment/code_examples/apify/get_public_url.py +0 -0
  144. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/deployment/code_examples/apify/log_with_config_example.py +0 -0
  145. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/deployment/code_examples/apify/proxy_advanced_example.py +0 -0
  146. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/deployment/code_examples/apify/proxy_example.py +0 -0
  147. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/deployment/code_examples/google/cloud_run_example.py +0 -0
  148. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/deployment/code_examples/google/google_example.py +0 -0
  149. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/deployment/google_cloud.mdx +0 -0
  150. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/deployment/google_cloud_run.mdx +0 -0
  151. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/add_data_to_dataset.mdx +0 -0
  152. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/beautifulsoup_crawler.mdx +0 -0
  153. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/capture_screenshot_using_playwright.mdx +0 -0
  154. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/capturing_page_snapshots_with_error_snapshotter.mdx +0 -0
  155. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/adaptive_playwright_crawler.py +0 -0
  156. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/add_data_to_dataset_bs.py +0 -0
  157. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/add_data_to_dataset_dataset.py +0 -0
  158. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/add_data_to_dataset_pw.py +0 -0
  159. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/beautifulsoup_crawler.py +0 -0
  160. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/beautifulsoup_crawler_keep_alive.py +0 -0
  161. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/beautifulsoup_crawler_stop.py +0 -0
  162. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/capture_screenshot_using_playwright.py +0 -0
  163. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/configure_json_logging.py +0 -0
  164. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/crawl_all_links_on_website_bs.py +0 -0
  165. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/crawl_all_links_on_website_pw.py +0 -0
  166. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/crawl_multiple_urls_bs.py +0 -0
  167. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/crawl_multiple_urls_pw.py +0 -0
  168. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/crawl_specific_links_on_website_bs.py +0 -0
  169. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/crawl_specific_links_on_website_pw.py +0 -0
  170. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/crawl_website_with_relative_links_all_links.py +0 -0
  171. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/crawl_website_with_relative_links_same_domain.py +0 -0
  172. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/crawl_website_with_relative_links_same_hostname.py +0 -0
  173. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/crawl_website_with_relative_links_same_origin.py +0 -0
  174. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/export_entire_dataset_to_file_csv.py +0 -0
  175. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/export_entire_dataset_to_file_json.py +0 -0
  176. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/extract_and_add_specific_links_on_website_bs.py +0 -0
  177. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/extract_and_add_specific_links_on_website_pw.py +0 -0
  178. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/fill_and_submit_web_form_crawler.py +0 -0
  179. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/fill_and_submit_web_form_request.py +0 -0
  180. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/parsel_crawler.py +0 -0
  181. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/parsel_crawler_with_error_snapshotter.py +0 -0
  182. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/playwright_block_requests.py +0 -0
  183. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/playwright_crawler.py +0 -0
  184. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/playwright_crawler_with_camoufox.py +0 -0
  185. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/playwright_crawler_with_error_snapshotter.py +0 -0
  186. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/playwright_crawler_with_fingerprint_generator.py +0 -0
  187. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/respect_robots_on_skipped_request.py +0 -0
  188. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/respect_robots_txt_file.py +0 -0
  189. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/code_examples/resuming_paused_crawl.py +0 -0
  190. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/crawl_all_links_on_website.mdx +0 -0
  191. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/crawl_multiple_urls.mdx +0 -0
  192. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/crawl_specific_links_on_website.mdx +0 -0
  193. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/crawl_website_with_relative_links.mdx +0 -0
  194. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/crawler_keep_alive.mdx +0 -0
  195. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/crawler_stop.mdx +0 -0
  196. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/export_entire_dataset_to_file.mdx +0 -0
  197. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/fill_and_submit_web_form.mdx +0 -0
  198. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/json_logging.mdx +0 -0
  199. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/parsel_crawler.mdx +0 -0
  200. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/playwright_crawler.mdx +0 -0
  201. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/playwright_crawler_adaptive.mdx +0 -0
  202. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/playwright_crawler_with_block_requests.mdx +0 -0
  203. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/playwright_crawler_with_camoufox.mdx +0 -0
  204. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/playwright_crawler_with_fingerprint_generator.mdx +0 -0
  205. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/respect_robots_txt_file.mdx +0 -0
  206. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/examples/resuming_paused_crawl.mdx +0 -0
  207. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/architecture_overview.mdx +0 -0
  208. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/avoid_blocking.mdx +0 -0
  209. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/avoid_blocking/default_fingerprint_generator_with_args.py +0 -0
  210. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/avoid_blocking/playwright_with_fingerprint_generator.py +0 -0
  211. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/creating_web_archive/manual_archiving_parsel_crawler.py +0 -0
  212. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/creating_web_archive/manual_archiving_playwright_crawler.py +0 -0
  213. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/creating_web_archive/simple_pw_through_proxy_pywb_server.py +0 -0
  214. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/error_handling/change_handle_error_status.py +0 -0
  215. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/error_handling/disable_retry.py +0 -0
  216. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/error_handling/handle_proxy_error.py +0 -0
  217. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/http_clients/parsel_curl_impersonate_example.py +0 -0
  218. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/http_clients/parsel_httpx_example.py +0 -0
  219. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/http_clients/parsel_impit_example.py +0 -0
  220. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/http_crawlers/beautifulsoup_example.py +0 -0
  221. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/http_crawlers/custom_crawler_example.py +0 -0
  222. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/http_crawlers/http_example.py +0 -0
  223. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/http_crawlers/parsel_example.py +0 -0
  224. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/login_crawler/http_login.py +0 -0
  225. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/login_crawler/playwright_login.py +0 -0
  226. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/playwright_crawler/browser_configuration_example.py +0 -0
  227. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/playwright_crawler/multiple_launch_example.py +0 -0
  228. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/playwright_crawler/plugin_browser_configuration_example.py +0 -0
  229. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/playwright_crawler/pre_navigation_hook_example.py +0 -0
  230. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/playwright_crawler_adaptive/init_beautifulsoup.py +0 -0
  231. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/playwright_crawler_adaptive/init_parsel.py +0 -0
  232. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/playwright_crawler_adaptive/init_prediction.py +0 -0
  233. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/playwright_crawler_adaptive/pre_nav_hooks.py +0 -0
  234. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/playwright_crawler_stagehand/__init__.py +0 -0
  235. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/playwright_crawler_stagehand/browser_classes.py +0 -0
  236. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/playwright_crawler_stagehand/stagehand_run.py +0 -0
  237. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/playwright_crawler_stagehand/support_classes.py +0 -0
  238. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/proxy_management/inspecting_bs_example.py +0 -0
  239. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/proxy_management/inspecting_pw_example.py +0 -0
  240. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/proxy_management/integration_bs_example.py +0 -0
  241. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/proxy_management/integration_pw_example.py +0 -0
  242. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/proxy_management/quick_start_example.py +0 -0
  243. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/proxy_management/session_bs_example.py +0 -0
  244. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/proxy_management/session_pw_example.py +0 -0
  245. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/proxy_management/tiers_bs_example.py +0 -0
  246. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/proxy_management/tiers_pw_example.py +0 -0
  247. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/request_router/adaptive_crawler_handlers.py +0 -0
  248. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/request_router/basic_request_handlers.py +0 -0
  249. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/request_router/custom_router_default_only.py +0 -0
  250. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/request_router/error_handler.py +0 -0
  251. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/request_router/failed_request_handler.py +0 -0
  252. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/request_router/http_pre_navigation.py +0 -0
  253. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/request_router/playwright_pre_navigation.py +0 -0
  254. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/request_router/simple_default_handler.py +0 -0
  255. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/running_in_web_server/__init__.py +0 -0
  256. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/running_in_web_server/crawler.py +0 -0
  257. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/running_in_web_server/server.py +0 -0
  258. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/scaling_crawlers/max_tasks_per_minute_example.py +0 -0
  259. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/scaling_crawlers/min_and_max_concurrency_example.py +0 -0
  260. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/service_locator/service_conflicts.py +0 -0
  261. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/service_locator/service_crawler_configuration.py +0 -0
  262. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/service_locator/service_crawler_event_manager.py +0 -0
  263. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/service_locator/service_crawler_storage_client.py +0 -0
  264. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/service_locator/service_locator_configuration.py +0 -0
  265. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/service_locator/service_locator_event_manager.py +0 -0
  266. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/service_locator/service_locator_storage_client.py +0 -0
  267. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/service_locator/service_storage_storage_client.py +0 -0
  268. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/session_management/multi_sessions_http.py +0 -0
  269. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/session_management/one_session_http.py +0 -0
  270. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/session_management/sm_basic.py +0 -0
  271. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/session_management/sm_beautifulsoup.py +0 -0
  272. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/session_management/sm_http.py +0 -0
  273. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/session_management/sm_parsel.py +0 -0
  274. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/session_management/sm_playwright.py +0 -0
  275. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/session_management/sm_standalone.py +0 -0
  276. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storage_clients/custom_storage_client_example.py +0 -0
  277. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storage_clients/file_system_storage_client_basic_example.py +0 -0
  278. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storage_clients/file_system_storage_client_configuration_example.py +0 -0
  279. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storage_clients/memory_storage_client_basic_example.py +0 -0
  280. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storage_clients/registering_storage_clients_example.py +0 -0
  281. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storages/cleaning_do_not_purge_example.py +0 -0
  282. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storages/cleaning_purge_explicitly_example.py +0 -0
  283. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storages/dataset_basic_example.py +0 -0
  284. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storages/dataset_with_crawler_example.py +0 -0
  285. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storages/dataset_with_crawler_explicit_example.py +0 -0
  286. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storages/helper_add_requests_example.py +0 -0
  287. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storages/helper_enqueue_links_example.py +0 -0
  288. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storages/kvs_basic_example.py +0 -0
  289. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storages/kvs_with_crawler_example.py +0 -0
  290. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storages/kvs_with_crawler_explicit_example.py +0 -0
  291. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storages/rq_basic_example.py +0 -0
  292. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storages/rq_with_crawler_example.py +0 -0
  293. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/storages/rq_with_crawler_explicit_example.py +0 -0
  294. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/code_examples/trace_and_monitor_crawlers/instrument_crawler.py +0 -0
  295. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/crawler_login.mdx +0 -0
  296. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/creating_web_archive.mdx +0 -0
  297. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/error_handling.mdx +0 -0
  298. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/http_clients.mdx +0 -0
  299. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/http_crawlers.mdx +0 -0
  300. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/playwright_crawler.mdx +0 -0
  301. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/playwright_crawler_adaptive.mdx +0 -0
  302. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/playwright_crawler_stagehand.mdx +0 -0
  303. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/proxy_management.mdx +0 -0
  304. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/request_router.mdx +0 -0
  305. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/running_in_web_server.mdx +0 -0
  306. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/scaling_crawlers.mdx +0 -0
  307. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/session_management.mdx +0 -0
  308. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/guides/trace_and_monitor_crawlers.mdx +0 -0
  309. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/01_setting_up.mdx +0 -0
  310. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/02_first_crawler.mdx +0 -0
  311. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/04_real_world_project.mdx +0 -0
  312. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/05_crawling.mdx +0 -0
  313. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/06_scraping.mdx +0 -0
  314. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/07_saving_data.mdx +0 -0
  315. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/08_refactoring.mdx +0 -0
  316. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/09_running_in_cloud.mdx +0 -0
  317. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/02_bs.py +0 -0
  318. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/02_bs_better.py +0 -0
  319. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/02_request_queue.py +0 -0
  320. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/03_enqueue_strategy.py +0 -0
  321. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/03_finding_new_links.py +0 -0
  322. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/03_globs.py +0 -0
  323. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/03_original_code.py +0 -0
  324. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/03_transform_request.py +0 -0
  325. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/04_sanity_check.py +0 -0
  326. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/05_crawling_detail.py +0 -0
  327. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/05_crawling_listing.py +0 -0
  328. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/06_scraping.py +0 -0
  329. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/07_final_code.py +0 -0
  330. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/07_first_code.py +0 -0
  331. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/08_main.py +0 -0
  332. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/08_routes.py +0 -0
  333. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/09_apify_sdk.py +0 -0
  334. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/__init__.py +0 -0
  335. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/code_examples/routes.py +0 -0
  336. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/introduction/index.mdx +0 -0
  337. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/pyproject.toml +0 -0
  338. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/quick-start/code_examples/beautifulsoup_crawler_example.py +0 -0
  339. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/quick-start/code_examples/parsel_crawler_example.py +0 -0
  340. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/quick-start/code_examples/playwright_crawler_example.py +0 -0
  341. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/quick-start/code_examples/playwright_crawler_headful_example.py +0 -0
  342. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/quick-start/index.mdx +0 -0
  343. {crawlee-1.0.0rc1 → crawlee-1.0.1}/docs/upgrading/upgrading_to_v0x.md +0 -0
  344. {crawlee-1.0.0rc1 → crawlee-1.0.1}/renovate.json +0 -0
  345. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/__init__.py +0 -0
  346. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_autoscaling/__init__.py +0 -0
  347. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_autoscaling/_types.py +0 -0
  348. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_autoscaling/autoscaled_pool.py +0 -0
  349. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_autoscaling/py.typed +0 -0
  350. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_autoscaling/system_status.py +0 -0
  351. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_browserforge_workaround.py +0 -0
  352. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_cli.py +0 -0
  353. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_consts.py +0 -0
  354. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_log_config.py +0 -0
  355. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/__init__.py +0 -0
  356. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/blocked.py +0 -0
  357. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/byte_size.py +0 -0
  358. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/console.py +0 -0
  359. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/context.py +0 -0
  360. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/crypto.py +0 -0
  361. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/docs.py +0 -0
  362. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/file.py +0 -0
  363. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/globs.py +0 -0
  364. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/html_to_text.py +0 -0
  365. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/models.py +0 -0
  366. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/recoverable_state.py +0 -0
  367. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/recurring_task.py +0 -0
  368. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/requests.py +0 -0
  369. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/robots.py +0 -0
  370. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/time.py +0 -0
  371. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/try_import.py +0 -0
  372. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/urls.py +0 -0
  373. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/wait.py +0 -0
  374. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/_utils/web.py +0 -0
  375. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/browsers/__init__.py +0 -0
  376. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/browsers/_browser_controller.py +0 -0
  377. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/browsers/_browser_plugin.py +0 -0
  378. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/browsers/_browser_pool.py +0 -0
  379. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/browsers/_playwright_browser.py +0 -0
  380. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/browsers/_playwright_browser_plugin.py +0 -0
  381. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/browsers/_types.py +0 -0
  382. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/browsers/py.typed +0 -0
  383. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/__init__.py +0 -0
  384. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_abstract_http/__init__.py +0 -0
  385. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_abstract_http/py.typed +0 -0
  386. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_adaptive_playwright/__init__.py +0 -0
  387. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_adaptive_playwright/_result_comparator.py +0 -0
  388. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_adaptive_playwright/_utils.py +0 -0
  389. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_basic/__init__.py +0 -0
  390. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_basic/_basic_crawling_context.py +0 -0
  391. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_basic/_context_pipeline.py +0 -0
  392. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_basic/py.typed +0 -0
  393. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_beautifulsoup/__init__.py +0 -0
  394. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawler.py +0 -0
  395. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawling_context.py +0 -0
  396. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_parser.py +0 -0
  397. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_beautifulsoup/_utils.py +0 -0
  398. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_beautifulsoup/py.typed +0 -0
  399. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_http/__init__.py +0 -0
  400. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_http/_http_crawler.py +0 -0
  401. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_http/_http_parser.py +0 -0
  402. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_parsel/__init__.py +0 -0
  403. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_parsel/_parsel_crawler.py +0 -0
  404. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_parsel/_parsel_crawling_context.py +0 -0
  405. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_parsel/_parsel_parser.py +0 -0
  406. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_parsel/_utils.py +0 -0
  407. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_playwright/__init__.py +0 -0
  408. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_playwright/_playwright_crawling_context.py +0 -0
  409. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_playwright/_playwright_http_client.py +0 -0
  410. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py +0 -0
  411. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_playwright/_types.py +0 -0
  412. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_playwright/_utils.py +0 -0
  413. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/_types.py +0 -0
  414. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/crawlers/py.typed +0 -0
  415. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/errors.py +0 -0
  416. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/events/__init__.py +0 -0
  417. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/events/_event_manager.py +0 -0
  418. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/events/_local_event_manager.py +0 -0
  419. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/events/py.typed +0 -0
  420. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/fingerprint_suite/__init__.py +0 -0
  421. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/fingerprint_suite/_browserforge_adapter.py +0 -0
  422. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/fingerprint_suite/_consts.py +0 -0
  423. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/fingerprint_suite/_header_generator.py +0 -0
  424. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/fingerprint_suite/py.typed +0 -0
  425. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/http_clients/__init__.py +0 -0
  426. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/http_clients/_base.py +0 -0
  427. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/http_clients/_curl_impersonate.py +0 -0
  428. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/http_clients/_httpx.py +0 -0
  429. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/http_clients/_impit.py +0 -0
  430. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/otel/__init__.py +0 -0
  431. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/otel/crawler_instrumentor.py +0 -0
  432. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/cookiecutter.json +0 -0
  433. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/hooks/post_gen_project.py +0 -0
  434. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/hooks/pre_gen_project.py +0 -0
  435. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/templates/main.py +0 -0
  436. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/templates/main_beautifulsoup.py +0 -0
  437. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/templates/main_parsel.py +0 -0
  438. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/templates/main_playwright.py +0 -0
  439. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/templates/main_playwright_camoufox.py +0 -0
  440. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/templates/routes_beautifulsoup.py +0 -0
  441. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/templates/routes_camoufox.py +0 -0
  442. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/templates/routes_parsel.py +0 -0
  443. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/templates/routes_playwright.py +0 -0
  444. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/templates/routes_playwright_camoufox.py +0 -0
  445. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/{{cookiecutter.project_name}}/.dockerignore +0 -0
  446. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile +0 -0
  447. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/{{cookiecutter.project_name}}/README.md +0 -0
  448. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__init__.py +0 -0
  449. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__main__.py +0 -0
  450. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/main.py +0 -0
  451. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/routes.py +0 -0
  452. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/proxy_configuration.py +0 -0
  453. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/py.typed +0 -0
  454. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/request_loaders/__init__.py +0 -0
  455. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/request_loaders/_request_manager.py +0 -0
  456. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/request_loaders/_request_manager_tandem.py +0 -0
  457. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/router.py +0 -0
  458. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/sessions/__init__.py +0 -0
  459. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/sessions/_cookies.py +0 -0
  460. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/sessions/_session.py +0 -0
  461. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/sessions/_session_pool.py +0 -0
  462. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/sessions/py.typed +0 -0
  463. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/statistics/__init__.py +0 -0
  464. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/statistics/_error_snapshotter.py +0 -0
  465. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/statistics/_error_tracker.py +0 -0
  466. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/statistics/_statistics.py +0 -0
  467. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_base/__init__.py +0 -0
  468. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_base/_dataset_client.py +0 -0
  469. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_base/_key_value_store_client.py +0 -0
  470. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_base/_request_queue_client.py +0 -0
  471. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_base/py.typed +0 -0
  472. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_file_system/__init__.py +0 -0
  473. /crawlee-1.0.0rc1/src/crawlee/storage_clients/_file_system/py.typed → /crawlee-1.0.1/src/crawlee/storage_clients/_file_system/_utils.py +0 -0
  474. {crawlee-1.0.0rc1/src/crawlee/storage_clients/_memory → crawlee-1.0.1/src/crawlee/storage_clients/_file_system}/py.typed +0 -0
  475. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storage_clients/_memory/__init__.py +0 -0
  476. {crawlee-1.0.0rc1/src/crawlee/storage_clients → crawlee-1.0.1/src/crawlee/storage_clients/_memory}/py.typed +0 -0
  477. {crawlee-1.0.0rc1/src/crawlee/storages → crawlee-1.0.1/src/crawlee/storage_clients/_sql}/py.typed +0 -0
  478. /crawlee-1.0.0rc1/tests/__init__.py → /crawlee-1.0.1/src/crawlee/storage_clients/py.typed +0 -0
  479. {crawlee-1.0.0rc1 → crawlee-1.0.1}/src/crawlee/storages/__init__.py +0 -0
  480. /crawlee-1.0.0rc1/tests/e2e/__init__.py → /crawlee-1.0.1/src/crawlee/storages/py.typed +0 -0
  481. {crawlee-1.0.0rc1/tests/unit → crawlee-1.0.1/tests}/__init__.py +0 -0
  482. /crawlee-1.0.0rc1/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawling_context.py → /crawlee-1.0.1/tests/e2e/__init__.py +0 -0
  483. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/e2e/conftest.py +0 -0
  484. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/e2e/project_template/test_static_crawlers_templates.py +0 -0
  485. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/e2e/project_template/utils.py +0 -0
  486. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/README.md +0 -0
  487. /crawlee-1.0.0rc1/website/static/.nojekyll → /crawlee-1.0.1/tests/unit/__init__.py +0 -0
  488. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_autoscaling/test_system_status.py +0 -0
  489. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_statistics/test_error_tracker.py +0 -0
  490. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_statistics/test_periodic_logging.py +0 -0
  491. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_statistics/test_persistence.py +0 -0
  492. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_statistics/test_request_processing_record.py +0 -0
  493. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_utils/test_console.py +0 -0
  494. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_utils/test_crypto.py +0 -0
  495. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_utils/test_file.py +0 -0
  496. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_utils/test_globs.py +0 -0
  497. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_utils/test_html_to_text.py +0 -0
  498. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_utils/test_measure_time.py +0 -0
  499. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_utils/test_recurring_task.py +0 -0
  500. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_utils/test_requests.py +0 -0
  501. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_utils/test_robots.py +0 -0
  502. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_utils/test_sitemap.py +0 -0
  503. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_utils/test_system.py +0 -0
  504. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_utils/test_timedelata_ms.py +0 -0
  505. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/_utils/test_urls.py +0 -0
  506. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/browsers/test_playwright_browser.py +0 -0
  507. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler.py +0 -0
  508. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler_statistics.py +0 -0
  509. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/crawlers/_basic/test_context_pipeline.py +0 -0
  510. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/events/test_local_event_manager.py +0 -0
  511. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/fingerprint_suite/test_adapters.py +0 -0
  512. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/http_clients/test_http_clients.py +0 -0
  513. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/http_clients/test_httpx.py +0 -0
  514. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/otel/test_crawler_instrumentor.py +0 -0
  515. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/proxy_configuration/test_new_proxy_info.py +0 -0
  516. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/proxy_configuration/test_tiers.py +0 -0
  517. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/request_loaders/test_request_list.py +0 -0
  518. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/server.py +0 -0
  519. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/server_endpoints.py +0 -0
  520. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/sessions/test_cookies.py +0 -0
  521. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/sessions/test_models.py +0 -0
  522. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/sessions/test_session.py +0 -0
  523. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/storages/test_request_manager_tandem.py +0 -0
  524. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/test_cli.py +0 -0
  525. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/test_configuration.py +0 -0
  526. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/test_log_config.py +0 -0
  527. {crawlee-1.0.0rc1 → crawlee-1.0.1}/tests/unit/test_router.py +0 -0
  528. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/.eslintrc.json +0 -0
  529. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/.yarnrc.yml +0 -0
  530. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/babel.config.js +0 -0
  531. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/build_api_reference.sh +0 -0
  532. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/generate_module_shortcuts.py +0 -0
  533. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/patches/@docusaurus+core+3.4.0.patch +0 -0
  534. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/patches/@docusaurus+core+3.5.2.patch +0 -0
  535. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/roa-loader/index.js +0 -0
  536. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/roa-loader/package.json +0 -0
  537. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/sidebars.js +0 -0
  538. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/ApiLink.jsx +0 -0
  539. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Button.jsx +0 -0
  540. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Button.module.css +0 -0
  541. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/CopyButton.jsx +0 -0
  542. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/CopyButton.module.css +0 -0
  543. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Gradients.jsx +0 -0
  544. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Highlights.jsx +0 -0
  545. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Highlights.module.css +0 -0
  546. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/HomepageCliExample.jsx +0 -0
  547. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/HomepageCliExample.module.css +0 -0
  548. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/HomepageCtaSection.jsx +0 -0
  549. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/HomepageCtaSection.module.css +0 -0
  550. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/HomepageHeroSection.jsx +0 -0
  551. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/HomepageHeroSection.module.css +0 -0
  552. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/LanguageInfoWidget.jsx +0 -0
  553. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/LanguageInfoWidget.module.css +0 -0
  554. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/LanguageSwitch.jsx +0 -0
  555. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/LanguageSwitch.module.css +0 -0
  556. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/RiverSection.jsx +0 -0
  557. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/RiverSection.module.css +0 -0
  558. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/ThreeCardsWithIcon.jsx +0 -0
  559. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/ThreeCardsWithIcon.module.css +0 -0
  560. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/animated-crawlee-logo-dark.svg +0 -0
  561. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/Homepage/animated-crawlee-logo-light.svg +0 -0
  562. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/RunnableCodeBlock.jsx +0 -0
  563. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/components/RunnableCodeBlock.module.css +0 -0
  564. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/pages/home_page_example.py +0 -0
  565. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/pages/index.js +0 -0
  566. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/pages/index.module.css +0 -0
  567. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/ColorModeToggle/dark-mode-icon.svg +0 -0
  568. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/ColorModeToggle/index.js +0 -0
  569. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/ColorModeToggle/light-mode-icon.svg +0 -0
  570. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/ColorModeToggle/styles.module.css +0 -0
  571. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/DocItem/Layout/index.js +0 -0
  572. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/DocItem/Layout/styles.module.css +0 -0
  573. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/Footer/LinkItem/index.js +0 -0
  574. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/Footer/LinkItem/index.module.css +0 -0
  575. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/Footer/index.js +0 -0
  576. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/Footer/index.module.css +0 -0
  577. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/MDXComponents/A.js +0 -0
  578. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/Navbar/Content/index.js +0 -0
  579. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/Navbar/Content/styles.module.css +0 -0
  580. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/Navbar/Logo/index.js +0 -0
  581. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/Navbar/Logo/index.module.css +0 -0
  582. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/Navbar/MobileSidebar/Header/index.js +0 -0
  583. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/Navbar/MobileSidebar/Header/index.module.css +0 -0
  584. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/Navbar/MobileSidebar/Layout/index.js +0 -0
  585. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/Navbar/MobileSidebar/PrimaryMenu/index.js +0 -0
  586. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/Navbar/MobileSidebar/index.js +0 -0
  587. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/src/theme/NavbarItem/ComponentTypes.js +0 -0
  588. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/font/lota.woff +0 -0
  589. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/font/lota.woff2 +0 -0
  590. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/API.png +0 -0
  591. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/apify_logo.svg +0 -0
  592. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/apify_og_SDK.png +0 -0
  593. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/apify_sdk.svg +0 -0
  594. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/apify_sdk_white.svg +0 -0
  595. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/arrow_right.svg +0 -0
  596. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/auto-scaling-dark.webp +0 -0
  597. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/auto-scaling-light.webp +0 -0
  598. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/check.svg +0 -0
  599. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/chrome-scrape-dark.gif +0 -0
  600. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/chrome-scrape-light.gif +0 -0
  601. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/cloud_icon.svg +0 -0
  602. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/community-dark-icon.svg +0 -0
  603. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/community-light-icon.svg +0 -0
  604. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/crawlee-dark-new.svg +0 -0
  605. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/crawlee-dark.svg +0 -0
  606. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/crawlee-javascript-dark.svg +0 -0
  607. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/crawlee-javascript-light.svg +0 -0
  608. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/crawlee-light-new.svg +0 -0
  609. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/crawlee-light.svg +0 -0
  610. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/crawlee-logo-monocolor.svg +0 -0
  611. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/crawlee-logo.svg +0 -0
  612. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/crawlee-python-dark.svg +0 -0
  613. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/crawlee-python-light.svg +0 -0
  614. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/crawlee-python-og.png +0 -0
  615. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/defaults-dark-icon.svg +0 -0
  616. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/defaults-light-icon.svg +0 -0
  617. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/discord-brand-dark.svg +0 -0
  618. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/discord-brand.svg +0 -0
  619. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/docusaurus.svg +0 -0
  620. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/external-link.svg +0 -0
  621. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/favicon.ico +0 -0
  622. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/favorite-tools-dark.webp +0 -0
  623. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/favorite-tools-light.webp +0 -0
  624. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/features/auto-scaling.svg +0 -0
  625. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/features/automate-everything.svg +0 -0
  626. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/features/fingerprints.svg +0 -0
  627. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/features/node-requests.svg +0 -0
  628. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/features/runs-on-py.svg +0 -0
  629. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/features/storage.svg +0 -0
  630. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/features/works-everywhere.svg +0 -0
  631. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/fill-and-submit-web-form/00.jpg +0 -0
  632. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/fill-and-submit-web-form/01.jpg +0 -0
  633. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/fill-and-submit-web-form/02.jpg +0 -0
  634. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/fill-and-submit-web-form/03.jpg +0 -0
  635. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/getting-started/current-price.jpg +0 -0
  636. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/getting-started/scraping-practice.jpg +0 -0
  637. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/getting-started/select-an-element.jpg +0 -0
  638. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/getting-started/selected-element.jpg +0 -0
  639. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/getting-started/sku.jpg +0 -0
  640. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/getting-started/title.jpg +0 -0
  641. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/github-brand-dark.svg +0 -0
  642. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/github-brand.svg +0 -0
  643. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/guides/jaeger_otel_search_view_example.png +0 -0
  644. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/guides/jaeger_otel_trace_example.png +0 -0
  645. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/hearth copy.svg +0 -0
  646. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/hearth.svg +0 -0
  647. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/javascript_logo.svg +0 -0
  648. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/js_file.svg +0 -0
  649. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/logo-big.svg +0 -0
  650. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/logo-blur.png +0 -0
  651. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/logo-blur.svg +0 -0
  652. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/logo-zoom.svg +0 -0
  653. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/menu-arrows.svg +0 -0
  654. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/oss_logo.png +0 -0
  655. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/puppeteer-live-view-dashboard.png +0 -0
  656. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/puppeteer-live-view-detail.png +0 -0
  657. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/queue-dark-icon.svg +0 -0
  658. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/queue-light-icon.svg +0 -0
  659. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/resuming-paused-crawl/00.webp +0 -0
  660. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/resuming-paused-crawl/01.webp +0 -0
  661. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/robot.png +0 -0
  662. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/routing-dark-icon.svg +0 -0
  663. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/routing-light-icon.svg +0 -0
  664. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/scraping-utils-dark-icon.svg +0 -0
  665. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/scraping-utils-light-icon.svg +0 -0
  666. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/smart-proxy-dark.webp +0 -0
  667. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/smart-proxy-light.webp +0 -0
  668. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/source_code.png +0 -0
  669. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/system.svg +0 -0
  670. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/triangles_dark.svg +0 -0
  671. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/triangles_light.svg +0 -0
  672. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/workflow.svg +0 -0
  673. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/zero-setup-dark-icon.svg +0 -0
  674. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/img/zero-setup-light-icon.svg +0 -0
  675. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/js/custom.js +0 -0
  676. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/static/robots.txt +0 -0
  677. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/tools/docs-prettier.config.js +0 -0
  678. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/tools/utils/externalLink.js +0 -0
  679. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/tools/website_gif/chrome-scrape-dark.gif +0 -0
  680. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/tools/website_gif/chrome-scrape-dark.mp4 +0 -0
  681. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/tools/website_gif/chrome-scrape-light.gif +0 -0
  682. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/tools/website_gif/chrome-scrape-light.mp4 +0 -0
  683. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/tools/website_gif/website_gif.mjs +0 -0
  684. {crawlee-1.0.0rc1 → crawlee-1.0.1}/website/tsconfig.eslint.json +0 -0
@@ -30,12 +30,12 @@ jobs:
30
30
  ref: ${{ github.event_name == 'workflow_call' && inputs.ref || github.ref }}
31
31
 
32
32
  - name: Set up Node
33
- uses: actions/setup-node@v4
33
+ uses: actions/setup-node@v5
34
34
  with:
35
35
  node-version: ${{ env.NODE_VERSION }}
36
36
 
37
37
  - name: Set up Python
38
- uses: actions/setup-python@v5
38
+ uses: actions/setup-python@v6
39
39
  with:
40
40
  python-version: ${{ env.PYTHON_VERSION }}
41
41
 
@@ -59,7 +59,7 @@ jobs:
59
59
  uses: actions/configure-pages@v5
60
60
 
61
61
  - name: Upload GitHub Pages artifact
62
- uses: actions/upload-pages-artifact@v3
62
+ uses: actions/upload-pages-artifact@v4
63
63
  with:
64
64
  path: ./website/build
65
65
 
@@ -9,6 +9,6 @@ jobs:
9
9
  name: Check PR title
10
10
  runs-on: ubuntu-latest
11
11
  steps:
12
- - uses: amannn/action-semantic-pull-request@v6.1.0
12
+ - uses: amannn/action-semantic-pull-request@v6.1.1
13
13
  env:
14
14
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -37,7 +37,7 @@ jobs:
37
37
  name: Wait for code checks to pass
38
38
  runs-on: ubuntu-latest
39
39
  steps:
40
- - uses: lewagon/wait-on-check-action@v1.4.0
40
+ - uses: lewagon/wait-on-check-action@v1.4.1
41
41
  with:
42
42
  ref: ${{ github.ref }}
43
43
  repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -27,7 +27,7 @@ jobs:
27
27
  uses: actions/checkout@v5
28
28
 
29
29
  - name: Setup node
30
- uses: actions/setup-node@v4
30
+ uses: actions/setup-node@v5
31
31
  with:
32
32
  node-version: ${{ env.NODE_VERSION }}
33
33
 
@@ -35,7 +35,7 @@ jobs:
35
35
  run: npm install -g apify-cli
36
36
 
37
37
  - name: Set up Python ${{ env.PYTHON_VERSION }}
38
- uses: actions/setup-python@v5
38
+ uses: actions/setup-python@v6
39
39
  with:
40
40
  python-version: ${{ env.PYTHON_VERSION }}
41
41
 
@@ -14,7 +14,7 @@ jobs:
14
14
 
15
15
  steps:
16
16
  # Add the "t-tooling" label to all new issues
17
- - uses: actions/github-script@v7
17
+ - uses: actions/github-script@v8
18
18
  with:
19
19
  script: |
20
20
  github.rest.issues.addLabels({
@@ -2,7 +2,15 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
- ## [1.0.0rc1](https://github.com/apify/crawlee-python/releases/tag/v1.0.0rc1) (2025-08-22)
5
+ ## [1.0.1](https://github.com/apify/crawlee-python/releases/tag/v1.0.1) (2025-10-06)
6
+
7
+ ### 🐛 Bug Fixes
8
+
9
+ - Fix memory leak in `PlaywrightCrawler` on browser context creation ([#1446](https://github.com/apify/crawlee-python/pull/1446)) ([bb181e5](https://github.com/apify/crawlee-python/commit/bb181e58d8070fba38e62d6e57fe981a00e5f035)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1443](https://github.com/apify/crawlee-python/issues/1443)
10
+ - Update templates to handle optional httpx client ([#1440](https://github.com/apify/crawlee-python/pull/1440)) ([c087efd](https://github.com/apify/crawlee-python/commit/c087efd39baedf46ca3e5cae1ddc1acd6396e6c1)) by [@Pijukatel](https://github.com/Pijukatel)
11
+
12
+
13
+ ## [1.0.0](https://github.com/apify/crawlee-python/releases/tag/v1.0.0) (2025-09-29)
6
14
 
7
15
  ### 🚀 Features
8
16
 
@@ -17,6 +25,10 @@ All notable changes to this project will be documented in this file.
17
25
  - Add `impit` option for Crawlee CLI ([#1312](https://github.com/apify/crawlee-python/pull/1312)) ([508d7ce](https://github.com/apify/crawlee-python/commit/508d7ce4d998f37ab2adcf9c057c3c635a69f863)) by [@Mantisus](https://github.com/Mantisus)
18
26
  - Persist RequestList state ([#1274](https://github.com/apify/crawlee-python/pull/1274)) ([cc68014](https://github.com/apify/crawlee-python/commit/cc680147ba3cc8b35b9da70274e53e6f5dd92434)) by [@janbuchar](https://github.com/janbuchar), closes [#99](https://github.com/apify/crawlee-python/issues/99)
19
27
  - Persist `DefaultRenderingTypePredictor` state ([#1340](https://github.com/apify/crawlee-python/pull/1340)) ([fad4c25](https://github.com/apify/crawlee-python/commit/fad4c25fc712915c4a45b24e3290b6f5dbd8a683)) by [@Mantisus](https://github.com/Mantisus), closes [#1272](https://github.com/apify/crawlee-python/issues/1272)
28
+ - Persist the `SitemapRequestLoader` state ([#1347](https://github.com/apify/crawlee-python/pull/1347)) ([27ef9ad](https://github.com/apify/crawlee-python/commit/27ef9ad194552ea9f1321d91a7a52054be9a8a51)) by [@Mantisus](https://github.com/Mantisus), closes [#1269](https://github.com/apify/crawlee-python/issues/1269)
29
+ - Add support for NDU storages ([#1401](https://github.com/apify/crawlee-python/pull/1401)) ([5dbd212](https://github.com/apify/crawlee-python/commit/5dbd212663e7abc37535713f4c6e3a5bbf30a12e)) by [@vdusek](https://github.com/vdusek), closes [#1175](https://github.com/apify/crawlee-python/issues/1175)
30
+ - Add RQ id, name, alias args to `add_requests` and `enqueue_links` methods ([#1413](https://github.com/apify/crawlee-python/pull/1413)) ([1cae2bc](https://github.com/apify/crawlee-python/commit/1cae2bca0b1508fcb3cb419dc239caf33e20a7ef)) by [@Mantisus](https://github.com/Mantisus), closes [#1402](https://github.com/apify/crawlee-python/issues/1402)
31
+ - Add `SqlStorageClient` based on `sqlalchemy` v2+ ([#1339](https://github.com/apify/crawlee-python/pull/1339)) ([07c75a0](https://github.com/apify/crawlee-python/commit/07c75a078b443b58bfaaeb72eb2aa1439458dc47)) by [@Mantisus](https://github.com/Mantisus), closes [#307](https://github.com/apify/crawlee-python/issues/307)
20
32
 
21
33
  ### 🐛 Bug Fixes
22
34
 
@@ -27,6 +39,9 @@ All notable changes to this project will be documented in this file.
27
39
  - Fix `timeout` for `stream` method in `ImpitHttpClient` ([#1352](https://github.com/apify/crawlee-python/pull/1352)) ([54b693b](https://github.com/apify/crawlee-python/commit/54b693b838f135a596e1e9493b565bc558b19a3a)) by [@Mantisus](https://github.com/Mantisus)
28
40
  - Include reason in the session rotation warning logs ([#1363](https://github.com/apify/crawlee-python/pull/1363)) ([d6d7a45](https://github.com/apify/crawlee-python/commit/d6d7a45dd64a906419d9552c45062d726cbb1a0f)) by [@vdusek](https://github.com/vdusek), closes [#1318](https://github.com/apify/crawlee-python/issues/1318)
29
41
  - Improve crawler statistics logging ([#1364](https://github.com/apify/crawlee-python/pull/1364)) ([1eb6da5](https://github.com/apify/crawlee-python/commit/1eb6da5dd85870124593dcad877284ccaed9c0ce)) by [@vdusek](https://github.com/vdusek), closes [#1317](https://github.com/apify/crawlee-python/issues/1317)
42
+ - Do not add a request that is already in progress to `MemoryRequestQueueClient` ([#1384](https://github.com/apify/crawlee-python/pull/1384)) ([3af326c](https://github.com/apify/crawlee-python/commit/3af326c9dfa8fffd56a42ca42981374613739e39)) by [@Mantisus](https://github.com/Mantisus), closes [#1383](https://github.com/apify/crawlee-python/issues/1383)
43
+ - Save `RequestQueueState` for `FileSystemRequestQueueClient` in default KVS ([#1411](https://github.com/apify/crawlee-python/pull/1411)) ([6ee60a0](https://github.com/apify/crawlee-python/commit/6ee60a08ac1f9414e1b792f4935cc3799cb5089a)) by [@Mantisus](https://github.com/Mantisus), closes [#1410](https://github.com/apify/crawlee-python/issues/1410)
44
+ - Set default desired concurrency for non-browser crawlers to 10 ([#1419](https://github.com/apify/crawlee-python/pull/1419)) ([1cc9401](https://github.com/apify/crawlee-python/commit/1cc940197600d2539bda967880d7f9d241eb8c3e)) by [@vdusek](https://github.com/vdusek)
30
45
 
31
46
  ### Refactor
32
47
 
@@ -36,6 +51,8 @@ All notable changes to this project will be documented in this file.
36
51
  - [**breaking**] Replace `HttpxHttpClient` with `ImpitHttpClient` as default HTTP client ([#1307](https://github.com/apify/crawlee-python/pull/1307)) ([c803a97](https://github.com/apify/crawlee-python/commit/c803a976776a76846866d533e3a3ee8144e248c4)) by [@Mantisus](https://github.com/Mantisus), closes [#1079](https://github.com/apify/crawlee-python/issues/1079)
37
52
  - [**breaking**] Change Dataset unwind parameter to accept list of strings ([#1357](https://github.com/apify/crawlee-python/pull/1357)) ([862a203](https://github.com/apify/crawlee-python/commit/862a20398f00fe91802fe7a1ccd58b05aee053a1)) by [@vdusek](https://github.com/vdusek)
38
53
  - [**breaking**] Remove `Request.id` field ([#1366](https://github.com/apify/crawlee-python/pull/1366)) ([32f3580](https://github.com/apify/crawlee-python/commit/32f3580e9775a871924ab1233085d0c549c4cd52)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1358](https://github.com/apify/crawlee-python/issues/1358)
54
+ - [**breaking**] Refactor storage creation and caching, configuration and services ([#1386](https://github.com/apify/crawlee-python/pull/1386)) ([04649bd](https://github.com/apify/crawlee-python/commit/04649bde60d46b2bc18ae4f6e3fd9667d02a9cef)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1379](https://github.com/apify/crawlee-python/issues/1379)
55
+
39
56
 
40
57
 
41
58
  ## [0.6.12](https://github.com/apify/crawlee-python/releases/tag/v0.6.12) (2025-07-30)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: crawlee
3
- Version: 1.0.0rc1
3
+ Version: 1.0.1
4
4
  Summary: Crawlee for Python
5
5
  Project-URL: Apify Homepage, https://apify.com
6
6
  Project-URL: Changelog, https://crawlee.dev/python/docs/changelog
@@ -227,12 +227,12 @@ Classifier: Topic :: Software Development :: Libraries
227
227
  Requires-Python: >=3.10
228
228
  Requires-Dist: cachetools>=5.5.0
229
229
  Requires-Dist: colorama>=0.4.0
230
- Requires-Dist: impit>=0.5.2
230
+ Requires-Dist: impit>=0.6.1
231
231
  Requires-Dist: more-itertools>=10.2.0
232
232
  Requires-Dist: protego>=0.5.0
233
233
  Requires-Dist: psutil>=6.0.0
234
- Requires-Dist: pydantic!=2.10.0,!=2.10.1,!=2.10.2,>=2.8.0
235
234
  Requires-Dist: pydantic-settings!=2.7.0,!=2.7.1,!=2.8.0,>=2.2.0
235
+ Requires-Dist: pydantic>=2.11.0
236
236
  Requires-Dist: pyee>=9.0.0
237
237
  Requires-Dist: tldextract>=5.1.0
238
238
  Requires-Dist: typing-extensions>=4.1.0
@@ -244,7 +244,9 @@ Requires-Dist: jaro-winkler>=2.0.3; extra == 'adaptive-crawler'
244
244
  Requires-Dist: playwright>=1.27.0; extra == 'adaptive-crawler'
245
245
  Requires-Dist: scikit-learn>=1.6.0; extra == 'adaptive-crawler'
246
246
  Provides-Extra: all
247
+ Requires-Dist: aiosqlite>=0.21.0; extra == 'all'
247
248
  Requires-Dist: apify-fingerprint-datapoints>=0.0.2; extra == 'all'
249
+ Requires-Dist: asyncpg>=0.24.0; extra == 'all'
248
250
  Requires-Dist: beautifulsoup4[lxml]>=4.12.0; extra == 'all'
249
251
  Requires-Dist: browserforge>=1.2.3; extra == 'all'
250
252
  Requires-Dist: cookiecutter>=2.6.0; extra == 'all'
@@ -263,6 +265,7 @@ Requires-Dist: parsel>=1.10.0; extra == 'all'
263
265
  Requires-Dist: playwright>=1.27.0; extra == 'all'
264
266
  Requires-Dist: rich>=13.9.0; extra == 'all'
265
267
  Requires-Dist: scikit-learn>=1.6.0; extra == 'all'
268
+ Requires-Dist: sqlalchemy[asyncio]<3.0.0,>=2.0.0; extra == 'all'
266
269
  Requires-Dist: typer>=0.12.0; extra == 'all'
267
270
  Requires-Dist: wrapt>=1.17.0; extra == 'all'
268
271
  Provides-Extra: beautifulsoup
@@ -293,6 +296,12 @@ Provides-Extra: playwright
293
296
  Requires-Dist: apify-fingerprint-datapoints>=0.0.2; extra == 'playwright'
294
297
  Requires-Dist: browserforge>=1.2.3; extra == 'playwright'
295
298
  Requires-Dist: playwright>=1.27.0; extra == 'playwright'
299
+ Provides-Extra: sql-postgres
300
+ Requires-Dist: asyncpg>=0.24.0; extra == 'sql-postgres'
301
+ Requires-Dist: sqlalchemy[asyncio]<3.0.0,>=2.0.0; extra == 'sql-postgres'
302
+ Provides-Extra: sql-sqlite
303
+ Requires-Dist: aiosqlite>=0.21.0; extra == 'sql-sqlite'
304
+ Requires-Dist: sqlalchemy[asyncio]<3.0.0,>=2.0.0; extra == 'sql-sqlite'
296
305
  Description-Content-Type: text/markdown
297
306
 
298
307
  <h1 align="center">
@@ -327,8 +336,6 @@ Description-Content-Type: text/markdown
327
336
 
328
337
  Crawlee covers your crawling and scraping end-to-end and **helps you build reliable scrapers. Fast.**
329
338
 
330
- > 🚀 Crawlee for Python is open to early adopters!
331
-
332
339
  Your crawlers will appear almost human-like and fly under the radar of modern bot protections even with the default configuration. Crawlee gives you the tools to crawl the web for links, scrape data and persistently store it in machine-readable formats, without having to worry about the technical details. And thanks to rich configuration options, you can tweak almost any aspect of Crawlee to suit your project's needs if the default settings don't cut it.
333
340
 
334
341
  > 👉 **View full documentation, guides and examples on the [Crawlee project website](https://crawlee.dev/python/)** 👈
@@ -30,8 +30,6 @@
30
30
 
31
31
  Crawlee covers your crawling and scraping end-to-end and **helps you build reliable scrapers. Fast.**
32
32
 
33
- > 🚀 Crawlee for Python is open to early adopters!
34
-
35
33
  Your crawlers will appear almost human-like and fly under the radar of modern bot protections even with the default configuration. Crawlee gives you the tools to crawl the web for links, scrape data and persistently store it in machine-readable formats, without having to worry about the technical details. And thanks to rich configuration options, you can tweak almost any aspect of Crawlee to suit your project's needs if the default settings don't cut it.
36
34
 
37
35
  > 👉 **View full documentation, guides and examples on the [Crawlee project website](https://crawlee.dev/python/)** 👈
@@ -5,7 +5,7 @@ from crawlee.crawlers import AdaptivePlaywrightCrawler, AdaptivePlaywrightCrawli
5
5
 
6
6
 
7
7
  async def main() -> None:
8
- crawler = AdaptivePlaywrightCrawler.with_beautifulsoup_static_parser()
8
+ crawler = AdaptivePlaywrightCrawler.with_parsel_static_parser()
9
9
 
10
10
  @crawler.router.default_handler
11
11
  async def request_handler(context: AdaptivePlaywrightCrawlingContext) -> None:
@@ -18,6 +18,7 @@ async def main() -> None:
18
18
  # Fetch and process requests from the queue.
19
19
  while request := await request_list.fetch_next_request():
20
20
  # Do something with it...
21
+ print(f'Processing {request.url}')
21
22
 
22
23
  # And mark it as handled.
23
24
  await request_list.mark_request_as_handled(request)
@@ -0,0 +1,46 @@
1
+ import asyncio
2
+ import logging
3
+
4
+ from crawlee import service_locator
5
+ from crawlee.request_loaders import RequestList
6
+
7
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s-%(levelname)s-%(message)s')
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
+ # Disable clearing the `KeyValueStore` on each run.
12
+ # This is necessary so that the state keys are not cleared at startup.
13
+ # The recommended way to achieve this behavior is setting the environment variable
14
+ # `CRAWLEE_PURGE_ON_START=0`
15
+ configuration = service_locator.get_configuration()
16
+ configuration.purge_on_start = False
17
+
18
+
19
+ async def main() -> None:
20
+ # Open the request list, if it does not exist, it will be created.
21
+ # Leave name empty to use the default request list.
22
+ request_list = RequestList(
23
+ name='my-request-list',
24
+ requests=[
25
+ 'https://apify.com/',
26
+ 'https://crawlee.dev/',
27
+ 'https://crawlee.dev/python/',
28
+ ],
29
+ # Enable persistence
30
+ persist_state_key='my-persist-state',
31
+ persist_requests_key='my-persist-requests',
32
+ )
33
+
34
+ # We receive only one request.
35
+ # Each time you run it, it will be a new request until you exhaust the `RequestList`.
36
+ request = await request_list.fetch_next_request()
37
+ if request:
38
+ logger.info(f'Processing request: {request.url}')
39
+ # Do something with it...
40
+
41
+ # And mark it as handled.
42
+ await request_list.mark_request_as_handled(request)
43
+
44
+
45
+ if __name__ == '__main__':
46
+ asyncio.run(main())
@@ -8,9 +8,11 @@ async def main() -> None:
8
8
  # Create a static request list.
9
9
  request_list = RequestList(['https://crawlee.dev', 'https://apify.com'])
10
10
 
11
+ # highlight-start
11
12
  # Convert the request list to a request manager using the to_tandem method.
12
13
  # It is a tandem with the default request queue.
13
14
  request_manager = await request_list.to_tandem()
15
+ # highlight-end
14
16
 
15
17
  # Create a crawler and pass the request manager to it.
16
18
  crawler = ParselCrawler(
@@ -20,9 +22,20 @@ async def main() -> None:
20
22
 
21
23
  @crawler.router.default_handler
22
24
  async def handler(context: ParselCrawlingContext) -> None:
25
+ context.log.info(f'Processing {context.request.url}')
26
+
23
27
  # New links will be enqueued directly to the queue.
24
28
  await context.enqueue_links()
25
29
 
30
+ # Extract data using Parsel's XPath and CSS selectors.
31
+ data = {
32
+ 'url': context.request.url,
33
+ 'title': context.selector.xpath('//title/text()').get(),
34
+ }
35
+
36
+ # Push extracted data to the dataset.
37
+ await context.push_data(data)
38
+
26
39
  await crawler.run()
27
40
 
28
41
 
@@ -23,9 +23,20 @@ async def main() -> None:
23
23
 
24
24
  @crawler.router.default_handler
25
25
  async def handler(context: ParselCrawlingContext) -> None:
26
+ context.log.info(f'Processing {context.request.url}')
27
+
26
28
  # New links will be enqueued directly to the queue.
27
29
  await context.enqueue_links()
28
30
 
31
+ # Extract data using Parsel's XPath and CSS selectors.
32
+ data = {
33
+ 'url': context.request.url,
34
+ 'title': context.selector.xpath('//title/text()').get(),
35
+ }
36
+
37
+ # Push extracted data to the dataset.
38
+ await context.push_data(data)
39
+
29
40
  await crawler.run()
30
41
 
31
42
 
@@ -0,0 +1,30 @@
1
+ import asyncio
2
+ import re
3
+
4
+ from crawlee.http_clients import ImpitHttpClient
5
+ from crawlee.request_loaders import SitemapRequestLoader
6
+
7
+
8
+ async def main() -> None:
9
+ # Create an HTTP client for fetching the sitemap.
10
+ http_client = ImpitHttpClient()
11
+
12
+ # Create a sitemap request loader with filtering rules.
13
+ sitemap_loader = SitemapRequestLoader(
14
+ sitemap_urls=['https://crawlee.dev/sitemap.xml'],
15
+ http_client=http_client,
16
+ include=[re.compile(r'.*docs.*')], # Only include URLs containing 'docs'.
17
+ max_buffer_size=500, # Keep up to 500 URLs in memory before processing.
18
+ )
19
+
20
+ # We work with the loader until we process all relevant links from the sitemap.
21
+ while request := await sitemap_loader.fetch_next_request():
22
+ # Do something with it...
23
+ print(f'Processing {request.url}')
24
+
25
+ # And mark it as handled.
26
+ await sitemap_loader.mark_request_as_handled(request)
27
+
28
+
29
+ if __name__ == '__main__':
30
+ asyncio.run(main())
@@ -0,0 +1,45 @@
1
+ import asyncio
2
+ import logging
3
+
4
+ from crawlee import service_locator
5
+ from crawlee.http_clients import ImpitHttpClient
6
+ from crawlee.request_loaders import SitemapRequestLoader
7
+
8
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s-%(levelname)s-%(message)s')
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
+ # Disable clearing the `KeyValueStore` on each run.
13
+ # This is necessary so that the state keys are not cleared at startup.
14
+ # The recommended way to achieve this behavior is setting the environment variable
15
+ # `CRAWLEE_PURGE_ON_START=0`
16
+ configuration = service_locator.get_configuration()
17
+ configuration.purge_on_start = False
18
+
19
+
20
+ async def main() -> None:
21
+ # Create an HTTP client for fetching sitemaps
22
+ # Use the context manager for `SitemapRequestLoader` to correctly save the state when
23
+ # the work is completed.
24
+ async with (
25
+ ImpitHttpClient() as http_client,
26
+ SitemapRequestLoader(
27
+ sitemap_urls=['https://crawlee.dev/sitemap.xml'],
28
+ http_client=http_client,
29
+ # Enable persistence
30
+ persist_state_key='my-persist-state',
31
+ ) as sitemap_loader,
32
+ ):
33
+ # We receive only one request.
34
+ # Each time you run it, it will be a new request until you exhaust the sitemap.
35
+ request = await sitemap_loader.fetch_next_request()
36
+ if request:
37
+ logger.info(f'Processing request: {request.url}')
38
+ # Do something with it...
39
+
40
+ # And mark it as handled.
41
+ await sitemap_loader.mark_request_as_handled(request)
42
+
43
+
44
+ if __name__ == '__main__':
45
+ asyncio.run(main())
@@ -0,0 +1,53 @@
1
+ import asyncio
2
+ import re
3
+
4
+ from crawlee.crawlers import ParselCrawler, ParselCrawlingContext
5
+ from crawlee.http_clients import ImpitHttpClient
6
+ from crawlee.request_loaders import SitemapRequestLoader
7
+
8
+
9
+ async def main() -> None:
10
+ # Create an HTTP client for fetching the sitemap.
11
+ http_client = ImpitHttpClient()
12
+
13
+ # Create a sitemap request loader with filtering rules.
14
+ sitemap_loader = SitemapRequestLoader(
15
+ sitemap_urls=['https://crawlee.dev/sitemap.xml'],
16
+ http_client=http_client,
17
+ include=[re.compile(r'.*docs.*')], # Only include URLs containing 'docs'.
18
+ max_buffer_size=500, # Keep up to 500 URLs in memory before processing.
19
+ )
20
+
21
+ # highlight-start
22
+ # Convert the sitemap loader into a request manager linked
23
+ # to the default request queue.
24
+ request_manager = await sitemap_loader.to_tandem()
25
+ # highlight-end
26
+
27
+ # Create a crawler and pass the request manager to it.
28
+ crawler = ParselCrawler(
29
+ request_manager=request_manager,
30
+ max_requests_per_crawl=10, # Limit the max requests per crawl.
31
+ )
32
+
33
+ @crawler.router.default_handler
34
+ async def handler(context: ParselCrawlingContext) -> None:
35
+ context.log.info(f'Processing {context.request.url}')
36
+
37
+ # New links will be enqueued directly to the queue.
38
+ await context.enqueue_links()
39
+
40
+ # Extract data using Parsel's XPath and CSS selectors.
41
+ data = {
42
+ 'url': context.request.url,
43
+ 'title': context.selector.xpath('//title/text()').get(),
44
+ }
45
+
46
+ # Push extracted data to the dataset.
47
+ await context.push_data(data)
48
+
49
+ await crawler.run()
50
+
51
+
52
+ if __name__ == '__main__':
53
+ asyncio.run(main())
@@ -0,0 +1,54 @@
1
+ import asyncio
2
+ import re
3
+
4
+ from crawlee.crawlers import ParselCrawler, ParselCrawlingContext
5
+ from crawlee.http_clients import ImpitHttpClient
6
+ from crawlee.request_loaders import RequestManagerTandem, SitemapRequestLoader
7
+ from crawlee.storages import RequestQueue
8
+
9
+
10
+ async def main() -> None:
11
+ # Create an HTTP client for fetching the sitemap.
12
+ http_client = ImpitHttpClient()
13
+
14
+ # Create a sitemap request loader with filtering rules.
15
+ sitemap_loader = SitemapRequestLoader(
16
+ sitemap_urls=['https://crawlee.dev/sitemap.xml'],
17
+ http_client=http_client,
18
+ include=[re.compile(r'.*docs.*')], # Only include URLs containing 'docs'.
19
+ max_buffer_size=500, # Keep up to 500 URLs in memory before processing.
20
+ )
21
+
22
+ # Open the default request queue.
23
+ request_queue = await RequestQueue.open()
24
+
25
+ # And combine them together to a single request manager.
26
+ request_manager = RequestManagerTandem(sitemap_loader, request_queue)
27
+
28
+ # Create a crawler and pass the request manager to it.
29
+ crawler = ParselCrawler(
30
+ request_manager=request_manager,
31
+ max_requests_per_crawl=10, # Limit the max requests per crawl.
32
+ )
33
+
34
+ @crawler.router.default_handler
35
+ async def handler(context: ParselCrawlingContext) -> None:
36
+ context.log.info(f'Processing {context.request.url}')
37
+
38
+ # New links will be enqueued directly to the queue.
39
+ await context.enqueue_links()
40
+
41
+ # Extract data using Parsel's XPath and CSS selectors.
42
+ data = {
43
+ 'url': context.request.url,
44
+ 'title': context.selector.xpath('//title/text()').get(),
45
+ }
46
+
47
+ # Push extracted data to the dataset.
48
+ await context.push_data(data)
49
+
50
+ await crawler.run()
51
+
52
+
53
+ if __name__ == '__main__':
54
+ asyncio.run(main())
@@ -0,0 +1,30 @@
1
+ import asyncio
2
+ from datetime import timedelta
3
+
4
+ from crawlee import service_locator
5
+ from crawlee.configuration import Configuration
6
+ from crawlee.storage_clients import MemoryStorageClient
7
+ from crawlee.storages import Dataset
8
+
9
+
10
+ async def main() -> None:
11
+ configuration = Configuration(
12
+ log_level='DEBUG',
13
+ headless=False,
14
+ persist_state_interval=timedelta(seconds=30),
15
+ )
16
+ # Set the custom configuration as the global default configuration.
17
+ service_locator.set_configuration(configuration)
18
+
19
+ # Use the global defaults when creating the dataset (or other storage).
20
+ dataset_1 = await Dataset.open()
21
+
22
+ # Or explicitly set a specific configuration if
23
+ # you do not want to rely on global defaults.
24
+ dataset_2 = await Dataset.open(
25
+ storage_client=MemoryStorageClient(), configuration=configuration
26
+ )
27
+
28
+
29
+ if __name__ == '__main__':
30
+ asyncio.run(main())
@@ -0,0 +1,12 @@
1
+ from crawlee.crawlers import ParselCrawler
2
+ from crawlee.storage_clients import SqlStorageClient
3
+
4
+
5
+ async def main() -> None:
6
+ # Create a new instance of storage client.
7
+ # This will create an SQLite database file crawlee.db or create tables in your
8
+ # database if you pass `connection_string` or `engine`.
9
+ # Use the context manager to ensure that connections are properly cleaned up.
10
+ async with SqlStorageClient() as storage_client:
11
+ # And pass it to the crawler.
12
+ crawler = ParselCrawler(storage_client=storage_client)
@@ -0,0 +1,33 @@
1
+ from sqlalchemy.ext.asyncio import create_async_engine
2
+
3
+ from crawlee.configuration import Configuration
4
+ from crawlee.crawlers import ParselCrawler
5
+ from crawlee.storage_clients import SqlStorageClient
6
+
7
+
8
+ async def main() -> None:
9
+ # Create a new instance of storage client.
10
+ # On first run, also creates tables in your PostgreSQL database.
11
+ # Use the context manager to ensure that connections are properly cleaned up.
12
+ async with SqlStorageClient(
13
+ # Create an `engine` with the desired configuration
14
+ engine=create_async_engine(
15
+ 'postgresql+asyncpg://myuser:mypassword@localhost:5432/postgres',
16
+ future=True,
17
+ pool_size=5,
18
+ max_overflow=10,
19
+ pool_recycle=3600,
20
+ pool_pre_ping=True,
21
+ echo=False,
22
+ )
23
+ ) as storage_client:
24
+ # Create a configuration with custom settings.
25
+ configuration = Configuration(
26
+ purge_on_start=False,
27
+ )
28
+
29
+ # And pass them to the crawler.
30
+ crawler = ParselCrawler(
31
+ storage_client=storage_client,
32
+ configuration=configuration,
33
+ )
@@ -0,0 +1,19 @@
1
+ import asyncio
2
+
3
+ from crawlee.storages import Dataset
4
+
5
+
6
+ async def main() -> None:
7
+ # Named storage (persists across runs)
8
+ dataset_named = await Dataset.open(name='my-persistent-dataset')
9
+
10
+ # Unnamed storage with alias (purged on start)
11
+ dataset_unnamed = await Dataset.open(alias='temporary-results')
12
+
13
+ # Default unnamed storage (both are equivalent and purged on start)
14
+ dataset_default = await Dataset.open()
15
+ dataset_default = await Dataset.open(alias='default')
16
+
17
+
18
+ if __name__ == '__main__':
19
+ asyncio.run(main())