crawlee-1.0.5b20.tar.gz → crawlee-1.0.5b22.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (694)
  1. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/CHANGELOG.md +2 -0
  2. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/PKG-INFO +1 -1
  3. crawlee-1.0.5b22/docs/examples/code_examples/using_sitemap_request_loader.py +101 -0
  4. crawlee-1.0.5b22/docs/examples/using_sitemap_request_loader.mdx +22 -0
  5. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/pyproject.toml +1 -1
  6. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_basic/_basic_crawler.py +1 -4
  7. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/events/_event_manager.py +3 -1
  8. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/request_loaders/_sitemap_request_loader.py +17 -4
  9. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/statistics/_models.py +32 -1
  10. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/statistics/_statistics.py +2 -21
  11. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/crawlers/_basic/test_basic_crawler.py +0 -1
  12. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/events/test_event_manager.py +12 -0
  13. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/request_loaders/test_sitemap_request_loader.py +35 -0
  14. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/uv.lock +1 -1
  15. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/.editorconfig +0 -0
  16. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/.github/CODEOWNERS +0 -0
  17. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/.github/pull_request_template.md +0 -0
  18. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/.github/workflows/build_and_deploy_docs.yaml +0 -0
  19. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/.github/workflows/check_pr_title.yaml +0 -0
  20. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/.github/workflows/pre_release.yaml +0 -0
  21. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/.github/workflows/release.yaml +0 -0
  22. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/.github/workflows/run_code_checks.yaml +0 -0
  23. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/.github/workflows/templates_e2e_tests.yaml +0 -0
  24. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/.github/workflows/update_new_issue.yaml +0 -0
  25. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/.gitignore +0 -0
  26. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/.markdownlint.yaml +0 -0
  27. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/.pre-commit-config.yaml +0 -0
  28. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/CONTRIBUTING.md +0 -0
  29. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/LICENSE +0 -0
  30. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/Makefile +0 -0
  31. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/README.md +0 -0
  32. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/deployment/apify_platform.mdx +0 -0
  33. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/deployment/code_examples/apify/crawler_as_actor_example.py +0 -0
  34. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/deployment/code_examples/apify/get_public_url.py +0 -0
  35. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/deployment/code_examples/apify/log_with_config_example.py +0 -0
  36. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/deployment/code_examples/apify/proxy_advanced_example.py +0 -0
  37. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/deployment/code_examples/apify/proxy_example.py +0 -0
  38. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/deployment/code_examples/google/cloud_run_example.py +0 -0
  39. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/deployment/code_examples/google/google_example.py +0 -0
  40. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/deployment/google_cloud.mdx +0 -0
  41. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/deployment/google_cloud_run.mdx +0 -0
  42. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/add_data_to_dataset.mdx +0 -0
  43. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/beautifulsoup_crawler.mdx +0 -0
  44. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/capture_screenshot_using_playwright.mdx +0 -0
  45. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/capturing_page_snapshots_with_error_snapshotter.mdx +0 -0
  46. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/adaptive_playwright_crawler.py +0 -0
  47. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/add_data_to_dataset_bs.py +0 -0
  48. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/add_data_to_dataset_dataset.py +0 -0
  49. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/add_data_to_dataset_pw.py +0 -0
  50. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/beautifulsoup_crawler.py +0 -0
  51. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/beautifulsoup_crawler_keep_alive.py +0 -0
  52. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/beautifulsoup_crawler_stop.py +0 -0
  53. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/capture_screenshot_using_playwright.py +0 -0
  54. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/configure_json_logging.py +0 -0
  55. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/crawl_all_links_on_website_bs.py +0 -0
  56. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/crawl_all_links_on_website_pw.py +0 -0
  57. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/crawl_multiple_urls_bs.py +0 -0
  58. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/crawl_multiple_urls_pw.py +0 -0
  59. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/crawl_specific_links_on_website_bs.py +0 -0
  60. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/crawl_specific_links_on_website_pw.py +0 -0
  61. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/crawl_website_with_relative_links_all_links.py +0 -0
  62. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/crawl_website_with_relative_links_same_domain.py +0 -0
  63. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/crawl_website_with_relative_links_same_hostname.py +0 -0
  64. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/crawl_website_with_relative_links_same_origin.py +0 -0
  65. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/export_entire_dataset_to_file_csv.py +0 -0
  66. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/export_entire_dataset_to_file_json.py +0 -0
  67. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/extract_and_add_specific_links_on_website_bs.py +0 -0
  68. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/extract_and_add_specific_links_on_website_pw.py +0 -0
  69. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/fill_and_submit_web_form_crawler.py +0 -0
  70. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/fill_and_submit_web_form_request.py +0 -0
  71. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/parsel_crawler.py +0 -0
  72. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/parsel_crawler_with_error_snapshotter.py +0 -0
  73. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/playwright_block_requests.py +0 -0
  74. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/playwright_crawler.py +0 -0
  75. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/playwright_crawler_with_camoufox.py +0 -0
  76. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/playwright_crawler_with_error_snapshotter.py +0 -0
  77. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/playwright_crawler_with_fingerprint_generator.py +0 -0
  78. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/respect_robots_on_skipped_request.py +0 -0
  79. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/respect_robots_txt_file.py +0 -0
  80. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/resuming_paused_crawl.py +0 -0
  81. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/using_browser_profiles_chrome.py +0 -0
  82. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/code_examples/using_browser_profiles_firefox.py +0 -0
  83. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/crawl_all_links_on_website.mdx +0 -0
  84. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/crawl_multiple_urls.mdx +0 -0
  85. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/crawl_specific_links_on_website.mdx +0 -0
  86. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/crawl_website_with_relative_links.mdx +0 -0
  87. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/crawler_keep_alive.mdx +0 -0
  88. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/crawler_stop.mdx +0 -0
  89. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/export_entire_dataset_to_file.mdx +0 -0
  90. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/fill_and_submit_web_form.mdx +0 -0
  91. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/json_logging.mdx +0 -0
  92. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/parsel_crawler.mdx +0 -0
  93. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/playwright_crawler.mdx +0 -0
  94. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/playwright_crawler_adaptive.mdx +0 -0
  95. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/playwright_crawler_with_block_requests.mdx +0 -0
  96. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/playwright_crawler_with_camoufox.mdx +0 -0
  97. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/playwright_crawler_with_fingerprint_generator.mdx +0 -0
  98. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/respect_robots_txt_file.mdx +0 -0
  99. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/resuming_paused_crawl.mdx +0 -0
  100. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/examples/using_browser_profile.mdx +0 -0
  101. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/architecture_overview.mdx +0 -0
  102. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/avoid_blocking.mdx +0 -0
  103. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/avoid_blocking/default_fingerprint_generator_with_args.py +0 -0
  104. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/avoid_blocking/playwright_with_fingerprint_generator.py +0 -0
  105. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/creating_web_archive/manual_archiving_parsel_crawler.py +0 -0
  106. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/creating_web_archive/manual_archiving_playwright_crawler.py +0 -0
  107. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/creating_web_archive/simple_pw_through_proxy_pywb_server.py +0 -0
  108. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/error_handling/change_handle_error_status.py +0 -0
  109. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/error_handling/disable_retry.py +0 -0
  110. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/error_handling/handle_proxy_error.py +0 -0
  111. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/http_clients/parsel_curl_impersonate_example.py +0 -0
  112. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/http_clients/parsel_httpx_example.py +0 -0
  113. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/http_clients/parsel_impit_example.py +0 -0
  114. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/http_crawlers/beautifulsoup_example.py +0 -0
  115. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/http_crawlers/custom_crawler_example.py +0 -0
  116. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/http_crawlers/http_example.py +0 -0
  117. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/http_crawlers/parsel_example.py +0 -0
  118. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/login_crawler/http_login.py +0 -0
  119. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/login_crawler/playwright_login.py +0 -0
  120. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/playwright_crawler/browser_configuration_example.py +0 -0
  121. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/playwright_crawler/multiple_launch_example.py +0 -0
  122. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/playwright_crawler/plugin_browser_configuration_example.py +0 -0
  123. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/playwright_crawler/pre_navigation_hook_example.py +0 -0
  124. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/playwright_crawler_adaptive/handler.py +0 -0
  125. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/playwright_crawler_adaptive/init_beautifulsoup.py +0 -0
  126. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/playwright_crawler_adaptive/init_parsel.py +0 -0
  127. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/playwright_crawler_adaptive/init_prediction.py +0 -0
  128. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/playwright_crawler_adaptive/pre_nav_hooks.py +0 -0
  129. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/playwright_crawler_stagehand/__init__.py +0 -0
  130. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/playwright_crawler_stagehand/browser_classes.py +0 -0
  131. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/playwright_crawler_stagehand/stagehand_run.py +0 -0
  132. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/playwright_crawler_stagehand/support_classes.py +0 -0
  133. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/proxy_management/inspecting_bs_example.py +0 -0
  134. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/proxy_management/inspecting_pw_example.py +0 -0
  135. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/proxy_management/integration_bs_example.py +0 -0
  136. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/proxy_management/integration_pw_example.py +0 -0
  137. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/proxy_management/quick_start_example.py +0 -0
  138. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/proxy_management/session_bs_example.py +0 -0
  139. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/proxy_management/session_pw_example.py +0 -0
  140. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/proxy_management/tiers_bs_example.py +0 -0
  141. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/proxy_management/tiers_pw_example.py +0 -0
  142. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_loaders/rl_basic_example.py +0 -0
  143. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_loaders/rl_basic_example_with_persist.py +0 -0
  144. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_loaders/rl_tandem_example.py +0 -0
  145. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_loaders/rl_tandem_example_explicit.py +0 -0
  146. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_loaders/sitemap_basic_example.py +0 -0
  147. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_loaders/sitemap_example_with_persist.py +0 -0
  148. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_loaders/sitemap_tandem_example.py +0 -0
  149. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_loaders/sitemap_tandem_example_explicit.py +0 -0
  150. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_router/adaptive_crawler_handlers.py +0 -0
  151. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_router/basic_request_handlers.py +0 -0
  152. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_router/custom_router_default_only.py +0 -0
  153. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_router/error_handler.py +0 -0
  154. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_router/failed_request_handler.py +0 -0
  155. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_router/http_pre_navigation.py +0 -0
  156. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_router/playwright_pre_navigation.py +0 -0
  157. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/request_router/simple_default_handler.py +0 -0
  158. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/running_in_web_server/__init__.py +0 -0
  159. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/running_in_web_server/crawler.py +0 -0
  160. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/running_in_web_server/server.py +0 -0
  161. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/scaling_crawlers/max_tasks_per_minute_example.py +0 -0
  162. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/scaling_crawlers/min_and_max_concurrency_example.py +0 -0
  163. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/service_locator/service_conflicts.py +0 -0
  164. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/service_locator/service_crawler_configuration.py +0 -0
  165. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/service_locator/service_crawler_event_manager.py +0 -0
  166. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/service_locator/service_crawler_storage_client.py +0 -0
  167. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/service_locator/service_locator_configuration.py +0 -0
  168. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/service_locator/service_locator_event_manager.py +0 -0
  169. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/service_locator/service_locator_storage_client.py +0 -0
  170. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/service_locator/service_storage_configuration.py +0 -0
  171. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/service_locator/service_storage_storage_client.py +0 -0
  172. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/session_management/multi_sessions_http.py +0 -0
  173. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/session_management/one_session_http.py +0 -0
  174. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/session_management/sm_basic.py +0 -0
  175. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/session_management/sm_beautifulsoup.py +0 -0
  176. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/session_management/sm_http.py +0 -0
  177. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/session_management/sm_parsel.py +0 -0
  178. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/session_management/sm_playwright.py +0 -0
  179. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/session_management/sm_standalone.py +0 -0
  180. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storage_clients/custom_storage_client_example.py +0 -0
  181. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storage_clients/file_system_storage_client_basic_example.py +0 -0
  182. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storage_clients/file_system_storage_client_configuration_example.py +0 -0
  183. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storage_clients/memory_storage_client_basic_example.py +0 -0
  184. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storage_clients/redis_storage_client_basic_example.py +0 -0
  185. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storage_clients/redis_storage_client_configuration_example.py +0 -0
  186. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storage_clients/registering_storage_clients_example.py +0 -0
  187. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storage_clients/sql_storage_client_basic_example.py +0 -0
  188. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storage_clients/sql_storage_client_configuration_example.py +0 -0
  189. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storages/cleaning_do_not_purge_example.py +0 -0
  190. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storages/cleaning_purge_explicitly_example.py +0 -0
  191. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storages/dataset_basic_example.py +0 -0
  192. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storages/dataset_with_crawler_example.py +0 -0
  193. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storages/dataset_with_crawler_explicit_example.py +0 -0
  194. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storages/helper_add_requests_example.py +0 -0
  195. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storages/helper_enqueue_links_example.py +0 -0
  196. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storages/kvs_basic_example.py +0 -0
  197. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storages/kvs_with_crawler_example.py +0 -0
  198. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storages/kvs_with_crawler_explicit_example.py +0 -0
  199. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storages/opening.py +0 -0
  200. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storages/rq_basic_example.py +0 -0
  201. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storages/rq_with_crawler_example.py +0 -0
  202. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/storages/rq_with_crawler_explicit_example.py +0 -0
  203. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/code_examples/trace_and_monitor_crawlers/instrument_crawler.py +0 -0
  204. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/crawler_login.mdx +0 -0
  205. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/creating_web_archive.mdx +0 -0
  206. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/error_handling.mdx +0 -0
  207. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/http_clients.mdx +0 -0
  208. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/http_crawlers.mdx +0 -0
  209. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/playwright_crawler.mdx +0 -0
  210. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/playwright_crawler_adaptive.mdx +0 -0
  211. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/playwright_crawler_stagehand.mdx +0 -0
  212. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/proxy_management.mdx +0 -0
  213. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/request_loaders.mdx +0 -0
  214. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/request_router.mdx +0 -0
  215. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/running_in_web_server.mdx +0 -0
  216. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/scaling_crawlers.mdx +0 -0
  217. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/service_locator.mdx +0 -0
  218. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/session_management.mdx +0 -0
  219. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/storage_clients.mdx +0 -0
  220. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/storages.mdx +0 -0
  221. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/guides/trace_and_monitor_crawlers.mdx +0 -0
  222. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/01_setting_up.mdx +0 -0
  223. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/02_first_crawler.mdx +0 -0
  224. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/03_adding_more_urls.mdx +0 -0
  225. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/04_real_world_project.mdx +0 -0
  226. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/05_crawling.mdx +0 -0
  227. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/06_scraping.mdx +0 -0
  228. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/07_saving_data.mdx +0 -0
  229. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/08_refactoring.mdx +0 -0
  230. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/09_running_in_cloud.mdx +0 -0
  231. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/02_bs.py +0 -0
  232. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/02_bs_better.py +0 -0
  233. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/02_request_queue.py +0 -0
  234. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/03_enqueue_strategy.py +0 -0
  235. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/03_finding_new_links.py +0 -0
  236. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/03_globs.py +0 -0
  237. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/03_original_code.py +0 -0
  238. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/03_transform_request.py +0 -0
  239. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/04_sanity_check.py +0 -0
  240. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/05_crawling_detail.py +0 -0
  241. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/05_crawling_listing.py +0 -0
  242. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/06_scraping.py +0 -0
  243. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/07_final_code.py +0 -0
  244. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/07_first_code.py +0 -0
  245. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/08_main.py +0 -0
  246. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/08_routes.py +0 -0
  247. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/09_apify_sdk.py +0 -0
  248. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/__init__.py +0 -0
  249. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/code_examples/routes.py +0 -0
  250. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/introduction/index.mdx +0 -0
  251. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/pyproject.toml +0 -0
  252. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/quick-start/code_examples/beautifulsoup_crawler_example.py +0 -0
  253. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/quick-start/code_examples/parsel_crawler_example.py +0 -0
  254. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/quick-start/code_examples/playwright_crawler_example.py +0 -0
  255. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/quick-start/code_examples/playwright_crawler_headful_example.py +0 -0
  256. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/quick-start/index.mdx +0 -0
  257. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/upgrading/upgrading_to_v0x.md +0 -0
  258. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/docs/upgrading/upgrading_to_v1.md +0 -0
  259. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/renovate.json +0 -0
  260. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/__init__.py +0 -0
  261. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_autoscaling/__init__.py +0 -0
  262. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_autoscaling/_types.py +0 -0
  263. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_autoscaling/autoscaled_pool.py +0 -0
  264. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_autoscaling/py.typed +0 -0
  265. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_autoscaling/snapshotter.py +0 -0
  266. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_autoscaling/system_status.py +0 -0
  267. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_browserforge_workaround.py +0 -0
  268. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_cli.py +0 -0
  269. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_consts.py +0 -0
  270. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_log_config.py +0 -0
  271. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_request.py +0 -0
  272. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_service_locator.py +0 -0
  273. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_types.py +0 -0
  274. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/__init__.py +0 -0
  275. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/blocked.py +0 -0
  276. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/byte_size.py +0 -0
  277. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/console.py +0 -0
  278. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/context.py +0 -0
  279. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/crypto.py +0 -0
  280. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/docs.py +0 -0
  281. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/file.py +0 -0
  282. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/globs.py +0 -0
  283. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/html_to_text.py +0 -0
  284. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/models.py +0 -0
  285. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/raise_if_too_many_kwargs.py +0 -0
  286. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/recoverable_state.py +0 -0
  287. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/recurring_task.py +0 -0
  288. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/requests.py +0 -0
  289. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/robots.py +0 -0
  290. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/sitemap.py +0 -0
  291. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/system.py +0 -0
  292. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/time.py +0 -0
  293. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/try_import.py +0 -0
  294. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/urls.py +0 -0
  295. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/wait.py +0 -0
  296. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/_utils/web.py +0 -0
  297. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/browsers/__init__.py +0 -0
  298. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/browsers/_browser_controller.py +0 -0
  299. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/browsers/_browser_plugin.py +0 -0
  300. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/browsers/_browser_pool.py +0 -0
  301. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/browsers/_playwright_browser.py +0 -0
  302. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/browsers/_playwright_browser_controller.py +0 -0
  303. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/browsers/_playwright_browser_plugin.py +0 -0
  304. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/browsers/_types.py +0 -0
  305. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/browsers/py.typed +0 -0
  306. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/configuration.py +0 -0
  307. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/__init__.py +0 -0
  308. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_abstract_http/__init__.py +0 -0
  309. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +0 -0
  310. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_abstract_http/_abstract_http_parser.py +0 -0
  311. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_abstract_http/_http_crawling_context.py +0 -0
  312. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_abstract_http/py.typed +0 -0
  313. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_adaptive_playwright/__init__.py +0 -0
  314. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +0 -0
  315. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py +0 -0
  316. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py +0 -0
  317. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py +0 -0
  318. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_adaptive_playwright/_result_comparator.py +0 -0
  319. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_adaptive_playwright/_utils.py +0 -0
  320. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_basic/__init__.py +0 -0
  321. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_basic/_basic_crawling_context.py +0 -0
  322. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_basic/_context_pipeline.py +0 -0
  323. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_basic/_logging_utils.py +0 -0
  324. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_basic/py.typed +0 -0
  325. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_beautifulsoup/__init__.py +0 -0
  326. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawler.py +0 -0
  327. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawling_context.py +0 -0
  328. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_parser.py +0 -0
  329. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_beautifulsoup/_utils.py +0 -0
  330. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_beautifulsoup/py.typed +0 -0
  331. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_http/__init__.py +0 -0
  332. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_http/_http_crawler.py +0 -0
  333. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_http/_http_parser.py +0 -0
  334. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_parsel/__init__.py +0 -0
  335. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_parsel/_parsel_crawler.py +0 -0
  336. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_parsel/_parsel_crawling_context.py +0 -0
  337. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_parsel/_parsel_parser.py +0 -0
  338. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_parsel/_utils.py +0 -0
  339. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_playwright/__init__.py +0 -0
  340. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_playwright/_playwright_crawler.py +0 -0
  341. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_playwright/_playwright_crawling_context.py +0 -0
  342. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_playwright/_playwright_http_client.py +0 -0
  343. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py +0 -0
  344. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_playwright/_types.py +0 -0
  345. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_playwright/_utils.py +0 -0
  346. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/_types.py +0 -0
  347. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/crawlers/py.typed +0 -0
  348. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/errors.py +0 -0
  349. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/events/__init__.py +0 -0
  350. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/events/_local_event_manager.py +0 -0
  351. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/events/_types.py +0 -0
  352. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/events/py.typed +0 -0
  353. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/fingerprint_suite/__init__.py +0 -0
  354. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/fingerprint_suite/_browserforge_adapter.py +0 -0
  355. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/fingerprint_suite/_consts.py +0 -0
  356. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/fingerprint_suite/_fingerprint_generator.py +0 -0
  357. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/fingerprint_suite/_header_generator.py +0 -0
  358. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/fingerprint_suite/_types.py +0 -0
  359. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/fingerprint_suite/py.typed +0 -0
  360. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/http_clients/__init__.py +0 -0
  361. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/http_clients/_base.py +0 -0
  362. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/http_clients/_curl_impersonate.py +0 -0
  363. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/http_clients/_httpx.py +0 -0
  364. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/http_clients/_impit.py +0 -0
  365. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/otel/__init__.py +0 -0
  366. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/otel/crawler_instrumentor.py +0 -0
  367. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/cookiecutter.json +0 -0
  368. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/hooks/post_gen_project.py +0 -0
  369. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/hooks/pre_gen_project.py +0 -0
  370. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/templates/main.py +0 -0
  371. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/templates/main_beautifulsoup.py +0 -0
  372. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/templates/main_parsel.py +0 -0
  373. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/templates/main_playwright.py +0 -0
  374. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/templates/main_playwright_camoufox.py +0 -0
  375. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/templates/routes_beautifulsoup.py +0 -0
  376. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/templates/routes_camoufox.py +0 -0
  377. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/templates/routes_parsel.py +0 -0
  378. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/templates/routes_playwright.py +0 -0
  379. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/templates/routes_playwright_camoufox.py +0 -0
  380. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/{{cookiecutter.project_name}}/.dockerignore +0 -0
  381. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile +0 -0
  382. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/{{cookiecutter.project_name}}/README.md +0 -0
  383. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml +0 -0
  384. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/{{cookiecutter.project_name}}/requirements.txt +0 -0
  385. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__init__.py +0 -0
  386. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__main__.py +0 -0
  387. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/main.py +0 -0
  388. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/routes.py +0 -0
  389. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/proxy_configuration.py +0 -0
  390. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/py.typed +0 -0
  391. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/request_loaders/__init__.py +0 -0
  392. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/request_loaders/_request_list.py +0 -0
  393. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/request_loaders/_request_loader.py +0 -0
  394. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/request_loaders/_request_manager.py +0 -0
  395. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/request_loaders/_request_manager_tandem.py +0 -0
  396. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/router.py +0 -0
  397. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/sessions/__init__.py +0 -0
  398. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/sessions/_cookies.py +0 -0
  399. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/sessions/_models.py +0 -0
  400. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/sessions/_session.py +0 -0
  401. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/sessions/_session_pool.py +0 -0
  402. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/sessions/py.typed +0 -0
  403. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/statistics/__init__.py +0 -0
  404. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/statistics/_error_snapshotter.py +0 -0
  405. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/statistics/_error_tracker.py +0 -0
  406. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/__init__.py +0 -0
  407. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_base/__init__.py +0 -0
  408. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_base/_dataset_client.py +0 -0
  409. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_base/_key_value_store_client.py +0 -0
  410. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_base/_request_queue_client.py +0 -0
  411. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_base/_storage_client.py +0 -0
  412. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_base/py.typed +0 -0
  413. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_file_system/__init__.py +0 -0
  414. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_file_system/_dataset_client.py +0 -0
  415. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_file_system/_key_value_store_client.py +0 -0
  416. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_file_system/_request_queue_client.py +0 -0
  417. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_file_system/_storage_client.py +0 -0
  418. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_file_system/_utils.py +0 -0
  419. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_file_system/py.typed +0 -0
  420. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_memory/__init__.py +0 -0
  421. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_memory/_dataset_client.py +0 -0
  422. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_memory/_key_value_store_client.py +0 -0
  423. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_memory/_request_queue_client.py +0 -0
  424. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_memory/_storage_client.py +0 -0
  425. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_memory/py.typed +0 -0
  426. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_redis/__init__.py +0 -0
  427. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_redis/_client_mixin.py +0 -0
  428. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_redis/_dataset_client.py +0 -0
  429. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_redis/_key_value_store_client.py +0 -0
  430. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_redis/_request_queue_client.py +0 -0
  431. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_redis/_storage_client.py +0 -0
  432. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_redis/_utils.py +0 -0
  433. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_redis/lua_scripts/atomic_bloom_add_requests.lua +0 -0
  434. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_redis/lua_scripts/atomic_fetch_request.lua +0 -0
  435. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_redis/lua_scripts/atomic_set_add_requests.lua +0 -0
  436. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_redis/lua_scripts/reclaim_stale_requests.lua +0 -0
  437. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_redis/py.typed +0 -0
  438. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_sql/__init__.py +0 -0
  439. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_sql/_client_mixin.py +0 -0
  440. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_sql/_dataset_client.py +0 -0
  441. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_sql/_db_models.py +0 -0
  442. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_sql/_key_value_store_client.py +0 -0
  443. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_sql/_request_queue_client.py +0 -0
  444. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_sql/_storage_client.py +0 -0
  445. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/_sql/py.typed +0 -0
  446. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/models.py +0 -0
  447. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storage_clients/py.typed +0 -0
  448. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storages/__init__.py +0 -0
  449. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storages/_base.py +0 -0
  450. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storages/_dataset.py +0 -0
  451. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storages/_key_value_store.py +0 -0
  452. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storages/_request_queue.py +0 -0
  453. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storages/_storage_instance_manager.py +0 -0
  454. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storages/_utils.py +0 -0
  455. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/src/crawlee/storages/py.typed +0 -0
  456. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/__init__.py +0 -0
  457. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/e2e/__init__.py +0 -0
  458. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/e2e/conftest.py +0 -0
  459. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/e2e/project_template/test_static_crawlers_templates.py +0 -0
  460. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/e2e/project_template/utils.py +0 -0
  461. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/README.md +0 -0
  462. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/__init__.py +0 -0
  463. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_autoscaling/test_autoscaled_pool.py +0 -0
  464. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_autoscaling/test_snapshotter.py +0 -0
  465. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_autoscaling/test_system_status.py +0 -0
  466. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_statistics/test_error_tracker.py +0 -0
  467. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_statistics/test_periodic_logging.py +0 -0
  468. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_statistics/test_persistence.py +0 -0
  469. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_statistics/test_request_processing_record.py +0 -0
  470. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_byte_size.py +0 -0
  471. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_console.py +0 -0
  472. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_crypto.py +0 -0
  473. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_file.py +0 -0
  474. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_globs.py +0 -0
  475. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_html_to_text.py +0 -0
  476. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_measure_time.py +0 -0
  477. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_raise_if_too_many_kwargs.py +0 -0
  478. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_recurring_task.py +0 -0
  479. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_requests.py +0 -0
  480. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_robots.py +0 -0
  481. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_sitemap.py +0 -0
  482. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_system.py +0 -0
  483. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_timedelata_ms.py +0 -0
  484. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/_utils/test_urls.py +0 -0
  485. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/browsers/test_browser_pool.py +0 -0
  486. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/browsers/test_playwright_browser.py +0 -0
  487. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/browsers/test_playwright_browser_controller.py +0 -0
  488. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/browsers/test_playwright_browser_plugin.py +0 -0
  489. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/conftest.py +0 -0
  490. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler.py +0 -0
  491. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler_statistics.py +0 -0
  492. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawling_context.py +0 -0
  493. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/crawlers/_adaptive_playwright/test_predictor.py +0 -0
  494. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/crawlers/_basic/test_context_pipeline.py +0 -0
  495. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py +0 -0
  496. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/crawlers/_http/test_http_crawler.py +0 -0
  497. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/crawlers/_parsel/test_parsel_crawler.py +0 -0
  498. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/crawlers/_playwright/test_playwright_crawler.py +0 -0
  499. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/events/test_local_event_manager.py +0 -0
  500. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/fingerprint_suite/test_adapters.py +0 -0
  501. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/fingerprint_suite/test_header_generator.py +0 -0
  502. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/http_clients/test_http_clients.py +0 -0
  503. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/http_clients/test_httpx.py +0 -0
  504. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/otel/test_crawler_instrumentor.py +0 -0
  505. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/proxy_configuration/test_new_proxy_info.py +0 -0
  506. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/proxy_configuration/test_tiers.py +0 -0
  507. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/request_loaders/test_request_list.py +0 -0
  508. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/server.py +0 -0
  509. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/server_endpoints.py +0 -0
  510. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/sessions/test_cookies.py +0 -0
  511. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/sessions/test_models.py +0 -0
  512. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/sessions/test_session.py +0 -0
  513. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/sessions/test_session_pool.py +0 -0
  514. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py +0 -0
  515. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py +0 -0
  516. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storage_clients/_file_system/test_fs_rq_client.py +0 -0
  517. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storage_clients/_memory/test_memory_dataset_client.py +0 -0
  518. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storage_clients/_memory/test_memory_kvs_client.py +0 -0
  519. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storage_clients/_memory/test_memory_rq_client.py +0 -0
  520. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storage_clients/_redis/test_redis_dataset_client.py +0 -0
  521. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storage_clients/_redis/test_redis_kvs_client.py +0 -0
  522. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storage_clients/_redis/test_redis_rq_client.py +0 -0
  523. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storage_clients/_sql/test_sql_dataset_client.py +0 -0
  524. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storage_clients/_sql/test_sql_kvs_client.py +0 -0
  525. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storage_clients/_sql/test_sql_rq_client.py +0 -0
  526. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storages/conftest.py +0 -0
  527. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storages/test_dataset.py +0 -0
  528. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storages/test_key_value_store.py +0 -0
  529. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storages/test_request_manager_tandem.py +0 -0
  530. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storages/test_request_queue.py +0 -0
  531. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/storages/test_storage_instance_manager.py +0 -0
  532. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/test_cli.py +0 -0
  533. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/test_configuration.py +0 -0
  534. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/test_log_config.py +0 -0
  535. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/test_router.py +0 -0
  536. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/tests/unit/test_service_locator.py +0 -0
  537. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/.eslintrc.json +0 -0
  538. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/.yarnrc.yml +0 -0
  539. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/babel.config.js +0 -0
  540. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/build_api_reference.sh +0 -0
  541. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/docusaurus.config.js +0 -0
  542. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/generate_module_shortcuts.py +0 -0
  543. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/package.json +0 -0
  544. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/patches/@docusaurus+core+3.4.0.patch +0 -0
  545. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/patches/@docusaurus+core+3.5.2.patch +0 -0
  546. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/roa-loader/index.js +0 -0
  547. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/roa-loader/package.json +0 -0
  548. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/sidebars.js +0 -0
  549. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/ApiLink.jsx +0 -0
  550. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Button.jsx +0 -0
  551. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Button.module.css +0 -0
  552. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/CopyButton.jsx +0 -0
  553. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/CopyButton.module.css +0 -0
  554. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Gradients.jsx +0 -0
  555. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Highlights.jsx +0 -0
  556. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Highlights.module.css +0 -0
  557. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/HomepageCliExample.jsx +0 -0
  558. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/HomepageCliExample.module.css +0 -0
  559. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/HomepageCtaSection.jsx +0 -0
  560. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/HomepageCtaSection.module.css +0 -0
  561. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/HomepageHeroSection.jsx +0 -0
  562. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/HomepageHeroSection.module.css +0 -0
  563. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/LanguageInfoWidget.jsx +0 -0
  564. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/LanguageInfoWidget.module.css +0 -0
  565. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/LanguageSwitch.jsx +0 -0
  566. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/LanguageSwitch.module.css +0 -0
  567. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/RiverSection.jsx +0 -0
  568. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/RiverSection.module.css +0 -0
  569. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/ThreeCardsWithIcon.jsx +0 -0
  570. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/ThreeCardsWithIcon.module.css +0 -0
  571. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/animated-crawlee-logo-dark.svg +0 -0
  572. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/Homepage/animated-crawlee-logo-light.svg +0 -0
  573. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/RunnableCodeBlock.jsx +0 -0
  574. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/components/RunnableCodeBlock.module.css +0 -0
  575. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/css/custom.css +0 -0
  576. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/pages/home_page_example.py +0 -0
  577. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/pages/index.js +0 -0
  578. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/pages/index.module.css +0 -0
  579. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/ColorModeToggle/dark-mode-icon.svg +0 -0
  580. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/ColorModeToggle/index.js +0 -0
  581. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/ColorModeToggle/light-mode-icon.svg +0 -0
  582. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/ColorModeToggle/styles.module.css +0 -0
  583. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/DocItem/Layout/index.js +0 -0
  584. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/DocItem/Layout/styles.module.css +0 -0
  585. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/Footer/LinkItem/index.js +0 -0
  586. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/Footer/LinkItem/index.module.css +0 -0
  587. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/Footer/index.js +0 -0
  588. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/Footer/index.module.css +0 -0
  589. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/MDXComponents/A.js +0 -0
  590. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/Navbar/Content/index.js +0 -0
  591. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/Navbar/Content/styles.module.css +0 -0
  592. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/Navbar/Logo/index.js +0 -0
  593. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/Navbar/Logo/index.module.css +0 -0
  594. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/Navbar/MobileSidebar/Header/index.js +0 -0
  595. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/Navbar/MobileSidebar/Header/index.module.css +0 -0
  596. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/Navbar/MobileSidebar/Layout/index.js +0 -0
  597. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/Navbar/MobileSidebar/PrimaryMenu/index.js +0 -0
  598. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/Navbar/MobileSidebar/index.js +0 -0
  599. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/src/theme/NavbarItem/ComponentTypes.js +0 -0
  600. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/.nojekyll +0 -0
  601. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/font/lota.woff +0 -0
  602. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/font/lota.woff2 +0 -0
  603. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/API.png +0 -0
  604. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/arrow_right.svg +0 -0
  605. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/auto-scaling-dark.webp +0 -0
  606. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/auto-scaling-light.webp +0 -0
  607. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/check.svg +0 -0
  608. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/chrome-scrape-dark.gif +0 -0
  609. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/chrome-scrape-light.gif +0 -0
  610. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/cloud_icon.svg +0 -0
  611. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/community-dark-icon.svg +0 -0
  612. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/community-light-icon.svg +0 -0
  613. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/crawlee-dark-new.svg +0 -0
  614. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/crawlee-dark.svg +0 -0
  615. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/crawlee-javascript-dark.svg +0 -0
  616. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/crawlee-javascript-light.svg +0 -0
  617. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/crawlee-light-new.svg +0 -0
  618. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/crawlee-light.svg +0 -0
  619. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/crawlee-logo-monocolor.svg +0 -0
  620. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/crawlee-logo.svg +0 -0
  621. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/crawlee-python-dark.svg +0 -0
  622. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/crawlee-python-light.svg +0 -0
  623. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/crawlee-python-og.png +0 -0
  624. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/defaults-dark-icon.svg +0 -0
  625. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/defaults-light-icon.svg +0 -0
  626. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/discord-brand-dark.svg +0 -0
  627. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/discord-brand.svg +0 -0
  628. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/docusaurus.svg +0 -0
  629. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/external-link.svg +0 -0
  630. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/favicon.ico +0 -0
  631. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/favorite-tools-dark.webp +0 -0
  632. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/favorite-tools-light.webp +0 -0
  633. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/features/auto-scaling.svg +0 -0
  634. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/features/automate-everything.svg +0 -0
  635. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/features/fingerprints.svg +0 -0
  636. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/features/node-requests.svg +0 -0
  637. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/features/runs-on-py.svg +0 -0
  638. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/features/storage.svg +0 -0
  639. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/features/works-everywhere.svg +0 -0
  640. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/fill-and-submit-web-form/00.jpg +0 -0
  641. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/fill-and-submit-web-form/01.jpg +0 -0
  642. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/fill-and-submit-web-form/02.jpg +0 -0
  643. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/fill-and-submit-web-form/03.jpg +0 -0
  644. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/getting-started/current-price.jpg +0 -0
  645. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/getting-started/scraping-practice.jpg +0 -0
  646. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/getting-started/select-an-element.jpg +0 -0
  647. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/getting-started/selected-element.jpg +0 -0
  648. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/getting-started/sku.jpg +0 -0
  649. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/getting-started/title.jpg +0 -0
  650. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/github-brand-dark.svg +0 -0
  651. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/github-brand.svg +0 -0
  652. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/guides/jaeger_otel_search_view_example.png +0 -0
  653. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/guides/jaeger_otel_trace_example.png +0 -0
  654. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/hearth copy.svg +0 -0
  655. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/hearth.svg +0 -0
  656. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/javascript_logo.svg +0 -0
  657. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/js_file.svg +0 -0
  658. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/logo-big.svg +0 -0
  659. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/logo-blur.png +0 -0
  660. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/logo-blur.svg +0 -0
  661. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/logo-zoom.svg +0 -0
  662. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/menu-arrows.svg +0 -0
  663. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/oss_logo.png +0 -0
  664. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/puppeteer-live-view-dashboard.png +0 -0
  665. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/puppeteer-live-view-detail.png +0 -0
  666. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/queue-dark-icon.svg +0 -0
  667. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/queue-light-icon.svg +0 -0
  668. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/resuming-paused-crawl/00.webp +0 -0
  669. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/resuming-paused-crawl/01.webp +0 -0
  670. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/robot.png +0 -0
  671. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/routing-dark-icon.svg +0 -0
  672. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/routing-light-icon.svg +0 -0
  673. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/scraping-utils-dark-icon.svg +0 -0
  674. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/scraping-utils-light-icon.svg +0 -0
  675. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/smart-proxy-dark.webp +0 -0
  676. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/smart-proxy-light.webp +0 -0
  677. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/source_code.png +0 -0
  678. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/system.svg +0 -0
  679. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/triangles_dark.svg +0 -0
  680. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/triangles_light.svg +0 -0
  681. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/workflow.svg +0 -0
  682. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/zero-setup-dark-icon.svg +0 -0
  683. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/img/zero-setup-light-icon.svg +0 -0
  684. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/js/custom.js +0 -0
  685. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/static/robots.txt +0 -0
  686. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/tools/docs-prettier.config.js +0 -0
  687. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/tools/utils/externalLink.js +0 -0
  688. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/tools/website_gif/chrome-scrape-dark.gif +0 -0
  689. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/tools/website_gif/chrome-scrape-dark.mp4 +0 -0
  690. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/tools/website_gif/chrome-scrape-light.gif +0 -0
  691. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/tools/website_gif/chrome-scrape-light.mp4 +0 -0
  692. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/tools/website_gif/website_gif.mjs +0 -0
  693. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/tsconfig.eslint.json +0 -0
  694. {crawlee-1.0.5b20 → crawlee-1.0.5b22}/website/yarn.lock +0 -0
@@ -10,12 +10,14 @@ All notable changes to this project will be documented in this file.
  - Add `chrome` `BrowserType` for `PlaywrightCrawler` to use the Chrome browser ([#1487](https://github.com/apify/crawlee-python/pull/1487)) ([b06937b](https://github.com/apify/crawlee-python/commit/b06937bbc3afe3c936b554bfc503365c1b2c526b)) by [@Mantisus](https://github.com/Mantisus), closes [#1071](https://github.com/apify/crawlee-python/issues/1071)
  - Add `RedisStorageClient` based on Redis v8.0+ ([#1406](https://github.com/apify/crawlee-python/pull/1406)) ([d08d13d](https://github.com/apify/crawlee-python/commit/d08d13d39203c24ab61fe254b0956d6744db3b5f)) by [@Mantisus](https://github.com/Mantisus)
  - Add support for Python 3.14 ([#1553](https://github.com/apify/crawlee-python/pull/1553)) ([89e9130](https://github.com/apify/crawlee-python/commit/89e9130cabee0fbc974b29c26483b7fa0edf627c)) by [@Mantisus](https://github.com/Mantisus)
+ - Add `transform_request_function` parameter for `SitemapRequestLoader` ([#1525](https://github.com/apify/crawlee-python/pull/1525)) ([dc90127](https://github.com/apify/crawlee-python/commit/dc901271849b239ba2a947e8ebff8e1815e8c4fb)) by [@Mantisus](https://github.com/Mantisus)

  ### 🐛 Bug Fixes

  - Improve indexing of the `request_queue_records` table for `SqlRequestQueueClient` ([#1527](https://github.com/apify/crawlee-python/pull/1527)) ([6509534](https://github.com/apify/crawlee-python/commit/65095346a9d8b703b10c91e0510154c3c48a4176)) by [@Mantisus](https://github.com/Mantisus), closes [#1526](https://github.com/apify/crawlee-python/issues/1526)
  - Improve error handling for `RobotsTxtFile.load` ([#1524](https://github.com/apify/crawlee-python/pull/1524)) ([596a311](https://github.com/apify/crawlee-python/commit/596a31184914a254b3e7a81fd2f48ea8eda7db49)) by [@Mantisus](https://github.com/Mantisus)
  - Fix `crawler_runtime` not being updated during run and only in the end ([#1540](https://github.com/apify/crawlee-python/pull/1540)) ([0d6c3f6](https://github.com/apify/crawlee-python/commit/0d6c3f6d3337ddb6cab4873747c28cf95605d550)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1541](https://github.com/apify/crawlee-python/issues/1541)
+ - Ensure persist state event emission when exiting `EventManager` context ([#1562](https://github.com/apify/crawlee-python/pull/1562)) ([6a44f17](https://github.com/apify/crawlee-python/commit/6a44f172600cbcacebab899082d6efc9105c4e03)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1560](https://github.com/apify/crawlee-python/issues/1560)


  <!-- git-cliff-unreleased-end -->
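
As an editorial illustration of the first feature entry above, the snippet below is a minimal, hypothetical sketch of selecting the newly added Chrome browser type; the target URL and the request limit are arbitrary, and other browser types such as `'chromium'` remain available.

```python
import asyncio

from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext


async def main() -> None:
    # Assumes the 'chrome' browser type added in #1487; values such as
    # 'chromium', 'firefox' and 'webkit' were already supported.
    crawler = PlaywrightCrawler(browser_type='chrome', max_requests_per_crawl=5)

    @crawler.router.default_handler
    async def handler(context: PlaywrightCrawlingContext) -> None:
        context.log.info(f'Visiting {context.request.url}')

    await crawler.run(['https://crawlee.dev'])


if __name__ == '__main__':
    asyncio.run(main())
```
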
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: crawlee
- Version: 1.0.5b20
+ Version: 1.0.5b22
  Summary: Crawlee for Python
  Project-URL: Apify Homepage, https://apify.com
  Project-URL: Changelog, https://crawlee.dev/python/docs/changelog
@@ -0,0 +1,101 @@
+ import asyncio
+ from collections.abc import Callable
+
+ from yarl import URL
+
+ from crawlee import RequestOptions, RequestTransformAction
+ from crawlee.crawlers import BeautifulSoupCrawler, BeautifulSoupCrawlingContext
+ from crawlee.http_clients import ImpitHttpClient
+ from crawlee.request_loaders import SitemapRequestLoader
+
+
+ # Create a transform_request_function that maps request options based on the host in
+ # the URL
+ def create_transform_request(
+     data_mapper: dict[str, dict],
+ ) -> Callable[[RequestOptions], RequestOptions | RequestTransformAction]:
+     def transform_request(
+         request_options: RequestOptions,
+     ) -> RequestOptions | RequestTransformAction:
+         # According to the Sitemap protocol, all URLs in a Sitemap must be from a single
+         # host.
+         request_host = URL(request_options['url']).host
+
+         if request_host and (mapping_data := data_mapper.get(request_host)):
+             # Set properties from the mapping data
+             if 'label' in mapping_data:
+                 request_options['label'] = mapping_data['label']
+             if 'user_data' in mapping_data:
+                 request_options['user_data'] = mapping_data['user_data']
+
+             return request_options
+
+         return 'unchanged'
+
+     return transform_request
+
+
+ async def main() -> None:
+     # Prepare data mapping for hosts
+     apify_host = URL('https://apify.com/sitemap.xml').host
+     crawlee_host = URL('https://crawlee.dev/sitemap.xml').host
+
+     if not apify_host or not crawlee_host:
+         raise ValueError('Unable to extract host from URLs')
+
+     data_map = {
+         apify_host: {
+             'label': 'apify',
+             'user_data': {'source': 'apify'},
+         },
+         crawlee_host: {
+             'label': 'crawlee',
+             'user_data': {'source': 'crawlee'},
+         },
+     }
+
+     # Initialize the SitemapRequestLoader with the transform function
+     async with SitemapRequestLoader(
+         # Set the sitemap URLs and the HTTP client
+         sitemap_urls=['https://crawlee.dev/sitemap.xml', 'https://apify.com/sitemap.xml'],
+         http_client=ImpitHttpClient(),
+         transform_request_function=create_transform_request(data_map),
+     ) as sitemap_loader:
+         # Convert the sitemap loader to a request manager
+         request_manager = await sitemap_loader.to_tandem()
+
+         # Create and configure the crawler
+         crawler = BeautifulSoupCrawler(
+             request_manager=request_manager,
+             max_requests_per_crawl=10,
+         )
+
+         # Create default handler for requests without a specific label
+         @crawler.router.default_handler
+         async def handler(context: BeautifulSoupCrawlingContext) -> None:
+             source = context.request.user_data.get('source', 'unknown')
+             context.log.info(
+                 f'Processing request: {context.request.url} from source: {source}'
+             )
+
+         # Create handler for requests labeled 'apify'
+         @crawler.router.handler('apify')
+         async def apify_handler(context: BeautifulSoupCrawlingContext) -> None:
+             source = context.request.user_data.get('source', 'unknown')
+             context.log.info(
+                 f'Apify handler processing: {context.request.url} from source: {source}'
+             )
+
+         # Create handler for requests labeled 'crawlee'
+         @crawler.router.handler('crawlee')
+         async def crawlee_handler(context: BeautifulSoupCrawlingContext) -> None:
+             source = context.request.user_data.get('source', 'unknown')
+             context.log.info(
+                 f'Crawlee handler processing: {context.request.url} from source: {source}'
+             )
+
+         await crawler.run()
+
+
+ if __name__ == '__main__':
+     asyncio.run(main())
@@ -0,0 +1,22 @@
+ ---
+ id: using-sitemap-request-loader
+ title: Using sitemap request loader
+ ---
+
+ import ApiLink from '@site/src/components/ApiLink';
+
+ import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
+
+ import SitemapRequestLoaderExample from '!!raw-loader!roa-loader!./code_examples/using_sitemap_request_loader.py';
+
+ This example demonstrates how to use <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> to crawl websites that provide `sitemap.xml` files following the [Sitemaps protocol](https://www.sitemaps.org/protocol.html). The <ApiLink to="class/SitemapRequestLoader">`SitemapRequestLoader`</ApiLink> processes sitemaps in a streaming fashion without loading them entirely into memory, making it suitable for large sitemaps.
+
+ The example shows how to use the `transform_request_function` parameter to configure request options based on URL patterns. This allows you to modify request properties such as labels and user data based on the source URL, enabling different handling logic for different websites or sections.
+
+ The following code example implements processing of sitemaps from two different domains (Apify and Crawlee), with different labels assigned to requests based on their host. The `create_transform_request` function maps each host to the corresponding request configuration, while the crawler uses different handlers based on the assigned labels.
+
+ <RunnableCodeBlock className="language-python" language="python">
+     {SitemapRequestLoaderExample}
+ </RunnableCodeBlock>
+
+ For more information about request loaders, see the [Request loaders guide](../guides/request-loaders).
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"

  [project]
  name = "crawlee"
- version = "1.0.5b20"
+ version = "1.0.5b22"
  description = "Crawlee for Python"
  authors = [{ name = "Apify Technologies s.r.o.", email = "support@apify.com" }]
  license = { file = "LICENSE" }
@@ -56,7 +56,7 @@ from crawlee.errors import (
      SessionError,
      UserDefinedErrorHandlerError,
  )
- from crawlee.events._types import Event, EventCrawlerStatusData, EventPersistStateData
+ from crawlee.events._types import Event, EventCrawlerStatusData
  from crawlee.http_clients import ImpitHttpClient
  from crawlee.router import Router
  from crawlee.sessions import SessionPool
@@ -751,9 +751,6 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):

          await self._autoscaled_pool.run()

-         # Emit PERSIST_STATE event when crawler is finishing to allow listeners to persist their state if needed
-         event_manager.emit(event=Event.PERSIST_STATE, event_data=EventPersistStateData(is_migrating=False))
-
      async def add_requests(
          self,
          requests: Sequence[str | Request],
@@ -130,11 +130,13 @@ class EventManager:
          if not self._active:
              raise RuntimeError(f'The {self.__class__.__name__} is not active.')

+         # Stop persist state event periodic emission and manually emit last one to ensure latest state is saved.
+         await self._emit_persist_state_event_rec_task.stop()
+         await self._emit_persist_state_event()
          await self.wait_for_all_listeners_to_complete(timeout=self._close_timeout)
          self._event_emitter.remove_all_listeners()
          self._listener_tasks.clear()
          self._listeners_to_wrappers.clear()
-         await self._emit_persist_state_event_rec_task.stop()
          self._active = False

      @overload
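
The `EventManager` hunk above moves the periodic-emitter shutdown to the top of the close sequence and adds one final manual emission, so `PERSIST_STATE` listeners run at least once more before they are removed. A rough usage sketch, assuming `Event`, `EventManager` and `EventPersistStateData` are exported from `crawlee.events`:

```python
import asyncio

from crawlee.events import Event, EventManager, EventPersistStateData


async def main() -> None:
    event_manager = EventManager()

    def on_persist_state(event_data: EventPersistStateData) -> None:
        # With the change above, this listener should fire at least once more
        # when the context below is exited, before listeners are removed.
        print(f'Persisting state (is_migrating={event_data.is_migrating})')

    event_manager.on(event=Event.PERSIST_STATE, listener=on_persist_state)

    async with event_manager:
        await asyncio.sleep(0.1)


asyncio.run(main())
```
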
@@ -9,7 +9,7 @@ from typing import TYPE_CHECKING, Annotated, Any
  from pydantic import BaseModel, ConfigDict, Field
  from typing_extensions import override

- from crawlee import Request
+ from crawlee import Request, RequestOptions
  from crawlee._utils.docs import docs_group
  from crawlee._utils.globs import Glob
  from crawlee._utils.recoverable_state import RecoverableState
@@ -18,9 +18,10 @@ from crawlee.request_loaders._request_loader import RequestLoader

  if TYPE_CHECKING:
      import re
-     from collections.abc import Sequence
+     from collections.abc import Callable, Sequence
      from types import TracebackType

+     from crawlee import RequestTransformAction
      from crawlee.http_clients import HttpClient
      from crawlee.proxy_configuration import ProxyInfo
      from crawlee.storage_clients.models import ProcessedRequest
@@ -112,6 +113,7 @@ class SitemapRequestLoader(RequestLoader):
          exclude: list[re.Pattern[Any] | Glob] | None = None,
          max_buffer_size: int = 200,
          persist_state_key: str | None = None,
+         transform_request_function: Callable[[RequestOptions], RequestOptions | RequestTransformAction] | None = None,
      ) -> None:
          """Initialize the sitemap request loader.

@@ -125,6 +127,9 @@
              persist_state_key: A key for persisting the loader's state in the KeyValueStore.
                  When provided, allows resuming from where it left off after interruption.
                  If None, no state persistence occurs.
+             transform_request_function: An optional function to transform requests
+                 generated by the loader. It receives `RequestOptions` with `url` and should return either
+                 modified `RequestOptions` or a `RequestTransformAction`.
          """
          self._http_client = http_client
          self._sitemap_urls = sitemap_urls
@@ -132,6 +137,7 @@
          self._exclude = exclude
          self._proxy_info = proxy_info
          self._max_buffer_size = max_buffer_size
+         self._transform_request_function = transform_request_function

          # Synchronization for queue operations
          self._queue_has_capacity = asyncio.Event()
@@ -313,8 +319,15 @@

              async with self._queue_lock:
                  url = state.url_queue.popleft()
-
-                 request = Request.from_url(url)
+                 request_option = RequestOptions(url=url)
+                 if self._transform_request_function:
+                     transform_request_option = self._transform_request_function(request_option)
+                     if transform_request_option == 'skip':
+                         state.total_count -= 1
+                         continue
+                     if transform_request_option != 'unchanged':
+                         request_option = transform_request_option
+                 request = Request.from_url(**request_option)
                  state.in_progress.add(request.url)
                  if len(state.url_queue) < self._max_buffer_size:
                      self._queue_has_capacity.set()
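
The fetch loop above now builds a `RequestOptions` dict per URL and honours the two special return values of the transform callback: `'skip'` drops the URL (and decrements the total count), while `'unchanged'` keeps the original options. A minimal sketch of a compatible callback; the `.pdf` filter and the `DETAIL` label are illustrative only:

```python
from crawlee import RequestOptions, RequestTransformAction


def transform_request(request_options: RequestOptions) -> RequestOptions | RequestTransformAction:
    # Drop PDF links entirely; the loader then decrements its total count.
    if request_options['url'].endswith('.pdf'):
        return 'skip'

    # Otherwise tag the request and hand the modified options back to the loader.
    request_options['label'] = 'DETAIL'
    return request_options
```
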
@@ -1,6 +1,7 @@
  from __future__ import annotations

  import json
+ import warnings
  from dataclasses import asdict, dataclass
  from datetime import datetime, timedelta, timezone
  from typing import Annotated, Any
@@ -76,7 +77,6 @@ class StatisticsState(BaseModel):
      crawler_started_at: Annotated[datetime | None, Field(alias='crawlerStartedAt')] = None
      crawler_last_started_at: Annotated[datetime | None, Field(alias='crawlerLastStartTimestamp')] = None
      crawler_finished_at: Annotated[datetime | None, Field(alias='crawlerFinishedAt')] = None
-     crawler_runtime: Annotated[timedelta_ms, Field(alias='crawlerRuntimeMillis')] = timedelta()
      errors: dict[str, Any] = Field(default_factory=dict)
      retry_errors: dict[str, Any] = Field(alias='retryErrors', default_factory=dict)
      requests_with_status_code: dict[str, int] = Field(alias='requestsWithStatusCode', default_factory=dict)
@@ -93,6 +93,37 @@
          ),
      ] = {}

+     # Used to track the crawler runtime, that had already been persisted. This is the runtime from previous runs.
+     _runtime_offset: Annotated[timedelta, Field(exclude=True)] = timedelta()
+
+     def model_post_init(self, /, __context: Any) -> None:
+         self._runtime_offset = self.crawler_runtime or self._runtime_offset
+
+     @property
+     def crawler_runtime(self) -> timedelta:
+         if self.crawler_last_started_at:
+             finished_at = self.crawler_finished_at or datetime.now(timezone.utc)
+             return self._runtime_offset + finished_at - self.crawler_last_started_at
+         return self._runtime_offset
+
+     @crawler_runtime.setter
+     def crawler_runtime(self, value: timedelta) -> None:
+         # Setter for backwards compatibility only, the crawler_runtime is now computed_field, and cant be set manually.
+         # To be removed in v2 release https://github.com/apify/crawlee-python/issues/1567
+         warnings.warn(
+             f"Setting 'crawler_runtime' is deprecated and will be removed in a future version."
+             f' Value {value} will not be used.',
+             DeprecationWarning,
+             stacklevel=2,
+         )
+
+     @computed_field(alias='crawlerRuntimeMillis')
+     def crawler_runtime_for_serialization(self) -> timedelta:
+         if self.crawler_last_started_at:
+             finished_at = self.crawler_finished_at or datetime.now(timezone.utc)
+             return self._runtime_offset + finished_at - self.crawler_last_started_at
+         return self._runtime_offset
+
      @computed_field(alias='requestTotalDurationMillis', return_type=timedelta_ms) # type: ignore[prop-decorator]
      @property
      def request_total_duration(self) -> timedelta:
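
The new `crawler_runtime` property adds the persisted offset from previous runs to the elapsed time of the current run, falling back to the current time while the crawler is still running. As a standalone sketch of that arithmetic (illustrative values, not crawlee code):

```python
from datetime import datetime, timedelta, timezone

# Illustrative values only.
runtime_offset = timedelta(seconds=30)  # runtime carried over from previous runs
crawler_last_started_at = datetime.now(timezone.utc) - timedelta(seconds=10)
crawler_finished_at = None  # the current run has not finished yet

finished_at = crawler_finished_at or datetime.now(timezone.utc)
crawler_runtime = runtime_offset + finished_at - crawler_last_started_at
print(crawler_runtime)  # roughly 40 seconds: previous runs plus the run so far
```
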
@@ -110,9 +110,6 @@ class Statistics(Generic[TStatisticsState]):
          # Flag to indicate the context state.
          self._active = False

-         # Pre-existing runtime offset, that can be non-zero when restoring serialized state from KVS.
-         self._runtime_offset = timedelta(seconds=0)
-
      def replace_state_model(self, state_model: type[TNewStatisticsState]) -> Statistics[TNewStatisticsState]:
          """Create near copy of the `Statistics` with replaced `state_model`."""
          new_statistics: Statistics[TNewStatisticsState] = Statistics(
@@ -168,8 +165,8 @@
              raise RuntimeError(f'The {self.__class__.__name__} is already active.')

          await self._state.initialize()
-
-         self._runtime_offset = self.state.crawler_runtime
+         # Reset `crawler_finished_at` to indicate a new run in progress.
+         self.state.crawler_finished_at = None

          # Start periodic logging and let it print initial state before activation.
          self._periodic_logger.start()
@@ -200,10 +197,6 @@
          # Stop logging and deactivate the statistics to prevent further changes to crawler_runtime
          await self._periodic_logger.stop()
          self.state.crawler_finished_at = datetime.now(timezone.utc)
-         self.state.crawler_runtime = (
-             self._runtime_offset + self.state.crawler_finished_at - self.state.crawler_last_started_at
-         )
-
          self._active = False
          await self._state.teardown()

@@ -262,20 +255,8 @@

          del self._requests_in_progress[request_id_or_key]

-     def _update_crawler_runtime(self) -> None:
-         current_run_duration = (
-             (datetime.now(timezone.utc) - self.state.crawler_last_started_at)
-             if self.state.crawler_last_started_at
-             else timedelta()
-         )
-         self.state.crawler_runtime = current_run_duration + self._runtime_offset
-
      def calculate(self) -> FinalStatistics:
          """Calculate the current statistics."""
-         if self._active:
-             # Only update state when active. If not, just report the last known runtime.
-             self._update_crawler_runtime()
-
          total_minutes = self.state.crawler_runtime.total_seconds() / 60
          state = self._state.current_value
          serialized_state = state.model_dump(by_alias=False)
@@ -1673,7 +1673,6 @@ def _process_run_crawler(requests: list[str], storage_dir: str) -> StatisticsSta
      return asyncio.run(_run_crawler(requests=requests, storage_dir=storage_dir))


- @pytest.mark.skip(reason='This test is flaky, see https://github.com/apify/crawlee-python/issues/1560.')
  async def test_crawler_statistics_persistence(tmp_path: Path) -> None:
      """Test that crawler statistics persist and are loaded correctly.

@@ -5,6 +5,7 @@ import logging
  from datetime import timedelta
  from functools import update_wrapper
  from typing import TYPE_CHECKING, Any
+ from unittest import mock
  from unittest.mock import AsyncMock, MagicMock

  import pytest
@@ -207,3 +208,14 @@ async def test_methods_raise_error_when_not_active(event_system_info_data: Event
      await event_manager.wait_for_all_listeners_to_complete()

      assert event_manager.active is True
+
+
+ async def test_event_manager_in_context_persistence() -> None:
+     """Test that entering the `EventManager` context emits persist state event at least once."""
+     event_manager = EventManager()
+
+     with mock.patch.object(event_manager, '_emit_persist_state_event', AsyncMock()) as mocked_emit_persist_state_event:
+         async with event_manager:
+             pass
+
+     assert mocked_emit_persist_state_event.call_count >= 1
@@ -4,6 +4,7 @@ import gzip

  from yarl import URL

+ from crawlee import RequestOptions, RequestTransformAction
  from crawlee.http_clients._base import HttpClient
  from crawlee.request_loaders._sitemap_request_loader import SitemapRequestLoader
  from crawlee.storages import KeyValueStore
@@ -172,3 +173,37 @@ async def test_recovery_data_persistence_for_sitemap_loading(

      assert item is not None
      assert item.url == next_item_in_kvs
+
+
+ async def test_transform_request_function(server_url: URL, http_client: HttpClient) -> None:
+     sitemap_url = (server_url / 'sitemap.xml').with_query(base64=encode_base64(BASIC_SITEMAP.encode()))
+
+     def transform_request(request_options: RequestOptions) -> RequestOptions | RequestTransformAction:
+         request_options['user_data'] = {'transformed': True}
+         return request_options
+
+     sitemap_loader = SitemapRequestLoader(
+         [str(sitemap_url)],
+         http_client=http_client,
+         transform_request_function=transform_request,
+     )
+
+     extracted_urls = set()
+
+     while not await sitemap_loader.is_finished():
+         request = await sitemap_loader.fetch_next_request()
+         assert request is not None
+         assert request.user_data.get('transformed') is True
+
+         extracted_urls.add(request.url)
+
+         await sitemap_loader.mark_request_as_handled(request)
+
+     assert len(extracted_urls) == 5
+     assert extracted_urls == {
+         'http://not-exists.com/',
+         'http://not-exists.com/catalog?item=12&desc=vacation_hawaii',
+         'http://not-exists.com/catalog?item=73&desc=vacation_new_zealand',
+         'http://not-exists.com/catalog?item=74&desc=vacation_newfoundland',
+         'http://not-exists.com/catalog?item=83&desc=vacation_usa',
+     }
@@ -693,7 +693,7 @@ toml = [

  [[package]]
  name = "crawlee"
- version = "1.0.5b20"
+ version = "1.0.5b22"
  source = { editable = "." }
  dependencies = [
      { name = "cachetools" },