apify 2.7.1b10.tar.gz → 2.7.1b12.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of apify might be problematic.

Files changed (236)
  1. {apify-2.7.1b10 → apify-2.7.1b12}/.github/workflows/build_and_deploy_docs.yaml +2 -2
  2. {apify-2.7.1b10 → apify-2.7.1b12}/CHANGELOG.md +25 -0
  3. {apify-2.7.1b10 → apify-2.7.1b12}/PKG-INFO +17 -2
  4. {apify-2.7.1b10 → apify-2.7.1b12}/README.md +16 -1
  5. apify-2.7.1b12/docs/03_guides/01_beautifulsoup_httpx.mdx +30 -0
  6. apify-2.7.1b12/docs/03_guides/02_parsel_impit.mdx +28 -0
  7. apify-2.7.1b12/docs/03_guides/05_crawlee.mdx +46 -0
  8. {apify-2.7.1b10/docs/02_guides → apify-2.7.1b12/docs/03_guides}/code/01_beautifulsoup_httpx.py +7 -4
  9. apify-2.7.1b12/docs/03_guides/code/02_parsel_impit.py +94 -0
  10. {apify-2.7.1b10/docs/02_guides → apify-2.7.1b12/docs/03_guides}/code/03_playwright.py +5 -2
  11. {apify-2.7.1b10/docs/02_guides → apify-2.7.1b12/docs/03_guides}/code/04_selenium.py +4 -2
  12. apify-2.7.1b12/docs/03_guides/code/05_crawlee_beautifulsoup.py +55 -0
  13. apify-2.7.1b12/docs/03_guides/code/05_crawlee_parsel.py +55 -0
  14. apify-2.7.1b12/docs/03_guides/code/05_crawlee_playwright.py +58 -0
  15. {apify-2.7.1b10 → apify-2.7.1b12}/pyproject.toml +1 -1
  16. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/_charging.py +19 -0
  17. {apify-2.7.1b10 → apify-2.7.1b12}/tests/integration/test_actor_scrapy.py +1 -1
  18. {apify-2.7.1b10 → apify-2.7.1b12}/uv.lock +1 -1
  19. {apify-2.7.1b10 → apify-2.7.1b12}/website/package-lock.json +7 -7
  20. {apify-2.7.1b10 → apify-2.7.1b12}/website/package.json +1 -1
  21. {apify-2.7.1b10 → apify-2.7.1b12}/website/sidebars.js +4 -4
  22. apify-2.7.1b12/website/src/theme/DocItem/Content/index.js +6 -0
  23. apify-2.7.1b10/docs/02_guides/01_beautifulsoup_httpx.mdx +0 -30
  24. apify-2.7.1b10/docs/02_guides/02_crawlee.mdx +0 -37
  25. apify-2.7.1b10/docs/02_guides/code/02_crawlee_beautifulsoup.py +0 -55
  26. apify-2.7.1b10/docs/02_guides/code/02_crawlee_playwright.py +0 -68
  27. {apify-2.7.1b10 → apify-2.7.1b12}/.editorconfig +0 -0
  28. {apify-2.7.1b10 → apify-2.7.1b12}/.github/CODEOWNERS +0 -0
  29. {apify-2.7.1b10 → apify-2.7.1b12}/.github/workflows/check_pr_title.yaml +0 -0
  30. {apify-2.7.1b10 → apify-2.7.1b12}/.github/workflows/pre_release.yaml +0 -0
  31. {apify-2.7.1b10 → apify-2.7.1b12}/.github/workflows/release.yaml +0 -0
  32. {apify-2.7.1b10 → apify-2.7.1b12}/.github/workflows/run_code_checks.yaml +0 -0
  33. {apify-2.7.1b10 → apify-2.7.1b12}/.github/workflows/update_new_issue.yaml +0 -0
  34. {apify-2.7.1b10 → apify-2.7.1b12}/.gitignore +0 -0
  35. {apify-2.7.1b10 → apify-2.7.1b12}/.markdownlint.yaml +0 -0
  36. {apify-2.7.1b10 → apify-2.7.1b12}/.pre-commit-config.yaml +0 -0
  37. {apify-2.7.1b10 → apify-2.7.1b12}/CONTRIBUTING.md +0 -0
  38. {apify-2.7.1b10 → apify-2.7.1b12}/LICENSE +0 -0
  39. {apify-2.7.1b10 → apify-2.7.1b12}/Makefile +0 -0
  40. {apify-2.7.1b10 → apify-2.7.1b12}/docs/01_overview/01_introduction.mdx +0 -0
  41. {apify-2.7.1b10 → apify-2.7.1b12}/docs/01_overview/02_running_actors_locally.mdx +0 -0
  42. {apify-2.7.1b10 → apify-2.7.1b12}/docs/01_overview/03_actor_structure.mdx +0 -0
  43. {apify-2.7.1b10 → apify-2.7.1b12}/docs/01_overview/code/01_introduction.py +0 -0
  44. {apify-2.7.1b10 → apify-2.7.1b12}/docs/01_overview/code/actor_structure/__init__.py +0 -0
  45. {apify-2.7.1b10 → apify-2.7.1b12}/docs/01_overview/code/actor_structure/__main__.py +0 -0
  46. {apify-2.7.1b10 → apify-2.7.1b12}/docs/01_overview/code/actor_structure/main.py +0 -0
  47. {apify-2.7.1b10 → apify-2.7.1b12}/docs/01_overview/code/actor_structure/py.typed +0 -0
  48. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/01_actor_lifecycle.mdx +0 -0
  49. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/02_actor_input.mdx +0 -0
  50. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/03_storages.mdx +0 -0
  51. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/04_actor_events.mdx +0 -0
  52. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/05_proxy_management.mdx +0 -0
  53. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/06_interacting_with_other_actors.mdx +0 -0
  54. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/07_webhooks.mdx +0 -0
  55. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/08_access_apify_api.mdx +0 -0
  56. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/09_running_webserver.mdx +0 -0
  57. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/10_logging.mdx +0 -0
  58. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/11_configuration.mdx +0 -0
  59. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/12_pay_per_event.mdx +0 -0
  60. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/01_context_manager.py +0 -0
  61. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/01_init_exit.py +0 -0
  62. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/01_reboot.py +0 -0
  63. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/01_status_message.py +0 -0
  64. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/02_input.py +0 -0
  65. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/03_dataset_exports.py +0 -0
  66. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/03_dataset_read_write.py +0 -0
  67. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/03_deleting_storages.py +0 -0
  68. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/03_kvs_iterating.py +0 -0
  69. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/03_kvs_public_url.py +0 -0
  70. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/03_kvs_read_write.py +0 -0
  71. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/03_opening_storages.py +0 -0
  72. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/03_rq.py +0 -0
  73. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/04_actor_events.py +0 -0
  74. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/05_apify_proxy.py +0 -0
  75. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/05_apify_proxy_config.py +0 -0
  76. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/05_custom_proxy.py +0 -0
  77. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/05_custom_proxy_function.py +0 -0
  78. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/05_proxy_actor_input.py +0 -0
  79. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/05_proxy_httpx.py +0 -0
  80. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/05_proxy_rotation.py +0 -0
  81. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/06_interacting_call.py +0 -0
  82. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/06_interacting_call_task.py +0 -0
  83. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/06_interacting_metamorph.py +0 -0
  84. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/06_interacting_start.py +0 -0
  85. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/07_webhook.py +0 -0
  86. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/07_webhook_preventing.py +0 -0
  87. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/08_actor_client.py +0 -0
  88. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/08_actor_new_client.py +0 -0
  89. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/09_webserver.py +0 -0
  90. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/10_log_config.py +0 -0
  91. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/10_logger_usage.py +0 -0
  92. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/10_redirect_log.py +0 -0
  93. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/10_redirect_log_existing_run.py +0 -0
  94. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/11_config.py +0 -0
  95. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/actor_charge.py +0 -0
  96. {apify-2.7.1b10/docs/03_concepts → apify-2.7.1b12/docs/02_concepts}/code/conditional_actor_charge.py +0 -0
  97. {apify-2.7.1b10/docs/02_guides → apify-2.7.1b12/docs/03_guides}/03_playwright.mdx +0 -0
  98. {apify-2.7.1b10/docs/02_guides → apify-2.7.1b12/docs/03_guides}/04_selenium.mdx +0 -0
  99. apify-2.7.1b10/docs/02_guides/05_scrapy.mdx → apify-2.7.1b12/docs/03_guides/06_scrapy.mdx +0 -0
  100. {apify-2.7.1b10/docs/02_guides → apify-2.7.1b12/docs/03_guides}/code/scrapy_project/src/__init__.py +0 -0
  101. {apify-2.7.1b10/docs/02_guides → apify-2.7.1b12/docs/03_guides}/code/scrapy_project/src/__main__.py +0 -0
  102. {apify-2.7.1b10/docs/02_guides → apify-2.7.1b12/docs/03_guides}/code/scrapy_project/src/items.py +0 -0
  103. {apify-2.7.1b10/docs/02_guides → apify-2.7.1b12/docs/03_guides}/code/scrapy_project/src/main.py +0 -0
  104. {apify-2.7.1b10/docs/02_guides → apify-2.7.1b12/docs/03_guides}/code/scrapy_project/src/py.typed +0 -0
  105. {apify-2.7.1b10/docs/02_guides → apify-2.7.1b12/docs/03_guides}/code/scrapy_project/src/settings.py +0 -0
  106. {apify-2.7.1b10/docs/02_guides → apify-2.7.1b12/docs/03_guides}/code/scrapy_project/src/spiders/__init__.py +0 -0
  107. {apify-2.7.1b10/docs/02_guides → apify-2.7.1b12/docs/03_guides}/code/scrapy_project/src/spiders/py.typed +0 -0
  108. {apify-2.7.1b10/docs/02_guides → apify-2.7.1b12/docs/03_guides}/code/scrapy_project/src/spiders/title.py +0 -0
  109. {apify-2.7.1b10 → apify-2.7.1b12}/docs/04_upgrading/upgrading_to_v2.md +0 -0
  110. {apify-2.7.1b10 → apify-2.7.1b12}/docs/04_upgrading/upgrading_to_v3.md +0 -0
  111. {apify-2.7.1b10 → apify-2.7.1b12}/docs/pyproject.toml +0 -0
  112. {apify-2.7.1b10 → apify-2.7.1b12}/renovate.json +0 -0
  113. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/__init__.py +0 -0
  114. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/_actor.py +0 -0
  115. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/_configuration.py +0 -0
  116. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/_consts.py +0 -0
  117. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/_crypto.py +0 -0
  118. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/_models.py +0 -0
  119. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/_proxy_configuration.py +0 -0
  120. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/_utils.py +0 -0
  121. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/events/__init__.py +0 -0
  122. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/events/_apify_event_manager.py +0 -0
  123. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/events/_types.py +0 -0
  124. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/events/py.typed +0 -0
  125. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/log.py +0 -0
  126. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/py.typed +0 -0
  127. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/request_loaders/__init__.py +0 -0
  128. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/request_loaders/_apify_request_list.py +0 -0
  129. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/request_loaders/py.typed +0 -0
  130. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/scrapy/__init__.py +0 -0
  131. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/scrapy/_actor_runner.py +0 -0
  132. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/scrapy/_async_thread.py +0 -0
  133. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/scrapy/_logging_config.py +0 -0
  134. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/scrapy/extensions/__init__.py +0 -0
  135. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/scrapy/extensions/_httpcache.py +0 -0
  136. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/scrapy/middlewares/__init__.py +0 -0
  137. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/scrapy/middlewares/apify_proxy.py +0 -0
  138. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/scrapy/middlewares/py.typed +0 -0
  139. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/scrapy/pipelines/__init__.py +0 -0
  140. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/scrapy/pipelines/actor_dataset_push.py +0 -0
  141. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/scrapy/pipelines/py.typed +0 -0
  142. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/scrapy/py.typed +0 -0
  143. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/scrapy/requests.py +0 -0
  144. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/scrapy/scheduler.py +0 -0
  145. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/scrapy/utils.py +0 -0
  146. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/storage_clients/__init__.py +0 -0
  147. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/storage_clients/_apify/__init__.py +0 -0
  148. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/storage_clients/_apify/_dataset_client.py +0 -0
  149. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/storage_clients/_apify/_key_value_store_client.py +0 -0
  150. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/storage_clients/_apify/_models.py +0 -0
  151. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/storage_clients/_apify/_request_queue_client.py +0 -0
  152. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/storage_clients/_apify/_storage_client.py +0 -0
  153. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/storage_clients/_apify/py.typed +0 -0
  154. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/storage_clients/_file_system/__init__.py +0 -0
  155. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/storage_clients/_file_system/_key_value_store_client.py +0 -0
  156. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/storage_clients/_file_system/_storage_client.py +0 -0
  157. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/storage_clients/py.typed +0 -0
  158. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/storages/__init__.py +0 -0
  159. {apify-2.7.1b10 → apify-2.7.1b12}/src/apify/storages/py.typed +0 -0
  160. {apify-2.7.1b10 → apify-2.7.1b12}/tests/integration/README.md +0 -0
  161. {apify-2.7.1b10 → apify-2.7.1b12}/tests/integration/__init__.py +0 -0
  162. {apify-2.7.1b10 → apify-2.7.1b12}/tests/integration/_utils.py +0 -0
  163. {apify-2.7.1b10 → apify-2.7.1b12}/tests/integration/actor_source_base/Dockerfile +0 -0
  164. {apify-2.7.1b10 → apify-2.7.1b12}/tests/integration/actor_source_base/requirements.txt +0 -0
  165. {apify-2.7.1b10 → apify-2.7.1b12}/tests/integration/actor_source_base/server.py +0 -0
  166. {apify-2.7.1b10 → apify-2.7.1b12}/tests/integration/actor_source_base/src/__init__.py +0 -0
  167. {apify-2.7.1b10 → apify-2.7.1b12}/tests/integration/actor_source_base/src/__main__.py +0 -0
  168. {apify-2.7.1b10 → apify-2.7.1b12}/tests/integration/actor_source_base/src/main.py +0 -0
  169. {apify-2.7.1b10 → apify-2.7.1b12}/tests/integration/conftest.py +0 -0
  170. {apify-2.7.1b10 → apify-2.7.1b12}/tests/integration/test_actor_api_helpers.py +0 -0
  171. {apify-2.7.1b10 → apify-2.7.1b12}/tests/integration/test_actor_call_timeouts.py +0 -0
  172. {apify-2.7.1b10 → apify-2.7.1b12}/tests/integration/test_actor_charge.py +0 -0
  173. {apify-2.7.1b10 → apify-2.7.1b12}/tests/integration/test_actor_create_proxy_configuration.py +0 -0
  174. {apify-2.7.1b10 → apify-2.7.1b12}/tests/integration/test_actor_dataset.py +0 -0
  175. {apify-2.7.1b10 → apify-2.7.1b12}/tests/integration/test_actor_events.py +0 -0
  176. {apify-2.7.1b10 → apify-2.7.1b12}/tests/integration/test_actor_key_value_store.py +0 -0
  177. {apify-2.7.1b10 → apify-2.7.1b12}/tests/integration/test_actor_lifecycle.py +0 -0
  178. {apify-2.7.1b10 → apify-2.7.1b12}/tests/integration/test_actor_log.py +0 -0
  179. {apify-2.7.1b10 → apify-2.7.1b12}/tests/integration/test_actor_request_queue.py +0 -0
  180. {apify-2.7.1b10 → apify-2.7.1b12}/tests/integration/test_crawlers_with_storages.py +0 -0
  181. {apify-2.7.1b10 → apify-2.7.1b12}/tests/integration/test_fixtures.py +0 -0
  182. {apify-2.7.1b10 → apify-2.7.1b12}/tests/integration/test_request_queue.py +0 -0
  183. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/__init__.py +0 -0
  184. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/actor/__init__.py +0 -0
  185. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/actor/test_actor_create_proxy_configuration.py +0 -0
  186. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/actor/test_actor_dataset.py +0 -0
  187. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/actor/test_actor_env_helpers.py +0 -0
  188. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/actor/test_actor_helpers.py +0 -0
  189. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/actor/test_actor_key_value_store.py +0 -0
  190. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/actor/test_actor_lifecycle.py +0 -0
  191. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/actor/test_actor_log.py +0 -0
  192. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/actor/test_actor_non_default_instance.py +0 -0
  193. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/actor/test_actor_request_queue.py +0 -0
  194. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/actor/test_configuration.py +0 -0
  195. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/actor/test_request_list.py +0 -0
  196. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/conftest.py +0 -0
  197. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/events/__init__.py +0 -0
  198. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/events/test_apify_event_manager.py +0 -0
  199. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/scrapy/__init__.py +0 -0
  200. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/scrapy/extensions/__init__.py +0 -0
  201. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/scrapy/extensions/test_httpcache.py +0 -0
  202. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/scrapy/middlewares/__init__.py +0 -0
  203. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/scrapy/middlewares/test_apify_proxy.py +0 -0
  204. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/scrapy/pipelines/__init__.py +0 -0
  205. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/scrapy/pipelines/test_actor_dataset_push.py +0 -0
  206. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/scrapy/requests/__init__.py +0 -0
  207. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/scrapy/requests/test_to_apify_request.py +0 -0
  208. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/scrapy/requests/test_to_scrapy_request.py +0 -0
  209. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/scrapy/utils/__init__.py +0 -0
  210. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/scrapy/utils/test_apply_apify_settings.py +0 -0
  211. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/scrapy/utils/test_get_basic_auth_header.py +0 -0
  212. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/storage_clients/__init__.py +0 -0
  213. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/storage_clients/test_apify_request_queue_client.py +0 -0
  214. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/storage_clients/test_file_system.py +0 -0
  215. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/test_crypto.py +0 -0
  216. {apify-2.7.1b10 → apify-2.7.1b12}/tests/unit/test_proxy_configuration.py +0 -0
  217. {apify-2.7.1b10 → apify-2.7.1b12}/website/.eslintrc.json +0 -0
  218. {apify-2.7.1b10 → apify-2.7.1b12}/website/babel.config.js +0 -0
  219. {apify-2.7.1b10 → apify-2.7.1b12}/website/build_api_reference.sh +0 -0
  220. {apify-2.7.1b10 → apify-2.7.1b12}/website/docusaurus.config.js +0 -0
  221. {apify-2.7.1b10 → apify-2.7.1b12}/website/generate_module_shortcuts.py +0 -0
  222. {apify-2.7.1b10 → apify-2.7.1b12}/website/src/components/ApiLink.jsx +0 -0
  223. {apify-2.7.1b10 → apify-2.7.1b12}/website/src/components/Gradients.jsx +0 -0
  224. {apify-2.7.1b10 → apify-2.7.1b12}/website/src/components/Highlights.jsx +0 -0
  225. {apify-2.7.1b10 → apify-2.7.1b12}/website/src/components/Highlights.module.css +0 -0
  226. {apify-2.7.1b10 → apify-2.7.1b12}/website/src/components/RunnableCodeBlock.jsx +0 -0
  227. {apify-2.7.1b10 → apify-2.7.1b12}/website/src/components/RunnableCodeBlock.module.css +0 -0
  228. {apify-2.7.1b10 → apify-2.7.1b12}/website/src/css/custom.css +0 -0
  229. {apify-2.7.1b10 → apify-2.7.1b12}/website/src/pages/home_page_example.py +0 -0
  230. {apify-2.7.1b10 → apify-2.7.1b12}/website/src/pages/index.js +0 -0
  231. {apify-2.7.1b10 → apify-2.7.1b12}/website/src/pages/index.module.css +0 -0
  232. {apify-2.7.1b10 → apify-2.7.1b12}/website/static/.nojekyll +0 -0
  233. {apify-2.7.1b10 → apify-2.7.1b12}/website/static/img/docs-og.png +0 -0
  234. {apify-2.7.1b10 → apify-2.7.1b12}/website/static/img/guides/redirected_logs_example.webp +0 -0
  235. {apify-2.7.1b10 → apify-2.7.1b12}/website/tools/docs-prettier.config.js +0 -0
  236. {apify-2.7.1b10 → apify-2.7.1b12}/website/tools/utils/externalLink.js +0 -0
.github/workflows/build_and_deploy_docs.yaml

@@ -27,7 +27,7 @@ jobs:
           token: ${{ secrets.APIFY_SERVICE_ACCOUNT_GITHUB_TOKEN }}
 
       - name: Set up Node
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v5
         with:
           node-version: ${{ env.NODE_VERSION }}
           cache: npm

@@ -50,7 +50,7 @@ jobs:
          git push
 
      - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
        with:
          python-version: ${{ env.PYTHON_VERSION }}
 
CHANGELOG.md

@@ -8,6 +8,7 @@ All notable changes to this project will be documented in this file.
 ### 🚀 Features
 
 - Add deduplication to `add_batch_of_requests` ([#534](https://github.com/apify/apify-sdk-python/pull/534)) ([dd03c4d](https://github.com/apify/apify-sdk-python/commit/dd03c4d446f611492adf35f1b5738648ee5a66f7)) by [@Pijukatel](https://github.com/Pijukatel), closes [#514](https://github.com/apify/apify-sdk-python/issues/514)
+- Add new methods to ChargingManager ([#580](https://github.com/apify/apify-sdk-python/pull/580)) ([54f7f8b](https://github.com/apify/apify-sdk-python/commit/54f7f8b29c5982be98b595dac11eceff915035c9)) by [@vdusek](https://github.com/vdusek)
 
 ### 🐛 Bug Fixes
 

@@ -52,6 +53,30 @@ All notable changes to this project will be documented in this file.
 - Tagline overlap ([#501](https://github.com/apify/apify-sdk-python/pull/501)) ([bae8340](https://github.com/apify/apify-sdk-python/commit/bae8340c46fea756ea35ea4d591da84c09d478e2)) by [@katzino](https://github.com/katzino)
 
 
+## [2.7.0](https://github.com/apify/apify-sdk-python/releases/tag/v2.7.0) (2025-07-14)
+
+### 🚀 Features
+
+- **crypto:** Decrypt secret objects ([#482](https://github.com/apify/apify-sdk-python/pull/482)) ([ce9daf7](https://github.com/apify/apify-sdk-python/commit/ce9daf7381212b8dc194e8a643e5ca0dedbc0078)) by [@MFori](https://github.com/MFori)
+
+### 🐛 Bug Fixes
+
+- Sync `@docusaurus` theme version [internal] ([#500](https://github.com/apify/apify-sdk-python/pull/500)) ([a7485e7](https://github.com/apify/apify-sdk-python/commit/a7485e7d2276fde464ce862573d5b95e7d4d836a)) by [@katzino](https://github.com/katzino)
+- Tagline overlap ([#501](https://github.com/apify/apify-sdk-python/pull/501)) ([bae8340](https://github.com/apify/apify-sdk-python/commit/bae8340c46fea756ea35ea4d591da84c09d478e2)) by [@katzino](https://github.com/katzino)
+
+
+## [2.7.0](https://github.com/apify/apify-sdk-python/releases/tag/v2.7.0) (2025-07-14)
+
+### 🚀 Features
+
+- **crypto:** Decrypt secret objects ([#482](https://github.com/apify/apify-sdk-python/pull/482)) ([ce9daf7](https://github.com/apify/apify-sdk-python/commit/ce9daf7381212b8dc194e8a643e5ca0dedbc0078)) by [@MFori](https://github.com/MFori)
+
+### 🐛 Bug Fixes
+
+- Sync `@docusaurus` theme version [internal] ([#500](https://github.com/apify/apify-sdk-python/pull/500)) ([a7485e7](https://github.com/apify/apify-sdk-python/commit/a7485e7d2276fde464ce862573d5b95e7d4d836a)) by [@katzino](https://github.com/katzino)
+- Tagline overlap ([#501](https://github.com/apify/apify-sdk-python/pull/501)) ([bae8340](https://github.com/apify/apify-sdk-python/commit/bae8340c46fea756ea35ea4d591da84c09d478e2)) by [@katzino](https://github.com/katzino)
+
+
 
 ## [2.7.3](https://github.com/apify/apify-sdk-python/releases/tag/v2.7.3) (2025-08-11)
 
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: apify
-Version: 2.7.1b10
+Version: 2.7.1b12
 Summary: Apify SDK for Python
 Project-URL: Apify Homepage, https://apify.com
 Project-URL: Changelog, https://docs.apify.com/sdk/python/docs/changelog

@@ -240,7 +240,22 @@ Provides-Extra: scrapy
 Requires-Dist: scrapy>=2.11.0; extra == 'scrapy'
 Description-Content-Type: text/markdown
 
-# Apify SDK for Python
+<h1 align=center>Apify SDK for Python</h1>
+
+<p align=center>
+    <a href="https://badge.fury.io/py/apify" rel="nofollow">
+        <img src="https://badge.fury.io/py/apify.svg" alt="PyPI version" style="max-width: 100%;">
+    </a>
+    <a href="https://pypi.org/project/apify/" rel="nofollow">
+        <img src="https://img.shields.io/pypi/dm/apify" alt="PyPI - Downloads" style="max-width: 100%;">
+    </a>
+    <a href="https://pypi.org/project/apify/" rel="nofollow">
+        <img src="https://img.shields.io/pypi/pyversions/apify" alt="PyPI - Python Version" style="max-width: 100%;">
+    </a>
+    <a href="https://discord.gg/jyEM2PRvMU" rel="nofollow">
+        <img src="https://img.shields.io/discord/801163717915574323?label=discord" alt="Chat on discord" style="max-width: 100%;">
+    </a>
+</p>
 
 The Apify SDK for Python is the official library to create [Apify Actors](https://docs.apify.com/platform/actors)
 in Python. It provides useful features like Actor lifecycle management, local storage emulation, and Actor
README.md

@@ -1,4 +1,19 @@
-# Apify SDK for Python
+<h1 align=center>Apify SDK for Python</h1>
+
+<p align=center>
+    <a href="https://badge.fury.io/py/apify" rel="nofollow">
+        <img src="https://badge.fury.io/py/apify.svg" alt="PyPI version" style="max-width: 100%;">
+    </a>
+    <a href="https://pypi.org/project/apify/" rel="nofollow">
+        <img src="https://img.shields.io/pypi/dm/apify" alt="PyPI - Downloads" style="max-width: 100%;">
+    </a>
+    <a href="https://pypi.org/project/apify/" rel="nofollow">
+        <img src="https://img.shields.io/pypi/pyversions/apify" alt="PyPI - Python Version" style="max-width: 100%;">
+    </a>
+    <a href="https://discord.gg/jyEM2PRvMU" rel="nofollow">
+        <img src="https://img.shields.io/discord/801163717915574323?label=discord" alt="Chat on discord" style="max-width: 100%;">
+    </a>
+</p>
 
 The Apify SDK for Python is the official library to create [Apify Actors](https://docs.apify.com/platform/actors)
 in Python. It provides useful features like Actor lifecycle management, local storage emulation, and Actor
docs/03_guides/01_beautifulsoup_httpx.mdx (new file)

@@ -0,0 +1,30 @@
+---
+id: beautifulsoup-httpx
+title: Using BeautifulSoup with HTTPX
+---
+
+import CodeBlock from '@theme/CodeBlock';
+
+import BeautifulSoupHttpxExample from '!!raw-loader!./code/01_beautifulsoup_httpx.py';
+
+In this guide, you'll learn how to use the [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/) library with the [HTTPX](https://www.python-httpx.org/) library in your Apify Actors.
+
+## Introduction
+
+[BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/) is a Python library for extracting data from HTML and XML files. It provides simple methods and Pythonic idioms for navigating, searching, and modifying a website's element tree, enabling efficient data extraction.
+
+[HTTPX](https://www.python-httpx.org/) is a modern, high-level HTTP client library for Python. It provides a simple interface for making HTTP requests and supports both synchronous and asynchronous requests.
+
+To create an Actor that uses these libraries, start from the [BeautifulSoup & Python](https://apify.com/templates/categories/python) Actor template. This template includes the [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/) and [HTTPX](https://www.python-httpx.org/) libraries preinstalled, allowing you to begin development immediately.
+
+## Example Actor
+
+Below is a simple Actor that recursively scrapes titles from all linked websites, up to a specified maximum depth, starting from URLs provided in the Actor input. It uses [HTTPX](https://www.python-httpx.org/) for fetching pages and [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/) for parsing their content to extract titles and links to other pages.
+
+<CodeBlock className="language-python">
+    {BeautifulSoupHttpxExample}
+</CodeBlock>
+
+## Conclusion
+
+In this guide, you learned how to use [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/) with [HTTPX](https://www.python-httpx.org/) in your Apify Actors. By combining these libraries, you can efficiently extract data from HTML or XML files, making it easy to build web scraping tasks in Python. See the [Actor templates](https://apify.com/templates/categories/python) to get started with your own scraping tasks. If you have questions or need assistance, feel free to reach out on our [GitHub](https://github.com/apify/apify-sdk-python) or join our [Discord community](https://discord.com/invite/jyEM2PRvMU). Happy scraping!
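Note: the example file this guide embeds (01_beautifulsoup_httpx.py) appears later in this diff only as partial hunks, because the file was moved rather than added. For orientation, here is a minimal sketch of the fetch-and-parse core the guide describes, using only the standard httpx and BeautifulSoup APIs; the URL and printed summary are illustrative, not the template's code.

import asyncio

import httpx
from bs4 import BeautifulSoup


async def main() -> None:
    # Fetch a page with HTTPX; the URL is illustrative.
    async with httpx.AsyncClient() as client:
        response = await client.get('https://apify.com')

    # Parse the HTML with BeautifulSoup and pull out the title and links.
    soup = BeautifulSoup(response.text, 'html.parser')
    title = soup.title.string if soup.title else None
    links = [a['href'] for a in soup.find_all('a', href=True)]
    print(f'{title}: found {len(links)} links')


if __name__ == '__main__':
    asyncio.run(main())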
docs/03_guides/02_parsel_impit.mdx (new file)

@@ -0,0 +1,28 @@
+---
+id: parsel-impit
+title: Using Parsel with Impit
+---
+
+import CodeBlock from '@theme/CodeBlock';
+
+import ParselImpitExample from '!!raw-loader!./code/02_parsel_impit.py';
+
+In this guide, you'll learn how to combine the [Parsel](https://github.com/scrapy/parsel) and [Impit](https://github.com/apify/impit) libraries when building Apify Actors.
+
+## Introduction
+
+[Parsel](https://github.com/scrapy/parsel) is a Python library for extracting data from HTML and XML documents using CSS selectors and [XPath](https://en.wikipedia.org/wiki/XPath) expressions. It offers an intuitive API for navigating and extracting structured data, making it a popular choice for web scraping. Compared to [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/), it also delivers better performance.
+
+[Impit](https://github.com/apify/impit) is Apify's high-performance HTTP client for Python. It supports both synchronous and asynchronous workflows and is built for large-scale web scraping, where making thousands of requests efficiently is essential. With built-in browser impersonation and anti-blocking features, it simplifies handling modern websites.
+
+## Example Actor
+
+The following example shows a simple Actor that recursively scrapes titles from linked pages, up to a user-defined maximum depth. It uses [Impit](https://github.com/apify/impit) to fetch pages and [Parsel](https://github.com/scrapy/parsel) to extract titles and discover new links.
+
+<CodeBlock className="language-python">
+    {ParselImpitExample}
+</CodeBlock>
+
+## Conclusion
+
+In this guide, you learned how to use [Parsel](https://github.com/scrapy/parsel) with [Impit](https://github.com/apify/impit) in your Apify Actors. By combining these libraries, you get a powerful and efficient solution for web scraping: [Parsel](https://github.com/scrapy/parsel) provides excellent CSS selector and XPath support for data extraction, while [Impit](https://github.com/apify/impit) offers a fast and simple HTTP client built by Apify. This combination makes it easy to build scalable web scraping tasks in Python. See the [Actor templates](https://apify.com/templates/categories/python) to get started with your own scraping tasks. If you have questions or need assistance, feel free to reach out on our [GitHub](https://github.com/apify/apify-sdk-python) or join our [Discord community](https://discord.com/invite/jyEM2PRvMU). Happy scraping!
docs/03_guides/05_crawlee.mdx (new file)

@@ -0,0 +1,46 @@
+---
+id: crawlee
+title: Using Crawlee
+---
+
+import CodeBlock from '@theme/CodeBlock';
+
+import CrawleeBeautifulSoupExample from '!!raw-loader!./code/05_crawlee_beautifulsoup.py';
+import CrawleeParselExample from '!!raw-loader!./code/05_crawlee_parsel.py';
+import CrawleePlaywrightExample from '!!raw-loader!./code/05_crawlee_playwright.py';
+
+In this guide, you'll learn how to use the [Crawlee](https://crawlee.dev/python) library in your Apify Actors.
+
+## Introduction
+
+[Crawlee](https://crawlee.dev/python) is a Python library for web scraping and browser automation that provides a robust and flexible framework for building web scraping tasks. It seamlessly integrates with the Apify platform and supports a variety of scraping techniques, from static HTML parsing to dynamic JavaScript-rendered content handling. Crawlee offers a range of crawlers, including HTTP-based crawlers like [`HttpCrawler`](https://crawlee.dev/python/api/class/HttpCrawler), [`BeautifulSoupCrawler`](https://crawlee.dev/python/api/class/BeautifulSoupCrawler) and [`ParselCrawler`](https://crawlee.dev/python/api/class/ParselCrawler), and browser-based crawlers like [`PlaywrightCrawler`](https://crawlee.dev/python/api/class/PlaywrightCrawler), to suit different scraping needs.
+
+In this guide, you'll learn how to use Crawlee with [`BeautifulSoupCrawler`](https://crawlee.dev/python/api/class/BeautifulSoupCrawler), [`ParselCrawler`](https://crawlee.dev/python/api/class/ParselCrawler), and [`PlaywrightCrawler`](https://crawlee.dev/python/api/class/PlaywrightCrawler) to build Apify Actors for web scraping.
+
+## Actor with BeautifulSoupCrawler
+
+The [`BeautifulSoupCrawler`](https://crawlee.dev/python/api/class/BeautifulSoupCrawler) is ideal for extracting data from static HTML pages. It uses [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/bs4/doc/) for parsing and [`ImpitHttpClient`](https://crawlee.dev/python/api/class/ImpitHttpClient) for HTTP communication, ensuring efficient and lightweight scraping. If you do not need to execute JavaScript on the page, [`BeautifulSoupCrawler`](https://crawlee.dev/python/api/class/BeautifulSoupCrawler) is a great choice for your scraping tasks. Below is an example of how to use it in an Apify Actor.
+
+<CodeBlock className="language-python">
+    {CrawleeBeautifulSoupExample}
+</CodeBlock>
+
+## Actor with ParselCrawler
+
+The [`ParselCrawler`](https://crawlee.dev/python/api/class/ParselCrawler) works in the same way as [`BeautifulSoupCrawler`](https://crawlee.dev/python/api/class/BeautifulSoupCrawler), but it uses the [Parsel](https://parsel.readthedocs.io/en/latest/) library for HTML parsing. This allows for more powerful and flexible data extraction using [XPath](https://en.wikipedia.org/wiki/XPath) selectors, and it is typically faster than [`BeautifulSoupCrawler`](https://crawlee.dev/python/api/class/BeautifulSoupCrawler). Below is an example of how to use [`ParselCrawler`](https://crawlee.dev/python/api/class/ParselCrawler) in an Apify Actor.
+
+<CodeBlock className="language-python">
+    {CrawleeParselExample}
+</CodeBlock>
+
+## Actor with PlaywrightCrawler
+
+The [`PlaywrightCrawler`](https://crawlee.dev/python/api/class/PlaywrightCrawler) is built for handling dynamic web pages that rely on JavaScript for content rendering. Using the [Playwright](https://playwright.dev/) library, it provides a browser-based automation environment to interact with complex websites. Below is an example of how to use [`PlaywrightCrawler`](https://crawlee.dev/python/api/class/PlaywrightCrawler) in an Apify Actor.
+
+<CodeBlock className="language-python">
+    {CrawleePlaywrightExample}
+</CodeBlock>
+
+## Conclusion
+
+In this guide, you learned how to use the [Crawlee](https://crawlee.dev/python) library in your Apify Actors. By using the [`BeautifulSoupCrawler`](https://crawlee.dev/python/api/class/BeautifulSoupCrawler), [`ParselCrawler`](https://crawlee.dev/python/api/class/ParselCrawler), and [`PlaywrightCrawler`](https://crawlee.dev/python/api/class/PlaywrightCrawler) crawlers, you can efficiently scrape static or dynamic web pages, making it easy to build web scraping tasks in Python. See the [Actor templates](https://apify.com/templates/categories/python) to get started with your own scraping tasks. If you have questions or need assistance, feel free to reach out on our [GitHub](https://github.com/apify/apify-sdk-python) or join our [Discord community](https://discord.com/invite/jyEM2PRvMU). Happy scraping!
docs/02_guides/code/01_beautifulsoup_httpx.py → docs/03_guides/code/01_beautifulsoup_httpx.py

@@ -1,9 +1,8 @@
-from __future__ import annotations
-
+import asyncio
 from urllib.parse import urljoin
 
+import httpx
 from bs4 import BeautifulSoup
-from httpx import AsyncClient
 
 from apify import Actor, Request
 

@@ -32,7 +31,7 @@ async def main() -> None:
             await request_queue.add_request(new_request)
 
         # Create an HTTPX client to fetch the HTML content of the URLs.
-        async with AsyncClient() as client:
+        async with httpx.AsyncClient() as client:
             # Process the URLs from the request queue.
             while request := await request_queue.fetch_next_request():
                 url = request.url

@@ -83,3 +82,7 @@
                 finally:
                     # Mark the request as handled to ensure it is not processed again.
                     await request_queue.mark_request_as_handled(new_request)
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
docs/03_guides/code/02_parsel_impit.py (new file)

@@ -0,0 +1,94 @@
+import asyncio
+from urllib.parse import urljoin
+
+import impit
+import parsel
+
+from apify import Actor, Request
+
+
+async def main() -> None:
+    # Enter the context of the Actor.
+    async with Actor:
+        # Retrieve the Actor input, and use default values if not provided.
+        actor_input = await Actor.get_input() or {}
+        start_urls = actor_input.get('start_urls', [{'url': 'https://apify.com'}])
+        max_depth = actor_input.get('max_depth', 1)
+
+        # Exit if no start URLs are provided.
+        if not start_urls:
+            Actor.log.info('No start URLs specified in Actor input, exiting...')
+            await Actor.exit()
+
+        # Open the default request queue for handling URLs to be processed.
+        request_queue = await Actor.open_request_queue()
+
+        # Enqueue the start URLs with an initial crawl depth of 0.
+        for start_url in start_urls:
+            url = start_url.get('url')
+            Actor.log.info(f'Enqueuing {url} ...')
+            new_request = Request.from_url(url, user_data={'depth': 0})
+            await request_queue.add_request(new_request)
+
+        # Create an Impit client to fetch the HTML content of the URLs.
+        async with impit.AsyncClient() as client:
+            # Process the URLs from the request queue.
+            while request := await request_queue.fetch_next_request():
+                url = request.url
+
+                if not isinstance(request.user_data['depth'], (str, int)):
+                    raise TypeError('Request.depth is an unexpected type.')
+
+                depth = int(request.user_data['depth'])
+                Actor.log.info(f'Scraping {url} (depth={depth}) ...')
+
+                try:
+                    # Fetch the HTTP response from the specified URL using Impit.
+                    response = await client.get(url)
+
+                    # Parse the HTML content using Parsel Selector.
+                    selector = parsel.Selector(text=response.text)
+
+                    # If the current depth is less than max_depth, find nested links
+                    # and enqueue them.
+                    if depth < max_depth:
+                        # Extract all links using CSS selector
+                        links = selector.css('a::attr(href)').getall()
+                        for link_href in links:
+                            link_url = urljoin(url, link_href)
+
+                            if link_url.startswith(('http://', 'https://')):
+                                Actor.log.info(f'Enqueuing {link_url} ...')
+                                new_request = Request.from_url(
+                                    link_url,
+                                    user_data={'depth': depth + 1},
+                                )
+                                await request_queue.add_request(new_request)
+
+                    # Extract the desired data using Parsel selectors.
+                    title = selector.css('title::text').get()
+                    h1s = selector.css('h1::text').getall()
+                    h2s = selector.css('h2::text').getall()
+                    h3s = selector.css('h3::text').getall()
+
+                    data = {
+                        'url': url,
+                        'title': title,
+                        'h1s': h1s,
+                        'h2s': h2s,
+                        'h3s': h3s,
+                    }
+
+                    # Store the extracted data to the default dataset.
+                    await Actor.push_data(data)
+
+                except Exception:
+                    Actor.log.exception(f'Cannot extract data from {url}.')
+
+                finally:
+                    # Mark the request as handled to ensure it is not processed again.
+                    await request_queue.mark_request_as_handled(request)
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
docs/02_guides/code/03_playwright.py → docs/03_guides/code/03_playwright.py

@@ -1,5 +1,4 @@
-from __future__ import annotations
-
+import asyncio
 from urllib.parse import urljoin
 
 from playwright.async_api import async_playwright

@@ -92,3 +91,7 @@ async def main() -> None:
                     await page.close()
                     # Mark the request as handled to ensure it is not processed again.
                     await request_queue.mark_request_as_handled(request)
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
docs/02_guides/code/04_selenium.py → docs/03_guides/code/04_selenium.py

@@ -1,5 +1,3 @@
-from __future__ import annotations
-
 import asyncio
 from urllib.parse import urljoin
 

@@ -102,3 +100,7 @@ async def main() -> None:
                    await request_queue.mark_request_as_handled(request)
 
        driver.quit()
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
docs/03_guides/code/05_crawlee_beautifulsoup.py (new file)

@@ -0,0 +1,55 @@
+import asyncio
+
+from crawlee.crawlers import BeautifulSoupCrawler, BeautifulSoupCrawlingContext
+
+from apify import Actor
+
+# Create a crawler.
+crawler = BeautifulSoupCrawler(
+    # Limit the crawl to max requests. Remove or increase it for crawling all links.
+    max_requests_per_crawl=50,
+)
+
+
+# Define a request handler, which will be called for every request.
+@crawler.router.default_handler
+async def request_handler(context: BeautifulSoupCrawlingContext) -> None:
+    Actor.log.info(f'Scraping {context.request.url}...')
+
+    # Extract the desired data.
+    data = {
+        'url': context.request.url,
+        'title': context.soup.title.string if context.soup.title else None,
+        'h1s': [h1.text for h1 in context.soup.find_all('h1')],
+        'h2s': [h2.text for h2 in context.soup.find_all('h2')],
+        'h3s': [h3.text for h3 in context.soup.find_all('h3')],
+    }
+
+    # Store the extracted data to the default dataset.
+    await context.push_data(data)
+
+    # Enqueue additional links found on the current page.
+    await context.enqueue_links(strategy='same-domain')
+
+
+async def main() -> None:
+    # Enter the context of the Actor.
+    async with Actor:
+        # Retrieve the Actor input, and use default values if not provided.
+        actor_input = await Actor.get_input() or {}
+        start_urls = [
+            url.get('url')
+            for url in actor_input.get('start_urls', [{'url': 'https://apify.com'}])
+        ]
+
+        # Exit if no start URLs are provided.
+        if not start_urls:
+            Actor.log.info('No start URLs specified in Actor input, exiting...')
+            await Actor.exit()
+
+        # Run the crawler with the starting requests.
+        await crawler.run(start_urls)
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
docs/03_guides/code/05_crawlee_parsel.py (new file)

@@ -0,0 +1,55 @@
+import asyncio
+
+from crawlee.crawlers import ParselCrawler, ParselCrawlingContext
+
+from apify import Actor
+
+# Create a crawler.
+crawler = ParselCrawler(
+    # Limit the crawl to max requests. Remove or increase it for crawling all links.
+    max_requests_per_crawl=50,
+)
+
+
+# Define a request handler, which will be called for every request.
+@crawler.router.default_handler
+async def request_handler(context: ParselCrawlingContext) -> None:
+    Actor.log.info(f'Scraping {context.request.url}...')
+
+    # Extract the desired data.
+    data = {
+        'url': context.request.url,
+        'title': context.selector.xpath('//title/text()').get(),
+        'h1s': context.selector.xpath('//h1/text()').getall(),
+        'h2s': context.selector.xpath('//h2/text()').getall(),
+        'h3s': context.selector.xpath('//h3/text()').getall(),
+    }
+
+    # Store the extracted data to the default dataset.
+    await context.push_data(data)
+
+    # Enqueue additional links found on the current page.
+    await context.enqueue_links(strategy='same-domain')
+
+
+async def main() -> None:
+    # Enter the context of the Actor.
+    async with Actor:
+        # Retrieve the Actor input, and use default values if not provided.
+        actor_input = await Actor.get_input() or {}
+        start_urls = [
+            url.get('url')
+            for url in actor_input.get('start_urls', [{'url': 'https://apify.com'}])
+        ]
+
+        # Exit if no start URLs are provided.
+        if not start_urls:
+            Actor.log.info('No start URLs specified in Actor input, exiting...')
+            await Actor.exit()
+
+        # Run the crawler with the starting requests.
+        await crawler.run(start_urls)
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
docs/03_guides/code/05_crawlee_playwright.py (new file)

@@ -0,0 +1,58 @@
+import asyncio
+
+from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext
+
+from apify import Actor
+
+# Create a crawler.
+crawler = PlaywrightCrawler(
+    # Limit the crawl to max requests. Remove or increase it for crawling all links.
+    max_requests_per_crawl=50,
+    # Run the browser in a headless mode.
+    headless=True,
+    browser_launch_options={'args': ['--disable-gpu']},
+)
+
+
+# Define a request handler, which will be called for every request.
+@crawler.router.default_handler
+async def request_handler(context: PlaywrightCrawlingContext) -> None:
+    Actor.log.info(f'Scraping {context.request.url}...')
+
+    # Extract the desired data.
+    data = {
+        'url': context.request.url,
+        'title': await context.page.title(),
+        'h1s': [await h1.text_content() for h1 in await context.page.locator('h1').all()],
+        'h2s': [await h2.text_content() for h2 in await context.page.locator('h2').all()],
+        'h3s': [await h3.text_content() for h3 in await context.page.locator('h3').all()],
+    }
+
+    # Store the extracted data to the default dataset.
+    await context.push_data(data)
+
+    # Enqueue additional links found on the current page.
+    await context.enqueue_links(strategy='same-domain')
+
+
+async def main() -> None:
+    # Enter the context of the Actor.
+    async with Actor:
+        # Retrieve the Actor input, and use default values if not provided.
+        actor_input = await Actor.get_input() or {}
+        start_urls = [
+            url.get('url')
+            for url in actor_input.get('start_urls', [{'url': 'https://apify.com'}])
+        ]
+
+        # Exit if no start URLs are provided.
+        if not start_urls:
+            Actor.log.info('No start URLs specified in Actor input, exiting...')
+            await Actor.exit()
+
+        # Run the crawler with the starting requests.
+        await crawler.run(start_urls)
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "apify"
-version = "2.7.1b10"
+version = "2.7.1b12"
 description = "Apify SDK for Python"
 authors = [{ name = "Apify Technologies s.r.o.", email = "support@apify.com" }]
 license = { file = "LICENSE" }
src/apify/_charging.py

@@ -64,6 +64,16 @@ class ChargingManager(Protocol):
         This can be used for instance when your code needs to support multiple pricing models in transition periods.
         """
 
+    def get_charged_event_count(self, event_name: str) -> int:
+        """Get the number of events with the given name that were charged so far.
+
+        Args:
+            event_name: Name of the inspected event.
+        """
+
+    def get_max_total_charge_usd(self) -> Decimal:
+        """Get the configured maximum total charge for this Actor run."""
+
 
 @docs_group('Charging')
 @dataclass(frozen=True)

@@ -309,6 +319,15 @@ class ChargingManagerImplementation(ChargingManager):
             },
         )
 
+    @ensure_context
+    def get_charged_event_count(self, event_name: str) -> int:
+        item = self._charging_state.get(event_name)
+        return item.charge_count if item is not None else 0
+
+    @ensure_context
+    def get_max_total_charge_usd(self) -> Decimal:
+        return self._max_total_charge_usd
+
 
 @dataclass
 class ChargingStateItem:
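Note: the diff adds the two ChargingManager methods above without a usage example. Here is a minimal, hedged sketch of how they might be combined with the SDK's existing pay-per-event calls; Actor.get_charging_manager() and Actor.charge() are the documented entry points, while the 'result-item' event name is an illustrative assumption, not part of the diff.

from apify import Actor


async def main() -> None:
    async with Actor:
        charging_manager = Actor.get_charging_manager()

        # The run's configured budget, newly exposed by get_max_total_charge_usd().
        max_total = charging_manager.get_max_total_charge_usd()

        # Events charged so far under this name: 0 on a fresh run, possibly more
        # after a migration or resurrection. 'result-item' is an assumed name.
        charged = charging_manager.get_charged_event_count('result-item')
        Actor.log.info(f'Charged {charged} events against a ${max_total} budget.')

        # Charge for one more event as before.
        await Actor.charge('result-item')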
tests/integration/test_actor_scrapy.py

@@ -11,7 +11,7 @@ async def test_actor_scrapy_title_spider(
     make_actor: MakeActorFunction,
     run_actor: RunActorFunction,
 ) -> None:
-    base_path = Path('docs/02_guides/code/scrapy_project')
+    base_path = Path('docs/03_guides/code/scrapy_project')
 
     actor_source_files = {
         'src/__init__.py': (base_path / 'src/__init__.py').read_text(),
uv.lock

@@ -28,7 +28,7 @@ wheels = [
 
 [[package]]
 name = "apify"
-version = "2.7.1b10"
+version = "2.7.1b12"
 source = { editable = "." }
 dependencies = [
     { name = "apify-client" },
website/package-lock.json

@@ -7,7 +7,7 @@
         "name": "apify-sdk-python",
         "dependencies": {
             "@apify/docs-theme": "^1.0.203",
-            "@apify/docusaurus-plugin-typedoc-api": "^4.4.6",
+            "@apify/docusaurus-plugin-typedoc-api": "^4.4.8",
             "@docusaurus/core": "^3.8.1",
             "@docusaurus/faster": "^3.8.1",
             "@docusaurus/plugin-client-redirects": "^3.8.1",

@@ -1614,9 +1614,9 @@
         }
     },
     "node_modules/@apify/docs-theme": {
-        "version": "1.0.203",
-        "resolved": "https://registry.npmjs.org/@apify/docs-theme/-/docs-theme-1.0.203.tgz",
-        "integrity": "sha512-vvFoyS/d/7dLrTJDBB+qpZYzH+mxhWt45C3iLfZaUW4P/oUAfEVKksxv5iLHCBfuxKQ6JIMmuzGnT4OeZ/O+VQ==",
+        "version": "1.0.209",
+        "resolved": "https://registry.npmjs.org/@apify/docs-theme/-/docs-theme-1.0.209.tgz",
+        "integrity": "sha512-1IJpcITFgZ6rx/c23Rii5RC8OVRe7bmdhERuRhj6llMbn5WSl8B0z/BoqoTXNkNFrtQfyL3jvbzCZ3H7e7jDxA==",
         "license": "ISC",
         "dependencies": {
             "@apify/docs-search-modal": "^1.2.2",

@@ -1642,9 +1642,9 @@
         }
     },
     "node_modules/@apify/docusaurus-plugin-typedoc-api": {
-        "version": "4.4.7",
-        "resolved": "https://registry.npmjs.org/@apify/docusaurus-plugin-typedoc-api/-/docusaurus-plugin-typedoc-api-4.4.7.tgz",
-        "integrity": "sha512-xJMcRARQHYECg8xDOkJfCX6EZb4++doWCU2eWEHxtc4d4m38xMdRkh3RU78ovWgRwjkkAu/mfV7DY7aNaQebGA==",
+        "version": "4.4.8",
+        "resolved": "https://registry.npmjs.org/@apify/docusaurus-plugin-typedoc-api/-/docusaurus-plugin-typedoc-api-4.4.8.tgz",
+        "integrity": "sha512-iAGPaFKXz5SBzhf+611bk41b2sBRXPPINQOegxd0tOXwGOopnkKfr0LAvJp1Hd1jZFrVJhjyDsVZ6HM7Wwh9zg==",
         "license": "MIT",
         "dependencies": {
             "@vscode/codicons": "^0.0.35",