aio-scrapy 2.1.7__tar.gz → 2.1.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. {aio_scrapy-2.1.7/aio_scrapy.egg-info → aio_scrapy-2.1.9}/PKG-INFO +54 -4
  2. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/README.md +45 -2
  3. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9/aio_scrapy.egg-info}/PKG-INFO +54 -4
  4. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aio_scrapy.egg-info/SOURCES.txt +2 -0
  5. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aio_scrapy.egg-info/requires.txt +8 -0
  6. aio_scrapy-2.1.9/aioscrapy/VERSION +1 -0
  7. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/core/downloader/handlers/curl_cffi.py +13 -3
  8. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/core/downloader/handlers/webdriver/__init__.py +1 -0
  9. aio_scrapy-2.1.9/aioscrapy/core/downloader/handlers/webdriver/sbcdp.py +404 -0
  10. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/http/request/__init__.py +89 -5
  11. aio_scrapy-2.1.9/aioscrapy/libs/pipelines/redis.py +122 -0
  12. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/settings/default_settings.py +7 -0
  13. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/setup.py +4 -1
  14. aio_scrapy-2.1.7/aioscrapy/VERSION +0 -1
  15. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/LICENSE +0 -0
  16. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/MANIFEST.in +0 -0
  17. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aio_scrapy.egg-info/dependency_links.txt +0 -0
  18. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aio_scrapy.egg-info/entry_points.txt +0 -0
  19. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aio_scrapy.egg-info/not-zip-safe +0 -0
  20. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aio_scrapy.egg-info/top_level.txt +0 -0
  21. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/__init__.py +0 -0
  22. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/__main__.py +0 -0
  23. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/cmdline.py +0 -0
  24. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/commands/__init__.py +0 -0
  25. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/commands/crawl.py +0 -0
  26. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/commands/genspider.py +0 -0
  27. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/commands/list.py +0 -0
  28. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/commands/runspider.py +0 -0
  29. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/commands/settings.py +0 -0
  30. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/commands/startproject.py +0 -0
  31. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/commands/version.py +0 -0
  32. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/core/__init__.py +0 -0
  33. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/core/downloader/__init__.py +0 -0
  34. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/core/downloader/handlers/__init__.py +0 -0
  35. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/core/downloader/handlers/aiohttp.py +0 -0
  36. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/core/downloader/handlers/httpx.py +0 -0
  37. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/core/downloader/handlers/pyhttpx.py +0 -0
  38. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/core/downloader/handlers/requests.py +0 -0
  39. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/core/downloader/handlers/webdriver/drissionpage.py +0 -0
  40. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/core/downloader/handlers/webdriver/driverpool.py +0 -0
  41. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/core/downloader/handlers/webdriver/playwright.py +0 -0
  42. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/core/engine.py +0 -0
  43. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/core/scheduler.py +0 -0
  44. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/core/scraper.py +0 -0
  45. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/crawler.py +0 -0
  46. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/db/__init__.py +0 -0
  47. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/db/absmanager.py +0 -0
  48. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/db/aiomongo.py +0 -0
  49. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/db/aiomysql.py +0 -0
  50. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/db/aiopg.py +0 -0
  51. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/db/aiorabbitmq.py +0 -0
  52. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/db/aioredis.py +0 -0
  53. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/dupefilters/__init__.py +0 -0
  54. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/dupefilters/disk.py +0 -0
  55. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/dupefilters/redis.py +0 -0
  56. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/exceptions.py +0 -0
  57. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/http/__init__.py +0 -0
  58. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/http/headers.py +0 -0
  59. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/http/request/form.py +0 -0
  60. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/http/request/json_request.py +0 -0
  61. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/http/response/__init__.py +0 -0
  62. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/http/response/html.py +0 -0
  63. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/http/response/text.py +0 -0
  64. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/http/response/web_driver.py +0 -0
  65. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/http/response/xml.py +0 -0
  66. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/libs/__init__.py +0 -0
  67. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/libs/downloader/__init__.py +0 -0
  68. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/libs/downloader/defaultheaders.py +0 -0
  69. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/libs/downloader/downloadtimeout.py +0 -0
  70. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/libs/downloader/ja3fingerprint.py +0 -0
  71. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/libs/downloader/retry.py +0 -0
  72. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/libs/downloader/stats.py +0 -0
  73. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/libs/downloader/useragent.py +0 -0
  74. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/libs/extensions/__init__.py +0 -0
  75. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/libs/extensions/closespider.py +0 -0
  76. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/libs/extensions/corestats.py +0 -0
  77. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/libs/extensions/logstats.py +0 -0
  78. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/libs/extensions/metric.py +0 -0
  79. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/libs/extensions/throttle.py +0 -0
  80. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/libs/pipelines/__init__.py +0 -0
  81. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/libs/pipelines/csv.py +0 -0
  82. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/libs/pipelines/excel.py +0 -0
  83. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/libs/pipelines/mongo.py +0 -0
  84. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/libs/pipelines/mysql.py +0 -0
  85. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/libs/pipelines/pg.py +0 -0
  86. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/libs/spider/__init__.py +0 -0
  87. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/libs/spider/depth.py +0 -0
  88. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/libs/spider/httperror.py +0 -0
  89. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/libs/spider/offsite.py +0 -0
  90. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/libs/spider/referer.py +0 -0
  91. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/libs/spider/urllength.py +0 -0
  92. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/link.py +0 -0
  93. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/logformatter.py +0 -0
  94. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/middleware/__init__.py +0 -0
  95. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/middleware/absmanager.py +0 -0
  96. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/middleware/downloader.py +0 -0
  97. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/middleware/extension.py +0 -0
  98. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/middleware/itempipeline.py +0 -0
  99. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/middleware/spider.py +0 -0
  100. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/process.py +0 -0
  101. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/proxy/__init__.py +0 -0
  102. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/proxy/redis.py +0 -0
  103. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/queue/__init__.py +0 -0
  104. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/queue/memory.py +0 -0
  105. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/queue/rabbitmq.py +0 -0
  106. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/queue/redis.py +0 -0
  107. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/scrapyd/__init__.py +0 -0
  108. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/scrapyd/runner.py +0 -0
  109. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/serializer.py +0 -0
  110. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/settings/__init__.py +0 -0
  111. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/signalmanager.py +0 -0
  112. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/signals.py +0 -0
  113. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/spiderloader.py +0 -0
  114. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/spiders/__init__.py +0 -0
  115. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/statscollectors.py +0 -0
  116. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/templates/project/aioscrapy.cfg +0 -0
  117. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/templates/project/module/__init__.py +0 -0
  118. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/templates/project/module/middlewares.py.tmpl +0 -0
  119. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/templates/project/module/pipelines.py.tmpl +0 -0
  120. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/templates/project/module/settings.py.tmpl +0 -0
  121. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/templates/project/module/spiders/__init__.py +0 -0
  122. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/templates/spiders/basic.tmpl +0 -0
  123. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/templates/spiders/single.tmpl +0 -0
  124. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/utils/__init__.py +0 -0
  125. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/utils/conf.py +0 -0
  126. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/utils/curl.py +0 -0
  127. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/utils/decorators.py +0 -0
  128. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/utils/deprecate.py +0 -0
  129. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/utils/httpobj.py +0 -0
  130. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/utils/log.py +0 -0
  131. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/utils/misc.py +0 -0
  132. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/utils/ossignal.py +0 -0
  133. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/utils/project.py +0 -0
  134. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/utils/python.py +0 -0
  135. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/utils/reqser.py +0 -0
  136. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/utils/request.py +0 -0
  137. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/utils/response.py +0 -0
  138. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/utils/signal.py +0 -0
  139. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/utils/spider.py +0 -0
  140. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/utils/template.py +0 -0
  141. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/utils/tools.py +0 -0
  142. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/utils/trackref.py +0 -0
  143. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/utils/url.py +0 -0
  144. {aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/setup.cfg +0 -0

{aio_scrapy-2.1.7/aio_scrapy.egg-info → aio_scrapy-2.1.9}/PKG-INFO

@@ -1,6 +1,6 @@
- Metadata-Version: 2.2
+ Metadata-Version: 2.4
  Name: aio-scrapy
- Version: 2.1.7
+ Version: 2.1.9
  Summary: A high-level Web Crawling and Web Scraping framework based on Asyncio
  Home-page: https://github.com/conlin-huang/aio-scrapy.git
  Author: conlin
@@ -41,6 +41,8 @@ Requires-Dist: XlsxWriter>=3.1.2; extra == "all"
  Requires-Dist: pillow>=9.4.0; extra == "all"
  Requires-Dist: requests>=2.28.2; extra == "all"
  Requires-Dist: curl_cffi; extra == "all"
+ Requires-Dist: sbcdp; extra == "all"
+ Requires-Dist: DrissionPage; extra == "all"
  Provides-Extra: aiomysql
  Requires-Dist: aiomysql>=0.1.1; extra == "aiomysql"
  Requires-Dist: cryptography; extra == "aiomysql"
@@ -52,6 +54,10 @@ Provides-Extra: mongo
  Requires-Dist: motor>=2.1.0; extra == "mongo"
  Provides-Extra: playwright
  Requires-Dist: playwright>=1.31.1; extra == "playwright"
+ Provides-Extra: sbcdp
+ Requires-Dist: sbcdp; extra == "sbcdp"
+ Provides-Extra: dp
+ Requires-Dist: DrissionPage; extra == "dp"
  Provides-Extra: pyhttpx
  Requires-Dist: pyhttpx>=2.10.4; extra == "pyhttpx"
  Provides-Extra: curl-cffi
@@ -71,6 +77,7 @@ Dynamic: description-content-type
  Dynamic: home-page
  Dynamic: keywords
  Dynamic: license
+ Dynamic: license-file
  Dynamic: provides-extra
  Dynamic: requires-dist
  Dynamic: requires-python
@@ -84,7 +91,7 @@ AioScrapy is a powerful asynchronous web crawling framework built on Python's as
  ## 特性 | Features

  - **完全异步**:基于Python的asyncio库,实现高效的并发爬取
- - **多种下载处理程序**:支持多种HTTP客户端,包括aiohttp、httpx、requests、pyhttpx、curl_cffi、DrissionPage和playwright
+ - **多种下载处理程序**:支持多种HTTP客户端,包括aiohttp、httpx、requests、pyhttpx、curl_cffi、DrissionPage、playwright和sbcdp
  - **灵活的中间件系统**:轻松添加自定义功能和处理逻辑
  - **强大的数据处理管道**:支持多种数据库存储选项
  - **内置信号系统**:方便的事件处理机制
@@ -94,7 +101,7 @@ AioScrapy is a powerful asynchronous web crawling framework built on Python's as


  - **Fully Asynchronous**: Built on Python's asyncio for efficient concurrent crawling
- - **Multiple Download Handlers**: Support for various HTTP clients including aiohttp, httpx, requests, pyhttpx, curl_cffi, DrissionPage and playwright
+ - **Multiple Download Handlers**: Support for various HTTP clients including aiohttp, httpx, requests, pyhttpx, curl_cffi, DrissionPage, playwright and sbcdp
  - **Flexible Middleware System**: Easily add custom functionality and processing logic
  - **Powerful Data Processing Pipelines**: Support for various database storage options
  - **Built-in Signal System**: Convenient event handling mechanism
@@ -117,6 +124,49 @@ pip install aio-scrapy
  # pip install git+https://github.com/ConlinH/aio-scrapy
  ```

+ ### 开始 | Start
+ ```python
+ from aioscrapy import Spider, logger
+
+
+ class MyspiderSpider(Spider):
+     name = 'myspider'
+     custom_settings = {
+         "CLOSE_SPIDER_ON_IDLE": True
+     }
+     start_urls = ["https://quotes.toscrape.com"]
+
+     @staticmethod
+     async def process_request(request, spider):
+         """ request middleware """
+         pass
+
+     @staticmethod
+     async def process_response(request, response, spider):
+         """ response middleware """
+         return response
+
+     @staticmethod
+     async def process_exception(request, exception, spider):
+         """ exception middleware """
+         pass
+
+     async def parse(self, response):
+         for quote in response.css('div.quote'):
+             item = {
+                 'author': quote.xpath('span/small/text()').get(),
+                 'text': quote.css('span.text::text').get(),
+             }
+             yield item
+
+     async def process_item(self, item):
+         logger.info(item)
+
+
+ if __name__ == '__main__':
+     MyspiderSpider.start()
+ ```
+
  ## 文档 | Documentation

  ## 文档目录 | Documentation Contents

{aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/README.md

@@ -6,7 +6,7 @@ AioScrapy is a powerful asynchronous web crawling framework built on Python's as
  ## 特性 | Features

  - **完全异步**:基于Python的asyncio库,实现高效的并发爬取
- - **多种下载处理程序**:支持多种HTTP客户端,包括aiohttp、httpx、requests、pyhttpx、curl_cffi、DrissionPage和playwright
+ - **多种下载处理程序**:支持多种HTTP客户端,包括aiohttp、httpx、requests、pyhttpx、curl_cffi、DrissionPage、playwright和sbcdp
  - **灵活的中间件系统**:轻松添加自定义功能和处理逻辑
  - **强大的数据处理管道**:支持多种数据库存储选项
  - **内置信号系统**:方便的事件处理机制
@@ -16,7 +16,7 @@ AioScrapy is a powerful asynchronous web crawling framework built on Python's as


  - **Fully Asynchronous**: Built on Python's asyncio for efficient concurrent crawling
- - **Multiple Download Handlers**: Support for various HTTP clients including aiohttp, httpx, requests, pyhttpx, curl_cffi, DrissionPage and playwright
+ - **Multiple Download Handlers**: Support for various HTTP clients including aiohttp, httpx, requests, pyhttpx, curl_cffi, DrissionPage, playwright and sbcdp
  - **Flexible Middleware System**: Easily add custom functionality and processing logic
  - **Powerful Data Processing Pipelines**: Support for various database storage options
  - **Built-in Signal System**: Convenient event handling mechanism
@@ -39,6 +39,49 @@ pip install aio-scrapy
  # pip install git+https://github.com/ConlinH/aio-scrapy
  ```

+ ### 开始 | Start
+ ```python
+ from aioscrapy import Spider, logger
+
+
+ class MyspiderSpider(Spider):
+     name = 'myspider'
+     custom_settings = {
+         "CLOSE_SPIDER_ON_IDLE": True
+     }
+     start_urls = ["https://quotes.toscrape.com"]
+
+     @staticmethod
+     async def process_request(request, spider):
+         """ request middleware """
+         pass
+
+     @staticmethod
+     async def process_response(request, response, spider):
+         """ response middleware """
+         return response
+
+     @staticmethod
+     async def process_exception(request, exception, spider):
+         """ exception middleware """
+         pass
+
+     async def parse(self, response):
+         for quote in response.css('div.quote'):
+             item = {
+                 'author': quote.xpath('span/small/text()').get(),
+                 'text': quote.css('span.text::text').get(),
+             }
+             yield item
+
+     async def process_item(self, item):
+         logger.info(item)
+
+
+ if __name__ == '__main__':
+     MyspiderSpider.start()
+ ```
+
  ## 文档 | Documentation

  ## 文档目录 | Documentation Contents

{aio_scrapy-2.1.7 → aio_scrapy-2.1.9/aio_scrapy.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
- Metadata-Version: 2.2
+ Metadata-Version: 2.4
  Name: aio-scrapy
- Version: 2.1.7
+ Version: 2.1.9
  Summary: A high-level Web Crawling and Web Scraping framework based on Asyncio
  Home-page: https://github.com/conlin-huang/aio-scrapy.git
  Author: conlin
@@ -41,6 +41,8 @@ Requires-Dist: XlsxWriter>=3.1.2; extra == "all"
  Requires-Dist: pillow>=9.4.0; extra == "all"
  Requires-Dist: requests>=2.28.2; extra == "all"
  Requires-Dist: curl_cffi; extra == "all"
+ Requires-Dist: sbcdp; extra == "all"
+ Requires-Dist: DrissionPage; extra == "all"
  Provides-Extra: aiomysql
  Requires-Dist: aiomysql>=0.1.1; extra == "aiomysql"
  Requires-Dist: cryptography; extra == "aiomysql"
@@ -52,6 +54,10 @@ Provides-Extra: mongo
  Requires-Dist: motor>=2.1.0; extra == "mongo"
  Provides-Extra: playwright
  Requires-Dist: playwright>=1.31.1; extra == "playwright"
+ Provides-Extra: sbcdp
+ Requires-Dist: sbcdp; extra == "sbcdp"
+ Provides-Extra: dp
+ Requires-Dist: DrissionPage; extra == "dp"
  Provides-Extra: pyhttpx
  Requires-Dist: pyhttpx>=2.10.4; extra == "pyhttpx"
  Provides-Extra: curl-cffi
@@ -71,6 +77,7 @@ Dynamic: description-content-type
  Dynamic: home-page
  Dynamic: keywords
  Dynamic: license
+ Dynamic: license-file
  Dynamic: provides-extra
  Dynamic: requires-dist
  Dynamic: requires-python
@@ -84,7 +91,7 @@ AioScrapy is a powerful asynchronous web crawling framework built on Python's as
  ## 特性 | Features

  - **完全异步**:基于Python的asyncio库,实现高效的并发爬取
- - **多种下载处理程序**:支持多种HTTP客户端,包括aiohttp、httpx、requests、pyhttpx、curl_cffi、DrissionPage和playwright
+ - **多种下载处理程序**:支持多种HTTP客户端,包括aiohttp、httpx、requests、pyhttpx、curl_cffi、DrissionPage、playwright和sbcdp
  - **灵活的中间件系统**:轻松添加自定义功能和处理逻辑
  - **强大的数据处理管道**:支持多种数据库存储选项
  - **内置信号系统**:方便的事件处理机制
@@ -94,7 +101,7 @@ AioScrapy is a powerful asynchronous web crawling framework built on Python's as


  - **Fully Asynchronous**: Built on Python's asyncio for efficient concurrent crawling
- - **Multiple Download Handlers**: Support for various HTTP clients including aiohttp, httpx, requests, pyhttpx, curl_cffi, DrissionPage and playwright
+ - **Multiple Download Handlers**: Support for various HTTP clients including aiohttp, httpx, requests, pyhttpx, curl_cffi, DrissionPage, playwright and sbcdp
  - **Flexible Middleware System**: Easily add custom functionality and processing logic
  - **Powerful Data Processing Pipelines**: Support for various database storage options
  - **Built-in Signal System**: Convenient event handling mechanism
@@ -117,6 +124,49 @@ pip install aio-scrapy
  # pip install git+https://github.com/ConlinH/aio-scrapy
  ```

+ ### 开始 | Start
+ ```python
+ from aioscrapy import Spider, logger
+
+
+ class MyspiderSpider(Spider):
+     name = 'myspider'
+     custom_settings = {
+         "CLOSE_SPIDER_ON_IDLE": True
+     }
+     start_urls = ["https://quotes.toscrape.com"]
+
+     @staticmethod
+     async def process_request(request, spider):
+         """ request middleware """
+         pass
+
+     @staticmethod
+     async def process_response(request, response, spider):
+         """ response middleware """
+         return response
+
+     @staticmethod
+     async def process_exception(request, exception, spider):
+         """ exception middleware """
+         pass
+
+     async def parse(self, response):
+         for quote in response.css('div.quote'):
+             item = {
+                 'author': quote.xpath('span/small/text()').get(),
+                 'text': quote.css('span.text::text').get(),
+             }
+             yield item
+
+     async def process_item(self, item):
+         logger.info(item)
+
+
+ if __name__ == '__main__':
+     MyspiderSpider.start()
+ ```
+
  ## 文档 | Documentation

  ## 文档目录 | Documentation Contents

{aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aio_scrapy.egg-info/SOURCES.txt

@@ -46,6 +46,7 @@ aioscrapy/core/downloader/handlers/webdriver/__init__.py
  aioscrapy/core/downloader/handlers/webdriver/drissionpage.py
  aioscrapy/core/downloader/handlers/webdriver/driverpool.py
  aioscrapy/core/downloader/handlers/webdriver/playwright.py
+ aioscrapy/core/downloader/handlers/webdriver/sbcdp.py
  aioscrapy/db/__init__.py
  aioscrapy/db/absmanager.py
  aioscrapy/db/aiomongo.py
@@ -86,6 +87,7 @@ aioscrapy/libs/pipelines/excel.py
  aioscrapy/libs/pipelines/mongo.py
  aioscrapy/libs/pipelines/mysql.py
  aioscrapy/libs/pipelines/pg.py
+ aioscrapy/libs/pipelines/redis.py
  aioscrapy/libs/spider/__init__.py
  aioscrapy/libs/spider/depth.py
  aioscrapy/libs/spider/httperror.py

{aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aio_scrapy.egg-info/requires.txt

@@ -28,10 +28,15 @@ XlsxWriter>=3.1.2
  pillow>=9.4.0
  requests>=2.28.2
  curl_cffi
+ sbcdp
+ DrissionPage

  [curl_cffi]
  curl_cffi>=0.6.1

+ [dp]
+ DrissionPage
+
  [execl]
  XlsxWriter>=3.1.2
  pillow>=9.4.0
@@ -53,3 +58,6 @@ pyhttpx>=2.10.4

  [requests]
  requests>=2.28.2
+
+ [sbcdp]
+ sbcdp

aio_scrapy-2.1.9/aioscrapy/VERSION

@@ -0,0 +1 @@
+ 2.1.9

{aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/core/downloader/handlers/curl_cffi.py

@@ -8,8 +8,9 @@ It supports features like browser impersonation, proxies, and cookies.
  它支持浏览器模拟、代理和Cookie等功能。
  """

+ import asyncio
  from curl_cffi.curl import CurlError
- from curl_cffi.requests import AsyncSession
+ from curl_cffi.requests import AsyncSession, Session

  from aioscrapy import Request
  from aioscrapy.core.downloader.handlers import BaseDownloadHandler
@@ -50,6 +51,9 @@ class CurlCffiDownloadHandler(BaseDownloadHandler):
          # SSL验证设置
          self.verify_ssl: bool = self.settings.get("VERIFY_SSL", True)

+         # 是否在线程中执行
+         self.use_thread: bool = self.settings.get("USE_THREAD", False)
+
      @classmethod
      def from_settings(cls, settings: Settings):
          """
@@ -160,8 +164,14 @@ class CurlCffiDownloadHandler(BaseDownloadHandler):

          # Perform the request
          # 执行请求
-         async with AsyncSession(**session_args) as session:
-             response = await session.request(request.method, request.url, **kwargs)
+         if self.use_thread:
+             with Session(**session_args) as session:
+                 # Run the synchronous curl-cffi request in a thread pool
+                 # 在线程池中运行同步的curl-cffi请求
+                 response = await asyncio.to_thread(session.request, request.method, request.url, **kwargs)
+         else:
+             async with AsyncSession(**session_args) as session:
+                 response = await session.request(request.method, request.url, **kwargs)

          # Convert curl_cffi response to HtmlResponse
          # 将curl_cffi响应转换为HtmlResponse
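
The last hunk above adds an opt-in thread mode to the curl_cffi handler: when the new USE_THREAD setting is truthy, the handler opens a synchronous `Session` and offloads each request with `asyncio.to_thread` instead of awaiting `AsyncSession`. Below is a minimal, standalone sketch of that pattern — it is not the handler itself, and the function name and URL are illustrative only.

```python
import asyncio

from curl_cffi.requests import Session


async def fetch_in_thread(url: str) -> str:
    """Sketch of the USE_THREAD path: run the blocking curl_cffi call in a
    worker thread so the asyncio event loop is never blocked."""
    with Session() as session:
        # asyncio.to_thread submits the synchronous request to the default
        # thread pool and awaits its result.
        response = await asyncio.to_thread(session.request, "GET", url)
        return response.text


if __name__ == "__main__":
    print(len(asyncio.run(fetch_in_thread("https://quotes.toscrape.com"))))
```

Per the diff, the handler reads the flag with `self.settings.get("USE_THREAD", False)`, so it would presumably be switched on from project or spider settings (for example `custom_settings = {"USE_THREAD": True}`); the related additions to `default_settings.py` are not shown in this diff.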

{aio_scrapy-2.1.7 → aio_scrapy-2.1.9}/aioscrapy/core/downloader/handlers/webdriver/__init__.py

@@ -1,2 +1,3 @@
  from .playwright import PlaywrightDownloadHandler, PlaywrightDriver
  from .drissionpage import DrissionPageDownloadHandler, DrissionPageDriver
+ from .sbcdp import SbcdpDownloadHandler, SbcdpDriver
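
The new `sbcdp.py` module exports `SbcdpDownloadHandler` (and `SbcdpDriver`) from the `webdriver` package, alongside the existing playwright and DrissionPage handlers. The sketch below shows how a spider might opt into it, assuming aio-scrapy resolves handlers from a Scrapy-style `DOWNLOAD_HANDLERS` mapping of scheme to handler class path; that setting key and the spider are illustrative — only the module path and class name come from this diff, so check `aioscrapy/settings/default_settings.py` in 2.1.9 for the actual wiring.

```python
from aioscrapy import Spider


class SbcdpExampleSpider(Spider):
    name = "sbcdp_example"
    custom_settings = {
        "CLOSE_SPIDER_ON_IDLE": True,
        # Hypothetical wiring: point both schemes at the new sbcdp handler.
        "DOWNLOAD_HANDLERS": {
            "http": "aioscrapy.core.downloader.handlers.webdriver.sbcdp.SbcdpDownloadHandler",
            "https": "aioscrapy.core.downloader.handlers.webdriver.sbcdp.SbcdpDownloadHandler",
        },
    }
    start_urls = ["https://quotes.toscrape.com"]

    async def parse(self, response):
        # If the wiring above is correct, responses arrive via the
        # sbcdp (CDP-driven browser) handler rather than a plain HTTP client.
        yield {"title": response.xpath("//title/text()").get()}


if __name__ == "__main__":
    SbcdpExampleSpider.start()
```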