aio-scrapy 2.1.7__tar.gz → 2.1.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142)
  1. {aio_scrapy-2.1.7/aio_scrapy.egg-info → aio_scrapy-2.1.8}/PKG-INFO +44 -1
  2. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/README.md +43 -0
  3. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8/aio_scrapy.egg-info}/PKG-INFO +44 -1
  4. aio_scrapy-2.1.8/aioscrapy/VERSION +1 -0
  5. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/http/request/__init__.py +89 -5
  6. aio_scrapy-2.1.7/aioscrapy/VERSION +0 -1
  7. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/LICENSE +0 -0
  8. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/MANIFEST.in +0 -0
  9. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aio_scrapy.egg-info/SOURCES.txt +0 -0
  10. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aio_scrapy.egg-info/dependency_links.txt +0 -0
  11. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aio_scrapy.egg-info/entry_points.txt +0 -0
  12. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aio_scrapy.egg-info/not-zip-safe +0 -0
  13. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aio_scrapy.egg-info/requires.txt +0 -0
  14. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aio_scrapy.egg-info/top_level.txt +0 -0
  15. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/__init__.py +0 -0
  16. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/__main__.py +0 -0
  17. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/cmdline.py +0 -0
  18. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/commands/__init__.py +0 -0
  19. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/commands/crawl.py +0 -0
  20. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/commands/genspider.py +0 -0
  21. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/commands/list.py +0 -0
  22. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/commands/runspider.py +0 -0
  23. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/commands/settings.py +0 -0
  24. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/commands/startproject.py +0 -0
  25. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/commands/version.py +0 -0
  26. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/__init__.py +0 -0
  27. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/downloader/__init__.py +0 -0
  28. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/downloader/handlers/__init__.py +0 -0
  29. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/downloader/handlers/aiohttp.py +0 -0
  30. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/downloader/handlers/curl_cffi.py +0 -0
  31. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/downloader/handlers/httpx.py +0 -0
  32. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/downloader/handlers/pyhttpx.py +0 -0
  33. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/downloader/handlers/requests.py +0 -0
  34. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/downloader/handlers/webdriver/__init__.py +0 -0
  35. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/downloader/handlers/webdriver/drissionpage.py +0 -0
  36. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/downloader/handlers/webdriver/driverpool.py +0 -0
  37. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/downloader/handlers/webdriver/playwright.py +0 -0
  38. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/engine.py +0 -0
  39. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/scheduler.py +0 -0
  40. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/scraper.py +0 -0
  41. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/crawler.py +0 -0
  42. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/db/__init__.py +0 -0
  43. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/db/absmanager.py +0 -0
  44. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/db/aiomongo.py +0 -0
  45. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/db/aiomysql.py +0 -0
  46. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/db/aiopg.py +0 -0
  47. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/db/aiorabbitmq.py +0 -0
  48. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/db/aioredis.py +0 -0
  49. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/dupefilters/__init__.py +0 -0
  50. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/dupefilters/disk.py +0 -0
  51. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/dupefilters/redis.py +0 -0
  52. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/exceptions.py +0 -0
  53. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/http/__init__.py +0 -0
  54. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/http/headers.py +0 -0
  55. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/http/request/form.py +0 -0
  56. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/http/request/json_request.py +0 -0
  57. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/http/response/__init__.py +0 -0
  58. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/http/response/html.py +0 -0
  59. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/http/response/text.py +0 -0
  60. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/http/response/web_driver.py +0 -0
  61. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/http/response/xml.py +0 -0
  62. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/__init__.py +0 -0
  63. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/downloader/__init__.py +0 -0
  64. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/downloader/defaultheaders.py +0 -0
  65. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/downloader/downloadtimeout.py +0 -0
  66. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/downloader/ja3fingerprint.py +0 -0
  67. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/downloader/retry.py +0 -0
  68. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/downloader/stats.py +0 -0
  69. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/downloader/useragent.py +0 -0
  70. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/extensions/__init__.py +0 -0
  71. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/extensions/closespider.py +0 -0
  72. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/extensions/corestats.py +0 -0
  73. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/extensions/logstats.py +0 -0
  74. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/extensions/metric.py +0 -0
  75. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/extensions/throttle.py +0 -0
  76. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/pipelines/__init__.py +0 -0
  77. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/pipelines/csv.py +0 -0
  78. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/pipelines/excel.py +0 -0
  79. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/pipelines/mongo.py +0 -0
  80. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/pipelines/mysql.py +0 -0
  81. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/pipelines/pg.py +0 -0
  82. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/spider/__init__.py +0 -0
  83. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/spider/depth.py +0 -0
  84. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/spider/httperror.py +0 -0
  85. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/spider/offsite.py +0 -0
  86. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/spider/referer.py +0 -0
  87. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/spider/urllength.py +0 -0
  88. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/link.py +0 -0
  89. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/logformatter.py +0 -0
  90. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/middleware/__init__.py +0 -0
  91. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/middleware/absmanager.py +0 -0
  92. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/middleware/downloader.py +0 -0
  93. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/middleware/extension.py +0 -0
  94. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/middleware/itempipeline.py +0 -0
  95. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/middleware/spider.py +0 -0
  96. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/process.py +0 -0
  97. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/proxy/__init__.py +0 -0
  98. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/proxy/redis.py +0 -0
  99. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/queue/__init__.py +0 -0
  100. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/queue/memory.py +0 -0
  101. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/queue/rabbitmq.py +0 -0
  102. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/queue/redis.py +0 -0
  103. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/scrapyd/__init__.py +0 -0
  104. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/scrapyd/runner.py +0 -0
  105. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/serializer.py +0 -0
  106. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/settings/__init__.py +0 -0
  107. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/settings/default_settings.py +0 -0
  108. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/signalmanager.py +0 -0
  109. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/signals.py +0 -0
  110. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/spiderloader.py +0 -0
  111. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/spiders/__init__.py +0 -0
  112. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/statscollectors.py +0 -0
  113. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/templates/project/aioscrapy.cfg +0 -0
  114. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/templates/project/module/__init__.py +0 -0
  115. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/templates/project/module/middlewares.py.tmpl +0 -0
  116. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/templates/project/module/pipelines.py.tmpl +0 -0
  117. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/templates/project/module/settings.py.tmpl +0 -0
  118. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/templates/project/module/spiders/__init__.py +0 -0
  119. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/templates/spiders/basic.tmpl +0 -0
  120. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/templates/spiders/single.tmpl +0 -0
  121. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/__init__.py +0 -0
  122. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/conf.py +0 -0
  123. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/curl.py +0 -0
  124. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/decorators.py +0 -0
  125. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/deprecate.py +0 -0
  126. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/httpobj.py +0 -0
  127. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/log.py +0 -0
  128. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/misc.py +0 -0
  129. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/ossignal.py +0 -0
  130. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/project.py +0 -0
  131. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/python.py +0 -0
  132. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/reqser.py +0 -0
  133. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/request.py +0 -0
  134. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/response.py +0 -0
  135. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/signal.py +0 -0
  136. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/spider.py +0 -0
  137. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/template.py +0 -0
  138. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/tools.py +0 -0
  139. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/trackref.py +0 -0
  140. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/url.py +0 -0
  141. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/setup.cfg +0 -0
  142. {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/setup.py +0 -0
{aio_scrapy-2.1.7/aio_scrapy.egg-info → aio_scrapy-2.1.8}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: aio-scrapy
-Version: 2.1.7
+Version: 2.1.8
 Summary: A high-level Web Crawling and Web Scraping framework based on Asyncio
 Home-page: https://github.com/conlin-huang/aio-scrapy.git
 Author: conlin
@@ -117,6 +117,49 @@ pip install aio-scrapy
 # pip install git+https://github.com/ConlinH/aio-scrapy
 ```
 
+### 开始 | Start
+```python
+from aioscrapy import Spider, logger
+
+
+class MyspiderSpider(Spider):
+    name = 'myspider'
+    custom_settings = {
+        "CLOSE_SPIDER_ON_IDLE": True
+    }
+    start_urls = ["https://quotes.toscrape.com"]
+
+    @staticmethod
+    async def process_request(request, spider):
+        """ request middleware """
+        pass
+
+    @staticmethod
+    async def process_response(request, response, spider):
+        """ response middleware """
+        return response
+
+    @staticmethod
+    async def process_exception(request, exception, spider):
+        """ exception middleware """
+        pass
+
+    async def parse(self, response):
+        for quote in response.css('div.quote'):
+            item = {
+                'author': quote.xpath('span/small/text()').get(),
+                'text': quote.css('span.text::text').get(),
+            }
+            yield item
+
+    async def process_item(self, item):
+        logger.info(item)
+
+
+if __name__ == '__main__':
+    MyspiderSpider.start()
+```
+
 ## 文档 | Documentation
 
 ## 文档目录 | Documentation Contents

{aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/README.md

@@ -39,6 +39,49 @@ pip install aio-scrapy
 # pip install git+https://github.com/ConlinH/aio-scrapy
 ```
 
+### 开始 | Start
+```python
+from aioscrapy import Spider, logger
+
+
+class MyspiderSpider(Spider):
+    name = 'myspider'
+    custom_settings = {
+        "CLOSE_SPIDER_ON_IDLE": True
+    }
+    start_urls = ["https://quotes.toscrape.com"]
+
+    @staticmethod
+    async def process_request(request, spider):
+        """ request middleware """
+        pass
+
+    @staticmethod
+    async def process_response(request, response, spider):
+        """ response middleware """
+        return response
+
+    @staticmethod
+    async def process_exception(request, exception, spider):
+        """ exception middleware """
+        pass
+
+    async def parse(self, response):
+        for quote in response.css('div.quote'):
+            item = {
+                'author': quote.xpath('span/small/text()').get(),
+                'text': quote.css('span.text::text').get(),
+            }
+            yield item
+
+    async def process_item(self, item):
+        logger.info(item)
+
+
+if __name__ == '__main__':
+    MyspiderSpider.start()
+```
+
 ## 文档 | Documentation
 
 ## 文档目录 | Documentation Contents

{aio_scrapy-2.1.7 → aio_scrapy-2.1.8/aio_scrapy.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: aio-scrapy
-Version: 2.1.7
+Version: 2.1.8
 Summary: A high-level Web Crawling and Web Scraping framework based on Asyncio
 Home-page: https://github.com/conlin-huang/aio-scrapy.git
 Author: conlin
@@ -117,6 +117,49 @@ pip install aio-scrapy
 # pip install git+https://github.com/ConlinH/aio-scrapy
 ```
 
+### 开始 | Start
+```python
+from aioscrapy import Spider, logger
+
+
+class MyspiderSpider(Spider):
+    name = 'myspider'
+    custom_settings = {
+        "CLOSE_SPIDER_ON_IDLE": True
+    }
+    start_urls = ["https://quotes.toscrape.com"]
+
+    @staticmethod
+    async def process_request(request, spider):
+        """ request middleware """
+        pass
+
+    @staticmethod
+    async def process_response(request, response, spider):
+        """ response middleware """
+        return response
+
+    @staticmethod
+    async def process_exception(request, exception, spider):
+        """ exception middleware """
+        pass
+
+    async def parse(self, response):
+        for quote in response.css('div.quote'):
+            item = {
+                'author': quote.xpath('span/small/text()').get(),
+                'text': quote.css('span.text::text').get(),
+            }
+            yield item
+
+    async def process_item(self, item):
+        logger.info(item)
+
+
+if __name__ == '__main__':
+    MyspiderSpider.start()
+```
+
 ## 文档 | Documentation
 
 ## 文档目录 | Documentation Contents

aio_scrapy-2.1.8/aioscrapy/VERSION

@@ -0,0 +1 @@
+2.1.8

{aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/http/request/__init__.py

@@ -11,9 +11,11 @@ It handles URL normalization, fingerprinting, serialization, and other request-r
 
 import hashlib
 import inspect
-import json
-from typing import Callable, List, Optional, Tuple, Type, TypeVar
+from collections import Counter
+from typing import Callable, List, Optional, Tuple, Type, TypeVar, Union
+from urllib.parse import ParseResult, parse_qsl, urlencode, urlparse
 
+import ujson
 from w3lib.url import canonicalize_url
 from w3lib.url import safe_url_string
 
@@ -23,11 +25,67 @@ from aioscrapy.utils.curl import curl_to_request_kwargs
 from aioscrapy.utils.python import to_unicode
 from aioscrapy.utils.url import escape_ajax
 
+
 # Type variable for Request class to use in class methods
 # 用于在类方法中使用的Request类的类型变量
 RequestTypeVar = TypeVar("RequestTypeVar", bound="Request")
 
 
+def _update_url_params(url: str, params: Union[dict, list, tuple]) -> str:
+    """Add URL query params to provided URL being aware of existing.
+
+    Args:
+        url: string of target URL
+        params: dict containing requested params to be added
+
+    Returns:
+        string with updated URL
+
+    >> url = 'http://stackoverflow.com/test?answers=true'
+    >> new_params = {'answers': False, 'data': ['some','values']}
+    >> update_url_params(url, new_params)
+    'http://stackoverflow.com/test?data=some&data=values&answers=false'
+    """
+    # No need to unquote, since requote_uri will be called later.
+    parsed_url = urlparse(url)
+
+    # Extracting URL arguments from parsed URL, NOTE the result is a list, not dict
+    parsed_get_args = parse_qsl(parsed_url.query, keep_blank_values=True)
+
+    # Merging URL arguments dict with new params
+    old_args_counter = Counter(x[0] for x in parsed_get_args)
+    if isinstance(params, dict):
+        params = list(params.items())
+    new_args_counter = Counter(x[0] for x in params)
+    for key, value in params:
+        # Bool and Dict values should be converted to json-friendly values
+        if isinstance(value, (bool, dict)):
+            value = ujson.dumps(value)
+        # 1 to 1 mapping, we have to search and update it.
+        if old_args_counter.get(key) == 1 and new_args_counter.get(key) == 1:
+            parsed_get_args = [
+                (x if x[0] != key else (key, value)) for x in parsed_get_args
+            ]
+        else:
+            parsed_get_args.append((key, value))
+
+    # Converting URL argument to proper query string
+    encoded_get_args = urlencode(parsed_get_args, doseq=True)
+
+    # Creating new parsed result object based on provided with new
+    # URL arguments. Same thing happens inside of urlparse.
+    new_url = ParseResult(
+        parsed_url.scheme,
+        parsed_url.netloc,
+        parsed_url.path,
+        parsed_url.params,
+        encoded_get_args,
+        parsed_url.fragment,
+    ).geturl()
+
+    return new_url
+
+
 class Request(object):
     attributes: Tuple[str, ...] = (
         "url", "callback", "method", "headers", "body",
@@ -42,7 +100,10 @@ class Request(object):
             callback: Optional[Callable] = None,
             method: str = 'GET',
             headers: Optional[dict] = None,
+            params: Optional[Union[dict, list, tuple]] = None,
             body: Optional[str] = None,
+            data: Optional[Union[dict[str, str], list[tuple], str, bytes]] = None,
+            json: Optional[dict | list] = None,
             cookies: Optional[dict] = None,
             meta: Optional[dict] = None,
             encoding: str = 'utf-8',
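The three new keyword arguments above (`params`, `data`, `json`) are wired up in the constructor hunk that follows: `params` is merged into the URL, `data` is form-encoded, and `json` is serialized with a JSON Content-Type. A hedged usage sketch, assuming only what this diff shows; the URLs and payloads are placeholders:

```python
# Usage sketch for the new Request keyword arguments in aio-scrapy 2.1.8.
# URLs and payloads are placeholders; behavior follows the hunks in this diff.
from aioscrapy.http.request import Request

# params is merged into the URL query string via _update_url_params.
page_req = Request("https://example.com/api?page=1", params={"page": 2, "q": "spider"})

# data becomes an application/x-www-form-urlencoded body.
form_req = Request("https://example.com/login", method="POST",
                   data={"user": "alice", "lang": "py"})

# json becomes a compact JSON body with Content-Type: application/json.
json_req = Request("https://example.com/items", method="POST",
                   json={"ids": [1, 2, 3]})
```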
@@ -77,8 +138,32 @@ class Request(object):
         """
         self._encoding = encoding
         self.method = str(method).upper()
+
+        self.headers = Headers(headers or {})
+
+        # url
+        if params:
+            url = _update_url_params(url, params)
         self._set_url(url)
+
+        # body/data/json
+        if data is not None:
+            if isinstance(data, (dict, list, tuple)):
+                body = urlencode(data)
+            elif isinstance(data, str):
+                body = data
+            elif isinstance(data, bytes):
+                body = data.decode(self._encoding)
+            self.headers.setdefault('Content-Type', 'application/x-www-form-urlencoded')
+
+        if json is not None:
+            body = ujson.dumps(json, separators=(",", ":"))
+            # Set default headers for JSON content
+            # 设置JSON内容的默认头部
+            self.headers.setdefault('Content-Type', 'application/json')
+
         self._set_body(body)
+
         assert isinstance(priority, int), f"Request priority not an integer: {priority!r}"
         self.priority = priority
 
@@ -86,7 +171,6 @@ class Request(object):
         self.errback = errback
 
         self.cookies = cookies or {}
-        self.headers = Headers(headers or {})
         self.dont_filter = dont_filter
         self.use_proxy = use_proxy
 
@@ -207,7 +291,7 @@ class Request(object):
         """
         return self._body
 
-    def _set_body(self, body: str) -> None:
+    def _set_body(self, body: Optional[str]) -> None:
         """
         Set the request body.
         设置请求体。
@@ -361,7 +445,7 @@ class Request(object):
             The request fingerprint. 请求指纹。
         """
         return hashlib.sha1(
-            json.dumps({
+            ujson.dumps({
                 'method': to_unicode(self.method),
                 'url': canonicalize_url(self.url, keep_fragments=keep_fragments),
                 'body': self.body,
aio_scrapy-2.1.7/aioscrapy/VERSION

@@ -1 +0,0 @@
-2.1.7