aio-scrapy 2.1.7__tar.gz → 2.1.8__tar.gz
This diff shows the changes between publicly released versions of the package, as published to the supported registries. It is provided for informational purposes only.
- {aio_scrapy-2.1.7/aio_scrapy.egg-info → aio_scrapy-2.1.8}/PKG-INFO +44 -1
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/README.md +43 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8/aio_scrapy.egg-info}/PKG-INFO +44 -1
- aio_scrapy-2.1.8/aioscrapy/VERSION +1 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/http/request/__init__.py +89 -5
- aio_scrapy-2.1.7/aioscrapy/VERSION +0 -1
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/LICENSE +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/MANIFEST.in +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aio_scrapy.egg-info/SOURCES.txt +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aio_scrapy.egg-info/dependency_links.txt +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aio_scrapy.egg-info/entry_points.txt +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aio_scrapy.egg-info/not-zip-safe +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aio_scrapy.egg-info/requires.txt +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aio_scrapy.egg-info/top_level.txt +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/__init__.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/__main__.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/cmdline.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/commands/__init__.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/commands/crawl.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/commands/genspider.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/commands/list.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/commands/runspider.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/commands/settings.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/commands/startproject.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/commands/version.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/__init__.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/downloader/__init__.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/downloader/handlers/__init__.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/downloader/handlers/aiohttp.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/downloader/handlers/curl_cffi.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/downloader/handlers/httpx.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/downloader/handlers/pyhttpx.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/downloader/handlers/requests.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/downloader/handlers/webdriver/__init__.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/downloader/handlers/webdriver/drissionpage.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/downloader/handlers/webdriver/driverpool.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/downloader/handlers/webdriver/playwright.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/engine.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/scheduler.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/core/scraper.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/crawler.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/db/__init__.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/db/absmanager.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/db/aiomongo.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/db/aiomysql.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/db/aiopg.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/db/aiorabbitmq.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/db/aioredis.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/dupefilters/__init__.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/dupefilters/disk.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/dupefilters/redis.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/exceptions.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/http/__init__.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/http/headers.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/http/request/form.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/http/request/json_request.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/http/response/__init__.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/http/response/html.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/http/response/text.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/http/response/web_driver.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/http/response/xml.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/__init__.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/downloader/__init__.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/downloader/defaultheaders.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/downloader/downloadtimeout.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/downloader/ja3fingerprint.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/downloader/retry.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/downloader/stats.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/downloader/useragent.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/extensions/__init__.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/extensions/closespider.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/extensions/corestats.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/extensions/logstats.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/extensions/metric.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/extensions/throttle.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/pipelines/__init__.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/pipelines/csv.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/pipelines/excel.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/pipelines/mongo.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/pipelines/mysql.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/pipelines/pg.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/spider/__init__.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/spider/depth.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/spider/httperror.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/spider/offsite.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/spider/referer.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/libs/spider/urllength.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/link.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/logformatter.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/middleware/__init__.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/middleware/absmanager.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/middleware/downloader.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/middleware/extension.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/middleware/itempipeline.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/middleware/spider.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/process.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/proxy/__init__.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/proxy/redis.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/queue/__init__.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/queue/memory.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/queue/rabbitmq.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/queue/redis.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/scrapyd/__init__.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/scrapyd/runner.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/serializer.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/settings/__init__.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/settings/default_settings.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/signalmanager.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/signals.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/spiderloader.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/spiders/__init__.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/statscollectors.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/templates/project/aioscrapy.cfg +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/templates/project/module/__init__.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/templates/project/module/middlewares.py.tmpl +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/templates/project/module/pipelines.py.tmpl +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/templates/project/module/settings.py.tmpl +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/templates/project/module/spiders/__init__.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/templates/spiders/basic.tmpl +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/templates/spiders/single.tmpl +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/__init__.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/conf.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/curl.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/decorators.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/deprecate.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/httpobj.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/log.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/misc.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/ossignal.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/project.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/python.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/reqser.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/request.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/response.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/signal.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/spider.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/template.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/tools.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/trackref.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/utils/url.py +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/setup.cfg +0 -0
- {aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/setup.py +0 -0
**{aio_scrapy-2.1.7/aio_scrapy.egg-info → aio_scrapy-2.1.8}/PKG-INFO**

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: aio-scrapy
-Version: 2.1.7
+Version: 2.1.8
 Summary: A high-level Web Crawling and Web Scraping framework based on Asyncio
 Home-page: https://github.com/conlin-huang/aio-scrapy.git
 Author: conlin
@@ -117,6 +117,49 @@ pip install aio-scrapy
 # pip install git+https://github.com/ConlinH/aio-scrapy
 ```
 
+### 开始 | Start
+```python
+from aioscrapy import Spider, logger
+
+
+class MyspiderSpider(Spider):
+    name = 'myspider'
+    custom_settings = {
+        "CLOSE_SPIDER_ON_IDLE": True
+    }
+    start_urls = ["https://quotes.toscrape.com"]
+
+    @staticmethod
+    async def process_request(request, spider):
+        """ request middleware """
+        pass
+
+    @staticmethod
+    async def process_response(request, response, spider):
+        """ response middleware """
+        return response
+
+    @staticmethod
+    async def process_exception(request, exception, spider):
+        """ exception middleware """
+        pass
+
+    async def parse(self, response):
+        for quote in response.css('div.quote'):
+            item = {
+                'author': quote.xpath('span/small/text()').get(),
+                'text': quote.css('span.text::text').get(),
+            }
+            yield item
+
+    async def process_item(self, item):
+        logger.info(item)
+
+
+if __name__ == '__main__':
+    MyspiderSpider.start()
+```
+
 ## 文档 | Documentation
 
 ## 文档目录 | Documentation Contents
````
**{aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/README.md**

````diff
@@ -39,6 +39,49 @@ pip install aio-scrapy
 # pip install git+https://github.com/ConlinH/aio-scrapy
 ```
 
+### 开始 | Start
+```python
+from aioscrapy import Spider, logger
+
+
+class MyspiderSpider(Spider):
+    name = 'myspider'
+    custom_settings = {
+        "CLOSE_SPIDER_ON_IDLE": True
+    }
+    start_urls = ["https://quotes.toscrape.com"]
+
+    @staticmethod
+    async def process_request(request, spider):
+        """ request middleware """
+        pass
+
+    @staticmethod
+    async def process_response(request, response, spider):
+        """ response middleware """
+        return response
+
+    @staticmethod
+    async def process_exception(request, exception, spider):
+        """ exception middleware """
+        pass
+
+    async def parse(self, response):
+        for quote in response.css('div.quote'):
+            item = {
+                'author': quote.xpath('span/small/text()').get(),
+                'text': quote.css('span.text::text').get(),
+            }
+            yield item
+
+    async def process_item(self, item):
+        logger.info(item)
+
+
+if __name__ == '__main__':
+    MyspiderSpider.start()
+```
+
 ## 文档 | Documentation
 
 ## 文档目录 | Documentation Contents
````
**{aio_scrapy-2.1.7 → aio_scrapy-2.1.8/aio_scrapy.egg-info}/PKG-INFO**

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: aio-scrapy
-Version: 2.1.7
+Version: 2.1.8
 Summary: A high-level Web Crawling and Web Scraping framework based on Asyncio
 Home-page: https://github.com/conlin-huang/aio-scrapy.git
 Author: conlin
@@ -117,6 +117,49 @@ pip install aio-scrapy
 # pip install git+https://github.com/ConlinH/aio-scrapy
 ```
 
+### 开始 | Start
+```python
+from aioscrapy import Spider, logger
+
+
+class MyspiderSpider(Spider):
+    name = 'myspider'
+    custom_settings = {
+        "CLOSE_SPIDER_ON_IDLE": True
+    }
+    start_urls = ["https://quotes.toscrape.com"]
+
+    @staticmethod
+    async def process_request(request, spider):
+        """ request middleware """
+        pass
+
+    @staticmethod
+    async def process_response(request, response, spider):
+        """ response middleware """
+        return response
+
+    @staticmethod
+    async def process_exception(request, exception, spider):
+        """ exception middleware """
+        pass
+
+    async def parse(self, response):
+        for quote in response.css('div.quote'):
+            item = {
+                'author': quote.xpath('span/small/text()').get(),
+                'text': quote.css('span.text::text').get(),
+            }
+            yield item
+
+    async def process_item(self, item):
+        logger.info(item)
+
+
+if __name__ == '__main__':
+    MyspiderSpider.start()
+```
+
 ## 文档 | Documentation
 
 ## 文档目录 | Documentation Contents
````
**aio_scrapy-2.1.8/aioscrapy/VERSION**

```diff
@@ -0,0 +1 @@
+2.1.8
```
**{aio_scrapy-2.1.7 → aio_scrapy-2.1.8}/aioscrapy/http/request/__init__.py**

```diff
@@ -11,9 +11,11 @@ It handles URL normalization, fingerprinting, serialization, and other request-r
 
 import hashlib
 import inspect
-import json
-from typing import Callable, List, Optional, Tuple, Type, TypeVar
+from collections import Counter
+from typing import Callable, List, Optional, Tuple, Type, TypeVar, Union
+from urllib.parse import ParseResult, parse_qsl, urlencode, urlparse
 
+import ujson
 from w3lib.url import canonicalize_url
 from w3lib.url import safe_url_string
 
```
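Note: the import changes line up with the code added below. `Counter` and the `urllib.parse` helpers feed the new `_update_url_params`, `Union` covers the widened signatures, and the stdlib `json` import gives way to `ujson`, plausibly because `Request.__init__` now takes a keyword argument named `json` that would shadow a `json` module inside the constructor.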
```diff
@@ -23,11 +25,67 @@ from aioscrapy.utils.curl import curl_to_request_kwargs
 from aioscrapy.utils.python import to_unicode
 from aioscrapy.utils.url import escape_ajax
 
+
 # Type variable for Request class to use in class methods
 # 用于在类方法中使用的Request类的类型变量
 RequestTypeVar = TypeVar("RequestTypeVar", bound="Request")
 
 
+def _update_url_params(url: str, params: Union[dict, list, tuple]) -> str:
+    """Add URL query params to provided URL being aware of existing.
+
+    Args:
+        url: string of target URL
+        params: dict containing requested params to be added
+
+    Returns:
+        string with updated URL
+
+    >> url = 'http://stackoverflow.com/test?answers=true'
+    >> new_params = {'answers': False, 'data': ['some','values']}
+    >> update_url_params(url, new_params)
+    'http://stackoverflow.com/test?data=some&data=values&answers=false'
+    """
+    # No need to unquote, since requote_uri will be called later.
+    parsed_url = urlparse(url)
+
+    # Extracting URL arguments from parsed URL, NOTE the result is a list, not dict
+    parsed_get_args = parse_qsl(parsed_url.query, keep_blank_values=True)
+
+    # Merging URL arguments dict with new params
+    old_args_counter = Counter(x[0] for x in parsed_get_args)
+    if isinstance(params, dict):
+        params = list(params.items())
+    new_args_counter = Counter(x[0] for x in params)
+    for key, value in params:
+        # Bool and Dict values should be converted to json-friendly values
+        if isinstance(value, (bool, dict)):
+            value = ujson.dumps(value)
+        # 1 to 1 mapping, we have to search and update it.
+        if old_args_counter.get(key) == 1 and new_args_counter.get(key) == 1:
+            parsed_get_args = [
+                (x if x[0] != key else (key, value)) for x in parsed_get_args
+            ]
+        else:
+            parsed_get_args.append((key, value))
+
+    # Converting URL argument to proper query string
+    encoded_get_args = urlencode(parsed_get_args, doseq=True)
+
+    # Creating new parsed result object based on provided with new
+    # URL arguments. Same thing happens inside of urlparse.
+    new_url = ParseResult(
+        parsed_url.scheme,
+        parsed_url.netloc,
+        parsed_url.path,
+        parsed_url.params,
+        encoded_get_args,
+        parsed_url.fragment,
+    ).geturl()
+
+    return new_url
+
+
 class Request(object):
     attributes: Tuple[str, ...] = (
         "url", "callback", "method", "headers", "body",
```
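The helper's merge semantics are worth spelling out: a key that appears exactly once in both the existing query string and the new params is replaced in place, while everything else is appended, with lists expanded into repeated keys by `urlencode(..., doseq=True)`. A minimal sketch exercising the function as defined above (importing the module-private name directly, for illustration only):

```python
from aioscrapy.http.request import _update_url_params

url = 'http://stackoverflow.com/test?answers=true'

# 'answers' occurs exactly once on each side, so it is replaced in place;
# 'data' is new, so its two values are appended as repeated keys.
print(_update_url_params(url, {'answers': False, 'data': ['some', 'values']}))
# http://stackoverflow.com/test?answers=false&data=some&data=values
```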
```diff
@@ -42,7 +100,10 @@ class Request(object):
         callback: Optional[Callable] = None,
         method: str = 'GET',
         headers: Optional[dict] = None,
+        params: Optional[Union[dict, list, tuple]] = None,
         body: Optional[str] = None,
+        data: Optional[Union[dict[str, str], list[tuple], str, bytes]] = None,
+        json: Optional[dict | list] = None,
         cookies: Optional[dict] = None,
         meta: Optional[dict] = None,
         encoding: str = 'utf-8',
```
```diff
@@ -77,8 +138,32 @@
         """
         self._encoding = encoding
         self.method = str(method).upper()
+
+        self.headers = Headers(headers or {})
+
+        # url
+        if params:
+            url = _update_url_params(url, params)
         self._set_url(url)
+
+        # body/data/json
+        if data is not None:
+            if isinstance(data, (dict, list, tuple)):
+                body = urlencode(data)
+            elif isinstance(data, str):
+                body = data
+            elif isinstance(data, bytes):
+                body = data.decode(self._encoding)
+            self.headers.setdefault('Content-Type', 'application/x-www-form-urlencoded')
+
+        if json is not None:
+            body = ujson.dumps(json, separators=(",", ":"))
+            # Set default headers for JSON content
+            # 设置JSON内容的默认头部
+            self.headers.setdefault('Content-Type', 'application/json')
+
         self._set_body(body)
+
         assert isinstance(priority, int), f"Request priority not an integer: {priority!r}"
         self.priority = priority
 
```
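Together with the new keyword arguments above, this gives `Request` three declarative request shapes. A minimal usage sketch against the constructor shown in this diff (assuming `Request` is re-exported from `aioscrapy.http`, as the package layout suggests):

```python
from aioscrapy.http import Request

# params: merged into the URL's query string via _update_url_params
req = Request('https://example.com/search?page=1',
              params={'page': 2, 'q': 'books'})
# -> https://example.com/search?page=2&q=books

# data: urlencoded form body; Content-Type defaults to
# application/x-www-form-urlencoded
form_req = Request('https://example.com/login', method='POST',
                   data={'user': 'alice', 'pwd': 's3cret'})

# json: compact ujson-serialized body; Content-Type defaults to
# application/json
json_req = Request('https://example.com/api/items', method='POST',
                   json={'ids': [1, 2, 3]})
```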
```diff
@@ -86,7 +171,6 @@
         self.errback = errback
 
         self.cookies = cookies or {}
-        self.headers = Headers(headers or {})
         self.dont_filter = dont_filter
         self.use_proxy = use_proxy
 
```
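This removal pairs with the `+138,32` hunk above: `self.headers` is now initialized right after `self.method`, because the `data` and `json` branches call `self.headers.setdefault(...)` before the body is set; keeping this old, later assignment would have overwritten those Content-Type defaults.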
```diff
@@ -207,7 +291,7 @@
         """
         return self._body
 
-    def _set_body(self, body: str) -> None:
+    def _set_body(self, body: Optional[str]) -> None:
         """
         Set the request body.
         设置请求体。
```
```diff
@@ -361,7 +445,7 @@
         The request fingerprint. 请求指纹。
         """
         return hashlib.sha1(
-            json.dumps({
+            ujson.dumps({
                 'method': to_unicode(self.method),
                 'url': canonicalize_url(self.url, keep_fragments=keep_fragments),
                 'body': self.body,
```
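For reference, the fingerprint visible in this hunk is a SHA-1 over a JSON document of the request's identifying fields. A minimal sketch of that computation, with the field set taken from the hunk and the trailing encoding/`.hexdigest()` assumed from context:

```python
import hashlib

import ujson
from w3lib.url import canonicalize_url


def fingerprint_sketch(method: str, url: str, body: str,
                       keep_fragments: bool = False) -> str:
    # SHA-1 over the serialized method/URL/body triple, mirroring the hunk
    # above; canonicalize_url normalizes the query string so equivalent
    # URLs hash identically.
    payload = ujson.dumps({
        'method': method,
        'url': canonicalize_url(url, keep_fragments=keep_fragments),
        'body': body,
    })
    return hashlib.sha1(payload.encode('utf-8')).hexdigest()
```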
**aio_scrapy-2.1.7/aioscrapy/VERSION**

```diff
@@ -1 +0,0 @@
-2.1.7
```
All remaining files listed above are renamed from `aio_scrapy-2.1.7` to `aio_scrapy-2.1.8` without content changes.