aio-scrapy 2.1.6__tar.gz → 2.1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aio_scrapy-2.1.7/PKG-INFO +147 -0
- aio_scrapy-2.1.7/README.md +69 -0
- aio_scrapy-2.1.7/aio_scrapy.egg-info/PKG-INFO +147 -0
- aio_scrapy-2.1.7/aioscrapy/VERSION +1 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/core/downloader/handlers/aiohttp.py +3 -3
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/core/downloader/handlers/curl_cffi.py +2 -2
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/core/downloader/handlers/httpx.py +2 -2
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/core/downloader/handlers/pyhttpx.py +5 -2
- aio_scrapy-2.1.7/aioscrapy/core/downloader/handlers/webdriver/__init__.py +2 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/core/downloader/handlers/webdriver/drissionpage.py +2 -2
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/core/downloader/handlers/webdriver/playwright.py +2 -2
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/settings/default_settings.py +7 -7
- aio_scrapy-2.1.6/PKG-INFO +0 -251
- aio_scrapy-2.1.6/README.md +0 -173
- aio_scrapy-2.1.6/aio_scrapy.egg-info/PKG-INFO +0 -251
- aio_scrapy-2.1.6/aioscrapy/VERSION +0 -1
- aio_scrapy-2.1.6/aioscrapy/core/downloader/handlers/webdriver/__init__.py +0 -2
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/LICENSE +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/MANIFEST.in +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aio_scrapy.egg-info/SOURCES.txt +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aio_scrapy.egg-info/dependency_links.txt +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aio_scrapy.egg-info/entry_points.txt +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aio_scrapy.egg-info/not-zip-safe +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aio_scrapy.egg-info/requires.txt +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aio_scrapy.egg-info/top_level.txt +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/__init__.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/__main__.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/cmdline.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/commands/__init__.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/commands/crawl.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/commands/genspider.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/commands/list.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/commands/runspider.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/commands/settings.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/commands/startproject.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/commands/version.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/core/__init__.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/core/downloader/__init__.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/core/downloader/handlers/__init__.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/core/downloader/handlers/requests.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/core/downloader/handlers/webdriver/driverpool.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/core/engine.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/core/scheduler.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/core/scraper.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/crawler.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/db/__init__.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/db/absmanager.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/db/aiomongo.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/db/aiomysql.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/db/aiopg.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/db/aiorabbitmq.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/db/aioredis.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/dupefilters/__init__.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/dupefilters/disk.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/dupefilters/redis.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/exceptions.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/http/__init__.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/http/headers.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/http/request/__init__.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/http/request/form.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/http/request/json_request.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/http/response/__init__.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/http/response/html.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/http/response/text.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/http/response/web_driver.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/http/response/xml.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/libs/__init__.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/libs/downloader/__init__.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/libs/downloader/defaultheaders.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/libs/downloader/downloadtimeout.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/libs/downloader/ja3fingerprint.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/libs/downloader/retry.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/libs/downloader/stats.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/libs/downloader/useragent.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/libs/extensions/__init__.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/libs/extensions/closespider.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/libs/extensions/corestats.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/libs/extensions/logstats.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/libs/extensions/metric.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/libs/extensions/throttle.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/libs/pipelines/__init__.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/libs/pipelines/csv.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/libs/pipelines/excel.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/libs/pipelines/mongo.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/libs/pipelines/mysql.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/libs/pipelines/pg.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/libs/spider/__init__.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/libs/spider/depth.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/libs/spider/httperror.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/libs/spider/offsite.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/libs/spider/referer.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/libs/spider/urllength.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/link.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/logformatter.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/middleware/__init__.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/middleware/absmanager.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/middleware/downloader.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/middleware/extension.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/middleware/itempipeline.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/middleware/spider.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/process.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/proxy/__init__.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/proxy/redis.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/queue/__init__.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/queue/memory.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/queue/rabbitmq.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/queue/redis.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/scrapyd/__init__.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/scrapyd/runner.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/serializer.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/settings/__init__.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/signalmanager.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/signals.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/spiderloader.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/spiders/__init__.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/statscollectors.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/templates/project/aioscrapy.cfg +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/templates/project/module/__init__.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/templates/project/module/middlewares.py.tmpl +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/templates/project/module/pipelines.py.tmpl +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/templates/project/module/settings.py.tmpl +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/templates/project/module/spiders/__init__.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/templates/spiders/basic.tmpl +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/templates/spiders/single.tmpl +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/utils/__init__.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/utils/conf.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/utils/curl.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/utils/decorators.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/utils/deprecate.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/utils/httpobj.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/utils/log.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/utils/misc.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/utils/ossignal.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/utils/project.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/utils/python.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/utils/reqser.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/utils/request.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/utils/response.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/utils/signal.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/utils/spider.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/utils/template.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/utils/tools.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/utils/trackref.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/utils/url.py +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/setup.cfg +0 -0
- {aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/setup.py +0 -0
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: aio-scrapy
|
|
3
|
+
Version: 2.1.7
|
|
4
|
+
Summary: A high-level Web Crawling and Web Scraping framework based on Asyncio
|
|
5
|
+
Home-page: https://github.com/conlin-huang/aio-scrapy.git
|
|
6
|
+
Author: conlin
|
|
7
|
+
Author-email: 995018884@qq.com
|
|
8
|
+
License: MIT
|
|
9
|
+
Keywords: aio-scrapy,scrapy,aioscrapy,scrapy redis,asyncio,spider
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
+
Requires-Python: >=3.9
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: aiohttp
|
|
23
|
+
Requires-Dist: ujson
|
|
24
|
+
Requires-Dist: w3lib>=1.17.0
|
|
25
|
+
Requires-Dist: parsel>=1.5.0
|
|
26
|
+
Requires-Dist: PyDispatcher>=2.0.5
|
|
27
|
+
Requires-Dist: zope.interface>=5.1.0
|
|
28
|
+
Requires-Dist: redis>=4.3.1
|
|
29
|
+
Requires-Dist: aiomultiprocess>=0.9.0
|
|
30
|
+
Requires-Dist: loguru>=0.7.0
|
|
31
|
+
Requires-Dist: anyio>=3.6.2
|
|
32
|
+
Provides-Extra: all
|
|
33
|
+
Requires-Dist: aiomysql>=0.1.1; extra == "all"
|
|
34
|
+
Requires-Dist: httpx[http2]>=0.23.0; extra == "all"
|
|
35
|
+
Requires-Dist: aio-pika>=8.1.1; extra == "all"
|
|
36
|
+
Requires-Dist: cryptography; extra == "all"
|
|
37
|
+
Requires-Dist: motor>=2.1.0; extra == "all"
|
|
38
|
+
Requires-Dist: pyhttpx>=2.10.1; extra == "all"
|
|
39
|
+
Requires-Dist: asyncpg>=0.27.0; extra == "all"
|
|
40
|
+
Requires-Dist: XlsxWriter>=3.1.2; extra == "all"
|
|
41
|
+
Requires-Dist: pillow>=9.4.0; extra == "all"
|
|
42
|
+
Requires-Dist: requests>=2.28.2; extra == "all"
|
|
43
|
+
Requires-Dist: curl_cffi; extra == "all"
|
|
44
|
+
Provides-Extra: aiomysql
|
|
45
|
+
Requires-Dist: aiomysql>=0.1.1; extra == "aiomysql"
|
|
46
|
+
Requires-Dist: cryptography; extra == "aiomysql"
|
|
47
|
+
Provides-Extra: httpx
|
|
48
|
+
Requires-Dist: httpx[http2]>=0.23.0; extra == "httpx"
|
|
49
|
+
Provides-Extra: aio-pika
|
|
50
|
+
Requires-Dist: aio-pika>=8.1.1; extra == "aio-pika"
|
|
51
|
+
Provides-Extra: mongo
|
|
52
|
+
Requires-Dist: motor>=2.1.0; extra == "mongo"
|
|
53
|
+
Provides-Extra: playwright
|
|
54
|
+
Requires-Dist: playwright>=1.31.1; extra == "playwright"
|
|
55
|
+
Provides-Extra: pyhttpx
|
|
56
|
+
Requires-Dist: pyhttpx>=2.10.4; extra == "pyhttpx"
|
|
57
|
+
Provides-Extra: curl-cffi
|
|
58
|
+
Requires-Dist: curl_cffi>=0.6.1; extra == "curl-cffi"
|
|
59
|
+
Provides-Extra: requests
|
|
60
|
+
Requires-Dist: requests>=2.28.2; extra == "requests"
|
|
61
|
+
Provides-Extra: pg
|
|
62
|
+
Requires-Dist: asyncpg>=0.27.0; extra == "pg"
|
|
63
|
+
Provides-Extra: execl
|
|
64
|
+
Requires-Dist: XlsxWriter>=3.1.2; extra == "execl"
|
|
65
|
+
Requires-Dist: pillow>=9.4.0; extra == "execl"
|
|
66
|
+
Dynamic: author
|
|
67
|
+
Dynamic: author-email
|
|
68
|
+
Dynamic: classifier
|
|
69
|
+
Dynamic: description
|
|
70
|
+
Dynamic: description-content-type
|
|
71
|
+
Dynamic: home-page
|
|
72
|
+
Dynamic: keywords
|
|
73
|
+
Dynamic: license
|
|
74
|
+
Dynamic: provides-extra
|
|
75
|
+
Dynamic: requires-dist
|
|
76
|
+
Dynamic: requires-python
|
|
77
|
+
Dynamic: summary
|
|
78
|
+
|
|
79
|
+
# AioScrapy
|
|
80
|
+
|
|
81
|
+
AioScrapy是一个基于Python异步IO的强大网络爬虫框架。它的设计理念源自Scrapy,但完全基于异步IO实现,提供更高的性能和更灵活的配置选项。<br>
|
|
82
|
+
AioScrapy is a powerful asynchronous web crawling framework built on Python's asyncio library. It is inspired by Scrapy but completely reimplemented with asynchronous IO, offering higher performance and more flexible configuration options.
|
|
83
|
+
|
|
84
|
+
## 特性 | Features
|
|
85
|
+
|
|
86
|
+
- **完全异步**:基于Python的asyncio库,实现高效的并发爬取
|
|
87
|
+
- **多种下载处理程序**:支持多种HTTP客户端,包括aiohttp、httpx、requests、pyhttpx、curl_cffi、DrissionPage和playwright
|
|
88
|
+
- **灵活的中间件系统**:轻松添加自定义功能和处理逻辑
|
|
89
|
+
- **强大的数据处理管道**:支持多种数据库存储选项
|
|
90
|
+
- **内置信号系统**:方便的事件处理机制
|
|
91
|
+
- **丰富的配置选项**:高度可定制的爬虫行为
|
|
92
|
+
- **分布式爬取**:支持使用Redis和RabbitMQ进行分布式爬取
|
|
93
|
+
- **数据库集成**:内置支持Redis、MySQL、MongoDB、PostgreSQL和RabbitMQ
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
- **Fully Asynchronous**: Built on Python's asyncio for efficient concurrent crawling
|
|
97
|
+
- **Multiple Download Handlers**: Support for various HTTP clients including aiohttp, httpx, requests, pyhttpx, curl_cffi, DrissionPage and playwright
|
|
98
|
+
- **Flexible Middleware System**: Easily add custom functionality and processing logic
|
|
99
|
+
- **Powerful Data Processing Pipelines**: Support for various database storage options
|
|
100
|
+
- **Built-in Signal System**: Convenient event handling mechanism
|
|
101
|
+
- **Rich Configuration Options**: Highly customizable crawler behavior
|
|
102
|
+
- **Distributed Crawling**: Support for distributed crawling using Redis and RabbitMQ
|
|
103
|
+
- **Database Integration**: Built-in support for Redis, MySQL, MongoDB, PostgreSQL, and RabbitMQ
|
|
104
|
+
|
|
105
|
+
## 安装 | Installation
|
|
106
|
+
|
|
107
|
+
### 要求 | Requirements
|
|
108
|
+
|
|
109
|
+
- Python 3.9+
|
|
110
|
+
|
|
111
|
+
### 使用pip安装 | Install with pip
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
pip install aio-scrapy
|
|
115
|
+
|
|
116
|
+
# Install the latest aio-scrapy
|
|
117
|
+
# pip install git+https://github.com/ConlinH/aio-scrapy
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
## 文档 | Documentation
|
|
121
|
+
|
|
122
|
+
### 文档目录 | Documentation Contents
|
|
123
|
+
- [安装指南 | Installation Guide](docs/installation.md)
|
|
124
|
+
- [快速入门 | Quick Start](docs/quickstart.md)
|
|
125
|
+
- [核心概念 | Core Concepts](docs/concepts.md)
|
|
126
|
+
- [爬虫指南 | Spider Guide](docs/spiders.md)
|
|
127
|
+
- [下载器 | Downloaders](docs/downloaders.md)
|
|
128
|
+
- [中间件 | Middlewares](docs/middlewares.md)
|
|
129
|
+
- [管道 | Pipelines](docs/pipelines.md)
|
|
130
|
+
- [队列 | Queues](docs/queues.md)
|
|
131
|
+
- [请求过滤器 | Request Filters](docs/dupefilters.md)
|
|
132
|
+
- [代理 | Proxy](docs/proxy.md)
|
|
133
|
+
- [数据库连接 | Database Connections](docs/databases.md)
|
|
134
|
+
- [分布式部署 | Distributed Deployment](docs/distributed.md)
|
|
135
|
+
- [配置参考 | Settings Reference](docs/settings.md)
|
|
136
|
+
- [API参考 | API Reference](docs/api.md)
|
|
137
|
+
- [示例 | Example](example)
|
|
138
|
+
|
|
139
|
+
## 许可证 | License
|
|
140
|
+
|
|
141
|
+
本项目采用MIT许可证 - 详情请查看LICENSE文件。<br>
|
|
142
|
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
## 联系 | Contact
|
|
146
|
+
QQ: 995018884 <br>
|
|
147
|
+
WeChat: h995018884
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# AioScrapy
|
|
2
|
+
|
|
3
|
+
AioScrapy是一个基于Python异步IO的强大网络爬虫框架。它的设计理念源自Scrapy,但完全基于异步IO实现,提供更高的性能和更灵活的配置选项。<br>
|
|
4
|
+
AioScrapy is a powerful asynchronous web crawling framework built on Python's asyncio library. It is inspired by Scrapy but completely reimplemented with asynchronous IO, offering higher performance and more flexible configuration options.
|
|
5
|
+
|
|
6
|
+
## 特性 | Features
|
|
7
|
+
|
|
8
|
+
- **完全异步**:基于Python的asyncio库,实现高效的并发爬取
|
|
9
|
+
- **多种下载处理程序**:支持多种HTTP客户端,包括aiohttp、httpx、requests、pyhttpx、curl_cffi、DrissionPage和playwright
|
|
10
|
+
- **灵活的中间件系统**:轻松添加自定义功能和处理逻辑
|
|
11
|
+
- **强大的数据处理管道**:支持多种数据库存储选项
|
|
12
|
+
- **内置信号系统**:方便的事件处理机制
|
|
13
|
+
- **丰富的配置选项**:高度可定制的爬虫行为
|
|
14
|
+
- **分布式爬取**:支持使用Redis和RabbitMQ进行分布式爬取
|
|
15
|
+
- **数据库集成**:内置支持Redis、MySQL、MongoDB、PostgreSQL和RabbitMQ
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
- **Fully Asynchronous**: Built on Python's asyncio for efficient concurrent crawling
|
|
19
|
+
- **Multiple Download Handlers**: Support for various HTTP clients including aiohttp, httpx, requests, pyhttpx, curl_cffi, DrissionPage and playwright
|
|
20
|
+
- **Flexible Middleware System**: Easily add custom functionality and processing logic
|
|
21
|
+
- **Powerful Data Processing Pipelines**: Support for various database storage options
|
|
22
|
+
- **Built-in Signal System**: Convenient event handling mechanism
|
|
23
|
+
- **Rich Configuration Options**: Highly customizable crawler behavior
|
|
24
|
+
- **Distributed Crawling**: Support for distributed crawling using Redis and RabbitMQ
|
|
25
|
+
- **Database Integration**: Built-in support for Redis, MySQL, MongoDB, PostgreSQL, and RabbitMQ
|
|
26
|
+
|
|
27
|
+
## 安装 | Installation
|
|
28
|
+
|
|
29
|
+
### 要求 | Requirements
|
|
30
|
+
|
|
31
|
+
- Python 3.9+
|
|
32
|
+
|
|
33
|
+
### 使用pip安装 | Install with pip
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
pip install aio-scrapy
|
|
37
|
+
|
|
38
|
+
# Install the latest aio-scrapy
|
|
39
|
+
# pip install git+https://github.com/ConlinH/aio-scrapy
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## 文档 | Documentation
|
|
43
|
+
|
|
44
|
+
### 文档目录 | Documentation Contents
|
|
45
|
+
- [安装指南 | Installation Guide](docs/installation.md)
|
|
46
|
+
- [快速入门 | Quick Start](docs/quickstart.md)
|
|
47
|
+
- [核心概念 | Core Concepts](docs/concepts.md)
|
|
48
|
+
- [爬虫指南 | Spider Guide](docs/spiders.md)
|
|
49
|
+
- [下载器 | Downloaders](docs/downloaders.md)
|
|
50
|
+
- [中间件 | Middlewares](docs/middlewares.md)
|
|
51
|
+
- [管道 | Pipelines](docs/pipelines.md)
|
|
52
|
+
- [队列 | Queues](docs/queues.md)
|
|
53
|
+
- [请求过滤器 | Request Filters](docs/dupefilters.md)
|
|
54
|
+
- [代理 | Proxy](docs/proxy.md)
|
|
55
|
+
- [数据库连接 | Database Connections](docs/databases.md)
|
|
56
|
+
- [分布式部署 | Distributed Deployment](docs/distributed.md)
|
|
57
|
+
- [配置参考 | Settings Reference](docs/settings.md)
|
|
58
|
+
- [API参考 | API Reference](docs/api.md)
|
|
59
|
+
- [示例 | Example](example)
|
|
60
|
+
|
|
61
|
+
## 许可证 | License
|
|
62
|
+
|
|
63
|
+
本项目采用MIT许可证 - 详情请查看LICENSE文件。<br>
|
|
64
|
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
## 联系 | Contact
|
|
68
|
+
QQ: 995018884 <br>
|
|
69
|
+
WeChat: h995018884
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: aio-scrapy
|
|
3
|
+
Version: 2.1.7
|
|
4
|
+
Summary: A high-level Web Crawling and Web Scraping framework based on Asyncio
|
|
5
|
+
Home-page: https://github.com/conlin-huang/aio-scrapy.git
|
|
6
|
+
Author: conlin
|
|
7
|
+
Author-email: 995018884@qq.com
|
|
8
|
+
License: MIT
|
|
9
|
+
Keywords: aio-scrapy,scrapy,aioscrapy,scrapy redis,asyncio,spider
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
+
Requires-Python: >=3.9
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: aiohttp
|
|
23
|
+
Requires-Dist: ujson
|
|
24
|
+
Requires-Dist: w3lib>=1.17.0
|
|
25
|
+
Requires-Dist: parsel>=1.5.0
|
|
26
|
+
Requires-Dist: PyDispatcher>=2.0.5
|
|
27
|
+
Requires-Dist: zope.interface>=5.1.0
|
|
28
|
+
Requires-Dist: redis>=4.3.1
|
|
29
|
+
Requires-Dist: aiomultiprocess>=0.9.0
|
|
30
|
+
Requires-Dist: loguru>=0.7.0
|
|
31
|
+
Requires-Dist: anyio>=3.6.2
|
|
32
|
+
Provides-Extra: all
|
|
33
|
+
Requires-Dist: aiomysql>=0.1.1; extra == "all"
|
|
34
|
+
Requires-Dist: httpx[http2]>=0.23.0; extra == "all"
|
|
35
|
+
Requires-Dist: aio-pika>=8.1.1; extra == "all"
|
|
36
|
+
Requires-Dist: cryptography; extra == "all"
|
|
37
|
+
Requires-Dist: motor>=2.1.0; extra == "all"
|
|
38
|
+
Requires-Dist: pyhttpx>=2.10.1; extra == "all"
|
|
39
|
+
Requires-Dist: asyncpg>=0.27.0; extra == "all"
|
|
40
|
+
Requires-Dist: XlsxWriter>=3.1.2; extra == "all"
|
|
41
|
+
Requires-Dist: pillow>=9.4.0; extra == "all"
|
|
42
|
+
Requires-Dist: requests>=2.28.2; extra == "all"
|
|
43
|
+
Requires-Dist: curl_cffi; extra == "all"
|
|
44
|
+
Provides-Extra: aiomysql
|
|
45
|
+
Requires-Dist: aiomysql>=0.1.1; extra == "aiomysql"
|
|
46
|
+
Requires-Dist: cryptography; extra == "aiomysql"
|
|
47
|
+
Provides-Extra: httpx
|
|
48
|
+
Requires-Dist: httpx[http2]>=0.23.0; extra == "httpx"
|
|
49
|
+
Provides-Extra: aio-pika
|
|
50
|
+
Requires-Dist: aio-pika>=8.1.1; extra == "aio-pika"
|
|
51
|
+
Provides-Extra: mongo
|
|
52
|
+
Requires-Dist: motor>=2.1.0; extra == "mongo"
|
|
53
|
+
Provides-Extra: playwright
|
|
54
|
+
Requires-Dist: playwright>=1.31.1; extra == "playwright"
|
|
55
|
+
Provides-Extra: pyhttpx
|
|
56
|
+
Requires-Dist: pyhttpx>=2.10.4; extra == "pyhttpx"
|
|
57
|
+
Provides-Extra: curl-cffi
|
|
58
|
+
Requires-Dist: curl_cffi>=0.6.1; extra == "curl-cffi"
|
|
59
|
+
Provides-Extra: requests
|
|
60
|
+
Requires-Dist: requests>=2.28.2; extra == "requests"
|
|
61
|
+
Provides-Extra: pg
|
|
62
|
+
Requires-Dist: asyncpg>=0.27.0; extra == "pg"
|
|
63
|
+
Provides-Extra: execl
|
|
64
|
+
Requires-Dist: XlsxWriter>=3.1.2; extra == "execl"
|
|
65
|
+
Requires-Dist: pillow>=9.4.0; extra == "execl"
|
|
66
|
+
Dynamic: author
|
|
67
|
+
Dynamic: author-email
|
|
68
|
+
Dynamic: classifier
|
|
69
|
+
Dynamic: description
|
|
70
|
+
Dynamic: description-content-type
|
|
71
|
+
Dynamic: home-page
|
|
72
|
+
Dynamic: keywords
|
|
73
|
+
Dynamic: license
|
|
74
|
+
Dynamic: provides-extra
|
|
75
|
+
Dynamic: requires-dist
|
|
76
|
+
Dynamic: requires-python
|
|
77
|
+
Dynamic: summary
|
|
78
|
+
|
|
79
|
+
# AioScrapy
|
|
80
|
+
|
|
81
|
+
AioScrapy是一个基于Python异步IO的强大网络爬虫框架。它的设计理念源自Scrapy,但完全基于异步IO实现,提供更高的性能和更灵活的配置选项。<br>
|
|
82
|
+
AioScrapy is a powerful asynchronous web crawling framework built on Python's asyncio library. It is inspired by Scrapy but completely reimplemented with asynchronous IO, offering higher performance and more flexible configuration options.
|
|
83
|
+
|
|
84
|
+
## 特性 | Features
|
|
85
|
+
|
|
86
|
+
- **完全异步**:基于Python的asyncio库,实现高效的并发爬取
|
|
87
|
+
- **多种下载处理程序**:支持多种HTTP客户端,包括aiohttp、httpx、requests、pyhttpx、curl_cffi、DrissionPage和playwright
|
|
88
|
+
- **灵活的中间件系统**:轻松添加自定义功能和处理逻辑
|
|
89
|
+
- **强大的数据处理管道**:支持多种数据库存储选项
|
|
90
|
+
- **内置信号系统**:方便的事件处理机制
|
|
91
|
+
- **丰富的配置选项**:高度可定制的爬虫行为
|
|
92
|
+
- **分布式爬取**:支持使用Redis和RabbitMQ进行分布式爬取
|
|
93
|
+
- **数据库集成**:内置支持Redis、MySQL、MongoDB、PostgreSQL和RabbitMQ
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
- **Fully Asynchronous**: Built on Python's asyncio for efficient concurrent crawling
|
|
97
|
+
- **Multiple Download Handlers**: Support for various HTTP clients including aiohttp, httpx, requests, pyhttpx, curl_cffi, DrissionPage and playwright
|
|
98
|
+
- **Flexible Middleware System**: Easily add custom functionality and processing logic
|
|
99
|
+
- **Powerful Data Processing Pipelines**: Support for various database storage options
|
|
100
|
+
- **Built-in Signal System**: Convenient event handling mechanism
|
|
101
|
+
- **Rich Configuration Options**: Highly customizable crawler behavior
|
|
102
|
+
- **Distributed Crawling**: Support for distributed crawling using Redis and RabbitMQ
|
|
103
|
+
- **Database Integration**: Built-in support for Redis, MySQL, MongoDB, PostgreSQL, and RabbitMQ
|
|
104
|
+
|
|
105
|
+
## 安装 | Installation
|
|
106
|
+
|
|
107
|
+
### 要求 | Requirements
|
|
108
|
+
|
|
109
|
+
- Python 3.9+
|
|
110
|
+
|
|
111
|
+
### 使用pip安装 | Install with pip
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
pip install aio-scrapy
|
|
115
|
+
|
|
116
|
+
# Install the latest aio-scrapy
|
|
117
|
+
# pip install git+https://github.com/ConlinH/aio-scrapy
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
## 文档 | Documentation
|
|
121
|
+
|
|
122
|
+
### 文档目录 | Documentation Contents
|
|
123
|
+
- [安装指南 | Installation Guide](docs/installation.md)
|
|
124
|
+
- [快速入门 | Quick Start](docs/quickstart.md)
|
|
125
|
+
- [核心概念 | Core Concepts](docs/concepts.md)
|
|
126
|
+
- [爬虫指南 | Spider Guide](docs/spiders.md)
|
|
127
|
+
- [下载器 | Downloaders](docs/downloaders.md)
|
|
128
|
+
- [中间件 | Middlewares](docs/middlewares.md)
|
|
129
|
+
- [管道 | Pipelines](docs/pipelines.md)
|
|
130
|
+
- [队列 | Queues](docs/queues.md)
|
|
131
|
+
- [请求过滤器 | Request Filters](docs/dupefilters.md)
|
|
132
|
+
- [代理 | Proxy](docs/proxy.md)
|
|
133
|
+
- [数据库连接 | Database Connections](docs/databases.md)
|
|
134
|
+
- [分布式部署 | Distributed Deployment](docs/distributed.md)
|
|
135
|
+
- [配置参考 | Settings Reference](docs/settings.md)
|
|
136
|
+
- [API参考 | API Reference](docs/api.md)
|
|
137
|
+
- [示例 | Example](example)
|
|
138
|
+
|
|
139
|
+
## 许可证 | License
|
|
140
|
+
|
|
141
|
+
本项目采用MIT许可证 - 详情请查看LICENSE文件。<br>
|
|
142
|
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
## 联系 | Contact
|
|
146
|
+
QQ: 995018884 <br>
|
|
147
|
+
WeChat: h995018884
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
2.1.7
|
|
@@ -50,7 +50,7 @@ class AioHttpDownloadHandler(BaseDownloadHandler):
|
|
|
50
50
|
|
|
51
51
|
# Arguments to pass to aiohttp.ClientSession constructor
|
|
52
52
|
# 传递给aiohttp.ClientSession构造函数的参数
|
|
53
|
-
self.
|
|
53
|
+
self.aiohttp_args: dict = settings.getdict('AIOHTTP_ARGS')
|
|
54
54
|
|
|
55
55
|
# SSL verification setting
|
|
56
56
|
# SSL验证设置
|
|
@@ -228,13 +228,13 @@ class AioHttpDownloadHandler(BaseDownloadHandler):
|
|
|
228
228
|
if self.use_session:
|
|
229
229
|
# Using a session is not recommended: abnormal behavior can occur when using a tunnel proxy
|
|
230
230
|
# 不建议使用会话,使用隧道代理时会出现异常现象
|
|
231
|
-
session = self.get_session(**self.
|
|
231
|
+
session = self.get_session(**self.aiohttp_args)
|
|
232
232
|
async with session.request(request.method, request.url, **kwargs) as response:
|
|
233
233
|
content: bytes = await response.read()
|
|
234
234
|
else:
|
|
235
235
|
# Create a new session for each request (recommended)
|
|
236
236
|
# 为每个请求创建一个新会话(推荐)
|
|
237
|
-
async with aiohttp.ClientSession(**self.
|
|
237
|
+
async with aiohttp.ClientSession(**self.aiohttp_args) as session:
|
|
238
238
|
async with session.request(request.method, request.url, **kwargs) as response:
|
|
239
239
|
content: bytes = await response.read()
|
|
240
240
|
|
|
@@ -44,7 +44,7 @@ class CurlCffiDownloadHandler(BaseDownloadHandler):
|
|
|
44
44
|
|
|
45
45
|
# Arguments to pass to curl_cffi AsyncSession constructor
|
|
46
46
|
# 传递给curl_cffi AsyncSession构造函数的参数
|
|
47
|
-
self.
|
|
47
|
+
self.curl_cffi_args: dict = self.settings.get('CURL_CFFI_ARGS', {})
|
|
48
48
|
|
|
49
49
|
# SSL verification setting
|
|
50
50
|
# SSL验证设置
|
|
@@ -156,7 +156,7 @@ class CurlCffiDownloadHandler(BaseDownloadHandler):
|
|
|
156
156
|
|
|
157
157
|
# Configure curl_cffi session
|
|
158
158
|
# 配置curl_cffi会话
|
|
159
|
-
session_args = self.
|
|
159
|
+
session_args = self.curl_cffi_args.copy()
|
|
160
160
|
|
|
161
161
|
# Perform the request
|
|
162
162
|
# 执行请求
|
|
@@ -46,7 +46,7 @@ class HttpxDownloadHandler(BaseDownloadHandler):
|
|
|
46
46
|
|
|
47
47
|
# Arguments to pass to httpx AsyncClient constructor
|
|
48
48
|
# 传递给httpx AsyncClient构造函数的参数
|
|
49
|
-
self.
|
|
49
|
+
self.httpx_args: dict = self.settings.get('HTTPX_ARGS', {})
|
|
50
50
|
|
|
51
51
|
# SSL verification setting
|
|
52
52
|
# SSL验证设置
|
|
@@ -147,7 +147,7 @@ class HttpxDownloadHandler(BaseDownloadHandler):
|
|
|
147
147
|
|
|
148
148
|
# Configure httpx client session
|
|
149
149
|
# 配置httpx客户端会话
|
|
150
|
-
session_args = self.
|
|
150
|
+
session_args = self.httpx_args.copy()
|
|
151
151
|
session_args.setdefault('http2', True) # Enable HTTP/2 by default
|
|
152
152
|
# 默认启用HTTP/2
|
|
153
153
|
session_args.update({
|
|
@@ -46,7 +46,7 @@ class PyhttpxDownloadHandler(BaseDownloadHandler):
|
|
|
46
46
|
|
|
47
47
|
# Arguments to pass to pyhttpx HttpSession constructor
|
|
48
48
|
# 传递给pyhttpx HttpSession构造函数的参数
|
|
49
|
-
self.
|
|
49
|
+
self.pyhttpx_args: dict = self.settings.get('PYHTTPX_ARGS', {})
|
|
50
50
|
|
|
51
51
|
# SSL verification setting
|
|
52
52
|
# SSL验证设置
|
|
@@ -161,10 +161,13 @@ class PyhttpxDownloadHandler(BaseDownloadHandler):
|
|
|
161
161
|
|
|
162
162
|
# Configure pyhttpx session
|
|
163
163
|
# 配置pyhttpx会话
|
|
164
|
-
session_args = self.
|
|
164
|
+
session_args = self.pyhttpx_args.copy()
|
|
165
165
|
session_args.setdefault('http2', True) # Enable HTTP/2 by default
|
|
166
166
|
# 默认启用HTTP/2
|
|
167
167
|
|
|
168
|
+
if ja3 := request.meta.get("ja3"):
|
|
169
|
+
session_args['ja3'] = ja3
|
|
170
|
+
|
|
168
171
|
# Execute the request in a thread pool since pyhttpx is synchronous
|
|
169
172
|
# 由于pyhttpx是同步的,在线程池中执行请求
|
|
170
173
|
with pyhttpx.HttpSession(**session_args) as session:
|
{aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/core/downloader/handlers/webdriver/drissionpage.py
RENAMED
|
@@ -273,7 +273,7 @@ class DrissionPageDriver(WebDriverBase):
|
|
|
273
273
|
self.page.set.cookies(cookies)
|
|
274
274
|
|
|
275
275
|
|
|
276
|
-
class
|
|
276
|
+
class DrissionPageDownloadHandler(BaseDownloadHandler):
|
|
277
277
|
"""
|
|
278
278
|
Download handler that uses DrissionPage to perform browser-based HTTP requests.
|
|
279
279
|
使用DrissionPage执行基于浏览器的HTTP请求的下载处理程序。
|
|
@@ -298,7 +298,7 @@ class DrissionPageHandler(BaseDownloadHandler):
|
|
|
298
298
|
|
|
299
299
|
# Get DrissionPage client arguments from settings
|
|
300
300
|
# 从设置中获取DrissionPage客户端参数
|
|
301
|
-
client_args = settings.getdict('
|
|
301
|
+
client_args = settings.getdict('DP_ARGS', {})
|
|
302
302
|
|
|
303
303
|
# Configure the pool size for browser instances
|
|
304
304
|
# 配置浏览器实例的池大小
|
{aio_scrapy-2.1.6 → aio_scrapy-2.1.7}/aioscrapy/core/downloader/handlers/webdriver/playwright.py
RENAMED
|
@@ -278,7 +278,7 @@ class PlaywrightDriver(WebDriverBase):
|
|
|
278
278
|
])
|
|
279
279
|
|
|
280
280
|
|
|
281
|
-
class
|
|
281
|
+
class PlaywrightDownloadHandler(BaseDownloadHandler):
|
|
282
282
|
"""
|
|
283
283
|
Download handler that uses Playwright to perform browser-based HTTP requests.
|
|
284
284
|
使用Playwright执行基于浏览器的HTTP请求的下载处理程序。
|
|
@@ -303,7 +303,7 @@ class PlaywrightHandler(BaseDownloadHandler):
|
|
|
303
303
|
|
|
304
304
|
# Get Playwright client arguments from settings
|
|
305
305
|
# 从设置中获取Playwright客户端参数
|
|
306
|
-
playwright_client_args = settings.getdict('
|
|
306
|
+
playwright_client_args = settings.getdict('PLAYWRIGHT_ARGS')
|
|
307
307
|
|
|
308
308
|
# Set the default page load event to wait for
|
|
309
309
|
# 设置要等待的默认页面加载事件
|
|
@@ -167,15 +167,15 @@ DOWNLOAD_HANDLERS_MAP = {
|
|
|
167
167
|
# playwright handlers (for JavaScript rendering)
|
|
168
168
|
# playwright处理程序(用于JavaScript渲染)
|
|
169
169
|
'playwright': {
|
|
170
|
-
'http': 'aioscrapy.core.downloader.handlers.webdriver.playwright.
|
|
171
|
-
'https': 'aioscrapy.core.downloader.handlers.webdriver.playwright.
|
|
170
|
+
'http': 'aioscrapy.core.downloader.handlers.webdriver.playwright.PlaywrightDownloadHandler',
|
|
171
|
+
'https': 'aioscrapy.core.downloader.handlers.webdriver.playwright.PlaywrightDownloadHandler',
|
|
172
172
|
},
|
|
173
173
|
|
|
174
|
-
#
|
|
175
|
-
#
|
|
174
|
+
# DrissionPage handlers (for JavaScript rendering)
|
|
175
|
+
# DrissionPage处理程序(用于JavaScript渲染)
|
|
176
176
|
'dp': {
|
|
177
|
-
'http': 'aioscrapy.core.downloader.handlers.webdriver.drissionpage.
|
|
178
|
-
'https': 'aioscrapy.core.downloader.handlers.webdriver.drissionpage.
|
|
177
|
+
'http': 'aioscrapy.core.downloader.handlers.webdriver.drissionpage.DrissionPageDownloadHandler',
|
|
178
|
+
'https': 'aioscrapy.core.downloader.handlers.webdriver.drissionpage.DrissionPageDownloadHandler',
|
|
179
179
|
},
|
|
180
180
|
|
|
181
181
|
# curl_cffi handlers
|
|
@@ -480,4 +480,4 @@ URLLENGTH_LIMIT = 2083
|
|
|
480
480
|
|
|
481
481
|
# Whether to close the spider when it becomes idle (no more requests)
|
|
482
482
|
# 当爬虫变为空闲状态(没有更多请求)时是否关闭爬虫
|
|
483
|
-
CLOSE_SPIDER_ON_IDLE =
|
|
483
|
+
CLOSE_SPIDER_ON_IDLE = True
|