aio-scrapy 2.0.7__tar.gz → 2.0.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. {aio-scrapy-2.0.7/aio_scrapy.egg-info → aio-scrapy-2.0.9}/PKG-INFO +32 -2
  2. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9/aio_scrapy.egg-info}/PKG-INFO +32 -2
  3. aio-scrapy-2.0.9/aioscrapy/VERSION +1 -0
  4. aio-scrapy-2.0.9/aioscrapy/dupefilters/__init__.py +39 -0
  5. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/dupefilters/disk.py +5 -19
  6. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/dupefilters/redis.py +5 -15
  7. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/libs/downloader/retry.py +7 -0
  8. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/queue/__init__.py +2 -2
  9. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/queue/memory.py +2 -2
  10. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/queue/rabbitmq.py +1 -1
  11. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/queue/redis.py +3 -3
  12. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/spiders/__init__.py +1 -1
  13. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/utils/log.py +3 -1
  14. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/utils/reqser.py +2 -2
  15. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/utils/request.py +2 -2
  16. aio-scrapy-2.0.7/aioscrapy/VERSION +0 -1
  17. aio-scrapy-2.0.7/aioscrapy/dupefilters/__init__.py +0 -24
  18. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/LICENSE +0 -0
  19. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/MANIFEST.in +0 -0
  20. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/README.md +0 -0
  21. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aio_scrapy.egg-info/SOURCES.txt +0 -0
  22. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aio_scrapy.egg-info/dependency_links.txt +0 -0
  23. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aio_scrapy.egg-info/entry_points.txt +0 -0
  24. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aio_scrapy.egg-info/not-zip-safe +0 -0
  25. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aio_scrapy.egg-info/requires.txt +0 -0
  26. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aio_scrapy.egg-info/top_level.txt +0 -0
  27. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/__init__.py +0 -0
  28. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/__main__.py +0 -0
  29. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/cmdline.py +0 -0
  30. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/commands/__init__.py +0 -0
  31. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/commands/crawl.py +0 -0
  32. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/commands/genspider.py +0 -0
  33. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/commands/list.py +0 -0
  34. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/commands/runspider.py +0 -0
  35. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/commands/settings.py +0 -0
  36. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/commands/startproject.py +0 -0
  37. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/commands/version.py +0 -0
  38. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/core/__init__.py +0 -0
  39. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/core/downloader/__init__.py +0 -0
  40. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/core/downloader/handlers/__init__.py +0 -0
  41. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/core/downloader/handlers/aiohttp.py +0 -0
  42. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/core/downloader/handlers/httpx.py +0 -0
  43. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/core/downloader/handlers/playwright/__init__.py +0 -0
  44. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/core/downloader/handlers/playwright/driverpool.py +0 -0
  45. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/core/downloader/handlers/playwright/webdriver.py +0 -0
  46. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/core/downloader/handlers/pyhttpx.py +0 -0
  47. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/core/downloader/handlers/requests.py +0 -0
  48. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/core/engine.py +0 -0
  49. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/core/scheduler.py +0 -0
  50. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/core/scraper.py +0 -0
  51. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/crawler.py +0 -0
  52. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/db/__init__.py +0 -0
  53. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/db/absmanager.py +0 -0
  54. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/db/aiomongo.py +0 -0
  55. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/db/aiomysql.py +0 -0
  56. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/db/aiopg.py +0 -0
  57. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/db/aiorabbitmq.py +0 -0
  58. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/db/aioredis.py +0 -0
  59. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/exceptions.py +0 -0
  60. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/http/__init__.py +0 -0
  61. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/http/headers.py +0 -0
  62. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/http/request/__init__.py +0 -0
  63. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/http/request/form.py +0 -0
  64. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/http/request/json_request.py +0 -0
  65. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/http/response/__init__.py +0 -0
  66. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/http/response/html.py +0 -0
  67. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/http/response/playwright.py +0 -0
  68. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/http/response/text.py +0 -0
  69. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/http/response/xml.py +0 -0
  70. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/libs/__init__.py +0 -0
  71. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/libs/downloader/__init__.py +0 -0
  72. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/libs/downloader/defaultheaders.py +0 -0
  73. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/libs/downloader/downloadtimeout.py +0 -0
  74. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/libs/downloader/ja3fingerprint.py +0 -0
  75. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/libs/downloader/stats.py +0 -0
  76. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/libs/downloader/useragent.py +0 -0
  77. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/libs/extensions/__init__.py +0 -0
  78. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/libs/extensions/closespider.py +0 -0
  79. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/libs/extensions/corestats.py +0 -0
  80. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/libs/extensions/logstats.py +0 -0
  81. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/libs/extensions/metric.py +0 -0
  82. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/libs/extensions/throttle.py +0 -0
  83. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/libs/pipelines/__init__.py +0 -0
  84. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/libs/pipelines/csv.py +0 -0
  85. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/libs/pipelines/execl.py +0 -0
  86. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/libs/pipelines/mongo.py +0 -0
  87. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/libs/pipelines/mysql.py +0 -0
  88. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/libs/pipelines/pg.py +0 -0
  89. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/libs/spider/__init__.py +0 -0
  90. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/libs/spider/depth.py +0 -0
  91. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/libs/spider/httperror.py +0 -0
  92. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/libs/spider/offsite.py +0 -0
  93. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/libs/spider/referer.py +0 -0
  94. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/libs/spider/urllength.py +0 -0
  95. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/link.py +0 -0
  96. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/logformatter.py +0 -0
  97. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/middleware/__init__.py +0 -0
  98. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/middleware/absmanager.py +0 -0
  99. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/middleware/downloader.py +0 -0
  100. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/middleware/extension.py +0 -0
  101. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/middleware/itempipeline.py +0 -0
  102. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/middleware/spider.py +0 -0
  103. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/process.py +0 -0
  104. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/proxy/__init__.py +0 -0
  105. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/proxy/redis.py +0 -0
  106. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/scrapyd/__init__.py +0 -0
  107. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/scrapyd/runner.py +0 -0
  108. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/serializer.py +0 -0
  109. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/settings/__init__.py +0 -0
  110. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/settings/default_settings.py +0 -0
  111. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/signalmanager.py +0 -0
  112. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/signals.py +0 -0
  113. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/spiderloader.py +0 -0
  114. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/statscollectors.py +0 -0
  115. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/templates/project/aioscrapy.cfg +0 -0
  116. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/templates/project/module/__init__.py +0 -0
  117. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/templates/project/module/middlewares.py.tmpl +0 -0
  118. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/templates/project/module/pipelines.py.tmpl +0 -0
  119. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/templates/project/module/settings.py.tmpl +0 -0
  120. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/templates/project/module/spiders/__init__.py +0 -0
  121. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/templates/spiders/basic.tmpl +0 -0
  122. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/templates/spiders/single.tmpl +0 -0
  123. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/utils/__init__.py +0 -0
  124. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/utils/conf.py +0 -0
  125. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/utils/curl.py +0 -0
  126. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/utils/decorators.py +0 -0
  127. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/utils/deprecate.py +0 -0
  128. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/utils/httpobj.py +0 -0
  129. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/utils/misc.py +0 -0
  130. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/utils/ossignal.py +0 -0
  131. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/utils/project.py +0 -0
  132. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/utils/python.py +0 -0
  133. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/utils/response.py +0 -0
  134. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/utils/signal.py +0 -0
  135. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/utils/spider.py +0 -0
  136. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/utils/template.py +0 -0
  137. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/utils/tools.py +0 -0
  138. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/utils/trackref.py +0 -0
  139. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/utils/url.py +0 -0
  140. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/setup.cfg +0 -0
  141. {aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/setup.py +0 -0

{aio-scrapy-2.0.7/aio_scrapy.egg-info → aio-scrapy-2.0.9}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: aio-scrapy
- Version: 2.0.7
+ Version: 2.0.9
  Summary: A high-level Web Crawling and Web Scraping framework based on Asyncio
  Home-page: https://github.com/conlin-huang/aio-scrapy.git
  Author: conlin
@@ -18,17 +18,47 @@ Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
  Requires-Python: >=3.9
  Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: aiohttp
+ Requires-Dist: ujson
+ Requires-Dist: w3lib>=1.17.0
+ Requires-Dist: parsel>=1.5.0
+ Requires-Dist: PyDispatcher>=2.0.5
+ Requires-Dist: zope.interface>=5.1.0
+ Requires-Dist: redis>=4.3.1
+ Requires-Dist: aiomultiprocess>=0.9.0
+ Requires-Dist: loguru>=0.7.0
  Provides-Extra: all
+ Requires-Dist: aiomysql>=0.1.1; extra == "all"
+ Requires-Dist: httpx[http2]>=0.23.0; extra == "all"
+ Requires-Dist: aio-pika>=8.1.1; extra == "all"
+ Requires-Dist: cryptography; extra == "all"
+ Requires-Dist: motor>=3.1.1; extra == "all"
+ Requires-Dist: pyhttpx>=2.10.1; extra == "all"
+ Requires-Dist: asyncpg>=0.27.0; extra == "all"
+ Requires-Dist: XlsxWriter>=3.1.2; extra == "all"
+ Requires-Dist: pillow>=9.4.0; extra == "all"
+ Requires-Dist: requests>=2.28.2; extra == "all"
  Provides-Extra: aiomysql
+ Requires-Dist: aiomysql>=0.1.1; extra == "aiomysql"
+ Requires-Dist: cryptography; extra == "aiomysql"
  Provides-Extra: httpx
+ Requires-Dist: httpx[http2]>=0.23.0; extra == "httpx"
  Provides-Extra: aio-pika
+ Requires-Dist: aio-pika>=8.1.1; extra == "aio-pika"
  Provides-Extra: mongo
+ Requires-Dist: motor>=3.1.1; extra == "mongo"
  Provides-Extra: playwright
+ Requires-Dist: playwright>=1.31.1; extra == "playwright"
  Provides-Extra: pyhttpx
+ Requires-Dist: pyhttpx>=2.10.4; extra == "pyhttpx"
  Provides-Extra: requests
+ Requires-Dist: requests>=2.28.2; extra == "requests"
  Provides-Extra: pg
+ Requires-Dist: asyncpg>=0.27.0; extra == "pg"
  Provides-Extra: execl
- License-File: LICENSE
+ Requires-Dist: XlsxWriter>=3.1.2; extra == "execl"
+ Requires-Dist: pillow>=9.4.0; extra == "execl"

  <!--
  ![aio-scrapy](./doc/images/aio-scrapy.png)

{aio-scrapy-2.0.7 → aio-scrapy-2.0.9/aio_scrapy.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: aio-scrapy
- Version: 2.0.7
+ Version: 2.0.9
  Summary: A high-level Web Crawling and Web Scraping framework based on Asyncio
  Home-page: https://github.com/conlin-huang/aio-scrapy.git
  Author: conlin
@@ -18,17 +18,47 @@ Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
  Requires-Python: >=3.9
  Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: aiohttp
+ Requires-Dist: ujson
+ Requires-Dist: w3lib>=1.17.0
+ Requires-Dist: parsel>=1.5.0
+ Requires-Dist: PyDispatcher>=2.0.5
+ Requires-Dist: zope.interface>=5.1.0
+ Requires-Dist: redis>=4.3.1
+ Requires-Dist: aiomultiprocess>=0.9.0
+ Requires-Dist: loguru>=0.7.0
  Provides-Extra: all
+ Requires-Dist: aiomysql>=0.1.1; extra == "all"
+ Requires-Dist: httpx[http2]>=0.23.0; extra == "all"
+ Requires-Dist: aio-pika>=8.1.1; extra == "all"
+ Requires-Dist: cryptography; extra == "all"
+ Requires-Dist: motor>=3.1.1; extra == "all"
+ Requires-Dist: pyhttpx>=2.10.1; extra == "all"
+ Requires-Dist: asyncpg>=0.27.0; extra == "all"
+ Requires-Dist: XlsxWriter>=3.1.2; extra == "all"
+ Requires-Dist: pillow>=9.4.0; extra == "all"
+ Requires-Dist: requests>=2.28.2; extra == "all"
  Provides-Extra: aiomysql
+ Requires-Dist: aiomysql>=0.1.1; extra == "aiomysql"
+ Requires-Dist: cryptography; extra == "aiomysql"
  Provides-Extra: httpx
+ Requires-Dist: httpx[http2]>=0.23.0; extra == "httpx"
  Provides-Extra: aio-pika
+ Requires-Dist: aio-pika>=8.1.1; extra == "aio-pika"
  Provides-Extra: mongo
+ Requires-Dist: motor>=3.1.1; extra == "mongo"
  Provides-Extra: playwright
+ Requires-Dist: playwright>=1.31.1; extra == "playwright"
  Provides-Extra: pyhttpx
+ Requires-Dist: pyhttpx>=2.10.4; extra == "pyhttpx"
  Provides-Extra: requests
+ Requires-Dist: requests>=2.28.2; extra == "requests"
  Provides-Extra: pg
+ Requires-Dist: asyncpg>=0.27.0; extra == "pg"
  Provides-Extra: execl
- License-File: LICENSE
+ Requires-Dist: XlsxWriter>=3.1.2; extra == "execl"
+ Requires-Dist: pillow>=9.4.0; extra == "execl"

  <!--
  ![aio-scrapy](./doc/images/aio-scrapy.png)
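
Compared with 2.0.7, the 2.0.9 metadata now declares the core runtime dependencies (aiohttp, ujson, w3lib, parsel, PyDispatcher, zope.interface, redis, aiomultiprocess, loguru) as Requires-Dist entries and attaches concrete requirements to each optional extra, so, for example, `pip install "aio-scrapy[httpx]"` pulls in `httpx[http2]>=0.23.0` and `aio-scrapy[mongo]` pulls in `motor>=3.1.1`. Note that the Excel/image extra is spelled `execl` in the metadata.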

aio-scrapy-2.0.9/aioscrapy/VERSION (new file)

@@ -0,0 +1 @@
+ 2.0.9

aio-scrapy-2.0.9/aioscrapy/dupefilters/__init__.py (new file)

@@ -0,0 +1,39 @@
+ from abc import ABCMeta, abstractmethod
+
+ from aioscrapy import Request, Spider
+ from aioscrapy.utils.log import logger
+
+
+ class DupeFilterBase(metaclass=ABCMeta):
+     """Request Fingerprint duplicates filter"""
+
+     @classmethod
+     @abstractmethod
+     def from_crawler(cls, crawler: "aioscrapy.crawler.Crawler"):
+         """ Get Instance of RFPDupeFilter from crawler """
+
+     @abstractmethod
+     async def request_seen(self, request: Request) -> bool:
+         """ Check whether fingerprint of request exists """
+
+     @abstractmethod
+     async def close(self, reason: str = '') -> None:
+         """ Delete data on close """
+
+     def log(self, request: Request, spider: Spider):
+         if self.info:
+             logger.info("Filtered duplicate request: %(request)s" % {
+                 'request': request.meta.get('dupefilter_msg') or request
+             })
+         elif self.debug:
+             logger.debug("Filtered duplicate request: %(request)s" % {
+                 'request': request.meta.get('dupefilter_msg') or request
+             })
+         elif self.logdupes:
+             msg = ("Filtered duplicate request: %(request)s"
+                    " - no more duplicates will be shown"
+                    " (see DUPEFILTER_DEBUG to show all duplicates)")
+             logger.debug(msg % {'request': request.meta.get('dupefilter_msg') or request})
+             self.logdupes = False
+
+         spider.crawler.stats.inc_value('dupefilter/filtered', spider=spider)
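
The rewritten base class centralizes duplicate logging: `log()` reports at info level when the filter's `info` flag is set, at debug level when `debug` is set, and otherwise emits a single debug message before suppressing further ones; it also prefers a request's `dupefilter_msg` meta entry over the request repr. A minimal sketch of a filter built on this interface is shown below — the class name and in-memory storage are illustrative, while the setting names mirror the disk filter diff further down:

```python
from aioscrapy import Request
from aioscrapy.dupefilters import DupeFilterBase


class InMemoryDupeFilter(DupeFilterBase):
    """Illustrative dupefilter that keeps fingerprints in a plain set."""

    def __init__(self, debug: bool = False, info: bool = False):
        self.fingerprints: set = set()
        self.debug = debug      # read by DupeFilterBase.log()
        self.info = info        # read by DupeFilterBase.log()
        self.logdupes = True    # the first duplicate is always reported

    @classmethod
    def from_crawler(cls, crawler):
        # Same settings the built-in disk filter reads in 2.0.9.
        return cls(
            debug=crawler.settings.getbool('DUPEFILTER_DEBUG'),
            info=crawler.settings.getbool('DUPEFILTER_INFO'),
        )

    async def request_seen(self, request: Request) -> bool:
        if request.fingerprint in self.fingerprints:
            return True
        self.fingerprints.add(request.fingerprint)
        return False

    async def close(self, reason: str = '') -> None:
        self.fingerprints.clear()
```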

{aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/dupefilters/disk.py

@@ -1,20 +1,19 @@
  import os
  from typing import Optional, Set

- from aioscrapy import Request, Spider
+ from aioscrapy import Request
  from aioscrapy.dupefilters import DupeFilterBase
- from aioscrapy.utils.log import logger
- from aioscrapy.utils.request import referer_str


  class DiskRFPDupeFilter(DupeFilterBase):
      """Request Fingerprint duplicates filter built with Disk storage"""

-     def __init__(self, path: Optional[str] = None, debug: bool = False):
+     def __init__(self, path: Optional[str] = None, debug: bool = False, info: bool = False):
          self.file: Optional["File object"] = None
          self.debug = debug
          self.fingerprints: Set = set()
          self.logdupes: bool = True
+         self.info: bool = info
          if path:
              self.file = open(os.path.join(path, 'requests.seen'), 'a+')
              self.file.seek(0)
@@ -23,10 +22,11 @@ class DiskRFPDupeFilter(DupeFilterBase):
      @classmethod
      def from_crawler(cls, crawler: "aioscrapy.crawler.Crawler"):
          debug = crawler.settings.getbool('DUPEFILTER_DEBUG')
+         info = crawler.settings.getbool('DUPEFILTER_INFO')
          path = crawler.settings.get('JOBDIR', './job_dir')
          if path and not os.path.exists(path):
              os.makedirs(path)
-         return cls(path, debug)
+         return cls(path, debug, info)

      async def request_seen(self, request: Request) -> bool:
          if request.fingerprint in self.fingerprints:
@@ -40,19 +40,5 @@ class DiskRFPDupeFilter(DupeFilterBase):
          if self.file:
              self.file.close()

-     def log(self, request: Request, spider: Spider):
-         if self.debug:
-             logger.debug("Filtered duplicate request: %(request)s (referer: %(referer)s)" % {
-                 'request': request, 'referer': referer_str(request)
-             })
-         elif self.logdupes:
-             msg = ("Filtered duplicate request: %(request)s"
-                    " - no more duplicates will be shown"
-                    " (see DUPEFILTER_DEBUG to show all duplicates)")
-             logger.debug(msg % {'request': request})
-             self.logdupes = False
-
-         spider.crawler.stats.inc_value('dupefilter/filtered', spider=spider)
-

  RFPDupeFilter = DiskRFPDupeFilter

{aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/dupefilters/redis.py

@@ -2,8 +2,6 @@ from aioscrapy import Request
  from aioscrapy.db import db_manager
  from aioscrapy.dupefilters import DupeFilterBase

- from aioscrapy.utils.log import logger
-

  class RedisRFPDupeFilter(DupeFilterBase):
      """Request Fingerprint duplicates filter built with Set of Redis"""
@@ -13,13 +11,15 @@ class RedisRFPDupeFilter(DupeFilterBase):
              server: "redis.asyncio.Redis",
              key: str,
              debug: bool = False,
-             keep_on_close: bool = True
+             keep_on_close: bool = True,
+             info: bool = False,
      ):
          self.server = server
          self.key = key
          self.debug = debug
          self.keep_on_close = keep_on_close
          self.logdupes: bool = True
+         self.info: bool = info

      @classmethod
      def from_crawler(cls, crawler: "aioscrapy.crawler.Crawler"):
@@ -28,7 +28,8 @@ class RedisRFPDupeFilter(DupeFilterBase):
          keep_on_close = crawler.settings.getbool("KEEP_DUPEFILTER_DATA_ON_CLOSE", True)
          key = dupefilter_key % {'spider': crawler.spider.name}
          debug = crawler.settings.getbool('DUPEFILTER_DEBUG', False)
-         instance = cls(server, key=key, debug=debug, keep_on_close=keep_on_close)
+         info = crawler.settings.getbool('DUPEFILTER_DEBUG', False)
+         instance = cls(server, key=key, debug=debug, keep_on_close=keep_on_close, info=info)
          return instance

      async def request_seen(self, request: Request):
@@ -41,17 +42,6 @@ class RedisRFPDupeFilter(DupeFilterBase):
      async def clear(self):
          await self.server.delete(self.key)

-     def log(self, request, spider):
-         if self.debug:
-             logger.debug("Filtered duplicate request: %(request)s" % {'request': request})
-         elif self.logdupes:
-             msg = ("Filtered duplicate request %(request)s"
-                    " - no more duplicates will be shown"
-                    " (see DUPEFILTER_DEBUG to show all duplicates)")
-             logger.debug(msg % {'request': request})
-             self.logdupes = False
-         spider.crawler.stats.inc_value('dupefilter/filtered', spider=spider)
-

  class HashMap(object):
      def __init__(self, m, seed):

{aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/libs/downloader/retry.py

@@ -26,6 +26,13 @@ try:
  except ImportError:
      pass

+ try:
+     from anyio import EndOfStream
+
+     NEED_RETRY_ERROR += (EndOfStream,)
+ except ImportError:
+     pass
+
  try:
      from httpx import HTTPError as HttpxError

{aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/queue/__init__.py

@@ -37,10 +37,10 @@ class AbsQueue(metaclass=ABCMeta):
          obj = request.to_dict(spider=self.spider)
          return self.serializer.dumps(obj)

-     def _decode_request(self, encoded_request: Any) -> aioscrapy.Request:
+     async def _decode_request(self, encoded_request: Any) -> aioscrapy.Request:
          """Decode an request previously encoded"""
          obj = self.serializer.loads(encoded_request)
-         return request_from_dict(obj, spider=self.spider)
+         return await request_from_dict(obj, spider=self.spider)

      def __len__(self) -> None:
          """Return the length of the queue"""

{aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/queue/memory.py

@@ -58,7 +58,7 @@ class MemoryQueueBase(AbsQueue):
                  data = self.container.get_nowait()
              except QueueEmpty:
                  break
-             yield self._decode_request(data)
+             yield await self._decode_request(data)

      async def clear(self, timeout: int = 0) -> None:
          self.container = self.get_queue(self.max_size)
@@ -93,7 +93,7 @@ class MemoryPriorityQueue(MemoryFifoQueue):
                  score, data = self.container.get_nowait()
              except QueueEmpty:
                  break
-             yield self._decode_request(data)
+             yield await self._decode_request(data)


  SpiderQueue = MemoryFifoQueue

{aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/queue/rabbitmq.py

@@ -57,7 +57,7 @@ class RabbitMqPriorityQueue(AbsQueue):
      async def pop(self, count: int = 1) -> Optional[aioscrapy.Request]:
          result = await self.container.get_message(self.key)
          if result:
-             yield self._decode_request(result)
+             yield await self._decode_request(result)

      async def clear(self) -> None:
          await self.container.clean_message_queue(self.key)

{aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/queue/redis.py

@@ -67,7 +67,7 @@ class RedisFifoQueue(RedisQueueBase):
          results = await pipe.execute()
          for result in results:
              if result:
-                 yield self._decode_request(result)
+                 yield await self._decode_request(result)


  class RedisPriorityQueue(RedisQueueBase):
@@ -97,7 +97,7 @@ class RedisPriorityQueue(RedisQueueBase):
              .execute()
          )
          for result in results:
-             yield self._decode_request(result)
+             yield await self._decode_request(result)


  class RedisLifoQueue(RedisQueueBase):
@@ -124,7 +124,7 @@ class RedisLifoQueue(RedisQueueBase):
          results = await pipe.execute()
          for result in results:
              if result:
-                 yield self._decode_request(result)
+                 yield await self._decode_request(result)


  SpiderQueue = RedisFifoQueue

{aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/spiders/__init__.py

@@ -76,7 +76,7 @@ class Spider(object):
          for url in self.start_urls:
              yield Request(url)

-     def request_from_dict(self, d: dict):
+     async def request_from_dict(self, d: dict):
          """Override this method when integrating: build a Request object from the queued JSON as appropriate."""
          pass

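
Because `Spider.request_from_dict` is now a coroutine, a spider that customizes how queued messages are rebuilt must declare its override `async` as well; per `aioscrapy.utils.request.request_from_dict` (see the later hunks), a returned `Request` is used directly, while a falsy return falls back to the default dict-based reconstruction. A hedged sketch of such an override — the spider name and queue message layout are made up for illustration:

```python
from aioscrapy import Request, Spider


class ProductSpider(Spider):
    name = 'product'  # illustrative spider

    async def request_from_dict(self, d: dict):
        # Rebuild a Request from a hypothetical queue message {"url": ..., "sku": ...}.
        # Returning None (or any falsy value) lets aioscrapy use its default decoding.
        url = d.get('url')
        if not url:
            return None
        return Request(url, callback=self.parse, meta={'sku': d.get('sku')})

    async def parse(self, response):
        yield {'sku': response.meta.get('sku'), 'url': response.url}
```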

{aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/utils/log.py

@@ -7,7 +7,9 @@ from loguru import logger as _logger

  from aioscrapy.settings import Settings

- _logger.remove(0)
+ for _handler in _logger._core.handlers.values():
+     if _handler._name == '<stderr>':
+         _logger.remove(_handler._id)


  def configure_logging(spider: Type["Spider"], settings: Settings):
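
This replaces the hard-coded `_logger.remove(0)`: loguru's `remove()` raises `ValueError` when no handler with the given id exists, so assuming the default stderr sink still holds id 0 can fail if it was already removed or replaced. Scanning the registered handlers (via loguru internals, as the code above does) and removing only the one whose sink name is `<stderr>` makes the cleanup a no-op when that handler is already gone.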

{aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/utils/reqser.py

@@ -11,5 +11,5 @@ def request_to_dict(request: "aioscrapy.Request", spider: Optional["aioscrapy.Sp
      return request.to_dict(spider=spider)


- def request_from_dict(d: dict, spider: Optional["aioscrapy.Spider"] = None) -> "aioscrapy.Request":
-     return _from_dict(d, spider=spider)
+ async def request_from_dict(d: dict, spider: Optional["aioscrapy.Spider"] = None) -> "aioscrapy.Request":
+     return await _from_dict(d, spider=spider)

{aio-scrapy-2.0.7 → aio-scrapy-2.0.9}/aioscrapy/utils/request.py

@@ -39,13 +39,13 @@ def referer_str(request: Request) -> Optional[str]:
      return to_unicode(referrer, errors='replace')


- def request_from_dict(d: dict, *, spider: Optional[Spider] = None) -> Request:
+ async def request_from_dict(d: dict, *, spider: Optional[Spider] = None) -> Request:
      """Create a :class:`~scrapy.Request` object from a dict.

      If a spider is given, it will try to resolve the callbacks looking at the
      spider for methods with the same name.
      """
-     d = spider.request_from_dict(d) or d
+     d = await spider.request_from_dict(d) or d
      if isinstance(d, Request):
          return d

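
With `request_from_dict` now a coroutine in both `aioscrapy.utils.reqser` and `aioscrapy.utils.request`, any code that deserializes queued requests has to await it, which is exactly what the `_decode_request` changes in the queue hunks above do. A hedged round-trip sketch, assuming a `spider` instance is available from the crawler:

```python
from aioscrapy import Request
from aioscrapy.utils.reqser import request_to_dict, request_from_dict


async def requeue_roundtrip(request: Request, spider) -> Request:
    """Serialize a request for a queue and rebuild it on the way out (sketch)."""
    d = request_to_dict(request, spider=spider)  # plain dict, serializer-friendly
    # request_from_dict is a coroutine as of 2.0.9, so it must be awaited;
    # the spider's own (async) request_from_dict hook is consulted first.
    return await request_from_dict(d, spider=spider)
```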

aio-scrapy-2.0.7/aioscrapy/VERSION (removed)

@@ -1 +0,0 @@
- 2.0.7

aio-scrapy-2.0.7/aioscrapy/dupefilters/__init__.py (removed)

@@ -1,24 +0,0 @@
- from abc import ABCMeta, abstractmethod
-
- from aioscrapy import Request, Spider
-
-
- class DupeFilterBase(metaclass=ABCMeta):
-     """Request Fingerprint duplicates filter"""
-
-     @classmethod
-     @abstractmethod
-     def from_crawler(cls, crawler: "aioscrapy.crawler.Crawler"):
-         """ Get Instance of RFPDupeFilter from crawler """
-
-     @abstractmethod
-     async def request_seen(self, request: Request) -> bool:
-         """ Check whether fingerprint of request exists """
-
-     @abstractmethod
-     async def close(self, reason: str = '') -> None:
-         """ Delete data on close """
-
-     @abstractmethod
-     def log(self, request: Request, spider: Spider) -> None:
-         """ Logs given request """