aio-scrapy 2.0.10__tar.gz → 2.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. {aio-scrapy-2.0.10/aio_scrapy.egg-info → aio-scrapy-2.1.2}/PKG-INFO +7 -4
  2. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/README.md +1 -1
  3. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2/aio_scrapy.egg-info}/PKG-INFO +7 -4
  4. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aio_scrapy.egg-info/SOURCES.txt +1 -0
  5. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aio_scrapy.egg-info/requires.txt +6 -2
  6. aio-scrapy-2.1.2/aioscrapy/VERSION +1 -0
  7. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/core/downloader/__init__.py +5 -4
  8. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/core/downloader/handlers/aiohttp.py +8 -0
  9. aio-scrapy-2.1.2/aioscrapy/core/downloader/handlers/curl_cffi.py +67 -0
  10. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/core/downloader/handlers/httpx.py +9 -1
  11. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/core/downloader/handlers/playwright/__init__.py +10 -2
  12. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/core/downloader/handlers/pyhttpx.py +8 -0
  13. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/core/downloader/handlers/requests.py +9 -1
  14. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/core/engine.py +2 -27
  15. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/core/scheduler.py +45 -5
  16. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/core/scraper.py +9 -3
  17. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/crawler.py +5 -2
  18. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/db/aiomongo.py +10 -2
  19. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/dupefilters/__init__.py +4 -0
  20. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/dupefilters/redis.py +47 -0
  21. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/exceptions.py +5 -0
  22. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/libs/downloader/retry.py +6 -47
  23. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/libs/pipelines/__init__.py +3 -6
  24. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/libs/pipelines/mongo.py +7 -2
  25. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/settings/default_settings.py +4 -0
  26. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/spiders/__init__.py +4 -3
  27. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/templates/spiders/single.tmpl +6 -5
  28. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/utils/python.py +1 -6
  29. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/setup.py +4 -3
  30. aio-scrapy-2.0.10/aioscrapy/VERSION +0 -1
  31. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/LICENSE +0 -0
  32. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/MANIFEST.in +0 -0
  33. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aio_scrapy.egg-info/dependency_links.txt +0 -0
  34. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aio_scrapy.egg-info/entry_points.txt +0 -0
  35. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aio_scrapy.egg-info/not-zip-safe +0 -0
  36. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aio_scrapy.egg-info/top_level.txt +0 -0
  37. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/__init__.py +0 -0
  38. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/__main__.py +0 -0
  39. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/cmdline.py +0 -0
  40. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/commands/__init__.py +0 -0
  41. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/commands/crawl.py +0 -0
  42. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/commands/genspider.py +0 -0
  43. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/commands/list.py +0 -0
  44. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/commands/runspider.py +0 -0
  45. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/commands/settings.py +0 -0
  46. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/commands/startproject.py +0 -0
  47. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/commands/version.py +0 -0
  48. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/core/__init__.py +0 -0
  49. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/core/downloader/handlers/__init__.py +0 -0
  50. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/core/downloader/handlers/playwright/driverpool.py +0 -0
  51. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/core/downloader/handlers/playwright/webdriver.py +0 -0
  52. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/db/__init__.py +0 -0
  53. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/db/absmanager.py +0 -0
  54. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/db/aiomysql.py +0 -0
  55. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/db/aiopg.py +0 -0
  56. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/db/aiorabbitmq.py +0 -0
  57. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/db/aioredis.py +0 -0
  58. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/dupefilters/disk.py +0 -0
  59. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/http/__init__.py +0 -0
  60. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/http/headers.py +0 -0
  61. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/http/request/__init__.py +0 -0
  62. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/http/request/form.py +0 -0
  63. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/http/request/json_request.py +0 -0
  64. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/http/response/__init__.py +0 -0
  65. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/http/response/html.py +0 -0
  66. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/http/response/playwright.py +0 -0
  67. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/http/response/text.py +0 -0
  68. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/http/response/xml.py +0 -0
  69. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/libs/__init__.py +0 -0
  70. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/libs/downloader/__init__.py +0 -0
  71. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/libs/downloader/defaultheaders.py +0 -0
  72. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/libs/downloader/downloadtimeout.py +0 -0
  73. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/libs/downloader/ja3fingerprint.py +0 -0
  74. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/libs/downloader/stats.py +0 -0
  75. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/libs/downloader/useragent.py +0 -0
  76. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/libs/extensions/__init__.py +0 -0
  77. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/libs/extensions/closespider.py +0 -0
  78. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/libs/extensions/corestats.py +0 -0
  79. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/libs/extensions/logstats.py +0 -0
  80. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/libs/extensions/metric.py +0 -0
  81. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/libs/extensions/throttle.py +0 -0
  82. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/libs/pipelines/csv.py +0 -0
  83. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/libs/pipelines/execl.py +0 -0
  84. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/libs/pipelines/mysql.py +0 -0
  85. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/libs/pipelines/pg.py +0 -0
  86. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/libs/spider/__init__.py +0 -0
  87. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/libs/spider/depth.py +0 -0
  88. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/libs/spider/httperror.py +0 -0
  89. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/libs/spider/offsite.py +0 -0
  90. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/libs/spider/referer.py +0 -0
  91. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/libs/spider/urllength.py +0 -0
  92. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/link.py +0 -0
  93. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/logformatter.py +0 -0
  94. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/middleware/__init__.py +0 -0
  95. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/middleware/absmanager.py +0 -0
  96. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/middleware/downloader.py +0 -0
  97. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/middleware/extension.py +0 -0
  98. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/middleware/itempipeline.py +0 -0
  99. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/middleware/spider.py +0 -0
  100. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/process.py +0 -0
  101. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/proxy/__init__.py +0 -0
  102. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/proxy/redis.py +0 -0
  103. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/queue/__init__.py +0 -0
  104. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/queue/memory.py +0 -0
  105. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/queue/rabbitmq.py +0 -0
  106. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/queue/redis.py +0 -0
  107. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/scrapyd/__init__.py +0 -0
  108. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/scrapyd/runner.py +0 -0
  109. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/serializer.py +0 -0
  110. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/settings/__init__.py +0 -0
  111. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/signalmanager.py +0 -0
  112. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/signals.py +0 -0
  113. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/spiderloader.py +0 -0
  114. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/statscollectors.py +0 -0
  115. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/templates/project/aioscrapy.cfg +0 -0
  116. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/templates/project/module/__init__.py +0 -0
  117. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/templates/project/module/middlewares.py.tmpl +0 -0
  118. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/templates/project/module/pipelines.py.tmpl +0 -0
  119. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/templates/project/module/settings.py.tmpl +0 -0
  120. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/templates/project/module/spiders/__init__.py +0 -0
  121. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/templates/spiders/basic.tmpl +0 -0
  122. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/utils/__init__.py +0 -0
  123. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/utils/conf.py +0 -0
  124. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/utils/curl.py +0 -0
  125. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/utils/decorators.py +0 -0
  126. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/utils/deprecate.py +0 -0
  127. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/utils/httpobj.py +0 -0
  128. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/utils/log.py +0 -0
  129. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/utils/misc.py +0 -0
  130. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/utils/ossignal.py +0 -0
  131. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/utils/project.py +0 -0
  132. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/utils/reqser.py +0 -0
  133. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/utils/request.py +0 -0
  134. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/utils/response.py +0 -0
  135. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/utils/signal.py +0 -0
  136. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/utils/spider.py +0 -0
  137. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/utils/template.py +0 -0
  138. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/utils/tools.py +0 -0
  139. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/utils/trackref.py +0 -0
  140. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/utils/url.py +0 -0
  141. {aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/setup.cfg +0 -0

{aio-scrapy-2.0.10/aio_scrapy.egg-info → aio-scrapy-2.1.2}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: aio-scrapy
- Version: 2.0.10
+ Version: 2.1.2
  Summary: A high-level Web Crawling and Web Scraping framework based on Asyncio
  Home-page: https://github.com/conlin-huang/aio-scrapy.git
  Author: conlin
@@ -33,12 +33,13 @@ Requires-Dist: aiomysql>=0.1.1; extra == "all"
  Requires-Dist: httpx[http2]>=0.23.0; extra == "all"
  Requires-Dist: aio-pika>=8.1.1; extra == "all"
  Requires-Dist: cryptography; extra == "all"
- Requires-Dist: motor>=3.1.1; extra == "all"
+ Requires-Dist: motor>=2.1.0; extra == "all"
  Requires-Dist: pyhttpx>=2.10.1; extra == "all"
  Requires-Dist: asyncpg>=0.27.0; extra == "all"
  Requires-Dist: XlsxWriter>=3.1.2; extra == "all"
  Requires-Dist: pillow>=9.4.0; extra == "all"
  Requires-Dist: requests>=2.28.2; extra == "all"
+ Requires-Dist: curl_cffi; extra == "all"
  Provides-Extra: aiomysql
  Requires-Dist: aiomysql>=0.1.1; extra == "aiomysql"
  Requires-Dist: cryptography; extra == "aiomysql"
@@ -47,11 +48,13 @@ Requires-Dist: httpx[http2]>=0.23.0; extra == "httpx"
  Provides-Extra: aio-pika
  Requires-Dist: aio-pika>=8.1.1; extra == "aio-pika"
  Provides-Extra: mongo
- Requires-Dist: motor>=3.1.1; extra == "mongo"
+ Requires-Dist: motor>=2.1.0; extra == "mongo"
  Provides-Extra: playwright
  Requires-Dist: playwright>=1.31.1; extra == "playwright"
  Provides-Extra: pyhttpx
  Requires-Dist: pyhttpx>=2.10.4; extra == "pyhttpx"
+ Provides-Extra: curl-cffi
+ Requires-Dist: curl_cffi>=0.6.1; extra == "curl-cffi"
  Provides-Extra: requests
  Requires-Dist: requests>=2.28.2; extra == "requests"
  Provides-Extra: pg
@@ -86,7 +89,7 @@ The quick way:

  ```shell
  # Install the latest aio-scrapy
- pip install git+https://github.com/conlin-huang/aio-scrapy
+ pip install git+https://github.com/ConlinH/aio-scrapy

  # default
  pip install aio-scrapy

{aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/README.md
@@ -24,7 +24,7 @@ The quick way:

  ```shell
  # Install the latest aio-scrapy
- pip install git+https://github.com/conlin-huang/aio-scrapy
+ pip install git+https://github.com/ConlinH/aio-scrapy

  # default
  pip install aio-scrapy

{aio-scrapy-2.0.10 → aio-scrapy-2.1.2/aio_scrapy.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: aio-scrapy
- Version: 2.0.10
+ Version: 2.1.2
  Summary: A high-level Web Crawling and Web Scraping framework based on Asyncio
  Home-page: https://github.com/conlin-huang/aio-scrapy.git
  Author: conlin
@@ -33,12 +33,13 @@ Requires-Dist: aiomysql>=0.1.1; extra == "all"
  Requires-Dist: httpx[http2]>=0.23.0; extra == "all"
  Requires-Dist: aio-pika>=8.1.1; extra == "all"
  Requires-Dist: cryptography; extra == "all"
- Requires-Dist: motor>=3.1.1; extra == "all"
+ Requires-Dist: motor>=2.1.0; extra == "all"
  Requires-Dist: pyhttpx>=2.10.1; extra == "all"
  Requires-Dist: asyncpg>=0.27.0; extra == "all"
  Requires-Dist: XlsxWriter>=3.1.2; extra == "all"
  Requires-Dist: pillow>=9.4.0; extra == "all"
  Requires-Dist: requests>=2.28.2; extra == "all"
+ Requires-Dist: curl_cffi; extra == "all"
  Provides-Extra: aiomysql
  Requires-Dist: aiomysql>=0.1.1; extra == "aiomysql"
  Requires-Dist: cryptography; extra == "aiomysql"
@@ -47,11 +48,13 @@ Requires-Dist: httpx[http2]>=0.23.0; extra == "httpx"
  Provides-Extra: aio-pika
  Requires-Dist: aio-pika>=8.1.1; extra == "aio-pika"
  Provides-Extra: mongo
- Requires-Dist: motor>=3.1.1; extra == "mongo"
+ Requires-Dist: motor>=2.1.0; extra == "mongo"
  Provides-Extra: playwright
  Requires-Dist: playwright>=1.31.1; extra == "playwright"
  Provides-Extra: pyhttpx
  Requires-Dist: pyhttpx>=2.10.4; extra == "pyhttpx"
+ Provides-Extra: curl-cffi
+ Requires-Dist: curl_cffi>=0.6.1; extra == "curl-cffi"
  Provides-Extra: requests
  Requires-Dist: requests>=2.28.2; extra == "requests"
  Provides-Extra: pg
@@ -86,7 +89,7 @@ The quick way:

  ```shell
  # Install the latest aio-scrapy
- pip install git+https://github.com/conlin-huang/aio-scrapy
+ pip install git+https://github.com/ConlinH/aio-scrapy

  # default
  pip install aio-scrapy

{aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aio_scrapy.egg-info/SOURCES.txt
@@ -38,6 +38,7 @@ aioscrapy/core/scraper.py
  aioscrapy/core/downloader/__init__.py
  aioscrapy/core/downloader/handlers/__init__.py
  aioscrapy/core/downloader/handlers/aiohttp.py
+ aioscrapy/core/downloader/handlers/curl_cffi.py
  aioscrapy/core/downloader/handlers/httpx.py
  aioscrapy/core/downloader/handlers/pyhttpx.py
  aioscrapy/core/downloader/handlers/requests.py

{aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aio_scrapy.egg-info/requires.txt
@@ -20,12 +20,16 @@ aiomysql>=0.1.1
  httpx[http2]>=0.23.0
  aio-pika>=8.1.1
  cryptography
- motor>=3.1.1
+ motor>=2.1.0
  pyhttpx>=2.10.1
  asyncpg>=0.27.0
  XlsxWriter>=3.1.2
  pillow>=9.4.0
  requests>=2.28.2
+ curl_cffi
+
+ [curl_cffi]
+ curl_cffi>=0.6.1

  [execl]
  XlsxWriter>=3.1.2
@@ -35,7 +39,7 @@ pillow>=9.4.0
  httpx[http2]>=0.23.0

  [mongo]
- motor>=3.1.1
+ motor>=2.1.0

  [pg]
  asyncpg>=0.27.0

aio-scrapy-2.1.2/aioscrapy/VERSION
@@ -0,0 +1 @@
+ 2.1.2

{aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/core/downloader/__init__.py
@@ -138,14 +138,14 @@ class Downloader(BaseDownloader):

      @classmethod
      async def from_crawler(cls, crawler) -> "Downloader":
+         df = crawler.settings.get('DUPEFILTER_CLASS') and await load_instance(crawler.settings['DUPEFILTER_CLASS'], crawler=crawler)
+         crawler.spider.dupefilter = df  # bind the dupefilter to the Spider; the DUPEFILTER_CLASS success hook is called when parsing succeeds
          return cls(
              crawler,
              await call_helper(DownloadHandlerManager.for_crawler, crawler),
              await call_helper(DownloaderMiddlewareManager.from_crawler, crawler),
-             proxy=crawler.settings.get("PROXY_HANDLER") and await load_instance(crawler.settings["PROXY_HANDLER"],
-                                                                                 crawler=crawler),
-             dupefilter=crawler.settings.get('DUPEFILTER_CLASS') and await load_instance(
-                 crawler.settings['DUPEFILTER_CLASS'], crawler=crawler)
+             proxy=crawler.settings.get("PROXY_HANDLER") and await load_instance(crawler.settings["PROXY_HANDLER"], crawler=crawler),
+             dupefilter=df
          )

      async def fetch(self, request: Request) -> None:
@@ -204,6 +204,7 @@ class Downloader(BaseDownloader):
              slot.transferring.remove(request)
              slot.active.remove(request)
              self.active.remove(request)
+             self.dupefilter and not request.dont_filter and await self.dupefilter.done(request, done_type="request_done")
              if isinstance(result, Response):
                  await self.signals.send_catch_log(signal=signals.response_downloaded,
                                                    response=result,
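
The downloader now builds the dupefilter itself, exposes it to the spider as `spider.dupefilter`, and reports `done(request, done_type="request_done")` once a download finishes, unless the request was sent with `dont_filter=True`. A minimal sketch of opting a single request out of that bookkeeping, using the Scrapy-style `dont_filter` flag that aio-scrapy inherits (spider name and URLs are illustrative):

```python
from aioscrapy import Request, Spider


class RefreshSpider(Spider):
    name = "refresh_demo"  # hypothetical spider
    start_urls = ["https://example.com/feed"]

    async def parse(self, response):
        # dont_filter=True skips the dupefilter check and the new
        # done(..., done_type="request_done") call shown above, so this
        # page can be fetched again on every run.
        yield Request("https://example.com/feed?page=2",
                      callback=self.parse, dont_filter=True)
```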

{aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/core/downloader/handlers/aiohttp.py
@@ -4,9 +4,11 @@ import ssl
  from typing import Optional

  import aiohttp
+ from aiohttp.client_exceptions import ClientError

  from aioscrapy import Request
  from aioscrapy.core.downloader.handlers import BaseDownloadHandler
+ from aioscrapy.exceptions import DownloadError
  from aioscrapy.http import HtmlResponse
  from aioscrapy.settings import Settings
  from aioscrapy.utils.log import logger
@@ -32,6 +34,12 @@ class AioHttpDownloadHandler(BaseDownloadHandler):
          return self.session

      async def download_request(self, request: Request, _) -> HtmlResponse:
+         try:
+             return await self._download_request(request)
+         except ClientError as e:
+             raise DownloadError from e
+
+     async def _download_request(self, request: Request) -> HtmlResponse:
          kwargs = {
              'verify_ssl': request.meta.get('verify_ssl', self.verify_ssl),
              'timeout': request.meta.get('download_timeout', 180),
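
Each HTTP client handler now wraps its client-specific network errors into a single `aioscrapy.exceptions.DownloadError` (here aiohttp's `ClientError`; the curl_cffi, httpx, playwright, pyhttpx and requests handlers below do the same with their own exception types), so downstream code can catch one type regardless of the client in use. A hedged sketch of reacting to it in a custom downloader middleware; it assumes aio-scrapy keeps Scrapy's downloader-middleware hooks and Request API, and the built-in retry middleware already covers this case, so the class is purely illustrative:

```python
from aioscrapy.exceptions import DownloadError
from aioscrapy.utils.log import logger


class LogAndRetryMiddleware:
    """Illustrative middleware: retry a request when the handler raised DownloadError."""

    async def process_exception(self, request, exception, spider):
        if isinstance(exception, DownloadError):
            # The original client error is chained via `raise DownloadError from e`.
            logger.warning(f"download failed for {request.url}: {exception.__cause__!r}")
            retries = request.meta.get("retry_times", 0)
            if retries < 3:
                new_request = request.copy()
                new_request.meta["retry_times"] = retries + 1
                new_request.dont_filter = True
                return new_request
        return None
```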

{aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/core/downloader/handlers/curl_cffi.py
@@ -0,0 +1,67 @@
+ from curl_cffi.curl import CurlError
+ from curl_cffi.requests import AsyncSession
+
+ from aioscrapy import Request
+ from aioscrapy.core.downloader.handlers import BaseDownloadHandler
+ from aioscrapy.exceptions import DownloadError
+ from aioscrapy.http import HtmlResponse
+ from aioscrapy.settings import Settings
+ from aioscrapy.utils.log import logger
+
+
+ class CurlCffiDownloadHandler(BaseDownloadHandler):
+
+     def __init__(self, settings):
+         self.settings: Settings = settings
+         self.httpx_client_session_args: dict = self.settings.get('CURL_CFFI_CLIENT_SESSION_ARGS', {})
+         self.verify_ssl: bool = self.settings.get("VERIFY_SSL", True)
+
+     @classmethod
+     def from_settings(cls, settings: Settings):
+         return cls(settings)
+
+     async def download_request(self, request: Request, _) -> HtmlResponse:
+         try:
+             return await self._download_request(request)
+         except CurlError as e:
+             raise DownloadError from e
+
+     async def _download_request(self, request: Request) -> HtmlResponse:
+         kwargs = {
+             'timeout': self.settings.get('DOWNLOAD_TIMEOUT'),
+             'cookies': dict(request.cookies),
+             'verify': request.meta.get('verify_ssl', self.verify_ssl),
+             'allow_redirects': self.settings.getbool('REDIRECT_ENABLED', True) if request.meta.get(
+                 'dont_redirect') is None else request.meta.get('dont_redirect'),
+             'impersonate': request.meta.get('impersonate'),
+         }
+         post_data = request.body or None
+         if isinstance(post_data, dict):
+             kwargs['json'] = post_data
+         else:
+             kwargs['data'] = post_data
+
+         headers = request.headers or self.settings.get('DEFAULT_REQUEST_HEADERS')
+         kwargs['headers'] = headers
+
+         proxy = request.meta.get("proxy")
+         if proxy:
+             kwargs["proxies"] = {'http': proxy, 'https': proxy}
+             logger.debug(f"use proxy {proxy}: {request.url}")
+
+         session_args = self.httpx_client_session_args.copy()
+
+         async with AsyncSession(**session_args) as session:
+             response = await session.request(request.method, request.url, **kwargs)
+
+         return HtmlResponse(
+             str(response.url),
+             status=response.status_code,
+             headers=response.headers,
+             body=response.content,
+             cookies={j.name: j.value or '' for j in response.cookies.jar},
+             encoding=response.encoding
+         )
+
+     async def close(self):
+         pass
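
The new handler drives curl_cffi's `AsyncSession` and reads an optional `impersonate` target from `request.meta`, which turns on curl_cffi's browser TLS/JA3 impersonation. Below is a self-contained sketch that exercises the handler directly, using only names visible in the file above; the settings values and target URL are illustrative, and it assumes `Settings` accepts a plain mapping as in Scrapy (in a real project the handler would be selected through the downloader settings rather than instantiated by hand):

```python
import asyncio

from aioscrapy import Request
from aioscrapy.core.downloader.handlers.curl_cffi import CurlCffiDownloadHandler
from aioscrapy.settings import Settings


async def main():
    settings = Settings({
        "DOWNLOAD_TIMEOUT": 30,               # read via settings.get() in _download_request()
        "VERIFY_SSL": True,
        "CURL_CFFI_CLIENT_SESSION_ARGS": {},  # forwarded to AsyncSession(**session_args)
    })
    handler = CurlCffiDownloadHandler.from_settings(settings)

    # request.meta['impersonate'] is passed straight to curl_cffi;
    # "chrome" is one of its impersonation targets.
    request = Request("https://tls.browserleaks.com/json", meta={"impersonate": "chrome"})
    response = await handler.download_request(request, None)
    print(response.status, response.encoding)
    await handler.close()


asyncio.run(main())
```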

{aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/core/downloader/handlers/httpx.py
@@ -1,9 +1,11 @@
  import ssl

  import httpx
+ from httpx import HTTPError as HttpxError

  from aioscrapy import Request
  from aioscrapy.core.downloader.handlers import BaseDownloadHandler
+ from aioscrapy.exceptions import DownloadError
  from aioscrapy.http import HtmlResponse
  from aioscrapy.settings import Settings
  from aioscrapy.utils.log import logger
@@ -27,6 +29,12 @@ class HttpxDownloadHandler(BaseDownloadHandler):
          return cls(settings)

      async def download_request(self, request: Request, _) -> HtmlResponse:
+         try:
+             return await self._download_request(request)
+         except HttpxError as e:
+             raise DownloadError from e
+
+     async def _download_request(self, request: Request) -> HtmlResponse:
          kwargs = {
              'timeout': self.settings.get('DOWNLOAD_TIMEOUT'),
              'cookies': dict(request.cookies),
@@ -68,7 +76,7 @@ class HttpxDownloadHandler(BaseDownloadHandler):

              status=response.status_code,
              headers=response.headers,
-             cookies=dict(response.cookies),
+             cookies={j.name: j.value or '' for j in response.cookies.jar},
              encoding=response.encoding
          )


{aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/core/downloader/handlers/playwright/__init__.py
@@ -1,11 +1,13 @@
  from functools import wraps

+ from playwright._impl._api_types import Error
  from playwright.async_api._generated import Response as EventResponse

- from aioscrapy import Request
+ from aioscrapy import Request, Spider
  from aioscrapy.core.downloader.handlers import BaseDownloadHandler
  from aioscrapy.core.downloader.handlers.playwright.driverpool import WebDriverPool
  from aioscrapy.core.downloader.handlers.playwright.webdriver import PlaywrightDriver
+ from aioscrapy.exceptions import DownloadError
  from aioscrapy.http import PlaywrightResponse
  from aioscrapy.settings import Settings
  from aioscrapy.utils.tools import call_helper
@@ -24,7 +26,13 @@ class PlaywrightHandler(BaseDownloadHandler):
      def from_settings(cls, settings: Settings):
          return cls(settings)

-     async def download_request(self, request: Request, spider) -> PlaywrightResponse:
+     async def download_request(self, request: Request, spider: Spider) -> PlaywrightResponse:
+         try:
+             return await self._download_request(request, spider)
+         except Error as e:
+             raise DownloadError from e
+
+     async def _download_request(self, request: Request, spider) -> PlaywrightResponse:
          cookies = dict(request.cookies)
          timeout = request.meta.get('download_timeout', 30) * 1000
          user_agent = request.headers.get("User-Agent")

{aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/core/downloader/handlers/pyhttpx.py
@@ -1,9 +1,11 @@
  import asyncio

  import pyhttpx
+ from pyhttpx.exception import BaseExpetion as PyHttpxError

  from aioscrapy import Request
  from aioscrapy.core.downloader.handlers import BaseDownloadHandler
+ from aioscrapy.exceptions import DownloadError
  from aioscrapy.http import HtmlResponse
  from aioscrapy.settings import Settings
  from aioscrapy.utils.log import logger
@@ -22,6 +24,12 @@ class PyhttpxDownloadHandler(BaseDownloadHandler):
          return cls(settings)

      async def download_request(self, request: Request, _) -> HtmlResponse:
+         try:
+             return await self._download_request(request)
+         except PyHttpxError as e:
+             raise DownloadError from e
+
+     async def _download_request(self, request: Request) -> HtmlResponse:
          kwargs = {
              'timeout': self.settings.get('DOWNLOAD_TIMEOUT'),
              'cookies': dict(request.cookies),

{aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/core/downloader/handlers/requests.py
@@ -1,9 +1,11 @@
  import asyncio

  import requests
+ from requests.exceptions import RequestException as RequestsError

  from aioscrapy import Request
  from aioscrapy.core.downloader.handlers import BaseDownloadHandler
+ from aioscrapy.exceptions import DownloadError
  from aioscrapy.http import HtmlResponse
  from aioscrapy.settings import Settings
  from aioscrapy.utils.log import logger
@@ -21,6 +23,12 @@ class RequestsDownloadHandler(BaseDownloadHandler):
          return cls(settings)

      async def download_request(self, request: Request, _) -> HtmlResponse:
+         try:
+             return await self._download_request(request)
+         except RequestsError as e:
+             raise DownloadError from e
+
+     async def _download_request(self, request: Request) -> HtmlResponse:
          kwargs = {
              'timeout': self.settings.get('DOWNLOAD_TIMEOUT'),
              'cookies': dict(request.cookies),
@@ -48,7 +56,7 @@ class RequestsDownloadHandler(BaseDownloadHandler):
              status=response.status_code,
              headers=response.headers,
              body=response.content,
-             cookies=dict(response.cookies),
+             cookies={k: v or '' for k, v in response.cookies.items()},
              encoding=response.encoding
          )


{aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/core/engine.py
@@ -42,8 +42,6 @@ class ExecutionEngine(object):
          self.signals = crawler.signals
          self.logformatter = crawler.logformatter

-         self.enqueue_cache_num = self.settings.getint("ENQUEUE_CACHE_NUM")
-         self.enqueue_cache: Queue = Queue(self.enqueue_cache_num)
          self.slot: Optional[Slot] = None
          self.spider: Optional[Spider] = None
          self.downloader: Optional[DownloaderTV] = None
@@ -53,7 +51,6 @@ class ExecutionEngine(object):
          self.running: bool = False
          self.unlock: bool = True
          self.finish: bool = False
-         self.enqueue_unlock: bool = True

      async def start(
              self,
@@ -70,7 +67,6 @@ class ExecutionEngine(object):
          while not self.finish:
              self.running and await self._next_request()
              await asyncio.sleep(1)
-             self.enqueue_cache_num != 1 and create_task(self._crawl())
              self.running and await self._spider_idle(self.spider)

      async def stop(self, reason: str = 'shutdown') -> None:
@@ -81,7 +77,6 @@ class ExecutionEngine(object):

          while not self.is_idle():
              await asyncio.sleep(0.2)
-         self.enqueue_cache_num != 1 and create_task(self._crawl())
          await self.close_spider(self.spider, reason=reason)
          await self.signals.send_catch_log_deferred(signal=signals.engine_stopped)
          self.finish = True
@@ -212,27 +207,8 @@ class ExecutionEngine(object):
          return True

      async def crawl(self, request: Request) -> None:
-         if self.enqueue_cache_num == 1:
-             await self.scheduler.enqueue_request(request)
-             create_task(self._next_request())
-         else:
-             await self.enqueue_cache.put(request)
-
-     async def _crawl(self) -> None:
-         if not self.enqueue_unlock:
-             return
-         self.enqueue_unlock = False
-         requests = []
-         for _ in range(self.enqueue_cache.qsize()):
-             try:
-                 request = self.enqueue_cache.get_nowait()
-                 requests.append(request)
-             except QueueEmpty:
-                 break
-         if requests:
-             await call_helper(self.scheduler.enqueue_request_batch, requests)
-             create_task(self._next_request())
-         self.enqueue_unlock = True
+         await self.scheduler.enqueue_request(request)
+         # create_task(self._next_request())

      async def close_spider(self, spider: Spider, reason: str = 'cancelled') -> None:
          """Close (cancel) spider and clear all its outstanding requests"""
@@ -276,7 +252,6 @@ class ExecutionEngine(object):
          # method of 'has_pending_requests' has IO, so method of 'is_idle' execute twice
          if self.is_idle() \
                  and self.slot.start_requests is None \
-                 and self.enqueue_unlock and self.enqueue_cache.empty() \
                  and not await self.scheduler.has_pending_requests() \
                  and self.is_idle():
              await self.stop(reason='finished')

{aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/core/scheduler.py
@@ -31,7 +31,7 @@ class BaseScheduler(metaclass=BaseSchedulerMeta):
      @classmethod
      async def from_crawler(cls, crawler: "aioscrapy.Crawler") -> "BaseScheduler":
          """
-         Factory method which receives the current :class:`~scrapy.crawler.Crawler` object as argument.
+         Factory method which receives the current :class:`~aioscrapy.crawler.Crawler` object as argument.
          """
          return cls()

@@ -103,20 +103,27 @@ class Scheduler(BaseScheduler):
              queue: AbsQueue,
              spider: aioscrapy.Spider,
              stats=Optional[StatsCollector],
-             persist: bool = True
+             persist: bool = True,
+             cache_queue: Optional[AbsQueue] = None
      ):
+
          self.queue = queue
+         self.cache_queue = cache_queue
          self.spider = spider
          self.stats = stats
          self.persist = persist

      @classmethod
      async def from_crawler(cls: Type[SchedulerTV], crawler: "aioscrapy.Crawler") -> SchedulerTV:
+         cache_queue = None
+         if crawler.settings.getbool('USE_SCHEDULER_QUEUE_CACHE', False):
+             cache_queue = await load_instance('aioscrapy.queue.memory.SpiderPriorityQueue', spider=crawler.spider)
          instance = cls(
              await load_instance(crawler.settings['SCHEDULER_QUEUE_CLASS'], spider=crawler.spider),
              crawler.spider,
              stats=crawler.stats,
-             persist=crawler.settings.getbool('SCHEDULER_PERSIST', True)
+             persist=crawler.settings.getbool('SCHEDULER_PERSIST', True),
+             cache_queue=cache_queue
          )

          if crawler.settings.getbool('SCHEDULER_FLUSH_ON_START', False):
@@ -128,8 +135,20 @@ class Scheduler(BaseScheduler):
          return instance

      async def close(self, reason: str) -> None:
+
          if not self.persist:
              await self.flush()
+             return
+
+         # When persisting, push the cached requests back into the distributed queue (e.g. redis)
+         if self.cache_queue is not None:
+             while True:
+                 temp = []
+                 async for request in self.cache_queue.pop(2000):
+                     temp.append(request)
+                 temp and await self.queue.push_batch(temp)
+                 if len(temp) < 2000:
+                     break

      async def flush(self) -> None:
          await call_helper(self.queue.clear)
@@ -141,16 +160,37 @@ class Scheduler(BaseScheduler):
          return True

      async def enqueue_request(self, request: aioscrapy.Request) -> bool:
-         await call_helper(self.queue.push, request)
+         """
+         If the cache queue is enabled (USE_SCHEDULER_QUEUE_CACHE), push the request into the cache queue first
+         """
+         if self.cache_queue is not None:
+             await call_helper(self.cache_queue.push, request)
+         else:
+             await call_helper(self.queue.push, request)
          if self.stats:
              self.stats.inc_value(self.queue.inc_key, spider=self.spider)
          return True

      async def next_request(self, count: int = 1) -> Optional[aioscrapy.Request]:
+         """
+         If the cache queue is enabled (USE_SCHEDULER_QUEUE_CACHE), take requests from the cache queue first, then from the distributed queue (e.g. redis)
+         """
+         flag = False
+         if self.cache_queue is not None:
+             async for request in self.cache_queue.pop(count):
+                 if request and self.stats:
+                     self.stats.inc_value(self.queue.inc_key, spider=self.spider)
+                 yield request
+                 flag = True
+
+         if flag:
+             return
+
          async for request in self.queue.pop(count):
              if request and self.stats:
                  self.stats.inc_value(self.queue.inc_key, spider=self.spider)
              yield request

      async def has_pending_requests(self) -> bool:
-         return await call_helper(self.queue.len) > 0
+         return await call_helper(self.queue.len) if self.cache_queue is None \
+             else (await call_helper(self.queue.len) + await call_helper(self.cache_queue.len)) > 0
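
With `USE_SCHEDULER_QUEUE_CACHE` enabled, the scheduler pushes new requests into an in-memory `SpiderPriorityQueue` first, drains that cache ahead of the persistent queue in `next_request()`, and on `close()` flushes anything left in the cache back to the persistent queue in batches of 2000 when `SCHEDULER_PERSIST` is on. A sketch of switching it on from a spider's `custom_settings`; the redis queue path is an assumed example based on the package layout, not a default confirmed by this diff:

```python
from aioscrapy import Spider


class CachedQueueSpider(Spider):
    name = "cached_queue_demo"
    start_urls = ["https://example.com"]

    custom_settings = {
        # Flag read in Scheduler.from_crawler() above.
        "USE_SCHEDULER_QUEUE_CACHE": True,
        # Persistent queue class; module path assumed from aioscrapy/queue/redis.py.
        "SCHEDULER_QUEUE_CLASS": "aioscrapy.queue.redis.SpiderPriorityQueue",
        # With persistence on, Scheduler.close() flushes the cache back here.
        "SCHEDULER_PERSIST": True,
    }

    async def parse(self, response):
        yield {"url": response.url, "status": response.status}
```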

{aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/core/scraper.py
@@ -110,8 +110,8 @@ class Scraper:
                  await self.handle_spider_error(e, request, result)
              else:
                  await self.handle_spider_output(output, request, result)
-         except BaseException:
-             logger.exception('Scraper bug processing %(request)s' % {'request': request})
+         except BaseException as e:
+             await self.handle_spider_error(e, request, result)
          finally:
              if isinstance(result, PlaywrightResponse):
                  await result.release()
@@ -161,17 +161,23 @@ class Scraper:
          """Iter each Request/Item (given in the output parameter) returned from the given spider"""
          if not result:
              return
-
+         parser_successful = True
          while True:
              try:
                  output = await result.__anext__()
              except StopAsyncIteration:
                  break
              except Exception as e:
+                 parser_successful = False
                  await self.handle_spider_error(e, request, response)
              else:
                  await self._process_spidermw_output(output, request, response)

+         self.spider.dupefilter and \
+             not request.dont_filter and \
+             parser_successful and \
+             await self.spider.dupefilter.done(request, done_type="parse_done")
+
      async def _process_spidermw_output(self, output: Any, request: Request, response: Response) -> None:
          """Process each Request/Item (given in the output parameter) returned from the given spider"""


{aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/crawler.py
@@ -234,9 +234,12 @@ class CrawlerProcess(CrawlerRunner):
          finally:
              await self.recycle_db_connect()

-     def start(self) -> None:
+     def start(self, use_windows_selector_eventLoop: bool = False) -> None:
          if sys.platform.startswith('win'):
-             asyncio.set_event_loop(asyncio.windows_events.ProactorEventLoop())
+             if use_windows_selector_eventLoop:
+                 asyncio.set_event_loop_policy(asyncio.windows_events.WindowsSelectorEventLoopPolicy())
+             else:
+                 asyncio.set_event_loop(asyncio.windows_events.ProactorEventLoop())
          else:
              try:
                  import uvloop
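
`CrawlerProcess.start()` gains a `use_windows_selector_eventLoop` flag: on Windows it installs the selector event loop policy instead of the default proactor loop (useful when a dependency cannot run on the proactor loop), and on other platforms it changes nothing. A sketch of passing the flag, assuming aio-scrapy keeps Scrapy's `CrawlerProcess.crawl()`/`start()` entry point; the settings helper and spider import are illustrative:

```python
from aioscrapy.crawler import CrawlerProcess
from aioscrapy.utils.project import get_project_settings

from myproject.spiders.demo import DemoSpider  # hypothetical spider


if __name__ == "__main__":
    process = CrawlerProcess(get_project_settings())
    process.crawl(DemoSpider)
    # Only honoured on Windows; elsewhere the uvloop branch above is used.
    process.start(use_windows_selector_eventLoop=True)
```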

{aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/db/aiomongo.py
@@ -1,7 +1,9 @@
  from motor.motor_asyncio import AsyncIOMotorClient
+ from pymongo.errors import NetworkTimeout

  import aioscrapy
  from aioscrapy.db.absmanager import AbsDBPoolManager
+ from loguru import logger


  class MongoExecutor:
@@ -9,10 +11,16 @@ class MongoExecutor:
          self.alias = alias
          self.pool_manager = pool_manager

-     async def insert(self, table_name, values, db_name=None):
+     async def insert(self, table_name, values, db_name=None, ordered=False, retry_times=3):
          client, db_name_default = self.pool_manager.get_pool(self.alias)
          db_name = db_name or db_name_default
-         return await client[f'{db_name}'][f'{table_name}'].insert_many(values)
+         for _ in range(retry_times):
+             try:
+                 return await client[f'{db_name}'][f'{table_name}'].insert_many(values, ordered=ordered)
+             except NetworkTimeout:
+                 logger.warning("mongo insert error by NetworkTimeout, retrying...")
+
+         raise NetworkTimeout

      def __getattr__(self, table_name: str):
          client, db_name_default = self.pool_manager.get_pool(self.alias)
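
`MongoExecutor.insert()` now retries `pymongo.errors.NetworkTimeout` up to `retry_times` attempts and passes `ordered` through to `insert_many()`. A hedged usage sketch follows; how the executor is obtained from the pool manager is an assumption about `aioscrapy.db` (only the `insert()` signature comes from this diff):

```python
from aioscrapy.db import db_manager  # assumed accessor into the pool managers


async def save_items(items: list) -> None:
    executor = db_manager.mongo("default")  # assumed: executor for the "default" alias
    # ordered=False keeps inserting the remaining documents if one fails;
    # NetworkTimeout is retried up to retry_times before being re-raised.
    await executor.insert("demo_table", items, ordered=False, retry_times=3)
```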

{aio-scrapy-2.0.10 → aio-scrapy-2.1.2}/aioscrapy/dupefilters/__init__.py
@@ -1,3 +1,4 @@
+ from typing import Literal
  from abc import ABCMeta, abstractmethod

  from aioscrapy import Request, Spider
@@ -37,3 +38,6 @@ class DupeFilterBase(metaclass=ABCMeta):
              self.logdupes = False
          spider.crawler.stats.inc_value('dupefilter/filtered', spider=spider)

+
+     async def done(self, request: Request, done_type: Literal["request_done", "parse_done"]) -> None:
+         """ deal fingerprint on task successful """