aio-scrapy 2.0.9__tar.gz → 2.0.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. {aio-scrapy-2.0.9/aio_scrapy.egg-info → aio-scrapy-2.0.10}/PKG-INFO +1 -1
  2. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10/aio_scrapy.egg-info}/PKG-INFO +1 -1
  3. aio-scrapy-2.0.10/aioscrapy/VERSION +1 -0
  4. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/core/downloader/__init__.py +1 -0
  5. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/core/engine.py +1 -1
  6. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/dupefilters/redis.py +5 -4
  7. aio-scrapy-2.0.9/aioscrapy/VERSION +0 -1
  8. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/LICENSE +0 -0
  9. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/MANIFEST.in +0 -0
  10. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/README.md +0 -0
  11. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aio_scrapy.egg-info/SOURCES.txt +0 -0
  12. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aio_scrapy.egg-info/dependency_links.txt +0 -0
  13. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aio_scrapy.egg-info/entry_points.txt +0 -0
  14. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aio_scrapy.egg-info/not-zip-safe +0 -0
  15. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aio_scrapy.egg-info/requires.txt +0 -0
  16. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aio_scrapy.egg-info/top_level.txt +0 -0
  17. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/__init__.py +0 -0
  18. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/__main__.py +0 -0
  19. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/cmdline.py +0 -0
  20. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/commands/__init__.py +0 -0
  21. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/commands/crawl.py +0 -0
  22. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/commands/genspider.py +0 -0
  23. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/commands/list.py +0 -0
  24. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/commands/runspider.py +0 -0
  25. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/commands/settings.py +0 -0
  26. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/commands/startproject.py +0 -0
  27. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/commands/version.py +0 -0
  28. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/core/__init__.py +0 -0
  29. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/core/downloader/handlers/__init__.py +0 -0
  30. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/core/downloader/handlers/aiohttp.py +0 -0
  31. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/core/downloader/handlers/httpx.py +0 -0
  32. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/core/downloader/handlers/playwright/__init__.py +0 -0
  33. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/core/downloader/handlers/playwright/driverpool.py +0 -0
  34. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/core/downloader/handlers/playwright/webdriver.py +0 -0
  35. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/core/downloader/handlers/pyhttpx.py +0 -0
  36. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/core/downloader/handlers/requests.py +0 -0
  37. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/core/scheduler.py +0 -0
  38. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/core/scraper.py +0 -0
  39. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/crawler.py +0 -0
  40. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/db/__init__.py +0 -0
  41. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/db/absmanager.py +0 -0
  42. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/db/aiomongo.py +0 -0
  43. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/db/aiomysql.py +0 -0
  44. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/db/aiopg.py +0 -0
  45. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/db/aiorabbitmq.py +0 -0
  46. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/db/aioredis.py +0 -0
  47. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/dupefilters/__init__.py +0 -0
  48. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/dupefilters/disk.py +0 -0
  49. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/exceptions.py +0 -0
  50. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/http/__init__.py +0 -0
  51. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/http/headers.py +0 -0
  52. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/http/request/__init__.py +0 -0
  53. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/http/request/form.py +0 -0
  54. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/http/request/json_request.py +0 -0
  55. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/http/response/__init__.py +0 -0
  56. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/http/response/html.py +0 -0
  57. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/http/response/playwright.py +0 -0
  58. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/http/response/text.py +0 -0
  59. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/http/response/xml.py +0 -0
  60. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/libs/__init__.py +0 -0
  61. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/libs/downloader/__init__.py +0 -0
  62. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/libs/downloader/defaultheaders.py +0 -0
  63. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/libs/downloader/downloadtimeout.py +0 -0
  64. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/libs/downloader/ja3fingerprint.py +0 -0
  65. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/libs/downloader/retry.py +0 -0
  66. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/libs/downloader/stats.py +0 -0
  67. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/libs/downloader/useragent.py +0 -0
  68. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/libs/extensions/__init__.py +0 -0
  69. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/libs/extensions/closespider.py +0 -0
  70. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/libs/extensions/corestats.py +0 -0
  71. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/libs/extensions/logstats.py +0 -0
  72. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/libs/extensions/metric.py +0 -0
  73. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/libs/extensions/throttle.py +0 -0
  74. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/libs/pipelines/__init__.py +0 -0
  75. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/libs/pipelines/csv.py +0 -0
  76. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/libs/pipelines/execl.py +0 -0
  77. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/libs/pipelines/mongo.py +0 -0
  78. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/libs/pipelines/mysql.py +0 -0
  79. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/libs/pipelines/pg.py +0 -0
  80. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/libs/spider/__init__.py +0 -0
  81. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/libs/spider/depth.py +0 -0
  82. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/libs/spider/httperror.py +0 -0
  83. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/libs/spider/offsite.py +0 -0
  84. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/libs/spider/referer.py +0 -0
  85. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/libs/spider/urllength.py +0 -0
  86. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/link.py +0 -0
  87. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/logformatter.py +0 -0
  88. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/middleware/__init__.py +0 -0
  89. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/middleware/absmanager.py +0 -0
  90. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/middleware/downloader.py +0 -0
  91. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/middleware/extension.py +0 -0
  92. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/middleware/itempipeline.py +0 -0
  93. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/middleware/spider.py +0 -0
  94. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/process.py +0 -0
  95. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/proxy/__init__.py +0 -0
  96. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/proxy/redis.py +0 -0
  97. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/queue/__init__.py +0 -0
  98. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/queue/memory.py +0 -0
  99. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/queue/rabbitmq.py +0 -0
  100. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/queue/redis.py +0 -0
  101. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/scrapyd/__init__.py +0 -0
  102. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/scrapyd/runner.py +0 -0
  103. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/serializer.py +0 -0
  104. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/settings/__init__.py +0 -0
  105. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/settings/default_settings.py +0 -0
  106. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/signalmanager.py +0 -0
  107. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/signals.py +0 -0
  108. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/spiderloader.py +0 -0
  109. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/spiders/__init__.py +0 -0
  110. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/statscollectors.py +0 -0
  111. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/templates/project/aioscrapy.cfg +0 -0
  112. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/templates/project/module/__init__.py +0 -0
  113. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/templates/project/module/middlewares.py.tmpl +0 -0
  114. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/templates/project/module/pipelines.py.tmpl +0 -0
  115. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/templates/project/module/settings.py.tmpl +0 -0
  116. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/templates/project/module/spiders/__init__.py +0 -0
  117. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/templates/spiders/basic.tmpl +0 -0
  118. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/templates/spiders/single.tmpl +0 -0
  119. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/utils/__init__.py +0 -0
  120. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/utils/conf.py +0 -0
  121. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/utils/curl.py +0 -0
  122. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/utils/decorators.py +0 -0
  123. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/utils/deprecate.py +0 -0
  124. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/utils/httpobj.py +0 -0
  125. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/utils/log.py +0 -0
  126. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/utils/misc.py +0 -0
  127. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/utils/ossignal.py +0 -0
  128. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/utils/project.py +0 -0
  129. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/utils/python.py +0 -0
  130. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/utils/reqser.py +0 -0
  131. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/utils/request.py +0 -0
  132. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/utils/response.py +0 -0
  133. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/utils/signal.py +0 -0
  134. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/utils/spider.py +0 -0
  135. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/utils/template.py +0 -0
  136. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/utils/tools.py +0 -0
  137. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/utils/trackref.py +0 -0
  138. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/aioscrapy/utils/url.py +0 -0
  139. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/setup.cfg +0 -0
  140. {aio-scrapy-2.0.9 → aio-scrapy-2.0.10}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: aio-scrapy
3
- Version: 2.0.9
3
+ Version: 2.0.10
4
4
  Summary: A high-level Web Crawling and Web Scraping framework based on Asyncio
5
5
  Home-page: https://github.com/conlin-huang/aio-scrapy.git
6
6
  Author: conlin
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: aio-scrapy
3
- Version: 2.0.9
3
+ Version: 2.0.10
4
4
  Summary: A high-level Web Crawling and Web Scraping framework based on Asyncio
5
5
  Home-page: https://github.com/conlin-huang/aio-scrapy.git
6
6
  Author: conlin
@@ -0,0 +1 @@
1
+ 2.0.10
@@ -126,6 +126,7 @@ class Downloader(BaseDownloader):
126
126
  self.dupefilter = dupefilter
127
127
 
128
128
  self.total_concurrency: int = self.settings.getint('CONCURRENT_REQUESTS')
129
+ self.get_requests_count: int = self.settings.getint('GET_REQUESTS_COUNT') or self.total_concurrency
129
130
  self.domain_concurrency: int = self.settings.getint('CONCURRENT_REQUESTS_PER_DOMAIN')
130
131
  self.ip_concurrency: int = self.settings.getint('CONCURRENT_REQUESTS_PER_IP')
131
132
  self.randomize_delay: bool = self.settings.getbool('RANDOMIZE_DOWNLOAD_DELAY')
@@ -138,7 +138,7 @@ class ExecutionEngine(object):
138
138
  while self.unlock and not self._needs_backout() and self.unlock:
139
139
  self.unlock = False
140
140
  try:
141
- async for request in self.scheduler.next_request(self.downloader.total_concurrency):
141
+ async for request in self.scheduler.next_request(self.downloader.get_requests_count):
142
142
  if request:
143
143
  self.slot.add_request(request)
144
144
  await self.downloader.fetch(request)
@@ -28,7 +28,7 @@ class RedisRFPDupeFilter(DupeFilterBase):
28
28
  keep_on_close = crawler.settings.getbool("KEEP_DUPEFILTER_DATA_ON_CLOSE", True)
29
29
  key = dupefilter_key % {'spider': crawler.spider.name}
30
30
  debug = crawler.settings.getbool('DUPEFILTER_DEBUG', False)
31
- info = crawler.settings.getbool('DUPEFILTER_DEBUG', False)
31
+ info = crawler.settings.getbool('DUPEFILTER_INFO', False)
32
32
  instance = cls(server, key=key, debug=debug, keep_on_close=keep_on_close, info=info)
33
33
  return instance
34
34
 
@@ -102,8 +102,8 @@ class BloomFilter(object):
102
102
  class RedisBloomDupeFilter(RedisRFPDupeFilter):
103
103
  """Bloom filter built with the bitis bitmap of redis"""
104
104
 
105
- def __init__(self, server, key, debug, bit, hash_number, keep_on_close):
106
- super().__init__(server, key, debug, keep_on_close)
105
+ def __init__(self, server, key, debug, bit, hash_number, keep_on_close, info):
106
+ super().__init__(server, key, debug, keep_on_close, info)
107
107
  self.bit = bit
108
108
  self.hash_number = hash_number
109
109
  self.bf = BloomFilter(server, self.key, bit, hash_number)
@@ -115,9 +115,10 @@ class RedisBloomDupeFilter(RedisRFPDupeFilter):
115
115
  keep_on_close = crawler.settings.getbool("KEEP_DUPEFILTER_DATA_ON_CLOSE", True)
116
116
  key = dupefilter_key % {'spider': crawler.spider.name}
117
117
  debug = crawler.settings.getbool('DUPEFILTER_DEBUG', False)
118
+ info = crawler.settings.getbool('DUPEFILTER_INFO', False)
118
119
  bit = crawler.settings.getint('BLOOMFILTER_BIT', 30)
119
120
  hash_number = crawler.settings.getint('BLOOMFILTER_HASH_NUMBER', 6)
120
- return cls(server, key=key, debug=debug, bit=bit, hash_number=hash_number, keep_on_close=keep_on_close)
121
+ return cls(server, key=key, debug=debug, bit=bit, hash_number=hash_number, keep_on_close=keep_on_close, info=info)
121
122
 
122
123
  async def request_seen(self, request: Request) -> bool:
123
124
  fp = await self.bf.exists(request.fingerprint)
@@ -1 +0,0 @@
1
- 2.0.9
File without changes
File without changes
File without changes
File without changes
File without changes