aio-scrapy 2.0.9__py3-none-any.whl → 2.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: aio-scrapy
3
- Version: 2.0.9
3
+ Version: 2.0.10
4
4
  Summary: A high-level Web Crawling and Web Scraping framework based on Asyncio
5
5
  Home-page: https://github.com/conlin-huang/aio-scrapy.git
6
6
  Author: conlin
@@ -1,4 +1,4 @@
1
- aioscrapy/VERSION,sha256=Qd60-DGk0CyAsfZTOK4DTPjIJ6aXFjyqch4-b7ff6f0,5
1
+ aioscrapy/VERSION,sha256=bkksF7-FeZMTR8EfltCUKJZNQaHaQkySSXYbwvc2qdw,6
2
2
  aioscrapy/__init__.py,sha256=esJeH66Mz9WV7XbotvZEjNn49jc589YZ_L2DKoD0JvA,858
3
3
  aioscrapy/__main__.py,sha256=rvTdJ0cQwbi29aucPj3jJRpccx5SBzvRcV7qvxvX2NQ,80
4
4
  aioscrapy/cmdline.py,sha256=1qhNg2Edl-Obmf2re2K4V8pJG7ubGfZZCzcHdKtdE_s,5159
@@ -21,10 +21,10 @@ aioscrapy/commands/settings.py,sha256=sc0rwwfBQNySKX8uV3iJqv3i7SelFwNcrlHYxDupKO
21
21
  aioscrapy/commands/startproject.py,sha256=Rcc7JkN75Jp2t2aZIxBzPsWbLXChNAUSByDhcW_6Ig8,4001
22
22
  aioscrapy/commands/version.py,sha256=yqqTMlZkkiQhtbU9w_IqUWLMOAjqYlv24friEkPRQYM,485
23
23
  aioscrapy/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
- aioscrapy/core/engine.py,sha256=YJC597TZwhX81qjPHBH2mkRi4oVGNImzSz58_sFiXPA,10925
24
+ aioscrapy/core/engine.py,sha256=zW3GPigPqyrWJ_Jk7SUxD0ueV1HTuxUvweFUU4WFG-0,10926
25
25
  aioscrapy/core/scheduler.py,sha256=YCRw9j79ZOL8bijDa3IdRaw0YlMTrwXuJGzaApkN7lc,5737
26
26
  aioscrapy/core/scraper.py,sha256=M_bcizLUzWuECe7sIIZ_HJLNrPzL7dX2o-tN5nvFnCs,10304
27
- aioscrapy/core/downloader/__init__.py,sha256=XRpiDJg9Yc47SoFy-48HxFyuV3sZkjE30sK3DA80YJQ,9465
27
+ aioscrapy/core/downloader/__init__.py,sha256=22TC0z49BX3YvDUPl6DKMrOonECpY5tjaWJGGEV7RbU,9574
28
28
  aioscrapy/core/downloader/handlers/__init__.py,sha256=CriaX2Cp4jUqzDDGZDB7HiIEgUWt2pnYVho6HMV6sJ0,3198
29
29
  aioscrapy/core/downloader/handlers/aiohttp.py,sha256=dFVVeGgJ1WZcE1zI4fQOZIzmrkC6l1WZcYstHmB3qYg,3942
30
30
  aioscrapy/core/downloader/handlers/httpx.py,sha256=-DfjYgfrjxMhaMpTgEOFlQRONasCXV0g6UgH3WmWcfs,3041
@@ -42,7 +42,7 @@ aioscrapy/db/aiorabbitmq.py,sha256=tNKl4Kx7KM7H_lOj8xfeA0uD8PuBTVzySApTEn5TyAE,5
42
42
  aioscrapy/db/aioredis.py,sha256=UOoTRTQUvghnq29bVL8v1HvksMXYOzHaS8Btgbpn0bY,2966
43
43
  aioscrapy/dupefilters/__init__.py,sha256=17s6Hyr_lWDFPto6wLEvRfT2TbGU2RIssTDuChzrDNA,1498
44
44
  aioscrapy/dupefilters/disk.py,sha256=EMgxeC2a6aYCGKgp4QOs5xwHp33LUsOZ8pliKBTFx1c,1551
45
- aioscrapy/dupefilters/redis.py,sha256=1bDqB1avfDRR9b9doDXAyxwL1Fa8LEpMYlV7YoBXBvw,4723
45
+ aioscrapy/dupefilters/redis.py,sha256=cUuM68dEM1_ki2eOzZ6pAvmLZlAP_tC4lx73Ufmg_Bs,4812
46
46
  aioscrapy/http/__init__.py,sha256=yeQTT5W1iwr6dKznTS5d9vnx2hsB47i9roPM57wQp_0,597
47
47
  aioscrapy/http/headers.py,sha256=H-RJ6KqOsFFFAXORfvoyz3V-ud0I8TAj5Jt5fAACcLc,1573
48
48
  aioscrapy/http/request/__init__.py,sha256=PFoFU3ncTN-gj6Rx01rjVa_744Qfv3EH29mooW6JX9U,7121
@@ -124,9 +124,9 @@ aioscrapy/utils/template.py,sha256=HR97X4lpv2WuqhuPfzTgaBN66fYnzHVpP6zQ5IoTwcI,8
124
124
  aioscrapy/utils/tools.py,sha256=WJowViZB8XEs2CFqjVvbqXK3H5Uvf4BgWgBD_RcHMaM,2319
125
125
  aioscrapy/utils/trackref.py,sha256=0nIpelT1d5WYxALl8SGA8vHNYsh-jS0Z2lwVEAhwx8E,2019
126
126
  aioscrapy/utils/url.py,sha256=8W8tAhU7lgfPOfzKp3ejJGEcLj1i_PnA_53Jv5LpxiY,5464
127
- aio_scrapy-2.0.9.dist-info/LICENSE,sha256=L-UoAEM3fQSjKA7FVWxQM7gwSCbeue6gZRAnpRS_UCo,1088
128
- aio_scrapy-2.0.9.dist-info/METADATA,sha256=AzYfL1fSr0PvPyJDV7QgQ2rm3DzWaIN5WwRhtNsT8Ik,6384
129
- aio_scrapy-2.0.9.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
130
- aio_scrapy-2.0.9.dist-info/entry_points.txt,sha256=WWhoVHZvqhW8a5uFg97K0EP_GjG3uuCIFLkyqDICgaw,56
131
- aio_scrapy-2.0.9.dist-info/top_level.txt,sha256=8l08KyMt22wfX_5BmhrGH0PgwZdzZIPq-hBUa1GNir4,10
132
- aio_scrapy-2.0.9.dist-info/RECORD,,
127
+ aio_scrapy-2.0.10.dist-info/LICENSE,sha256=L-UoAEM3fQSjKA7FVWxQM7gwSCbeue6gZRAnpRS_UCo,1088
128
+ aio_scrapy-2.0.10.dist-info/METADATA,sha256=qMfSjJmZpj8xAaoGdjEC-oNULa4wYcWFwgJJm8wBQ3U,6385
129
+ aio_scrapy-2.0.10.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
130
+ aio_scrapy-2.0.10.dist-info/entry_points.txt,sha256=WWhoVHZvqhW8a5uFg97K0EP_GjG3uuCIFLkyqDICgaw,56
131
+ aio_scrapy-2.0.10.dist-info/top_level.txt,sha256=8l08KyMt22wfX_5BmhrGH0PgwZdzZIPq-hBUa1GNir4,10
132
+ aio_scrapy-2.0.10.dist-info/RECORD,,
aioscrapy/VERSION CHANGED
@@ -1 +1 @@
1
- 2.0.9
1
+ 2.0.10
@@ -126,6 +126,7 @@ class Downloader(BaseDownloader):
126
126
  self.dupefilter = dupefilter
127
127
 
128
128
  self.total_concurrency: int = self.settings.getint('CONCURRENT_REQUESTS')
129
+ self.get_requests_count: int = self.settings.getint('GET_REQUESTS_COUNT') or self.total_concurrency
129
130
  self.domain_concurrency: int = self.settings.getint('CONCURRENT_REQUESTS_PER_DOMAIN')
130
131
  self.ip_concurrency: int = self.settings.getint('CONCURRENT_REQUESTS_PER_IP')
131
132
  self.randomize_delay: bool = self.settings.getbool('RANDOMIZE_DOWNLOAD_DELAY')
aioscrapy/core/engine.py CHANGED
@@ -138,7 +138,7 @@ class ExecutionEngine(object):
138
138
  while self.unlock and not self._needs_backout() and self.unlock:
139
139
  self.unlock = False
140
140
  try:
141
- async for request in self.scheduler.next_request(self.downloader.total_concurrency):
141
+ async for request in self.scheduler.next_request(self.downloader.get_requests_count):
142
142
  if request:
143
143
  self.slot.add_request(request)
144
144
  await self.downloader.fetch(request)
@@ -28,7 +28,7 @@ class RedisRFPDupeFilter(DupeFilterBase):
28
28
  keep_on_close = crawler.settings.getbool("KEEP_DUPEFILTER_DATA_ON_CLOSE", True)
29
29
  key = dupefilter_key % {'spider': crawler.spider.name}
30
30
  debug = crawler.settings.getbool('DUPEFILTER_DEBUG', False)
31
- info = crawler.settings.getbool('DUPEFILTER_DEBUG', False)
31
+ info = crawler.settings.getbool('DUPEFILTER_INFO', False)
32
32
  instance = cls(server, key=key, debug=debug, keep_on_close=keep_on_close, info=info)
33
33
  return instance
34
34
 
@@ -102,8 +102,8 @@ class BloomFilter(object):
102
102
  class RedisBloomDupeFilter(RedisRFPDupeFilter):
103
103
  """Bloom filter built with the bitis bitmap of redis"""
104
104
 
105
- def __init__(self, server, key, debug, bit, hash_number, keep_on_close):
106
- super().__init__(server, key, debug, keep_on_close)
105
+ def __init__(self, server, key, debug, bit, hash_number, keep_on_close, info):
106
+ super().__init__(server, key, debug, keep_on_close, info)
107
107
  self.bit = bit
108
108
  self.hash_number = hash_number
109
109
  self.bf = BloomFilter(server, self.key, bit, hash_number)
@@ -115,9 +115,10 @@ class RedisBloomDupeFilter(RedisRFPDupeFilter):
115
115
  keep_on_close = crawler.settings.getbool("KEEP_DUPEFILTER_DATA_ON_CLOSE", True)
116
116
  key = dupefilter_key % {'spider': crawler.spider.name}
117
117
  debug = crawler.settings.getbool('DUPEFILTER_DEBUG', False)
118
+ info = crawler.settings.getbool('DUPEFILTER_INFO', False)
118
119
  bit = crawler.settings.getint('BLOOMFILTER_BIT', 30)
119
120
  hash_number = crawler.settings.getint('BLOOMFILTER_HASH_NUMBER', 6)
120
- return cls(server, key=key, debug=debug, bit=bit, hash_number=hash_number, keep_on_close=keep_on_close)
121
+ return cls(server, key=key, debug=debug, bit=bit, hash_number=hash_number, keep_on_close=keep_on_close, info=info)
121
122
 
122
123
  async def request_seen(self, request: Request) -> bool:
123
124
  fp = await self.bf.exists(request.fingerprint)