crawlo 1.3.9__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of crawlo might be problematic.
- crawlo/__version__.py +1 -1
- crawlo/core/processor.py +11 -3
- crawlo/crawler.py +12 -0
- crawlo/extension/__init__.py +25 -0
- crawlo/extension/log_stats.py +26 -37
- crawlo/middleware/response_code.py +1 -14
- crawlo/pipelines/pipeline_manager.py +15 -7
- crawlo/pipelines/redis_dedup_pipeline.py +5 -2
- {crawlo-1.3.9.dist-info → crawlo-1.4.0.dist-info}/METADATA +1 -1
- {crawlo-1.3.9.dist-info → crawlo-1.4.0.dist-info}/RECORD +13 -13
- {crawlo-1.3.9.dist-info → crawlo-1.4.0.dist-info}/WHEEL +0 -0
- {crawlo-1.3.9.dist-info → crawlo-1.4.0.dist-info}/entry_points.txt +0 -0
- {crawlo-1.3.9.dist-info → crawlo-1.4.0.dist-info}/top_level.txt +0 -0
crawlo/__version__.py
CHANGED
@@ -1 +1 @@
-__version__ = '1.3.9'
+__version__ = '1.4.0'
crawlo/core/processor.py
CHANGED
@@ -1,10 +1,12 @@
 #!/usr/bin/python
 # -*- coding:UTF-8 -*-
-from asyncio import Queue
+from asyncio import Queue, create_task
 from typing import Union, Optional

 from crawlo import Request, Item
 from crawlo.pipelines.pipeline_manager import PipelineManager
+from crawlo.exceptions import ItemDiscard
+from crawlo.event import item_discard


 class Processor(object):
@@ -27,7 +29,13 @@ class Processor(object):
             await self._process_item(result)

     async def _process_item(self, item):
-
+        try:
+            await self.pipelines.process_item(item=item)
+        except ItemDiscard as exc:
+            # Item was discarded by a pipeline (e.g., deduplication pipeline)
+            # We simply ignore this item and don't pass it to subsequent pipelines
+            # The statistics system has already been notified in PipelineManager, so we don't need to notify again
+            pass

     async def enqueue(self, output: Union[Request, Item]):
         await self.queue.put(output)
@@ -37,4 +45,4 @@ class Processor(object):
         return len(self) == 0

     def __len__(self):
-        return self.queue.qsize()
+        return self.queue.qsize()
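For context, a minimal sketch of the new discard flow in `_process_item`: a pipeline signals a drop by raising `ItemDiscard`, and the processor swallows it because the pipeline manager has already reported the discard. Everything except the `ItemDiscard` name is a stand-in for illustration, not crawlo's real API.

```python
# Minimal sketch of the discard flow; FakePipelineManager and MiniProcessor
# are hypothetical stand-ins for the real crawlo classes.
import asyncio


class ItemDiscard(Exception):
    """Raised by a pipeline to signal that an item should be dropped."""


class FakePipelineManager:
    async def process_item(self, item):
        if item.get("seen"):
            raise ItemDiscard(f"duplicate: {item['id']}")
        return item


class MiniProcessor:
    def __init__(self):
        self.pipelines = FakePipelineManager()

    async def _process_item(self, item):
        try:
            await self.pipelines.process_item(item=item)
        except ItemDiscard:
            # The manager already reported the discard; the processor just drops it.
            pass


async def main():
    processor = MiniProcessor()
    await processor._process_item({"id": 1, "seen": False})  # processed normally
    await processor._process_item({"id": 1, "seen": True})   # silently discarded


asyncio.run(main())
```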
crawlo/crawler.py
CHANGED
@@ -308,6 +308,18 @@ class ModernCrawler:
         except Exception as e:
             self._logger.warning(f"Spider cleanup failed: {e}")

+        # Call StatsCollector's close_spider method to set the reason and spider name
+        if self._stats and hasattr(self._stats, 'close_spider'):
+            try:
+                # Use the default 'finished' as the reason
+                self._stats.close_spider(self._spider, reason='finished')
+            except Exception as e:
+                self._logger.warning(f"Stats close_spider failed: {e}")
+
+        # Fire the spider_closed event to notify all subscribers (including extensions)
+        # Pass the reason parameter; the default 'finished' is used here as well
+        await self.subscriber.notify("spider_closed", reason='finished')
+
         if self._stats and hasattr(self._stats, 'close'):
             try:
                 close_result = self._stats.close()
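The shutdown path now also notifies a `spider_closed` event with `reason='finished'`. The sketch below illustrates that publish/subscribe shape with a toy `Subscriber`; crawlo's actual subscriber implementation is assumed to differ in its details.

```python
# Toy publish/subscribe sketch of the spider_closed notification added above.
# The Subscriber class here is a stand-in, not crawlo's real subscriber API.
import asyncio
from collections import defaultdict


class Subscriber:
    def __init__(self):
        self._handlers = defaultdict(list)

    def subscribe(self, handler, event):
        self._handlers[event].append(handler)

    async def notify(self, event, *args, **kwargs):
        for handler in self._handlers[event]:
            await handler(*args, **kwargs)


async def on_spider_closed(reason="finished"):
    print(f"spider_closed received, reason={reason}")


async def main():
    subscriber = Subscriber()
    subscriber.subscribe(on_spider_closed, event="spider_closed")
    # Mirrors the notify call added in ModernCrawler's cleanup path.
    await subscriber.notify("spider_closed", reason="finished")


asyncio.run(main())
```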
crawlo/extension/__init__.py
CHANGED
@@ -16,6 +16,7 @@ class ExtensionManager(object):
         extensions = self.crawler.settings.get_list('EXTENSIONS')
         self.logger = get_logger(self.__class__.__name__, crawler.settings.get('LOG_LEVEL'))
         self._add_extensions(extensions)
+        self._subscribe_extensions()

     @classmethod
     def create_instance(cls, *args: Any, **kwargs: Any) -> 'ExtensionManager':
@@ -37,3 +38,27 @@ class ExtensionManager(object):
         if extensions:
             # Restore INFO-level logging and keep the key enablement information
             self.logger.info(f"Enabled extensions: \n{pformat(extensions)}")
+
+    def _subscribe_extensions(self) -> None:
+        """Subscribe extension methods to the corresponding events."""
+        for extension in self.extensions:
+            # Subscribe the spider_closed method
+            if hasattr(extension, 'spider_closed'):
+                self.crawler.subscriber.subscribe(extension.spider_closed, event="spider_closed")
+
+            # Subscribe the item_successful method
+            if hasattr(extension, 'item_successful'):
+                self.crawler.subscriber.subscribe(extension.item_successful, event="item_successful")
+
+            # Subscribe the item_discard method
+            if hasattr(extension, 'item_discard'):
+                self.crawler.subscriber.subscribe(extension.item_discard, event="item_discard")
+
+            # Subscribe the response_received method
+            if hasattr(extension, 'response_received'):
+                # Fix: correct the event name from "request_received" to "response_received"
+                self.crawler.subscriber.subscribe(extension.response_received, event="response_received")
+
+            # Subscribe the request_scheduled method
+            if hasattr(extension, 'request_scheduled'):
+                self.crawler.subscriber.subscribe(extension.request_scheduled, event="request_scheduled")
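`_subscribe_extensions` wires hooks by duck typing: any extension that defines a method named after an event gets that method subscribed. Below is a condensed sketch of the same pattern; the `Subscriber`, the extension class, and the `subscribe_extensions` helper are illustrative stand-ins rather than crawlo's own classes.

```python
# Sketch of the duck-typed subscription pattern used by _subscribe_extensions.
# All classes and the helper function are hypothetical stand-ins.
import asyncio
from collections import defaultdict


class Subscriber:
    def __init__(self):
        self._handlers = defaultdict(list)

    def subscribe(self, handler, event):
        self._handlers[event].append(handler)

    async def notify(self, event, *args, **kwargs):
        for handler in self._handlers[event]:
            await handler(*args, **kwargs)


class StatsExtension:
    async def spider_closed(self, reason="finished"):
        print(f"stats flushed, reason={reason}")

    async def item_successful(self, item, spider=None):
        print(f"counted item: {item}")


def subscribe_extensions(extensions, subscriber):
    # Only the hooks an extension actually defines are subscribed.
    hook_events = ["spider_closed", "item_successful", "item_discard",
                   "response_received", "request_scheduled"]
    for extension in extensions:
        for event in hook_events:
            if hasattr(extension, event):
                subscriber.subscribe(getattr(extension, event), event=event)


async def main():
    subscriber = Subscriber()
    subscribe_extensions([StatsExtension()], subscriber)
    await subscriber.notify("item_successful", {"id": 1})
    await subscriber.notify("spider_closed", reason="finished")


asyncio.run(main())
```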
crawlo/extension/log_stats.py
CHANGED
@@ -1,52 +1,43 @@
 #!/usr/bin/python
 # -*- coding:UTF-8 -*-
+"""
+Log statistics extension.
+Provides detailed statistics about the crawler run.
+"""
+import asyncio
 from typing import Any

-from crawlo import
-from crawlo.
+from crawlo.utils.log import get_logger
+from crawlo.utils import now, time_diff


-class LogStats
+class LogStats:
+    """
+    Log statistics extension that records and reports statistics collected while the crawler runs.
+    """

-    def __init__(self,
-        self.
+    def __init__(self, crawler):
+        self.crawler = crawler
+        self.logger = get_logger(self.__class__.__name__, crawler.settings.get('LOG_LEVEL'))
+        self._stats = crawler.stats
+        self._stats['start_time'] = now(fmt='%Y-%m-%d %H:%M:%S')

     @classmethod
-    def
-
-        # Subscribe to all required events
-        event_subscriptions = [
-            (o.spider_opened, event.spider_opened),
-            (o.spider_closed, event.spider_closed),
-            (o.item_successful, event.item_successful),
-            (o.item_discard, event.item_discard),
-            (o.response_received, event.response_received),
-            (o.request_scheduled, event.request_scheduled),
-        ]
-
-        for handler, evt in event_subscriptions:
-            try:
-                crawler.subscriber.subscribe(handler, event=evt)
-            except Exception as e:
-                # Get a logger and record the error
-                from crawlo.utils.log import get_logger
-                logger = get_logger(cls.__name__)
-                logger.error(f"Failed to subscribe to event {evt}: {e}")
+    def from_crawler(cls, crawler):
+        return cls(crawler)

-
-
-
-        try:
-            self._stats['start_time'] = now(fmt='%Y-%m-%d %H:%M:%S')
-        except Exception as e:
-            # Handle silently to avoid interfering with the crawl
-            pass
+    @classmethod
+    def create_instance(cls, crawler):
+        return cls.from_crawler(crawler)

-    async def spider_closed(self) -> None:
+    async def spider_closed(self, reason: str = 'finished') -> None:
         try:
             self._stats['end_time'] = now(fmt='%Y-%m-%d %H:%M:%S')
             self._stats['cost_time(s)'] = time_diff(start=self._stats['start_time'], end=self._stats['end_time'])
+            self._stats['reason'] = reason
         except Exception as e:
+            # Log the error to aid debugging
+            self.logger.error(f"Error in spider_closed: {e}")
             # Handle silently to avoid interfering with the crawl
             pass

@@ -59,10 +50,8 @@ class LogStats(object):

     async def item_discard(self, _item: Any, exc: Any, _spider: Any) -> None:
         try:
+            # Only increase the overall discard count; do not record the reason for each discarded item
             self._stats.inc_value('item_discard_count')
-            reason = getattr(exc, 'msg', None)  # Safer way to read the attribute
-            if reason:
-                self._stats.inc_value(f"item_discard/{reason}")
         except Exception as e:
             # Handle silently to avoid interfering with the crawl
             pass
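The rewritten `LogStats` records `start_time` at construction and `end_time`, `cost_time(s)`, and `reason` when the spider closes. Here is a self-contained sketch of that bookkeeping over a plain dict, with local `now` and `time_diff` helpers standing in for the `crawlo.utils` ones (their exact behaviour is assumed from the calls shown in the diff).

```python
# Timing-bookkeeping sketch in the spirit of LogStats; MiniLogStats and the
# helper functions are stand-ins, not crawlo's implementations.
import asyncio
from datetime import datetime

FMT = "%Y-%m-%d %H:%M:%S"


def now(fmt=FMT):
    """Current time as a formatted string (stand-in for crawlo.utils.now)."""
    return datetime.now().strftime(fmt)


def time_diff(start, end, fmt=FMT):
    """Elapsed seconds between two formatted timestamps (stand-in for crawlo.utils.time_diff)."""
    return (datetime.strptime(end, fmt) - datetime.strptime(start, fmt)).total_seconds()


class MiniLogStats:
    def __init__(self, stats):
        self._stats = stats
        self._stats["start_time"] = now()

    async def spider_closed(self, reason="finished"):
        self._stats["end_time"] = now()
        self._stats["cost_time(s)"] = time_diff(self._stats["start_time"], self._stats["end_time"])
        self._stats["reason"] = reason


stats = {}
ext = MiniLogStats(stats)
asyncio.run(ext.spider_closed(reason="finished"))
print(stats)
```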
crawlo/middleware/response_code.py
CHANGED
@@ -127,9 +127,7 @@ class ResponseCodeMiddleware(object):
         """
         status_code = response.status_code

-        #
-        self.stats.inc_value(f'response_status_code/count/{status_code}')
-
+        # Only record aggregate statistics; do not record per-domain and per-status-code details
         # Record status-code category statistics
         category = self._get_status_category(status_code)
         self.stats.inc_value(f'response_status_code/category/{category}')
@@ -144,17 +142,6 @@ class ResponseCodeMiddleware(object):
         if hasattr(response, 'content_length') and response.content_length:
             self.stats.inc_value('response_total_bytes', response.content_length)

-        # Record per-domain statistics
-        try:
-            from urllib.parse import urlparse
-            parsed_url = urlparse(response.url)
-            domain = parsed_url.netloc
-            if domain:
-                self.stats.inc_value(f'response_status_code/domain/{domain}/count/{status_code}')
-                self.stats.inc_value(f'response_status_code/domain/{domain}/category/{category}')
-        except Exception:
-            self.stats.inc_value('response_status_code/domain/invalid_url/count/{status_code}')
-
         # Detailed logging
         self.logger.debug(
             f'Received response: {status_code} {response.url} '
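The middleware now keeps only a per-category counter rather than per-status and per-domain keys, which bounds the number of stats keys. A small sketch of that categorisation follows; the category names and the helper body are assumptions, since only the `_get_status_category` call and the category counter appear in the diff.

```python
# Status-code categorisation sketch; get_status_category is a hypothetical
# stand-in for the middleware's _get_status_category helper.
from collections import Counter


def get_status_category(status_code: int) -> str:
    if 200 <= status_code < 300:
        return "2xx"
    if 300 <= status_code < 400:
        return "3xx"
    if 400 <= status_code < 500:
        return "4xx"
    if 500 <= status_code < 600:
        return "5xx"
    return "other"


stats = Counter()
for code in (200, 200, 301, 404, 503):
    # One bounded key per category instead of one key per status code per domain.
    stats[f"response_status_code/category/{get_status_category(code)}"] += 1

print(dict(stats))
```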
crawlo/pipelines/pipeline_manager.py
CHANGED
@@ -66,11 +66,19 @@ class PipelineManager:

     async def process_item(self, item):
         try:
-            for method in self.methods:
-                item
-
-
-
-
+            for i, method in enumerate(self.methods):
+                self.logger.debug(f"Processing item with pipeline method {i}: {method.__qualname__}")
+                try:
+                    item = await common_call(method, item, self.crawler.spider)
+                    if item is None:
+                        raise InvalidOutputError(f"{method.__qualname__} return None is not supported.")
+                except (ItemDiscard, DropItem) as exc:  # catch both exception types
+                    self.logger.debug(f"Item discarded by pipeline: {exc}")
+                    create_task(self.crawler.subscriber.notify(item_discard, item, exc, self.crawler.spider))
+                    # Re-raise so upstream callers can also catch it and subsequent pipelines are skipped
+                    raise
+        except (ItemDiscard, DropItem):
+            # The exception has already been handled and reported; just re-raise it here
+            raise
         else:
-            create_task(self.crawler.subscriber.notify(item_successful, item, self.crawler.spider))
+            create_task(self.crawler.subscriber.notify(item_successful, item, self.crawler.spider))
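The manager now reports a discard via `item_discard` and re-raises, so later pipelines are skipped and the caller sees the exception, while a `None` return is rejected outright. Below is a simplified, self-contained model of that chain; every name in it is a stand-in for illustration, not crawlo's own code.

```python
# Sketch of the pipeline-chain behaviour: a discard stops the chain and is
# re-raised; a None return is treated as an error. All names are stand-ins.
import asyncio


class ItemDiscard(Exception):
    pass


class InvalidOutputError(Exception):
    pass


async def dedup_pipeline(item):
    if item.get("dup"):
        raise ItemDiscard("duplicate item")
    return item


async def enrich_pipeline(item):
    item["enriched"] = True
    return item


async def process_item(item, pipelines, notify):
    try:
        for method in pipelines:
            item = await method(item)
            if item is None:
                raise InvalidOutputError(f"{method.__name__} returned None")
    except ItemDiscard as exc:
        notify("item_discard", item, exc)  # report, then stop the chain
        raise
    else:
        notify("item_successful", item)


async def main():
    events = []
    notify = lambda event, *args: events.append(event)
    await process_item({"id": 1}, [dedup_pipeline, enrich_pipeline], notify)
    try:
        await process_item({"id": 2, "dup": True}, [dedup_pipeline, enrich_pipeline], notify)
    except ItemDiscard:
        pass  # the caller (the Processor) swallows the discard
    print(events)  # ['item_successful', 'item_discard']


asyncio.run(main())
```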
crawlo/pipelines/redis_dedup_pipeline.py
CHANGED
@@ -104,17 +104,20 @@ class RedisDedupPipeline:
             if not is_new:
                 # If the fingerprint already exists, drop this item
                 self.dropped_count += 1
-
+                self.logger.info(f"Dropping duplicate item: {fingerprint}")
                 raise ItemDiscard(f"Duplicate item: {fingerprint}")
             else:
                 # If this is a new item, continue processing
-
+                self.logger.debug(f"Processing new item: {fingerprint}")
                 return item

         except redis.RedisError as e:
             self.logger.error(f"Redis error: {e}")
             # Keep processing on Redis errors to avoid losing data
             return item
+        except ItemDiscard:
+            # Re-raise ItemDiscard so the pipeline manager can handle it correctly
+            raise
         except Exception as e:
             self.logger.error(f"Error processing item: {e}")
             # Keep processing on other errors
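The added `except ItemDiscard: raise` matters because the broad `except Exception` below it would otherwise swallow the discard and return the duplicate item. Here is a sketch of that exception ordering, with an in-memory set standing in for the Redis fingerprint store used by the real pipeline; the class and fingerprinting shown are assumptions for illustration only.

```python
# Exception-ordering sketch for a dedup pipeline: ItemDiscard must be
# re-raised before the broad Exception handler. MiniDedupPipeline is a
# stand-in; it uses a local set instead of Redis.
import hashlib
import json


class ItemDiscard(Exception):
    pass


class MiniDedupPipeline:
    def __init__(self):
        self._seen = set()

    def process_item(self, item):
        fingerprint = hashlib.sha1(json.dumps(item, sort_keys=True).encode()).hexdigest()
        try:
            if fingerprint in self._seen:
                raise ItemDiscard(f"Duplicate item: {fingerprint}")
            self._seen.add(fingerprint)
            return item
        except ItemDiscard:
            # Re-raise so the pipeline manager handles the discard.
            raise
        except Exception as e:
            # On unexpected errors, keep the item rather than lose data.
            print(f"Error processing item: {e}")
            return item


pipeline = MiniDedupPipeline()
print(pipeline.process_item({"id": 1}))   # new item passes through
try:
    pipeline.process_item({"id": 1})      # duplicate is discarded
except ItemDiscard as exc:
    print(exc)
```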
{crawlo-1.3.9.dist-info → crawlo-1.4.0.dist-info}/RECORD
CHANGED
@@ -1,9 +1,9 @@
 crawlo/__init__.py,sha256=rCeDq1OoX6mmcBxuK60eUpEp1cIg5T8Zgic3FUQAOkA,2318
-crawlo/__version__.py,sha256=
+crawlo/__version__.py,sha256=EyMGX1ADFzN6XVXHWbJUtKPONYKeFkvWoKIFPDDB2I8,22
 crawlo/cli.py,sha256=OXprmcTUbFK02ptw_Gq8Gk4-ZCU-WEMJgzU1ztgP6Bk,2327
 crawlo/config.py,sha256=dNoNyTkXLe2msQ7bZx3YTQItk1m49nIg5-g89FQDNwE,9486
 crawlo/config_validator.py,sha256=gsiLqf5swWd9ISDvoLqCdG7iSXr-ZdBPD4iT6ug1ua4,11239
-crawlo/crawler.py,sha256=
+crawlo/crawler.py,sha256=E83JhClOe58XVX1ma0f-HAF1BJ7Ej9Zs0w51ERs3fgA,26348
 crawlo/event.py,sha256=7-y6HNv_EIJSYQNzsj0mVK-Gg4ON3wdQeMdQjfFJPlw,313
 crawlo/exceptions.py,sha256=sMay0wnWLfc_FXWslqxm60qz6b66LXs3EdN_w8ygE9k,1166
 crawlo/framework.py,sha256=1RVBwj_VBzfJiMB3lq6XcfFHCjRBHyT4D_T2X4fU_6g,9166
@@ -23,7 +23,7 @@ crawlo/commands/stats.py,sha256=8wTubR1RQ1JPTlpOKArcGcQ39bM-0cuH27lDpndnwPQ,6014
 crawlo/commands/utils.py,sha256=Psfu2tKrmDloMq0WnfXLaxx0lJFitMZ-FWS3HAIrziQ,5382
 crawlo/core/__init__.py,sha256=nikMDqFwnDfE8ugqwAIfycBtIqIVZpeprjEYW-H5Dkw,1272
 crawlo/core/engine.py,sha256=0l7TVNf2R8EHJAZ4ktj71j-qysrq84cYqf_7LEzzYJM,19096
-crawlo/core/processor.py,sha256=
+crawlo/core/processor.py,sha256=wO6DMU-Azr0yaMLJw8LSTG19a6ZAvPuT3J7wNLfbpu4,1577
 crawlo/core/scheduler.py,sha256=By1JB0iukcss5j0nrj1rq1Lk-VmmUHIiGl0RLCH9YUs,12630
 crawlo/data/__init__.py,sha256=8MgDxcMhx-emFARcLAw_ODOZNz0neYBcx7kEbzothd8,116
 crawlo/data/user_agents.py,sha256=6V34lYHREWV5ZR5wH-1pCnr1Y3ZYC7iMLfC6vZHyhZQ,9697
@@ -34,10 +34,10 @@ crawlo/downloader/httpx_downloader.py,sha256=7jfQfvAtfk8yD_mvwUbWLhYOxMM7r1nudiU
 crawlo/downloader/hybrid_downloader.py,sha256=4SzOPEwBlSZVzUAWR3DyxMx2Tsx15YrpBvQS4it4Vps,8028
 crawlo/downloader/playwright_downloader.py,sha256=Lnc7k5cXhVnURXSxgZFCYMJkBxLg5F_OE67rtf3G7Ig,16261
 crawlo/downloader/selenium_downloader.py,sha256=B_0muNi-GQ_hgoYHcf7wgu01V68q7xKnSh-0kzlUiio,21036
-crawlo/extension/__init__.py,sha256=
+crawlo/extension/__init__.py,sha256=7HxWQKBuiVphZUBLIBVCtIjgFIbzTa5QDOQp6WH4HhU,2923
 crawlo/extension/health_check.py,sha256=0GveZgUtFwjYEKlm3qbwIvCmb4FR0qrIKc8cEF1yQV8,5516
 crawlo/extension/log_interval.py,sha256=VCIeNqXcWDnxj4m6l77cjqgRzV8LfsPMb22X0Xc1Vwc,2417
-crawlo/extension/log_stats.py,sha256=
+crawlo/extension/log_stats.py,sha256=X90Y_E6USAdm55yvRN5t59HNLmyN9QMKglhbPPxtehA,2382
 crawlo/extension/logging_extension.py,sha256=8KT-WJRK5tocS2kBOiSquree53L11qD1vLg-P8ob40U,2354
 crawlo/extension/memory_monitor.py,sha256=4aszl3C0GMQbqFhGZjZq5iQuXQR1sOz06VHjjEHgkyE,4290
 crawlo/extension/performance_profiler.py,sha256=EPiNuXuPPDU0Jtgy8arYHpr_8ASK13cCI2BytdJnu_I,4899
@@ -70,7 +70,7 @@ crawlo/middleware/middleware_manager.py,sha256=bQuIxn-i2oud-0hDkv890sa3YvNMbuJIR
 crawlo/middleware/offsite.py,sha256=FIWZvkkzlDJfvQc7Ud7BdfDZ78Sa85qlEEwAR76hSBk,4559
 crawlo/middleware/proxy.py,sha256=NquB6tqHAgHs3-2_1_5220kJYfjNG5JyHRJyo_2j4wo,15636
 crawlo/middleware/request_ignore.py,sha256=xcyZ1c7r_HhbzR3r9pfjsLGW7L7FBVeYvlNt8cpP2wY,2577
-crawlo/middleware/response_code.py,sha256
+crawlo/middleware/response_code.py,sha256=sHT-Xe9ZKLsjLso6SYAVqcD0r_4CptD82C605rjFWSs,4383
 crawlo/middleware/response_filter.py,sha256=6VBUe04mu8C7XxmOak6XyhGMWZPYEm3AMo5Kt_r1SXY,4248
 crawlo/middleware/retry.py,sha256=HxeIf7DibeLCpZ_y4rNARWMyzlrsdq5UR2CaFZInA3s,4124
 crawlo/middleware/simple_proxy.py,sha256=V_v28L-faiMJtt8vi-u5O4za-aU77_JTqNTCYSfWzCE,2191
@@ -86,8 +86,8 @@ crawlo/pipelines/json_pipeline.py,sha256=vlu1nqbD2mtqtExt9cL5nibx1CwJM1RNqd4WGjZ
 crawlo/pipelines/memory_dedup_pipeline.py,sha256=9KuUA1S0uHWSB3qJntPdg9ifPdRXwc8ju4j9tWe8qTo,3853
 crawlo/pipelines/mongo_pipeline.py,sha256=k7gNqAO-g2MtIfArphC6z5ZzkKVRkBKcv-2ImziPFA0,5706
 crawlo/pipelines/mysql_pipeline.py,sha256=_oRfIvlEiOsTKkr4v-yPTcL8nG9O9coRmke2ZSkkKII,13871
-crawlo/pipelines/pipeline_manager.py,sha256=
-crawlo/pipelines/redis_dedup_pipeline.py,sha256=
+crawlo/pipelines/pipeline_manager.py,sha256=BX17CU9JK2xJeIdzQ4FeK7kwpwew1k-BEVMk9oviqTQ,3682
+crawlo/pipelines/redis_dedup_pipeline.py,sha256=6fkHt7O-R2TTMlULgxyqPdyKBjsRzYh_GL-Juye4ZQ0,6410
 crawlo/queue/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 crawlo/queue/pqueue.py,sha256=j2ISmyays5t1tuI36xM6EcELwSpq2xIjAScSBWSRZms,1220
 crawlo/queue/queue_manager.py,sha256=JfkjtOD04e_OZZvEEvp3O_W3lfGXhHslZHrCgw90amY,20693
@@ -286,8 +286,8 @@ tests/verify_distributed.py,sha256=krnYYA5Qx9xXDMWc9YF5DxPSplGvawDg2n0l-3CAqoM,3
 tests/verify_log_fix.py,sha256=TD7M1R22NxLqQPufvgE-H33u9tUjyz-rSR2ayIXozRU,4225
 tests/scrapy_comparison/ofweek_scrapy.py,sha256=2Hvpi6DRTubUxBy6RyJApQxMQONPLc1zWjKTQO_i5U4,5652
 tests/scrapy_comparison/scrapy_test.py,sha256=5sw7jOHhaTmQ8bsUd1TiolAUTRQYQOe-f49HPfysqbI,5466
-crawlo-1.
-crawlo-1.
-crawlo-1.
-crawlo-1.
-crawlo-1.
+crawlo-1.4.0.dist-info/METADATA,sha256=WIambmjeZxudgM_Ej7lv8CKd1EyoQ4f-Z4CugfsHTkY,33235
+crawlo-1.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+crawlo-1.4.0.dist-info/entry_points.txt,sha256=5HoVoTSPxI8SCa5B7pQYxLSrkOdiunyO9tqNsLMv52g,43
+crawlo-1.4.0.dist-info/top_level.txt,sha256=keG_67pbZ_wZL2dmDRA9RMaNHTaV_x_oxZ9DKNgwvR0,22
+crawlo-1.4.0.dist-info/RECORD,,
{crawlo-1.3.9.dist-info → crawlo-1.4.0.dist-info}/WHEEL
File without changes
{crawlo-1.3.9.dist-info → crawlo-1.4.0.dist-info}/entry_points.txt
File without changes
{crawlo-1.3.9.dist-info → crawlo-1.4.0.dist-info}/top_level.txt
File without changes