crawlo 1.2.4__py3-none-any.whl → 1.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crawlo/__version__.py +1 -1
- crawlo/cli.py +12 -5
- crawlo/commands/startproject.py +22 -6
- crawlo/core/engine.py +3 -1
- crawlo/core/scheduler.py +102 -6
- crawlo/filters/aioredis_filter.py +44 -91
- crawlo/queue/queue_manager.py +47 -8
- crawlo/queue/redis_priority_queue.py +9 -2
- crawlo/settings/default_settings.py +5 -7
- crawlo/templates/project/settings.py.tmpl +3 -65
- crawlo/templates/project/settings_distributed.py.tmpl +4 -7
- crawlo/templates/project/settings_gentle.py.tmpl +60 -93
- crawlo/templates/project/settings_high_performance.py.tmpl +85 -106
- crawlo/templates/project/settings_simple.py.tmpl +73 -83
- crawlo/templates/{project/run.py.tmpl → run.py.tmpl} +1 -3
- crawlo/utils/redis_connection_pool.py +19 -2
- {crawlo-1.2.4.dist-info → crawlo-1.2.6.dist-info}/METADATA +1 -1
- {crawlo-1.2.4.dist-info → crawlo-1.2.6.dist-info}/RECORD +21 -21
- {crawlo-1.2.4.dist-info → crawlo-1.2.6.dist-info}/WHEEL +0 -0
- {crawlo-1.2.4.dist-info → crawlo-1.2.6.dist-info}/entry_points.txt +0 -0
- {crawlo-1.2.4.dist-info → crawlo-1.2.6.dist-info}/top_level.txt +0 -0
|
@@ -1,109 +1,99 @@
|
|
|
1
1
|
# -*- coding: UTF-8 -*-
|
|
2
2
|
"""
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
基于 Crawlo 框架的简化爬虫项目配置。
|
|
6
|
-
适合快速开始和小型项目。
|
|
3
|
+
简化模式配置模板
|
|
4
|
+
最小配置,适合快速开始和简单项目
|
|
7
5
|
"""
|
|
8
6
|
|
|
9
|
-
import os
|
|
10
|
-
from crawlo.config import CrawloConfig
|
|
11
|
-
|
|
12
7
|
# ============================== 项目基本信息 ==============================
|
|
13
8
|
PROJECT_NAME = '{{project_name}}'
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
#
|
|
22
|
-
|
|
23
|
-
#
|
|
24
|
-
|
|
25
|
-
#
|
|
26
|
-
#
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
#
|
|
33
|
-
|
|
34
|
-
#
|
|
35
|
-
# - 无需额外依赖(如Redis)
|
|
36
|
-
# - 资源占用少,适合个人开发环境
|
|
37
|
-
#
|
|
38
|
-
# 扩展建议:
|
|
39
|
-
# - 如需跨会话去重,可将FILTER_CLASS和DEFAULT_DEDUP_PIPELINE改为Redis实现
|
|
40
|
-
# - 如需更高性能,可调整CONCURRENCY和DOWNLOAD_DELAY参数
|
|
41
|
-
#
|
|
42
|
-
# 🎯 最佳使用方式:
|
|
43
|
-
# 推荐使用配置工厂方式创建基本配置:
|
|
44
|
-
# from crawlo.config import CrawloConfig
|
|
45
|
-
# config = CrawloConfig.standalone()
|
|
46
|
-
# process = CrawlerProcess(settings=config.to_dict())
|
|
47
|
-
|
|
48
|
-
# ============================== 基本配置 ==============================
|
|
49
|
-
# 使用配置工厂创建基本配置
|
|
50
|
-
CONFIG = CrawloConfig.standalone(
|
|
51
|
-
concurrency=4,
|
|
52
|
-
download_delay=1.0
|
|
53
|
-
)
|
|
54
|
-
|
|
55
|
-
# 获取配置
|
|
56
|
-
locals().update(CONFIG.to_dict())
|
|
57
|
-
|
|
58
|
-
# ============================== 网络请求配置 ==============================
|
|
59
|
-
DOWNLOADER = "crawlo.downloader.httpx_downloader.HttpXDownloader"
|
|
60
|
-
DOWNLOAD_TIMEOUT = 30
|
|
61
|
-
VERIFY_SSL = True
|
|
62
|
-
|
|
63
|
-
# ============================== 并发配置 ==============================
|
|
64
|
-
CONCURRENCY = 4
|
|
65
|
-
DOWNLOAD_DELAY = 1.0
|
|
66
|
-
|
|
67
|
-
# ============================== 数据存储配置 ==============================
|
|
68
|
-
# MySQL 配置
|
|
69
|
-
MYSQL_HOST = os.getenv('MYSQL_HOST', '127.0.0.1')
|
|
70
|
-
MYSQL_PORT = int(os.getenv('MYSQL_PORT', 3306))
|
|
71
|
-
MYSQL_USER = os.getenv('MYSQL_USER', 'root')
|
|
72
|
-
MYSQL_PASSWORD = os.getenv('MYSQL_PASSWORD', '123456')
|
|
73
|
-
MYSQL_DB = os.getenv('MYSQL_DB', '{{project_name}}')
|
|
74
|
-
MYSQL_TABLE = '{{project_name}}_data'
|
|
75
|
-
|
|
76
|
-
# MongoDB 配置
|
|
77
|
-
MONGO_URI = os.getenv('MONGO_URI', 'mongodb://localhost:27017')
|
|
78
|
-
MONGO_DATABASE = '{{project_name}}_db'
|
|
79
|
-
MONGO_COLLECTION = '{{project_name}}_items'
|
|
80
|
-
|
|
81
|
-
# ============================== 去重配置 ==============================
|
|
82
|
-
# 明确指定简化模式下使用内存去重管道
|
|
9
|
+
|
|
10
|
+
# ============================== 简化运行模式 ==============================
|
|
11
|
+
# 运行模式:'standalone'(单机), 'distributed'(分布式), 'auto'(自动检测)
|
|
12
|
+
RUN_MODE = 'standalone' # 单机模式 - 适用于开发和小规模数据采集
|
|
13
|
+
|
|
14
|
+
# 并发配置
|
|
15
|
+
CONCURRENCY = 4 # 低并发数以减少资源占用
|
|
16
|
+
DOWNLOAD_DELAY = 1.0 # 增加延迟以降低目标网站压力
|
|
17
|
+
|
|
18
|
+
# ============================== 队列配置 ==============================
|
|
19
|
+
|
|
20
|
+
# 队列类型:'auto'(自动选择), 'memory'(内存队列), 'redis'(分布式队列)
|
|
21
|
+
QUEUE_TYPE = 'auto' # 自动检测,如果Redis可用则使用Redis队列
|
|
22
|
+
SCHEDULER_MAX_QUEUE_SIZE = 1000
|
|
23
|
+
SCHEDULER_QUEUE_NAME = f'crawlo:{{project_name}}:queue:requests'
|
|
24
|
+
QUEUE_MAX_RETRIES = 3
|
|
25
|
+
QUEUE_TIMEOUT = 300
|
|
26
|
+
|
|
27
|
+
# ============================== 去重过滤配置 ==============================
|
|
28
|
+
|
|
29
|
+
# 简化模式下使用内存去重管道和过滤器
|
|
83
30
|
DEFAULT_DEDUP_PIPELINE = 'crawlo.pipelines.memory_dedup_pipeline.MemoryDedupPipeline'
|
|
84
31
|
FILTER_CLASS = 'crawlo.filters.memory_filter.MemoryFilter'
|
|
85
32
|
|
|
86
|
-
#
|
|
33
|
+
# --- Redis 配置(用于分布式去重和队列) ---
|
|
34
|
+
REDIS_HOST = '127.0.0.1'
|
|
35
|
+
REDIS_PORT = 6379
|
|
36
|
+
REDIS_PASSWORD = '' # 如果有密码,请填写
|
|
37
|
+
|
|
38
|
+
# 根据是否有密码生成 URL
|
|
39
|
+
if REDIS_PASSWORD:
|
|
40
|
+
REDIS_URL = f'redis://:{REDIS_PASSWORD}@{REDIS_HOST}:{REDIS_PORT}/0'
|
|
41
|
+
else:
|
|
42
|
+
REDIS_URL = f'redis://{REDIS_HOST}:{REDIS_PORT}/0'
|
|
43
|
+
|
|
44
|
+
# Redis key配置已移至各组件中,使用统一的命名规范
|
|
45
|
+
# crawlo:{project_name}:filter:fingerprint (请求去重)
|
|
46
|
+
# crawlo:{project_name}:item:fingerprint (数据项去重)
|
|
47
|
+
# crawlo:{project_name}:queue:requests (请求队列)
|
|
48
|
+
# crawlo:{project_name}:queue:processing (处理中队列)
|
|
49
|
+
# crawlo:{project_name}:queue:failed (失败队列)
|
|
50
|
+
|
|
51
|
+
REDIS_TTL = 0
|
|
52
|
+
CLEANUP_FP = 0
|
|
53
|
+
FILTER_DEBUG = True
|
|
54
|
+
DECODE_RESPONSES = True
|
|
55
|
+
|
|
56
|
+
# ============================== 中间件配置 ==============================
|
|
57
|
+
|
|
87
58
|
MIDDLEWARES = [
|
|
59
|
+
# === 请求预处理阶段 ===
|
|
88
60
|
'crawlo.middleware.request_ignore.RequestIgnoreMiddleware',
|
|
89
61
|
'crawlo.middleware.download_delay.DownloadDelayMiddleware',
|
|
90
62
|
'crawlo.middleware.default_header.DefaultHeaderMiddleware',
|
|
63
|
+
'crawlo.middleware.proxy.ProxyMiddleware',
|
|
64
|
+
'crawlo.middleware.offsite.OffsiteMiddleware',
|
|
65
|
+
|
|
66
|
+
# === 响应处理阶段 ===
|
|
91
67
|
'crawlo.middleware.retry.RetryMiddleware',
|
|
92
68
|
'crawlo.middleware.response_code.ResponseCodeMiddleware',
|
|
69
|
+
'crawlo.middleware.response_filter.ResponseFilterMiddleware',
|
|
93
70
|
]
|
|
94
71
|
|
|
72
|
+
# ============================== 数据管道配置 ==============================
|
|
73
|
+
|
|
74
|
+
# 数据处理管道(启用的存储方式)
|
|
95
75
|
PIPELINES = [
|
|
96
76
|
'crawlo.pipelines.console_pipeline.ConsolePipeline',
|
|
97
|
-
# '{{project_name}}.pipelines.DatabasePipeline',
|
|
77
|
+
# '{{project_name}}.pipelines.DatabasePipeline', # 自定义数据库管道
|
|
78
|
+
# 'crawlo.pipelines.mysql_pipeline.AsyncmyMySQLPipeline', # MySQL 存储
|
|
79
|
+
# 'crawlo.pipelines.mongo_pipeline.MongoPipeline', # MongoDB 存储
|
|
98
80
|
]
|
|
99
81
|
|
|
100
|
-
#
|
|
82
|
+
# 明确添加默认去重管道到管道列表开头
|
|
101
83
|
PIPELINES.insert(0, DEFAULT_DEDUP_PIPELINE)
|
|
102
84
|
|
|
85
|
+
# ============================== 扩展组件 ==============================
|
|
86
|
+
|
|
87
|
+
EXTENSIONS = [
|
|
88
|
+
'crawlo.extension.log_interval.LogIntervalExtension',
|
|
89
|
+
'crawlo.extension.log_stats.LogStats',
|
|
90
|
+
'crawlo.extension.logging_extension.CustomLoggerExtension',
|
|
91
|
+
]
|
|
92
|
+
|
|
103
93
|
# ============================== 日志配置 ==============================
|
|
94
|
+
|
|
104
95
|
LOG_LEVEL = 'INFO'
|
|
105
|
-
LOG_FILE = f'logs/{{project_name}}.log'
|
|
106
96
|
STATS_DUMP = True
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
97
|
+
LOG_FILE = f'logs/{{project_name}}.log'
|
|
98
|
+
LOG_FORMAT = '%(asctime)s - [%(name)s] - %(levelname)s: %(message)s'
|
|
99
|
+
LOG_ENCODING = 'utf-8'
|
|
@@ -37,6 +37,7 @@ class OptimizedRedisConnectionPool:
|
|
|
37
37
|
# 连接池实例
|
|
38
38
|
self._connection_pool: Optional[aioredis.ConnectionPool] = None
|
|
39
39
|
self._redis_client: Optional[aioredis.Redis] = None
|
|
40
|
+
self._connection_tested = False # 标记是否已测试连接
|
|
40
41
|
|
|
41
42
|
# 连接池统计信息
|
|
42
43
|
self._stats = {
|
|
@@ -61,7 +62,8 @@ class OptimizedRedisConnectionPool:
|
|
|
61
62
|
connection_pool=self._connection_pool
|
|
62
63
|
)
|
|
63
64
|
|
|
64
|
-
|
|
65
|
+
# 只在调试模式下输出详细连接池信息
|
|
66
|
+
self.logger.debug(f"✅ Redis连接池初始化成功: {self.redis_url}")
|
|
65
67
|
self.logger.debug(f" 连接池配置: {self.config}")
|
|
66
68
|
|
|
67
69
|
except Exception as e:
|
|
@@ -71,6 +73,18 @@ class OptimizedRedisConnectionPool:
|
|
|
71
73
|
raise_error=True
|
|
72
74
|
)
|
|
73
75
|
|
|
76
|
+
async def _test_connection(self):
|
|
77
|
+
"""测试Redis连接"""
|
|
78
|
+
if self._redis_client and not self._connection_tested:
|
|
79
|
+
try:
|
|
80
|
+
await self._redis_client.ping()
|
|
81
|
+
self._connection_tested = True
|
|
82
|
+
# 只在调试模式下输出连接测试成功信息
|
|
83
|
+
self.logger.debug(f"✅ Redis连接测试成功: {self.redis_url}")
|
|
84
|
+
except Exception as e:
|
|
85
|
+
self.logger.error(f"❌ Redis连接测试失败: {self.redis_url} - {e}")
|
|
86
|
+
raise
|
|
87
|
+
|
|
74
88
|
async def get_connection(self) -> aioredis.Redis:
|
|
75
89
|
"""
|
|
76
90
|
获取Redis连接实例
|
|
@@ -79,7 +93,10 @@ class OptimizedRedisConnectionPool:
|
|
|
79
93
|
Redis连接实例
|
|
80
94
|
"""
|
|
81
95
|
if not self._redis_client:
|
|
82
|
-
|
|
96
|
+
self._initialize_pool()
|
|
97
|
+
|
|
98
|
+
# 确保连接有效
|
|
99
|
+
await self._test_connection()
|
|
83
100
|
|
|
84
101
|
self._stats['active_connections'] += 1
|
|
85
102
|
return self._redis_client
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
crawlo/__init__.py,sha256=1tc6uUDF1yRNU7K_k-Dl6h9FGy7Jp8fdhRsXu9PctFI,1312
|
|
2
|
-
crawlo/__version__.py,sha256=
|
|
3
|
-
crawlo/cli.py,sha256=
|
|
2
|
+
crawlo/__version__.py,sha256=vMQK58X8_YZGKzRm0ThvPAKFtpfyejGmUnDrY9RQ13w,22
|
|
3
|
+
crawlo/cli.py,sha256=8OD2us3YYG55AMTm_3YmHiZ8_Plokkx6p2DVR7w0JGs,2872
|
|
4
4
|
crawlo/config.py,sha256=zbFn2HfcbDcD1QOSgNOB9kjw12UDb7zo4Y7RLcJJirQ,9506
|
|
5
5
|
crawlo/config_validator.py,sha256=eQWrSY4xNnbDgwAmPOENyMb0wb3rdzOL55yBCKmpIak,9883
|
|
6
6
|
crawlo/crawler.py,sha256=KPVgLpDJIZ0k3-U9CtrYgvnLatFG9eYPdjUtqoPBHXU,40305
|
|
@@ -21,13 +21,13 @@ crawlo/commands/genspider.py,sha256=NAeXmfb9ElLOuLc4F2xIzuG779KvAAesy3EZa_QxqR8,
|
|
|
21
21
|
crawlo/commands/help.py,sha256=pHp0m6QBF4NZnZncPBxOLRlJQXJZQsztlyn5euFD9po,5186
|
|
22
22
|
crawlo/commands/list.py,sha256=VR4AWrOiLQgw2t6u8dm9uxsCoCPzX36_6XeTs3tzcQk,5772
|
|
23
23
|
crawlo/commands/run.py,sha256=-5psh1ui0zJmHjmzkSo8Tnl99B1e4Rz90lk0Ak-2SuA,12236
|
|
24
|
-
crawlo/commands/startproject.py,sha256=
|
|
24
|
+
crawlo/commands/startproject.py,sha256=8tMokyCBN4nEpWjAca3NQorU5t8WR5ffBQZ7eyvUWpA,16260
|
|
25
25
|
crawlo/commands/stats.py,sha256=fCNk7ix6v9PrscC-wKQl6ddA4xaQLogrN_1SNEj66tk,6128
|
|
26
26
|
crawlo/commands/utils.py,sha256=3nP8bMgPH36ELqg1ilzPhHS873ncNvZNxOEDYY4uDm4,5087
|
|
27
27
|
crawlo/core/__init__.py,sha256=PnFyJdVNHBoPmV1sW0AHQXijeoSTQ8cMYrbNM1JK8kA,41
|
|
28
|
-
crawlo/core/engine.py,sha256=
|
|
28
|
+
crawlo/core/engine.py,sha256=yCpwwxfKKYQ8r1ehjOuwKpPQ8YHpK8DqbFCatrHeCSw,13854
|
|
29
29
|
crawlo/core/processor.py,sha256=qmCqAeqhwYu-UE86evYesaGt9qpuSIfH-ZIZKcXFCZc,1140
|
|
30
|
-
crawlo/core/scheduler.py,sha256=
|
|
30
|
+
crawlo/core/scheduler.py,sha256=MDItA0ZXAaHup18NTSBBeeCKg8cqEBGIYCt0YUceIZU,11845
|
|
31
31
|
crawlo/data/__init__.py,sha256=8MgDxcMhx-emFARcLAw_ODOZNz0neYBcx7kEbzothd8,116
|
|
32
32
|
crawlo/data/user_agents.py,sha256=Zpm9nhP1Q3CVLtRGRBmpMFYW8qP-GxlDBeAOWF_2ad0,4698
|
|
33
33
|
crawlo/downloader/__init__.py,sha256=MlstaKfW-WLXNuZs7tb7cG_wG2sQLw2hdWmUjZEIH7c,8299
|
|
@@ -46,7 +46,7 @@ crawlo/extension/memory_monitor.py,sha256=4aszl3C0GMQbqFhGZjZq5iQuXQR1sOz06VHjjE
|
|
|
46
46
|
crawlo/extension/performance_profiler.py,sha256=EPiNuXuPPDU0Jtgy8arYHpr_8ASK13cCI2BytdJnu_I,4899
|
|
47
47
|
crawlo/extension/request_recorder.py,sha256=RC23yzXClnVv9j2ljQvjBkUfWznfnDHsrQejKhE9y5E,4074
|
|
48
48
|
crawlo/filters/__init__.py,sha256=XC_Q4ykZtSNYizYlAcehVwBBNO3lZ2zuWwafzXiuWyQ,4241
|
|
49
|
-
crawlo/filters/aioredis_filter.py,sha256=
|
|
49
|
+
crawlo/filters/aioredis_filter.py,sha256=XixK3DD5QbCLOw3Me2YdtMkxQpXOT75FE-GiVr_PUGc,8245
|
|
50
50
|
crawlo/filters/memory_filter.py,sha256=VJO0UFRYGxmV8dj4G1subsQ-FtvPcGLbvd7IVtqXnOs,9260
|
|
51
51
|
crawlo/items/__init__.py,sha256=bqekZrRlDhxfWie0UbCs656TptYseoe9QJ67I4E7Elk,386
|
|
52
52
|
crawlo/items/base.py,sha256=tAYrPJgblp3ZEihDXvappdYc6pGdim6x2_9QSmMKI2o,577
|
|
@@ -78,23 +78,23 @@ crawlo/pipelines/pipeline_manager.py,sha256=vK87pAEmpGR24yl6Cr7ovCKag2oB5mruijfY
|
|
|
78
78
|
crawlo/pipelines/redis_dedup_pipeline.py,sha256=QCHybXm7E-AoqeIaI1ZoFJIoeOB9B6DDlShxpgTyaFg,6164
|
|
79
79
|
crawlo/queue/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
80
80
|
crawlo/queue/pqueue.py,sha256=yzF900ko2RReVNZtfk_VV3PzSXEUALI6SHf97geuu6s,1058
|
|
81
|
-
crawlo/queue/queue_manager.py,sha256=
|
|
82
|
-
crawlo/queue/redis_priority_queue.py,sha256=
|
|
81
|
+
crawlo/queue/queue_manager.py,sha256=megRgpuEQ1cOWJAHaaSfqu2h3oM5j9sIb9H5uJ4DbfE,15002
|
|
82
|
+
crawlo/queue/redis_priority_queue.py,sha256=_0rROJk8VHMtxFXpCENsEQb_oOfn8DTPhcLQueD1uOM,13115
|
|
83
83
|
crawlo/settings/__init__.py,sha256=xsukVKn_h2Hopm1Nj-bXkhbfyS62QTTvJi7fhZUwR9M,123
|
|
84
|
-
crawlo/settings/default_settings.py,sha256=
|
|
84
|
+
crawlo/settings/default_settings.py,sha256=wBol4uHlSZtFXyi9bs4a12sr50_Mltpbl8KrRqPngNY,8962
|
|
85
85
|
crawlo/settings/setting_manager.py,sha256=0RYAk07qoJ5WTw_mvV4ECWGS2QNpCnGmBZVTHjqOVIg,3707
|
|
86
86
|
crawlo/spider/__init__.py,sha256=Z_rK23l5yt-DuwJPg8bcqodM_FIs4-iHLaKOimGumcE,20452
|
|
87
87
|
crawlo/templates/crawlo.cfg.tmpl,sha256=9BAmwEibS5Tvy6HIcGXPb0BGeuesmibebmTW0iAEkmo,230
|
|
88
|
+
crawlo/templates/run.py.tmpl,sha256=bk5TbuzxQEaF3wF3i754MT7Z4BqOk_nzSRDnJElpg14,1099
|
|
88
89
|
crawlo/templates/project/__init__.py.tmpl,sha256=f3ETIXw_O6K-lkL6lXM5znMPJW1FZYGFrwDs2BnHcnQ,58
|
|
89
90
|
crawlo/templates/project/items.py.tmpl,sha256=mt1Mm--H2Ouos3r7JPkYh0r33rgYJf1YOMz0OZy8TYs,297
|
|
90
91
|
crawlo/templates/project/middlewares.py.tmpl,sha256=T67p8j0laL4NJJ_3xzPM9yivgZRjTEMiEtEWLPwbkmw,4160
|
|
91
92
|
crawlo/templates/project/pipelines.py.tmpl,sha256=GBHYU0Jx8sKDCdGJp44FMSH7u2slxoFg6a-R9Uwg_-I,2608
|
|
92
|
-
crawlo/templates/project/
|
|
93
|
-
crawlo/templates/project/
|
|
94
|
-
crawlo/templates/project/
|
|
95
|
-
crawlo/templates/project/
|
|
96
|
-
crawlo/templates/project/
|
|
97
|
-
crawlo/templates/project/settings_simple.py.tmpl,sha256=K0ZQ1QQBCVm7nJkPQqzNdu8gZl9gZAPMSoyEyhCX7og,3869
|
|
93
|
+
crawlo/templates/project/settings.py.tmpl,sha256=TbeOEFoGBct6MU2jbne20cRo7tPD0BRCbZ1DCvbqOEs,10718
|
|
94
|
+
crawlo/templates/project/settings_distributed.py.tmpl,sha256=5DdiSmEuAzqMuxQ4zAu3vLAvJk0bj-fxCuzofubrF48,5887
|
|
95
|
+
crawlo/templates/project/settings_gentle.py.tmpl,sha256=fhm4f8BArNN6d2eHpiUOblQOagLTyLYSzZ89jVBRJQQ,3839
|
|
96
|
+
crawlo/templates/project/settings_high_performance.py.tmpl,sha256=3BF_eSdxOl0KknwFipN9vh_gi-ZkEQsw1ZX1HvMxiww,4902
|
|
97
|
+
crawlo/templates/project/settings_simple.py.tmpl,sha256=cpm7VAAC3tA2RR2Znab3Ukf-2FpcdkY_IAEPK1m8MlQ,3752
|
|
98
98
|
crawlo/templates/project/spiders/__init__.py.tmpl,sha256=j_YKsw6HQMJyqlk3WUouP3bsr-XVxshRoSNakHBc00g,106
|
|
99
99
|
crawlo/templates/spider/spider.py.tmpl,sha256=jMhzyxpIpV_KigB-pmN-5mGMiYtu4mfQIOvpZcCGGJI,5055
|
|
100
100
|
crawlo/tools/__init__.py,sha256=5H6rAhjfNSqRMjjlLDVq-vEJWRFyCO-J6HN2kexnXJU,3671
|
|
@@ -119,7 +119,7 @@ crawlo/utils/large_scale_helper.py,sha256=HpcoyBTu8SZnQnS_F3dAL8WpQX5Xahe50H0ORt
|
|
|
119
119
|
crawlo/utils/log.py,sha256=A3lPyhD8kD88cV23KOL-_eT8g69xGQ5L1toDB2AO0mc,4005
|
|
120
120
|
crawlo/utils/performance_monitor.py,sha256=5B8d0Pjy8K2z_Gh6zhBdC-Hl6ldy55MDro30-wnE2mw,9576
|
|
121
121
|
crawlo/utils/queue_helper.py,sha256=xpUUTOqlU1xz2Pb9NKAVGo3AfAO-7Xvx8Lm1q65Dgck,4743
|
|
122
|
-
crawlo/utils/redis_connection_pool.py,sha256=
|
|
122
|
+
crawlo/utils/redis_connection_pool.py,sha256=Ng03XkCOQlWtnGfrMXBNLumJ6Iz9C8rZvMYEHdbmhHc,11066
|
|
123
123
|
crawlo/utils/redis_key_validator.py,sha256=-efQ44FaxrQdhbN3xT23M7O7a6L3IayumObNH3A_Nuc,5627
|
|
124
124
|
crawlo/utils/request.py,sha256=yoLB2rY8d78vgPjIWpdhY5SalIKjyLIvTG_UH6EMdVI,8798
|
|
125
125
|
crawlo/utils/request_serializer.py,sha256=oCGsLC_-Do7EuIREMQE01gINRtM0cC9X2JbW6ykkV74,8536
|
|
@@ -199,8 +199,8 @@ tests/test_template_content.py,sha256=5QAnhKZFDKg-_uFryllLMpCk3a1nCS44hMmYfXm8gW
|
|
|
199
199
|
tests/test_template_redis_key.py,sha256=U6L5HtnDyGp3s6-O4F_yG2Q2nNIGTqB_Q-ESv2EMeOU,4812
|
|
200
200
|
tests/test_tools.py,sha256=IWiu9JGV-5Ow0ivFtiDw62REht-8Hn7NfyR9rnYSlbU,5113
|
|
201
201
|
tests/tools_example.py,sha256=MfVBYemKvHs6MUbydgrJfhiGnygp5dRoIE-eIXCOR7M,7669
|
|
202
|
-
crawlo-1.2.
|
|
203
|
-
crawlo-1.2.
|
|
204
|
-
crawlo-1.2.
|
|
205
|
-
crawlo-1.2.
|
|
206
|
-
crawlo-1.2.
|
|
202
|
+
crawlo-1.2.6.dist-info/METADATA,sha256=Pe5YqJrdYaCh6nNn-Npxw5a3pHpIwq-BiR8FvxgyV5U,19185
|
|
203
|
+
crawlo-1.2.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
204
|
+
crawlo-1.2.6.dist-info/entry_points.txt,sha256=5HoVoTSPxI8SCa5B7pQYxLSrkOdiunyO9tqNsLMv52g,43
|
|
205
|
+
crawlo-1.2.6.dist-info/top_level.txt,sha256=keG_67pbZ_wZL2dmDRA9RMaNHTaV_x_oxZ9DKNgwvR0,22
|
|
206
|
+
crawlo-1.2.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|