crawlo 1.1.9-py3-none-any.whl → 1.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of crawlo might be problematic.
- crawlo/__version__.py +1 -1
- crawlo/core/engine.py +15 -6
- crawlo/core/scheduler.py +7 -0
- crawlo/queue/queue_manager.py +24 -21
- crawlo/settings/default_settings.py +1 -1
- crawlo-1.2.0.dist-info/METADATA +697 -0
- {crawlo-1.1.9.dist-info → crawlo-1.2.0.dist-info}/RECORD +11 -11
- tests/test_queue_manager_double_crawlo.py +13 -70
- crawlo-1.1.9.dist-info/METADATA +0 -626
- {crawlo-1.1.9.dist-info → crawlo-1.2.0.dist-info}/WHEEL +0 -0
- {crawlo-1.1.9.dist-info → crawlo-1.2.0.dist-info}/entry_points.txt +0 -0
- {crawlo-1.1.9.dist-info → crawlo-1.2.0.dist-info}/top_level.txt +0 -0
{crawlo-1.1.9.dist-info → crawlo-1.2.0.dist-info}/RECORD

@@ -1,5 +1,5 @@
 crawlo/__init__.py,sha256=1tc6uUDF1yRNU7K_k-Dl6h9FGy7Jp8fdhRsXu9PctFI,1312
-crawlo/__version__.py,sha256=…
+crawlo/__version__.py,sha256=MpAT5hgNoHnTtG1XRD_GV_A7QrHVU6vJjGSw_8qMGA4,22
 crawlo/cli.py,sha256=pbd9wR6evB4aHWNrTRG4WW2ScCN5p4kz44eAuohSdR0,2029
 crawlo/config.py,sha256=cQTDYn2VCdlIs3Jb8mGwF6IWqj85BMr6HQaGS3XjZ7g,9535
 crawlo/config_validator.py,sha256=Q2j9rGW2lZiaA1ka5cJWaabPr1W0fwYHzY_gv-qpPyY,9903
@@ -25,9 +25,9 @@ crawlo/commands/startproject.py,sha256=aBBR5dNb7R-yEMVMisoKiEA86HUFB_yHa0VgPftDX
 crawlo/commands/stats.py,sha256=6pAgkEi8MBnCer2rWmKpaTYr1jaM6HeMG9owAvEzJyY,6064
 crawlo/commands/utils.py,sha256=nohMvUU2zLvX0XzXk6KeCNxP0EvSWj9DiVLxM_7tD5o,5106
 crawlo/core/__init__.py,sha256=PnFyJdVNHBoPmV1sW0AHQXijeoSTQ8cMYrbNM1JK8kA,41
-crawlo/core/engine.py,sha256=…
+crawlo/core/engine.py,sha256=c65vwIPrwDzFvec2f1QJ2_hikBjj-CYjTGkYrjnWxto,13724
 crawlo/core/processor.py,sha256=qmCqAeqhwYu-UE86evYesaGt9qpuSIfH-ZIZKcXFCZc,1140
-crawlo/core/scheduler.py,sha256=…
+crawlo/core/scheduler.py,sha256=ONdbmcjGyUoOhnXsO2pmTGU6no1RxTEM5FDDcthGLGA,5219
 crawlo/downloader/__init__.py,sha256=MlstaKfW-WLXNuZs7tb7cG_wG2sQLw2hdWmUjZEIH7c,8299
 crawlo/downloader/aiohttp_downloader.py,sha256=n2qP3Q13lOnvwB7cb3YxNyNKYVHKqofNNg7j9tV9h-E,8400
 crawlo/downloader/cffi_downloader.py,sha256=IpQUqvls4mEYs_UwPvtN2L4uUIujqn-rf03NuZZkMl0,10710
@@ -75,10 +75,10 @@ crawlo/pipelines/pipeline_manager.py,sha256=vK87pAEmpGR24yl6Cr7ovCKag2oB5mruijfY
 crawlo/pipelines/redis_dedup_pipeline.py,sha256=sgrBSVdxPWgh8HQxvGsazz1MSyBERJF5jd1yoeYo0lE,6166
 crawlo/queue/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 crawlo/queue/pqueue.py,sha256=yzF900ko2RReVNZtfk_VV3PzSXEUALI6SHf97geuu6s,1058
-crawlo/queue/queue_manager.py,sha256=…
+crawlo/queue/queue_manager.py,sha256=_oO5Taqj5WTSKtj9UuLrGZcWxMg2liqaa-kceXyF4GI,12874
 crawlo/queue/redis_priority_queue.py,sha256=RbwKsVxzk31B1VRvyve4vHKe2DesL7K37IZAA31kdd0,12783
 crawlo/settings/__init__.py,sha256=xsukVKn_h2Hopm1Nj-bXkhbfyS62QTTvJi7fhZUwR9M,123
-crawlo/settings/default_settings.py,sha256=…
+crawlo/settings/default_settings.py,sha256=aTYgtWJUGmFF93SV4qeh3pmMpcTkkPd9bJkK486yTik,8781
 crawlo/settings/setting_manager.py,sha256=0RYAk07qoJ5WTw_mvV4ECWGS2QNpCnGmBZVTHjqOVIg,3707
 crawlo/spider/__init__.py,sha256=Z_rK23l5yt-DuwJPg8bcqodM_FIs4-iHLaKOimGumcE,20452
 crawlo/templates/crawlo.cfg.tmpl,sha256=9BAmwEibS5Tvy6HIcGXPb0BGeuesmibebmTW0iAEkmo,230
@@ -167,7 +167,7 @@ tests/test_proxy_middleware_integration.py,sha256=zcl7fR9Toc-I-stSUTzKZPwcfh3kgr
 tests/test_proxy_providers.py,sha256=XwWZCywTYguSsUxSm6fsbaoH1p9dKjqSIx9-sqKZehA,1693
 tests/test_proxy_stats.py,sha256=Til_yksrRz2yBVw-yJi5-36LhNW3vTwpXTm4BdR9PUM,507
 tests/test_proxy_strategies.py,sha256=ZkziozkvZd3KWOQnpHQ8Upd3WpyoX7gN0qFGluNm348,1809
-tests/test_queue_manager_double_crawlo.py,sha256=…
+tests/test_queue_manager_double_crawlo.py,sha256=pdoWUFgjfiPGCsuCypUaxMzicXPQnKpZqVh1o4LuaSM,6822
 tests/test_queue_manager_redis_key.py,sha256=-dnqs2p50zpf09-Z7vo74s8hLQ6cxXd8WCdHM0l17qM,6278
 tests/test_redis_config.py,sha256=TqzFRojc6esGXjGhUCvSLYQDUTAgEJsty9vRVuNraMU,893
 tests/test_redis_connection_pool.py,sha256=ORBU0k-6htQ2VVfyQkqTKVLGQWX04XRxdEGsZKeerQA,9177
@@ -183,8 +183,8 @@ tests/test_template_content.py,sha256=5QAnhKZFDKg-_uFryllLMpCk3a1nCS44hMmYfXm8gW
 tests/test_template_redis_key.py,sha256=U6L5HtnDyGp3s6-O4F_yG2Q2nNIGTqB_Q-ESv2EMeOU,4812
 tests/test_tools.py,sha256=IWiu9JGV-5Ow0ivFtiDw62REht-8Hn7NfyR9rnYSlbU,5113
 tests/tools_example.py,sha256=MfVBYemKvHs6MUbydgrJfhiGnygp5dRoIE-eIXCOR7M,7669
-crawlo-1.…
-crawlo-1.…
-crawlo-1.…
-crawlo-1.…
-crawlo-1.…
+crawlo-1.2.0.dist-info/METADATA,sha256=NwPsIy1QiXk07HBs2iVrgX1_GM8GvNmUVQr-BX08W1g,20004
+crawlo-1.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+crawlo-1.2.0.dist-info/entry_points.txt,sha256=5HoVoTSPxI8SCa5B7pQYxLSrkOdiunyO9tqNsLMv52g,43
+crawlo-1.2.0.dist-info/top_level.txt,sha256=keG_67pbZ_wZL2dmDRA9RMaNHTaV_x_oxZ9DKNgwvR0,22
+crawlo-1.2.0.dist-info/RECORD,,
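Each RECORD line above follows the standard wheel convention of path,sha256=<urlsafe-base64 digest with padding stripped>,<size in bytes>, so an entry is regenerated whenever the corresponding file's bytes change. A minimal sketch of how one of these entries can be recomputed from an unpacked wheel; the path used is just one example taken from the listing above:

    import base64
    import hashlib
    from pathlib import Path

    def record_entry(path: str) -> str:
        """Rebuild a RECORD-style line (path,sha256=<digest>,<size>) for a local file."""
        data = Path(path).read_bytes()
        digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=").decode()
        return f"{path},sha256={digest},{len(data)}"

    # Example: recompute the entry for one file from the listing; run inside an
    # unpacked crawlo-1.2.0 wheel and compare against the shipped RECORD line.
    print(record_entry("crawlo/__version__.py"))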
tests/test_queue_manager_double_crawlo.py

@@ -49,26 +49,15 @@ async def test_queue_manager_naming():
         print(f"Test {i}: {test_case['name']}")
         print(f"  Input queue name: {test_case['queue_name']}")
 
-        # …
+        # Use the optimized project-name extraction logic
         project_name = "default"
         if ':' in test_case['queue_name']:
             parts = test_case['queue_name'].split(':')
-            …
-            …
-            if …
-                …
-                …
-                project_name = parts[2]
-            else:
-                project_name = "default"
-            elif parts[0] == "crawlo":
-                # Normal "crawlo" prefix: take the second part as the project name
-                project_name = parts[1]
-            else:
-                # No "crawlo" prefix: take the first part as the project name
-                project_name = parts[0]
-        else:
-            project_name = test_case['queue_name'] or "default"
+            # Skip every "crawlo" prefix and take the first non-"crawlo" part as the project name
+            for part in parts:
+                if part != "crawlo":
+                    project_name = part
+                    break
         else:
             project_name = test_case['queue_name'] or "default"
 
@@ -132,26 +121,15 @@ async def test_queue_manager_create_queue():
         # Create the queue manager
         queue_manager = QueueManager(config)
 
-        # …
+        # Use the optimized project-name extraction logic
         project_name = "default"
         if ':' in test_case['queue_name']:
             parts = test_case['queue_name'].split(':')
-            …
-            …
-            if …
-                …
-                …
-                project_name = parts[2]
-            else:
-                project_name = "default"
-            elif parts[0] == "crawlo":
-                # Normal "crawlo" prefix: take the second part as the project name
-                project_name = parts[1]
-            else:
-                # No "crawlo" prefix: take the first part as the project name
-                project_name = parts[0]
-        else:
-            project_name = test_case['queue_name'] or "default"
+            # Skip every "crawlo" prefix and take the first non-"crawlo" part as the project name
+            for part in parts:
+                if part != "crawlo":
+                    project_name = part
+                    break
         else:
             project_name = test_case['queue_name'] or "default"
 
@@ -193,39 +171,4 @@ async def test_queue_manager_create_queue():
     except Exception as e:
         print(f"❌ Queue manager create-queue test failed: {e}")
         traceback.print_exc()
-        return False
-
-
-async def main():
-    """Main test function"""
-    print("🚀 Starting queue manager double crawlo prefix tests...")
-    print("=" * 50)
-
-    try:
-        # Test project-name extraction in the queue manager
-        naming_test_success = await test_queue_manager_naming()
-        print()
-
-        # Test queue creation through the queue manager
-        create_test_success = await test_queue_manager_create_queue()
-        print()
-
-        print("=" * 50)
-        if naming_test_success and create_test_success:
-            print("🎉 Queue manager double crawlo prefix tests passed!")
-        else:
-            print("❌ Some tests failed, please check the implementation")
-            return 1
-
-    except Exception as e:
-        print("=" * 50)
-        print(f"❌ Exception raised during the tests: {e}")
-        traceback.print_exc()
-        return 1
-
-    return 0
-
-
-if __name__ == "__main__":
-    exit_code = asyncio.run(main())
-    sys.exit(exit_code)
+        return False
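The substance of the test change is the project-name extraction rule: instead of special-casing one or two leading crawlo segments, the updated logic skips every part equal to "crawlo" and keeps the first remaining segment, falling back to "default". A standalone sketch of that rule as exercised by the test; the helper name and sample queue names below are illustrative, not part of the crawlo API:

    def extract_project_name(queue_name: str) -> str:
        """Skip "crawlo" segments and return the first other segment, as the test does."""
        if not queue_name:
            return "default"
        if ":" not in queue_name:
            return queue_name
        for part in queue_name.split(":"):
            if part != "crawlo":
                return part
        return "default"

    # Illustrative inputs, including the double-prefix case the test file is named after:
    assert extract_project_name("crawlo:crawlo:myproject:queue:requests") == "myproject"
    assert extract_project_name("crawlo:myproject:queue:requests") == "myproject"
    assert extract_project_name("myproject") == "myproject"
    assert extract_project_name("") == "default"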