crawlo 1.1.9__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crawlo might be problematic. Click here for more details.

@@ -1,5 +1,5 @@
1
1
  crawlo/__init__.py,sha256=1tc6uUDF1yRNU7K_k-Dl6h9FGy7Jp8fdhRsXu9PctFI,1312
2
- crawlo/__version__.py,sha256=lnHAM_YSOt5TvtAm4ZVs32T0TpsCQ5kOOznnQlur1YE,22
2
+ crawlo/__version__.py,sha256=MpAT5hgNoHnTtG1XRD_GV_A7QrHVU6vJjGSw_8qMGA4,22
3
3
  crawlo/cli.py,sha256=pbd9wR6evB4aHWNrTRG4WW2ScCN5p4kz44eAuohSdR0,2029
4
4
  crawlo/config.py,sha256=cQTDYn2VCdlIs3Jb8mGwF6IWqj85BMr6HQaGS3XjZ7g,9535
5
5
  crawlo/config_validator.py,sha256=Q2j9rGW2lZiaA1ka5cJWaabPr1W0fwYHzY_gv-qpPyY,9903
@@ -25,9 +25,9 @@ crawlo/commands/startproject.py,sha256=aBBR5dNb7R-yEMVMisoKiEA86HUFB_yHa0VgPftDX
25
25
  crawlo/commands/stats.py,sha256=6pAgkEi8MBnCer2rWmKpaTYr1jaM6HeMG9owAvEzJyY,6064
26
26
  crawlo/commands/utils.py,sha256=nohMvUU2zLvX0XzXk6KeCNxP0EvSWj9DiVLxM_7tD5o,5106
27
27
  crawlo/core/__init__.py,sha256=PnFyJdVNHBoPmV1sW0AHQXijeoSTQ8cMYrbNM1JK8kA,41
28
- crawlo/core/engine.py,sha256=tWiQGAxK3bhwmq6xgg03gz-hyflGi5jPSI1ML2vrBgE,13139
28
+ crawlo/core/engine.py,sha256=c65vwIPrwDzFvec2f1QJ2_hikBjj-CYjTGkYrjnWxto,13724
29
29
  crawlo/core/processor.py,sha256=qmCqAeqhwYu-UE86evYesaGt9qpuSIfH-ZIZKcXFCZc,1140
30
- crawlo/core/scheduler.py,sha256=4RsGl9P0hmXB6crbOwfNr2RLiIUET1h5JFIVKtrqyD0,4953
30
+ crawlo/core/scheduler.py,sha256=ONdbmcjGyUoOhnXsO2pmTGU6no1RxTEM5FDDcthGLGA,5219
31
31
  crawlo/downloader/__init__.py,sha256=MlstaKfW-WLXNuZs7tb7cG_wG2sQLw2hdWmUjZEIH7c,8299
32
32
  crawlo/downloader/aiohttp_downloader.py,sha256=n2qP3Q13lOnvwB7cb3YxNyNKYVHKqofNNg7j9tV9h-E,8400
33
33
  crawlo/downloader/cffi_downloader.py,sha256=IpQUqvls4mEYs_UwPvtN2L4uUIujqn-rf03NuZZkMl0,10710
@@ -75,10 +75,10 @@ crawlo/pipelines/pipeline_manager.py,sha256=vK87pAEmpGR24yl6Cr7ovCKag2oB5mruijfY
75
75
  crawlo/pipelines/redis_dedup_pipeline.py,sha256=sgrBSVdxPWgh8HQxvGsazz1MSyBERJF5jd1yoeYo0lE,6166
76
76
  crawlo/queue/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
77
77
  crawlo/queue/pqueue.py,sha256=yzF900ko2RReVNZtfk_VV3PzSXEUALI6SHf97geuu6s,1058
78
- crawlo/queue/queue_manager.py,sha256=UxIhyLzVM0HNNzwQ6LvOoCJ-zOcE95gl0ZKpzTz1ybA,12930
78
+ crawlo/queue/queue_manager.py,sha256=_oO5Taqj5WTSKtj9UuLrGZcWxMg2liqaa-kceXyF4GI,12874
79
79
  crawlo/queue/redis_priority_queue.py,sha256=RbwKsVxzk31B1VRvyve4vHKe2DesL7K37IZAA31kdd0,12783
80
80
  crawlo/settings/__init__.py,sha256=xsukVKn_h2Hopm1Nj-bXkhbfyS62QTTvJi7fhZUwR9M,123
81
- crawlo/settings/default_settings.py,sha256=cwEIVL294hs7iWVOSfnwA4nuRWjHXJk_Bqgbd37Vmcw,8783
81
+ crawlo/settings/default_settings.py,sha256=aTYgtWJUGmFF93SV4qeh3pmMpcTkkPd9bJkK486yTik,8781
82
82
  crawlo/settings/setting_manager.py,sha256=0RYAk07qoJ5WTw_mvV4ECWGS2QNpCnGmBZVTHjqOVIg,3707
83
83
  crawlo/spider/__init__.py,sha256=Z_rK23l5yt-DuwJPg8bcqodM_FIs4-iHLaKOimGumcE,20452
84
84
  crawlo/templates/crawlo.cfg.tmpl,sha256=9BAmwEibS5Tvy6HIcGXPb0BGeuesmibebmTW0iAEkmo,230
@@ -167,7 +167,7 @@ tests/test_proxy_middleware_integration.py,sha256=zcl7fR9Toc-I-stSUTzKZPwcfh3kgr
167
167
  tests/test_proxy_providers.py,sha256=XwWZCywTYguSsUxSm6fsbaoH1p9dKjqSIx9-sqKZehA,1693
168
168
  tests/test_proxy_stats.py,sha256=Til_yksrRz2yBVw-yJi5-36LhNW3vTwpXTm4BdR9PUM,507
169
169
  tests/test_proxy_strategies.py,sha256=ZkziozkvZd3KWOQnpHQ8Upd3WpyoX7gN0qFGluNm348,1809
170
- tests/test_queue_manager_double_crawlo.py,sha256=LYduecvJ93HAU6Ub-tIrGM2pwOUGoCTkpmzBxjpY5tM,9116
170
+ tests/test_queue_manager_double_crawlo.py,sha256=pdoWUFgjfiPGCsuCypUaxMzicXPQnKpZqVh1o4LuaSM,6822
171
171
  tests/test_queue_manager_redis_key.py,sha256=-dnqs2p50zpf09-Z7vo74s8hLQ6cxXd8WCdHM0l17qM,6278
172
172
  tests/test_redis_config.py,sha256=TqzFRojc6esGXjGhUCvSLYQDUTAgEJsty9vRVuNraMU,893
173
173
  tests/test_redis_connection_pool.py,sha256=ORBU0k-6htQ2VVfyQkqTKVLGQWX04XRxdEGsZKeerQA,9177
@@ -183,8 +183,8 @@ tests/test_template_content.py,sha256=5QAnhKZFDKg-_uFryllLMpCk3a1nCS44hMmYfXm8gW
183
183
  tests/test_template_redis_key.py,sha256=U6L5HtnDyGp3s6-O4F_yG2Q2nNIGTqB_Q-ESv2EMeOU,4812
184
184
  tests/test_tools.py,sha256=IWiu9JGV-5Ow0ivFtiDw62REht-8Hn7NfyR9rnYSlbU,5113
185
185
  tests/tools_example.py,sha256=MfVBYemKvHs6MUbydgrJfhiGnygp5dRoIE-eIXCOR7M,7669
186
- crawlo-1.1.9.dist-info/METADATA,sha256=i3EFNboqsUyzh_CSajaNfJajxlwq-uiP7iDkjMCTQjg,25548
187
- crawlo-1.1.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
188
- crawlo-1.1.9.dist-info/entry_points.txt,sha256=5HoVoTSPxI8SCa5B7pQYxLSrkOdiunyO9tqNsLMv52g,43
189
- crawlo-1.1.9.dist-info/top_level.txt,sha256=keG_67pbZ_wZL2dmDRA9RMaNHTaV_x_oxZ9DKNgwvR0,22
190
- crawlo-1.1.9.dist-info/RECORD,,
186
+ crawlo-1.2.0.dist-info/METADATA,sha256=NwPsIy1QiXk07HBs2iVrgX1_GM8GvNmUVQr-BX08W1g,20004
187
+ crawlo-1.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
188
+ crawlo-1.2.0.dist-info/entry_points.txt,sha256=5HoVoTSPxI8SCa5B7pQYxLSrkOdiunyO9tqNsLMv52g,43
189
+ crawlo-1.2.0.dist-info/top_level.txt,sha256=keG_67pbZ_wZL2dmDRA9RMaNHTaV_x_oxZ9DKNgwvR0,22
190
+ crawlo-1.2.0.dist-info/RECORD,,
@@ -49,26 +49,15 @@ async def test_queue_manager_naming():
49
49
  print(f"测试 {i}: {test_case['name']}")
50
50
  print(f" 输入队列名称: {test_case['queue_name']}")
51
51
 
52
- # 测试队列管理器中的项目名称提取逻辑
52
+ # 使用优化后的项目名称提取逻辑
53
53
  project_name = "default"
54
54
  if ':' in test_case['queue_name']:
55
55
  parts = test_case['queue_name'].split(':')
56
- if len(parts) >= 2:
57
- # 处理可能的双重 crawlo 前缀
58
- if parts[0] == "crawlo" and parts[1] == "crawlo":
59
- # 双重 crawlo 前缀,取第三个部分作为项目名称
60
- if len(parts) >= 3:
61
- project_name = parts[2]
62
- else:
63
- project_name = "default"
64
- elif parts[0] == "crawlo":
65
- # 正常的 crawlo 前缀,取第二个部分作为项目名称
66
- project_name = parts[1]
67
- else:
68
- # 没有 crawlo 前缀,使用第一个部分作为项目名称
69
- project_name = parts[0]
70
- else:
71
- project_name = test_case['queue_name'] or "default"
56
+ # 跳过所有"crawlo"前缀,取第一个非"crawlo"部分作为项目名称
57
+ for part in parts:
58
+ if part != "crawlo":
59
+ project_name = part
60
+ break
72
61
  else:
73
62
  project_name = test_case['queue_name'] or "default"
74
63
 
@@ -132,26 +121,15 @@ async def test_queue_manager_create_queue():
132
121
  # 创建队列管理器
133
122
  queue_manager = QueueManager(config)
134
123
 
135
- # 创建队列实例(模拟队列管理器的_create_queue方法)
124
+ # 使用优化后的项目名称提取逻辑
136
125
  project_name = "default"
137
126
  if ':' in test_case['queue_name']:
138
127
  parts = test_case['queue_name'].split(':')
139
- if len(parts) >= 2:
140
- # 处理可能的双重 crawlo 前缀
141
- if parts[0] == "crawlo" and parts[1] == "crawlo":
142
- # 双重 crawlo 前缀,取第三个部分作为项目名称
143
- if len(parts) >= 3:
144
- project_name = parts[2]
145
- else:
146
- project_name = "default"
147
- elif parts[0] == "crawlo":
148
- # 正常的 crawlo 前缀,取第二个部分作为项目名称
149
- project_name = parts[1]
150
- else:
151
- # 没有 crawlo 前缀,使用第一个部分作为项目名称
152
- project_name = parts[0]
153
- else:
154
- project_name = test_case['queue_name'] or "default"
128
+ # 跳过所有"crawlo"前缀,取第一个非"crawlo"部分作为项目名称
129
+ for part in parts:
130
+ if part != "crawlo":
131
+ project_name = part
132
+ break
155
133
  else:
156
134
  project_name = test_case['queue_name'] or "default"
157
135
 
@@ -193,39 +171,4 @@ async def test_queue_manager_create_queue():
193
171
  except Exception as e:
194
172
  print(f"❌ 队列管理器创建队列测试失败: {e}")
195
173
  traceback.print_exc()
196
- return False
197
-
198
-
199
- async def main():
200
- """主测试函数"""
201
- print("🚀 开始队列管理器双重 crawlo 前缀问题测试...")
202
- print("=" * 50)
203
-
204
- try:
205
- # 测试队列管理器项目名称提取
206
- naming_test_success = await test_queue_manager_naming()
207
- print()
208
-
209
- # 测试队列管理器创建队列
210
- create_test_success = await test_queue_manager_create_queue()
211
- print()
212
-
213
- print("=" * 50)
214
- if naming_test_success and create_test_success:
215
- print("🎉 队列管理器双重 crawlo 前缀问题测试通过!")
216
- else:
217
- print("❌ 部分测试失败,请检查实现")
218
- return 1
219
-
220
- except Exception as e:
221
- print("=" * 50)
222
- print(f"❌ 测试过程中发生异常: {e}")
223
- traceback.print_exc()
224
- return 1
225
-
226
- return 0
227
-
228
-
229
- if __name__ == "__main__":
230
- exit_code = asyncio.run(main())
231
- sys.exit(exit_code)
174
+ return False