crawlo 1.3.2__py3-none-any.whl → 1.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crawlo might be problematic. Click here for more details.
- crawlo/__init__.py +24 -0
- crawlo/__version__.py +1 -1
- crawlo/commands/run.py +58 -32
- crawlo/core/__init__.py +44 -0
- crawlo/core/engine.py +119 -45
- crawlo/core/scheduler.py +4 -3
- crawlo/crawler.py +603 -1133
- crawlo/downloader/aiohttp_downloader.py +4 -2
- crawlo/extension/__init__.py +1 -1
- crawlo/extension/logging_extension.py +23 -7
- crawlo/factories/__init__.py +28 -0
- crawlo/factories/base.py +69 -0
- crawlo/factories/crawler.py +104 -0
- crawlo/factories/registry.py +85 -0
- crawlo/filters/aioredis_filter.py +25 -2
- crawlo/framework.py +292 -0
- crawlo/initialization/__init__.py +40 -0
- crawlo/initialization/built_in.py +426 -0
- crawlo/initialization/context.py +142 -0
- crawlo/initialization/core.py +194 -0
- crawlo/initialization/phases.py +149 -0
- crawlo/initialization/registry.py +146 -0
- crawlo/items/base.py +2 -1
- crawlo/logging/__init__.py +38 -0
- crawlo/logging/config.py +97 -0
- crawlo/logging/factory.py +129 -0
- crawlo/logging/manager.py +112 -0
- crawlo/middleware/middleware_manager.py +1 -1
- crawlo/middleware/offsite.py +1 -1
- crawlo/mode_manager.py +26 -1
- crawlo/pipelines/pipeline_manager.py +2 -1
- crawlo/project.py +76 -46
- crawlo/queue/pqueue.py +11 -5
- crawlo/queue/queue_manager.py +143 -19
- crawlo/queue/redis_priority_queue.py +69 -49
- crawlo/settings/default_settings.py +110 -14
- crawlo/settings/setting_manager.py +29 -13
- crawlo/spider/__init__.py +34 -16
- crawlo/stats_collector.py +17 -3
- crawlo/task_manager.py +112 -3
- crawlo/templates/project/settings.py.tmpl +103 -202
- crawlo/templates/project/settings_distributed.py.tmpl +122 -135
- crawlo/templates/project/settings_gentle.py.tmpl +149 -43
- crawlo/templates/project/settings_high_performance.py.tmpl +127 -90
- crawlo/templates/project/settings_minimal.py.tmpl +46 -15
- crawlo/templates/project/settings_simple.py.tmpl +138 -75
- crawlo/templates/project/spiders/__init__.py.tmpl +5 -1
- crawlo/templates/run.py.tmpl +10 -14
- crawlo/templates/spiders_init.py.tmpl +10 -0
- crawlo/tools/network_diagnostic.py +365 -0
- crawlo/utils/class_loader.py +26 -0
- crawlo/utils/error_handler.py +76 -35
- crawlo/utils/log.py +41 -144
- crawlo/utils/redis_connection_pool.py +43 -6
- crawlo/utils/request_serializer.py +8 -1
- {crawlo-1.3.2.dist-info → crawlo-1.3.4.dist-info}/METADATA +120 -14
- {crawlo-1.3.2.dist-info → crawlo-1.3.4.dist-info}/RECORD +104 -45
- tests/authenticated_proxy_example.py +2 -2
- tests/baidu_performance_test.py +109 -0
- tests/baidu_test.py +60 -0
- tests/comprehensive_framework_test.py +213 -0
- tests/comprehensive_test.py +82 -0
- tests/comprehensive_testing_summary.md +187 -0
- tests/debug_configure.py +70 -0
- tests/debug_framework_logger.py +85 -0
- tests/debug_log_levels.py +64 -0
- tests/distributed_test.py +67 -0
- tests/distributed_test_debug.py +77 -0
- tests/final_command_test_report.md +0 -0
- tests/final_comprehensive_test.py +152 -0
- tests/final_validation_test.py +183 -0
- tests/framework_performance_test.py +203 -0
- tests/optimized_performance_test.py +212 -0
- tests/performance_comparison.py +246 -0
- tests/queue_blocking_test.py +114 -0
- tests/queue_test.py +90 -0
- tests/scrapy_comparison/ofweek_scrapy.py +139 -0
- tests/scrapy_comparison/scrapy_test.py +134 -0
- tests/simple_command_test.py +120 -0
- tests/simple_crawlo_test.py +128 -0
- tests/simple_log_test.py +58 -0
- tests/simple_optimization_test.py +129 -0
- tests/simple_spider_test.py +50 -0
- tests/simple_test.py +48 -0
- tests/test_all_commands.py +231 -0
- tests/test_batch_processor.py +179 -0
- tests/test_component_factory.py +175 -0
- tests/test_controlled_spider_mixin.py +80 -0
- tests/test_enhanced_error_handler_comprehensive.py +246 -0
- tests/test_factories.py +253 -0
- tests/test_framework_logger.py +67 -0
- tests/test_framework_startup.py +65 -0
- tests/test_large_scale_config.py +113 -0
- tests/test_large_scale_helper.py +236 -0
- tests/test_mode_change.py +73 -0
- tests/test_mode_consistency.py +1 -1
- tests/test_performance_monitor.py +116 -0
- tests/test_queue_empty_check.py +42 -0
- tests/untested_features_report.md +139 -0
- tests/verify_debug.py +52 -0
- tests/verify_log_fix.py +112 -0
- tests/DOUBLE_CRAWLO_PREFIX_FIX_REPORT.md +0 -82
- {crawlo-1.3.2.dist-info → crawlo-1.3.4.dist-info}/WHEEL +0 -0
- {crawlo-1.3.2.dist-info → crawlo-1.3.4.dist-info}/entry_points.txt +0 -0
- {crawlo-1.3.2.dist-info → crawlo-1.3.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
批处理工具测试
|
|
5
|
+
测试 BatchProcessor, RedisBatchProcessor, batch_process
|
|
6
|
+
"""
|
|
7
|
+
import sys
|
|
8
|
+
import os
|
|
9
|
+
import unittest
|
|
10
|
+
from unittest.mock import Mock, patch, MagicMock
|
|
11
|
+
import asyncio
|
|
12
|
+
|
|
13
|
+
# 添加项目根目录到 Python 路径
|
|
14
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
|
15
|
+
|
|
16
|
+
from crawlo.utils.batch_processor import BatchProcessor, RedisBatchProcessor, batch_process
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class TestBatchProcessor(unittest.TestCase):
    """Tests for ``BatchProcessor``."""

    def setUp(self):
        """Create a processor with batch size 3 and at most 2 concurrent batches."""
        self.batch_processor = BatchProcessor(
            batch_size=3,
            max_concurrent_batches=2,
        )

    def test_batch_processor_initialization(self):
        """Constructor arguments are exposed as attributes."""
        processor = self.batch_processor
        self.assertEqual(processor.batch_size, 3)
        self.assertEqual(processor.max_concurrent_batches, 2)

    def sync_process_item(self, item):
        """Synchronous per-item callback: return the item doubled."""
        return item * 2

    def test_batch_processor_process_batch_sync(self):
        """``process_batch`` applies a synchronous callback to a single batch."""
        # process_batch returns an awaitable, so drive it with asyncio.run.
        pending = self.batch_processor.process_batch([1, 2, 3], self.sync_process_item)
        doubled = asyncio.run(pending)
        self.assertEqual(doubled, [2, 4, 6])

    def test_batch_processor_process_in_batches_sync(self):
        """``process_in_batches`` handles more items than fit in one batch."""
        numbers = [1, 2, 3, 4, 5, 6, 7]
        expected = [2, 4, 6, 8, 10, 12, 14]
        # process_in_batches returns an awaitable, so drive it with asyncio.run.
        pending = self.batch_processor.process_in_batches(numbers, self.sync_process_item)
        self.assertEqual(asyncio.run(pending), expected)

    def test_batch_processor_with_exception_handling(self):
        """A callback that raises for one item does not lose the other results."""
        def failing_processor(item):
            # Only the value 2 fails; every other item is doubled.
            if item != 2:
                return item * 2
            raise ValueError("处理失败")

        pending = self.batch_processor.process_batch([1, 2, 3], failing_processor)
        outcome = asyncio.run(pending)
        # The author's note says failing items should be filtered out; the
        # assertions only require that the two successful results are present.
        for survivor in (2, 6):
            self.assertIn(survivor, outcome)
        # At least the two successful results must be there.
        self.assertGreaterEqual(len(outcome), 2)

    def test_batch_processor_decorator(self):
        """``batch_process_decorator`` wraps a list-processing function."""
        decorate = self.batch_processor.batch_process_decorator(batch_size=2)

        @decorate
        def process_func(items):
            return [item * 3 for item in items]

        outcome = process_func([1, 2, 3, 4])
        # Only the shape of the result is checked: a non-None list.
        self.assertIsNotNone(outcome)
        self.assertIsInstance(outcome, list)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class TestRedisBatchProcessor(unittest.TestCase):
    """Tests for ``RedisBatchProcessor`` against a mocked Redis client."""

    def setUp(self):
        """Create a processor with batch size 3 backed by a ``Mock`` client."""
        self.mock_redis_client = Mock()
        self.redis_batch_processor = RedisBatchProcessor(
            self.mock_redis_client,
            batch_size=3,
        )

    def _install_pipeline(self, chained_method, execute_result):
        """Make ``client.pipeline()`` return a mock pipeline.

        ``chained_method`` (e.g. ``"set"``) returns the pipeline itself so calls
        can be chained; ``execute()`` returns ``execute_result``.
        """
        pipeline = Mock()
        self.mock_redis_client.pipeline.return_value = pipeline
        getattr(pipeline, chained_method).return_value = pipeline
        pipeline.execute.return_value = execute_result
        return pipeline

    def test_redis_batch_processor_initialization(self):
        """The batch size and the client are stored on the processor."""
        processor = self.redis_batch_processor
        self.assertEqual(processor.batch_size, 3)
        self.assertEqual(processor.redis_client, self.mock_redis_client)

    def test_redis_batch_processor_batch_set(self):
        """``batch_set`` reports the number of items written."""
        entries = [
            {'key': f'key{index}', 'value': f'value{index}'}
            for index in (1, 2, 3)
        ]
        self._install_pipeline('set', None)

        # batch_set returns an awaitable, so drive it with asyncio.run.
        written = asyncio.run(self.redis_batch_processor.batch_set(entries))
        self.assertEqual(written, 3)

    def test_redis_batch_processor_batch_set_empty(self):
        """``batch_set`` with no items reports zero."""
        written = asyncio.run(self.redis_batch_processor.batch_set([]))
        self.assertEqual(written, 0)

    def test_redis_batch_processor_batch_get(self):
        """``batch_get`` maps each key to the value returned by the pipeline."""
        wanted = ['key1', 'key2', 'key3']
        self._install_pipeline('get', ['value1', 'value2', 'value3'])

        fetched = asyncio.run(self.redis_batch_processor.batch_get(wanted))
        self.assertEqual(
            fetched,
            {'key1': 'value1', 'key2': 'value2', 'key3': 'value3'},
        )

    def test_redis_batch_processor_batch_get_with_none_values(self):
        """Keys whose pipeline value is ``None`` are left out of the result."""
        wanted = ['key1', 'key2', 'key3']
        # The pipeline reports no value for key2.
        self._install_pipeline('get', ['value1', None, 'value3'])

        fetched = asyncio.run(self.redis_batch_processor.batch_get(wanted))
        # key2 is expected to be dropped.
        self.assertEqual(fetched, {'key1': 'value1', 'key3': 'value3'})

    def test_redis_batch_processor_batch_delete(self):
        """``batch_delete`` reports the number of keys deleted."""
        doomed = ['key1', 'key2', 'key3']
        self._install_pipeline('delete', None)

        removed = asyncio.run(self.redis_batch_processor.batch_delete(doomed))
        self.assertEqual(removed, 3)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
class TestBatchProcessFunction(unittest.TestCase):
    """Tests for the ``batch_process`` convenience function."""

    def sync_process_item(self, item):
        """Synchronous per-item callback: return the item doubled."""
        return item * 2

    def test_batch_process_sync_function(self):
        """``batch_process`` applies a synchronous callback to every item."""
        outcome = batch_process(
            [1, 2, 3, 4, 5],
            self.sync_process_item,
            batch_size=2,
            max_concurrent_batches=2,
        )
        self.assertEqual(outcome, [2, 4, 6, 8, 10])
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
if __name__ == "__main__":
    # Run every test in this module when it is executed as a script.
    unittest.main()
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
组件工厂测试
|
|
5
|
+
"""
|
|
6
|
+
import sys
|
|
7
|
+
import os
|
|
8
|
+
import unittest
|
|
9
|
+
|
|
10
|
+
# 添加项目根目录到 Python 路径
|
|
11
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
|
12
|
+
|
|
13
|
+
from crawlo.factories import (
|
|
14
|
+
ComponentRegistry,
|
|
15
|
+
ComponentFactory,
|
|
16
|
+
ComponentSpec,
|
|
17
|
+
DefaultComponentFactory,
|
|
18
|
+
CrawlerComponentFactory,
|
|
19
|
+
get_component_registry
|
|
20
|
+
)
|
|
21
|
+
from crawlo.factories.base import ComponentSpec
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class TestComponent:
    """Plain helper component built by the factories under test.

    This is a fixture, not a test case. Its name starts with ``Test`` and it
    defines ``__init__``, so pytest would try to collect it and emit a
    collection warning; ``__test__ = False`` opts it out of collection.

    Args:
        name: Label stored on the instance.
        value: Arbitrary payload stored on the instance.
    """

    # Tell pytest this class is not a test case.
    __test__ = False

    def __init__(self, name="test_component", value=42):
        self.name = name
        self.value = value
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class TestComponentFactory(unittest.TestCase):
    """Tests for the component specs, factories and registry."""

    def setUp(self):
        """Give every test its own empty registry."""
        self.registry = ComponentRegistry()

    @staticmethod
    def _make_spec(name, **spec_options):
        """Build a ``ComponentSpec`` whose factory creates ``TestComponent``.

        Extra keyword arguments are forwarded to ``ComponentSpec`` unchanged.
        """
        def factory_func(**kwargs):
            return TestComponent(**kwargs)

        return ComponentSpec(
            name=name,
            component_type=TestComponent,
            factory_func=factory_func,
            **spec_options,
        )

    def test_component_spec_creation(self):
        """A spec stores the values it was constructed with."""
        spec = self._make_spec("test_component", dependencies=[], singleton=False)

        self.assertEqual(spec.name, "test_component")
        self.assertEqual(spec.component_type, TestComponent)
        self.assertEqual(spec.dependencies, [])
        self.assertFalse(spec.singleton)

    def test_default_component_factory(self):
        """``DefaultComponentFactory`` builds components and honours singletons."""
        factory = DefaultComponentFactory()

        # Regular spec: the keyword arguments reach the factory function.
        plain_spec = self._make_spec("test_component")
        built = factory.create(plain_spec, name="created_component", value=100)
        self.assertIsInstance(built, TestComponent)
        self.assertEqual(built.name, "created_component")
        self.assertEqual(built.value, 100)

        # Singleton spec: the second create() returns the first instance.
        singleton_spec = self._make_spec("singleton_component", singleton=True)
        first = factory.create(singleton_spec, name="singleton_1", value=200)
        second = factory.create(singleton_spec, name="singleton_2", value=300)

        self.assertIs(first, second)
        # The value comes from the first creation, not the second call.
        self.assertEqual(first.value, 200)

    def test_component_registry_registration(self):
        """A registered spec can be retrieved and is listed by name."""
        spec = self._make_spec("registered_component")
        self.registry.register(spec)

        # Lookup by name returns the registered spec.
        self.assertEqual(self.registry.get_spec("registered_component"), spec)

        # The name shows up in the component listing.
        listed = self.registry.list_components()
        self.assertIn("registered_component", listed)

    def test_component_registry_creation(self):
        """The registry creates a component from a registered spec."""
        self.registry.register(self._make_spec("creatable_component"))

        built = self.registry.create("creatable_component", name="created", value=500)
        self.assertIsInstance(built, TestComponent)
        self.assertEqual(built.name, "created")
        self.assertEqual(built.value, 500)

    def test_global_component_registry(self):
        """The global registry is a ``ComponentRegistry`` with entries in it."""
        global_registry = get_component_registry()
        self.assertIsInstance(global_registry, ComponentRegistry)

        # The author expects pre-registered components (at least the crawler
        # ones); only a non-empty listing is asserted.
        listed = global_registry.list_components()
        self.assertGreater(len(listed), 0)

    def test_crawler_component_factory(self):
        """``CrawlerComponentFactory`` supports any type and forwards ``crawler``."""
        factory = CrawlerComponentFactory()

        class MockEngine:
            pass

        # The factory is expected to support every type by default.
        self.assertTrue(factory.supports(MockEngine))

        def mock_engine_factory(crawler=None, **kwargs):
            # Refuse to build without the crawler dependency.
            if crawler is not None:
                return "mock_engine"
            raise ValueError("需要crawler实例")

        engine_spec = ComponentSpec(
            name="mock_engine",
            component_type=type('MockEngine', (), {}),
            factory_func=mock_engine_factory,
            dependencies=['crawler'],
        )

        # Without the crawler dependency, creation raises ValueError.
        with self.assertRaises(ValueError):
            factory.create(engine_spec)

        # With the dependency supplied, the factory function's result is returned.
        built = factory.create(engine_spec, crawler="mock_crawler")
        self.assertEqual(built, "mock_engine")
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
if __name__ == "__main__":
    # Run every test in this module when it is executed as a script.
    unittest.main()
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
受控爬虫混入类测试
|
|
5
|
+
测试 ControlledRequestMixin, AsyncControlledRequestMixin
|
|
6
|
+
"""
|
|
7
|
+
import sys
|
|
8
|
+
import os
|
|
9
|
+
import unittest
|
|
10
|
+
from unittest.mock import Mock, patch, MagicMock
|
|
11
|
+
import asyncio
|
|
12
|
+
|
|
13
|
+
# 添加项目根目录到 Python 路径
|
|
14
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
|
15
|
+
|
|
16
|
+
from crawlo.utils.controlled_spider_mixin import ControlledRequestMixin, AsyncControlledRequestMixin
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class TestControlledRequestMixin(unittest.TestCase):
    """Tests for ``ControlledRequestMixin``."""

    def setUp(self):
        """Instantiate the mixin on its own."""
        self.mixin = ControlledRequestMixin()

    def test_mixin_initialization(self):
        """A fresh mixin carries the expected default settings."""
        defaults = (
            ("max_pending_requests", 100),
            ("batch_size", 50),
            ("generation_interval", 0.1),
            ("backpressure_threshold", 200),
        )
        for attribute, expected in defaults:
            self.assertEqual(getattr(self.mixin, attribute), expected)

    def test_mixin_configuration(self):
        """The settings are plain attributes that can be reassigned."""
        overrides = (
            ("max_pending_requests", 200),
            ("batch_size", 100),
            ("generation_interval", 0.05),
            ("backpressure_threshold", 300),
        )
        # Apply every override first, then verify them all.
        for attribute, new_value in overrides:
            setattr(self.mixin, attribute, new_value)

        for attribute, new_value in overrides:
            self.assertEqual(getattr(self.mixin, attribute), new_value)

    def test_get_generation_stats(self):
        """``get_generation_stats`` returns a dict with the known counters."""
        stats = self.mixin.get_generation_stats()
        self.assertIsInstance(stats, dict)
        required_keys = (
            'generated',
            'skipped',
            'backpressure_events',
            'total_generated',
            'last_generation_time',
        )
        for key in required_keys:
            self.assertIn(key, stats)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class TestAsyncControlledRequestMixin(unittest.TestCase):
    """Tests for ``AsyncControlledRequestMixin``."""

    def setUp(self):
        """Instantiate the async mixin on its own."""
        self.mixin = AsyncControlledRequestMixin()

    def test_async_mixin_initialization(self):
        """A fresh async mixin carries the expected default settings."""
        mixin = self.mixin
        self.assertEqual(mixin.max_concurrent_generations, 10)
        self.assertEqual(mixin.queue_monitor_interval, 1.0)

    def test_async_mixin_configuration(self):
        """The settings are plain attributes that can be reassigned."""
        mixin = self.mixin
        # Override both settings, then read them back.
        mixin.max_concurrent_generations = 20
        mixin.queue_monitor_interval = 0.5

        self.assertEqual(mixin.max_concurrent_generations, 20)
        self.assertEqual(mixin.queue_monitor_interval, 0.5)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
if __name__ == "__main__":
    # Run every test in this module when it is executed as a script.
    unittest.main()
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
增强错误处理工具综合测试
|
|
5
|
+
测试 ErrorContext, DetailedException, EnhancedErrorHandler 的更多功能
|
|
6
|
+
"""
|
|
7
|
+
import sys
|
|
8
|
+
import os
|
|
9
|
+
import unittest
|
|
10
|
+
from unittest.mock import Mock, patch, MagicMock
|
|
11
|
+
import asyncio
|
|
12
|
+
import traceback
|
|
13
|
+
|
|
14
|
+
# 添加项目根目录到 Python 路径
|
|
15
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
|
16
|
+
|
|
17
|
+
from crawlo.utils.enhanced_error_handler import (
|
|
18
|
+
EnhancedErrorHandler,
|
|
19
|
+
ErrorContext,
|
|
20
|
+
DetailedException,
|
|
21
|
+
handle_exception
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class TestErrorContext(unittest.TestCase):
    """Tests for ``ErrorContext``."""

    @staticmethod
    def _make_context():
        """Build the context shared by the tests in this class."""
        return ErrorContext(
            context="测试上下文",
            module="test_module",
            function="test_function",
        )

    def test_error_context_initialization(self):
        """Constructor arguments are stored and a timestamp is set."""
        ctx = self._make_context()

        self.assertEqual(ctx.context, "测试上下文")
        self.assertEqual(ctx.module, "test_module")
        self.assertEqual(ctx.function, "test_function")
        self.assertIsNotNone(ctx.timestamp)

    def test_error_context_string_representation(self):
        """``str()`` of a context mentions module, function, context and time."""
        rendered = str(self._make_context())

        expected_fragments = (
            "Module: test_module",
            "Function: test_function",
            "Context: 测试上下文",
            "Time:",
        )
        for fragment in expected_fragments:
            self.assertIn(fragment, rendered)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class TestDetailedException(unittest.TestCase):
    """Tests for ``DetailedException``."""

    def test_detailed_exception_initialization(self):
        """Message, context, error code and extra details are all kept."""
        ctx = ErrorContext(context="测试上下文")
        extra = {"detail1": "详细信息1", "detail2": "详细信息2"}

        error = DetailedException(
            message="测试异常消息",
            context=ctx,
            error_code="TEST_001",
            **extra,
        )

        self.assertIn("测试异常消息", str(error))
        self.assertEqual(error.context, ctx)
        self.assertEqual(error.error_code, "TEST_001")
        # Extra keyword arguments end up in the ``details`` mapping.
        self.assertEqual(error.details["detail1"], "详细信息1")
        self.assertEqual(error.details["detail2"], "详细信息2")
        self.assertIsNotNone(error.timestamp)

    def test_detailed_exception_without_context(self):
        """Without a context, ``str()`` is exactly the message."""
        error = DetailedException(message="测试异常消息", error_code="TEST_002")

        self.assertEqual(str(error), "测试异常消息")
        self.assertIsNone(error.context)
        self.assertEqual(error.error_code, "TEST_002")
        self.assertIsNotNone(error.timestamp)

    def test_detailed_exception_string_with_context(self):
        """With a context, ``str()`` contains both message and context text."""
        ctx = ErrorContext(context="测试上下文")
        error = DetailedException(message="测试异常消息", context=ctx)

        rendered = str(error)
        for fragment in ("测试异常消息", "测试上下文"):
            self.assertIn(fragment, rendered)

    def test_get_full_details(self):
        """``get_full_details`` returns a mapping describing the exception."""
        ctx = ErrorContext(context="测试上下文")
        error = DetailedException(
            message="测试异常消息",
            context=ctx,
            error_code="TEST_003",
            detail1="详细信息1",
        )

        report = error.get_full_details()
        self.assertIn("测试异常消息", report["message"])
        self.assertEqual(report["error_code"], "TEST_003")
        self.assertIn("测试上下文", report["context"])
        self.assertEqual(report["details"]["detail1"], "详细信息1")
        self.assertEqual(report["exception_type"], "DetailedException")
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class TestEnhancedErrorHandler(unittest.TestCase):
    """Tests for ``EnhancedErrorHandler``."""

    def setUp(self):
        """Build a fresh handler for every test."""
        self.handler = EnhancedErrorHandler("test_logger", "ERROR")

    def test_handler_initialization(self):
        """A new handler has an empty history and a history cap of 100."""
        handler = self.handler
        self.assertEqual(len(handler.error_history), 0)
        self.assertEqual(handler.max_history_size, 100)

    def test_handle_error_without_raising(self):
        """With ``raise_error=False`` the handler returns the error details."""
        try:
            raise ValueError("测试错误")
        except Exception as caught:
            # Handle the error without re-raising or logging it.
            report = self.handler.handle_error(caught, raise_error=False, log_error=False)

        # The returned details describe the exception that was handled.
        self.assertIsInstance(report, dict)
        self.assertEqual(report["exception_type"], "ValueError")
        self.assertEqual(report["message"], "测试错误")

    def test_safe_call_success(self):
        """``safe_call`` returns the function's result when nothing raises."""
        def add(x, y):
            return x + y

        total = self.handler.safe_call(add, 1, 2, default_return=0)
        self.assertEqual(total, 3)

    def test_safe_call_with_exception(self):
        """``safe_call`` returns ``default_return`` when the function raises."""
        def failing_function():
            raise RuntimeError("函数执行失败")

        fallback = self.handler.safe_call(failing_function, default_return="默认值")
        self.assertEqual(fallback, "默认值")

    def test_get_and_clear_error_history(self):
        """Handled errors are recorded and ``clear_error_history`` empties them."""
        # Record two different errors.
        for error in (ValueError("错误1"), RuntimeError("错误2")):
            try:
                raise error
            except Exception as caught:
                self.handler.handle_error(caught, raise_error=False, log_error=False)

        # Both errors are in the history.
        self.assertEqual(len(self.handler.get_error_history()), 2)

        # After clearing, the history is empty.
        self.handler.clear_error_history()
        self.assertEqual(len(self.handler.get_error_history()), 0)

    def test_error_history_size_limit(self):
        """The history never holds more than 100 entries."""
        # Record more errors than the history is expected to keep.
        for index in range(110):
            try:
                raise ValueError(f"错误{index}")
            except Exception as caught:
                self.handler.handle_error(caught, raise_error=False, log_error=False)

        # The history is expected to be capped at 100 entries.
        recorded = self.handler.get_error_history()
        self.assertEqual(len(recorded), 100)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
class TestHandleExceptionDecorator(unittest.TestCase):
    """Tests for the ``handle_exception`` decorator.

    Each test decorates a function that always raises, using
    ``raise_error=False``, and checks that the call raises nothing and
    returns ``None``.

    The ``assertIsNone`` checks are deliberately kept outside the
    ``try``/``except Exception`` blocks. ``AssertionError`` is an ``Exception``
    subclass, so an assertion placed inside the ``try`` would be caught and
    re-reported with the misleading "exception should have been caught"
    message.
    """

    def test_handle_exception_decorator_sync(self):
        """A decorated synchronous function swallows its exception."""
        @handle_exception(context="测试装饰器", module="test_module", raise_error=False)
        def failing_function():
            raise ValueError("装饰器测试错误")

        # The call must not let the ValueError escape.
        try:
            result = failing_function()
        except Exception:
            self.fail("异常应该被捕获")

        # The test expects None as the swallowed call's return value.
        self.assertIsNone(result)

    def test_handle_exception_decorator_async(self):
        """A decorated coroutine function swallows its exception."""
        @handle_exception(context="异步测试装饰器", module="test_module", raise_error=False)
        async def async_failing_function():
            raise RuntimeError("异步装饰器测试错误")

        async def test_async():
            # Awaiting the coroutine must not let the RuntimeError escape.
            try:
                result = await async_failing_function()
            except Exception:
                self.fail("异常应该被捕获")

            # The test expects None as the swallowed call's return value.
            self.assertIsNone(result)

        # Drive the coroutine to completion on a fresh event loop.
        asyncio.run(test_async())

    def test_handle_exception_decorator_with_detailed_exception(self):
        """A ``DetailedException`` raised inside is swallowed as well."""
        @handle_exception(context="详细异常测试", module="test_module", raise_error=False)
        def function_with_detailed_exception():
            context = ErrorContext(context="函数内部上下文")
            raise DetailedException("详细异常消息", context=context, error_code="DETAIL_001")

        # The call must not let the DetailedException escape.
        try:
            result = function_with_detailed_exception()
        except Exception:
            self.fail("详细异常应该被捕获")

        self.assertIsNone(result)
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
if __name__ == "__main__":
    # Run every test in this module when it is executed as a script.
    unittest.main()
|