crawlo 1.4.7__py3-none-any.whl → 1.4.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crawlo might be problematic. Click here for more details.
- crawlo/__init__.py +90 -90
- crawlo/__version__.py +1 -1
- crawlo/cli.py +75 -75
- crawlo/commands/__init__.py +14 -14
- crawlo/commands/check.py +594 -594
- crawlo/commands/genspider.py +186 -186
- crawlo/commands/help.py +140 -140
- crawlo/commands/list.py +155 -155
- crawlo/commands/run.py +379 -379
- crawlo/commands/startproject.py +460 -460
- crawlo/commands/stats.py +187 -187
- crawlo/commands/utils.py +196 -196
- crawlo/config.py +320 -320
- crawlo/config_validator.py +277 -277
- crawlo/core/__init__.py +52 -52
- crawlo/core/engine.py +451 -451
- crawlo/core/processor.py +47 -47
- crawlo/core/scheduler.py +290 -290
- crawlo/crawler.py +698 -698
- crawlo/data/__init__.py +5 -5
- crawlo/data/user_agents.py +194 -194
- crawlo/downloader/__init__.py +280 -280
- crawlo/downloader/aiohttp_downloader.py +233 -233
- crawlo/downloader/cffi_downloader.py +250 -250
- crawlo/downloader/httpx_downloader.py +265 -265
- crawlo/downloader/hybrid_downloader.py +212 -212
- crawlo/downloader/playwright_downloader.py +425 -425
- crawlo/downloader/selenium_downloader.py +486 -486
- crawlo/event.py +45 -45
- crawlo/exceptions.py +214 -214
- crawlo/extension/__init__.py +64 -64
- crawlo/extension/health_check.py +141 -141
- crawlo/extension/log_interval.py +94 -94
- crawlo/extension/log_stats.py +70 -70
- crawlo/extension/logging_extension.py +53 -53
- crawlo/extension/memory_monitor.py +104 -104
- crawlo/extension/performance_profiler.py +133 -133
- crawlo/extension/request_recorder.py +107 -107
- crawlo/factories/__init__.py +27 -27
- crawlo/factories/base.py +68 -68
- crawlo/factories/crawler.py +104 -104
- crawlo/factories/registry.py +84 -84
- crawlo/factories/utils.py +134 -134
- crawlo/filters/__init__.py +170 -170
- crawlo/filters/aioredis_filter.py +347 -347
- crawlo/filters/memory_filter.py +261 -261
- crawlo/framework.py +306 -306
- crawlo/initialization/__init__.py +44 -44
- crawlo/initialization/built_in.py +391 -391
- crawlo/initialization/context.py +141 -141
- crawlo/initialization/core.py +240 -240
- crawlo/initialization/phases.py +229 -229
- crawlo/initialization/registry.py +143 -143
- crawlo/initialization/utils.py +48 -48
- crawlo/interfaces.py +23 -23
- crawlo/items/__init__.py +23 -23
- crawlo/items/base.py +23 -23
- crawlo/items/fields.py +52 -52
- crawlo/items/items.py +104 -104
- crawlo/logging/__init__.py +42 -42
- crawlo/logging/config.py +280 -276
- crawlo/logging/factory.py +175 -175
- crawlo/logging/manager.py +104 -104
- crawlo/middleware/__init__.py +87 -87
- crawlo/middleware/default_header.py +132 -132
- crawlo/middleware/download_delay.py +104 -104
- crawlo/middleware/middleware_manager.py +142 -142
- crawlo/middleware/offsite.py +123 -123
- crawlo/middleware/proxy.py +209 -209
- crawlo/middleware/request_ignore.py +86 -86
- crawlo/middleware/response_code.py +150 -150
- crawlo/middleware/response_filter.py +136 -136
- crawlo/middleware/retry.py +124 -124
- crawlo/mode_manager.py +287 -287
- crawlo/network/__init__.py +21 -21
- crawlo/network/request.py +408 -376
- crawlo/network/response.py +598 -569
- crawlo/pipelines/__init__.py +52 -52
- crawlo/pipelines/base_pipeline.py +452 -452
- crawlo/pipelines/bloom_dedup_pipeline.py +145 -146
- crawlo/pipelines/console_pipeline.py +39 -39
- crawlo/pipelines/csv_pipeline.py +316 -316
- crawlo/pipelines/database_dedup_pipeline.py +196 -197
- crawlo/pipelines/json_pipeline.py +218 -218
- crawlo/pipelines/memory_dedup_pipeline.py +104 -105
- crawlo/pipelines/mongo_pipeline.py +140 -139
- crawlo/pipelines/mysql_pipeline.py +468 -469
- crawlo/pipelines/pipeline_manager.py +100 -100
- crawlo/pipelines/redis_dedup_pipeline.py +155 -155
- crawlo/project.py +347 -347
- crawlo/queue/__init__.py +9 -9
- crawlo/queue/pqueue.py +38 -38
- crawlo/queue/queue_manager.py +591 -591
- crawlo/queue/redis_priority_queue.py +518 -518
- crawlo/settings/__init__.py +7 -7
- crawlo/settings/default_settings.py +287 -284
- crawlo/settings/setting_manager.py +219 -219
- crawlo/spider/__init__.py +658 -657
- crawlo/stats_collector.py +81 -81
- crawlo/subscriber.py +129 -129
- crawlo/task_manager.py +138 -138
- crawlo/templates/crawlo.cfg.tmpl +10 -10
- crawlo/templates/project/__init__.py.tmpl +1 -1
- crawlo/templates/project/items.py.tmpl +13 -13
- crawlo/templates/project/middlewares.py.tmpl +38 -38
- crawlo/templates/project/pipelines.py.tmpl +35 -35
- crawlo/templates/project/settings.py.tmpl +113 -109
- crawlo/templates/project/settings_distributed.py.tmpl +160 -156
- crawlo/templates/project/settings_gentle.py.tmpl +174 -170
- crawlo/templates/project/settings_high_performance.py.tmpl +175 -171
- crawlo/templates/project/settings_minimal.py.tmpl +102 -98
- crawlo/templates/project/settings_simple.py.tmpl +172 -168
- crawlo/templates/project/spiders/__init__.py.tmpl +9 -9
- crawlo/templates/run.py.tmpl +23 -23
- crawlo/templates/spider/spider.py.tmpl +32 -32
- crawlo/templates/spiders_init.py.tmpl +4 -4
- crawlo/tools/__init__.py +86 -86
- crawlo/tools/date_tools.py +289 -289
- crawlo/tools/distributed_coordinator.py +384 -384
- crawlo/tools/scenario_adapter.py +262 -262
- crawlo/tools/text_cleaner.py +232 -232
- crawlo/utils/__init__.py +74 -50
- crawlo/utils/batch_processor.py +276 -276
- crawlo/utils/config_manager.py +442 -442
- crawlo/utils/controlled_spider_mixin.py +439 -439
- crawlo/utils/db_helper.py +250 -250
- crawlo/utils/encoding_helper.py +190 -0
- crawlo/utils/error_handler.py +410 -410
- crawlo/utils/fingerprint.py +121 -121
- crawlo/utils/func_tools.py +82 -82
- crawlo/utils/large_scale_helper.py +344 -344
- crawlo/utils/leak_detector.py +335 -335
- crawlo/utils/misc.py +81 -81
- crawlo/utils/mongo_connection_pool.py +157 -157
- crawlo/utils/mysql_connection_pool.py +197 -197
- crawlo/utils/performance_monitor.py +285 -285
- crawlo/utils/queue_helper.py +175 -175
- crawlo/utils/redis_checker.py +90 -90
- crawlo/utils/redis_connection_pool.py +578 -578
- crawlo/utils/redis_key_validator.py +198 -198
- crawlo/utils/request.py +278 -278
- crawlo/utils/request_serializer.py +225 -225
- crawlo/utils/resource_manager.py +337 -337
- crawlo/utils/response_helper.py +113 -0
- crawlo/utils/selector_helper.py +138 -137
- crawlo/utils/singleton.py +69 -69
- crawlo/utils/spider_loader.py +201 -201
- crawlo/utils/text_helper.py +94 -94
- {crawlo-1.4.7.dist-info → crawlo-1.4.8.dist-info}/METADATA +831 -689
- crawlo-1.4.8.dist-info/RECORD +347 -0
- examples/__init__.py +7 -7
- tests/__init__.py +7 -7
- tests/advanced_tools_example.py +217 -217
- tests/authenticated_proxy_example.py +110 -110
- tests/baidu_performance_test.py +108 -108
- tests/baidu_test.py +59 -59
- tests/bug_check_test.py +250 -250
- tests/cleaners_example.py +160 -160
- tests/comprehensive_framework_test.py +212 -212
- tests/comprehensive_test.py +81 -81
- tests/comprehensive_testing_summary.md +186 -186
- tests/config_validation_demo.py +142 -142
- tests/controlled_spider_example.py +205 -205
- tests/date_tools_example.py +180 -180
- tests/debug_configure.py +69 -69
- tests/debug_framework_logger.py +84 -84
- tests/debug_log_config.py +126 -126
- tests/debug_log_levels.py +63 -63
- tests/debug_pipelines.py +66 -66
- tests/detailed_log_test.py +233 -233
- tests/direct_selector_helper_test.py +96 -96
- tests/distributed_dedup_test.py +467 -467
- tests/distributed_test.py +66 -66
- tests/distributed_test_debug.py +76 -76
- tests/dynamic_loading_example.py +523 -523
- tests/dynamic_loading_test.py +104 -104
- tests/error_handling_example.py +171 -171
- tests/explain_mysql_update_behavior.py +76 -76
- tests/final_comprehensive_test.py +151 -151
- tests/final_log_test.py +260 -260
- tests/final_validation_test.py +182 -182
- tests/fix_log_test.py +142 -142
- tests/framework_performance_test.py +202 -202
- tests/log_buffering_test.py +111 -111
- tests/log_generation_timing_test.py +153 -153
- tests/monitor_redis_dedup.sh +72 -72
- tests/ofweek_scrapy/ofweek_scrapy/items.py +12 -12
- tests/ofweek_scrapy/ofweek_scrapy/middlewares.py +100 -100
- tests/ofweek_scrapy/ofweek_scrapy/pipelines.py +13 -13
- tests/ofweek_scrapy/ofweek_scrapy/settings.py +84 -84
- tests/ofweek_scrapy/ofweek_scrapy/spiders/__init__.py +4 -4
- tests/ofweek_scrapy/scrapy.cfg +11 -11
- tests/optimized_performance_test.py +211 -211
- tests/performance_comparison.py +244 -244
- tests/queue_blocking_test.py +113 -113
- tests/queue_test.py +89 -89
- tests/redis_key_validation_demo.py +130 -130
- tests/request_params_example.py +150 -150
- tests/response_improvements_example.py +144 -144
- tests/scrapy_comparison/ofweek_scrapy.py +138 -138
- tests/scrapy_comparison/scrapy_test.py +133 -133
- tests/simple_cli_test.py +54 -54
- tests/simple_command_test.py +119 -119
- tests/simple_crawlo_test.py +126 -126
- tests/simple_follow_test.py +38 -38
- tests/simple_log_test2.py +137 -137
- tests/simple_optimization_test.py +128 -128
- tests/simple_queue_type_test.py +41 -41
- tests/simple_response_selector_test.py +94 -94
- tests/simple_selector_helper_test.py +154 -154
- tests/simple_selector_test.py +207 -207
- tests/simple_spider_test.py +49 -49
- tests/simple_url_test.py +73 -73
- tests/simulate_mysql_update_test.py +139 -139
- tests/spider_log_timing_test.py +177 -177
- tests/test_advanced_tools.py +148 -148
- tests/test_all_commands.py +230 -230
- tests/test_all_pipeline_fingerprints.py +133 -133
- tests/test_all_redis_key_configs.py +145 -145
- tests/test_asyncmy_usage.py +56 -56
- tests/test_batch_processor.py +178 -178
- tests/test_cleaners.py +54 -54
- tests/test_cli_arguments.py +118 -118
- tests/test_component_factory.py +174 -174
- tests/test_config_consistency.py +80 -80
- tests/test_config_merge.py +152 -152
- tests/test_config_validator.py +182 -182
- tests/test_controlled_spider_mixin.py +79 -79
- tests/test_crawler_process_import.py +38 -38
- tests/test_crawler_process_spider_modules.py +47 -47
- tests/test_crawlo_proxy_integration.py +114 -114
- tests/test_date_tools.py +123 -123
- tests/test_dedup_fix.py +220 -220
- tests/test_dedup_pipeline_consistency.py +124 -124
- tests/test_default_header_middleware.py +313 -313
- tests/test_distributed.py +65 -65
- tests/test_double_crawlo_fix.py +204 -204
- tests/test_double_crawlo_fix_simple.py +124 -124
- tests/test_download_delay_middleware.py +221 -221
- tests/test_downloader_proxy_compatibility.py +272 -272
- tests/test_edge_cases.py +305 -305
- tests/test_encoding_core.py +56 -56
- tests/test_encoding_detection.py +126 -126
- tests/test_enhanced_error_handler.py +270 -270
- tests/test_enhanced_error_handler_comprehensive.py +245 -245
- tests/test_error_handler_compatibility.py +112 -112
- tests/test_factories.py +252 -252
- tests/test_factory_compatibility.py +196 -196
- tests/test_final_validation.py +153 -153
- tests/test_fingerprint_consistency.py +135 -135
- tests/test_fingerprint_simple.py +51 -51
- tests/test_get_component_logger.py +83 -83
- tests/test_hash_performance.py +99 -99
- tests/test_integration.py +169 -169
- tests/test_item_dedup_redis_key.py +122 -122
- tests/test_large_scale_helper.py +235 -235
- tests/test_logging_enhancements.py +374 -374
- tests/test_logging_final.py +184 -184
- tests/test_logging_integration.py +312 -312
- tests/test_logging_system.py +282 -282
- tests/test_middleware_debug.py +141 -141
- tests/test_mode_consistency.py +51 -51
- tests/test_multi_directory.py +67 -67
- tests/test_multiple_spider_modules.py +80 -80
- tests/test_mysql_pipeline_config.py +164 -164
- tests/test_mysql_pipeline_error.py +98 -98
- tests/test_mysql_pipeline_init_log.py +82 -82
- tests/test_mysql_pipeline_integration.py +132 -132
- tests/test_mysql_pipeline_refactor.py +143 -143
- tests/test_mysql_pipeline_refactor_simple.py +85 -85
- tests/test_mysql_pipeline_robustness.py +195 -195
- tests/test_mysql_pipeline_types.py +88 -88
- tests/test_mysql_update_columns.py +93 -93
- tests/test_offsite_middleware.py +244 -244
- tests/test_offsite_middleware_simple.py +203 -203
- tests/test_optimized_selector_naming.py +100 -100
- tests/test_parsel.py +29 -29
- tests/test_performance.py +327 -327
- tests/test_performance_monitor.py +115 -115
- tests/test_pipeline_fingerprint_consistency.py +86 -86
- tests/test_priority_behavior.py +211 -211
- tests/test_priority_consistency.py +151 -151
- tests/test_priority_consistency_fixed.py +249 -249
- tests/test_proxy_health_check.py +32 -32
- tests/test_proxy_middleware.py +217 -217
- tests/test_proxy_middleware_enhanced.py +212 -212
- tests/test_proxy_middleware_integration.py +142 -142
- tests/test_proxy_middleware_refactored.py +207 -207
- tests/test_proxy_only.py +83 -83
- tests/test_proxy_providers.py +56 -56
- tests/test_proxy_stats.py +19 -19
- tests/test_proxy_strategies.py +59 -59
- tests/test_proxy_with_downloader.py +152 -152
- tests/test_queue_empty_check.py +41 -41
- tests/test_queue_manager_double_crawlo.py +173 -173
- tests/test_queue_manager_redis_key.py +179 -179
- tests/test_queue_naming.py +154 -154
- tests/test_queue_type.py +106 -106
- tests/test_queue_type_redis_config_consistency.py +130 -130
- tests/test_random_headers_default.py +322 -322
- tests/test_random_headers_necessity.py +308 -308
- tests/test_random_user_agent.py +72 -72
- tests/test_redis_config.py +28 -28
- tests/test_redis_connection_pool.py +294 -294
- tests/test_redis_key_naming.py +181 -181
- tests/test_redis_key_validator.py +123 -123
- tests/test_redis_queue.py +224 -224
- tests/test_redis_queue_name_fix.py +175 -175
- tests/test_redis_queue_type_fallback.py +129 -129
- tests/test_request_ignore_middleware.py +182 -182
- tests/test_request_params.py +111 -111
- tests/test_request_serialization.py +70 -70
- tests/test_response_code_middleware.py +349 -349
- tests/test_response_filter_middleware.py +427 -427
- tests/test_response_follow.py +104 -104
- tests/test_response_improvements.py +152 -152
- tests/test_response_selector_methods.py +92 -92
- tests/test_response_url_methods.py +70 -70
- tests/test_response_urljoin.py +86 -86
- tests/test_retry_middleware.py +333 -333
- tests/test_retry_middleware_realistic.py +273 -273
- tests/test_scheduler.py +252 -252
- tests/test_scheduler_config_update.py +133 -133
- tests/test_scrapy_style_encoding.py +112 -112
- tests/test_selector_helper.py +100 -100
- tests/test_selector_optimizations.py +146 -146
- tests/test_simple_response.py +61 -61
- tests/test_spider_loader.py +49 -49
- tests/test_spider_loader_comprehensive.py +69 -69
- tests/test_spider_modules.py +84 -84
- tests/test_spiders/test_spider.py +9 -9
- tests/test_telecom_spider_redis_key.py +205 -205
- tests/test_template_content.py +87 -87
- tests/test_template_redis_key.py +134 -134
- tests/test_tools.py +159 -159
- tests/test_user_agent_randomness.py +176 -176
- tests/test_user_agents.py +96 -96
- tests/untested_features_report.md +138 -138
- tests/verify_debug.py +51 -51
- tests/verify_distributed.py +117 -117
- tests/verify_log_fix.py +111 -111
- tests/verify_mysql_warnings.py +109 -109
- crawlo/utils/log.py +0 -80
- crawlo/utils/url_utils.py +0 -40
- crawlo-1.4.7.dist-info/RECORD +0 -347
- {crawlo-1.4.7.dist-info → crawlo-1.4.8.dist-info}/WHEEL +0 -0
- {crawlo-1.4.7.dist-info → crawlo-1.4.8.dist-info}/entry_points.txt +0 -0
- {crawlo-1.4.7.dist-info → crawlo-1.4.8.dist-info}/top_level.txt +0 -0
tests/test_asyncmy_usage.py
CHANGED
|
@@ -1,57 +1,57 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
import asyncio
|
|
3
|
-
from asyncmy import create_pool
|
|
4
|
-
|
|
5
|
-
async def test_asyncmy_usage():
|
|
6
|
-
"""测试asyncmy库的正确使用方式"""
|
|
7
|
-
try:
|
|
8
|
-
# 创建连接池
|
|
9
|
-
pool = await create_pool(
|
|
10
|
-
host='127.0.0.1',
|
|
11
|
-
port=3306,
|
|
12
|
-
user='root',
|
|
13
|
-
password='123456',
|
|
14
|
-
db='test',
|
|
15
|
-
minsize=1,
|
|
16
|
-
maxsize=5
|
|
17
|
-
)
|
|
18
|
-
|
|
19
|
-
# 获取连接
|
|
20
|
-
conn = await pool.acquire()
|
|
21
|
-
try:
|
|
22
|
-
# 获取游标
|
|
23
|
-
cursor = await conn.cursor()
|
|
24
|
-
try:
|
|
25
|
-
# 执行SQL
|
|
26
|
-
result = cursor.execute("SELECT 1")
|
|
27
|
-
print(f"execute返回类型: {type(result)}")
|
|
28
|
-
print(f"execute返回值: {result}")
|
|
29
|
-
|
|
30
|
-
# 检查是否需要await
|
|
31
|
-
if hasattr(result, '__await__'):
|
|
32
|
-
print("execute返回的是协程对象,需要await")
|
|
33
|
-
result = await result
|
|
34
|
-
else:
|
|
35
|
-
print("execute返回的不是协程对象,不需要await")
|
|
36
|
-
|
|
37
|
-
# 提交事务
|
|
38
|
-
await conn.commit()
|
|
39
|
-
|
|
40
|
-
finally:
|
|
41
|
-
await cursor.close()
|
|
42
|
-
finally:
|
|
43
|
-
pool.release(conn)
|
|
44
|
-
|
|
45
|
-
# 关闭连接池
|
|
46
|
-
pool.close()
|
|
47
|
-
await pool.wait_closed()
|
|
48
|
-
|
|
49
|
-
print("测试完成")
|
|
50
|
-
|
|
51
|
-
except Exception as e:
|
|
52
|
-
print(f"测试出错: {e}")
|
|
53
|
-
import traceback
|
|
54
|
-
traceback.print_exc()
|
|
55
|
-
|
|
56
|
-
if __name__ == "__main__":
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
import asyncio
|
|
3
|
+
from asyncmy import create_pool
|
|
4
|
+
|
|
5
|
+
async def test_asyncmy_usage():
|
|
6
|
+
"""测试asyncmy库的正确使用方式"""
|
|
7
|
+
try:
|
|
8
|
+
# 创建连接池
|
|
9
|
+
pool = await create_pool(
|
|
10
|
+
host='127.0.0.1',
|
|
11
|
+
port=3306,
|
|
12
|
+
user='root',
|
|
13
|
+
password='123456',
|
|
14
|
+
db='test',
|
|
15
|
+
minsize=1,
|
|
16
|
+
maxsize=5
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
# 获取连接
|
|
20
|
+
conn = await pool.acquire()
|
|
21
|
+
try:
|
|
22
|
+
# 获取游标
|
|
23
|
+
cursor = await conn.cursor()
|
|
24
|
+
try:
|
|
25
|
+
# 执行SQL
|
|
26
|
+
result = cursor.execute("SELECT 1")
|
|
27
|
+
print(f"execute返回类型: {type(result)}")
|
|
28
|
+
print(f"execute返回值: {result}")
|
|
29
|
+
|
|
30
|
+
# 检查是否需要await
|
|
31
|
+
if hasattr(result, '__await__'):
|
|
32
|
+
print("execute返回的是协程对象,需要await")
|
|
33
|
+
result = await result
|
|
34
|
+
else:
|
|
35
|
+
print("execute返回的不是协程对象,不需要await")
|
|
36
|
+
|
|
37
|
+
# 提交事务
|
|
38
|
+
await conn.commit()
|
|
39
|
+
|
|
40
|
+
finally:
|
|
41
|
+
await cursor.close()
|
|
42
|
+
finally:
|
|
43
|
+
pool.release(conn)
|
|
44
|
+
|
|
45
|
+
# 关闭连接池
|
|
46
|
+
pool.close()
|
|
47
|
+
await pool.wait_closed()
|
|
48
|
+
|
|
49
|
+
print("测试完成")
|
|
50
|
+
|
|
51
|
+
except Exception as e:
|
|
52
|
+
print(f"测试出错: {e}")
|
|
53
|
+
import traceback
|
|
54
|
+
traceback.print_exc()
|
|
55
|
+
|
|
56
|
+
if __name__ == "__main__":
|
|
57
57
|
asyncio.run(test_asyncmy_usage())
|
tests/test_batch_processor.py
CHANGED
|
@@ -1,179 +1,179 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
|
-
"""
|
|
4
|
-
批处理工具测试
|
|
5
|
-
测试 BatchProcessor, RedisBatchProcessor, batch_process
|
|
6
|
-
"""
|
|
7
|
-
import sys
|
|
8
|
-
import os
|
|
9
|
-
import unittest
|
|
10
|
-
from unittest.mock import Mock, patch, MagicMock
|
|
11
|
-
import asyncio
|
|
12
|
-
|
|
13
|
-
# 添加项目根目录到 Python 路径
|
|
14
|
-
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
|
15
|
-
|
|
16
|
-
from crawlo.utils.batch_processor import BatchProcessor, RedisBatchProcessor, batch_process
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class TestBatchProcessor(unittest.TestCase):
|
|
20
|
-
"""批处理工具测试类"""
|
|
21
|
-
|
|
22
|
-
def setUp(self):
|
|
23
|
-
"""测试前准备"""
|
|
24
|
-
self.batch_processor = BatchProcessor(batch_size=3, max_concurrent_batches=2)
|
|
25
|
-
|
|
26
|
-
def test_batch_processor_initialization(self):
|
|
27
|
-
"""测试批处理器初始化"""
|
|
28
|
-
self.assertEqual(self.batch_processor.batch_size, 3)
|
|
29
|
-
self.assertEqual(self.batch_processor.max_concurrent_batches, 2)
|
|
30
|
-
|
|
31
|
-
def sync_process_item(self, item):
|
|
32
|
-
"""同步处理函数"""
|
|
33
|
-
return item * 2
|
|
34
|
-
|
|
35
|
-
def test_batch_processor_process_batch_sync(self):
|
|
36
|
-
"""测试批处理器同步处理批次"""
|
|
37
|
-
items = [1, 2, 3]
|
|
38
|
-
# 使用事件循环运行异步方法
|
|
39
|
-
results = asyncio.run(self.batch_processor.process_batch(items, self.sync_process_item))
|
|
40
|
-
self.assertEqual(results, [2, 4, 6])
|
|
41
|
-
|
|
42
|
-
def test_batch_processor_process_in_batches_sync(self):
|
|
43
|
-
"""测试批处理器同步分批处理大量数据"""
|
|
44
|
-
items = [1, 2, 3, 4, 5, 6, 7]
|
|
45
|
-
# 使用事件循环运行异步方法
|
|
46
|
-
results = asyncio.run(self.batch_processor.process_in_batches(items, self.sync_process_item))
|
|
47
|
-
expected = [2, 4, 6, 8, 10, 12, 14]
|
|
48
|
-
self.assertEqual(results, expected)
|
|
49
|
-
|
|
50
|
-
def test_batch_processor_with_exception_handling(self):
|
|
51
|
-
"""测试批处理器异常处理"""
|
|
52
|
-
def failing_processor(item):
|
|
53
|
-
if item == 2:
|
|
54
|
-
raise ValueError("处理失败")
|
|
55
|
-
return item * 2
|
|
56
|
-
|
|
57
|
-
items = [1, 2, 3]
|
|
58
|
-
# 使用事件循环运行异步方法
|
|
59
|
-
results = asyncio.run(self.batch_processor.process_batch(items, failing_processor))
|
|
60
|
-
# 异常项应该被过滤掉
|
|
61
|
-
self.assertIn(2, results)
|
|
62
|
-
self.assertIn(6, results)
|
|
63
|
-
# 检查长度至少为2
|
|
64
|
-
self.assertGreaterEqual(len(results), 2)
|
|
65
|
-
|
|
66
|
-
def test_batch_processor_decorator(self):
|
|
67
|
-
"""测试批处理器装饰器"""
|
|
68
|
-
@self.batch_processor.batch_process_decorator(batch_size=2)
|
|
69
|
-
def process_func(items):
|
|
70
|
-
return [item * 3 for item in items]
|
|
71
|
-
|
|
72
|
-
items = [1, 2, 3, 4]
|
|
73
|
-
results = process_func(items)
|
|
74
|
-
# 检查结果不为空
|
|
75
|
-
self.assertIsNotNone(results)
|
|
76
|
-
self.assertIsInstance(results, list)
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
class TestRedisBatchProcessor(unittest.TestCase):
|
|
80
|
-
"""Redis批处理器测试类"""
|
|
81
|
-
|
|
82
|
-
def setUp(self):
|
|
83
|
-
"""测试前准备"""
|
|
84
|
-
self.mock_redis_client = Mock()
|
|
85
|
-
self.redis_batch_processor = RedisBatchProcessor(self.mock_redis_client, batch_size=3)
|
|
86
|
-
|
|
87
|
-
def test_redis_batch_processor_initialization(self):
|
|
88
|
-
"""测试Redis批处理器初始化"""
|
|
89
|
-
self.assertEqual(self.redis_batch_processor.batch_size, 3)
|
|
90
|
-
self.assertEqual(self.redis_batch_processor.redis_client, self.mock_redis_client)
|
|
91
|
-
|
|
92
|
-
def test_redis_batch_processor_batch_set(self):
|
|
93
|
-
"""测试Redis批处理器批量设置"""
|
|
94
|
-
items = [
|
|
95
|
-
{'key': 'key1', 'value': 'value1'},
|
|
96
|
-
{'key': 'key2', 'value': 'value2'},
|
|
97
|
-
{'key': 'key3', 'value': 'value3'}
|
|
98
|
-
]
|
|
99
|
-
|
|
100
|
-
# 模拟pipeline行为
|
|
101
|
-
mock_pipe = Mock()
|
|
102
|
-
self.mock_redis_client.pipeline.return_value = mock_pipe
|
|
103
|
-
mock_pipe.execute.return_value = None # execute方法返回None
|
|
104
|
-
mock_pipe.set.return_value = mock_pipe # set方法返回pipe自身以支持链式调用
|
|
105
|
-
|
|
106
|
-
# 使用事件循环运行异步方法
|
|
107
|
-
count = asyncio.run(self.redis_batch_processor.batch_set(items))
|
|
108
|
-
self.assertEqual(count, 3)
|
|
109
|
-
|
|
110
|
-
def test_redis_batch_processor_batch_set_empty(self):
|
|
111
|
-
"""测试Redis批处理器批量设置空列表"""
|
|
112
|
-
items = []
|
|
113
|
-
# 使用事件循环运行异步方法
|
|
114
|
-
count = asyncio.run(self.redis_batch_processor.batch_set(items))
|
|
115
|
-
self.assertEqual(count, 0)
|
|
116
|
-
|
|
117
|
-
def test_redis_batch_processor_batch_get(self):
|
|
118
|
-
"""测试Redis批处理器批量获取"""
|
|
119
|
-
keys = ['key1', 'key2', 'key3']
|
|
120
|
-
|
|
121
|
-
# 模拟pipeline行为
|
|
122
|
-
mock_pipe = Mock()
|
|
123
|
-
self.mock_redis_client.pipeline.return_value = mock_pipe
|
|
124
|
-
mock_pipe.get.return_value = mock_pipe # get方法返回pipe自身以支持链式调用
|
|
125
|
-
mock_pipe.execute.return_value = ['value1', 'value2', 'value3']
|
|
126
|
-
|
|
127
|
-
# 使用事件循环运行异步方法
|
|
128
|
-
result = asyncio.run(self.redis_batch_processor.batch_get(keys))
|
|
129
|
-
expected = {'key1': 'value1', 'key2': 'value2', 'key3': 'value3'}
|
|
130
|
-
self.assertEqual(result, expected)
|
|
131
|
-
|
|
132
|
-
def test_redis_batch_processor_batch_get_with_none_values(self):
|
|
133
|
-
"""测试Redis批处理器批量获取包含None值"""
|
|
134
|
-
keys = ['key1', 'key2', 'key3']
|
|
135
|
-
|
|
136
|
-
# 模拟pipeline行为,其中key2返回None
|
|
137
|
-
mock_pipe = Mock()
|
|
138
|
-
self.mock_redis_client.pipeline.return_value = mock_pipe
|
|
139
|
-
mock_pipe.get.return_value = mock_pipe # get方法返回pipe自身以支持链式调用
|
|
140
|
-
mock_pipe.execute.return_value = ['value1', None, 'value3']
|
|
141
|
-
|
|
142
|
-
# 使用事件循环运行异步方法
|
|
143
|
-
result = asyncio.run(self.redis_batch_processor.batch_get(keys))
|
|
144
|
-
expected = {'key1': 'value1', 'key3': 'value3'} # key2应该被过滤掉
|
|
145
|
-
self.assertEqual(result, expected)
|
|
146
|
-
|
|
147
|
-
def test_redis_batch_processor_batch_delete(self):
|
|
148
|
-
"""测试Redis批处理器批量删除"""
|
|
149
|
-
keys = ['key1', 'key2', 'key3']
|
|
150
|
-
|
|
151
|
-
# 模拟pipeline行为
|
|
152
|
-
mock_pipe = Mock()
|
|
153
|
-
self.mock_redis_client.pipeline.return_value = mock_pipe
|
|
154
|
-
mock_pipe.delete.return_value = mock_pipe # delete方法返回pipe自身以支持链式调用
|
|
155
|
-
mock_pipe.execute.return_value = None
|
|
156
|
-
|
|
157
|
-
# 使用事件循环运行异步方法
|
|
158
|
-
count = asyncio.run(self.redis_batch_processor.batch_delete(keys))
|
|
159
|
-
self.assertEqual(count, 3)
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
class TestBatchProcessFunction(unittest.TestCase):
|
|
163
|
-
"""批处理便捷函数测试类"""
|
|
164
|
-
|
|
165
|
-
def sync_process_item(self, item):
|
|
166
|
-
"""同步处理函数"""
|
|
167
|
-
return item * 2
|
|
168
|
-
|
|
169
|
-
def test_batch_process_sync_function(self):
|
|
170
|
-
"""测试批处理便捷函数处理同步函数"""
|
|
171
|
-
items = [1, 2, 3, 4, 5]
|
|
172
|
-
results = batch_process(items, self.sync_process_item, batch_size=2, max_concurrent_batches=2)
|
|
173
|
-
expected = [2, 4, 6, 8, 10]
|
|
174
|
-
self.assertEqual(results, expected)
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
if __name__ == '__main__':
|
|
178
|
-
# 运行测试
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
批处理工具测试
|
|
5
|
+
测试 BatchProcessor, RedisBatchProcessor, batch_process
|
|
6
|
+
"""
|
|
7
|
+
import sys
|
|
8
|
+
import os
|
|
9
|
+
import unittest
|
|
10
|
+
from unittest.mock import Mock, patch, MagicMock
|
|
11
|
+
import asyncio
|
|
12
|
+
|
|
13
|
+
# 添加项目根目录到 Python 路径
|
|
14
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
|
15
|
+
|
|
16
|
+
from crawlo.utils.batch_processor import BatchProcessor, RedisBatchProcessor, batch_process
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class TestBatchProcessor(unittest.TestCase):
|
|
20
|
+
"""批处理工具测试类"""
|
|
21
|
+
|
|
22
|
+
def setUp(self):
|
|
23
|
+
"""测试前准备"""
|
|
24
|
+
self.batch_processor = BatchProcessor(batch_size=3, max_concurrent_batches=2)
|
|
25
|
+
|
|
26
|
+
def test_batch_processor_initialization(self):
|
|
27
|
+
"""测试批处理器初始化"""
|
|
28
|
+
self.assertEqual(self.batch_processor.batch_size, 3)
|
|
29
|
+
self.assertEqual(self.batch_processor.max_concurrent_batches, 2)
|
|
30
|
+
|
|
31
|
+
def sync_process_item(self, item):
|
|
32
|
+
"""同步处理函数"""
|
|
33
|
+
return item * 2
|
|
34
|
+
|
|
35
|
+
def test_batch_processor_process_batch_sync(self):
|
|
36
|
+
"""测试批处理器同步处理批次"""
|
|
37
|
+
items = [1, 2, 3]
|
|
38
|
+
# 使用事件循环运行异步方法
|
|
39
|
+
results = asyncio.run(self.batch_processor.process_batch(items, self.sync_process_item))
|
|
40
|
+
self.assertEqual(results, [2, 4, 6])
|
|
41
|
+
|
|
42
|
+
def test_batch_processor_process_in_batches_sync(self):
|
|
43
|
+
"""测试批处理器同步分批处理大量数据"""
|
|
44
|
+
items = [1, 2, 3, 4, 5, 6, 7]
|
|
45
|
+
# 使用事件循环运行异步方法
|
|
46
|
+
results = asyncio.run(self.batch_processor.process_in_batches(items, self.sync_process_item))
|
|
47
|
+
expected = [2, 4, 6, 8, 10, 12, 14]
|
|
48
|
+
self.assertEqual(results, expected)
|
|
49
|
+
|
|
50
|
+
def test_batch_processor_with_exception_handling(self):
|
|
51
|
+
"""测试批处理器异常处理"""
|
|
52
|
+
def failing_processor(item):
|
|
53
|
+
if item == 2:
|
|
54
|
+
raise ValueError("处理失败")
|
|
55
|
+
return item * 2
|
|
56
|
+
|
|
57
|
+
items = [1, 2, 3]
|
|
58
|
+
# 使用事件循环运行异步方法
|
|
59
|
+
results = asyncio.run(self.batch_processor.process_batch(items, failing_processor))
|
|
60
|
+
# 异常项应该被过滤掉
|
|
61
|
+
self.assertIn(2, results)
|
|
62
|
+
self.assertIn(6, results)
|
|
63
|
+
# 检查长度至少为2
|
|
64
|
+
self.assertGreaterEqual(len(results), 2)
|
|
65
|
+
|
|
66
|
+
def test_batch_processor_decorator(self):
|
|
67
|
+
"""测试批处理器装饰器"""
|
|
68
|
+
@self.batch_processor.batch_process_decorator(batch_size=2)
|
|
69
|
+
def process_func(items):
|
|
70
|
+
return [item * 3 for item in items]
|
|
71
|
+
|
|
72
|
+
items = [1, 2, 3, 4]
|
|
73
|
+
results = process_func(items)
|
|
74
|
+
# 检查结果不为空
|
|
75
|
+
self.assertIsNotNone(results)
|
|
76
|
+
self.assertIsInstance(results, list)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class TestRedisBatchProcessor(unittest.TestCase):
|
|
80
|
+
"""Redis批处理器测试类"""
|
|
81
|
+
|
|
82
|
+
def setUp(self):
|
|
83
|
+
"""测试前准备"""
|
|
84
|
+
self.mock_redis_client = Mock()
|
|
85
|
+
self.redis_batch_processor = RedisBatchProcessor(self.mock_redis_client, batch_size=3)
|
|
86
|
+
|
|
87
|
+
def test_redis_batch_processor_initialization(self):
|
|
88
|
+
"""测试Redis批处理器初始化"""
|
|
89
|
+
self.assertEqual(self.redis_batch_processor.batch_size, 3)
|
|
90
|
+
self.assertEqual(self.redis_batch_processor.redis_client, self.mock_redis_client)
|
|
91
|
+
|
|
92
|
+
def test_redis_batch_processor_batch_set(self):
|
|
93
|
+
"""测试Redis批处理器批量设置"""
|
|
94
|
+
items = [
|
|
95
|
+
{'key': 'key1', 'value': 'value1'},
|
|
96
|
+
{'key': 'key2', 'value': 'value2'},
|
|
97
|
+
{'key': 'key3', 'value': 'value3'}
|
|
98
|
+
]
|
|
99
|
+
|
|
100
|
+
# 模拟pipeline行为
|
|
101
|
+
mock_pipe = Mock()
|
|
102
|
+
self.mock_redis_client.pipeline.return_value = mock_pipe
|
|
103
|
+
mock_pipe.execute.return_value = None # execute方法返回None
|
|
104
|
+
mock_pipe.set.return_value = mock_pipe # set方法返回pipe自身以支持链式调用
|
|
105
|
+
|
|
106
|
+
# 使用事件循环运行异步方法
|
|
107
|
+
count = asyncio.run(self.redis_batch_processor.batch_set(items))
|
|
108
|
+
self.assertEqual(count, 3)
|
|
109
|
+
|
|
110
|
+
def test_redis_batch_processor_batch_set_empty(self):
|
|
111
|
+
"""测试Redis批处理器批量设置空列表"""
|
|
112
|
+
items = []
|
|
113
|
+
# 使用事件循环运行异步方法
|
|
114
|
+
count = asyncio.run(self.redis_batch_processor.batch_set(items))
|
|
115
|
+
self.assertEqual(count, 0)
|
|
116
|
+
|
|
117
|
+
def test_redis_batch_processor_batch_get(self):
|
|
118
|
+
"""测试Redis批处理器批量获取"""
|
|
119
|
+
keys = ['key1', 'key2', 'key3']
|
|
120
|
+
|
|
121
|
+
# 模拟pipeline行为
|
|
122
|
+
mock_pipe = Mock()
|
|
123
|
+
self.mock_redis_client.pipeline.return_value = mock_pipe
|
|
124
|
+
mock_pipe.get.return_value = mock_pipe # get方法返回pipe自身以支持链式调用
|
|
125
|
+
mock_pipe.execute.return_value = ['value1', 'value2', 'value3']
|
|
126
|
+
|
|
127
|
+
# 使用事件循环运行异步方法
|
|
128
|
+
result = asyncio.run(self.redis_batch_processor.batch_get(keys))
|
|
129
|
+
expected = {'key1': 'value1', 'key2': 'value2', 'key3': 'value3'}
|
|
130
|
+
self.assertEqual(result, expected)
|
|
131
|
+
|
|
132
|
+
def test_redis_batch_processor_batch_get_with_none_values(self):
|
|
133
|
+
"""测试Redis批处理器批量获取包含None值"""
|
|
134
|
+
keys = ['key1', 'key2', 'key3']
|
|
135
|
+
|
|
136
|
+
# 模拟pipeline行为,其中key2返回None
|
|
137
|
+
mock_pipe = Mock()
|
|
138
|
+
self.mock_redis_client.pipeline.return_value = mock_pipe
|
|
139
|
+
mock_pipe.get.return_value = mock_pipe # get方法返回pipe自身以支持链式调用
|
|
140
|
+
mock_pipe.execute.return_value = ['value1', None, 'value3']
|
|
141
|
+
|
|
142
|
+
# 使用事件循环运行异步方法
|
|
143
|
+
result = asyncio.run(self.redis_batch_processor.batch_get(keys))
|
|
144
|
+
expected = {'key1': 'value1', 'key3': 'value3'} # key2应该被过滤掉
|
|
145
|
+
self.assertEqual(result, expected)
|
|
146
|
+
|
|
147
|
+
def test_redis_batch_processor_batch_delete(self):
|
|
148
|
+
"""测试Redis批处理器批量删除"""
|
|
149
|
+
keys = ['key1', 'key2', 'key3']
|
|
150
|
+
|
|
151
|
+
# 模拟pipeline行为
|
|
152
|
+
mock_pipe = Mock()
|
|
153
|
+
self.mock_redis_client.pipeline.return_value = mock_pipe
|
|
154
|
+
mock_pipe.delete.return_value = mock_pipe # delete方法返回pipe自身以支持链式调用
|
|
155
|
+
mock_pipe.execute.return_value = None
|
|
156
|
+
|
|
157
|
+
# 使用事件循环运行异步方法
|
|
158
|
+
count = asyncio.run(self.redis_batch_processor.batch_delete(keys))
|
|
159
|
+
self.assertEqual(count, 3)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
class TestBatchProcessFunction(unittest.TestCase):
|
|
163
|
+
"""批处理便捷函数测试类"""
|
|
164
|
+
|
|
165
|
+
def sync_process_item(self, item):
|
|
166
|
+
"""同步处理函数"""
|
|
167
|
+
return item * 2
|
|
168
|
+
|
|
169
|
+
def test_batch_process_sync_function(self):
|
|
170
|
+
"""测试批处理便捷函数处理同步函数"""
|
|
171
|
+
items = [1, 2, 3, 4, 5]
|
|
172
|
+
results = batch_process(items, self.sync_process_item, batch_size=2, max_concurrent_batches=2)
|
|
173
|
+
expected = [2, 4, 6, 8, 10]
|
|
174
|
+
self.assertEqual(results, expected)
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
if __name__ == '__main__':
|
|
178
|
+
# 运行测试
|
|
179
179
|
unittest.main()
|
tests/test_cleaners.py
CHANGED
|
@@ -1,55 +1,55 @@
|
|
|
1
|
-
#!/usr/bin/python
|
|
2
|
-
# -*- coding: UTF-8 -*-
|
|
3
|
-
"""
|
|
4
|
-
数据清洗工具测试
|
|
5
|
-
"""
|
|
6
|
-
import unittest
|
|
7
|
-
from crawlo.tools import (
|
|
8
|
-
TextCleaner,
|
|
9
|
-
DataFormatter,
|
|
10
|
-
remove_html_tags,
|
|
11
|
-
decode_html_entities,
|
|
12
|
-
clean_text,
|
|
13
|
-
format_number,
|
|
14
|
-
format_currency,
|
|
15
|
-
format_phone_number
|
|
16
|
-
)
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class TestCleaners(unittest.TestCase):
|
|
20
|
-
"""数据清洗工具测试类"""
|
|
21
|
-
|
|
22
|
-
def test_text_cleaner(self):
|
|
23
|
-
"""测试文本清洗功能"""
|
|
24
|
-
# 测试移除HTML标签
|
|
25
|
-
html_text = "<p>这是一个<b>测试</b>文本</p>"
|
|
26
|
-
clean_text_result = remove_html_tags(html_text)
|
|
27
|
-
self.assertEqual(clean_text_result, "这是一个测试文本")
|
|
28
|
-
|
|
29
|
-
# 测试解码HTML实体
|
|
30
|
-
entity_text = "这是一个 测试&文本"
|
|
31
|
-
decoded_text = decode_html_entities(entity_text)
|
|
32
|
-
self.assertEqual(decoded_text, "这是一个 测试&文本")
|
|
33
|
-
|
|
34
|
-
# 测试综合清洗
|
|
35
|
-
complex_text = "<p>这是一个 <b>测试</b>&文本</p>"
|
|
36
|
-
cleaned = clean_text(complex_text)
|
|
37
|
-
self.assertEqual(cleaned, "这是一个 测试&文本")
|
|
38
|
-
|
|
39
|
-
def test_data_formatter(self):
|
|
40
|
-
"""测试数据格式化功能"""
|
|
41
|
-
# 测试数字格式化
|
|
42
|
-
formatted_num = format_number(1234.567, precision=2, thousand_separator=True)
|
|
43
|
-
self.assertEqual(formatted_num, "1,234.57")
|
|
44
|
-
|
|
45
|
-
# 测试货币格式化
|
|
46
|
-
formatted_currency = format_currency(1234.567, "¥", 2)
|
|
47
|
-
self.assertEqual(formatted_currency, "¥1,234.57")
|
|
48
|
-
|
|
49
|
-
# 测试电话号码格式化
|
|
50
|
-
formatted_phone = format_phone_number("13812345678", "+86", "international")
|
|
51
|
-
self.assertEqual(formatted_phone, "+86 138 1234 5678")
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
if __name__ == '__main__':
|
|
1
|
+
#!/usr/bin/python
|
|
2
|
+
# -*- coding: UTF-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
数据清洗工具测试
|
|
5
|
+
"""
|
|
6
|
+
import unittest
|
|
7
|
+
from crawlo.tools import (
|
|
8
|
+
TextCleaner,
|
|
9
|
+
DataFormatter,
|
|
10
|
+
remove_html_tags,
|
|
11
|
+
decode_html_entities,
|
|
12
|
+
clean_text,
|
|
13
|
+
format_number,
|
|
14
|
+
format_currency,
|
|
15
|
+
format_phone_number
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class TestCleaners(unittest.TestCase):
|
|
20
|
+
"""数据清洗工具测试类"""
|
|
21
|
+
|
|
22
|
+
def test_text_cleaner(self):
|
|
23
|
+
"""测试文本清洗功能"""
|
|
24
|
+
# 测试移除HTML标签
|
|
25
|
+
html_text = "<p>这是一个<b>测试</b>文本</p>"
|
|
26
|
+
clean_text_result = remove_html_tags(html_text)
|
|
27
|
+
self.assertEqual(clean_text_result, "这是一个测试文本")
|
|
28
|
+
|
|
29
|
+
# 测试解码HTML实体
|
|
30
|
+
entity_text = "这是一个 测试&文本"
|
|
31
|
+
decoded_text = decode_html_entities(entity_text)
|
|
32
|
+
self.assertEqual(decoded_text, "这是一个 测试&文本")
|
|
33
|
+
|
|
34
|
+
# 测试综合清洗
|
|
35
|
+
complex_text = "<p>这是一个 <b>测试</b>&文本</p>"
|
|
36
|
+
cleaned = clean_text(complex_text)
|
|
37
|
+
self.assertEqual(cleaned, "这是一个 测试&文本")
|
|
38
|
+
|
|
39
|
+
def test_data_formatter(self):
|
|
40
|
+
"""测试数据格式化功能"""
|
|
41
|
+
# 测试数字格式化
|
|
42
|
+
formatted_num = format_number(1234.567, precision=2, thousand_separator=True)
|
|
43
|
+
self.assertEqual(formatted_num, "1,234.57")
|
|
44
|
+
|
|
45
|
+
# 测试货币格式化
|
|
46
|
+
formatted_currency = format_currency(1234.567, "¥", 2)
|
|
47
|
+
self.assertEqual(formatted_currency, "¥1,234.57")
|
|
48
|
+
|
|
49
|
+
# 测试电话号码格式化
|
|
50
|
+
formatted_phone = format_phone_number("13812345678", "+86", "international")
|
|
51
|
+
self.assertEqual(formatted_phone, "+86 138 1234 5678")
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
if __name__ == '__main__':
|
|
55
55
|
unittest.main()
|