crawlo 1.1.4__py3-none-any.whl → 1.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crawlo might be problematic. Click here for more details.

Files changed (186) hide show
  1. crawlo/__init__.py +61 -34
  2. crawlo/__version__.py +1 -1
  3. crawlo/cleaners/__init__.py +61 -0
  4. crawlo/cleaners/data_formatter.py +226 -0
  5. crawlo/cleaners/encoding_converter.py +126 -0
  6. crawlo/cleaners/text_cleaner.py +233 -0
  7. crawlo/cli.py +40 -40
  8. crawlo/commands/__init__.py +13 -13
  9. crawlo/commands/check.py +594 -594
  10. crawlo/commands/genspider.py +151 -151
  11. crawlo/commands/list.py +155 -155
  12. crawlo/commands/run.py +285 -285
  13. crawlo/commands/startproject.py +300 -196
  14. crawlo/commands/stats.py +188 -188
  15. crawlo/commands/utils.py +186 -186
  16. crawlo/config.py +309 -279
  17. crawlo/config_validator.py +253 -0
  18. crawlo/core/__init__.py +2 -2
  19. crawlo/core/engine.py +346 -172
  20. crawlo/core/processor.py +40 -40
  21. crawlo/core/scheduler.py +137 -166
  22. crawlo/crawler.py +1027 -1027
  23. crawlo/downloader/__init__.py +266 -242
  24. crawlo/downloader/aiohttp_downloader.py +220 -212
  25. crawlo/downloader/cffi_downloader.py +256 -251
  26. crawlo/downloader/httpx_downloader.py +259 -259
  27. crawlo/downloader/hybrid_downloader.py +214 -0
  28. crawlo/downloader/playwright_downloader.py +403 -0
  29. crawlo/downloader/selenium_downloader.py +473 -0
  30. crawlo/event.py +11 -11
  31. crawlo/exceptions.py +81 -81
  32. crawlo/extension/__init__.py +37 -37
  33. crawlo/extension/health_check.py +141 -141
  34. crawlo/extension/log_interval.py +57 -57
  35. crawlo/extension/log_stats.py +81 -81
  36. crawlo/extension/logging_extension.py +43 -43
  37. crawlo/extension/memory_monitor.py +104 -88
  38. crawlo/extension/performance_profiler.py +133 -117
  39. crawlo/extension/request_recorder.py +107 -107
  40. crawlo/filters/__init__.py +154 -154
  41. crawlo/filters/aioredis_filter.py +280 -242
  42. crawlo/filters/memory_filter.py +269 -269
  43. crawlo/items/__init__.py +23 -23
  44. crawlo/items/base.py +21 -21
  45. crawlo/items/fields.py +53 -53
  46. crawlo/items/items.py +104 -104
  47. crawlo/middleware/__init__.py +21 -21
  48. crawlo/middleware/default_header.py +32 -32
  49. crawlo/middleware/download_delay.py +28 -28
  50. crawlo/middleware/middleware_manager.py +135 -135
  51. crawlo/middleware/proxy.py +272 -248
  52. crawlo/middleware/request_ignore.py +30 -30
  53. crawlo/middleware/response_code.py +18 -18
  54. crawlo/middleware/response_filter.py +26 -26
  55. crawlo/middleware/retry.py +124 -124
  56. crawlo/mode_manager.py +206 -201
  57. crawlo/network/__init__.py +21 -21
  58. crawlo/network/request.py +338 -311
  59. crawlo/network/response.py +360 -271
  60. crawlo/pipelines/__init__.py +21 -21
  61. crawlo/pipelines/bloom_dedup_pipeline.py +156 -156
  62. crawlo/pipelines/console_pipeline.py +39 -39
  63. crawlo/pipelines/csv_pipeline.py +316 -316
  64. crawlo/pipelines/database_dedup_pipeline.py +224 -224
  65. crawlo/pipelines/json_pipeline.py +218 -218
  66. crawlo/pipelines/memory_dedup_pipeline.py +115 -115
  67. crawlo/pipelines/mongo_pipeline.py +131 -131
  68. crawlo/pipelines/mysql_pipeline.py +316 -316
  69. crawlo/pipelines/pipeline_manager.py +56 -56
  70. crawlo/pipelines/redis_dedup_pipeline.py +166 -162
  71. crawlo/project.py +153 -153
  72. crawlo/queue/pqueue.py +37 -37
  73. crawlo/queue/queue_manager.py +320 -307
  74. crawlo/queue/redis_priority_queue.py +277 -209
  75. crawlo/settings/__init__.py +7 -7
  76. crawlo/settings/default_settings.py +216 -278
  77. crawlo/settings/setting_manager.py +99 -99
  78. crawlo/spider/__init__.py +639 -639
  79. crawlo/stats_collector.py +59 -59
  80. crawlo/subscriber.py +130 -130
  81. crawlo/task_manager.py +30 -30
  82. crawlo/templates/crawlo.cfg.tmpl +10 -10
  83. crawlo/templates/project/__init__.py.tmpl +3 -3
  84. crawlo/templates/project/items.py.tmpl +17 -17
  85. crawlo/templates/project/middlewares.py.tmpl +110 -110
  86. crawlo/templates/project/pipelines.py.tmpl +97 -97
  87. crawlo/templates/project/run.py.tmpl +251 -251
  88. crawlo/templates/project/settings.py.tmpl +326 -279
  89. crawlo/templates/project/settings_distributed.py.tmpl +120 -0
  90. crawlo/templates/project/settings_gentle.py.tmpl +95 -0
  91. crawlo/templates/project/settings_high_performance.py.tmpl +152 -0
  92. crawlo/templates/project/settings_simple.py.tmpl +69 -0
  93. crawlo/templates/project/spiders/__init__.py.tmpl +5 -5
  94. crawlo/templates/spider/spider.py.tmpl +141 -141
  95. crawlo/tools/__init__.py +183 -0
  96. crawlo/tools/anti_crawler.py +269 -0
  97. crawlo/tools/authenticated_proxy.py +241 -0
  98. crawlo/tools/data_validator.py +181 -0
  99. crawlo/tools/date_tools.py +36 -0
  100. crawlo/tools/distributed_coordinator.py +387 -0
  101. crawlo/tools/retry_mechanism.py +221 -0
  102. crawlo/tools/scenario_adapter.py +263 -0
  103. crawlo/utils/__init__.py +35 -7
  104. crawlo/utils/batch_processor.py +261 -0
  105. crawlo/utils/controlled_spider_mixin.py +439 -439
  106. crawlo/utils/date_tools.py +290 -233
  107. crawlo/utils/db_helper.py +343 -343
  108. crawlo/utils/enhanced_error_handler.py +360 -0
  109. crawlo/utils/env_config.py +106 -0
  110. crawlo/utils/error_handler.py +126 -0
  111. crawlo/utils/func_tools.py +82 -82
  112. crawlo/utils/large_scale_config.py +286 -286
  113. crawlo/utils/large_scale_helper.py +343 -343
  114. crawlo/utils/log.py +128 -128
  115. crawlo/utils/performance_monitor.py +285 -0
  116. crawlo/utils/queue_helper.py +175 -175
  117. crawlo/utils/redis_connection_pool.py +335 -0
  118. crawlo/utils/redis_key_validator.py +200 -0
  119. crawlo/utils/request.py +267 -267
  120. crawlo/utils/request_serializer.py +219 -219
  121. crawlo/utils/spider_loader.py +62 -62
  122. crawlo/utils/system.py +11 -11
  123. crawlo/utils/tools.py +4 -4
  124. crawlo/utils/url.py +39 -39
  125. {crawlo-1.1.4.dist-info → crawlo-1.1.5.dist-info}/METADATA +401 -403
  126. crawlo-1.1.5.dist-info/RECORD +185 -0
  127. examples/__init__.py +7 -7
  128. tests/__init__.py +7 -7
  129. tests/advanced_tools_example.py +276 -0
  130. tests/authenticated_proxy_example.py +237 -0
  131. tests/cleaners_example.py +161 -0
  132. tests/config_validation_demo.py +103 -0
  133. {examples → tests}/controlled_spider_example.py +205 -205
  134. tests/date_tools_example.py +181 -0
  135. tests/dynamic_loading_example.py +524 -0
  136. tests/dynamic_loading_test.py +105 -0
  137. tests/env_config_example.py +134 -0
  138. tests/error_handling_example.py +172 -0
  139. tests/redis_key_validation_demo.py +131 -0
  140. tests/response_improvements_example.py +145 -0
  141. tests/test_advanced_tools.py +149 -0
  142. tests/test_all_redis_key_configs.py +146 -0
  143. tests/test_authenticated_proxy.py +142 -0
  144. tests/test_cleaners.py +55 -0
  145. tests/test_comprehensive.py +147 -0
  146. tests/test_config_validator.py +194 -0
  147. tests/test_date_tools.py +124 -0
  148. tests/test_dynamic_downloaders_proxy.py +125 -0
  149. tests/test_dynamic_proxy.py +93 -0
  150. tests/test_dynamic_proxy_config.py +147 -0
  151. tests/test_dynamic_proxy_real.py +110 -0
  152. tests/test_edge_cases.py +304 -0
  153. tests/test_enhanced_error_handler.py +271 -0
  154. tests/test_env_config.py +122 -0
  155. tests/test_error_handler_compatibility.py +113 -0
  156. tests/test_final_validation.py +153 -153
  157. tests/test_framework_env_usage.py +104 -0
  158. tests/test_integration.py +357 -0
  159. tests/test_item_dedup_redis_key.py +123 -0
  160. tests/test_parsel.py +30 -0
  161. tests/test_performance.py +328 -0
  162. tests/test_proxy_health_check.py +32 -32
  163. tests/test_proxy_middleware_integration.py +136 -136
  164. tests/test_proxy_providers.py +56 -56
  165. tests/test_proxy_stats.py +19 -19
  166. tests/test_proxy_strategies.py +59 -59
  167. tests/test_queue_manager_redis_key.py +177 -0
  168. tests/test_redis_config.py +28 -28
  169. tests/test_redis_connection_pool.py +295 -0
  170. tests/test_redis_key_naming.py +182 -0
  171. tests/test_redis_key_validator.py +124 -0
  172. tests/test_redis_queue.py +224 -224
  173. tests/test_request_serialization.py +70 -70
  174. tests/test_response_improvements.py +153 -0
  175. tests/test_scheduler.py +241 -241
  176. tests/test_simple_response.py +62 -0
  177. tests/test_telecom_spider_redis_key.py +206 -0
  178. tests/test_template_content.py +88 -0
  179. tests/test_template_redis_key.py +135 -0
  180. tests/test_tools.py +154 -0
  181. tests/tools_example.py +258 -0
  182. crawlo/core/enhanced_engine.py +0 -190
  183. crawlo-1.1.4.dist-info/RECORD +0 -117
  184. {crawlo-1.1.4.dist-info → crawlo-1.1.5.dist-info}/WHEEL +0 -0
  185. {crawlo-1.1.4.dist-info → crawlo-1.1.5.dist-info}/entry_points.txt +0 -0
  186. {crawlo-1.1.4.dist-info → crawlo-1.1.5.dist-info}/top_level.txt +0 -0
crawlo/utils/log.py CHANGED
@@ -1,129 +1,129 @@
1
- # -*- coding: UTF-8 -*-
2
- """
3
- 日志管理器:安全版本,使用字符串化 key 避免 unhashable 问题
4
- """
5
- import os
6
- from logging import (
7
- Formatter,
8
- StreamHandler,
9
- FileHandler,
10
- Logger,
11
- DEBUG,
12
- INFO,
13
- WARNING,
14
- ERROR,
15
- CRITICAL,
16
- )
17
-
18
- LOG_FORMAT = '%(asctime)s - [%(name)s] - %(levelname)s: %(message)s'
19
-
20
-
21
- class LoggerManager:
22
- logger_cache = {}
23
- _default_filename = None
24
- _default_level = INFO
25
- _default_file_level = INFO
26
- _default_console_level = INFO
27
- _default_log_format = LOG_FORMAT
28
- _default_encoding = 'utf-8'
29
-
30
- _level_map = {
31
- 'DEBUG': DEBUG,
32
- 'INFO': INFO,
33
- 'WARNING': WARNING,
34
- 'ERROR': ERROR,
35
- 'CRITICAL': CRITICAL,
36
- }
37
-
38
- @classmethod
39
- def _to_level(cls, level):
40
- """安全转换为日志级别 int"""
41
- if level is None:
42
- return INFO
43
- if isinstance(level, int):
44
- return level
45
- if isinstance(level, str):
46
- return cls._level_map.get(level.upper(), INFO)
47
- if hasattr(level, 'get'): # 如 SettingManager 或 dict
48
- lv = level.get('LOG_LEVEL')
49
- if isinstance(lv, int):
50
- return lv
51
- if isinstance(lv, str):
52
- return cls._level_map.get(lv.upper(), INFO)
53
- return INFO
54
-
55
- @classmethod
56
- def configure(cls, settings=None, **kwargs):
57
- """
58
- 使用 settings 对象或关键字参数配置日志
59
- """
60
- # 优先使用 settings,否则用 kwargs
61
- get_val = settings.get if hasattr(settings, 'get') else (lambda k, d=None: kwargs.get(k, d))
62
-
63
- filename = get_val('LOG_FILE')
64
- level = get_val('LOG_LEVEL', 'INFO')
65
- file_level = get_val('LOG_FILE_LEVEL', level)
66
- console_level = get_val('LOG_CONSOLE_LEVEL', level)
67
- log_format = get_val('LOG_FORMAT', LOG_FORMAT)
68
- encoding = get_val('LOG_ENCODING', 'utf-8')
69
-
70
- cls._default_filename = filename
71
- cls._default_level = cls._to_level(level)
72
- cls._default_file_level = cls._to_level(file_level)
73
- cls._default_console_level = cls._to_level(console_level)
74
- cls._default_log_format = log_format
75
- cls._default_encoding = encoding
76
-
77
- @classmethod
78
- def get_logger(cls, name='default', level=None, filename=None):
79
- """
80
- 简化接口,只暴露必要参数
81
- """
82
- # 确定最终参数
83
- final_level = cls._to_level(level) if level is not None else cls._default_level
84
- final_filename = filename if filename is not None else cls._default_filename
85
-
86
- # ✅ 安全的字符串化 key,避免任何 unhashable 类型
87
- key_parts = [
88
- name,
89
- str(final_level),
90
- final_filename or 'no_file',
91
- ]
92
- key = '|'.join(key_parts) # 如 "my_spider|20|logs/app.log"
93
-
94
- if key in cls.logger_cache:
95
- return cls.logger_cache[key]
96
-
97
- # 创建 logger
98
- _logger = Logger(name=name)
99
- _logger.setLevel(final_level)
100
-
101
- formatter = Formatter(cls._default_log_format)
102
-
103
- # 控制台
104
- if cls._default_console_level is not False:
105
- ch = StreamHandler()
106
- ch.setFormatter(formatter)
107
- ch.setLevel(cls._default_console_level)
108
- _logger.addHandler(ch)
109
-
110
- # 文件
111
- if final_filename:
112
- try:
113
- log_dir = os.path.dirname(final_filename)
114
- if log_dir and not os.path.exists(log_dir):
115
- os.makedirs(log_dir, exist_ok=True)
116
-
117
- fh = FileHandler(final_filename, encoding=cls._default_encoding)
118
- fh.setFormatter(formatter)
119
- fh.setLevel(cls._default_file_level)
120
- _logger.addHandler(fh)
121
- except Exception as e:
122
- print(f"[Logger] 无法创建日志文件 {final_filename}: {e}")
123
-
124
- cls.logger_cache[key] = _logger
125
- return _logger
126
-
127
-
128
- # 全局快捷函数
1
+ # -*- coding: UTF-8 -*-
2
+ """
3
+ 日志管理器:安全版本,使用字符串化 key 避免 unhashable 问题
4
+ """
5
+ import os
6
+ from logging import (
7
+ Formatter,
8
+ StreamHandler,
9
+ FileHandler,
10
+ Logger,
11
+ DEBUG,
12
+ INFO,
13
+ WARNING,
14
+ ERROR,
15
+ CRITICAL,
16
+ )
17
+
18
# Default record layout: timestamp, logger name, level name, message.
LOG_FORMAT = '%(asctime)s - [%(name)s] - %(levelname)s: %(message)s'


class LoggerManager:
    """Build, configure and cache ``logging.Logger`` instances.

    Defaults are stored on the class and changed through :meth:`configure`.
    Loggers are constructed directly with ``Logger(name=...)`` and cached by a
    string key made of the name, the effective level and the log file, so
    repeated requests with the same triple return the same object.

    NOTE(review): the cache key does not include the format or the
    console/file levels, so loggers created before a later ``configure()``
    call keep the handlers they were built with.
    """

    # Cache of already built loggers, keyed by "name|level|filename".
    logger_cache = {}
    # Class-wide defaults, overwritten by configure().
    _default_filename = None
    _default_level = INFO
    _default_file_level = INFO
    _default_console_level = INFO
    _default_log_format = LOG_FORMAT
    _default_encoding = 'utf-8'

    # Accepted level names (matched case-insensitively) and their numeric values.
    _level_map = {
        'DEBUG': DEBUG,
        'INFO': INFO,
        'WARNING': WARNING,
        'ERROR': ERROR,
        'CRITICAL': CRITICAL,
    }

    @classmethod
    def _to_level(cls, level):
        """Convert *level* to a numeric logging level, falling back to ``INFO``.

        Args:
            level: ``None``, an int, a level name, or any object with a
                ``get`` method (for example a dict or settings object) from
                which ``'LOG_LEVEL'`` is read.

        Returns:
            The numeric level. Unknown names and unsupported types yield
            ``INFO``. Ints are returned unchanged; because ``bool`` is an
            ``int`` subclass, ``False`` passes through as-is, which
            ``get_logger`` uses to skip the console handler.
        """
        if level is None:
            return INFO
        if isinstance(level, int):
            return level
        if isinstance(level, str):
            return cls._level_map.get(level.upper(), INFO)
        if hasattr(level, 'get'):  # e.g. a SettingManager or a dict
            lv = level.get('LOG_LEVEL')
            if isinstance(lv, int):
                return lv
            if isinstance(lv, str):
                return cls._level_map.get(lv.upper(), INFO)
        return INFO

    @classmethod
    def configure(cls, settings=None, **kwargs):
        """Set the class-wide logging defaults.

        Args:
            settings: Object exposing ``get(key, default)``. When it has a
                ``get`` attribute it is the only source consulted and
                *kwargs* are ignored.
            **kwargs: Used only when *settings* has no ``get``. Recognised
                keys: ``LOG_FILE``, ``LOG_LEVEL``, ``LOG_FILE_LEVEL``,
                ``LOG_CONSOLE_LEVEL``, ``LOG_FORMAT``, ``LOG_ENCODING``.
        """
        # Prefer the settings object; otherwise read from the keyword arguments.
        get_val = settings.get if hasattr(settings, 'get') else (lambda k, d=None: kwargs.get(k, d))

        filename = get_val('LOG_FILE')
        level = get_val('LOG_LEVEL', 'INFO')
        # File and console levels default to the general level.
        file_level = get_val('LOG_FILE_LEVEL', level)
        console_level = get_val('LOG_CONSOLE_LEVEL', level)
        log_format = get_val('LOG_FORMAT', LOG_FORMAT)
        encoding = get_val('LOG_ENCODING', 'utf-8')

        cls._default_filename = filename
        cls._default_level = cls._to_level(level)
        cls._default_file_level = cls._to_level(file_level)
        cls._default_console_level = cls._to_level(console_level)
        cls._default_log_format = log_format
        cls._default_encoding = encoding

    @classmethod
    def get_logger(cls, name='default', level=None, filename=None):
        """Return a cached logger, creating it on first request.

        Args:
            name: Logger name; also the first component of the cache key.
            level: Optional level override (anything ``_to_level`` accepts).
                ``None`` means "use the configured default".
            filename: Optional log file override. ``None`` means "use the
                configured default"; a falsy result disables file logging.

        Returns:
            A ``logging.Logger`` with a console handler (unless the
            configured console level is ``False``) and, when a file name is
            in effect, a file handler.
        """
        final_level = cls._to_level(level) if level is not None else cls._default_level
        final_filename = filename if filename is not None else cls._default_filename

        # Stringify every component: a non-str name or a pathlib.Path file
        # name would otherwise make str.join raise TypeError.
        key = '|'.join(
            str(part) for part in (name, final_level, final_filename or 'no_file')
        )  # e.g. "my_spider|20|logs/app.log"

        cached = cls.logger_cache.get(key)
        if cached is not None:
            return cached

        _logger = Logger(name=name)
        _logger.setLevel(final_level)

        formatter = Formatter(cls._default_log_format)

        # Console output; skipped only when the console level is literally False.
        if cls._default_console_level is not False:
            ch = StreamHandler()
            ch.setFormatter(formatter)
            ch.setLevel(cls._default_console_level)
            _logger.addHandler(ch)

        # File output is best effort: a failure is reported on stdout and the
        # logger is still returned with whatever handlers it already has.
        if final_filename:
            try:
                log_dir = os.path.dirname(final_filename)
                if log_dir:
                    os.makedirs(log_dir, exist_ok=True)

                fh = FileHandler(final_filename, encoding=cls._default_encoding)
                fh.setFormatter(formatter)
                fh.setLevel(cls._default_file_level)
                _logger.addHandler(fh)
            except Exception as e:
                print(f"[Logger] 无法创建日志文件 {final_filename}: {e}")

        cls.logger_cache[key] = _logger
        return _logger


# Module-level shortcut.
get_logger = LoggerManager.get_logger
crawlo/utils/performance_monitor.py ADDED
@@ -0,0 +1,285 @@
1
+ #!/usr/bin/python
2
+ # -*- coding:UTF-8 -*-
3
+ """
4
+ 性能监控工具
5
+ 提供系统性能监控和资源使用情况跟踪
6
+ """
7
+ import time
8
+ import psutil
9
+ import asyncio
10
+ from typing import Dict, Any, Optional, Callable
11
+ from functools import wraps
12
+
13
+ from crawlo.utils.log import get_logger
14
+ from crawlo.utils.error_handler import ErrorHandler
15
+
16
+
17
class PerformanceMonitor:
    """Sample and log host-wide and per-process resource usage via psutil."""

    def __init__(self, logger_name: str = __name__):
        """Bind a logger, an error handler and the current process.

        Args:
            logger_name: Name passed to both ``get_logger`` and ``ErrorHandler``.
        """
        self.logger = get_logger(logger_name)
        self.error_handler = ErrorHandler(logger_name)
        self.process = psutil.Process()
        self.start_time = time.time()
        # Background task created by start_monitoring(); None while idle.
        self.monitor_task = None

        # Per-category sample lists. Nothing in this class appends to them.
        self.metrics = {
            'cpu_usage': [],
            'memory_usage': [],
            'network_io': [],
            'disk_io': []
        }

    @staticmethod
    def _counters_as_dict(counters, fields) -> Dict[str, Any]:
        """Copy *fields* from a counters object, or return zeros when it is None."""
        if counters is None:
            return {field: 0 for field in fields}
        return {field: getattr(counters, field) for field in fields}

    def get_system_metrics(self) -> Dict[str, Any]:
        """Take one snapshot of system and process metrics.

        This call blocks for about one second because the CPU percentage is
        sampled with ``interval=1``.

        Returns:
            A dict with the keys ``timestamp``, ``uptime``, ``cpu``,
            ``memory``, ``process``, ``network`` and ``disk``. If sampling
            raises, the error is passed to the error handler and an empty
            dict is returned.
        """
        try:
            # Host-wide CPU usage, measured over a blocking 1 s window.
            cpu_percent = psutil.cpu_percent(interval=1)

            memory = psutil.virtual_memory()

            # Either of these may be None when the host reports no NICs or
            # disks; _counters_as_dict turns that into zeros so the rest of
            # the snapshot is not discarded.
            net_io = psutil.net_io_counters()
            disk_io = psutil.disk_io_counters()

            # Figures for the current process only.
            process_memory = self.process.memory_info()
            process_cpu = self.process.cpu_percent()

            cpu_freq = psutil.cpu_freq()
            now = time.time()

            return {
                'timestamp': now,
                'uptime': now - self.start_time,
                'cpu': {
                    'percent': cpu_percent,
                    'count': psutil.cpu_count(),
                    'freq': cpu_freq._asdict() if cpu_freq else {}
                },
                'memory': {
                    'total': memory.total,
                    'available': memory.available,
                    'percent': memory.percent,
                    'used': memory.used,
                    'free': memory.free
                },
                'process': {
                    'memory_rss': process_memory.rss,
                    'memory_vms': process_memory.vms,
                    'cpu_percent': process_cpu,
                    'num_threads': self.process.num_threads(),
                    # num_fds is not available on every platform.
                    'num_fds': self.process.num_fds() if hasattr(self.process, 'num_fds') else 0
                },
                'network': self._counters_as_dict(
                    net_io,
                    ('bytes_sent', 'bytes_recv', 'packets_sent', 'packets_recv'),
                ),
                'disk': self._counters_as_dict(
                    disk_io,
                    ('read_bytes', 'write_bytes', 'read_count', 'write_count'),
                ),
            }
        except Exception as e:
            self.error_handler.handle_error(
                e,
                context="获取系统性能指标失败",
                raise_error=False
            )
            return {}

    def log_system_metrics(self, detailed: bool = False):
        """Log one metrics snapshot.

        A one-line summary is logged at INFO level. When *detailed* is true,
        a multi-line breakdown is additionally logged at DEBUG level.
        Nothing is logged when the snapshot is empty.

        Args:
            detailed: Whether to also emit the DEBUG breakdown.
        """
        try:
            metrics = self.get_system_metrics()
            if not metrics:
                return

            # One-line summary.
            basic_info = (
                f"📊 系统性能指标 | "
                f"CPU: {metrics['cpu']['percent']:.1f}% | "
                f"内存: {metrics['memory']['percent']:.1f}% | "
                f"进程CPU: {metrics['process']['cpu_percent']:.1f}% | "
                f"进程内存: {metrics['process']['memory_rss'] / 1024 / 1024:.1f}MB"
            )
            self.logger.info(basic_info)

            # Optional breakdown.
            if detailed:
                detailed_info = (
                    f" 详细信息:\n"
                    f" - CPU: {metrics['cpu']['count']} 核心\n"
                    f" - 内存: 总计 {metrics['memory']['total'] / 1024 / 1024 / 1024:.1f}GB, "
                    f"可用 {metrics['memory']['available'] / 1024 / 1024 / 1024:.1f}GB\n"
                    f" - 网络: 发送 {metrics['network']['bytes_sent'] / 1024 / 1024:.1f}MB, "
                    f"接收 {metrics['network']['bytes_recv'] / 1024 / 1024:.1f}MB\n"
                    f" - 磁盘: 读取 {metrics['disk']['read_bytes'] / 1024 / 1024:.1f}MB, "
                    f"写入 {metrics['disk']['write_bytes'] / 1024 / 1024:.1f}MB"
                )
                self.logger.debug(detailed_info)
        except Exception as e:
            self.error_handler.handle_error(
                e,
                context="记录系统性能指标失败",
                raise_error=False
            )

    def start_monitoring(self, interval: int = 60, detailed: bool = False):
        """Start logging metrics periodically in a background asyncio task.

        Must be called while an event loop is running, because the task is
        created with ``asyncio.create_task``. A monitoring task started by an
        earlier call is cancelled and replaced.

        Args:
            interval: Seconds to wait between two samples.
            detailed: Forwarded to ``log_system_metrics``.
        """
        async def monitor_loop():
            loop = asyncio.get_running_loop()
            try:
                while True:
                    try:
                        # Sampling blocks for ~1 s, so run it in the default
                        # executor instead of stalling the event loop.
                        await loop.run_in_executor(
                            None, self.log_system_metrics, detailed
                        )
                    except asyncio.CancelledError:
                        raise
                    except Exception as e:
                        self.logger.error(f"监控循环错误: {e}")
                    # Sleep outside the inner try so a failing sample cannot
                    # turn the loop into a busy spin.
                    await asyncio.sleep(interval)
            except asyncio.CancelledError:
                # Cancellation is the normal way to stop; finish quietly.
                return

        # Do not leave an earlier loop running without a handle to it.
        previous = getattr(self, 'monitor_task', None)
        if previous is not None and not previous.done():
            previous.cancel()

        self.monitor_task = asyncio.create_task(monitor_loop())
        self.logger.info(f"开始性能监控,间隔: {interval}秒")

    async def stop_monitoring(self):
        """Cancel the background monitoring task, if any, and wait for it to end."""
        task = getattr(self, 'monitor_task', None)
        if not task:
            return

        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass
        self.monitor_task = None
        self.logger.info("性能监控已停止")
174
+
175
+
176
class PerformanceTimer:
    """Stopwatch usable directly (``start``/``stop``) or as a context manager.

    ``start_time`` and ``end_time`` hold wall-clock timestamps from
    ``time.time()``. The elapsed duration is measured with the monotonic
    ``time.perf_counter()`` so system clock adjustments cannot distort it,
    and is kept in ``elapsed`` after ``stop()``.
    """

    # Monotonic reference taken by start(). The class-level default covers
    # instances whose start_time was assigned without calling start().
    _perf_start = None

    def __init__(self, name: str = "timer"):
        """Create an idle timer.

        Args:
            name: Label included in every log message of this timer.
        """
        self.name = name
        self.start_time = None
        self.end_time = None
        # Duration in seconds of the last completed start/stop cycle.
        self.elapsed = None
        self.logger = get_logger(f"{__name__}.{self.__class__.__name__}")
        self.error_handler = ErrorHandler(f"{__name__}.{self.__class__.__name__}")

    def start(self):
        """Start (or restart) the timer."""
        self.start_time = time.time()
        self._perf_start = time.perf_counter()
        self.logger.debug(f"⏱️ 开始计时: {self.name}")

    def stop(self) -> float:
        """Stop the timer and return the elapsed time.

        Returns:
            Elapsed seconds since the last ``start()``.

        Raises:
            RuntimeError: If the timer was never started. The timer state is
                left untouched in that case.
        """
        # Validate before recording anything so a failed stop() does not
        # leave a stray end_time behind.
        if self.start_time is None:
            raise RuntimeError("计时器未启动")

        self.end_time = time.time()
        if self._perf_start is not None:
            elapsed = time.perf_counter() - self._perf_start
        else:
            # start_time was set without start(); fall back to wall clock.
            elapsed = self.end_time - self.start_time

        self.elapsed = elapsed
        self.logger.debug(f"⏱️ 停止计时: {self.name}, 耗时: {elapsed:.3f}秒")
        return elapsed

    def __enter__(self):
        """Start timing and return the timer itself."""
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Stop timing and log the outcome.

        Success is logged at INFO level and failure at ERROR level. Nothing
        is returned, so an exception raised inside the ``with`` body still
        propagates. Errors raised while stopping or logging are passed to
        the error handler instead of being raised.
        """
        try:
            elapsed = self.stop()
            if exc_type is None:
                self.logger.info(f"✅ {self.name} 执行成功,耗时: {elapsed:.3f}秒")
            else:
                self.logger.error(f"❌ {self.name} 执行失败,耗时: {elapsed:.3f}秒")
        except Exception as e:
            self.error_handler.handle_error(
                e,
                context=f"计时器退出时发生错误: {self.name}",
                raise_error=False
            )
223
+
224
+
225
def performance_monitor_decorator(name: str = None, log_level: str = "INFO"):
    """Build a decorator that times each call of the wrapped function.

    Every call runs inside a ``PerformanceTimer`` context, which does the
    logging. Coroutine functions get an ``async`` wrapper and are awaited
    inside the timer; all other callables get a plain wrapper.

    Args:
        name: Timer label. Defaults to ``"<module>.<function name>"`` of the
            decorated function.
        log_level: Accepted for interface compatibility; this function does
            not currently use it.

    Returns:
        A decorator that preserves the wrapped function's metadata.
    """
    def decorator(func):
        import inspect

        # The label depends only on the decorated function, so compute it once.
        timer_name = name or f"{func.__module__}.{func.__name__}"

        if inspect.iscoroutinefunction(func):
            @wraps(func)
            async def async_wrapper(*args, **kwargs):
                with PerformanceTimer(timer_name):
                    return await func(*args, **kwargs)

            return async_wrapper

        @wraps(func)
        def sync_wrapper(*args, **kwargs):
            with PerformanceTimer(timer_name):
                return func(*args, **kwargs)

        return sync_wrapper

    return decorator
261
+
262
+
263
# Shared module-level monitor instance, created when this module is imported.
default_performance_monitor = PerformanceMonitor()


def monitor_performance(interval: int = 60, detailed: bool = False):
    """Start periodic monitoring on the module-level default monitor.

    Thin wrapper around ``default_performance_monitor.start_monitoring``.

    Args:
        interval: Seconds between two samples.
        detailed: Whether detailed information is logged as well.
    """
    default_performance_monitor.start_monitoring(interval=interval, detailed=detailed)
276
+
277
+
278
def get_current_metrics() -> Dict[str, Any]:
    """Fetch a metrics snapshot from the module-level default monitor.

    Returns:
        The dict produced by
        ``default_performance_monitor.get_system_metrics()``.
    """
    snapshot = default_performance_monitor.get_system_metrics()
    return snapshot