crawlo 1.4.4-py3-none-any.whl → 1.4.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of crawlo has been flagged as possibly problematic.
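If you want to confirm which release is actually installed before deciding how to react to the flag, the standard library can report it. A minimal check (nothing crawlo-specific assumed beyond the distribution name):

```python
from importlib.metadata import version

# Report the installed crawlo release. To hold back on the previous
# release while investigating, pin it: pip install crawlo==1.4.4
print(version("crawlo"))
```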
- crawlo/__init__.py +11 -15
- crawlo/__version__.py +1 -1
- crawlo/commands/startproject.py +24 -0
- crawlo/core/engine.py +2 -2
- crawlo/core/scheduler.py +4 -4
- crawlo/crawler.py +8 -7
- crawlo/downloader/__init__.py +5 -2
- crawlo/extension/__init__.py +2 -2
- crawlo/filters/aioredis_filter.py +8 -1
- crawlo/filters/memory_filter.py +8 -1
- crawlo/initialization/built_in.py +13 -4
- crawlo/initialization/core.py +5 -4
- crawlo/interfaces.py +24 -0
- crawlo/middleware/__init__.py +7 -4
- crawlo/middleware/middleware_manager.py +15 -8
- crawlo/mode_manager.py +45 -11
- crawlo/network/response.py +374 -69
- crawlo/pipelines/mysql_pipeline.py +6 -6
- crawlo/pipelines/pipeline_manager.py +2 -2
- crawlo/project.py +2 -4
- crawlo/settings/default_settings.py +4 -0
- crawlo/task_manager.py +2 -2
- crawlo/templates/project/items.py.tmpl +2 -2
- crawlo/templates/project/middlewares.py.tmpl +9 -89
- crawlo/templates/project/pipelines.py.tmpl +8 -68
- crawlo/tools/__init__.py +0 -11
- crawlo/utils/__init__.py +17 -1
- crawlo/utils/db_helper.py +220 -319
- crawlo/utils/error_handler.py +313 -67
- crawlo/utils/fingerprint.py +3 -4
- crawlo/utils/misc.py +82 -0
- crawlo/utils/request.py +55 -66
- crawlo/utils/selector_helper.py +138 -0
- crawlo/utils/spider_loader.py +185 -45
- crawlo/utils/text_helper.py +95 -0
- crawlo-1.4.5.dist-info/METADATA +329 -0
- {crawlo-1.4.4.dist-info → crawlo-1.4.5.dist-info}/RECORD +76 -49
- tests/bug_check_test.py +251 -0
- tests/direct_selector_helper_test.py +97 -0
- tests/ofweek_scrapy/ofweek_scrapy/items.py +12 -0
- tests/ofweek_scrapy/ofweek_scrapy/middlewares.py +100 -0
- tests/ofweek_scrapy/ofweek_scrapy/pipelines.py +13 -0
- tests/ofweek_scrapy/ofweek_scrapy/settings.py +85 -0
- tests/ofweek_scrapy/ofweek_scrapy/spiders/__init__.py +4 -0
- tests/ofweek_scrapy/ofweek_scrapy/spiders/ofweek_spider.py +162 -0
- tests/ofweek_scrapy/scrapy.cfg +11 -0
- tests/performance_comparison.py +4 -5
- tests/simple_crawlo_test.py +1 -2
- tests/simple_follow_test.py +39 -0
- tests/simple_response_selector_test.py +95 -0
- tests/simple_selector_helper_test.py +155 -0
- tests/simple_selector_test.py +208 -0
- tests/simple_url_test.py +74 -0
- tests/test_crawler_process_import.py +39 -0
- tests/test_crawler_process_spider_modules.py +48 -0
- tests/test_edge_cases.py +7 -5
- tests/test_encoding_core.py +57 -0
- tests/test_encoding_detection.py +127 -0
- tests/test_factory_compatibility.py +197 -0
- tests/test_optimized_selector_naming.py +101 -0
- tests/test_priority_behavior.py +18 -18
- tests/test_response_follow.py +105 -0
- tests/test_response_selector_methods.py +93 -0
- tests/test_response_url_methods.py +71 -0
- tests/test_response_urljoin.py +87 -0
- tests/test_scrapy_style_encoding.py +113 -0
- tests/test_selector_helper.py +101 -0
- tests/test_selector_optimizations.py +147 -0
- tests/test_spider_loader.py +50 -0
- tests/test_spider_loader_comprehensive.py +70 -0
- tests/test_spiders/__init__.py +1 -0
- tests/test_spiders/test_spider.py +10 -0
- crawlo/tools/anti_crawler.py +0 -269
- crawlo/utils/class_loader.py +0 -26
- crawlo/utils/enhanced_error_handler.py +0 -357
- crawlo-1.4.4.dist-info/METADATA +0 -190
- tests/simple_log_test.py +0 -58
- tests/simple_test.py +0 -48
- tests/test_framework_logger.py +0 -67
- tests/test_framework_startup.py +0 -65
- tests/test_mode_change.py +0 -73
- {crawlo-1.4.4.dist-info → crawlo-1.4.5.dist-info}/WHEEL +0 -0
- {crawlo-1.4.4.dist-info → crawlo-1.4.5.dist-info}/entry_points.txt +0 -0
- {crawlo-1.4.4.dist-info → crawlo-1.4.5.dist-info}/top_level.txt +0 -0
- /tests/{final_command_test_report.md → ofweek_scrapy/ofweek_scrapy/__init__.py} +0 -0
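The largest single change is crawlo/network/response.py (+374 -69), and the new test names (test_response_follow.py, test_response_urljoin.py, test_response_selector_methods.py, test_scrapy_style_encoding.py) point to Scrapy-style URL and selector helpers on the Response object. Below is a hedged sketch of the usage those tests appear to cover; the method names mirror Scrapy's API and are an assumption, not confirmed from the diff bodies:

```python
# Assumed Scrapy-style Response API suggested by the new test names;
# the actual crawlo 1.4.5 signatures are not shown in this diff.
def parse(self, response):
    # Selector helpers (test_response_selector_methods.py)
    titles = response.css("h2.title::text").getall()

    # Absolute URL resolution (test_response_urljoin.py)
    next_page = response.urljoin("?page=2")

    # Request-building shortcut (test_response_follow.py)
    for href in response.xpath("//a[@class='next']/@href").getall():
        yield response.follow(href, callback=self.parse)
```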
crawlo-1.4.4.dist-info/METADATA
DELETED
@@ -1,190 +0,0 @@
-Metadata-Version: 2.4
-Name: crawlo
-Version: 1.4.4
-Summary: Crawlo is a high-performance Python crawler framework built on asynchronous IO, with distributed crawling support.
-Home-page: https://github.com/crawl-coder/Crawlo.git
-Author: crawl-coder
-Author-email: crawlo@qq.com
-License: MIT
-Classifier: Programming Language :: Python :: 3
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Operating System :: OS Independent
-Requires-Python: >=3.6
-Description-Content-Type: text/markdown
-Requires-Dist: aiohttp>=3.12.14
-Requires-Dist: aiomysql>=0.2.0
-Requires-Dist: aioredis>=2.0.1
-Requires-Dist: asyncmy>=0.2.10
-Requires-Dist: cssselect>=1.2.0
-Requires-Dist: dateparser>=1.2.2
-Requires-Dist: httpx[http2]>=0.27.0
-Requires-Dist: curl-cffi>=0.13.0
-Requires-Dist: lxml>=5.2.1
-Requires-Dist: motor>=3.7.0
-Requires-Dist: parsel>=1.9.1
-Requires-Dist: pydantic>=2.11.7
-Requires-Dist: pymongo>=4.11
-Requires-Dist: PyMySQL>=1.1.1
-Requires-Dist: python-dateutil>=2.9.0.post0
-Requires-Dist: redis>=6.2.0
-Requires-Dist: requests>=2.32.4
-Requires-Dist: six>=1.17.0
-Requires-Dist: ujson>=5.9.0
-Requires-Dist: urllib3>=2.5.0
-Requires-Dist: w3lib>=2.1.2
-Requires-Dist: rich>=14.1.0
-Requires-Dist: astor>=0.8.1
-Requires-Dist: watchdog>=6.0.0
-Provides-Extra: render
-Requires-Dist: webdriver-manager>=4.0.0; extra == "render"
-Requires-Dist: playwright; extra == "render"
-Requires-Dist: selenium>=3.141.0; extra == "render"
-Provides-Extra: all
-Requires-Dist: bitarray>=1.5.3; extra == "all"
-Requires-Dist: PyExecJS>=1.5.1; extra == "all"
-Requires-Dist: pymongo>=3.10.1; extra == "all"
-Requires-Dist: redis-py-cluster>=2.1.0; extra == "all"
-Requires-Dist: webdriver-manager>=4.0.0; extra == "all"
-Requires-Dist: playwright; extra == "all"
-Requires-Dist: selenium>=3.141.0; extra == "all"
-
-# Crawlo Crawler Framework
-
-Crawlo is a high-performance, extensible Python crawler framework supporting standalone and distributed deployment.
-
-## Features
-
-- High-performance asynchronous crawling
-- Multiple downloaders (aiohttp, httpx, curl-cffi)
-- Built-in data cleaning and validation
-- Distributed crawling support
-- Flexible middleware system
-- Powerful configuration management
-- Detailed logging and monitoring
-- Windows and Linux compatible
-
-## Installation
-
-```bash
-pip install crawlo
-```
-
-Or install from source:
-
-```bash
-git clone https://github.com/your-username/crawlo.git
-cd crawlo
-pip install -r requirements.txt
-pip install .
-```
-
-## Quick Start
-
-```python
-from crawlo import Spider
-
-class MySpider(Spider):
-    name = 'example'
-
-    def parse(self, response):
-        # parsing logic
-        pass
-
-# Run the spider:
-# crawlo run example
-```
-
-## Logging System
-
-Crawlo ships with a capable logging system and a range of configuration options:
-
-### Basic Configuration
-
-```python
-from crawlo.logging import configure_logging, get_logger
-
-# Configure the logging system
-configure_logging(
-    LOG_LEVEL='INFO',
-    LOG_FILE='logs/app.log',
-    LOG_MAX_BYTES=10*1024*1024,  # 10MB
-    LOG_BACKUP_COUNT=5
-)
-
-# Get a logger
-logger = get_logger('my_module')
-logger.info('This is a log message')
-```
-
-### Advanced Configuration
-
-```python
-# Configure console and file log levels separately
-configure_logging(
-    LOG_LEVEL='INFO',
-    LOG_CONSOLE_LEVEL='WARNING',  # console shows only WARNING and above
-    LOG_FILE_LEVEL='DEBUG',       # file records DEBUG and above
-    LOG_FILE='logs/app.log',
-    LOG_INCLUDE_THREAD_ID=True,   # include thread ID
-    LOG_INCLUDE_PROCESS_ID=True   # include process ID
-)
-
-# Module-specific log levels
-configure_logging(
-    LOG_LEVEL='WARNING',
-    LOG_LEVELS={
-        'my_module.debug': 'DEBUG',
-        'my_module.info': 'INFO'
-    }
-)
-```
-
-### Performance Monitoring
-
-```python
-from crawlo.logging import get_monitor
-
-# Enable logging performance monitoring
-monitor = get_monitor()
-monitor.enable_monitoring()
-
-# Get a performance report
-report = monitor.get_performance_report()
-print(report)
-```
-
-### Log Sampling
-
-```python
-from crawlo.logging import get_sampler
-
-# Set a sampling rate (record only 30% of log messages)
-sampler = get_sampler()
-sampler.set_sample_rate('my_module', 0.3)
-
-# Set a rate limit (at most 100 log messages per second)
-sampler.set_rate_limit('my_module', 100)
-```
-
-## Windows Compatibility Notes
-
-Log rotation on Windows can run into file-locking problems. To avoid them, install the `concurrent-log-handler` library:
-
-```bash
-pip install concurrent-log-handler
-```
-
-The Crawlo framework automatically detects this library and uses it for better Windows compatibility.
-
-If `concurrent-log-handler` is not installed, running on Windows may raise the following error:
-```
-PermissionError: [WinError 32] The process cannot access the file because it is being used by another process.
-```
-
-## Documentation
-
-See the [documentation](https://your-docs-url.com) for more information.
-
-## License
-
-MIT
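The deleted README's Windows note says the framework "automatically detects" concurrent-log-handler and uses it. A minimal sketch of that detection pattern, assuming a plain import fallback (crawlo's actual implementation is not shown in this diff):

```python
import logging

# Prefer the lock-safe rotating handler when installed (helps on Windows);
# otherwise fall back to the standard library's rotating handler.
try:
    from concurrent_log_handler import ConcurrentRotatingFileHandler as RotatingHandler
except ImportError:
    from logging.handlers import RotatingFileHandler as RotatingHandler

handler = RotatingHandler("logs/app.log", maxBytes=10 * 1024 * 1024, backupCount=5)
logging.getLogger("crawlo").addHandler(handler)
```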
tests/simple_log_test.py
DELETED
@@ -1,58 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: UTF-8 -*-
-"""
-Simple logging system test
-"""
-import sys
-import os
-sys.path.insert(0, '/')
-
-# Make sure the log directory exists
-os.makedirs('/examples/ofweek_standalone/logs', exist_ok=True)
-
-# Test the logging system
-from crawlo.utils.log import LoggerManager, get_logger
-
-print("=== Simple logging system test ===")
-
-# 1. Configure the logging system directly
-print("1. Configuring the logging system...")
-LoggerManager.configure(
-    LOG_LEVEL='INFO',
-    LOG_FILE='/Users/oscar/projects/Crawlo/examples/ofweek_standalone/logs/simple_test.log'
-)
-
-# 2. Create a logger
-print("2. Creating a logger...")
-logger = get_logger('test.logger')
-print(f"   Logger: {logger}")
-print(f"   Handlers: {len(logger.handlers)}")
-
-for i, handler in enumerate(logger.handlers):
-    handler_type = type(handler).__name__
-    print(f"   Handler {i}: {handler_type}")
-    if hasattr(handler, 'baseFilename'):
-        print(f"      File: {handler.baseFilename}")
-
-# 3. Test log output
-print("3. Testing log output...")
-logger.info("This is a test INFO message")
-logger.debug("This is a test DEBUG message")
-logger.warning("This is a test WARNING message")
-
-print("4. Checking the log file...")
-log_file = '/Users/oscar/projects/Crawlo/examples/ofweek_standalone/logs/simple_test.log'
-if os.path.exists(log_file):
-    print(f"   Log file exists: {log_file}")
-    with open(log_file, 'r', encoding='utf-8') as f:
-        content = f.read()
-    print(f"   Content length: {len(content)} characters")
-    if content:
-        print("   File content:")
-        print(content)
-    else:
-        print("   File is empty")
-else:
-    print(f"   Log file does not exist: {log_file}")
-
-print("=== Test complete ===")
tests/simple_test.py
DELETED
@@ -1,48 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-"""
-Simplified framework test
-"""
-import os
-import sys
-sys.path.insert(0, '/')
-
-# Basic setup
-test_log_file = '/Users/oscar/projects/Crawlo/simple_test.log'
-if os.path.exists(test_log_file):
-    os.remove(test_log_file)
-
-# The simplest possible test
-try:
-    from crawlo.utils.log import LoggerManager
-
-    print("Configuring the logging system...")
-    LoggerManager.configure(
-        LOG_LEVEL='INFO',
-        LOG_FILE=test_log_file
-    )
-
-    from crawlo.utils.log import get_logger
-    logger = get_logger('test.simple')
-
-    print("Testing log output...")
-    logger.info("This is a test message")
-    logger.info("Crawlo framework initialization complete")
-    logger.info("Crawlo Framework Started 1.3.3")
-
-    print("Checking the log file...")
-    if os.path.exists(test_log_file):
-        with open(test_log_file, 'r', encoding='utf-8') as f:
-            content = f.read()
-        print(f"Log file content: {len(content)} characters")
-        print("Content:")
-        print(content)
-    else:
-        print("Log file was not created")
-
-except Exception as e:
-    print(f"Error: {e}")
-    import traceback
-    traceback.print_exc()
-
-print("Test complete")
tests/test_framework_logger.py
DELETED
@@ -1,67 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: UTF-8 -*-
-"""
-Test the framework logging system
-"""
-import sys
-import os
-sys.path.insert(0, '/')
-
-from crawlo.initialization import initialize_framework, get_framework_initializer
-from crawlo.utils.log import get_logger, LoggerManager
-
-def test_framework_logger():
-    print("=== Testing the framework logging system ===")
-
-    # 1. Initialize the framework with basic settings
-    print("1. Initializing the framework...")
-    custom_settings = {
-        'LOG_LEVEL': 'INFO',
-        'LOG_FILE': 'test_framework.log',
-        'PROJECT_NAME': 'test_framework',
-        'RUN_MODE': 'standalone'
-    }
-    settings = initialize_framework(custom_settings)
-    print(f"   LOG_LEVEL: {settings.get('LOG_LEVEL')}")
-    print(f"   LOG_FILE: {settings.get('LOG_FILE')}")
-
-    # 2. Get the framework initialization manager
-    init_manager = get_framework_initializer()
-    print(f"   Framework ready: {init_manager.is_ready}")
-    print(f"   Initialization phase: {init_manager.phase}")
-
-    # 3. Test the framework logger
-    framework_logger = init_manager.logger
-    if framework_logger:
-        print(f"   Framework logger name: {framework_logger.name}")
-        print(f"   Framework logger level: {framework_logger.level}")
-        print(f"   Framework logger handler count: {len(framework_logger.handlers)}")
-
-        for i, handler in enumerate(framework_logger.handlers):
-            handler_type = type(handler).__name__
-            print(f"   Handler {i}: {handler_type}, level: {handler.level}")
-    else:
-        print("   Framework logger is None!")
-        framework_logger = get_logger('crawlo.framework')
-        print(f"   Manually created framework logger: {framework_logger.name}")
-
-    # 4. Test log output
-    print("2. Testing log output...")
-    framework_logger.info("Crawlo framework initialization complete")
-    framework_logger.info("Crawlo Framework Started 1.3.3")
-    framework_logger.info("Standalone mode - simple and fast, suitable for development and small-to-medium crawls")
-    framework_logger.info("Run Mode: standalone")
-    framework_logger.info("Starting running test_spider")
-
-    # 5. Test other loggers
-    print("3. Testing other component loggers...")
-    queue_logger = get_logger('QueueManager')
-    queue_logger.info("Queue initialized successfully Type: memory")
-
-    scheduler_logger = get_logger('Scheduler')
-    scheduler_logger.info("enabled filters: crawlo.filters.memory_filter.MemoryFilter")
-
-    print("=== Test complete ===")
-
-if __name__ == "__main__":
-    test_framework_logger()
tests/test_framework_startup.py
DELETED
@@ -1,65 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-"""
-Verify that framework startup messages are written to the log file
-"""
-import sys
-import os
-sys.path.insert(0, '/')
-
-def test_framework_startup():
-    print("=== Testing framework startup output ===")
-
-    # Remove the old log file
-    test_log_file = '/Users/oscar/projects/Crawlo/test_startup.log'
-    if os.path.exists(test_log_file):
-        os.remove(test_log_file)
-
-    # Prepare test settings
-    test_settings = {
-        'PROJECT_NAME': 'test_startup',
-        'LOG_LEVEL': 'INFO',
-        'LOG_FILE': test_log_file,
-        'RUN_MODE': 'standalone'
-    }
-
-    # Initialize the framework
-    from crawlo.initialization import initialize_framework
-    settings = initialize_framework(test_settings)
-
-    print(f"Settings initialized: {settings.get('PROJECT_NAME')}")
-
-    # Check whether the log file contains the framework startup messages
-    if os.path.exists(test_log_file):
-        with open(test_log_file, 'r', encoding='utf-8') as f:
-            content = f.read()
-        print(f"Log file content length: {len(content)} characters")
-
-        # Check the key startup messages
-        if "Crawlo framework initialization complete" in content:
-            print("✅ Found: Crawlo framework initialization complete")
-        else:
-            print("❌ Not found: Crawlo framework initialization complete")
-
-        if "Crawlo Framework Started" in content:
-            print("✅ Found: Crawlo Framework Started")
-        else:
-            print("❌ Not found: Crawlo Framework Started")
-
-        if "Standalone mode" in content:
-            print("✅ Found: Standalone mode")
-        else:
-            print("❌ Not found: Standalone mode")
-
-        print("\nFirst 50 log lines:")
-        lines = content.split('\n')[:50]
-        for i, line in enumerate(lines, 1):
-            if any(keyword in line for keyword in ["framework", "Framework", "Started"]):
-                print(f"{i:3d}: {line}")
-    else:
-        print("❌ Log file was not created")
-
-    print("=== Test complete ===")
-
-if __name__ == "__main__":
-    test_framework_startup()
tests/test_mode_change.py
DELETED
@@ -1,73 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-"""
-Test the run-mode log level change
-"""
-import sys
-import os
-sys.path.insert(0, '/')
-
-def test_mode_log_level():
-    print("=== Testing the run-mode log level change ===")
-
-    # Remove the old log file
-    test_log_file = '/Users/oscar/projects/Crawlo/test_mode_change.log'
-    if os.path.exists(test_log_file):
-        os.remove(test_log_file)
-
-    # Prepare test settings
-    test_settings = {
-        'PROJECT_NAME': 'test_mode_change',
-        'LOG_LEVEL': 'INFO',
-        'LOG_FILE': test_log_file,
-        'RUN_MODE': 'standalone'
-    }
-
-    try:
-        # Initialize the framework
-        from crawlo.initialization import initialize_framework
-        settings = initialize_framework(test_settings)
-
-        print(f"Settings initialized: {settings.get('PROJECT_NAME')}")
-
-        # Check whether the log file contains run-mode messages
-        if os.path.exists(test_log_file):
-            with open(test_log_file, 'r', encoding='utf-8') as f:
-                content = f.read()
-            print(f"Log file content length: {len(content)} characters")
-
-            # Check whether run-mode messages still appear at INFO level
-            info_lines = [line for line in content.split('\n') if 'INFO' in line and 'Standalone mode' in line]
-            debug_lines = [line for line in content.split('\n') if 'DEBUG' in line and 'Standalone mode' in line]
-
-            if info_lines:
-                print("❌ Run-mode messages still present at INFO level:")
-                for line in info_lines:
-                    print(f"   {line}")
-            else:
-                print("✅ No run-mode messages at INFO level")
-
-            if debug_lines:
-                print("✅ Run-mode messages found at DEBUG level:")
-                for line in debug_lines:
-                    print(f"   {line}")
-            else:
-                print("❌ No run-mode messages at DEBUG level")
-
-            print("\nFull log content:")
-            lines = content.split('\n')
-            for i, line in enumerate(lines, 1):
-                if line.strip():
-                    print(f"{i:3d}: {line}")
-        else:
-            print("❌ Log file was not created")
-
-    except Exception as e:
-        print(f"Error: {e}")
-        import traceback
-        traceback.print_exc()
-
-    print("=== Test complete ===")
-
-if __name__ == "__main__":
-    test_mode_log_level()
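The five deleted test scripts above hardcode developer-machine paths such as /Users/oscar/projects/Crawlo, so they could not run anywhere else. A portable version of the same smoke check using tempfile (a sketch reusing the LoggerManager API those scripts call, not code from the release):

```python
import os
import tempfile

from crawlo.utils.log import LoggerManager, get_logger

# Write the test log into a throwaway directory instead of a hardcoded path.
with tempfile.TemporaryDirectory() as tmp:
    log_file = os.path.join(tmp, "smoke_test.log")
    LoggerManager.configure(LOG_LEVEL="INFO", LOG_FILE=log_file)

    logger = get_logger("test.smoke")
    logger.info("This is a test INFO message")

    # Standard logging file handlers flush per record, so the message
    # should already be on disk here.
    with open(log_file, encoding="utf-8") as f:
        assert "test INFO message" in f.read()
```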