crawlo 1.4.4__py3-none-any.whl → 1.4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (85)
  1. crawlo/__init__.py +11 -15
  2. crawlo/__version__.py +1 -1
  3. crawlo/commands/startproject.py +24 -0
  4. crawlo/core/engine.py +2 -2
  5. crawlo/core/scheduler.py +4 -4
  6. crawlo/crawler.py +8 -7
  7. crawlo/downloader/__init__.py +5 -2
  8. crawlo/extension/__init__.py +2 -2
  9. crawlo/filters/aioredis_filter.py +8 -1
  10. crawlo/filters/memory_filter.py +8 -1
  11. crawlo/initialization/built_in.py +13 -4
  12. crawlo/initialization/core.py +5 -4
  13. crawlo/interfaces.py +24 -0
  14. crawlo/middleware/__init__.py +7 -4
  15. crawlo/middleware/middleware_manager.py +15 -8
  16. crawlo/mode_manager.py +45 -11
  17. crawlo/network/response.py +374 -69
  18. crawlo/pipelines/mysql_pipeline.py +6 -6
  19. crawlo/pipelines/pipeline_manager.py +2 -2
  20. crawlo/project.py +2 -4
  21. crawlo/settings/default_settings.py +4 -0
  22. crawlo/task_manager.py +2 -2
  23. crawlo/templates/project/items.py.tmpl +2 -2
  24. crawlo/templates/project/middlewares.py.tmpl +9 -89
  25. crawlo/templates/project/pipelines.py.tmpl +8 -68
  26. crawlo/tools/__init__.py +0 -11
  27. crawlo/utils/__init__.py +17 -1
  28. crawlo/utils/db_helper.py +220 -319
  29. crawlo/utils/error_handler.py +313 -67
  30. crawlo/utils/fingerprint.py +3 -4
  31. crawlo/utils/misc.py +82 -0
  32. crawlo/utils/request.py +55 -66
  33. crawlo/utils/selector_helper.py +138 -0
  34. crawlo/utils/spider_loader.py +185 -45
  35. crawlo/utils/text_helper.py +95 -0
  36. crawlo-1.4.5.dist-info/METADATA +329 -0
  37. {crawlo-1.4.4.dist-info → crawlo-1.4.5.dist-info}/RECORD +76 -49
  38. tests/bug_check_test.py +251 -0
  39. tests/direct_selector_helper_test.py +97 -0
  40. tests/ofweek_scrapy/ofweek_scrapy/items.py +12 -0
  41. tests/ofweek_scrapy/ofweek_scrapy/middlewares.py +100 -0
  42. tests/ofweek_scrapy/ofweek_scrapy/pipelines.py +13 -0
  43. tests/ofweek_scrapy/ofweek_scrapy/settings.py +85 -0
  44. tests/ofweek_scrapy/ofweek_scrapy/spiders/__init__.py +4 -0
  45. tests/ofweek_scrapy/ofweek_scrapy/spiders/ofweek_spider.py +162 -0
  46. tests/ofweek_scrapy/scrapy.cfg +11 -0
  47. tests/performance_comparison.py +4 -5
  48. tests/simple_crawlo_test.py +1 -2
  49. tests/simple_follow_test.py +39 -0
  50. tests/simple_response_selector_test.py +95 -0
  51. tests/simple_selector_helper_test.py +155 -0
  52. tests/simple_selector_test.py +208 -0
  53. tests/simple_url_test.py +74 -0
  54. tests/test_crawler_process_import.py +39 -0
  55. tests/test_crawler_process_spider_modules.py +48 -0
  56. tests/test_edge_cases.py +7 -5
  57. tests/test_encoding_core.py +57 -0
  58. tests/test_encoding_detection.py +127 -0
  59. tests/test_factory_compatibility.py +197 -0
  60. tests/test_optimized_selector_naming.py +101 -0
  61. tests/test_priority_behavior.py +18 -18
  62. tests/test_response_follow.py +105 -0
  63. tests/test_response_selector_methods.py +93 -0
  64. tests/test_response_url_methods.py +71 -0
  65. tests/test_response_urljoin.py +87 -0
  66. tests/test_scrapy_style_encoding.py +113 -0
  67. tests/test_selector_helper.py +101 -0
  68. tests/test_selector_optimizations.py +147 -0
  69. tests/test_spider_loader.py +50 -0
  70. tests/test_spider_loader_comprehensive.py +70 -0
  71. tests/test_spiders/__init__.py +1 -0
  72. tests/test_spiders/test_spider.py +10 -0
  73. crawlo/tools/anti_crawler.py +0 -269
  74. crawlo/utils/class_loader.py +0 -26
  75. crawlo/utils/enhanced_error_handler.py +0 -357
  76. crawlo-1.4.4.dist-info/METADATA +0 -190
  77. tests/simple_log_test.py +0 -58
  78. tests/simple_test.py +0 -48
  79. tests/test_framework_logger.py +0 -67
  80. tests/test_framework_startup.py +0 -65
  81. tests/test_mode_change.py +0 -73
  82. {crawlo-1.4.4.dist-info → crawlo-1.4.5.dist-info}/WHEEL +0 -0
  83. {crawlo-1.4.4.dist-info → crawlo-1.4.5.dist-info}/entry_points.txt +0 -0
  84. {crawlo-1.4.4.dist-info → crawlo-1.4.5.dist-info}/top_level.txt +0 -0
  85. /tests/{final_command_test_report.md → ofweek_scrapy/ofweek_scrapy/__init__.py} +0 -0
tests/simple_selector_test.py ADDED
@@ -0,0 +1,208 @@
+ #!/usr/bin/python
+ # -*- coding:UTF-8 -*-
+ """
+ Simplified selector test
+ """
+ import sys
+ import os
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+
+ # Import the required modules directly
+ from parsel import Selector, SelectorList
+
+
+ class MockResponse:
+     """Mock Response class for testing"""
+
+     def __init__(self, text):
+         self._text = text
+         self._selector_instance = None
+
+     @property
+     def text(self):
+         return self._text
+
+     @property
+     def _selector(self):
+         if self._selector_instance is None:
+             self._selector_instance = Selector(self.text)
+         return self._selector_instance
+
+     def xpath(self, query):
+         return self._selector.xpath(query)
+
+     def css(self, query):
+         return self._selector.css(query)
+
+     def _is_xpath(self, query):
+         return query.startswith(('/', '//', './'))
+
+     def _extract_text_from_elements(self, elements, join_str=" "):
+         texts = []
+         for element in elements:
+             if hasattr(element, 'xpath'):
+                 element_texts = element.xpath('.//text()').getall()
+             else:
+                 element_texts = [str(element)]
+             for text in element_texts:
+                 cleaned = text.strip()
+                 if cleaned:
+                     texts.append(cleaned)
+         return join_str.join(texts)
+
+     def extract_text(self, xpath_or_css, join_str=" ", default=''):
+         try:
+             elements = self.xpath(xpath_or_css) if self._is_xpath(xpath_or_css) else self.css(xpath_or_css)
+             if not elements:
+                 return default
+             return self._extract_text_from_elements(elements, join_str)
+         except Exception:
+             return default
+
+     def extract_texts(self, xpath_or_css, join_str=" ", default=None):
+         if default is None:
+             default = []
+
+         try:
+             elements = self.xpath(xpath_or_css) if self._is_xpath(xpath_or_css) else self.css(xpath_or_css)
+             if not elements:
+                 return default
+
+             result = []
+             for element in elements:
+                 if hasattr(element, 'xpath'):
+                     texts = element.xpath('.//text()').getall()
+                 else:
+                     texts = [str(element)]
+
+                 clean_texts = [text.strip() for text in texts if text.strip()]
+                 if clean_texts:
+                     result.append(join_str.join(clean_texts))
+
+             return result if result else default
+         except Exception:
+             return default
+
+     def extract_attr(self, xpath_or_css, attr_name, default=None):
+         try:
+             elements = self.xpath(xpath_or_css) if self._is_xpath(xpath_or_css) else self.css(xpath_or_css)
+             if not elements:
+                 return default
+             if hasattr(elements, 'attrib'):
+                 return elements.attrib.get(attr_name, default)
+             elif len(elements) > 0 and hasattr(elements[0], 'attrib'):
+                 return elements[0].attrib.get(attr_name, default)
+             return default
+         except Exception:
+             return default
+
+     def extract_attrs(self, xpath_or_css, attr_name, default=None):
+         if default is None:
+             default = []
+
+         try:
+             elements = self.xpath(xpath_or_css) if self._is_xpath(xpath_or_css) else self.css(xpath_or_css)
+             if not elements:
+                 return default
+
+             result = []
+             for element in elements:
+                 if hasattr(element, 'attrib'):
+                     attr_value = element.attrib.get(attr_name)
+                     if attr_value is not None:
+                         result.append(attr_value)
+
+             return result if result else default
+         except Exception:
+             return default
+
+
+ def test_selector_methods():
+     """Test the selector helper methods"""
+     print("Testing selector methods...")
+     print("=" * 50)
+
+     # Build test HTML
+     html_content = """
+     <html>
+     <head>
+         <title>Test Page</title>
+     </head>
+     <body>
+         <div class="content">
+             <h1>Main Heading</h1>
+             <p class="intro">Intro paragraph</p>
+             <ul class="list">
+                 <li>Item 1</li>
+                 <li>Item 2</li>
+                 <li>Item 3</li>
+             </ul>
+             <a href="https://example.com" class="link">Link text</a>
+             <img src="image.jpg" alt="Image description" class="image">
+         </div>
+     </body>
+     </html>
+     """
+
+     response = MockResponse(html_content)
+
+     # Test extract_text
+     print("1. Testing extract_text:")
+     title = response.extract_text('title')
+     print(f" Title: {title}")
+
+     h1_text = response.extract_text('.content h1')
+     print(f" H1 text: {h1_text}")
+
+     # XPath variant
+     title_xpath = response.extract_text('//title')
+     print(f" XPath title: {title_xpath}")
+
+     print()
+
+     # Test extract_texts
+     print("2. Testing extract_texts:")
+     list_items = response.extract_texts('.list li')
+     print(f" List items: {list_items}")
+
+     # XPath variant
+     list_items_xpath = response.extract_texts('//ul[@class="list"]/li')
+     print(f" XPath list items: {list_items_xpath}")
+
+     print()
+
+     # Test extract_attr
+     print("3. Testing extract_attr:")
+     link_href = response.extract_attr('.link', 'href')
+     print(f" Link href: {link_href}")
+
+     img_alt = response.extract_attr('.image', 'alt')
+     print(f" Image alt: {img_alt}")
+
+     # XPath variant
+     link_href_xpath = response.extract_attr('//a[@class="link"]', 'href')
+     print(f" XPath link href: {link_href_xpath}")
+
+     print()
+
+     # Test extract_attrs
+     print("4. Testing extract_attrs:")
+     all_links = response.extract_attrs('a', 'href')
+     print(f" All links: {all_links}")
+
+     print()
+
+     # Edge cases
+     print("5. Testing edge cases:")
+     non_exist = response.extract_text('.non-exist', default='default text')
+     print(f" Default for a missing element: {non_exist}")
+
+     non_exist_attr = response.extract_attr('.non-exist', 'href', default='default link')
+     print(f" Default for a missing attribute: {non_exist_attr}")
+
+     print()
+     print("All tests finished!")
+
+
+ if __name__ == '__main__':
+     test_selector_methods()
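
For context, the extraction helpers above are thin wrappers over parsel. A minimal standalone sketch of the same idea, using parsel directly with no crawlo imports (illustrative only, not the package's API):

```python
# Sketch of what extract_texts/extract_attr do under the hood (plain parsel).
from parsel import Selector

sel = Selector('<ul class="list"><li>Item 1</li><li>Item 2</li></ul>')

# extract_texts-style: per-element text nodes, whitespace-stripped, then joined
items = [" ".join(t.strip() for t in li.xpath('.//text()').getall() if t.strip())
         for li in sel.css('.list li')]
print(items)  # ['Item 1', 'Item 2']

# extract_attr-style: first matching element's attribute, with a default;
# SelectorList.attrib is the first element's attributes ({} when empty)
href = sel.css('a').attrib.get('href', 'default link')
print(href)  # 'default link' (no <a> in this fragment)
```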
tests/simple_url_test.py ADDED
@@ -0,0 +1,74 @@
+ #!/usr/bin/python
+ # -*- coding:UTF-8 -*-
+ """
+ Simple test of the Response URL handling methods
+ """
+ import sys
+ import os
+
+ # Add the project root to the Python path
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+
+ # Import the required modules directly
+ from urllib.parse import urlparse, urlsplit, parse_qs, urlencode, quote, unquote, urldefrag
+
+
+ def test_url_methods():
+     """Test the URL handling methods"""
+     print("Testing Response URL handling methods")
+
+     # Test data
+     test_url = "https://example.com/test?param1=value1&param2=value2#section1"
+     print(f"Test URL: {test_url}")
+
+     # 1. urlparse
+     print("\n1. Testing urlparse:")
+     parsed = urlparse(test_url)
+     print(f" scheme: {parsed.scheme}")
+     print(f" netloc: {parsed.netloc}")
+     print(f" path: {parsed.path}")
+     print(f" query: {parsed.query}")
+     print(f" fragment: {parsed.fragment}")
+
+     # 2. urlsplit
+     print("\n2. Testing urlsplit:")
+     split_result = urlsplit(test_url)
+     print(f" scheme: {split_result.scheme}")
+     print(f" netloc: {split_result.netloc}")
+     print(f" path: {split_result.path}")
+     print(f" query: {split_result.query}")
+     print(f" fragment: {split_result.fragment}")
+
+     # 3. parse_qs
+     print("\n3. Testing parse_qs:")
+     query_dict = parse_qs(parsed.query)
+     print(f" Parsed result: {query_dict}")
+
+     # 4. urlencode
+     print("\n4. Testing urlencode:")
+     test_dict = {"name": "张三", "age": 25, "city": "北京"}  # non-ASCII values on purpose
+     encoded = urlencode(test_dict)
+     print(f" Encoded result: {encoded}")
+
+     # 5. quote/unquote
+     print("\n5. Testing quote/unquote:")
+     original = "hello world 你好"
+     quoted = quote(original)
+     print(f" Original string: {original}")
+     print(f" URL-encoded: {quoted}")
+
+     unquoted = unquote(quoted)
+     print(f" URL-decoded: {unquoted}")
+     print(f" Round-trip matches original: {original == unquoted}")
+
+     # 6. urldefrag
+     print("\n6. Testing urldefrag:")
+     url_without_frag, fragment = urldefrag(test_url)
+     print(f" URL without fragment: {url_without_frag}")
+     print(f" Fragment: {fragment}")
+
+     print("\nAll tests finished!")
+
+
+ if __name__ == '__main__':
+     test_url_methods()
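
One stdlib subtlety worth noting alongside this test: parse_qs returns every value as a list, so re-encoding its output requires urlencode(..., doseq=True). A small round-trip sketch over the same test URL:

```python
from urllib.parse import parse_qs, urlencode, urlsplit, urlunsplit

url = "https://example.com/test?param1=value1&param2=value2#section1"
parts = urlsplit(url)
params = parse_qs(parts.query)   # {'param1': ['value1'], 'param2': ['value2']} — values are lists
params["param3"] = ["value3"]    # add a parameter

# doseq=True unpacks the list values; without it the lists are
# stringified (e.g. param1=%5B%27value1%27%5D).
rebuilt = urlunsplit(parts._replace(query=urlencode(params, doseq=True)))
print(rebuilt)  # https://example.com/test?param1=value1&param2=value2&param3=value3#section1
```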
tests/test_crawler_process_import.py ADDED
@@ -0,0 +1,39 @@
+ #!/usr/bin/env python
+ # -*- coding: utf-8 -*-
+
+ """
+ Test importing CrawlerProcess
+ """
+
+ import sys
+ import os
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+
+
+ def test_crawler_process_import():
+     """Test importing CrawlerProcess"""
+     print("Testing CrawlerProcess import...")
+
+     try:
+         # Try importing CrawlerProcess directly from crawlo
+         from crawlo import CrawlerProcess
+         print(f" Imported CrawlerProcess from crawlo: {CrawlerProcess}")
+
+         # Try creating an instance
+         process = CrawlerProcess()
+         print(f" Created a CrawlerProcess instance: {process}")
+
+         print("CrawlerProcess import test passed!")
+
+     except ImportError as e:
+         print(f" Import failed: {e}")
+         # If the direct import fails, fall back to the crawler module
+         try:
+             from crawlo.crawler import CrawlerProcess
+             print(f" Imported CrawlerProcess from crawlo.crawler: {CrawlerProcess}")
+         except ImportError as e2:
+             print(f" Import from the crawler module also failed: {e2}")
+
+
+ if __name__ == '__main__':
+     test_crawler_process_import()
tests/test_crawler_process_spider_modules.py ADDED
@@ -0,0 +1,48 @@
+ #!/usr/bin/env python
+ # -*- coding: utf-8 -*-
+
+ """
+ Test CrawlerProcess integration with SPIDER_MODULES
+ """
+
+ import sys
+ import os
+ import asyncio
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+
+ from crawlo.crawler import CrawlerProcess
+ from crawlo.settings.setting_manager import SettingManager
+
+
+ def test_crawler_process_spider_modules():
+     """Test CrawlerProcess integration with SPIDER_MODULES"""
+     print("Testing CrawlerProcess integration with SPIDER_MODULES...")
+
+     # Build settings that include SPIDER_MODULES
+     settings = SettingManager({
+         'SPIDER_MODULES': ['tests.test_spiders'],
+         'SPIDER_LOADER_WARN_ONLY': True,
+         'CONCURRENCY': 1,
+         'LOG_LEVEL': 'INFO'
+     })
+
+     # Create a CrawlerProcess instance
+     process = CrawlerProcess(settings=settings)
+
+     # Fetch the registered spider names
+     spider_names = process.get_spider_names()
+     print(f"Discovered spiders: {spider_names}")
+
+     # Check whether a spider is registered
+     is_registered = process.is_spider_registered('test_spider')
+     print(f"Spider 'test_spider' registered: {is_registered}")
+
+     # Fetch the spider class
+     spider_class = process.get_spider_class('test_spider')
+     print(f"Class for spider 'test_spider': {spider_class}")
+
+     print("Test finished!")
+
+
+ if __name__ == '__main__':
+     test_crawler_process_spider_modules()
tests/test_edge_cases.py CHANGED
@@ -112,13 +112,15 @@ async def test_redis_queue_edge_cases():
      print(" Special-character URL test passed")
 
      # 4. Test priority (the higher-priority request should dequeue first)
-     high_priority_request = Request(url="https://high-priority.com", priority=1000)
-     low_priority_request = Request(url="https://low-priority.com", priority=-1000)
+     # Note: the Request constructor negates the given priority before storing it,
+     # so priority=1000 is stored as -1000 and priority=-1000 is stored as 1000.
+     high_priority_request = Request(url="https://high-priority.com", priority=1000)  # stored as -1000
+     low_priority_request = Request(url="https://low-priority.com", priority=-1000)  # stored as 1000
 
-     await queue.put(high_priority_request)  # higher-priority value
-     await queue.put(low_priority_request)  # lower-priority value
+     await queue.put(high_priority_request, priority=high_priority_request.priority)  # use the stored priority
+     await queue.put(low_priority_request, priority=low_priority_request.priority)  # use the stored priority
 
-     # The higher-priority request should dequeue first
+     # The higher-priority request should dequeue first (score = priority; the lowest score dequeues first)
      first = await queue.get(timeout=1.0)
      assert first is not None and first.url == "https://high-priority.com", "higher-priority request should dequeue first"
      print(" Priority test passed")
tests/test_encoding_core.py ADDED
@@ -0,0 +1,57 @@
+ #!/usr/bin/python
+ # -*- coding:UTF-8 -*-
+ """
+ Core encoding-detection test
+ """
+ import sys
+ import os
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+
+ from crawlo.network.response import Response
+
+
+ def test_encoding_detection():
+     """Test the core encoding-detection behavior"""
+     print("Testing core encoding detection...")
+
+     # Encoding set on the Request takes priority
+     class MockRequest:
+         encoding = 'gbk'
+
+     response1 = Response(
+         url="https://example.com",
+         body=b'',
+         request=MockRequest()
+     )
+     print(f"Request encoding priority: {response1.encoding}")
+
+     # Encoding from the Content-Type header
+     response2 = Response(
+         url="https://example.com",
+         body=b'',
+         headers={"content-type": "text/html; charset=iso-8859-1"}
+     )
+     print(f"Content-Type encoding: {response2.encoding}")
+
+     # The declared-encoding helper
+     declared_enc = response2._declared_encoding()
+     print(f"Declared encoding: {declared_enc}")
+
+     # Default encoding
+     response3 = Response(
+         url="https://example.com",
+         body=b''
+     )
+     print(f"Default encoding: {response3.encoding}")
+
+     # Verify results
+     assert response1.encoding == 'gbk', f"Expected 'gbk', got {response1.encoding}"
+     assert response2.encoding == 'iso-8859-1', f"Expected 'iso-8859-1', got {response2.encoding}"
+     assert declared_enc == 'iso-8859-1', f"Expected 'iso-8859-1', got {declared_enc}"
+     assert response3.encoding == 'utf-8', f"Expected 'utf-8', got {response3.encoding}"
+
+     print("All tests passed!")
+
+
+ if __name__ == '__main__':
+     test_encoding_detection()
tests/test_encoding_detection.py ADDED
@@ -0,0 +1,127 @@
+ #!/usr/bin/python
+ # -*- coding:UTF-8 -*-
+ """
+ Response encoding-detection optimization test
+ """
+ import unittest
+
+ # Mock a subset of the Response class for testing
+ class MockResponse:
+     def __init__(self, body, headers=None, request=None):
+         self.body = body
+         self.headers = headers or {}
+         self.request = request
+         self._DEFAULT_ENCODING = "ascii"
+
+     def _determine_encoding(self):
+         """Simplified encoding detection"""
+         # 1. Prefer the declared encoding
+         declared_encoding = self._declared_encoding()
+         if declared_encoding:
+             return declared_encoding
+
+         # 2. Fall back to utf-8
+         return 'utf-8'
+
+     def _declared_encoding(self):
+         """Return the declared encoding, if any"""
+         # 1. Encoding specified on the Request
+         if self.request and getattr(self.request, 'encoding', None):
+             return self.request.encoding
+
+         # 2. Detect from the Content-Type header
+         content_type = self.headers.get("content-type", "") or self.headers.get("Content-Type", "")
+         if content_type:
+             import re
+             charset_match = re.search(r"charset=([\w-]+)", content_type, re.I)
+             if charset_match:
+                 return charset_match.group(1).lower()
+
+         return None
+
+
+ class TestDetermineEncoding(unittest.TestCase):
+     """Encoding-detection test cases"""
+
+     def test_request_encoding_priority(self):
+         """Encoding set on the Request takes priority"""
+         class MockRequest:
+             encoding = 'gbk'
+
+         response = MockResponse(b'', request=MockRequest())
+         encoding = response._determine_encoding()
+         self.assertEqual(encoding, 'gbk')
+
+     def test_content_type_encoding(self):
+         """Encoding detected from the Content-Type header"""
+         response = MockResponse(
+             b'',
+             headers={"content-type": "text/html; charset=iso-8859-1"}
+         )
+         encoding = response._determine_encoding()
+         self.assertEqual(encoding, 'iso-8859-1')
+
+     def test_default_encoding(self):
+         """Default encoding"""
+         response = MockResponse(b'')
+         encoding = response._determine_encoding()
+         self.assertEqual(encoding, 'utf-8')
+
+     def test_case_insensitive_content_type(self):
+         """Content-Type matching is case-insensitive"""
+         response = MockResponse(
+             b'',
+             headers={"Content-Type": "text/html; CHARSET=UTF-8"}
+         )
+         encoding = response._determine_encoding()
+         self.assertEqual(encoding, 'utf-8')
+
+     def test_declared_encoding_with_request(self):
+         """Declared encoding: Request takes priority"""
+         class MockRequest:
+             encoding = 'gbk'
+
+         response = MockResponse(b'', request=MockRequest())
+         declared_encoding = response._declared_encoding()
+         self.assertEqual(declared_encoding, 'gbk')
+
+     def test_declared_encoding_with_content_type(self):
+         """Declared encoding: from Content-Type"""
+         response = MockResponse(
+             b'',
+             headers={"content-type": "text/html; charset=iso-8859-1"}
+         )
+         declared_encoding = response._declared_encoding()
+         self.assertEqual(declared_encoding, 'iso-8859-1')
+
+
+ def test_encoding_detection():
+     """Quick sanity check of the encoding detection"""
+     print("Testing encoding detection...")
+
+     # Encoding set on the Request takes priority
+     class MockRequest:
+         encoding = 'gbk'
+
+     response1 = MockResponse(b'', request=MockRequest())
+     encoding1 = response1._determine_encoding()
+     print(f"Request encoding priority: {encoding1}")
+
+     # Encoding from the Content-Type header
+     response2 = MockResponse(
+         b'',
+         headers={"content-type": "text/html; charset=iso-8859-1"}
+     )
+     encoding2 = response2._determine_encoding()
+     print(f"Content-Type encoding: {encoding2}")
+
+     # Default encoding
+     response3 = MockResponse(b'')
+     encoding3 = response3._determine_encoding()
+     print(f"Default encoding: {encoding3}")
+
+     print("Encoding-detection test finished!")
+
+
+ if __name__ == '__main__':
+     test_encoding_detection()
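
The charset sniffing in both encoding tests comes down to one case-insensitive regex over the Content-Type header. Isolated, it behaves as below; the helper name is ours, but the regex mirrors the mock above exactly:

```python
import re

def charset_from_content_type(content_type: str):
    """Extract and lowercase the charset parameter, as the mock above does."""
    match = re.search(r"charset=([\w-]+)", content_type, re.I)
    return match.group(1).lower() if match else None

print(charset_from_content_type("text/html; charset=iso-8859-1"))  # iso-8859-1
print(charset_from_content_type("text/html; CHARSET=UTF-8"))       # utf-8
print(charset_from_content_type("text/html"))                      # None
```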