crawlo 1.4.4__py3-none-any.whl → 1.4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crawlo might be problematic. Click here for more details.

Files changed (85) hide show
  1. crawlo/__init__.py +11 -15
  2. crawlo/__version__.py +1 -1
  3. crawlo/commands/startproject.py +24 -0
  4. crawlo/core/engine.py +2 -2
  5. crawlo/core/scheduler.py +4 -4
  6. crawlo/crawler.py +8 -7
  7. crawlo/downloader/__init__.py +5 -2
  8. crawlo/extension/__init__.py +2 -2
  9. crawlo/filters/aioredis_filter.py +8 -1
  10. crawlo/filters/memory_filter.py +8 -1
  11. crawlo/initialization/built_in.py +13 -4
  12. crawlo/initialization/core.py +5 -4
  13. crawlo/interfaces.py +24 -0
  14. crawlo/middleware/__init__.py +7 -4
  15. crawlo/middleware/middleware_manager.py +15 -8
  16. crawlo/mode_manager.py +45 -11
  17. crawlo/network/response.py +374 -69
  18. crawlo/pipelines/mysql_pipeline.py +6 -6
  19. crawlo/pipelines/pipeline_manager.py +2 -2
  20. crawlo/project.py +2 -4
  21. crawlo/settings/default_settings.py +4 -0
  22. crawlo/task_manager.py +2 -2
  23. crawlo/templates/project/items.py.tmpl +2 -2
  24. crawlo/templates/project/middlewares.py.tmpl +9 -89
  25. crawlo/templates/project/pipelines.py.tmpl +8 -68
  26. crawlo/tools/__init__.py +0 -11
  27. crawlo/utils/__init__.py +17 -1
  28. crawlo/utils/db_helper.py +220 -319
  29. crawlo/utils/error_handler.py +313 -67
  30. crawlo/utils/fingerprint.py +3 -4
  31. crawlo/utils/misc.py +82 -0
  32. crawlo/utils/request.py +55 -66
  33. crawlo/utils/selector_helper.py +138 -0
  34. crawlo/utils/spider_loader.py +185 -45
  35. crawlo/utils/text_helper.py +95 -0
  36. crawlo-1.4.5.dist-info/METADATA +329 -0
  37. {crawlo-1.4.4.dist-info → crawlo-1.4.5.dist-info}/RECORD +76 -49
  38. tests/bug_check_test.py +251 -0
  39. tests/direct_selector_helper_test.py +97 -0
  40. tests/ofweek_scrapy/ofweek_scrapy/items.py +12 -0
  41. tests/ofweek_scrapy/ofweek_scrapy/middlewares.py +100 -0
  42. tests/ofweek_scrapy/ofweek_scrapy/pipelines.py +13 -0
  43. tests/ofweek_scrapy/ofweek_scrapy/settings.py +85 -0
  44. tests/ofweek_scrapy/ofweek_scrapy/spiders/__init__.py +4 -0
  45. tests/ofweek_scrapy/ofweek_scrapy/spiders/ofweek_spider.py +162 -0
  46. tests/ofweek_scrapy/scrapy.cfg +11 -0
  47. tests/performance_comparison.py +4 -5
  48. tests/simple_crawlo_test.py +1 -2
  49. tests/simple_follow_test.py +39 -0
  50. tests/simple_response_selector_test.py +95 -0
  51. tests/simple_selector_helper_test.py +155 -0
  52. tests/simple_selector_test.py +208 -0
  53. tests/simple_url_test.py +74 -0
  54. tests/test_crawler_process_import.py +39 -0
  55. tests/test_crawler_process_spider_modules.py +48 -0
  56. tests/test_edge_cases.py +7 -5
  57. tests/test_encoding_core.py +57 -0
  58. tests/test_encoding_detection.py +127 -0
  59. tests/test_factory_compatibility.py +197 -0
  60. tests/test_optimized_selector_naming.py +101 -0
  61. tests/test_priority_behavior.py +18 -18
  62. tests/test_response_follow.py +105 -0
  63. tests/test_response_selector_methods.py +93 -0
  64. tests/test_response_url_methods.py +71 -0
  65. tests/test_response_urljoin.py +87 -0
  66. tests/test_scrapy_style_encoding.py +113 -0
  67. tests/test_selector_helper.py +101 -0
  68. tests/test_selector_optimizations.py +147 -0
  69. tests/test_spider_loader.py +50 -0
  70. tests/test_spider_loader_comprehensive.py +70 -0
  71. tests/test_spiders/__init__.py +1 -0
  72. tests/test_spiders/test_spider.py +10 -0
  73. crawlo/tools/anti_crawler.py +0 -269
  74. crawlo/utils/class_loader.py +0 -26
  75. crawlo/utils/enhanced_error_handler.py +0 -357
  76. crawlo-1.4.4.dist-info/METADATA +0 -190
  77. tests/simple_log_test.py +0 -58
  78. tests/simple_test.py +0 -48
  79. tests/test_framework_logger.py +0 -67
  80. tests/test_framework_startup.py +0 -65
  81. tests/test_mode_change.py +0 -73
  82. {crawlo-1.4.4.dist-info → crawlo-1.4.5.dist-info}/WHEEL +0 -0
  83. {crawlo-1.4.4.dist-info → crawlo-1.4.5.dist-info}/entry_points.txt +0 -0
  84. {crawlo-1.4.4.dist-info → crawlo-1.4.5.dist-info}/top_level.txt +0 -0
  85. /tests/{final_command_test_report.md → ofweek_scrapy/ofweek_scrapy/__init__.py} +0 -0
@@ -0,0 +1,329 @@
1
+ Metadata-Version: 2.4
2
+ Name: crawlo
3
+ Version: 1.4.5
4
+ Summary: Crawlo 是一款基于异步IO的高性能Python爬虫框架,支持分布式抓取。
5
+ Home-page: https://github.com/crawl-coder/Crawlo.git
6
+ Author: crawl-coder
7
+ Author-email: crawlo@qq.com
8
+ License: MIT
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Requires-Python: >=3.6
13
+ Description-Content-Type: text/markdown
14
+ Requires-Dist: aiohttp>=3.12.14
15
+ Requires-Dist: aiomysql>=0.2.0
16
+ Requires-Dist: aioredis>=2.0.1
17
+ Requires-Dist: asyncmy>=0.2.10
18
+ Requires-Dist: cssselect>=1.2.0
19
+ Requires-Dist: dateparser>=1.2.2
20
+ Requires-Dist: httpx[http2]>=0.27.0
21
+ Requires-Dist: curl-cffi>=0.13.0
22
+ Requires-Dist: lxml>=5.2.1
23
+ Requires-Dist: motor>=3.7.0
24
+ Requires-Dist: parsel>=1.9.1
25
+ Requires-Dist: pydantic>=2.11.7
26
+ Requires-Dist: pymongo>=4.11
27
+ Requires-Dist: PyMySQL>=1.1.1
28
+ Requires-Dist: python-dateutil>=2.9.0.post0
29
+ Requires-Dist: redis>=6.2.0
30
+ Requires-Dist: requests>=2.32.4
31
+ Requires-Dist: six>=1.17.0
32
+ Requires-Dist: ujson>=5.9.0
33
+ Requires-Dist: urllib3>=2.5.0
34
+ Requires-Dist: w3lib>=2.1.2
35
+ Requires-Dist: rich>=14.1.0
36
+ Requires-Dist: astor>=0.8.1
37
+ Requires-Dist: watchdog>=6.0.0
38
+ Provides-Extra: render
39
+ Requires-Dist: webdriver-manager>=4.0.0; extra == "render"
40
+ Requires-Dist: playwright; extra == "render"
41
+ Requires-Dist: selenium>=3.141.0; extra == "render"
42
+ Provides-Extra: all
43
+ Requires-Dist: bitarray>=1.5.3; extra == "all"
44
+ Requires-Dist: PyExecJS>=1.5.1; extra == "all"
45
+ Requires-Dist: pymongo>=3.10.1; extra == "all"
46
+ Requires-Dist: redis-py-cluster>=2.1.0; extra == "all"
47
+ Requires-Dist: webdriver-manager>=4.0.0; extra == "all"
48
+ Requires-Dist: playwright; extra == "all"
49
+ Requires-Dist: selenium>=3.141.0; extra == "all"
50
+
51
+ # Crawlo 爬虫框架
52
+
53
+ Crawlo 是一个高性能、可扩展的 Python 爬虫框架,支持单机和分布式部署。
54
+
55
+ ## 特性
56
+
57
+ - 高性能异步爬取
58
+ - 支持多种下载器 (aiohttp, httpx, curl-cffi)
59
+ - 内置数据清洗和验证
60
+ - 分布式爬取支持
61
+ - 灵活的中间件系统
62
+ - 强大的配置管理系统
63
+ - 详细的日志记录和监控
64
+ - Windows 和 Linux 兼容
65
+
66
+ ## 安装
67
+
68
+ ```bash
69
+ pip install crawlo
70
+ ```
71
+
72
+ 或者从源码安装:
73
+
74
+ ```bash
75
+ git clone git@github.com:crawl-coder/Crawlo.git
76
+ cd Crawlo
77
+ pip install -r requirements.txt
78
+ pip install .
79
+ ```
80
+
81
+ ## 快速开始
82
+
83
+ ```python
84
+ from crawlo import Spider
85
+
86
+ class MySpider(Spider):
87
+     name = 'example'
88
+
89
+     def parse(self, response):
90
+         # 解析逻辑
91
+         pass
92
+
93
+ # 运行爬虫
94
+ # crawlo run example
95
+ ```
96
+
97
+ ## Response 对象功能
98
+
99
+ Crawlo 框架对 Response 对象进行了增强,提供了更多便捷方法:
100
+
101
+ ### URL 处理
102
+
103
+ 使用 Response 对象封装的 URL 处理方法可以方便地处理各种 URL 操作,无需手动导入 `urllib.parse` 中的函数:
104
+
105
+ ```python
106
+ class MySpider(Spider):
107
+ def parse(self, response):
108
+ # 1. 处理相对URL和绝对URL
109
+ absolute_url = response.urljoin('/relative/path')
110
+
111
+ # 2. 解析URL组件
112
+ parsed = response.urlparse() # 解析当前响应URL
113
+ scheme = parsed.scheme
114
+ domain = parsed.netloc
115
+ path = parsed.path
116
+
117
+ # 3. 解析查询参数
118
+ query_params = response.parse_qs() # 解析当前URL的查询参数
119
+
120
+ # 4. 编码查询参数
121
+ new_query = response.urlencode({'key': 'value', 'name': '测试'})
122
+
123
+ # 5. URL编码/解码
124
+ encoded = response.quote('hello world 你好')
125
+ decoded = response.unquote(encoded)
126
+
127
+ # 6. 移除URL片段
128
+ url_without_fragment, fragment = response.urldefrag('http://example.com/path#section')
129
+
130
+ yield Request(url=absolute_url, callback=self.parse_detail)
131
+ ```
132
+
133
+ ### 编码检测优化
134
+
135
+ Crawlo 框架参考 Scrapy 的设计模式对 Response 对象的编码检测功能进行了优化,提供了更准确和可靠的编码检测:
136
+
137
+ ```python
138
+ class MySpider(Spider):
139
+ def parse(self, response):
140
+ # 自动检测响应编码
141
+ encoding = response.encoding
142
+
143
+ # 获取声明的编码(Request编码 > BOM > HTTP头部 > HTML meta标签)
144
+ declared_encoding = response._declared_encoding()
145
+
146
+ # 响应文本已自动使用正确的编码解码
147
+ text = response.text
148
+
149
+ # 处理解码后的内容
150
+ # ...
151
+ ```
152
+
153
+ 编码检测优先级:
154
+ 1. Request 中指定的编码
155
+ 2. BOM 字节顺序标记
156
+ 3. HTTP Content-Type 头部
157
+ 4. HTML meta 标签声明
158
+ 5. 内容自动检测
159
+ 6. 默认编码 (utf-8)
160
+
161
+ ### 选择器方法优化
162
+
163
+ Crawlo 框架对 Response 对象的选择器方法进行了优化,提供了更便捷的数据提取功能,方法命名更加直观和统一:
164
+
165
+ ```python
166
+ class MySpider(Spider):
167
+ def parse(self, response):
168
+ # 1. 提取单个元素文本(支持CSS和XPath)
169
+ title = response.extract_text('title') # CSS选择器
170
+ title = response.extract_text('//title') # XPath选择器
171
+
172
+ # 2. 提取多个元素文本
173
+ paragraphs = response.extract_texts('.content p') # CSS选择器
174
+ paragraphs = response.extract_texts('//div[@class="content"]//p') # XPath选择器
175
+
176
+ # 3. 提取单个元素属性
177
+ link_href = response.extract_attr('a', 'href') # CSS选择器
178
+ link_href = response.extract_attr('//a[@class="link"]', 'href') # XPath选择器
179
+
180
+ # 4. 提取多个元素属性
181
+ all_links = response.extract_attrs('a', 'href') # CSS选择器
182
+ all_links = response.extract_attrs('//a[@class="link"]', 'href') # XPath选择器
183
+
184
+ yield {
185
+ 'title': title,
186
+ 'paragraphs': paragraphs,
187
+ 'links': all_links
188
+ }
189
+ ```
190
+
191
+ 所有选择器方法都采用了简洁直观的命名风格,便于记忆和使用。
192
+
193
+ ### 工具模块
194
+
195
+ Crawlo 框架提供了丰富的工具模块,用于处理各种常见任务。选择器相关的辅助函数现在位于 `crawlo.utils.selector_helper` 模块中:
196
+
197
+ ```python
198
+ from crawlo.utils import (
199
+ extract_text,
200
+ extract_texts,
201
+ extract_attr,
202
+ extract_attrs,
203
+ is_xpath
204
+ )
205
+
206
+ # 在自定义代码中使用这些工具函数
207
+ title_elements = response.css('title')
208
+ title_text = extract_text(title_elements)
209
+
210
+ li_elements = response.css('.list li')
211
+ li_texts = extract_texts(li_elements)
212
+
213
+ link_elements = response.css('.link')
214
+ link_href = extract_attr(link_elements, 'href')
215
+
216
+ all_links = response.css('a')
217
+ all_hrefs = extract_attrs(all_links, 'href')
218
+ ```
219
+
220
+ ## 日志系统
221
+
222
+ Crawlo 拥有一个功能强大的日志系统,支持多种配置选项:
223
+
224
+ ### 基本配置
225
+
226
+ ```python
227
+ from crawlo.logging import configure_logging, get_logger
228
+
229
+ # 配置日志系统
230
+ configure_logging(
231
+ LOG_LEVEL='INFO',
232
+ LOG_FILE='logs/app.log',
233
+ LOG_MAX_BYTES=10*1024*1024, # 10MB
234
+ LOG_BACKUP_COUNT=5
235
+ )
236
+
237
+ # 获取logger
238
+ logger = get_logger('my_module')
239
+ logger.info('这是一条日志消息')
240
+ ```
241
+
242
+ ### 高级配置
243
+
244
+ ```python
245
+ # 分别配置控制台和文件日志级别
246
+ configure_logging(
247
+ LOG_LEVEL='INFO',
248
+ LOG_CONSOLE_LEVEL='WARNING', # 控制台只显示WARNING及以上级别
249
+ LOG_FILE_LEVEL='DEBUG', # 文件记录DEBUG及以上级别
250
+ LOG_FILE='logs/app.log',
251
+ LOG_INCLUDE_THREAD_ID=True, # 包含线程ID
252
+ LOG_INCLUDE_PROCESS_ID=True # 包含进程ID
253
+ )
254
+
255
+ # 模块特定日志级别
256
+ configure_logging(
257
+ LOG_LEVEL='WARNING',
258
+ LOG_LEVELS={
259
+ 'my_module.debug': 'DEBUG',
260
+ 'my_module.info': 'INFO'
261
+ }
262
+ )
263
+ ```
264
+
265
+ ### 性能监控
266
+
267
+ ```python
268
+ from crawlo.logging import get_monitor
269
+
270
+ # 启用日志性能监控
271
+ monitor = get_monitor()
272
+ monitor.enable_monitoring()
273
+
274
+ # 获取性能报告
275
+ report = monitor.get_performance_report()
276
+ print(report)
277
+ ```
278
+
279
+ ### 日志采样
280
+
281
+ ```python
282
+ from crawlo.logging import get_sampler
283
+
284
+ # 设置采样率(只记录30%的日志)
285
+ sampler = get_sampler()
286
+ sampler.set_sample_rate('my_module', 0.3)
287
+
288
+ # 设置速率限制(每秒最多100条日志)
289
+ sampler.set_rate_limit('my_module', 100)
290
+ ```
291
+
292
+ ## Windows 兼容性说明
293
+
294
+ 在 Windows 系统上使用日志轮转功能时,可能会遇到文件锁定问题。为了解决这个问题,建议安装 `concurrent-log-handler` 库:
295
+
296
+ ```bash
297
+ pip install concurrent-log-handler
298
+ ```
299
+
300
+ Crawlo 框架会自动检测并使用这个库来提供更好的 Windows 兼容性。
301
+
302
+ 如果未安装 `concurrent-log-handler`,在 Windows 上运行时可能会出现以下错误:
303
+ ```
304
+ PermissionError: [WinError 32] 另一个程序正在使用此文件,进程无法访问。
305
+ ```
306
+
307
+ ## 爬虫自动发现
308
+
309
+ Crawlo 框架支持通过 `SPIDER_MODULES` 配置自动发现和加载爬虫,类似于 Scrapy 的机制:
310
+
311
+ ```python
312
+ # settings.py
313
+ SPIDER_MODULES = [
314
+ 'myproject.spiders',
315
+ 'myproject.more_spiders',
316
+ ]
317
+
318
+ SPIDER_LOADER_WARN_ONLY = True # 加载错误时只警告不报错
319
+ ```
320
+
321
+ 框架会自动扫描配置的模块目录,发现并注册其中的爬虫类。
322
+
323
+ ## 文档
324
+
325
+ 请查看 [文档](https://github.com/crawl-coder/Crawlo) 获取更多信息。
326
+
327
+ ## 许可证
328
+
329
+ MIT
@@ -1,40 +1,41 @@
1
- crawlo/__init__.py,sha256=2Io5P9qJghOAjjD3YWdaiIq5laPLyLWVkEqgiVfUa3o,2381
2
- crawlo/__version__.py,sha256=2ik6wvURqg571WApVvR_ELhg_eclmC_WvbDLEPmoO4Q,23
1
+ crawlo/__init__.py,sha256=n5vFwi0iuYrpAIyoNJZzWHV1gvF-vh-Yze3jiuwEXqM,2180
2
+ crawlo/__version__.py,sha256=47Hd5fKyrYgSfmOfBF7ibw9EyAE1ctXOQOLg_x_Ld9w,23
3
3
  crawlo/cli.py,sha256=AQnAB5NMI-Ic1VPw_Jjng8L4AI4-wMozOwzE6CfXkZU,2402
4
4
  crawlo/config.py,sha256=EQIT7WpkXAlr2ocd5SYJYOKTSWUlQx2AkTHX7ErEWxw,9798
5
5
  crawlo/config_validator.py,sha256=oY4-2bwXUlwHAnGgkI-EznviDfML_dcxbWSGXNSxC2k,11516
6
- crawlo/crawler.py,sha256=E-fgYVtx6v2xEPixlQeWfNYVbW1oeWE0fQFZTQ6_K-I,27305
6
+ crawlo/crawler.py,sha256=6f9eDeUEZVfnUywaZ6CnL5R3bHO4sG82z-Syl3zZKvE,27360
7
7
  crawlo/event.py,sha256=ZhoPW5CglCEuZNFEwviSCBIw0pT5O6jT98bqYrDFd3E,324
8
8
  crawlo/exceptions.py,sha256=YVIDnC1bKSMv3fXH_6tinWMuD9HmKHIaUfO4_fkX5sY,1247
9
9
  crawlo/framework.py,sha256=9gP6VN4MHqutGXaxnwpNMSULfVYbNp906UdZiJGywlQ,9458
10
- crawlo/mode_manager.py,sha256=S4dUoeVZ4fMnd4pXWutcHwk5Zv68ZBTgo9taR9OkQiM,7768
11
- crawlo/project.py,sha256=nVRc0CIdd9g863NGfuItvajl8zlO5mEta4FQCx9_vZ8,14060
10
+ crawlo/interfaces.py,sha256=q1vwMSiZLfLpPhFa9Y0hAcjYEKvLkW2fZ2fmoAZ-5TE,653
11
+ crawlo/mode_manager.py,sha256=e8QmwsnndFx_hGME_7w-hazKo0GOYjUr-7FBf7dWxgc,8903
12
+ crawlo/project.py,sha256=9wnlHd-rYAC3TT1Fc1ftyUBx7mbDT6TQCqoaIP6N3iA,13998
12
13
  crawlo/stats_collector.py,sha256=hIjlnX750jU4Oncyand1jBccfaX4Tu7egd2DBYu2N7A,2379
13
14
  crawlo/subscriber.py,sha256=h8fx69NJZeWem0ZkCmfHAi2kgfDGFObHpwN0aGNUM6Y,5115
14
- crawlo/task_manager.py,sha256=I9h3Rl0VRAfwqp24CHT3TuEAapNdTbVghkmuJhtM7jg,5966
15
+ crawlo/task_manager.py,sha256=Ic6PFUqZOhLXuZ_UEk_8Neb9FmqYv8I2RzV3vLzFNSU,5966
15
16
  crawlo/commands/__init__.py,sha256=orvY6wLOBwGUEJKeF3h_T1fxj8AaQLjngBDd-3xKOE4,392
16
17
  crawlo/commands/check.py,sha256=TKDhI_sj7kErgiJpt2vCZ9QL-g6yWjrrPWKbgh8pgEU,23199
17
18
  crawlo/commands/genspider.py,sha256=JB4ZuFpKsYwtjx3DSsxugH7e3kqxhDWPG5ZKfvM0isI,6041
18
19
  crawlo/commands/help.py,sha256=8xPC0iNCg1rRBoK2bb6noAEANc1JwrdM35eF-j6yeZM,5111
19
20
  crawlo/commands/list.py,sha256=trzcd3kG6DhkOqYZADcl3yR7M8iJBgRw5fE-g9e0gVM,5877
20
21
  crawlo/commands/run.py,sha256=EjpIilgCTkXGVSV4rEISbJubdhqrok9nNe5-xDfDK5E,13169
21
- crawlo/commands/startproject.py,sha256=-Bo8vvDfIhqzGmWyhxMatBlPLhYpRwJC7l4fpbN8vVk,16506
22
+ crawlo/commands/startproject.py,sha256=boZrMyn6TgCi1jt3D3DQfui6hJitjwNO8mqlWKNOBns,17366
22
23
  crawlo/commands/stats.py,sha256=vlGJLyiXZtY0ASdzCK59JNereSsAel4W9JCGaOzCr-8,6201
23
24
  crawlo/commands/utils.py,sha256=YVNEEzlm_qNY3SVvU8h6o2lQMkVgypvoB4ZFrP4gln0,5578
24
25
  crawlo/core/__init__.py,sha256=BWkj3AqZwp2Bk73UzUlC_qsqv_MH_HNrzy4DY1hosj4,1330
25
- crawlo/core/engine.py,sha256=y9mj0nKHb3Ki4scXkxsMO6XoTIqxmbsD0WuryR_6iHg,19385
26
+ crawlo/core/engine.py,sha256=znJ0VDFBImYi6KkTD8GHNo-V9BDnPSv9iYfTYLPsVSc,19379
26
27
  crawlo/core/processor.py,sha256=hR5MrbeZvDUx0ShKntr4qwkeVZzjlPJ8EAKgIFkNVts,1555
27
- crawlo/core/scheduler.py,sha256=-6DBz7gUg8WwUl39DAbi6Ng2AJSswNBCDr_mV3sUZFs,14088
28
+ crawlo/core/scheduler.py,sha256=G9xtrvE1wsTSOTOFUKDEphJvy6Xk5icuCGXTScYy7nQ,14084
28
29
  crawlo/data/__init__.py,sha256=UPqgioMdu3imSUmpLWzVlpvoBnEfaPSAT-crCcWd7iw,121
29
30
  crawlo/data/user_agents.py,sha256=zjjFkldQkqtrn45j0WZplaZLannPxZDeAU0JofxQcBc,9891
30
- crawlo/downloader/__init__.py,sha256=VZG5HiSHOmimiH9okQN3MBwgXsCzxr2awflVz5UiboY,8897
31
+ crawlo/downloader/__init__.py,sha256=P5pl-BGYCkdKWgoIewcYPz7ocVLixVfYuCDFmYyuqIw,8966
31
32
  crawlo/downloader/aiohttp_downloader.py,sha256=-dIFucMOQhiiEmtgEpG2Lqh1vF-PvDddbIrZ8Hge0Ig,9556
32
33
  crawlo/downloader/cffi_downloader.py,sha256=QxoeocCE2DsQCnhZla6-BjhplaTZDWMbEJmNrghWSDA,10488
33
34
  crawlo/downloader/httpx_downloader.py,sha256=MpgDeIdGqNsiSKLOEDBnr5Z0eUbhHnqVEmAuoIfJmFU,12296
34
35
  crawlo/downloader/hybrid_downloader.py,sha256=dNnFeegRnyLaOxTWI6XrWKqqVPx80AZBZNgmrcKRVBM,8240
35
36
  crawlo/downloader/playwright_downloader.py,sha256=L-TVzG7cYfuBlqW0XSZuz5C_r9fpJrmYNcoQ-cDEna4,16663
36
37
  crawlo/downloader/selenium_downloader.py,sha256=P8GuhEw6OYVeN3oeksuBLpUJCELXiu0mAR23X6IIOAA,21508
37
- crawlo/extension/__init__.py,sha256=-R4P9fklpgSB8cGEduMsjkbJZ7ReYSrZaYjApgYUm9U,2986
38
+ crawlo/extension/__init__.py,sha256=wwaTTWYUzbg5b84sQn2JvBlyuhVGkw-REkhVlR2vymA,2980
38
39
  crawlo/extension/health_check.py,sha256=stDpyP4gOzAdbBlPbSf0rge0QounAhF8CtrGq5fa_7c,5657
39
40
  crawlo/extension/log_interval.py,sha256=N25aNjFkjh9br6g3ViFqRrz06C2geAdfGas-OT2oZh8,4497
40
41
  crawlo/extension/log_stats.py,sha256=CWjMb_V1o8j8uwGFvh9SZ7EYLl_OYzmuIsOT5V-_BE4,2452
@@ -47,12 +48,12 @@ crawlo/factories/base.py,sha256=loB_vyc0CsQK0BgwRoSOFS8gPcmv-b9irtjC9UaBGA4,1832
47
48
  crawlo/factories/crawler.py,sha256=e9zl4qytByzsYbz66klY3cZTvQei0-9GjdFK4XCyXcg,3198
48
49
  crawlo/factories/registry.py,sha256=YU87CdsntOz609M0aQbGcCG9glPinUJxOn-_CaM4f-M,2595
49
50
  crawlo/filters/__init__.py,sha256=noSe07tp2Ip_zXwAbS021BojrqNRaObDO-2YV6DOQfc,4381
50
- crawlo/filters/aioredis_filter.py,sha256=unms0WaRsxbCL6VaAQMT-SsBHKyxR6-o118pf-3ErK0,9512
51
- crawlo/filters/memory_filter.py,sha256=ZojFhZ6gE76aQBC-rfImxSkSMwQtiotenx0pIcQOaFg,9561
51
+ crawlo/filters/aioredis_filter.py,sha256=WglGW-XLjsy8r_NDrNsXk_nzwaIq081MBnooHqCCQZA,9841
52
+ crawlo/filters/memory_filter.py,sha256=gIPXCw650v81XRiz0MhWXH-zcn24ERzDTzBQZRoy1YU,9890
52
53
  crawlo/initialization/__init__.py,sha256=uNRMm9GccMYZi51scpvo-CPx_3ayp3Y81psBHlUoDfw,1132
53
- crawlo/initialization/built_in.py,sha256=1uEEtYCTQlfx5uRW-s9oumlmIEJrjZa0QpOLAbZbZqI,15758
54
+ crawlo/initialization/built_in.py,sha256=DlZf4k9FlU52tnwlFtKqWHqlFZpo-VHB0qP61rVqJzo,16259
54
55
  crawlo/initialization/context.py,sha256=wG9t-M-Qttj7TN6gDumPX5Q5GHaPDUpLTZZDne2r3TE,4863
55
- crawlo/initialization/core.py,sha256=sMiSBueoaWoDohQJ50IFC_DSvj0EeYSB7G1MORlDtMc,6872
56
+ crawlo/initialization/core.py,sha256=GWc9QNSp2JmHlCAhgq1aqGDXHcO6QlxFAVfePKC1xeo,6872
56
57
  crawlo/initialization/phases.py,sha256=iWhGITh9eudfSmzf2G3DLPAIJkCDrv9TVBtnAoS1_3c,4176
57
58
  crawlo/initialization/registry.py,sha256=kKVegqWxtPCaZ1mTyVHN4yFecAGDOPFJfebkP-SoobE,4919
58
59
  crawlo/items/__init__.py,sha256=rFpx1qFBo0Ik7bSdnXC8EVTJUOQdoJYGVdhYjaH00nk,409
@@ -66,10 +67,10 @@ crawlo/logging/factory.py,sha256=b4Z0fBmP00GpvpJ7k4QxqYP32n_EqG5KD3ouUWU7L4U,665
66
67
  crawlo/logging/manager.py,sha256=aem7yla0q83rf2CpwQEyg5YMbey4TzkquBVWiKtcqdQ,3182
67
68
  crawlo/logging/monitor.py,sha256=mzZWm3rQ2mGUoAmkEJPUkBmR0VWK66l14aqqhQ0zwE8,4935
68
69
  crawlo/logging/sampler.py,sha256=1BoRMpusP3wbXRnet5xl9_Yb_3_-AUq9WJhK9gYg7v4,5292
69
- crawlo/middleware/__init__.py,sha256=PSwpRLdBUopaQzBp1S0zK_TZbrRagQ4yzvgyLy4tBk8,570
70
+ crawlo/middleware/__init__.py,sha256=khNCstVcYlL14SbLZ8ys9ub1-C8k4FIiMQ99Vw9wA-0,635
70
71
  crawlo/middleware/default_header.py,sha256=Pw-ev8ffi16GeCh84R5L3hAZgp3G1QXS-H5kV3JEp4Q,5164
71
72
  crawlo/middleware/download_delay.py,sha256=2iWnJFtWDlqDy5MsAob8TPiJQoiz9v21yatkBI0eptg,3542
72
- crawlo/middleware/middleware_manager.py,sha256=_Kgd6Ir4cRUiPCEHJELZPOkKNtmu-WAE59dRWKPpAU8,6415
73
+ crawlo/middleware/middleware_manager.py,sha256=H_o0nwo_xQ8aSRnnvEs2Ho3fS-3WNi_5AjChhqvRYnk,6645
73
74
  crawlo/middleware/offsite.py,sha256=4tUkPqXMMXsi1WwYnJ_e7wMd6sRgK19QHRCYq8-w8jk,4682
74
75
  crawlo/middleware/proxy.py,sha256=uKk5OSLIs7jv9bBgkZwsi1rIpthooxhMrGBC2BPRDCc,16022
75
76
  crawlo/middleware/request_ignore.py,sha256=7qdX4zAimjSGwdod_aWUbOTfzLBWZ5KzLVFchGMCxCI,2663
@@ -79,7 +80,7 @@ crawlo/middleware/retry.py,sha256=Acfo95B9wF8fQTCQIqluZOS2hHdnknQu_FOHvpGKJp0,42
79
80
  crawlo/middleware/simple_proxy.py,sha256=rQ4RkqewGvDRCw021nGrg8ngkBzg3wqrEVqvSmBgQ6M,2256
80
81
  crawlo/network/__init__.py,sha256=bvEnpEUBZJ79URfNZbsHhsBKna54hM2-x_BV8eotTA4,418
81
82
  crawlo/network/request.py,sha256=e6-YLgK7SU8D19n21mQwqt_b_aeRVJFOgWPIBPal2ys,14178
82
- crawlo/network/response.py,sha256=QwJhL3xJfPVy_gwtGrg61oAgaqCoCmjyj1Ug7Zju7Pg,13060
83
+ crawlo/network/response.py,sha256=-URnNc_J7qBSG19uJbfuF6A_14MHLOtY78FvcZDzbsI,23418
83
84
  crawlo/pipelines/__init__.py,sha256=FDe2Pr5tiHtV8hFlheElRO_O1aVKvSWlkTcAl9BXAKA,637
84
85
  crawlo/pipelines/bloom_dedup_pipeline.py,sha256=vIF_6noJAdpotrJpnCmrVXCi59gRmHHn28mYW6VukbM,5465
85
86
  crawlo/pipelines/console_pipeline.py,sha256=bwe5hZgaVSWmh3R8XpOaaeAjJme-Ttrpo6G6f1cnLIg,1287
@@ -88,24 +89,24 @@ crawlo/pipelines/database_dedup_pipeline.py,sha256=IxahtD_mhni-Y21_idOMX58_Htf46
88
89
  crawlo/pipelines/json_pipeline.py,sha256=wrCsh8YInmcPLAkhPrHObMx89VZfhf-c7qRrYsTixPE,8585
89
90
  crawlo/pipelines/memory_dedup_pipeline.py,sha256=lKkYPu6vkpPjfQ1-xOLvPFT4VdTI8QVx0yjqtVR0ZB0,3598
90
91
  crawlo/pipelines/mongo_pipeline.py,sha256=PohTKTGw3QRvuP-T6SrquwW3FAHSno8jQ2D2cH_d75U,5837
91
- crawlo/pipelines/mysql_pipeline.py,sha256=Kjgu6cks1KD4FPXwlTnFaos2LG-N8LLaBDyKZ_MEcsI,14196
92
- crawlo/pipelines/pipeline_manager.py,sha256=R6MRb5d-caOit7PZoglJLHa3qQ68U5YAQlwt8KcjRxo,4393
92
+ crawlo/pipelines/mysql_pipeline.py,sha256=pLJQJUKqzWrrOxuO-eHXNq5xLza0DHeuGnpwX2Pc4NI,14186
93
+ crawlo/pipelines/pipeline_manager.py,sha256=_DtWfxcTinIf5ApzUOVjZksd2tPbc7qeKi92IVd_kbs,4387
93
94
  crawlo/pipelines/redis_dedup_pipeline.py,sha256=RB1kXLr8ZuWNrgZKYwt--tlmnWsQTbuwTsSt3pafol8,6077
94
95
  crawlo/queue/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
95
96
  crawlo/queue/pqueue.py,sha256=bbgd3l1VfqYXfz-4VFaiWLmJit1LdB3qHalCtNqyrqI,1210
96
97
  crawlo/queue/queue_manager.py,sha256=8rKygMxr6DgSjnGsKFmvlTI5XAARvQIN_ENkAruHGXs,21532
97
98
  crawlo/queue/redis_priority_queue.py,sha256=vLvg2toKaRrXD1QyEdu1ZjTmANv7clFaBF7mCtstBmI,15995
98
99
  crawlo/settings/__init__.py,sha256=NgYFLfk_Bw7h6KSoepJn_lMBSqVbCHebjKxaE3_eMgw,130
99
- crawlo/settings/default_settings.py,sha256=IKh2eZ9WWXkAbHx5K5KX0whNtumATRpZ7ifFPZJFfBk,11827
100
+ crawlo/settings/default_settings.py,sha256=kBcE5PF-sfB12cjIxHeNPEvzSWSHYDu6saEgrTGXn5o,11970
100
101
  crawlo/settings/setting_manager.py,sha256=yI1tGaludevxKGGZO3Pn4aYofrg2cwYwvMZCFC5PPZw,8595
101
102
  crawlo/spider/__init__.py,sha256=QGhe_yNsnfnCF3G9nSoWEw23b8SkP5oSFU5W79C5DzI,21881
102
103
  crawlo/templates/crawlo.cfg.tmpl,sha256=lwiUVe5sFixJgHFEjn1OtbAeyWsECOrz37uheuVtulk,240
103
104
  crawlo/templates/run.py.tmpl,sha256=g8yst2hkqhKGNotR33fDxwmEsX6aEvhrXY_cfYos_vc,788
104
105
  crawlo/templates/spiders_init.py.tmpl,sha256=p6UK8KWr8FDydNxiAh6Iz29MY5WmgXIkf2z-buOGhOM,354
105
106
  crawlo/templates/project/__init__.py.tmpl,sha256=aQnHaOjMSkTviOC8COUX0fKymuyf8lx2tGduxkMkXEE,61
106
- crawlo/templates/project/items.py.tmpl,sha256=8_3DBA8HrS2XbfHzsMZNJiZbFY6fDJUUMFoFti_obJk,314
107
- crawlo/templates/project/middlewares.py.tmpl,sha256=fxHqi-Sjec5GiHJciprOU-6SAUTzM728NlZckIqf9hM,4278
108
- crawlo/templates/project/pipelines.py.tmpl,sha256=j9oqEhCezmmHlBhMWgYtlgup4jhWnMlv6AEiAOHODkg,2704
107
+ crawlo/templates/project/items.py.tmpl,sha256=hpQ2AfUmhddnzMuKM5LF6t44dOfFXwJRAZlWFKUFOZw,343
108
+ crawlo/templates/project/middlewares.py.tmpl,sha256=eEobZl8g_0DtiwLYbirQULqOacH-yUrrs4PUrGcJ2UE,1098
109
+ crawlo/templates/project/pipelines.py.tmpl,sha256=7BeaQDMHbIjhKzRtzlCMiFlU8xgMzDs2PIHq3EVUAlQ,887
109
110
  crawlo/templates/project/settings.py.tmpl,sha256=mL9_JAyz8R35r-ywRHi4T-dtal7oczU5kodEWxldw40,5265
110
111
  crawlo/templates/project/settings_distributed.py.tmpl,sha256=RHzfWZITv-0ErCR9OYEswAZHpA5d9fYil0ZoGCtFt8g,5459
111
112
  crawlo/templates/project/settings_gentle.py.tmpl,sha256=pmjrBLjnpGcR90RkcJrM5O8PsTrRhUB92QR3R4TJyko,5733
@@ -114,8 +115,7 @@ crawlo/templates/project/settings_minimal.py.tmpl,sha256=1qUPhSdHtvLSHTpytUJ8K63
114
115
  crawlo/templates/project/settings_simple.py.tmpl,sha256=sIyrCIVXsHSKl8Yjj8HkGs-ppMFH26a5yp6egVNlT2Q,5585
115
116
  crawlo/templates/project/spiders/__init__.py.tmpl,sha256=llhcIItXpm0TlEeumeLwp4fcYv2NHl8Iru7tLhDhxiE,216
116
117
  crawlo/templates/spider/spider.py.tmpl,sha256=KvU-9YpN6MifDE7XzejjyyQS7RUjLDLZ8zqJcLwSsu0,5198
117
- crawlo/tools/__init__.py,sha256=tOYfYPvZlrO8cmvnMWBjTma6UTLTFZN3qdC8pJwHrzI,4142
118
- crawlo/tools/anti_crawler.py,sha256=LwLC6BkxDSkxc5H1hQ6kY9j7O0PZGAMPZECr7gbqw2M,9431
118
+ crawlo/tools/__init__.py,sha256=sXDMZNP6EwZIFivGcRthxqD1DFMMS8UOJvULAzHD-w4,3927
119
119
  crawlo/tools/authenticated_proxy.py,sha256=ULCK0Cc9F2rGhRqu6kzKBdxzK9v2n1CsatSQ_PMxpAg,7272
120
120
  crawlo/tools/data_formatter.py,sha256=iBDHpZBZvn9O7pLkTQilE1TzYJQEc3z3f6HXoVus0f0,7808
121
121
  crawlo/tools/data_validator.py,sha256=bLWnkpFdclJuqjtSAgMI5nznN4vAuPwE1YaiFWKWenM,5490
@@ -127,27 +127,28 @@ crawlo/tools/request_tools.py,sha256=oXrk4yWMACVa65fDQCQgzsg6a94FH4_lS7qNR53FHYU
127
127
  crawlo/tools/retry_mechanism.py,sha256=4AQ_HLuYt4hYMI9XHoKFk2GQKEiDJB5pAnsMCfjc6Bk,7777
128
128
  crawlo/tools/scenario_adapter.py,sha256=pzysL1B2uQ1ZSEncVHd9Hv2viHNgaxP44YAUcDcppfw,9660
129
129
  crawlo/tools/text_cleaner.py,sha256=UrMGcgRnJaufjmDKIDsRYKMA8znCAArHDgouttWPygk,6690
130
- crawlo/utils/__init__.py,sha256=8kMbOZf9bzOUjtvh2QvqXZmiZh3pYzxXH9YQhYcwcoY,597
130
+ crawlo/utils/__init__.py,sha256=nxLnfqcEGLnsfSEagoKNyu-pm2ByU9BwE5tLxcS71Qo,1003
131
131
  crawlo/utils/batch_processor.py,sha256=8LNy-K2SrQVUxmGEWxQyYw_j9M-erN4Ie7O4d3zpBvM,9142
132
- crawlo/utils/class_loader.py,sha256=kZRGfyA3OPAH2QsQ-beOKjw3JKKBs6OyJyJyXkvWDrc,675
133
132
  crawlo/utils/controlled_spider_mixin.py,sha256=8CuM3Cr2wQLHbaO_ohbCsPcImJnyfZHpERbSeMgQ-AQ,16936
134
- crawlo/utils/db_helper.py,sha256=ZqOt1d3mErVv4TOvoWlov0niUxORB9aHByTmMoNFIDw,10902
135
- crawlo/utils/enhanced_error_handler.py,sha256=fJC__rnYNKTNUHNbgjZtT846HoE31qyGbPft9bwyYLU,14214
133
+ crawlo/utils/db_helper.py,sha256=xTgBTXSWTNXM19rLsypPtnsswO0HdDV1K7zn_wYk4s0,8137
136
134
  crawlo/utils/env_config.py,sha256=W-VD_WF63DHxsyJysvp1eJwRh3L_pBRl_PitQAY3nQY,4079
137
- crawlo/utils/error_handler.py,sha256=vJ_4EVkuVn_TrM16VgN4doyhe_Pg7xWwVUWIENgJQAg,5455
138
- crawlo/utils/fingerprint.py,sha256=70Me5avs40HYbz6LQ9La56EVP2qRapYjX-zF7WQDGkM,3687
135
+ crawlo/utils/error_handler.py,sha256=e2LeUGT_OMcNKcjiX9Pp-NuQh5spsHBqIPBd7VxA2IQ,16247
136
+ crawlo/utils/fingerprint.py,sha256=3IbctH3zwyBjN_12SH7-vrFt-akA2WSo3iAzHc6u--s,3689
139
137
  crawlo/utils/func_tools.py,sha256=y-TYP9H3X67MS_foWy9Z2LIS6GP7Y4Cy3T168ulq3Jc,2451
140
138
  crawlo/utils/large_scale_config.py,sha256=NZMsDj4qbVx06Fu0aHqNKX1yzo6WFT7CgrhVnvw1ZFs,8372
141
139
  crawlo/utils/large_scale_helper.py,sha256=4ORkZcIrwJ0SlKOUh7l7WIuERORuRhNBgHCM71Rz0n0,12452
142
140
  crawlo/utils/log.py,sha256=KmUWVYq8t6fSGOC88nnYCDxwBUdoPWvaBmpOSHn2oWI,2914
141
+ crawlo/utils/misc.py,sha256=m_TbfMf4Aoe70zmkv7XWyFg8Rz0qOYPXepwB6EcYr7Y,2519
143
142
  crawlo/utils/performance_monitor.py,sha256=32KspSo7RWvCX_fl0ZFn4ScWWOqbVVwEhPRd921Ez6I,9832
144
143
  crawlo/utils/queue_helper.py,sha256=gFmkh1jKlIcN1rmo2Jl6vYcLP5ByUWlfHO9eNlZPBLs,4918
145
144
  crawlo/utils/redis_connection_pool.py,sha256=EsPZkmQctWkoYU2wcrqkgwnIWnE6nG4XCXECKn216JA,12575
146
145
  crawlo/utils/redis_key_validator.py,sha256=-UTTx0Ul184pzwSply8hVdH0lp-gkXXOc_gEHR_7VlU,5809
147
- crawlo/utils/request.py,sha256=ejdKpTwc-HE04HQybafhOVywzz57IV3pY0YMkSLyGUo,9065
146
+ crawlo/utils/request.py,sha256=RcINrLvShfZ5VHu1T_hJJRXp-viKWSo35C2JOgWyl2k,8641
148
147
  crawlo/utils/request_serializer.py,sha256=b5abcgjJk4IU6Wfg46AmOAU2wmzu_WqcpEbuAncRMGQ,8931
149
- crawlo/utils/spider_loader.py,sha256=xNzQb7qhQ7TqZsfFtCLpuVcsGi-USriZosU0YSBr9II,2233
148
+ crawlo/utils/selector_helper.py,sha256=BVczzsSzPL5zF5KHXK3hyuqEl9o0ADYEuCH7Aw8aj98,4332
149
+ crawlo/utils/spider_loader.py,sha256=oxifl0p4SOFhvvnD38Em4zGtC7sRr_pw4dki01MoAq0,7677
150
150
  crawlo/utils/system.py,sha256=24zGmtHNhDFMGVo7ftMV-Pqg6_5d63zsyNey9udvJJk,248
151
+ crawlo/utils/text_helper.py,sha256=TTZgQPayMFUOYj8syt47Gwa4AQVY15W1b56STJetAKE,2920
151
152
  crawlo/utils/tools.py,sha256=uy7qw5Z1BIhyEgiHENvtM7WoGCJxlS8EX3PmOA7ouCo,275
152
153
  crawlo/utils/url.py,sha256=RKe_iqdjafsNcp-P2GVLYpsL1qbxiuZLiFc-SqOQkcs,1521
153
154
  examples/__init__.py,sha256=NkRbV8_S1tb8S2AW6BE2U6P2-eGOPwMR1k0YQAwQpSE,130
@@ -156,6 +157,7 @@ tests/advanced_tools_example.py,sha256=1_iitECKCuWUYMNNGo61l3lmwMRrWdA8F_Xw56UaG
156
157
  tests/authenticated_proxy_example.py,sha256=fKmHXXxIxCJXjEplttCWRh7PZhbxkBSxJF91Bx-qOME,3019
157
158
  tests/baidu_performance_test.py,sha256=wxdaI7UwKboMYH_qcaqZLxAStvndH60bvKGzD8F-jaI,3974
158
159
  tests/baidu_test.py,sha256=NKYnwDbPJX3tmKtRn7uQ_QWzUXiLTQC-Gdr1cQkJzEo,1874
160
+ tests/bug_check_test.py,sha256=EIDOUk_QgtBOWKuBLm_WHbgJ0fsDuJACJ-nuxnBIdkQ,8056
159
161
  tests/cleaners_example.py,sha256=blVqSJ7SeWUNd17JjHZJgVTzWH65XKevLyaMB_Wg8qA,5324
160
162
  tests/comprehensive_framework_test.py,sha256=_1N-OGbKvBTNachNvIjkL_izr4uv6OUybUkhxxz5MAk,5977
161
163
  tests/comprehensive_test.py,sha256=wypCaB56IV8w8nd5VA5LSXUQ3IgLf0AKKUiCci6yEJQ,2969
@@ -169,13 +171,13 @@ tests/debug_log_config.py,sha256=cPS6qOLnynYTFOxpjcy9OUgIqrkasWb9f2c_PASc2_E,371
169
171
  tests/debug_log_levels.py,sha256=CZWG3KGDq-hYJ5TPhoZTyjKFKkkM-AoK3oP1w-JC1sc,2168
170
172
  tests/debug_pipelines.py,sha256=FMb36bH9lQxBLb-nM579hBRK1S16Vxu1t_BC3Dj8O2w,2164
171
173
  tests/detailed_log_test.py,sha256=oTCFF_Un7Jq2gV4rpRDFOxlHJSthnQhvEf0CSItfB7I,7501
174
+ tests/direct_selector_helper_test.py,sha256=p7_x3x87JUnpKplmwYO4zN5ympcPJSPdHsviso-LmpI,2862
172
175
  tests/distributed_test.py,sha256=u6cEiymZzCItaTClKTxwVjNmOj9_PZii4_eGNAVMDW8,1825
173
176
  tests/distributed_test_debug.py,sha256=pUv6ZKEJ5pK9xOA7lgVk6WW3cBAtnb1bsuZzJ8oGLvY,2181
174
177
  tests/dynamic_loading_example.py,sha256=7LdeQZFevrb-U1_dgr4oX3aYo2Da4HvE_0KIf1fw4Ew,18786
175
178
  tests/dynamic_loading_test.py,sha256=dzDW7b66HeDsIYsYgvNRihE3V6b6gEbUGQpp-eJbcIM,3413
176
179
  tests/env_config_example.py,sha256=_ZRDh_LR23ZKpy9E--y_KM0QIOiZF5vRT98QTn52TY8,4951
177
180
  tests/error_handling_example.py,sha256=grTeo1X17rFz4lhgASb0g5yu4NWbmNz5neyuonnNR40,5294
178
- tests/final_command_test_report.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
179
181
  tests/final_comprehensive_test.py,sha256=szTNbtwKfYNmE0kzDPCsE_kvnTG7FNKl2JERakGhKIk,4314
180
182
  tests/final_log_test.py,sha256=CpZ4ZvvuvFiBvz1a50qN599XIU086ett_I0bSX42BLU,9367
181
183
  tests/final_validation_test.py,sha256=4cuTr58i46JI6M4Tz54e7vrVFrOr3R7HSWgyQPKmM9M,5244
@@ -184,20 +186,23 @@ tests/framework_performance_test.py,sha256=Qp47VrsCK0ylEhDkFOm7lnD8rVkaJ7u1MopsE
184
186
  tests/log_buffering_test.py,sha256=0B5UY1yQuxnBU1pEyz3IBYweN__4fOkPXly-kYfOpNU,3226
185
187
  tests/log_generation_timing_test.py,sha256=zHb_m2FqlpRCYw-wqFWFn8cbVH8UR3VvXKSM6nNnbgo,4681
186
188
  tests/optimized_performance_test.py,sha256=bA0dN4j7ViyTSSiCJEjlkJ9Y7jspTFKs2xX7UXHE8Gs,7379
187
- tests/performance_comparison.py,sha256=CKGFbcwqsF3CAWIaOWF3Ca6o_OdEdZCgIfYb_m6CzIw,9302
189
+ tests/performance_comparison.py,sha256=UevHOM_9z2ILedf_xZ_8F8QiPjb_M8WTfGQrxzKtgco,9266
188
190
  tests/queue_blocking_test.py,sha256=hp-6hmTOO64oOAWVtlN8cFJ95GjbK3t9fj-4q_TKowk,3955
189
191
  tests/queue_test.py,sha256=HeBiBXqAgIAbUkLVQ3McS6NdRselA30m3lnuxNBvZbk,2689
190
192
  tests/redis_key_validation_demo.py,sha256=WD2jvuBwHhLYIb3lVFtvYSSnmXWn1EW4EPCEwFhfi6M,4467
191
193
  tests/request_params_example.py,sha256=J50NdsnK1sDrqG-5m3oA-mu1_wHwVwHIfsWxGeQpz7o,4250
192
194
  tests/response_improvements_example.py,sha256=t1cbG3nesp82bqog4_ku1GvQzNbhRyWa5EaKTmOPrSk,5402
193
195
  tests/simple_command_test.py,sha256=8TowzW45ukKTPeaNC5uij3RR7rqPULiBr2PguSSMdP8,3688
194
- tests/simple_crawlo_test.py,sha256=gfL910xt56HhrERAB9QhO_oxKWKw2j2IsifQkgYtwvE,4848
195
- tests/simple_log_test.py,sha256=xOEGH5UzRLsCpwqgi1VEoZ7NY3DoWckwy6Wy9lKQ6ZE,1757
196
+ tests/simple_crawlo_test.py,sha256=FYDn5cgAxHN81QSYa_wcJcxJit7aLnIopnkHKKr83dE,4801
197
+ tests/simple_follow_test.py,sha256=3vNT5Eqwza6fxAY9Xl_9xtFGdfrPwm6NnVHdRmJsH8A,1053
196
198
  tests/simple_log_test2.py,sha256=Rn3XerVlkT0M-vbQmrQL7bVIZG3REnJNmMvUvKr6C20,3944
197
199
  tests/simple_optimization_test.py,sha256=hflvaC81ra1ZrPOp-Z7rQrH95OnSADvAjy95BLulD6o,3678
198
200
  tests/simple_queue_type_test.py,sha256=wAf4XLKl9oS5BlfrRJ1SLY-kYmNq4YY0LdIC7HmW-yg,1193
201
+ tests/simple_response_selector_test.py,sha256=0naeRUX1n-oAW6VRj-12c6nre2D0RjJ0dD3Nx7BBTjY,2844
202
+ tests/simple_selector_helper_test.py,sha256=l9FsVhQ-z-ICqqetLIyeSaI8Dn6bXNCD8sLdr0tpvms,4438
203
+ tests/simple_selector_test.py,sha256=XzOYzpEzr0yaioLV6v-4XC60VZMd5jRthlyp7Ud02o4,6630
199
204
  tests/simple_spider_test.py,sha256=RzziJg-fbIVJ6_CgbismfkwrLwpJp4WWp2RLgG7Tpws,1168
200
- tests/simple_test.py,sha256=Pyxgg0YnBG_3_NRFla0HgJ21CYlfO4K-js3x6_-6ZEk,1258
205
+ tests/simple_url_test.py,sha256=g9RBn46V7fHZTU0BrB5pl5AGCbw6QuKOXClVACb-MEQ,2297
201
206
  tests/spider_log_timing_test.py,sha256=pvYpKZemClr4mCR76xywhsiWbT5sPdzD_taZKFjlgvM,5573
202
207
  tests/test_advanced_tools.py,sha256=HT_TcwfFzli-CavIJSqQqnCxnBn5FDMX09zL7AJ5tNY,5398
203
208
  tests/test_all_commands.py,sha256=VgVa9SzU5Irvn5igHpC2W4E_6ZDWDt7jc-T4UPK_PFE,7718
@@ -212,6 +217,8 @@ tests/test_config_consistency.py,sha256=RgSxyaypMpysltsGSh1vFMeOShiZZG0rmUKzEhNL
212
217
  tests/test_config_merge.py,sha256=ts1j-TIKkFS0EO5q1I4O7f4YUKR5MLTmRSqOpOlv094,5606
213
218
  tests/test_config_validator.py,sha256=Z4gBHkI0_fEx-xgiiG4T33F4BAuePuF81obpNTXfseY,6202
214
219
  tests/test_controlled_spider_mixin.py,sha256=AQ493ic6AxZAKd7QCgnUES92BBWCMNteTd5DjoQlhwo,2864
220
+ tests/test_crawler_process_import.py,sha256=iIPqSCpv2VRb_hWTu5euLME4PDFf7NwixeBypRuv39Y,1175
221
+ tests/test_crawler_process_spider_modules.py,sha256=uMr4esj6ascVBzt0WrPd3ZOQfKD00O6tJrNhuWOdvV0,1395
215
222
  tests/test_crawlo_proxy_integration.py,sha256=81DVwosMoiSMxj4V_jLzcL7aqvSv_8ucggkQyXsvzT0,2733
216
223
  tests/test_date_tools.py,sha256=pcLDyhLrZ_jh-PhPm4CvLZEgNeH9kLMPKN5zacHwuWM,4053
217
224
  tests/test_dedup_fix.py,sha256=UFdm8lIi0ZIdp40W8ruxRD69bxzijuFUfNyJmB4Fwl0,8788
@@ -226,18 +233,19 @@ tests/test_dynamic_downloaders_proxy.py,sha256=t_aWpxOHi4h3_fg2ImtIq7IIJ0r3PTHtn
226
233
  tests/test_dynamic_proxy.py,sha256=zi7Ocbhc9GL1zCs0XhmG2NvBBeIZ2d2hPJVh18lH4Y0,3172
227
234
  tests/test_dynamic_proxy_config.py,sha256=C_9CEjCJtrr0SxIXCyLDhSIi88ujF7UAT1F-FAphd0w,5853
228
235
  tests/test_dynamic_proxy_real.py,sha256=krWnbFIH26mWNPhOfPMmx3ZxJfOreZxMZFGwVb_8-K8,3511
229
- tests/test_edge_cases.py,sha256=1RnFaCebYTDNNz_LK8M0MepiSwPvJUk_FBK4nQTCUbg,10729
236
+ tests/test_edge_cases.py,sha256=460JtYR6yuTo8J4wqJScMzDkrrDUE2Q8R425AaUycIQ,11127
237
+ tests/test_encoding_core.py,sha256=k5fZET0R1KInhAlbbHEJv4m9d6NuibOxxfIcR43TS7Y,1681
238
+ tests/test_encoding_detection.py,sha256=Zb1KkF2CR57qa0Hr_Iv8msompGJZT2EIL_2mGp0zX9Q,4245
230
239
  tests/test_enhanced_error_handler.py,sha256=Ku_86jv7iDe25v8ZxalcXxJJjIiIvQXWH8ZldbwdVm8,8581
231
240
  tests/test_enhanced_error_handler_comprehensive.py,sha256=j_cxyIPGks9A3untKhAdj5HU0hrLbbzOLu0uAtGUlJo,9369
232
241
  tests/test_env_config.py,sha256=Qu1sDeADs69dSr1x0QmEe8nJrMHneE_4JClt-N901e8,4867
233
242
  tests/test_error_handler_compatibility.py,sha256=xJ43cmCwfBGh-qBwCGiMDPPlfNDLw4ZrmlrHN9IojkU,4241
234
243
  tests/test_factories.py,sha256=wKFfr8YBXPs-AQ8YOFgDhINn5uivKqPBZQPUe5GL9Ig,8865
244
+ tests/test_factory_compatibility.py,sha256=zzTXd3ku3iedgxgB1DxTt3zfetiIl6wCjL9yXIUCpic,6260
235
245
  tests/test_final_validation.py,sha256=OuZI01O0E68Pao--bD-BFDTRZFPc_Mt4W-OXUzlt6ZA,4966
236
246
  tests/test_fingerprint_consistency.py,sha256=68V5u_2hNABI5pNWzXUrA1PJ08Xh9x3-JsMSNNjORMo,4956
237
247
  tests/test_fingerprint_simple.py,sha256=qiSba8gF3Zl91QO_ijJO7KstLdjATs30V_GZCNHShig,1626
238
248
  tests/test_framework_env_usage.py,sha256=bFb_ptdLeX2obdJWEqEHPWweiWR-wR2BpvEaJMQK7h4,4201
239
- tests/test_framework_logger.py,sha256=nAtL_N49L7OurthY329vZK_jBjJIcPcETEIiV0HGqt0,2560
240
- tests/test_framework_startup.py,sha256=I_ij7J6NO3DTBuHlh1Z4CJUcGxLEjRaIB6EdykiISEc,2267
241
249
  tests/test_get_component_logger.py,sha256=UKj5uT1F3L3atoJFmpk7QSDO2fZHgw-7Y84vMFbHRkM,2285
242
250
  tests/test_hash_performance.py,sha256=4eVPwbu66Oun0LVyTTNd9d2cj2V1xq0YZkRg8Z0TO-Q,3211
243
251
  tests/test_integration.py,sha256=lVEzKNAjFzFZHRNZAyJmXxa_5Ogf_qqL4APqs620o58,4839
@@ -249,17 +257,17 @@ tests/test_logging_final.py,sha256=K9vxyODslXza05hElVEcvzbXgzthYKK5CRj4UJTftIw,6
249
257
  tests/test_logging_integration.py,sha256=5WpExyt6BmYBZwrjqtQIGOw1Id64opJBAIahDk70Mlc,11131
250
258
  tests/test_logging_system.py,sha256=LGfK14ZEWzRtl3_VkBGz-AaVa_dDtuC5zu40m8FvmMo,9206
251
259
  tests/test_middleware_debug.py,sha256=gtiaWCxBSTcaNkdqXirM7CsThr_HfiCueBdQCpp7rqg,4572
252
- tests/test_mode_change.py,sha256=GT53CBdxcG3-evcKz_OOfH4PBiq_oqQyuDjRXrvv1UU,2665
253
260
  tests/test_mode_consistency.py,sha256=t72WX0etC_AayaL2AT6e2lIgbfP-zxTgYAiTARSN2Jk,1276
254
261
  tests/test_multi_directory.py,sha256=sH9Y3B-fuESlc7J1aICa-AlBcCW8HFR-Q5j2anUr8l0,2196
255
262
  tests/test_multiple_spider_modules.py,sha256=M0wPyQW7HMasbMIgn_R78wjZEj4A_DgqaGHp0qF9Y0c,2567
256
263
  tests/test_offsite_middleware.py,sha256=njpXTdngOqBs60Wj6xgo5EEXlJnMHd7vtYGi9dVauW0,10602
257
264
  tests/test_offsite_middleware_simple.py,sha256=4MfDKSXGHcoFLYnnxCH2rmnzztWyN0xByYLoHtepyiA,7918
265
+ tests/test_optimized_selector_naming.py,sha256=fbmlB5S2kBwtQWpWoQ4lQ7rUQm2_DeWK-t6KqvIRTUQ,2787
258
266
  tests/test_parsel.py,sha256=wuZqRFIm9xx1tt6o3Xi_OjvwhT_MPmHiUEj2ax06zlo,701
259
267
  tests/test_performance.py,sha256=Lqs2iu3dmWipZkBPARcwIjDLXsqe42ntz1M4RzqqXKo,11457
260
268
  tests/test_performance_monitor.py,sha256=paW3HGg6ReHb9lwnOivGCrI8STwbwp_mbuhgfds1h3I,4187
261
269
  tests/test_pipeline_fingerprint_consistency.py,sha256=LL55oGSDGy0K8LxoyKa6ogNHXhJlZHe509vCFbibLkk,2847
262
- tests/test_priority_behavior.py,sha256=p04M0HIgBaXyuVHmp-ImITA9jGaKI_RPwZ3DPY_Trt4,9134
270
+ tests/test_priority_behavior.py,sha256=JQ5uv80cAUKV9Eh3S8j5zxYSSL-dmzhwhuKOINM26zU,9325
263
271
  tests/test_priority_consistency.py,sha256=rVX7nku5N_QpB_ffDu3xqREkCWPX5aNNiXy112o9wpA,5756
264
272
  tests/test_priority_consistency_fixed.py,sha256=MlYi5PIr5wxunC3Ku4ilnxOatKyRu2qIvhV7pjadkjg,10765
265
273
  tests/test_proxy_api.py,sha256=XnmklS-xU4ke_560gV6AIlBsRmG8YLQTGFAZrTUZuhc,11013
@@ -293,12 +301,21 @@ tests/test_request_params.py,sha256=l2etiDebqylPBym1e9DSDn4wxwTHv8DQHKq9AzlzlG0,
293
301
  tests/test_request_serialization.py,sha256=Ikgec8tt_sPCK6jcZyK8vRw84zRNE6nxQy9rba1WKmE,2332
294
302
  tests/test_response_code_middleware.py,sha256=wSe525bm-bk_iWMjPDzUu1LfOQrwJY8_MLKAspq2dzk,12193
295
303
  tests/test_response_filter_middleware.py,sha256=YWrGzJ7wmftTjJXcNTtJl3b3EdJsO4oR22ZLWwgErhg,16327
304
+ tests/test_response_follow.py,sha256=gjVZ_knsuHUaCDOjRPk-qG9HRCwReXlVrIx_KpveRHM,3738
296
305
  tests/test_response_improvements.py,sha256=vNqHKyoEoYeEGAUiRzdsff2V6yvJ9QnDwGg7gmN38Ow,6028
306
+ tests/test_response_selector_methods.py,sha256=6aS7q_PBx601MnXbCze-ZWNO-uCKFVjhxcCg9NJqKrI,2738
307
+ tests/test_response_url_methods.py,sha256=plOpSN3JLRI8-lbj4cva8-_jRFdDwmax9Gkv6O2Ac-s,2759
308
+ tests/test_response_urljoin.py,sha256=uXTWhFx8-XBb-Vaghn9YKJz5ThkwRuNykBWW4S7f3go,3379
297
309
  tests/test_retry_middleware.py,sha256=mi7s4HDAqmmd9nvyxs3ZgxdEKOYkCgDu3rDvU_9o8vQ,11133
298
310
  tests/test_retry_middleware_realistic.py,sha256=Sam5y4jCN8oeElU4xxeS5zjAyzS-P8siPV7OaifgsyU,9679
299
311
  tests/test_scheduler.py,sha256=1fCu35QgK5gzgrhD0aUZj5lxL0QbokzPav-yEJxz9Ig,8182
300
312
  tests/test_scheduler_config_update.py,sha256=LuxjEbt20QrPyVkjSFxvTnFtUxwMaHB6TcqjFyo8bow,4261
313
+ tests/test_scrapy_style_encoding.py,sha256=2K_0lHsYqop4qb5lO1U8g7hbae4nkMPrbEvVTl5TT9Y,3408
314
+ tests/test_selector_helper.py,sha256=-fw8p-uJixTKso7OLUBTVJ2oOjL8LIJA1WDetzthGO0,2818
315
+ tests/test_selector_optimizations.py,sha256=5t5RrDkcy0YtK2Es9DBfi3Cejfv6yV4dagulIQhmEho,4665
301
316
  tests/test_simple_response.py,sha256=_ui2PuVZvJcAuLY7HZ8xcsy_tDBimgBqX0ukj3kE5J0,1549
317
+ tests/test_spider_loader.py,sha256=-myi78LztwABeaCpJj-DzO2CxNEYW8lavtVuUreoHcI,1314
318
+ tests/test_spider_loader_comprehensive.py,sha256=gp6SWrDQcrg4RFNkLJQWDQ16NDfpdOlg0rCyJ86-F-8,2591
302
319
  tests/test_spider_modules.py,sha256=wxPs28FtpGnQTemMY6r7WxVrwYo3bHnAd5dq94qj1K4,2797
303
320
  tests/test_telecom_spider_redis_key.py,sha256=c-gfixPul2VlYMQJGf0H5ZgYJ461fQgSKbCPrbAU45M,7625
304
321
  tests/test_template_content.py,sha256=2RgCdOA3pMUSOqC_JbTGeW7KonbTqJ0ySYJNWegU-v0,2903
@@ -311,10 +328,20 @@ tests/untested_features_report.md,sha256=31aUlsw_1OKe0_ijAjeH85kJ7HJ8qzKLJdOHDjW
311
328
  tests/verify_debug.py,sha256=iQ4Efwg9bQTHscr73VYAAZ8rBIe1u6mQfeaEK5YgneY,1564
312
329
  tests/verify_distributed.py,sha256=0IolM4ymuPOz_uTfHSWFO3Vxzp7Lo6i0zhSbzJhHFtI,4045
313
330
  tests/verify_log_fix.py,sha256=7reyVl3MXTDASyChgU5BAYuzuxvFjSLG9HywAHso0qg,4336
331
+ tests/ofweek_scrapy/scrapy.cfg,sha256=D_8rsW65iTbH7nG1kI25jYTCpoQKBVa2shajrsC6fBw,280
332
+ tests/ofweek_scrapy/ofweek_scrapy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
333
+ tests/ofweek_scrapy/ofweek_scrapy/items.py,sha256=Y_TwwHPAgOXTuCTdnhRxil7vYPk1_rzj1ZatTq4AX-I,280
334
+ tests/ofweek_scrapy/ofweek_scrapy/middlewares.py,sha256=O4jVSXZgxtsRzU9O_O3YdkS7_QLndzv3uYP-Op8g254,3654
335
+ tests/ofweek_scrapy/ofweek_scrapy/pipelines.py,sha256=ZO6WqTqPpTwLvnwO7YL0E35OPp4zSfJ_GhMeshNRSow,379
336
+ tests/ofweek_scrapy/ofweek_scrapy/settings.py,sha256=X3Y6goZluAz0n2bepWAKEhZX0URFfe9_lBRBCPgtLPk,2933
337
+ tests/ofweek_scrapy/ofweek_scrapy/spiders/__init__.py,sha256=ULwecZkx3_NTphkz7y_qiazBeUoHFnCCWnKSjoDCZj0,161
338
+ tests/ofweek_scrapy/ofweek_scrapy/spiders/ofweek_spider.py,sha256=gcfKze-ipzP7JTDGCL3TgtjwIwfgI7dPL6GmdXVT0fs,6880
314
339
  tests/scrapy_comparison/ofweek_scrapy.py,sha256=rhVds_WjYum1bLuWWe90HtXE51fZXEqhhPSc822ZasQ,5790
315
340
  tests/scrapy_comparison/scrapy_test.py,sha256=-IsGUHPBgEL0TmXjeLZl-TUA01B7Dsc2nRo4JZbFwZA,5599
316
- crawlo-1.4.4.dist-info/METADATA,sha256=LAg9xmMfxLUwVUGPqw_p48hGJYZqsRC9Mc4KqDroAUQ,4848
317
- crawlo-1.4.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
318
- crawlo-1.4.4.dist-info/entry_points.txt,sha256=5HoVoTSPxI8SCa5B7pQYxLSrkOdiunyO9tqNsLMv52g,43
319
- crawlo-1.4.4.dist-info/top_level.txt,sha256=keG_67pbZ_wZL2dmDRA9RMaNHTaV_x_oxZ9DKNgwvR0,22
320
- crawlo-1.4.4.dist-info/RECORD,,
341
+ tests/test_spiders/__init__.py,sha256=Ws2DhfUA0Xh5Cxr9M46td7B6hyNoLTyAhZ60FnIh6D0,20
342
+ tests/test_spiders/test_spider.py,sha256=kNGEg80HMMFgzVseI1jJjljZEBy3QYKt_3SXGASffFM,168
343
+ crawlo-1.4.5.dist-info/METADATA,sha256=o0MSsONyv_KU7dMNANtCZlkLpVdDUz8zGJKd5i2DM1g,9355
344
+ crawlo-1.4.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
345
+ crawlo-1.4.5.dist-info/entry_points.txt,sha256=5HoVoTSPxI8SCa5B7pQYxLSrkOdiunyO9tqNsLMv52g,43
346
+ crawlo-1.4.5.dist-info/top_level.txt,sha256=keG_67pbZ_wZL2dmDRA9RMaNHTaV_x_oxZ9DKNgwvR0,22
347
+ crawlo-1.4.5.dist-info/RECORD,,