crawlo 1.4.5__py3-none-any.whl → 1.4.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crawlo might be problematic. Click here for more details.

Files changed (375) hide show
  1. crawlo/__init__.py +90 -89
  2. crawlo/__version__.py +1 -1
  3. crawlo/cli.py +75 -75
  4. crawlo/commands/__init__.py +14 -14
  5. crawlo/commands/check.py +594 -594
  6. crawlo/commands/genspider.py +186 -186
  7. crawlo/commands/help.py +140 -138
  8. crawlo/commands/list.py +155 -155
  9. crawlo/commands/run.py +379 -341
  10. crawlo/commands/startproject.py +460 -460
  11. crawlo/commands/stats.py +187 -187
  12. crawlo/commands/utils.py +196 -196
  13. crawlo/config.py +320 -312
  14. crawlo/config_validator.py +277 -277
  15. crawlo/core/__init__.py +52 -52
  16. crawlo/core/engine.py +451 -438
  17. crawlo/core/processor.py +47 -47
  18. crawlo/core/scheduler.py +290 -291
  19. crawlo/crawler.py +698 -657
  20. crawlo/data/__init__.py +5 -5
  21. crawlo/data/user_agents.py +194 -194
  22. crawlo/downloader/__init__.py +280 -276
  23. crawlo/downloader/aiohttp_downloader.py +233 -233
  24. crawlo/downloader/cffi_downloader.py +250 -245
  25. crawlo/downloader/httpx_downloader.py +265 -259
  26. crawlo/downloader/hybrid_downloader.py +212 -212
  27. crawlo/downloader/playwright_downloader.py +425 -402
  28. crawlo/downloader/selenium_downloader.py +486 -472
  29. crawlo/event.py +45 -11
  30. crawlo/exceptions.py +215 -82
  31. crawlo/extension/__init__.py +65 -64
  32. crawlo/extension/health_check.py +141 -141
  33. crawlo/extension/log_interval.py +94 -94
  34. crawlo/extension/log_stats.py +70 -70
  35. crawlo/extension/logging_extension.py +53 -61
  36. crawlo/extension/memory_monitor.py +104 -104
  37. crawlo/extension/performance_profiler.py +133 -133
  38. crawlo/extension/request_recorder.py +107 -107
  39. crawlo/factories/__init__.py +27 -27
  40. crawlo/factories/base.py +68 -68
  41. crawlo/factories/crawler.py +104 -103
  42. crawlo/factories/registry.py +84 -84
  43. crawlo/factories/utils.py +135 -0
  44. crawlo/filters/__init__.py +170 -153
  45. crawlo/filters/aioredis_filter.py +348 -264
  46. crawlo/filters/memory_filter.py +261 -276
  47. crawlo/framework.py +306 -292
  48. crawlo/initialization/__init__.py +44 -44
  49. crawlo/initialization/built_in.py +391 -434
  50. crawlo/initialization/context.py +141 -141
  51. crawlo/initialization/core.py +240 -194
  52. crawlo/initialization/phases.py +230 -149
  53. crawlo/initialization/registry.py +143 -145
  54. crawlo/initialization/utils.py +49 -0
  55. crawlo/interfaces.py +23 -23
  56. crawlo/items/__init__.py +23 -23
  57. crawlo/items/base.py +23 -23
  58. crawlo/items/fields.py +52 -52
  59. crawlo/items/items.py +104 -104
  60. crawlo/logging/__init__.py +42 -46
  61. crawlo/logging/config.py +277 -197
  62. crawlo/logging/factory.py +175 -171
  63. crawlo/logging/manager.py +104 -112
  64. crawlo/middleware/__init__.py +87 -24
  65. crawlo/middleware/default_header.py +132 -132
  66. crawlo/middleware/download_delay.py +104 -104
  67. crawlo/middleware/middleware_manager.py +142 -142
  68. crawlo/middleware/offsite.py +123 -123
  69. crawlo/middleware/proxy.py +209 -386
  70. crawlo/middleware/request_ignore.py +86 -86
  71. crawlo/middleware/response_code.py +150 -150
  72. crawlo/middleware/response_filter.py +136 -136
  73. crawlo/middleware/retry.py +124 -124
  74. crawlo/mode_manager.py +287 -253
  75. crawlo/network/__init__.py +21 -21
  76. crawlo/network/request.py +375 -379
  77. crawlo/network/response.py +569 -664
  78. crawlo/pipelines/__init__.py +53 -22
  79. crawlo/pipelines/base_pipeline.py +452 -0
  80. crawlo/pipelines/bloom_dedup_pipeline.py +146 -146
  81. crawlo/pipelines/console_pipeline.py +39 -39
  82. crawlo/pipelines/csv_pipeline.py +316 -316
  83. crawlo/pipelines/database_dedup_pipeline.py +197 -197
  84. crawlo/pipelines/json_pipeline.py +218 -218
  85. crawlo/pipelines/memory_dedup_pipeline.py +105 -105
  86. crawlo/pipelines/mongo_pipeline.py +140 -132
  87. crawlo/pipelines/mysql_pipeline.py +470 -326
  88. crawlo/pipelines/pipeline_manager.py +100 -100
  89. crawlo/pipelines/redis_dedup_pipeline.py +155 -156
  90. crawlo/project.py +347 -347
  91. crawlo/queue/__init__.py +10 -0
  92. crawlo/queue/pqueue.py +38 -38
  93. crawlo/queue/queue_manager.py +591 -525
  94. crawlo/queue/redis_priority_queue.py +519 -370
  95. crawlo/settings/__init__.py +7 -7
  96. crawlo/settings/default_settings.py +285 -270
  97. crawlo/settings/setting_manager.py +219 -219
  98. crawlo/spider/__init__.py +657 -657
  99. crawlo/stats_collector.py +82 -73
  100. crawlo/subscriber.py +129 -129
  101. crawlo/task_manager.py +138 -138
  102. crawlo/templates/crawlo.cfg.tmpl +10 -10
  103. crawlo/templates/project/__init__.py.tmpl +2 -4
  104. crawlo/templates/project/items.py.tmpl +13 -17
  105. crawlo/templates/project/middlewares.py.tmpl +38 -38
  106. crawlo/templates/project/pipelines.py.tmpl +35 -36
  107. crawlo/templates/project/settings.py.tmpl +110 -157
  108. crawlo/templates/project/settings_distributed.py.tmpl +156 -161
  109. crawlo/templates/project/settings_gentle.py.tmpl +170 -171
  110. crawlo/templates/project/settings_high_performance.py.tmpl +171 -172
  111. crawlo/templates/project/settings_minimal.py.tmpl +99 -77
  112. crawlo/templates/project/settings_simple.py.tmpl +168 -169
  113. crawlo/templates/project/spiders/__init__.py.tmpl +9 -9
  114. crawlo/templates/run.py.tmpl +23 -30
  115. crawlo/templates/spider/spider.py.tmpl +33 -144
  116. crawlo/templates/spiders_init.py.tmpl +5 -10
  117. crawlo/tools/__init__.py +86 -189
  118. crawlo/tools/date_tools.py +289 -289
  119. crawlo/tools/distributed_coordinator.py +384 -384
  120. crawlo/tools/scenario_adapter.py +262 -262
  121. crawlo/tools/text_cleaner.py +232 -232
  122. crawlo/utils/__init__.py +50 -50
  123. crawlo/utils/batch_processor.py +276 -259
  124. crawlo/utils/config_manager.py +442 -0
  125. crawlo/utils/controlled_spider_mixin.py +439 -439
  126. crawlo/utils/db_helper.py +250 -244
  127. crawlo/utils/error_handler.py +410 -410
  128. crawlo/utils/fingerprint.py +121 -121
  129. crawlo/utils/func_tools.py +82 -82
  130. crawlo/utils/large_scale_helper.py +344 -344
  131. crawlo/utils/leak_detector.py +335 -0
  132. crawlo/utils/log.py +79 -79
  133. crawlo/utils/misc.py +81 -81
  134. crawlo/utils/mongo_connection_pool.py +157 -0
  135. crawlo/utils/mysql_connection_pool.py +197 -0
  136. crawlo/utils/performance_monitor.py +285 -285
  137. crawlo/utils/queue_helper.py +175 -175
  138. crawlo/utils/redis_checker.py +91 -0
  139. crawlo/utils/redis_connection_pool.py +578 -388
  140. crawlo/utils/redis_key_validator.py +198 -198
  141. crawlo/utils/request.py +278 -256
  142. crawlo/utils/request_serializer.py +225 -225
  143. crawlo/utils/resource_manager.py +337 -0
  144. crawlo/utils/selector_helper.py +137 -137
  145. crawlo/utils/singleton.py +70 -0
  146. crawlo/utils/spider_loader.py +201 -201
  147. crawlo/utils/text_helper.py +94 -94
  148. crawlo/utils/{url.py → url_utils.py} +39 -39
  149. crawlo-1.4.7.dist-info/METADATA +689 -0
  150. crawlo-1.4.7.dist-info/RECORD +347 -0
  151. examples/__init__.py +7 -7
  152. tests/__init__.py +7 -7
  153. tests/advanced_tools_example.py +217 -275
  154. tests/authenticated_proxy_example.py +110 -106
  155. tests/baidu_performance_test.py +108 -108
  156. tests/baidu_test.py +59 -59
  157. tests/bug_check_test.py +250 -250
  158. tests/cleaners_example.py +160 -160
  159. tests/comprehensive_framework_test.py +212 -212
  160. tests/comprehensive_test.py +81 -81
  161. tests/comprehensive_testing_summary.md +186 -186
  162. tests/config_validation_demo.py +142 -142
  163. tests/controlled_spider_example.py +205 -205
  164. tests/date_tools_example.py +180 -180
  165. tests/debug_configure.py +69 -69
  166. tests/debug_framework_logger.py +84 -84
  167. tests/debug_log_config.py +126 -126
  168. tests/debug_log_levels.py +63 -63
  169. tests/debug_pipelines.py +66 -66
  170. tests/detailed_log_test.py +233 -233
  171. tests/direct_selector_helper_test.py +96 -96
  172. tests/distributed_dedup_test.py +467 -0
  173. tests/distributed_test.py +66 -66
  174. tests/distributed_test_debug.py +76 -76
  175. tests/dynamic_loading_example.py +523 -523
  176. tests/dynamic_loading_test.py +104 -104
  177. tests/error_handling_example.py +171 -171
  178. tests/explain_mysql_update_behavior.py +77 -0
  179. tests/final_comprehensive_test.py +151 -151
  180. tests/final_log_test.py +260 -260
  181. tests/final_validation_test.py +182 -182
  182. tests/fix_log_test.py +142 -142
  183. tests/framework_performance_test.py +202 -202
  184. tests/log_buffering_test.py +111 -111
  185. tests/log_generation_timing_test.py +153 -153
  186. tests/monitor_redis_dedup.sh +72 -0
  187. tests/ofweek_scrapy/ofweek_scrapy/items.py +12 -12
  188. tests/ofweek_scrapy/ofweek_scrapy/middlewares.py +100 -100
  189. tests/ofweek_scrapy/ofweek_scrapy/pipelines.py +13 -13
  190. tests/ofweek_scrapy/ofweek_scrapy/settings.py +84 -84
  191. tests/ofweek_scrapy/scrapy.cfg +11 -11
  192. tests/optimized_performance_test.py +211 -211
  193. tests/performance_comparison.py +244 -244
  194. tests/queue_blocking_test.py +113 -113
  195. tests/queue_test.py +89 -89
  196. tests/redis_key_validation_demo.py +130 -130
  197. tests/request_params_example.py +150 -150
  198. tests/response_improvements_example.py +144 -144
  199. tests/scrapy_comparison/ofweek_scrapy.py +138 -138
  200. tests/scrapy_comparison/scrapy_test.py +133 -133
  201. tests/simple_cli_test.py +55 -0
  202. tests/simple_command_test.py +119 -119
  203. tests/simple_crawlo_test.py +126 -126
  204. tests/simple_follow_test.py +38 -38
  205. tests/simple_log_test2.py +137 -137
  206. tests/simple_optimization_test.py +128 -128
  207. tests/simple_queue_type_test.py +41 -41
  208. tests/simple_response_selector_test.py +94 -94
  209. tests/simple_selector_helper_test.py +154 -154
  210. tests/simple_selector_test.py +207 -207
  211. tests/simple_spider_test.py +49 -49
  212. tests/simple_url_test.py +73 -73
  213. tests/simulate_mysql_update_test.py +140 -0
  214. tests/spider_log_timing_test.py +177 -177
  215. tests/test_advanced_tools.py +148 -148
  216. tests/test_all_commands.py +230 -230
  217. tests/test_all_pipeline_fingerprints.py +133 -133
  218. tests/test_all_redis_key_configs.py +145 -145
  219. tests/test_asyncmy_usage.py +57 -0
  220. tests/test_batch_processor.py +178 -178
  221. tests/test_cleaners.py +54 -54
  222. tests/test_cli_arguments.py +119 -0
  223. tests/test_component_factory.py +174 -174
  224. tests/test_config_consistency.py +80 -80
  225. tests/test_config_merge.py +152 -152
  226. tests/test_config_validator.py +182 -182
  227. tests/test_controlled_spider_mixin.py +79 -79
  228. tests/test_crawler_process_import.py +38 -38
  229. tests/test_crawler_process_spider_modules.py +47 -47
  230. tests/test_crawlo_proxy_integration.py +114 -108
  231. tests/test_date_tools.py +123 -123
  232. tests/test_dedup_fix.py +220 -220
  233. tests/test_dedup_pipeline_consistency.py +124 -124
  234. tests/test_default_header_middleware.py +313 -313
  235. tests/test_distributed.py +65 -65
  236. tests/test_double_crawlo_fix.py +204 -204
  237. tests/test_double_crawlo_fix_simple.py +124 -124
  238. tests/test_download_delay_middleware.py +221 -221
  239. tests/test_downloader_proxy_compatibility.py +272 -268
  240. tests/test_edge_cases.py +305 -305
  241. tests/test_encoding_core.py +56 -56
  242. tests/test_encoding_detection.py +126 -126
  243. tests/test_enhanced_error_handler.py +270 -270
  244. tests/test_enhanced_error_handler_comprehensive.py +245 -245
  245. tests/test_error_handler_compatibility.py +112 -112
  246. tests/test_factories.py +252 -252
  247. tests/test_factory_compatibility.py +196 -196
  248. tests/test_final_validation.py +153 -153
  249. tests/test_fingerprint_consistency.py +135 -135
  250. tests/test_fingerprint_simple.py +51 -51
  251. tests/test_get_component_logger.py +83 -83
  252. tests/test_hash_performance.py +99 -99
  253. tests/test_integration.py +169 -169
  254. tests/test_item_dedup_redis_key.py +122 -122
  255. tests/test_large_scale_helper.py +235 -235
  256. tests/test_logging_enhancements.py +374 -374
  257. tests/test_logging_final.py +184 -184
  258. tests/test_logging_integration.py +312 -312
  259. tests/test_logging_system.py +282 -282
  260. tests/test_middleware_debug.py +141 -141
  261. tests/test_mode_consistency.py +51 -51
  262. tests/test_multi_directory.py +67 -67
  263. tests/test_multiple_spider_modules.py +80 -80
  264. tests/test_mysql_pipeline_config.py +165 -0
  265. tests/test_mysql_pipeline_error.py +99 -0
  266. tests/test_mysql_pipeline_init_log.py +83 -0
  267. tests/test_mysql_pipeline_integration.py +133 -0
  268. tests/test_mysql_pipeline_refactor.py +144 -0
  269. tests/test_mysql_pipeline_refactor_simple.py +86 -0
  270. tests/test_mysql_pipeline_robustness.py +196 -0
  271. tests/test_mysql_pipeline_types.py +89 -0
  272. tests/test_mysql_update_columns.py +94 -0
  273. tests/test_offsite_middleware.py +244 -244
  274. tests/test_offsite_middleware_simple.py +203 -203
  275. tests/test_optimized_selector_naming.py +100 -100
  276. tests/test_parsel.py +29 -29
  277. tests/test_performance.py +327 -327
  278. tests/test_performance_monitor.py +115 -115
  279. tests/test_pipeline_fingerprint_consistency.py +86 -86
  280. tests/test_priority_behavior.py +211 -211
  281. tests/test_priority_consistency.py +151 -151
  282. tests/test_priority_consistency_fixed.py +249 -249
  283. tests/test_proxy_health_check.py +32 -32
  284. tests/test_proxy_middleware.py +217 -121
  285. tests/test_proxy_middleware_enhanced.py +212 -216
  286. tests/test_proxy_middleware_integration.py +142 -137
  287. tests/test_proxy_middleware_refactored.py +207 -184
  288. tests/test_proxy_only.py +84 -0
  289. tests/test_proxy_providers.py +56 -56
  290. tests/test_proxy_stats.py +19 -19
  291. tests/test_proxy_strategies.py +59 -59
  292. tests/test_proxy_with_downloader.py +153 -0
  293. tests/test_queue_empty_check.py +41 -41
  294. tests/test_queue_manager_double_crawlo.py +173 -173
  295. tests/test_queue_manager_redis_key.py +179 -179
  296. tests/test_queue_naming.py +154 -154
  297. tests/test_queue_type.py +106 -106
  298. tests/test_queue_type_redis_config_consistency.py +130 -130
  299. tests/test_random_headers_default.py +322 -322
  300. tests/test_random_headers_necessity.py +308 -308
  301. tests/test_random_user_agent.py +72 -72
  302. tests/test_redis_config.py +28 -28
  303. tests/test_redis_connection_pool.py +294 -294
  304. tests/test_redis_key_naming.py +181 -181
  305. tests/test_redis_key_validator.py +123 -123
  306. tests/test_redis_queue.py +224 -224
  307. tests/test_redis_queue_name_fix.py +175 -175
  308. tests/test_redis_queue_type_fallback.py +129 -129
  309. tests/test_request_ignore_middleware.py +182 -182
  310. tests/test_request_params.py +111 -111
  311. tests/test_request_serialization.py +70 -70
  312. tests/test_response_code_middleware.py +349 -349
  313. tests/test_response_filter_middleware.py +427 -427
  314. tests/test_response_follow.py +104 -104
  315. tests/test_response_improvements.py +152 -152
  316. tests/test_response_selector_methods.py +92 -92
  317. tests/test_response_url_methods.py +70 -70
  318. tests/test_response_urljoin.py +86 -86
  319. tests/test_retry_middleware.py +333 -333
  320. tests/test_retry_middleware_realistic.py +273 -273
  321. tests/test_scheduler.py +252 -252
  322. tests/test_scheduler_config_update.py +133 -133
  323. tests/test_scrapy_style_encoding.py +112 -112
  324. tests/test_selector_helper.py +100 -100
  325. tests/test_selector_optimizations.py +146 -146
  326. tests/test_simple_response.py +61 -61
  327. tests/test_spider_loader.py +49 -49
  328. tests/test_spider_loader_comprehensive.py +69 -69
  329. tests/test_spider_modules.py +84 -84
  330. tests/test_spiders/test_spider.py +9 -9
  331. tests/test_telecom_spider_redis_key.py +205 -205
  332. tests/test_template_content.py +87 -87
  333. tests/test_template_redis_key.py +134 -134
  334. tests/test_tools.py +159 -159
  335. tests/test_user_agent_randomness.py +176 -176
  336. tests/test_user_agents.py +96 -96
  337. tests/untested_features_report.md +138 -138
  338. tests/verify_debug.py +51 -51
  339. tests/verify_distributed.py +117 -117
  340. tests/verify_log_fix.py +111 -111
  341. tests/verify_mysql_warnings.py +110 -0
  342. crawlo/logging/async_handler.py +0 -181
  343. crawlo/logging/monitor.py +0 -153
  344. crawlo/logging/sampler.py +0 -167
  345. crawlo/middleware/simple_proxy.py +0 -65
  346. crawlo/tools/authenticated_proxy.py +0 -241
  347. crawlo/tools/data_formatter.py +0 -226
  348. crawlo/tools/data_validator.py +0 -181
  349. crawlo/tools/encoding_converter.py +0 -127
  350. crawlo/tools/network_diagnostic.py +0 -365
  351. crawlo/tools/request_tools.py +0 -83
  352. crawlo/tools/retry_mechanism.py +0 -224
  353. crawlo/utils/env_config.py +0 -143
  354. crawlo/utils/large_scale_config.py +0 -287
  355. crawlo/utils/system.py +0 -11
  356. crawlo/utils/tools.py +0 -5
  357. crawlo-1.4.5.dist-info/METADATA +0 -329
  358. crawlo-1.4.5.dist-info/RECORD +0 -347
  359. tests/env_config_example.py +0 -134
  360. tests/ofweek_scrapy/ofweek_scrapy/spiders/ofweek_spider.py +0 -162
  361. tests/test_authenticated_proxy.py +0 -142
  362. tests/test_comprehensive.py +0 -147
  363. tests/test_dynamic_downloaders_proxy.py +0 -125
  364. tests/test_dynamic_proxy.py +0 -93
  365. tests/test_dynamic_proxy_config.py +0 -147
  366. tests/test_dynamic_proxy_real.py +0 -110
  367. tests/test_env_config.py +0 -122
  368. tests/test_framework_env_usage.py +0 -104
  369. tests/test_large_scale_config.py +0 -113
  370. tests/test_proxy_api.py +0 -265
  371. tests/test_real_scenario_proxy.py +0 -196
  372. tests/tools_example.py +0 -261
  373. {crawlo-1.4.5.dist-info → crawlo-1.4.7.dist-info}/WHEEL +0 -0
  374. {crawlo-1.4.5.dist-info → crawlo-1.4.7.dist-info}/entry_points.txt +0 -0
  375. {crawlo-1.4.5.dist-info → crawlo-1.4.7.dist-info}/top_level.txt +0 -0
@@ -1,347 +0,0 @@
1
- crawlo/__init__.py,sha256=n5vFwi0iuYrpAIyoNJZzWHV1gvF-vh-Yze3jiuwEXqM,2180
2
- crawlo/__version__.py,sha256=47Hd5fKyrYgSfmOfBF7ibw9EyAE1ctXOQOLg_x_Ld9w,23
3
- crawlo/cli.py,sha256=AQnAB5NMI-Ic1VPw_Jjng8L4AI4-wMozOwzE6CfXkZU,2402
4
- crawlo/config.py,sha256=EQIT7WpkXAlr2ocd5SYJYOKTSWUlQx2AkTHX7ErEWxw,9798
5
- crawlo/config_validator.py,sha256=oY4-2bwXUlwHAnGgkI-EznviDfML_dcxbWSGXNSxC2k,11516
6
- crawlo/crawler.py,sha256=6f9eDeUEZVfnUywaZ6CnL5R3bHO4sG82z-Syl3zZKvE,27360
7
- crawlo/event.py,sha256=ZhoPW5CglCEuZNFEwviSCBIw0pT5O6jT98bqYrDFd3E,324
8
- crawlo/exceptions.py,sha256=YVIDnC1bKSMv3fXH_6tinWMuD9HmKHIaUfO4_fkX5sY,1247
9
- crawlo/framework.py,sha256=9gP6VN4MHqutGXaxnwpNMSULfVYbNp906UdZiJGywlQ,9458
10
- crawlo/interfaces.py,sha256=q1vwMSiZLfLpPhFa9Y0hAcjYEKvLkW2fZ2fmoAZ-5TE,653
11
- crawlo/mode_manager.py,sha256=e8QmwsnndFx_hGME_7w-hazKo0GOYjUr-7FBf7dWxgc,8903
12
- crawlo/project.py,sha256=9wnlHd-rYAC3TT1Fc1ftyUBx7mbDT6TQCqoaIP6N3iA,13998
13
- crawlo/stats_collector.py,sha256=hIjlnX750jU4Oncyand1jBccfaX4Tu7egd2DBYu2N7A,2379
14
- crawlo/subscriber.py,sha256=h8fx69NJZeWem0ZkCmfHAi2kgfDGFObHpwN0aGNUM6Y,5115
15
- crawlo/task_manager.py,sha256=Ic6PFUqZOhLXuZ_UEk_8Neb9FmqYv8I2RzV3vLzFNSU,5966
16
- crawlo/commands/__init__.py,sha256=orvY6wLOBwGUEJKeF3h_T1fxj8AaQLjngBDd-3xKOE4,392
17
- crawlo/commands/check.py,sha256=TKDhI_sj7kErgiJpt2vCZ9QL-g6yWjrrPWKbgh8pgEU,23199
18
- crawlo/commands/genspider.py,sha256=JB4ZuFpKsYwtjx3DSsxugH7e3kqxhDWPG5ZKfvM0isI,6041
19
- crawlo/commands/help.py,sha256=8xPC0iNCg1rRBoK2bb6noAEANc1JwrdM35eF-j6yeZM,5111
20
- crawlo/commands/list.py,sha256=trzcd3kG6DhkOqYZADcl3yR7M8iJBgRw5fE-g9e0gVM,5877
21
- crawlo/commands/run.py,sha256=EjpIilgCTkXGVSV4rEISbJubdhqrok9nNe5-xDfDK5E,13169
22
- crawlo/commands/startproject.py,sha256=boZrMyn6TgCi1jt3D3DQfui6hJitjwNO8mqlWKNOBns,17366
23
- crawlo/commands/stats.py,sha256=vlGJLyiXZtY0ASdzCK59JNereSsAel4W9JCGaOzCr-8,6201
24
- crawlo/commands/utils.py,sha256=YVNEEzlm_qNY3SVvU8h6o2lQMkVgypvoB4ZFrP4gln0,5578
25
- crawlo/core/__init__.py,sha256=BWkj3AqZwp2Bk73UzUlC_qsqv_MH_HNrzy4DY1hosj4,1330
26
- crawlo/core/engine.py,sha256=znJ0VDFBImYi6KkTD8GHNo-V9BDnPSv9iYfTYLPsVSc,19379
27
- crawlo/core/processor.py,sha256=hR5MrbeZvDUx0ShKntr4qwkeVZzjlPJ8EAKgIFkNVts,1555
28
- crawlo/core/scheduler.py,sha256=G9xtrvE1wsTSOTOFUKDEphJvy6Xk5icuCGXTScYy7nQ,14084
29
- crawlo/data/__init__.py,sha256=UPqgioMdu3imSUmpLWzVlpvoBnEfaPSAT-crCcWd7iw,121
30
- crawlo/data/user_agents.py,sha256=zjjFkldQkqtrn45j0WZplaZLannPxZDeAU0JofxQcBc,9891
31
- crawlo/downloader/__init__.py,sha256=P5pl-BGYCkdKWgoIewcYPz7ocVLixVfYuCDFmYyuqIw,8966
32
- crawlo/downloader/aiohttp_downloader.py,sha256=-dIFucMOQhiiEmtgEpG2Lqh1vF-PvDddbIrZ8Hge0Ig,9556
33
- crawlo/downloader/cffi_downloader.py,sha256=QxoeocCE2DsQCnhZla6-BjhplaTZDWMbEJmNrghWSDA,10488
34
- crawlo/downloader/httpx_downloader.py,sha256=MpgDeIdGqNsiSKLOEDBnr5Z0eUbhHnqVEmAuoIfJmFU,12296
35
- crawlo/downloader/hybrid_downloader.py,sha256=dNnFeegRnyLaOxTWI6XrWKqqVPx80AZBZNgmrcKRVBM,8240
36
- crawlo/downloader/playwright_downloader.py,sha256=L-TVzG7cYfuBlqW0XSZuz5C_r9fpJrmYNcoQ-cDEna4,16663
37
- crawlo/downloader/selenium_downloader.py,sha256=P8GuhEw6OYVeN3oeksuBLpUJCELXiu0mAR23X6IIOAA,21508
38
- crawlo/extension/__init__.py,sha256=wwaTTWYUzbg5b84sQn2JvBlyuhVGkw-REkhVlR2vymA,2980
39
- crawlo/extension/health_check.py,sha256=stDpyP4gOzAdbBlPbSf0rge0QounAhF8CtrGq5fa_7c,5657
40
- crawlo/extension/log_interval.py,sha256=N25aNjFkjh9br6g3ViFqRrz06C2geAdfGas-OT2oZh8,4497
41
- crawlo/extension/log_stats.py,sha256=CWjMb_V1o8j8uwGFvh9SZ7EYLl_OYzmuIsOT5V-_BE4,2452
42
- crawlo/extension/logging_extension.py,sha256=WnHVoC4aLHYLapAN0ylt3k5aanP_T1GOyJrAVy-6ePE,2415
43
- crawlo/extension/memory_monitor.py,sha256=fClPchpCkVjcIiU0AJHCKDd7HEiz5B4KqNqKTRZ2hcU,4394
44
- crawlo/extension/performance_profiler.py,sha256=BjWD3LOb4VwjQJQvQtWNg7GluEwFquI1CztNfgMzy3c,5032
45
- crawlo/extension/request_recorder.py,sha256=KA_RmcfscDxP5wPdolO76yKfRj-1jmHhG3jkVGO1pbc,4181
46
- crawlo/factories/__init__.py,sha256=24dH70p05pZerO9-9gaKpTawRGeGvQYw7j5brvq8GUg,714
47
- crawlo/factories/base.py,sha256=loB_vyc0CsQK0BgwRoSOFS8gPcmv-b9irtjC9UaBGA4,1832
48
- crawlo/factories/crawler.py,sha256=e9zl4qytByzsYbz66klY3cZTvQei0-9GjdFK4XCyXcg,3198
49
- crawlo/factories/registry.py,sha256=YU87CdsntOz609M0aQbGcCG9glPinUJxOn-_CaM4f-M,2595
50
- crawlo/filters/__init__.py,sha256=noSe07tp2Ip_zXwAbS021BojrqNRaObDO-2YV6DOQfc,4381
51
- crawlo/filters/aioredis_filter.py,sha256=WglGW-XLjsy8r_NDrNsXk_nzwaIq081MBnooHqCCQZA,9841
52
- crawlo/filters/memory_filter.py,sha256=gIPXCw650v81XRiz0MhWXH-zcn24ERzDTzBQZRoy1YU,9890
53
- crawlo/initialization/__init__.py,sha256=uNRMm9GccMYZi51scpvo-CPx_3ayp3Y81psBHlUoDfw,1132
54
- crawlo/initialization/built_in.py,sha256=DlZf4k9FlU52tnwlFtKqWHqlFZpo-VHB0qP61rVqJzo,16259
55
- crawlo/initialization/context.py,sha256=wG9t-M-Qttj7TN6gDumPX5Q5GHaPDUpLTZZDne2r3TE,4863
56
- crawlo/initialization/core.py,sha256=GWc9QNSp2JmHlCAhgq1aqGDXHcO6QlxFAVfePKC1xeo,6872
57
- crawlo/initialization/phases.py,sha256=iWhGITh9eudfSmzf2G3DLPAIJkCDrv9TVBtnAoS1_3c,4176
58
- crawlo/initialization/registry.py,sha256=kKVegqWxtPCaZ1mTyVHN4yFecAGDOPFJfebkP-SoobE,4919
59
- crawlo/items/__init__.py,sha256=rFpx1qFBo0Ik7bSdnXC8EVTJUOQdoJYGVdhYjaH00nk,409
60
- crawlo/items/base.py,sha256=q0YTJlqUtizsqXwfWlk0ndcINV9dDyUckwMx8_JrkeY,602
61
- crawlo/items/fields.py,sha256=l-DIwK6CCpdzNvf6ELz7Ckc7YCghZD9UCXA8vhNn2UE,1852
62
- crawlo/items/items.py,sha256=OmVEvMmgofMU95GkaiWkfNQ2fjsH2fY9sw3SKcmUhLs,3478
63
- crawlo/logging/__init__.py,sha256=NlvL0sc9NZqy-Poplwpd2wsUleGyg33MyRb1wxyG-zs,1184
64
- crawlo/logging/async_handler.py,sha256=cMBVi9Ue1y5yZ7r0Uzwr7j_4nyxwmcGXwSE1J7_yIOw,5259
65
- crawlo/logging/config.py,sha256=0N7w542vlKyE5tSLAbDJYm9U4_1lQro_TQKVIJE_pA4,7167
66
- crawlo/logging/factory.py,sha256=b4Z0fBmP00GpvpJ7k4QxqYP32n_EqG5KD3ouUWU7L4U,6656
67
- crawlo/logging/manager.py,sha256=aem7yla0q83rf2CpwQEyg5YMbey4TzkquBVWiKtcqdQ,3182
68
- crawlo/logging/monitor.py,sha256=mzZWm3rQ2mGUoAmkEJPUkBmR0VWK66l14aqqhQ0zwE8,4935
69
- crawlo/logging/sampler.py,sha256=1BoRMpusP3wbXRnet5xl9_Yb_3_-AUq9WJhK9gYg7v4,5292
70
- crawlo/middleware/__init__.py,sha256=khNCstVcYlL14SbLZ8ys9ub1-C8k4FIiMQ99Vw9wA-0,635
71
- crawlo/middleware/default_header.py,sha256=Pw-ev8ffi16GeCh84R5L3hAZgp3G1QXS-H5kV3JEp4Q,5164
72
- crawlo/middleware/download_delay.py,sha256=2iWnJFtWDlqDy5MsAob8TPiJQoiz9v21yatkBI0eptg,3542
73
- crawlo/middleware/middleware_manager.py,sha256=H_o0nwo_xQ8aSRnnvEs2Ho3fS-3WNi_5AjChhqvRYnk,6645
74
- crawlo/middleware/offsite.py,sha256=4tUkPqXMMXsi1WwYnJ_e7wMd6sRgK19QHRCYq8-w8jk,4682
75
- crawlo/middleware/proxy.py,sha256=uKk5OSLIs7jv9bBgkZwsi1rIpthooxhMrGBC2BPRDCc,16022
76
- crawlo/middleware/request_ignore.py,sha256=7qdX4zAimjSGwdod_aWUbOTfzLBWZ5KzLVFchGMCxCI,2663
77
- crawlo/middleware/response_code.py,sha256=d5t0hmP8QliuvvtFOqW-ogCBtZxg2eyjsOtlQAEUxM8,4533
78
- crawlo/middleware/response_filter.py,sha256=tVGr06bfJBR3xAHI2G5c3WimFsGHu8qoJtDcsVuCATU,4384
79
- crawlo/middleware/retry.py,sha256=Acfo95B9wF8fQTCQIqluZOS2hHdnknQu_FOHvpGKJp0,4248
80
- crawlo/middleware/simple_proxy.py,sha256=rQ4RkqewGvDRCw021nGrg8ngkBzg3wqrEVqvSmBgQ6M,2256
81
- crawlo/network/__init__.py,sha256=bvEnpEUBZJ79URfNZbsHhsBKna54hM2-x_BV8eotTA4,418
82
- crawlo/network/request.py,sha256=e6-YLgK7SU8D19n21mQwqt_b_aeRVJFOgWPIBPal2ys,14178
83
- crawlo/network/response.py,sha256=-URnNc_J7qBSG19uJbfuF6A_14MHLOtY78FvcZDzbsI,23418
84
- crawlo/pipelines/__init__.py,sha256=FDe2Pr5tiHtV8hFlheElRO_O1aVKvSWlkTcAl9BXAKA,637
85
- crawlo/pipelines/bloom_dedup_pipeline.py,sha256=vIF_6noJAdpotrJpnCmrVXCi59gRmHHn28mYW6VukbM,5465
86
- crawlo/pipelines/console_pipeline.py,sha256=bwe5hZgaVSWmh3R8XpOaaeAjJme-Ttrpo6G6f1cnLIg,1287
87
- crawlo/pipelines/csv_pipeline.py,sha256=qbXZoqq4FIR9QkUGpC0ryWzmqGJSrM2bxmWLM4I1nXM,12490
88
- crawlo/pipelines/database_dedup_pipeline.py,sha256=IxahtD_mhni-Y21_idOMX58_Htf46A7n52enG9VR2PI,7296
89
- crawlo/pipelines/json_pipeline.py,sha256=wrCsh8YInmcPLAkhPrHObMx89VZfhf-c7qRrYsTixPE,8585
90
- crawlo/pipelines/memory_dedup_pipeline.py,sha256=lKkYPu6vkpPjfQ1-xOLvPFT4VdTI8QVx0yjqtVR0ZB0,3598
91
- crawlo/pipelines/mongo_pipeline.py,sha256=PohTKTGw3QRvuP-T6SrquwW3FAHSno8jQ2D2cH_d75U,5837
92
- crawlo/pipelines/mysql_pipeline.py,sha256=pLJQJUKqzWrrOxuO-eHXNq5xLza0DHeuGnpwX2Pc4NI,14186
93
- crawlo/pipelines/pipeline_manager.py,sha256=_DtWfxcTinIf5ApzUOVjZksd2tPbc7qeKi92IVd_kbs,4387
94
- crawlo/pipelines/redis_dedup_pipeline.py,sha256=RB1kXLr8ZuWNrgZKYwt--tlmnWsQTbuwTsSt3pafol8,6077
95
- crawlo/queue/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
96
- crawlo/queue/pqueue.py,sha256=bbgd3l1VfqYXfz-4VFaiWLmJit1LdB3qHalCtNqyrqI,1210
97
- crawlo/queue/queue_manager.py,sha256=8rKygMxr6DgSjnGsKFmvlTI5XAARvQIN_ENkAruHGXs,21532
98
- crawlo/queue/redis_priority_queue.py,sha256=vLvg2toKaRrXD1QyEdu1ZjTmANv7clFaBF7mCtstBmI,15995
99
- crawlo/settings/__init__.py,sha256=NgYFLfk_Bw7h6KSoepJn_lMBSqVbCHebjKxaE3_eMgw,130
100
- crawlo/settings/default_settings.py,sha256=kBcE5PF-sfB12cjIxHeNPEvzSWSHYDu6saEgrTGXn5o,11970
101
- crawlo/settings/setting_manager.py,sha256=yI1tGaludevxKGGZO3Pn4aYofrg2cwYwvMZCFC5PPZw,8595
102
- crawlo/spider/__init__.py,sha256=QGhe_yNsnfnCF3G9nSoWEw23b8SkP5oSFU5W79C5DzI,21881
103
- crawlo/templates/crawlo.cfg.tmpl,sha256=lwiUVe5sFixJgHFEjn1OtbAeyWsECOrz37uheuVtulk,240
104
- crawlo/templates/run.py.tmpl,sha256=g8yst2hkqhKGNotR33fDxwmEsX6aEvhrXY_cfYos_vc,788
105
- crawlo/templates/spiders_init.py.tmpl,sha256=p6UK8KWr8FDydNxiAh6Iz29MY5WmgXIkf2z-buOGhOM,354
106
- crawlo/templates/project/__init__.py.tmpl,sha256=aQnHaOjMSkTviOC8COUX0fKymuyf8lx2tGduxkMkXEE,61
107
- crawlo/templates/project/items.py.tmpl,sha256=hpQ2AfUmhddnzMuKM5LF6t44dOfFXwJRAZlWFKUFOZw,343
108
- crawlo/templates/project/middlewares.py.tmpl,sha256=eEobZl8g_0DtiwLYbirQULqOacH-yUrrs4PUrGcJ2UE,1098
109
- crawlo/templates/project/pipelines.py.tmpl,sha256=7BeaQDMHbIjhKzRtzlCMiFlU8xgMzDs2PIHq3EVUAlQ,887
110
- crawlo/templates/project/settings.py.tmpl,sha256=mL9_JAyz8R35r-ywRHi4T-dtal7oczU5kodEWxldw40,5265
111
- crawlo/templates/project/settings_distributed.py.tmpl,sha256=RHzfWZITv-0ErCR9OYEswAZHpA5d9fYil0ZoGCtFt8g,5459
112
- crawlo/templates/project/settings_gentle.py.tmpl,sha256=pmjrBLjnpGcR90RkcJrM5O8PsTrRhUB92QR3R4TJyko,5733
113
- crawlo/templates/project/settings_high_performance.py.tmpl,sha256=9QhXSzfxIsMPyq0kZY9h2YBllyXGpGE37bMEbSrs_Ag,5823
114
- crawlo/templates/project/settings_minimal.py.tmpl,sha256=1qUPhSdHtvLSHTpytUJ8K63sMROhTwkz8e4tVg1fYoM,2222
115
- crawlo/templates/project/settings_simple.py.tmpl,sha256=sIyrCIVXsHSKl8Yjj8HkGs-ppMFH26a5yp6egVNlT2Q,5585
116
- crawlo/templates/project/spiders/__init__.py.tmpl,sha256=llhcIItXpm0TlEeumeLwp4fcYv2NHl8Iru7tLhDhxiE,216
117
- crawlo/templates/spider/spider.py.tmpl,sha256=KvU-9YpN6MifDE7XzejjyyQS7RUjLDLZ8zqJcLwSsu0,5198
118
- crawlo/tools/__init__.py,sha256=sXDMZNP6EwZIFivGcRthxqD1DFMMS8UOJvULAzHD-w4,3927
119
- crawlo/tools/authenticated_proxy.py,sha256=ULCK0Cc9F2rGhRqu6kzKBdxzK9v2n1CsatSQ_PMxpAg,7272
120
- crawlo/tools/data_formatter.py,sha256=iBDHpZBZvn9O7pLkTQilE1TzYJQEc3z3f6HXoVus0f0,7808
121
- crawlo/tools/data_validator.py,sha256=bLWnkpFdclJuqjtSAgMI5nznN4vAuPwE1YaiFWKWenM,5490
122
- crawlo/tools/date_tools.py,sha256=QOT3W5MqcEQhVM3cTZYxu1MRfgX-TI4aF1RI9s0QbdE,9195
123
- crawlo/tools/distributed_coordinator.py,sha256=kkRbRoxz7iXKI3AQElyTptDpYl352ErbSkM3wjSHVwU,12574
124
- crawlo/tools/encoding_converter.py,sha256=CqHAsR2rwxuzsyR-TeQNb79HX5mH4KAUixEY-sX7204,4170
125
- crawlo/tools/network_diagnostic.py,sha256=X1hSbUthIVbMHCU7ti43Zpu8XTaDJd5Oxr2zAkEuSB0,13013
126
- crawlo/tools/request_tools.py,sha256=oXrk4yWMACVa65fDQCQgzsg6a94FH4_lS7qNR53FHYU,2420
127
- crawlo/tools/retry_mechanism.py,sha256=4AQ_HLuYt4hYMI9XHoKFk2GQKEiDJB5pAnsMCfjc6Bk,7777
128
- crawlo/tools/scenario_adapter.py,sha256=pzysL1B2uQ1ZSEncVHd9Hv2viHNgaxP44YAUcDcppfw,9660
129
- crawlo/tools/text_cleaner.py,sha256=UrMGcgRnJaufjmDKIDsRYKMA8znCAArHDgouttWPygk,6690
130
- crawlo/utils/__init__.py,sha256=nxLnfqcEGLnsfSEagoKNyu-pm2ByU9BwE5tLxcS71Qo,1003
131
- crawlo/utils/batch_processor.py,sha256=8LNy-K2SrQVUxmGEWxQyYw_j9M-erN4Ie7O4d3zpBvM,9142
132
- crawlo/utils/controlled_spider_mixin.py,sha256=8CuM3Cr2wQLHbaO_ohbCsPcImJnyfZHpERbSeMgQ-AQ,16936
133
- crawlo/utils/db_helper.py,sha256=xTgBTXSWTNXM19rLsypPtnsswO0HdDV1K7zn_wYk4s0,8137
134
- crawlo/utils/env_config.py,sha256=W-VD_WF63DHxsyJysvp1eJwRh3L_pBRl_PitQAY3nQY,4079
135
- crawlo/utils/error_handler.py,sha256=e2LeUGT_OMcNKcjiX9Pp-NuQh5spsHBqIPBd7VxA2IQ,16247
136
- crawlo/utils/fingerprint.py,sha256=3IbctH3zwyBjN_12SH7-vrFt-akA2WSo3iAzHc6u--s,3689
137
- crawlo/utils/func_tools.py,sha256=y-TYP9H3X67MS_foWy9Z2LIS6GP7Y4Cy3T168ulq3Jc,2451
138
- crawlo/utils/large_scale_config.py,sha256=NZMsDj4qbVx06Fu0aHqNKX1yzo6WFT7CgrhVnvw1ZFs,8372
139
- crawlo/utils/large_scale_helper.py,sha256=4ORkZcIrwJ0SlKOUh7l7WIuERORuRhNBgHCM71Rz0n0,12452
140
- crawlo/utils/log.py,sha256=KmUWVYq8t6fSGOC88nnYCDxwBUdoPWvaBmpOSHn2oWI,2914
141
- crawlo/utils/misc.py,sha256=m_TbfMf4Aoe70zmkv7XWyFg8Rz0qOYPXepwB6EcYr7Y,2519
142
- crawlo/utils/performance_monitor.py,sha256=32KspSo7RWvCX_fl0ZFn4ScWWOqbVVwEhPRd921Ez6I,9832
143
- crawlo/utils/queue_helper.py,sha256=gFmkh1jKlIcN1rmo2Jl6vYcLP5ByUWlfHO9eNlZPBLs,4918
144
- crawlo/utils/redis_connection_pool.py,sha256=EsPZkmQctWkoYU2wcrqkgwnIWnE6nG4XCXECKn216JA,12575
145
- crawlo/utils/redis_key_validator.py,sha256=-UTTx0Ul184pzwSply8hVdH0lp-gkXXOc_gEHR_7VlU,5809
146
- crawlo/utils/request.py,sha256=RcINrLvShfZ5VHu1T_hJJRXp-viKWSo35C2JOgWyl2k,8641
147
- crawlo/utils/request_serializer.py,sha256=b5abcgjJk4IU6Wfg46AmOAU2wmzu_WqcpEbuAncRMGQ,8931
148
- crawlo/utils/selector_helper.py,sha256=BVczzsSzPL5zF5KHXK3hyuqEl9o0ADYEuCH7Aw8aj98,4332
149
- crawlo/utils/spider_loader.py,sha256=oxifl0p4SOFhvvnD38Em4zGtC7sRr_pw4dki01MoAq0,7677
150
- crawlo/utils/system.py,sha256=24zGmtHNhDFMGVo7ftMV-Pqg6_5d63zsyNey9udvJJk,248
151
- crawlo/utils/text_helper.py,sha256=TTZgQPayMFUOYj8syt47Gwa4AQVY15W1b56STJetAKE,2920
152
- crawlo/utils/tools.py,sha256=uy7qw5Z1BIhyEgiHENvtM7WoGCJxlS8EX3PmOA7ouCo,275
153
- crawlo/utils/url.py,sha256=RKe_iqdjafsNcp-P2GVLYpsL1qbxiuZLiFc-SqOQkcs,1521
154
- examples/__init__.py,sha256=NkRbV8_S1tb8S2AW6BE2U6P2-eGOPwMR1k0YQAwQpSE,130
155
- tests/__init__.py,sha256=409aRX8hsPffiZCVjOogtxwhACzBp8G2UTJyUQSxhK0,136
156
- tests/advanced_tools_example.py,sha256=1_iitECKCuWUYMNNGo61l3lmwMRrWdA8F_Xw56UaGZY,9340
157
- tests/authenticated_proxy_example.py,sha256=fKmHXXxIxCJXjEplttCWRh7PZhbxkBSxJF91Bx-qOME,3019
158
- tests/baidu_performance_test.py,sha256=wxdaI7UwKboMYH_qcaqZLxAStvndH60bvKGzD8F-jaI,3974
159
- tests/baidu_test.py,sha256=NKYnwDbPJX3tmKtRn7uQ_QWzUXiLTQC-Gdr1cQkJzEo,1874
160
- tests/bug_check_test.py,sha256=EIDOUk_QgtBOWKuBLm_WHbgJ0fsDuJACJ-nuxnBIdkQ,8056
161
- tests/cleaners_example.py,sha256=blVqSJ7SeWUNd17JjHZJgVTzWH65XKevLyaMB_Wg8qA,5324
162
- tests/comprehensive_framework_test.py,sha256=_1N-OGbKvBTNachNvIjkL_izr4uv6OUybUkhxxz5MAk,5977
163
- tests/comprehensive_test.py,sha256=wypCaB56IV8w8nd5VA5LSXUQ3IgLf0AKKUiCci6yEJQ,2969
164
- tests/comprehensive_testing_summary.md,sha256=S01gclEas5oDhSLjxT_mOoeMl8FE4NMA-gveQOrIS4g,6513
165
- tests/config_validation_demo.py,sha256=jbZ7h-HGsJmuqBb1euB_AhmKjllkvPmItRF1K0MQrVM,4171
166
- tests/controlled_spider_example.py,sha256=2SAQKoREGHe-OzVaSkGpopCcrou6QXmeW7rLdmsyopw,7981
167
- tests/date_tools_example.py,sha256=XI3iFEzeo7Nb5YepK8WHytIaBegtxWVSISpqQgpV6M8,5042
168
- tests/debug_configure.py,sha256=9VZRJtA0u_SbCC_ZMdsiT9raseRE-C2spZpZuXZNysI,2213
169
- tests/debug_framework_logger.py,sha256=p-dfOjEipjXQd6yk75S-SMoepzwujiwuv3rNwNSyjow,3413
170
- tests/debug_log_config.py,sha256=cPS6qOLnynYTFOxpjcy9OUgIqrkasWb9f2c_PASc2_E,3714
171
- tests/debug_log_levels.py,sha256=CZWG3KGDq-hYJ5TPhoZTyjKFKkkM-AoK3oP1w-JC1sc,2168
172
- tests/debug_pipelines.py,sha256=FMb36bH9lQxBLb-nM579hBRK1S16Vxu1t_BC3Dj8O2w,2164
173
- tests/detailed_log_test.py,sha256=oTCFF_Un7Jq2gV4rpRDFOxlHJSthnQhvEf0CSItfB7I,7501
174
- tests/direct_selector_helper_test.py,sha256=p7_x3x87JUnpKplmwYO4zN5ympcPJSPdHsviso-LmpI,2862
175
- tests/distributed_test.py,sha256=u6cEiymZzCItaTClKTxwVjNmOj9_PZii4_eGNAVMDW8,1825
176
- tests/distributed_test_debug.py,sha256=pUv6ZKEJ5pK9xOA7lgVk6WW3cBAtnb1bsuZzJ8oGLvY,2181
177
- tests/dynamic_loading_example.py,sha256=7LdeQZFevrb-U1_dgr4oX3aYo2Da4HvE_0KIf1fw4Ew,18786
178
- tests/dynamic_loading_test.py,sha256=dzDW7b66HeDsIYsYgvNRihE3V6b6gEbUGQpp-eJbcIM,3413
179
- tests/env_config_example.py,sha256=_ZRDh_LR23ZKpy9E--y_KM0QIOiZF5vRT98QTn52TY8,4951
180
- tests/error_handling_example.py,sha256=grTeo1X17rFz4lhgASb0g5yu4NWbmNz5neyuonnNR40,5294
181
- tests/final_comprehensive_test.py,sha256=szTNbtwKfYNmE0kzDPCsE_kvnTG7FNKl2JERakGhKIk,4314
182
- tests/final_log_test.py,sha256=CpZ4ZvvuvFiBvz1a50qN599XIU086ett_I0bSX42BLU,9367
183
- tests/final_validation_test.py,sha256=4cuTr58i46JI6M4Tz54e7vrVFrOr3R7HSWgyQPKmM9M,5244
184
- tests/fix_log_test.py,sha256=hcRy0j3j0CT0oLN7KNA0VL-_o4M-uE1amR6GziBflfU,4440
185
- tests/framework_performance_test.py,sha256=Qp47VrsCK0ylEhDkFOm7lnD8rVkaJ7u1MopsEhAomrE,6985
186
- tests/log_buffering_test.py,sha256=0B5UY1yQuxnBU1pEyz3IBYweN__4fOkPXly-kYfOpNU,3226
187
- tests/log_generation_timing_test.py,sha256=zHb_m2FqlpRCYw-wqFWFn8cbVH8UR3VvXKSM6nNnbgo,4681
188
- tests/optimized_performance_test.py,sha256=bA0dN4j7ViyTSSiCJEjlkJ9Y7jspTFKs2xX7UXHE8Gs,7379
189
- tests/performance_comparison.py,sha256=UevHOM_9z2ILedf_xZ_8F8QiPjb_M8WTfGQrxzKtgco,9266
190
- tests/queue_blocking_test.py,sha256=hp-6hmTOO64oOAWVtlN8cFJ95GjbK3t9fj-4q_TKowk,3955
191
- tests/queue_test.py,sha256=HeBiBXqAgIAbUkLVQ3McS6NdRselA30m3lnuxNBvZbk,2689
192
- tests/redis_key_validation_demo.py,sha256=WD2jvuBwHhLYIb3lVFtvYSSnmXWn1EW4EPCEwFhfi6M,4467
193
- tests/request_params_example.py,sha256=J50NdsnK1sDrqG-5m3oA-mu1_wHwVwHIfsWxGeQpz7o,4250
194
- tests/response_improvements_example.py,sha256=t1cbG3nesp82bqog4_ku1GvQzNbhRyWa5EaKTmOPrSk,5402
195
- tests/simple_command_test.py,sha256=8TowzW45ukKTPeaNC5uij3RR7rqPULiBr2PguSSMdP8,3688
196
- tests/simple_crawlo_test.py,sha256=FYDn5cgAxHN81QSYa_wcJcxJit7aLnIopnkHKKr83dE,4801
197
- tests/simple_follow_test.py,sha256=3vNT5Eqwza6fxAY9Xl_9xtFGdfrPwm6NnVHdRmJsH8A,1053
198
- tests/simple_log_test2.py,sha256=Rn3XerVlkT0M-vbQmrQL7bVIZG3REnJNmMvUvKr6C20,3944
199
- tests/simple_optimization_test.py,sha256=hflvaC81ra1ZrPOp-Z7rQrH95OnSADvAjy95BLulD6o,3678
200
- tests/simple_queue_type_test.py,sha256=wAf4XLKl9oS5BlfrRJ1SLY-kYmNq4YY0LdIC7HmW-yg,1193
201
- tests/simple_response_selector_test.py,sha256=0naeRUX1n-oAW6VRj-12c6nre2D0RjJ0dD3Nx7BBTjY,2844
202
- tests/simple_selector_helper_test.py,sha256=l9FsVhQ-z-ICqqetLIyeSaI8Dn6bXNCD8sLdr0tpvms,4438
203
- tests/simple_selector_test.py,sha256=XzOYzpEzr0yaioLV6v-4XC60VZMd5jRthlyp7Ud02o4,6630
204
- tests/simple_spider_test.py,sha256=RzziJg-fbIVJ6_CgbismfkwrLwpJp4WWp2RLgG7Tpws,1168
205
- tests/simple_url_test.py,sha256=g9RBn46V7fHZTU0BrB5pl5AGCbw6QuKOXClVACb-MEQ,2297
206
- tests/spider_log_timing_test.py,sha256=pvYpKZemClr4mCR76xywhsiWbT5sPdzD_taZKFjlgvM,5573
207
- tests/test_advanced_tools.py,sha256=HT_TcwfFzli-CavIJSqQqnCxnBn5FDMX09zL7AJ5tNY,5398
208
- tests/test_all_commands.py,sha256=VgVa9SzU5Irvn5igHpC2W4E_6ZDWDt7jc-T4UPK_PFE,7718
209
- tests/test_all_pipeline_fingerprints.py,sha256=NDrBYr0f9CAhjmSezTS4NUrAdcotrSX3ElJTWqjXXbU,5308
210
- tests/test_all_redis_key_configs.py,sha256=dWc4Dsr07_vuSpb4hwkMpyy6XO8SI7vglVjGuGvXoa4,5710
211
- tests/test_authenticated_proxy.py,sha256=lnvmQwuf0zaZP_E05EzcNFR2VJbwTkLjOmZGNoJKaC4,4339
212
- tests/test_batch_processor.py,sha256=4_nYlu9R1JkDCFHq0bYc9LUNqsg41r7sQ879hkrhEts,7212
213
- tests/test_cleaners.py,sha256=HDK8_YU7GUj_3hGU415cxEeUR74mnDSk0yroLlgDI0I,1816
214
- tests/test_component_factory.py,sha256=V3hO5pJHSDtViLAykXSUqkeH4g-GB4GczwutrTatS2U,5809
215
- tests/test_comprehensive.py,sha256=dvRJeeVYc1cgXK9Y171hH9Y847zZpWSAFFH-EI3UepQ,5182
216
- tests/test_config_consistency.py,sha256=RgSxyaypMpysltsGSh1vFMeOShiZZG0rmUKzEhNLpYw,2001
217
- tests/test_config_merge.py,sha256=ts1j-TIKkFS0EO5q1I4O7f4YUKR5MLTmRSqOpOlv094,5606
218
- tests/test_config_validator.py,sha256=Z4gBHkI0_fEx-xgiiG4T33F4BAuePuF81obpNTXfseY,6202
219
- tests/test_controlled_spider_mixin.py,sha256=AQ493ic6AxZAKd7QCgnUES92BBWCMNteTd5DjoQlhwo,2864
220
- tests/test_crawler_process_import.py,sha256=iIPqSCpv2VRb_hWTu5euLME4PDFf7NwixeBypRuv39Y,1175
221
- tests/test_crawler_process_spider_modules.py,sha256=uMr4esj6ascVBzt0WrPd3ZOQfKD00O6tJrNhuWOdvV0,1395
222
- tests/test_crawlo_proxy_integration.py,sha256=81DVwosMoiSMxj4V_jLzcL7aqvSv_8ucggkQyXsvzT0,2733
223
- tests/test_date_tools.py,sha256=pcLDyhLrZ_jh-PhPm4CvLZEgNeH9kLMPKN5zacHwuWM,4053
224
- tests/test_dedup_fix.py,sha256=UFdm8lIi0ZIdp40W8ruxRD69bxzijuFUfNyJmB4Fwl0,8788
225
- tests/test_dedup_pipeline_consistency.py,sha256=dn5EAZSU5gQOV5EQwreHp76i5aQZ9tEdltSGO7dif5M,5176
226
- tests/test_default_header_middleware.py,sha256=UDjEPIUCre1M6ndjV_uXLVCfY7WJwyN-1Xn15hzbKMo,13126
227
- tests/test_distributed.py,sha256=78Pn4HPLIaO8t1IiaSkckBmuEVTcnC8IDw7znf9_Zcw,1790
228
- tests/test_double_crawlo_fix.py,sha256=lZwrT5ij6Jbh0EzZswhw05FXwgKaEZsSHekLTrJJajg,7856
229
- tests/test_double_crawlo_fix_simple.py,sha256=NDmCEeyvpf_D1tGQMA66iLPPKlAnSZcEg71e7GHYcjg,4768
230
- tests/test_download_delay_middleware.py,sha256=Idc6KzhL3hY3aDKgn1j_v5-mLIHz7dTnV5c4tJVZh5Q,9107
231
- tests/test_downloader_proxy_compatibility.py,sha256=0hgIzWXIqd92YXEB5sNneyp4Sk7PaG76up2cd6N9QQY,8903
232
- tests/test_dynamic_downloaders_proxy.py,sha256=t_aWpxOHi4h3_fg2ImtIq7IIJ0r3PTHtnXiopPe2ZlM,4450
233
- tests/test_dynamic_proxy.py,sha256=zi7Ocbhc9GL1zCs0XhmG2NvBBeIZ2d2hPJVh18lH4Y0,3172
234
- tests/test_dynamic_proxy_config.py,sha256=C_9CEjCJtrr0SxIXCyLDhSIi88ujF7UAT1F-FAphd0w,5853
235
- tests/test_dynamic_proxy_real.py,sha256=krWnbFIH26mWNPhOfPMmx3ZxJfOreZxMZFGwVb_8-K8,3511
236
- tests/test_edge_cases.py,sha256=460JtYR6yuTo8J4wqJScMzDkrrDUE2Q8R425AaUycIQ,11127
237
- tests/test_encoding_core.py,sha256=k5fZET0R1KInhAlbbHEJv4m9d6NuibOxxfIcR43TS7Y,1681
238
- tests/test_encoding_detection.py,sha256=Zb1KkF2CR57qa0Hr_Iv8msompGJZT2EIL_2mGp0zX9Q,4245
239
- tests/test_enhanced_error_handler.py,sha256=Ku_86jv7iDe25v8ZxalcXxJJjIiIvQXWH8ZldbwdVm8,8581
240
- tests/test_enhanced_error_handler_comprehensive.py,sha256=j_cxyIPGks9A3untKhAdj5HU0hrLbbzOLu0uAtGUlJo,9369
241
- tests/test_env_config.py,sha256=Qu1sDeADs69dSr1x0QmEe8nJrMHneE_4JClt-N901e8,4867
242
- tests/test_error_handler_compatibility.py,sha256=xJ43cmCwfBGh-qBwCGiMDPPlfNDLw4ZrmlrHN9IojkU,4241
243
- tests/test_factories.py,sha256=wKFfr8YBXPs-AQ8YOFgDhINn5uivKqPBZQPUe5GL9Ig,8865
244
- tests/test_factory_compatibility.py,sha256=zzTXd3ku3iedgxgB1DxTt3zfetiIl6wCjL9yXIUCpic,6260
245
- tests/test_final_validation.py,sha256=OuZI01O0E68Pao--bD-BFDTRZFPc_Mt4W-OXUzlt6ZA,4966
246
- tests/test_fingerprint_consistency.py,sha256=68V5u_2hNABI5pNWzXUrA1PJ08Xh9x3-JsMSNNjORMo,4956
247
- tests/test_fingerprint_simple.py,sha256=qiSba8gF3Zl91QO_ijJO7KstLdjATs30V_GZCNHShig,1626
248
- tests/test_framework_env_usage.py,sha256=bFb_ptdLeX2obdJWEqEHPWweiWR-wR2BpvEaJMQK7h4,4201
249
- tests/test_get_component_logger.py,sha256=UKj5uT1F3L3atoJFmpk7QSDO2fZHgw-7Y84vMFbHRkM,2285
250
- tests/test_hash_performance.py,sha256=4eVPwbu66Oun0LVyTTNd9d2cj2V1xq0YZkRg8Z0TO-Q,3211
251
- tests/test_integration.py,sha256=lVEzKNAjFzFZHRNZAyJmXxa_5Ogf_qqL4APqs620o58,4839
252
- tests/test_item_dedup_redis_key.py,sha256=dp_H59exJLaZHh5oMtmMEOWh-DNZwbnwIFYDjOpHgd0,3842
253
- tests/test_large_scale_config.py,sha256=Ik32ilAOQXsyw2sHR53gDPNNjY0AXybQ9ya2JY-EeqM,4296
254
- tests/test_large_scale_helper.py,sha256=0L6EKHcKgh7XHvoW4wRSkxmw8GolUwSOCgZ_-ZmCyDo,8371
255
- tests/test_logging_enhancements.py,sha256=YHcYWC8PG_AP5wZnmOr6H7QuU7m-3xzxEhppM0Jubvg,12731
256
- tests/test_logging_final.py,sha256=K9vxyODslXza05hElVEcvzbXgzthYKK5CRj4UJTftIw,6376
257
- tests/test_logging_integration.py,sha256=5WpExyt6BmYBZwrjqtQIGOw1Id64opJBAIahDk70Mlc,11131
258
- tests/test_logging_system.py,sha256=LGfK14ZEWzRtl3_VkBGz-AaVa_dDtuC5zu40m8FvmMo,9206
259
- tests/test_middleware_debug.py,sha256=gtiaWCxBSTcaNkdqXirM7CsThr_HfiCueBdQCpp7rqg,4572
260
- tests/test_mode_consistency.py,sha256=t72WX0etC_AayaL2AT6e2lIgbfP-zxTgYAiTARSN2Jk,1276
261
- tests/test_multi_directory.py,sha256=sH9Y3B-fuESlc7J1aICa-AlBcCW8HFR-Q5j2anUr8l0,2196
262
- tests/test_multiple_spider_modules.py,sha256=M0wPyQW7HMasbMIgn_R78wjZEj4A_DgqaGHp0qF9Y0c,2567
263
- tests/test_offsite_middleware.py,sha256=njpXTdngOqBs60Wj6xgo5EEXlJnMHd7vtYGi9dVauW0,10602
264
- tests/test_offsite_middleware_simple.py,sha256=4MfDKSXGHcoFLYnnxCH2rmnzztWyN0xByYLoHtepyiA,7918
265
- tests/test_optimized_selector_naming.py,sha256=fbmlB5S2kBwtQWpWoQ4lQ7rUQm2_DeWK-t6KqvIRTUQ,2787
266
- tests/test_parsel.py,sha256=wuZqRFIm9xx1tt6o3Xi_OjvwhT_MPmHiUEj2ax06zlo,701
267
- tests/test_performance.py,sha256=Lqs2iu3dmWipZkBPARcwIjDLXsqe42ntz1M4RzqqXKo,11457
268
- tests/test_performance_monitor.py,sha256=paW3HGg6ReHb9lwnOivGCrI8STwbwp_mbuhgfds1h3I,4187
269
- tests/test_pipeline_fingerprint_consistency.py,sha256=LL55oGSDGy0K8LxoyKa6ogNHXhJlZHe509vCFbibLkk,2847
270
- tests/test_priority_behavior.py,sha256=JQ5uv80cAUKV9Eh3S8j5zxYSSL-dmzhwhuKOINM26zU,9325
271
- tests/test_priority_consistency.py,sha256=rVX7nku5N_QpB_ffDu3xqREkCWPX5aNNiXy112o9wpA,5756
272
- tests/test_priority_consistency_fixed.py,sha256=MlYi5PIr5wxunC3Ku4ilnxOatKyRu2qIvhV7pjadkjg,10765
273
- tests/test_proxy_api.py,sha256=XnmklS-xU4ke_560gV6AIlBsRmG8YLQTGFAZrTUZuhc,11013
274
- tests/test_proxy_health_check.py,sha256=_tDlxa_6TdL3M5RLkHF82roXJ8WIuG5hELBp2GADyKQ,1123
275
- tests/test_proxy_middleware.py,sha256=EdQAfwwAJIBxw9JmUFTDEu_pdxapaTlcJr7KcrY6-AY,4021
276
- tests/test_proxy_middleware_enhanced.py,sha256=QR-p26F63N7MxNjZ2QJUeerh_xdnCDejkrGPIh7Fh4U,7035
277
- tests/test_proxy_middleware_integration.py,sha256=mTPK_XvbmLCV_QoVZzA3ybWOOX61493Ew78WfTp-bYQ,4441
278
- tests/test_proxy_middleware_refactored.py,sha256=VbkTWkmmomcyswobA_gf3p_bERl_eexY2e6ohJQS_A8,6960
279
- tests/test_proxy_providers.py,sha256=u_R2fhab90vqvQEaOAztpAOe9tJXvUMIdoDxmStmXJ4,1749
280
- tests/test_proxy_stats.py,sha256=ES00CEoDITYPFBGPk8pecFzD3ItYIv6NSpcqNd8-kvo,526
281
- tests/test_proxy_strategies.py,sha256=9Z1pXmTNyw-eIhGXlf2abZbJx6igLohYq-_3hldQ5uE,1868
282
- tests/test_queue_empty_check.py,sha256=ZJC6jOgZq0Wb0-ubrB1ZNcCaUiWeCxoNZmjkd6PY6t0,1182
283
- tests/test_queue_manager_double_crawlo.py,sha256=MijZ3JuyHMuqGbRC-8kclFr-4O7m_T8CqezP4qiWk-E,6957
284
- tests/test_queue_manager_redis_key.py,sha256=txHLq5XUZZN7h9HUlqlUCEVCTe2IXdf9r7F_P2zNVdY,7117
285
- tests/test_queue_naming.py,sha256=GzPqxvATF9A5iQMgn2wQM68qTHTbsQ9dPhktAkyzDG4,4786
286
- tests/test_queue_type.py,sha256=N8y3s0Cwco-XphRcIiR4bQuNAEOgmxvlRoR1jzghTtY,3304
287
- tests/test_queue_type_redis_config_consistency.py,sha256=1ew7Zp9CxH1DQ0RUmsZMV-n7s8B5dCly6u3FkJbR1qE,5259
288
- tests/test_random_headers_default.py,sha256=ulDb3_kRpnTCN1-TO3m6wVM-eMkZS_ezsSbd1ur8Xpg,12772
289
- tests/test_random_headers_necessity.py,sha256=SSbNQIE347oCQvuG6yaAambFU-3MyQzTV5jN1kArRGY,11741
290
- tests/test_random_user_agent.py,sha256=6HjU4iUcMk-J6bR2N5FhIkWDfnaFKAPNVyRzxmQQ14k,2302
291
- tests/test_real_scenario_proxy.py,sha256=clmLvBfap5OpsaCE08MAWap-78jhVrxYfVfDNyoa4Hg,8454
292
- tests/test_redis_config.py,sha256=51_Fy1PqIhS0MMO2nR4q6oQjBFxfqcUPK_4NNf5s83g,903
293
- tests/test_redis_connection_pool.py,sha256=pKfXdE3Cm_L_fNqI9zqFmqiidCwR0t7hiM_Fu_V1cNI,9328
294
- tests/test_redis_key_naming.py,sha256=MTFk656JhiGVTsMctBDhBNOMFcBDZrsQA3UfPZ-Dgj4,6911
295
- tests/test_redis_key_validator.py,sha256=GszSzGADgk3uN6Bye1d8pS-AtMVgB8jwqW-22gPNM6M,4418
296
- tests/test_redis_queue.py,sha256=WQV3MtGg8rJzHgC2kRfXM6lSMXpwXJVQZfqn2dVrhg0,6758
297
- tests/test_redis_queue_name_fix.py,sha256=9V-qDmUz97wOdPv8pKcVnQRxeZ126WSdB5JBnow4ho8,5770
298
- tests/test_redis_queue_type_fallback.py,sha256=Bg6b5bqt-TKotE_wWdkaZNQr2WXT8gaDfolv8785NyE,4692
299
- tests/test_request_ignore_middleware.py,sha256=QN81wgG_W_XfXCF9LvJNxCNwbOH6_tZnLIwLDTK2K5Q,6229
300
- tests/test_request_params.py,sha256=l2etiDebqylPBym1e9DSDn4wxwTHv8DQHKq9AzlzlG0,4287
301
- tests/test_request_serialization.py,sha256=Ikgec8tt_sPCK6jcZyK8vRw84zRNE6nxQy9rba1WKmE,2332
302
- tests/test_response_code_middleware.py,sha256=wSe525bm-bk_iWMjPDzUu1LfOQrwJY8_MLKAspq2dzk,12193
303
- tests/test_response_filter_middleware.py,sha256=YWrGzJ7wmftTjJXcNTtJl3b3EdJsO4oR22ZLWwgErhg,16327
304
- tests/test_response_follow.py,sha256=gjVZ_knsuHUaCDOjRPk-qG9HRCwReXlVrIx_KpveRHM,3738
305
- tests/test_response_improvements.py,sha256=vNqHKyoEoYeEGAUiRzdsff2V6yvJ9QnDwGg7gmN38Ow,6028
306
- tests/test_response_selector_methods.py,sha256=6aS7q_PBx601MnXbCze-ZWNO-uCKFVjhxcCg9NJqKrI,2738
307
- tests/test_response_url_methods.py,sha256=plOpSN3JLRI8-lbj4cva8-_jRFdDwmax9Gkv6O2Ac-s,2759
308
- tests/test_response_urljoin.py,sha256=uXTWhFx8-XBb-Vaghn9YKJz5ThkwRuNykBWW4S7f3go,3379
309
- tests/test_retry_middleware.py,sha256=mi7s4HDAqmmd9nvyxs3ZgxdEKOYkCgDu3rDvU_9o8vQ,11133
310
- tests/test_retry_middleware_realistic.py,sha256=Sam5y4jCN8oeElU4xxeS5zjAyzS-P8siPV7OaifgsyU,9679
311
- tests/test_scheduler.py,sha256=1fCu35QgK5gzgrhD0aUZj5lxL0QbokzPav-yEJxz9Ig,8182
312
- tests/test_scheduler_config_update.py,sha256=LuxjEbt20QrPyVkjSFxvTnFtUxwMaHB6TcqjFyo8bow,4261
313
- tests/test_scrapy_style_encoding.py,sha256=2K_0lHsYqop4qb5lO1U8g7hbae4nkMPrbEvVTl5TT9Y,3408
314
- tests/test_selector_helper.py,sha256=-fw8p-uJixTKso7OLUBTVJ2oOjL8LIJA1WDetzthGO0,2818
315
- tests/test_selector_optimizations.py,sha256=5t5RrDkcy0YtK2Es9DBfi3Cejfv6yV4dagulIQhmEho,4665
316
- tests/test_simple_response.py,sha256=_ui2PuVZvJcAuLY7HZ8xcsy_tDBimgBqX0ukj3kE5J0,1549
317
- tests/test_spider_loader.py,sha256=-myi78LztwABeaCpJj-DzO2CxNEYW8lavtVuUreoHcI,1314
318
- tests/test_spider_loader_comprehensive.py,sha256=gp6SWrDQcrg4RFNkLJQWDQ16NDfpdOlg0rCyJ86-F-8,2591
319
- tests/test_spider_modules.py,sha256=wxPs28FtpGnQTemMY6r7WxVrwYo3bHnAd5dq94qj1K4,2797
320
- tests/test_telecom_spider_redis_key.py,sha256=c-gfixPul2VlYMQJGf0H5ZgYJ461fQgSKbCPrbAU45M,7625
321
- tests/test_template_content.py,sha256=2RgCdOA3pMUSOqC_JbTGeW7KonbTqJ0ySYJNWegU-v0,2903
322
- tests/test_template_redis_key.py,sha256=99-s0_-8MFJbIvGG_X__sH0qkXWTtJv8fdTdlftsq4I,4876
323
- tests/test_tools.py,sha256=z50Bvq_q8FwpyxNkmh00_A3sXkSv2l1Q_EbK02FDYgk,5504
324
- tests/test_user_agent_randomness.py,sha256=tE8_zh-BjMAQ9CTgScxZze6JarNher6COkdoLU68YfA,5681
325
- tests/test_user_agents.py,sha256=e4haX-o8Janl-PawGJ9MemZyMqTX33_tBF_WnYSVoUw,3327
326
- tests/tools_example.py,sha256=Rxu5vVKnj3CZ3mCx-EEotBWPtZs2S7ktyqq-SYeclxU,7999
327
- tests/untested_features_report.md,sha256=31aUlsw_1OKe0_ijAjeH85kJ7HJ8qzKLJdOHDjWtYdY,4169
328
- tests/verify_debug.py,sha256=iQ4Efwg9bQTHscr73VYAAZ8rBIe1u6mQfeaEK5YgneY,1564
329
- tests/verify_distributed.py,sha256=0IolM4ymuPOz_uTfHSWFO3Vxzp7Lo6i0zhSbzJhHFtI,4045
330
- tests/verify_log_fix.py,sha256=7reyVl3MXTDASyChgU5BAYuzuxvFjSLG9HywAHso0qg,4336
331
- tests/ofweek_scrapy/scrapy.cfg,sha256=D_8rsW65iTbH7nG1kI25jYTCpoQKBVa2shajrsC6fBw,280
332
- tests/ofweek_scrapy/ofweek_scrapy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
333
- tests/ofweek_scrapy/ofweek_scrapy/items.py,sha256=Y_TwwHPAgOXTuCTdnhRxil7vYPk1_rzj1ZatTq4AX-I,280
334
- tests/ofweek_scrapy/ofweek_scrapy/middlewares.py,sha256=O4jVSXZgxtsRzU9O_O3YdkS7_QLndzv3uYP-Op8g254,3654
335
- tests/ofweek_scrapy/ofweek_scrapy/pipelines.py,sha256=ZO6WqTqPpTwLvnwO7YL0E35OPp4zSfJ_GhMeshNRSow,379
336
- tests/ofweek_scrapy/ofweek_scrapy/settings.py,sha256=X3Y6goZluAz0n2bepWAKEhZX0URFfe9_lBRBCPgtLPk,2933
337
- tests/ofweek_scrapy/ofweek_scrapy/spiders/__init__.py,sha256=ULwecZkx3_NTphkz7y_qiazBeUoHFnCCWnKSjoDCZj0,161
338
- tests/ofweek_scrapy/ofweek_scrapy/spiders/ofweek_spider.py,sha256=gcfKze-ipzP7JTDGCL3TgtjwIwfgI7dPL6GmdXVT0fs,6880
339
- tests/scrapy_comparison/ofweek_scrapy.py,sha256=rhVds_WjYum1bLuWWe90HtXE51fZXEqhhPSc822ZasQ,5790
340
- tests/scrapy_comparison/scrapy_test.py,sha256=-IsGUHPBgEL0TmXjeLZl-TUA01B7Dsc2nRo4JZbFwZA,5599
341
- tests/test_spiders/__init__.py,sha256=Ws2DhfUA0Xh5Cxr9M46td7B6hyNoLTyAhZ60FnIh6D0,20
342
- tests/test_spiders/test_spider.py,sha256=kNGEg80HMMFgzVseI1jJjljZEBy3QYKt_3SXGASffFM,168
343
- crawlo-1.4.5.dist-info/METADATA,sha256=o0MSsONyv_KU7dMNANtCZlkLpVdDUz8zGJKd5i2DM1g,9355
344
- crawlo-1.4.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
345
- crawlo-1.4.5.dist-info/entry_points.txt,sha256=5HoVoTSPxI8SCa5B7pQYxLSrkOdiunyO9tqNsLMv52g,43
346
- crawlo-1.4.5.dist-info/top_level.txt,sha256=keG_67pbZ_wZL2dmDRA9RMaNHTaV_x_oxZ9DKNgwvR0,22
347
- crawlo-1.4.5.dist-info/RECORD,,
@@ -1,134 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- """
4
- 环境变量配置工具使用示例
5
- 展示如何在 Crawlo 项目中正确使用环境变量配置工具
6
- """
7
- import os
8
- import sys
9
-
10
- # 添加项目根目录到Python路径
11
- sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
12
-
13
- from crawlo.utils.env_config import get_env_var, get_redis_config, get_runtime_config
14
- from crawlo.settings.setting_manager import SettingManager
15
- from crawlo.settings import default_settings
16
-
17
-
18
- def example_basic_usage():
19
- """基本使用示例"""
20
- print("=== 基本环境变量使用示例 ===")
21
-
22
- # 获取字符串环境变量
23
- project_name = get_env_var('PROJECT_NAME', 'my_crawler', str)
24
- print(f"项目名称: {project_name}")
25
-
26
- # 获取整数环境变量
27
- concurrency = get_env_var('CONCURRENCY', 8, int)
28
- print(f"并发数: {concurrency}")
29
-
30
- # 获取布尔环境变量
31
- debug_mode = get_env_var('DEBUG_MODE', False, bool)
32
- print(f"调试模式: {debug_mode}")
33
-
34
-
35
- def example_redis_config():
36
- """Redis配置示例"""
37
- print("\n=== Redis配置示例 ===")
38
-
39
- # 获取Redis配置
40
- redis_config = get_redis_config()
41
- print(f"Redis主机: {redis_config['REDIS_HOST']}")
42
- print(f"Redis端口: {redis_config['REDIS_PORT']}")
43
- print(f"Redis密码: {'*' * len(redis_config['REDIS_PASSWORD']) if redis_config['REDIS_PASSWORD'] else '无'}")
44
- print(f"Redis数据库: {redis_config['REDIS_DB']}")
45
-
46
- # 生成Redis URL
47
- if redis_config['REDIS_PASSWORD']:
48
- redis_url = f"redis://:{redis_config['REDIS_PASSWORD']}@{redis_config['REDIS_HOST']}:{redis_config['REDIS_PORT']}/{redis_config['REDIS_DB']}"
49
- else:
50
- redis_url = f"redis://{redis_config['REDIS_HOST']}:{redis_config['REDIS_PORT']}/{redis_config['REDIS_DB']}"
51
-
52
- print(f"Redis URL: {redis_url}")
53
-
54
-
55
- def example_runtime_config():
56
- """运行时配置示例"""
57
- print("\n=== 运行时配置示例 ===")
58
-
59
- # 获取运行时配置
60
- runtime_config = get_runtime_config()
61
- print(f"运行模式: {runtime_config['CRAWLO_MODE']}")
62
- print(f"项目名称: {runtime_config['PROJECT_NAME']}")
63
- print(f"并发数: {runtime_config['CONCURRENCY']}")
64
-
65
-
66
- def example_settings_integration():
67
- """与Settings集成示例"""
68
- print("\n=== 与Settings集成示例 ===")
69
-
70
- # 创建设置管理器
71
- settings = SettingManager()
72
-
73
- # 更新Redis相关设置
74
- redis_config = get_redis_config()
75
- settings.set('REDIS_HOST', redis_config['REDIS_HOST'])
76
- settings.set('REDIS_PORT', redis_config['REDIS_PORT'])
77
- settings.set('REDIS_PASSWORD', redis_config['REDIS_PASSWORD'])
78
- settings.set('REDIS_DB', redis_config['REDIS_DB'])
79
-
80
- # 更新运行时设置
81
- runtime_config = get_runtime_config()
82
- settings.set('PROJECT_NAME', runtime_config['PROJECT_NAME'])
83
- settings.set('RUN_MODE', runtime_config['CRAWLO_MODE'])
84
- settings.set('CONCURRENCY', runtime_config['CONCURRENCY'])
85
-
86
- # 显示一些关键设置
87
- print(f"项目名称: {settings.get('PROJECT_NAME')}")
88
- print(f"运行模式: {settings.get('RUN_MODE')}")
89
- print(f"并发数: {settings.get_int('CONCURRENCY')}")
90
- print(f"Redis主机: {settings.get('REDIS_HOST')}")
91
- print(f"Redis端口: {settings.get_int('REDIS_PORT')}")
92
-
93
-
94
- def example_env_setup():
95
- """环境变量设置示例"""
96
- print("\n=== 环境变量设置示例 ===")
97
- print("在命令行中设置环境变量的示例:")
98
- print(" Windows (PowerShell):")
99
- print(" $env:PROJECT_NAME = \"my_distributed_crawler\"")
100
- print(" $env:REDIS_HOST = \"redis.example.com\"")
101
- print(" $env:REDIS_PORT = \"6380\"")
102
- print(" $env:CONCURRENCY = \"16\"")
103
- print(" $env:CRAWLO_MODE = \"distributed\"")
104
- print()
105
- print(" Linux/macOS:")
106
- print(" export PROJECT_NAME=\"my_distributed_crawler\"")
107
- print(" export REDIS_HOST=\"redis.example.com\"")
108
- print(" export REDIS_PORT=\"6380\"")
109
- print(" export CONCURRENCY=\"16\"")
110
- print(" export CRAWLO_MODE=\"distributed\"")
111
-
112
-
113
- if __name__ == '__main__':
114
- # 设置一些测试环境变量
115
- os.environ['PROJECT_NAME'] = 'test_crawler'
116
- os.environ['CONCURRENCY'] = '12'
117
- os.environ['DEBUG_MODE'] = 'true'
118
- os.environ['REDIS_HOST'] = 'redis.test.com'
119
- os.environ['REDIS_PORT'] = '6380'
120
- os.environ['REDIS_PASSWORD'] = 'test_password'
121
- os.environ['CRAWLO_MODE'] = 'distributed'
122
-
123
- # 运行示例
124
- example_basic_usage()
125
- example_redis_config()
126
- example_runtime_config()
127
- example_settings_integration()
128
- example_env_setup()
129
-
130
- # 清理测试环境变量
131
- for var in ['PROJECT_NAME', 'CONCURRENCY', 'DEBUG_MODE', 'REDIS_HOST',
132
- 'REDIS_PORT', 'REDIS_PASSWORD', 'CRAWLO_MODE']:
133
- if var in os.environ:
134
- del os.environ[var]
@@ -1,162 +0,0 @@
1
- import scrapy
2
- from urllib.parse import urljoin
3
-
4
- class OfweekSpider(scrapy.Spider):
5
- name = 'ofweek'
6
- allowed_domains = ['ee.ofweek.com']
7
-
8
- def start_requests(self):
9
- """生成初始请求"""
10
- headers = {
11
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
12
- "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
13
- "Cache-Control": "no-cache",
14
- "Connection": "keep-alive",
15
- "Pragma": "no-cache",
16
- "Referer": "https://ee.ofweek.com/CATList-2800-8100-ee-2.html",
17
- "Sec-Fetch-Dest": "document",
18
- "Sec-Fetch-Mode": "navigate",
19
- "Sec-Fetch-Site": "same-origin",
20
- "Sec-Fetch-User": "?1",
21
- "Upgrade-Insecure-Requests": "1",
22
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36",
23
- }
24
- cookies = {
25
- "__utmz": "57425525.1730117117.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)",
26
- "Hm_lvt_abe9900db162c6d089cdbfd107db0f03": "1739244841",
27
- "Hm_lvt_af50e2fc51af73da7720fb324b88a975": "1740100727",
28
- "JSESSIONID": "FEA96D3B5FC31350B2285E711BF2A541",
29
- "Hm_lvt_28a416fcfc17063eb9c4f9bb1a1f5cda": "1757477622",
30
- "HMACCOUNT": "08DF0D235A291EAA",
31
- "__utma": "57425525.2080994505.1730117117.1747970718.1757477622.50",
32
- "__utmc": "57425525",
33
- "__utmt": "1",
34
- "__utmb": "57425525.2.10.1757477622",
35
- "Hm_lpvt_28a416fcfc17063eb9c4f9bb1a1f5cda": "1757477628",
36
- "index_burying_point": "c64d6c31e69d560efe319cc9f8be279f"
37
- }
38
-
39
- # 使用与Crawlo相同的页数进行公平测试
40
- max_page = 50 # 原来是1851,现在改为50页进行测试
41
- start_urls = []
42
- for page in range(1, max_page + 1):
43
- url = f'https://ee.ofweek.com/CATList-2800-8100-ee-{page}.html'
44
- start_urls.append(url)
45
-
46
- self.logger.info(f"生成了 {len(start_urls)} 个起始URL")
47
-
48
- # 生成请求
49
- for url in start_urls:
50
- self.logger.info(f"添加起始URL: {url}")
51
- try:
52
- yield scrapy.Request(
53
- url=url,
54
- callback=self.parse,
55
- headers=headers,
56
- cookies=cookies,
57
- dont_filter=True
58
- )
59
- except Exception as e:
60
- self.logger.error(f"创建请求失败: {url}, 错误: {e}")
61
-
62
- self.logger.info("start_requests方法执行完成")
63
-
64
- def parse(self, response):
65
- """解析列表页"""
66
- self.logger.info(f'正在解析页面: {response.url}')
67
-
68
- # 检查响应状态
69
- if response.status != 200:
70
- self.logger.warning(f"页面返回非200状态码: {response.status}, URL: {response.url}")
71
- return
72
-
73
- # ================== 数据提取 ==================
74
- try:
75
- rows = response.xpath('//div[@class="main_left"]/div[@class="list_model"]/div[@class="model_right model_right2"]')
76
- self.logger.info(f"在页面 {response.url} 中找到 {len(rows)} 个条目")
77
-
78
- for row in rows:
79
- try:
80
- # 提取URL和标题
81
- url = row.xpath('./h3/a/@href').extract_first()
82
- title = row.xpath('./h3/a/text()').extract_first()
83
-
84
- # 容错处理
85
- if not url:
86
- self.logger.warning(f"条目缺少URL,跳过")
87
- continue
88
-
89
- if not title:
90
- self.logger.warning(f"条目缺少标题,跳过")
91
- continue
92
-
93
- # 确保 URL 是绝对路径
94
- absolute_url = response.urljoin(url)
95
-
96
- # 验证URL格式
97
- if not absolute_url.startswith(('http://', 'https://')):
98
- self.logger.warning(f"无效的URL格式,跳过: {absolute_url}")
99
- continue
100
-
101
- self.logger.info(f"提取到详情页链接: {absolute_url}, 标题: {title}")
102
- yield scrapy.Request(
103
- url=absolute_url,
104
- meta={
105
- "title": title.strip() if title else '',
106
- "parent_url": response.url
107
- },
108
- callback=self.parse_detail
109
- )
110
- except Exception as e:
111
- self.logger.error(f"处理条目时出错: {e}")
112
- continue
113
-
114
- except Exception as e:
115
- self.logger.error(f"解析页面 {response.url} 时出错: {e}")
116
-
117
- def parse_detail(self, response):
118
- """解析详情页"""
119
- self.logger.info(f'正在解析详情页: {response.url}')
120
-
121
- # 检查响应状态
122
- if response.status != 200:
123
- self.logger.warning(f"详情页返回非200状态码: {response.status}, URL: {response.url}")
124
- return
125
-
126
- try:
127
- title = response.meta.get('title', '')
128
-
129
- # 提取内容,增加容错处理
130
- content_elements = response.xpath('//div[@class="TRS_Editor"]|//*[@id="articleC"]')
131
- if content_elements:
132
- content = content_elements.xpath('.//text()').extract()
133
- content = '\n'.join([text.strip() for text in content if text.strip()])
134
- else:
135
- content = ''
136
- self.logger.warning(f"未找到内容区域: {response.url}")
137
-
138
- # 提取发布时间
139
- publish_time = response.xpath('//div[@class="time fl"]/text()').extract_first()
140
- if publish_time:
141
- publish_time = publish_time.strip()
142
-
143
- source = response.xpath('//div[@class="source-name"]/text()').extract_first()
144
-
145
- # 创建数据项
146
- item = {
147
- 'title': title.strip() if title else '',
148
- 'publish_time': publish_time if publish_time else '',
149
- 'url': response.url,
150
- 'source': source if source else '',
151
- 'content': content
152
- }
153
-
154
- # 验证必要字段
155
- if not item['title']:
156
- self.logger.warning(f"详情页缺少标题: {response.url}")
157
-
158
- self.logger.info(f"成功提取详情页数据: {item['title']}")
159
- yield item
160
-
161
- except Exception as e:
162
- self.logger.error(f"解析详情页 {response.url} 时出错: {e}")