crawlo 1.4.7__tar.gz → 1.4.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crawlo might be problematic. Click here for more details.

Files changed (359)
  1. {crawlo-1.4.7 → crawlo-1.4.8}/LICENSE +22 -22
  2. {crawlo-1.4.7 → crawlo-1.4.8}/MANIFEST.in +16 -16
  3. {crawlo-1.4.7/crawlo.egg-info → crawlo-1.4.8}/PKG-INFO +831 -689
  4. {crawlo-1.4.7 → crawlo-1.4.8}/README.md +780 -638
  5. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/__init__.py +90 -90
  6. crawlo-1.4.8/crawlo/__version__.py +1 -0
  7. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/cli.py +75 -75
  8. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/commands/__init__.py +14 -14
  9. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/commands/check.py +594 -594
  10. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/commands/genspider.py +186 -186
  11. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/commands/help.py +140 -140
  12. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/commands/list.py +155 -155
  13. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/commands/run.py +379 -379
  14. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/commands/startproject.py +460 -460
  15. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/commands/stats.py +187 -187
  16. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/commands/utils.py +196 -196
  17. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/config.py +320 -320
  18. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/config_validator.py +277 -277
  19. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/core/__init__.py +52 -52
  20. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/core/engine.py +451 -451
  21. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/core/processor.py +47 -47
  22. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/core/scheduler.py +290 -290
  23. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/crawler.py +698 -698
  24. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/data/__init__.py +5 -5
  25. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/data/user_agents.py +194 -194
  26. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/downloader/__init__.py +280 -280
  27. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/downloader/aiohttp_downloader.py +233 -233
  28. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/downloader/cffi_downloader.py +250 -250
  29. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/downloader/httpx_downloader.py +265 -265
  30. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/downloader/hybrid_downloader.py +212 -212
  31. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/downloader/playwright_downloader.py +425 -425
  32. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/downloader/selenium_downloader.py +486 -486
  33. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/event.py +45 -45
  34. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/exceptions.py +214 -214
  35. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/extension/__init__.py +64 -64
  36. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/extension/health_check.py +141 -141
  37. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/extension/log_interval.py +94 -94
  38. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/extension/log_stats.py +70 -70
  39. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/extension/logging_extension.py +53 -53
  40. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/extension/memory_monitor.py +104 -104
  41. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/extension/performance_profiler.py +133 -133
  42. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/extension/request_recorder.py +107 -107
  43. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/factories/__init__.py +27 -27
  44. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/factories/base.py +68 -68
  45. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/factories/crawler.py +104 -104
  46. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/factories/registry.py +84 -84
  47. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/factories/utils.py +134 -134
  48. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/filters/__init__.py +170 -170
  49. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/filters/aioredis_filter.py +347 -347
  50. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/filters/memory_filter.py +261 -261
  51. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/framework.py +306 -306
  52. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/initialization/__init__.py +44 -44
  53. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/initialization/built_in.py +391 -391
  54. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/initialization/context.py +141 -141
  55. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/initialization/core.py +240 -240
  56. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/initialization/phases.py +229 -229
  57. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/initialization/registry.py +143 -143
  58. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/initialization/utils.py +48 -48
  59. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/interfaces.py +23 -23
  60. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/items/__init__.py +23 -23
  61. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/items/base.py +23 -23
  62. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/items/fields.py +52 -52
  63. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/items/items.py +104 -104
  64. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/logging/__init__.py +42 -42
  65. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/logging/config.py +280 -276
  66. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/logging/factory.py +175 -175
  67. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/logging/manager.py +104 -104
  68. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/middleware/__init__.py +87 -87
  69. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/middleware/default_header.py +132 -132
  70. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/middleware/download_delay.py +104 -104
  71. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/middleware/middleware_manager.py +142 -142
  72. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/middleware/offsite.py +123 -123
  73. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/middleware/proxy.py +209 -209
  74. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/middleware/request_ignore.py +86 -86
  75. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/middleware/response_code.py +150 -150
  76. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/middleware/response_filter.py +136 -136
  77. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/middleware/retry.py +124 -124
  78. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/mode_manager.py +287 -287
  79. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/network/__init__.py +21 -21
  80. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/network/request.py +408 -376
  81. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/network/response.py +598 -569
  82. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/pipelines/__init__.py +52 -52
  83. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/pipelines/base_pipeline.py +452 -452
  84. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/pipelines/bloom_dedup_pipeline.py +145 -146
  85. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/pipelines/console_pipeline.py +39 -39
  86. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/pipelines/csv_pipeline.py +316 -316
  87. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/pipelines/database_dedup_pipeline.py +196 -197
  88. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/pipelines/json_pipeline.py +218 -218
  89. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/pipelines/memory_dedup_pipeline.py +104 -105
  90. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/pipelines/mongo_pipeline.py +140 -139
  91. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/pipelines/mysql_pipeline.py +468 -469
  92. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/pipelines/pipeline_manager.py +100 -100
  93. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/pipelines/redis_dedup_pipeline.py +155 -155
  94. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/project.py +347 -347
  95. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/queue/__init__.py +9 -9
  96. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/queue/pqueue.py +38 -38
  97. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/queue/queue_manager.py +591 -591
  98. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/queue/redis_priority_queue.py +518 -518
  99. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/settings/__init__.py +7 -7
  100. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/settings/default_settings.py +287 -284
  101. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/settings/setting_manager.py +219 -219
  102. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/spider/__init__.py +658 -657
  103. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/stats_collector.py +81 -81
  104. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/subscriber.py +129 -129
  105. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/task_manager.py +138 -138
  106. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/templates/crawlo.cfg.tmpl +10 -10
  107. crawlo-1.4.8/crawlo/templates/project/__init__.py.tmpl +2 -0
  108. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/templates/project/items.py.tmpl +13 -13
  109. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/templates/project/middlewares.py.tmpl +38 -38
  110. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/templates/project/pipelines.py.tmpl +35 -35
  111. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/templates/project/settings.py.tmpl +113 -109
  112. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/templates/project/settings_distributed.py.tmpl +160 -156
  113. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/templates/project/settings_gentle.py.tmpl +174 -170
  114. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/templates/project/settings_high_performance.py.tmpl +175 -171
  115. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/templates/project/settings_minimal.py.tmpl +102 -98
  116. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/templates/project/settings_simple.py.tmpl +172 -168
  117. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/templates/project/spiders/__init__.py.tmpl +9 -9
  118. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/templates/run.py.tmpl +23 -23
  119. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/templates/spider/spider.py.tmpl +32 -32
  120. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/templates/spiders_init.py.tmpl +4 -4
  121. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/tools/__init__.py +86 -86
  122. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/tools/date_tools.py +289 -289
  123. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/tools/distributed_coordinator.py +384 -384
  124. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/tools/scenario_adapter.py +262 -262
  125. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/tools/text_cleaner.py +232 -232
  126. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/utils/__init__.py +74 -50
  127. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/utils/batch_processor.py +276 -276
  128. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/utils/config_manager.py +442 -442
  129. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/utils/controlled_spider_mixin.py +439 -439
  130. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/utils/db_helper.py +250 -250
  131. crawlo-1.4.8/crawlo/utils/encoding_helper.py +190 -0
  132. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/utils/error_handler.py +410 -410
  133. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/utils/fingerprint.py +121 -121
  134. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/utils/func_tools.py +82 -82
  135. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/utils/large_scale_helper.py +344 -344
  136. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/utils/leak_detector.py +335 -335
  137. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/utils/misc.py +81 -81
  138. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/utils/mongo_connection_pool.py +157 -157
  139. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/utils/mysql_connection_pool.py +197 -197
  140. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/utils/performance_monitor.py +285 -285
  141. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/utils/queue_helper.py +175 -175
  142. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/utils/redis_checker.py +90 -90
  143. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/utils/redis_connection_pool.py +578 -578
  144. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/utils/redis_key_validator.py +198 -198
  145. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/utils/request.py +278 -278
  146. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/utils/request_serializer.py +225 -225
  147. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/utils/resource_manager.py +337 -337
  148. crawlo-1.4.8/crawlo/utils/response_helper.py +113 -0
  149. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/utils/selector_helper.py +138 -137
  150. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/utils/singleton.py +69 -69
  151. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/utils/spider_loader.py +201 -201
  152. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo/utils/text_helper.py +94 -94
  153. {crawlo-1.4.7 → crawlo-1.4.8/crawlo.egg-info}/PKG-INFO +831 -689
  154. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo.egg-info/SOURCES.txt +2 -2
  155. {crawlo-1.4.7 → crawlo-1.4.8}/examples/__init__.py +7 -7
  156. {crawlo-1.4.7 → crawlo-1.4.8}/pyproject.toml +2 -2
  157. {crawlo-1.4.7 → crawlo-1.4.8}/requirements.txt +36 -34
  158. {crawlo-1.4.7 → crawlo-1.4.8}/setup.cfg +71 -71
  159. {crawlo-1.4.7 → crawlo-1.4.8}/tests/__init__.py +7 -7
  160. {crawlo-1.4.7 → crawlo-1.4.8}/tests/advanced_tools_example.py +217 -217
  161. {crawlo-1.4.7 → crawlo-1.4.8}/tests/authenticated_proxy_example.py +110 -110
  162. {crawlo-1.4.7 → crawlo-1.4.8}/tests/baidu_performance_test.py +108 -108
  163. {crawlo-1.4.7 → crawlo-1.4.8}/tests/baidu_test.py +59 -59
  164. {crawlo-1.4.7 → crawlo-1.4.8}/tests/bug_check_test.py +250 -250
  165. {crawlo-1.4.7 → crawlo-1.4.8}/tests/cleaners_example.py +160 -160
  166. {crawlo-1.4.7 → crawlo-1.4.8}/tests/comprehensive_framework_test.py +212 -212
  167. {crawlo-1.4.7 → crawlo-1.4.8}/tests/comprehensive_test.py +81 -81
  168. {crawlo-1.4.7 → crawlo-1.4.8}/tests/comprehensive_testing_summary.md +186 -186
  169. {crawlo-1.4.7 → crawlo-1.4.8}/tests/config_validation_demo.py +142 -142
  170. {crawlo-1.4.7 → crawlo-1.4.8}/tests/controlled_spider_example.py +205 -205
  171. {crawlo-1.4.7 → crawlo-1.4.8}/tests/date_tools_example.py +180 -180
  172. {crawlo-1.4.7 → crawlo-1.4.8}/tests/debug_configure.py +69 -69
  173. {crawlo-1.4.7 → crawlo-1.4.8}/tests/debug_framework_logger.py +84 -84
  174. {crawlo-1.4.7 → crawlo-1.4.8}/tests/debug_log_config.py +126 -126
  175. {crawlo-1.4.7 → crawlo-1.4.8}/tests/debug_log_levels.py +63 -63
  176. {crawlo-1.4.7 → crawlo-1.4.8}/tests/debug_pipelines.py +66 -66
  177. {crawlo-1.4.7 → crawlo-1.4.8}/tests/detailed_log_test.py +233 -233
  178. {crawlo-1.4.7 → crawlo-1.4.8}/tests/direct_selector_helper_test.py +96 -96
  179. {crawlo-1.4.7 → crawlo-1.4.8}/tests/distributed_dedup_test.py +467 -467
  180. {crawlo-1.4.7 → crawlo-1.4.8}/tests/distributed_test.py +66 -66
  181. {crawlo-1.4.7 → crawlo-1.4.8}/tests/distributed_test_debug.py +76 -76
  182. {crawlo-1.4.7 → crawlo-1.4.8}/tests/dynamic_loading_example.py +523 -523
  183. {crawlo-1.4.7 → crawlo-1.4.8}/tests/dynamic_loading_test.py +104 -104
  184. {crawlo-1.4.7 → crawlo-1.4.8}/tests/error_handling_example.py +171 -171
  185. {crawlo-1.4.7 → crawlo-1.4.8}/tests/explain_mysql_update_behavior.py +76 -76
  186. {crawlo-1.4.7 → crawlo-1.4.8}/tests/final_comprehensive_test.py +151 -151
  187. {crawlo-1.4.7 → crawlo-1.4.8}/tests/final_log_test.py +260 -260
  188. {crawlo-1.4.7 → crawlo-1.4.8}/tests/final_validation_test.py +182 -182
  189. {crawlo-1.4.7 → crawlo-1.4.8}/tests/fix_log_test.py +142 -142
  190. {crawlo-1.4.7 → crawlo-1.4.8}/tests/framework_performance_test.py +202 -202
  191. {crawlo-1.4.7 → crawlo-1.4.8}/tests/log_buffering_test.py +111 -111
  192. {crawlo-1.4.7 → crawlo-1.4.8}/tests/log_generation_timing_test.py +153 -153
  193. {crawlo-1.4.7 → crawlo-1.4.8}/tests/monitor_redis_dedup.sh +72 -72
  194. {crawlo-1.4.7 → crawlo-1.4.8}/tests/ofweek_scrapy/ofweek_scrapy/items.py +12 -12
  195. {crawlo-1.4.7 → crawlo-1.4.8}/tests/ofweek_scrapy/ofweek_scrapy/middlewares.py +100 -100
  196. {crawlo-1.4.7 → crawlo-1.4.8}/tests/ofweek_scrapy/ofweek_scrapy/pipelines.py +13 -13
  197. {crawlo-1.4.7 → crawlo-1.4.8}/tests/ofweek_scrapy/ofweek_scrapy/settings.py +84 -84
  198. {crawlo-1.4.7 → crawlo-1.4.8}/tests/ofweek_scrapy/ofweek_scrapy/spiders/__init__.py +4 -4
  199. {crawlo-1.4.7 → crawlo-1.4.8}/tests/ofweek_scrapy/scrapy.cfg +11 -11
  200. {crawlo-1.4.7 → crawlo-1.4.8}/tests/optimized_performance_test.py +211 -211
  201. {crawlo-1.4.7 → crawlo-1.4.8}/tests/performance_comparison.py +244 -244
  202. {crawlo-1.4.7 → crawlo-1.4.8}/tests/queue_blocking_test.py +113 -113
  203. {crawlo-1.4.7 → crawlo-1.4.8}/tests/queue_test.py +89 -89
  204. {crawlo-1.4.7 → crawlo-1.4.8}/tests/redis_key_validation_demo.py +130 -130
  205. {crawlo-1.4.7 → crawlo-1.4.8}/tests/request_params_example.py +150 -150
  206. {crawlo-1.4.7 → crawlo-1.4.8}/tests/response_improvements_example.py +144 -144
  207. {crawlo-1.4.7 → crawlo-1.4.8}/tests/scrapy_comparison/ofweek_scrapy.py +138 -138
  208. {crawlo-1.4.7 → crawlo-1.4.8}/tests/scrapy_comparison/scrapy_test.py +133 -133
  209. {crawlo-1.4.7 → crawlo-1.4.8}/tests/simple_cli_test.py +54 -54
  210. {crawlo-1.4.7 → crawlo-1.4.8}/tests/simple_command_test.py +119 -119
  211. {crawlo-1.4.7 → crawlo-1.4.8}/tests/simple_crawlo_test.py +126 -126
  212. {crawlo-1.4.7 → crawlo-1.4.8}/tests/simple_follow_test.py +38 -38
  213. {crawlo-1.4.7 → crawlo-1.4.8}/tests/simple_log_test2.py +137 -137
  214. {crawlo-1.4.7 → crawlo-1.4.8}/tests/simple_optimization_test.py +128 -128
  215. {crawlo-1.4.7 → crawlo-1.4.8}/tests/simple_queue_type_test.py +41 -41
  216. {crawlo-1.4.7 → crawlo-1.4.8}/tests/simple_response_selector_test.py +94 -94
  217. {crawlo-1.4.7 → crawlo-1.4.8}/tests/simple_selector_helper_test.py +154 -154
  218. {crawlo-1.4.7 → crawlo-1.4.8}/tests/simple_selector_test.py +207 -207
  219. {crawlo-1.4.7 → crawlo-1.4.8}/tests/simple_spider_test.py +49 -49
  220. {crawlo-1.4.7 → crawlo-1.4.8}/tests/simple_url_test.py +73 -73
  221. {crawlo-1.4.7 → crawlo-1.4.8}/tests/simulate_mysql_update_test.py +139 -139
  222. {crawlo-1.4.7 → crawlo-1.4.8}/tests/spider_log_timing_test.py +177 -177
  223. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_advanced_tools.py +148 -148
  224. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_all_commands.py +230 -230
  225. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_all_pipeline_fingerprints.py +133 -133
  226. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_all_redis_key_configs.py +145 -145
  227. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_asyncmy_usage.py +56 -56
  228. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_batch_processor.py +178 -178
  229. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_cleaners.py +54 -54
  230. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_cli_arguments.py +118 -118
  231. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_component_factory.py +174 -174
  232. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_config_consistency.py +80 -80
  233. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_config_merge.py +152 -152
  234. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_config_validator.py +182 -182
  235. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_controlled_spider_mixin.py +79 -79
  236. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_crawler_process_import.py +38 -38
  237. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_crawler_process_spider_modules.py +47 -47
  238. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_crawlo_proxy_integration.py +114 -114
  239. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_date_tools.py +123 -123
  240. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_dedup_fix.py +220 -220
  241. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_dedup_pipeline_consistency.py +124 -124
  242. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_default_header_middleware.py +313 -313
  243. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_distributed.py +65 -65
  244. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_double_crawlo_fix.py +204 -204
  245. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_double_crawlo_fix_simple.py +124 -124
  246. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_download_delay_middleware.py +221 -221
  247. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_downloader_proxy_compatibility.py +272 -272
  248. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_edge_cases.py +305 -305
  249. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_encoding_core.py +56 -56
  250. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_encoding_detection.py +126 -126
  251. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_enhanced_error_handler.py +270 -270
  252. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_enhanced_error_handler_comprehensive.py +245 -245
  253. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_error_handler_compatibility.py +112 -112
  254. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_factories.py +252 -252
  255. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_factory_compatibility.py +196 -196
  256. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_final_validation.py +153 -153
  257. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_fingerprint_consistency.py +135 -135
  258. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_fingerprint_simple.py +51 -51
  259. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_get_component_logger.py +83 -83
  260. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_hash_performance.py +99 -99
  261. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_integration.py +169 -169
  262. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_item_dedup_redis_key.py +122 -122
  263. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_large_scale_helper.py +235 -235
  264. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_logging_enhancements.py +374 -374
  265. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_logging_final.py +184 -184
  266. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_logging_integration.py +312 -312
  267. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_logging_system.py +282 -282
  268. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_middleware_debug.py +141 -141
  269. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_mode_consistency.py +51 -51
  270. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_multi_directory.py +67 -67
  271. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_multiple_spider_modules.py +80 -80
  272. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_mysql_pipeline_config.py +164 -164
  273. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_mysql_pipeline_error.py +98 -98
  274. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_mysql_pipeline_init_log.py +82 -82
  275. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_mysql_pipeline_integration.py +132 -132
  276. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_mysql_pipeline_refactor.py +143 -143
  277. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_mysql_pipeline_refactor_simple.py +85 -85
  278. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_mysql_pipeline_robustness.py +195 -195
  279. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_mysql_pipeline_types.py +88 -88
  280. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_mysql_update_columns.py +93 -93
  281. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_offsite_middleware.py +244 -244
  282. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_offsite_middleware_simple.py +203 -203
  283. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_optimized_selector_naming.py +100 -100
  284. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_parsel.py +29 -29
  285. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_performance.py +327 -327
  286. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_performance_monitor.py +115 -115
  287. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_pipeline_fingerprint_consistency.py +86 -86
  288. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_priority_behavior.py +211 -211
  289. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_priority_consistency.py +151 -151
  290. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_priority_consistency_fixed.py +249 -249
  291. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_proxy_health_check.py +32 -32
  292. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_proxy_middleware.py +217 -217
  293. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_proxy_middleware_enhanced.py +212 -212
  294. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_proxy_middleware_integration.py +142 -142
  295. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_proxy_middleware_refactored.py +207 -207
  296. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_proxy_only.py +83 -83
  297. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_proxy_providers.py +56 -56
  298. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_proxy_stats.py +19 -19
  299. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_proxy_strategies.py +59 -59
  300. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_proxy_with_downloader.py +152 -152
  301. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_queue_empty_check.py +41 -41
  302. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_queue_manager_double_crawlo.py +173 -173
  303. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_queue_manager_redis_key.py +179 -179
  304. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_queue_naming.py +154 -154
  305. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_queue_type.py +106 -106
  306. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_queue_type_redis_config_consistency.py +130 -130
  307. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_random_headers_default.py +322 -322
  308. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_random_headers_necessity.py +308 -308
  309. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_random_user_agent.py +72 -72
  310. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_redis_config.py +28 -28
  311. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_redis_connection_pool.py +294 -294
  312. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_redis_key_naming.py +181 -181
  313. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_redis_key_validator.py +123 -123
  314. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_redis_queue.py +224 -224
  315. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_redis_queue_name_fix.py +175 -175
  316. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_redis_queue_type_fallback.py +129 -129
  317. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_request_ignore_middleware.py +182 -182
  318. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_request_params.py +111 -111
  319. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_request_serialization.py +70 -70
  320. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_response_code_middleware.py +349 -349
  321. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_response_filter_middleware.py +427 -427
  322. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_response_follow.py +104 -104
  323. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_response_improvements.py +152 -152
  324. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_response_selector_methods.py +92 -92
  325. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_response_url_methods.py +70 -70
  326. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_response_urljoin.py +86 -86
  327. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_retry_middleware.py +333 -333
  328. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_retry_middleware_realistic.py +273 -273
  329. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_scheduler.py +252 -252
  330. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_scheduler_config_update.py +133 -133
  331. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_scrapy_style_encoding.py +112 -112
  332. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_selector_helper.py +100 -100
  333. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_selector_optimizations.py +146 -146
  334. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_simple_response.py +61 -61
  335. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_spider_loader.py +49 -49
  336. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_spider_loader_comprehensive.py +69 -69
  337. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_spider_modules.py +84 -84
  338. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_spiders/test_spider.py +9 -9
  339. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_telecom_spider_redis_key.py +205 -205
  340. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_template_content.py +87 -87
  341. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_template_redis_key.py +134 -134
  342. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_tools.py +159 -159
  343. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_user_agent_randomness.py +176 -176
  344. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_user_agents.py +96 -96
  345. {crawlo-1.4.7 → crawlo-1.4.8}/tests/untested_features_report.md +138 -138
  346. {crawlo-1.4.7 → crawlo-1.4.8}/tests/verify_debug.py +51 -51
  347. {crawlo-1.4.7 → crawlo-1.4.8}/tests/verify_distributed.py +117 -117
  348. {crawlo-1.4.7 → crawlo-1.4.8}/tests/verify_log_fix.py +111 -111
  349. {crawlo-1.4.7 → crawlo-1.4.8}/tests/verify_mysql_warnings.py +109 -109
  350. crawlo-1.4.7/crawlo/__version__.py +0 -1
  351. crawlo-1.4.7/crawlo/templates/project/__init__.py.tmpl +0 -2
  352. crawlo-1.4.7/crawlo/utils/log.py +0 -80
  353. crawlo-1.4.7/crawlo/utils/url_utils.py +0 -40
  354. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo.egg-info/dependency_links.txt +0 -0
  355. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo.egg-info/entry_points.txt +0 -0
  356. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo.egg-info/requires.txt +0 -0
  357. {crawlo-1.4.7 → crawlo-1.4.8}/crawlo.egg-info/top_level.txt +0 -0
  358. {crawlo-1.4.7 → crawlo-1.4.8}/tests/ofweek_scrapy/ofweek_scrapy/__init__.py +0 -0
  359. {crawlo-1.4.7 → crawlo-1.4.8}/tests/test_spiders/__init__.py +0 -0
@@ -1,23 +1,23 @@
1
- MIT License
2
-
3
- Modifications:
4
-
5
- Copyright (c) 2020 crawl-coder <2251018029@qq.com>
6
-
7
- Permission is hereby granted, free of charge, to any person obtaining a copy
8
- of this software and associated documentation files (the "Software"), to deal
9
- in the Software without restriction, including without limitation the rights
10
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
- copies of the Software, and to permit persons to whom the Software is
12
- furnished to do so, subject to the following conditions:
13
-
14
- The above copyright notice and this permission notice shall be included in all
15
- copies or substantial portions of the Software.
16
-
17
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1
+ MIT License
2
+
3
+ Modifications:
4
+
5
+ Copyright (c) 2020 crawl-coder <2251018029@qq.com>
6
+
7
+ Permission is hereby granted, free of charge, to any person obtaining a copy
8
+ of this software and associated documentation files (the "Software"), to deal
9
+ in the Software without restriction, including without limitation the rights
10
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
+ copies of the Software, and to permit persons to whom the Software is
12
+ furnished to do so, subject to the following conditions:
13
+
14
+ The above copyright notice and this permission notice shall be included in all
15
+ copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
23
  SOFTWARE.
@@ -1,17 +1,17 @@
1
- include README.md
2
- include LICENSE
3
- include requirements.txt # 如果根目录有全局requirements.txt
4
- include VERSION # 如果根目录有全局VERSION文件
5
-
6
- # 包内文件包含
7
- recursive-include crawlo/utils/js *
8
- recursive-include crawlo/templates *
9
-
10
- # 测试文件(如果需要在分发包中包含测试)
11
- recursive-include tests *
12
-
13
- # 排除项
14
- global-exclude __pycache__ *.py[cod] .DS_Store *.so
15
- global-exclude *.bak *.swp *.orig *.rej
16
- prune samples # 排除示例目录
1
+ include README.md
2
+ include LICENSE
3
+ include requirements.txt # 如果根目录有全局requirements.txt
4
+ include VERSION # 如果根目录有全局VERSION文件
5
+
6
+ # 包内文件包含
7
+ recursive-include crawlo/utils/js *
8
+ recursive-include crawlo/templates *
9
+
10
+ # 测试文件(如果需要在分发包中包含测试)
11
+ recursive-include tests *
12
+
13
+ # 排除项
14
+ global-exclude __pycache__ *.py[cod] .DS_Store *.so
15
+ global-exclude *.bak *.swp *.orig *.rej
16
+ prune samples # 排除示例目录
17
17
  prune docs # 排除文档目录