crawlo 1.2.0__tar.gz → 1.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crawlo might be problematic. Click here for more details.

Files changed (234)
  1. {crawlo-1.2.0 → crawlo-1.2.2}/LICENSE +22 -22
  2. {crawlo-1.2.0 → crawlo-1.2.2}/MANIFEST.in +16 -16
  3. {crawlo-1.2.0/crawlo.egg-info → crawlo-1.2.2}/PKG-INFO +692 -697
  4. {crawlo-1.2.0 → crawlo-1.2.2}/README.md +641 -646
  5. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/__init__.py +61 -61
  6. crawlo-1.2.2/crawlo/__version__.py +1 -0
  7. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/cleaners/__init__.py +60 -60
  8. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/cleaners/data_formatter.py +225 -225
  9. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/cleaners/encoding_converter.py +125 -125
  10. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/cleaners/text_cleaner.py +232 -232
  11. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/cli.py +81 -65
  12. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/commands/__init__.py +14 -14
  13. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/commands/check.py +594 -594
  14. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/commands/genspider.py +151 -151
  15. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/commands/help.py +143 -133
  16. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/commands/list.py +155 -155
  17. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/commands/run.py +292 -292
  18. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/commands/startproject.py +418 -418
  19. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/commands/stats.py +188 -188
  20. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/commands/utils.py +186 -186
  21. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/config.py +312 -312
  22. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/config_validator.py +252 -252
  23. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/core/__init__.py +2 -2
  24. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/core/engine.py +354 -354
  25. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/core/processor.py +40 -40
  26. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/core/scheduler.py +143 -143
  27. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/crawler.py +1027 -1027
  28. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/downloader/__init__.py +266 -266
  29. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/downloader/aiohttp_downloader.py +220 -220
  30. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/downloader/cffi_downloader.py +256 -256
  31. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/downloader/httpx_downloader.py +259 -259
  32. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/downloader/hybrid_downloader.py +213 -213
  33. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/downloader/playwright_downloader.py +402 -402
  34. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/downloader/selenium_downloader.py +472 -472
  35. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/event.py +11 -11
  36. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/exceptions.py +81 -81
  37. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/extension/__init__.py +37 -37
  38. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/extension/health_check.py +141 -141
  39. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/extension/log_interval.py +57 -57
  40. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/extension/log_stats.py +81 -81
  41. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/extension/logging_extension.py +43 -43
  42. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/extension/memory_monitor.py +104 -104
  43. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/extension/performance_profiler.py +133 -133
  44. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/extension/request_recorder.py +107 -107
  45. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/filters/__init__.py +154 -154
  46. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/filters/aioredis_filter.py +280 -280
  47. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/filters/memory_filter.py +269 -269
  48. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/items/__init__.py +23 -23
  49. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/items/base.py +21 -21
  50. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/items/fields.py +53 -53
  51. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/items/items.py +104 -104
  52. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/middleware/__init__.py +21 -21
  53. crawlo-1.2.2/crawlo/middleware/default_header.py +132 -0
  54. crawlo-1.2.2/crawlo/middleware/download_delay.py +105 -0
  55. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/middleware/middleware_manager.py +135 -135
  56. crawlo-1.2.2/crawlo/middleware/offsite.py +116 -0
  57. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/middleware/proxy.py +366 -272
  58. crawlo-1.2.2/crawlo/middleware/request_ignore.py +88 -0
  59. crawlo-1.2.2/crawlo/middleware/response_code.py +165 -0
  60. crawlo-1.2.2/crawlo/middleware/response_filter.py +138 -0
  61. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/middleware/retry.py +124 -124
  62. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/mode_manager.py +211 -211
  63. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/network/__init__.py +21 -21
  64. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/network/request.py +338 -338
  65. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/network/response.py +359 -359
  66. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/pipelines/__init__.py +21 -21
  67. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/pipelines/bloom_dedup_pipeline.py +156 -156
  68. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/pipelines/console_pipeline.py +39 -39
  69. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/pipelines/csv_pipeline.py +316 -316
  70. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/pipelines/database_dedup_pipeline.py +224 -224
  71. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/pipelines/json_pipeline.py +218 -218
  72. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/pipelines/memory_dedup_pipeline.py +115 -115
  73. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/pipelines/mongo_pipeline.py +131 -131
  74. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/pipelines/mysql_pipeline.py +316 -316
  75. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/pipelines/pipeline_manager.py +61 -61
  76. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/pipelines/redis_dedup_pipeline.py +167 -167
  77. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/project.py +187 -187
  78. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/queue/pqueue.py +37 -37
  79. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/queue/queue_manager.py +337 -337
  80. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/queue/redis_priority_queue.py +298 -298
  81. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/settings/__init__.py +7 -7
  82. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/settings/default_settings.py +226 -219
  83. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/settings/setting_manager.py +122 -122
  84. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/spider/__init__.py +639 -639
  85. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/stats_collector.py +59 -59
  86. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/subscriber.py +130 -130
  87. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/task_manager.py +30 -30
  88. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/templates/crawlo.cfg.tmpl +10 -10
  89. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/templates/project/__init__.py.tmpl +3 -3
  90. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/templates/project/items.py.tmpl +17 -17
  91. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/templates/project/middlewares.py.tmpl +118 -109
  92. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/templates/project/pipelines.py.tmpl +96 -96
  93. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/templates/project/run.py.tmpl +45 -45
  94. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/templates/project/settings.py.tmpl +327 -326
  95. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/templates/project/settings_distributed.py.tmpl +119 -119
  96. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/templates/project/settings_gentle.py.tmpl +94 -94
  97. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/templates/project/settings_high_performance.py.tmpl +151 -151
  98. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/templates/project/settings_simple.py.tmpl +68 -68
  99. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/templates/project/spiders/__init__.py.tmpl +5 -5
  100. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/templates/spider/spider.py.tmpl +143 -141
  101. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/tools/__init__.py +182 -182
  102. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/tools/anti_crawler.py +268 -268
  103. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/tools/authenticated_proxy.py +240 -240
  104. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/tools/data_validator.py +180 -180
  105. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/tools/date_tools.py +35 -35
  106. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/tools/distributed_coordinator.py +386 -386
  107. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/tools/retry_mechanism.py +220 -220
  108. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/tools/scenario_adapter.py +262 -262
  109. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/utils/__init__.py +35 -35
  110. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/utils/batch_processor.py +260 -260
  111. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/utils/controlled_spider_mixin.py +439 -439
  112. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/utils/date_tools.py +290 -290
  113. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/utils/db_helper.py +343 -343
  114. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/utils/enhanced_error_handler.py +359 -359
  115. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/utils/env_config.py +105 -105
  116. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/utils/error_handler.py +125 -125
  117. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/utils/func_tools.py +82 -82
  118. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/utils/large_scale_config.py +286 -286
  119. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/utils/large_scale_helper.py +343 -343
  120. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/utils/log.py +128 -128
  121. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/utils/performance_monitor.py +284 -284
  122. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/utils/queue_helper.py +175 -175
  123. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/utils/redis_connection_pool.py +334 -334
  124. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/utils/redis_key_validator.py +199 -199
  125. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/utils/request.py +267 -267
  126. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/utils/request_serializer.py +219 -219
  127. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/utils/spider_loader.py +62 -62
  128. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/utils/system.py +11 -11
  129. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/utils/tools.py +4 -4
  130. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/utils/url.py +39 -39
  131. {crawlo-1.2.0 → crawlo-1.2.2/crawlo.egg-info}/PKG-INFO +692 -697
  132. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo.egg-info/SOURCES.txt +30 -0
  133. {crawlo-1.2.0 → crawlo-1.2.2}/examples/__init__.py +7 -7
  134. crawlo-1.2.2/examples/aiohttp_settings.py +42 -0
  135. crawlo-1.2.2/examples/curl_cffi_settings.py +41 -0
  136. crawlo-1.2.2/examples/default_header_middleware_example.py +107 -0
  137. crawlo-1.2.2/examples/default_header_spider_example.py +129 -0
  138. crawlo-1.2.2/examples/download_delay_middleware_example.py +160 -0
  139. crawlo-1.2.2/examples/httpx_settings.py +42 -0
  140. crawlo-1.2.2/examples/multi_downloader_proxy_example.py +81 -0
  141. crawlo-1.2.2/examples/offsite_middleware_example.py +55 -0
  142. crawlo-1.2.2/examples/offsite_spider_example.py +107 -0
  143. crawlo-1.2.2/examples/proxy_spider_example.py +166 -0
  144. crawlo-1.2.2/examples/request_ignore_middleware_example.py +51 -0
  145. crawlo-1.2.2/examples/request_ignore_spider_example.py +99 -0
  146. crawlo-1.2.2/examples/response_code_middleware_example.py +52 -0
  147. crawlo-1.2.2/examples/response_filter_middleware_example.py +67 -0
  148. crawlo-1.2.2/examples/tong_hua_shun_settings.py +62 -0
  149. crawlo-1.2.2/examples/tong_hua_shun_spider.py +170 -0
  150. {crawlo-1.2.0 → crawlo-1.2.2}/pyproject.toml +2 -2
  151. {crawlo-1.2.0 → crawlo-1.2.2}/requirements.txt +28 -28
  152. {crawlo-1.2.0 → crawlo-1.2.2}/setup.cfg +71 -71
  153. {crawlo-1.2.0 → crawlo-1.2.2}/tests/DOUBLE_CRAWLO_PREFIX_FIX_REPORT.md +81 -81
  154. {crawlo-1.2.0 → crawlo-1.2.2}/tests/__init__.py +7 -7
  155. {crawlo-1.2.0 → crawlo-1.2.2}/tests/advanced_tools_example.py +275 -275
  156. {crawlo-1.2.0 → crawlo-1.2.2}/tests/authenticated_proxy_example.py +236 -236
  157. {crawlo-1.2.0 → crawlo-1.2.2}/tests/cleaners_example.py +160 -160
  158. {crawlo-1.2.0 → crawlo-1.2.2}/tests/config_validation_demo.py +102 -102
  159. {crawlo-1.2.0 → crawlo-1.2.2}/tests/controlled_spider_example.py +205 -205
  160. {crawlo-1.2.0 → crawlo-1.2.2}/tests/date_tools_example.py +180 -180
  161. {crawlo-1.2.0 → crawlo-1.2.2}/tests/dynamic_loading_example.py +523 -523
  162. {crawlo-1.2.0 → crawlo-1.2.2}/tests/dynamic_loading_test.py +104 -104
  163. {crawlo-1.2.0 → crawlo-1.2.2}/tests/env_config_example.py +133 -133
  164. {crawlo-1.2.0 → crawlo-1.2.2}/tests/error_handling_example.py +171 -171
  165. {crawlo-1.2.0 → crawlo-1.2.2}/tests/redis_key_validation_demo.py +130 -130
  166. {crawlo-1.2.0 → crawlo-1.2.2}/tests/response_improvements_example.py +144 -144
  167. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_advanced_tools.py +148 -148
  168. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_all_redis_key_configs.py +145 -145
  169. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_authenticated_proxy.py +141 -141
  170. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_cleaners.py +54 -54
  171. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_comprehensive.py +146 -146
  172. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_config_validator.py +193 -193
  173. crawlo-1.2.2/tests/test_crawlo_proxy_integration.py +173 -0
  174. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_date_tools.py +123 -123
  175. crawlo-1.2.2/tests/test_default_header_middleware.py +159 -0
  176. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_double_crawlo_fix.py +207 -207
  177. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_double_crawlo_fix_simple.py +124 -124
  178. crawlo-1.2.2/tests/test_download_delay_middleware.py +222 -0
  179. crawlo-1.2.2/tests/test_downloader_proxy_compatibility.py +269 -0
  180. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_dynamic_downloaders_proxy.py +124 -124
  181. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_dynamic_proxy.py +92 -92
  182. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_dynamic_proxy_config.py +146 -146
  183. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_dynamic_proxy_real.py +109 -109
  184. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_edge_cases.py +303 -303
  185. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_enhanced_error_handler.py +270 -270
  186. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_env_config.py +121 -121
  187. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_error_handler_compatibility.py +112 -112
  188. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_final_validation.py +153 -153
  189. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_framework_env_usage.py +103 -103
  190. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_integration.py +356 -356
  191. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_item_dedup_redis_key.py +122 -122
  192. crawlo-1.2.2/tests/test_offsite_middleware.py +222 -0
  193. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_parsel.py +29 -29
  194. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_performance.py +327 -327
  195. crawlo-1.2.2/tests/test_proxy_api.py +265 -0
  196. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_proxy_health_check.py +32 -32
  197. crawlo-1.2.2/tests/test_proxy_middleware.py +122 -0
  198. crawlo-1.2.2/tests/test_proxy_middleware_enhanced.py +217 -0
  199. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_proxy_middleware_integration.py +136 -136
  200. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_proxy_providers.py +56 -56
  201. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_proxy_stats.py +19 -19
  202. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_proxy_strategies.py +59 -59
  203. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_queue_manager_double_crawlo.py +173 -173
  204. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_queue_manager_redis_key.py +176 -176
  205. crawlo-1.2.2/tests/test_real_scenario_proxy.py +196 -0
  206. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_redis_config.py +28 -28
  207. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_redis_connection_pool.py +294 -294
  208. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_redis_key_naming.py +181 -181
  209. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_redis_key_validator.py +123 -123
  210. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_redis_queue.py +224 -224
  211. crawlo-1.2.2/tests/test_request_ignore_middleware.py +183 -0
  212. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_request_serialization.py +70 -70
  213. crawlo-1.2.2/tests/test_response_code_middleware.py +350 -0
  214. crawlo-1.2.2/tests/test_response_filter_middleware.py +428 -0
  215. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_response_improvements.py +152 -152
  216. crawlo-1.2.2/tests/test_retry_middleware.py +242 -0
  217. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_scheduler.py +241 -241
  218. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_simple_response.py +61 -61
  219. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_telecom_spider_redis_key.py +205 -205
  220. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_template_content.py +87 -87
  221. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_template_redis_key.py +134 -134
  222. {crawlo-1.2.0 → crawlo-1.2.2}/tests/test_tools.py +153 -153
  223. {crawlo-1.2.0 → crawlo-1.2.2}/tests/tools_example.py +257 -257
  224. crawlo-1.2.0/crawlo/__version__.py +0 -1
  225. crawlo-1.2.0/crawlo/middleware/default_header.py +0 -32
  226. crawlo-1.2.0/crawlo/middleware/download_delay.py +0 -28
  227. crawlo-1.2.0/crawlo/middleware/request_ignore.py +0 -30
  228. crawlo-1.2.0/crawlo/middleware/response_code.py +0 -19
  229. crawlo-1.2.0/crawlo/middleware/response_filter.py +0 -26
  230. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo/queue/__init__.py +0 -0
  231. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo.egg-info/dependency_links.txt +0 -0
  232. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo.egg-info/entry_points.txt +0 -0
  233. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo.egg-info/requires.txt +0 -0
  234. {crawlo-1.2.0 → crawlo-1.2.2}/crawlo.egg-info/top_level.txt +0 -0
@@ -1,23 +1,23 @@
1
- MIT License
2
-
3
- Modifications:
4
-
5
- Copyright (c) 2020 crawl-coder <2251018029@qq.com>
6
-
7
- Permission is hereby granted, free of charge, to any person obtaining a copy
8
- of this software and associated documentation files (the "Software"), to deal
9
- in the Software without restriction, including without limitation the rights
10
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
- copies of the Software, and to permit persons to whom the Software is
12
- furnished to do so, subject to the following conditions:
13
-
14
- The above copyright notice and this permission notice shall be included in all
15
- copies or substantial portions of the Software.
16
-
17
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1
+ MIT License
2
+
3
+ Modifications:
4
+
5
+ Copyright (c) 2020 crawl-coder <2251018029@qq.com>
6
+
7
+ Permission is hereby granted, free of charge, to any person obtaining a copy
8
+ of this software and associated documentation files (the "Software"), to deal
9
+ in the Software without restriction, including without limitation the rights
10
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
+ copies of the Software, and to permit persons to whom the Software is
12
+ furnished to do so, subject to the following conditions:
13
+
14
+ The above copyright notice and this permission notice shall be included in all
15
+ copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
23
  SOFTWARE.
@@ -1,17 +1,17 @@
1
- include README.md
2
- include LICENSE
3
- include requirements.txt # 如果根目录有全局requirements.txt
4
- include VERSION # 如果根目录有全局VERSION文件
5
-
6
- # 包内文件包含
7
- recursive-include crawlo/utils/js *
8
- recursive-include crawlo/templates *
9
-
10
- # 测试文件(如果需要在分发包中包含测试)
11
- recursive-include tests *
12
-
13
- # 排除项
14
- global-exclude __pycache__ *.py[cod] .DS_Store *.so
15
- global-exclude *.bak *.swp *.orig *.rej
16
- prune samples # 排除示例目录
1
+ include README.md
2
+ include LICENSE
3
+ include requirements.txt # 如果根目录有全局requirements.txt
4
+ include VERSION # 如果根目录有全局VERSION文件
5
+
6
+ # 包内文件包含
7
+ recursive-include crawlo/utils/js *
8
+ recursive-include crawlo/templates *
9
+
10
+ # 测试文件(如果需要在分发包中包含测试)
11
+ recursive-include tests *
12
+
13
+ # 排除项
14
+ global-exclude __pycache__ *.py[cod] .DS_Store *.so
15
+ global-exclude *.bak *.swp *.orig *.rej
16
+ prune samples # 排除示例目录
17
17
  prune docs # 排除文档目录