crawlo 1.3.1__tar.gz → 1.3.3__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crawlo might be problematic. Click here for more details.

Files changed (228)
  1. {crawlo-1.3.1 → crawlo-1.3.3}/LICENSE +22 -22
  2. {crawlo-1.3.1 → crawlo-1.3.3}/MANIFEST.in +16 -16
  3. {crawlo-1.3.1/crawlo.egg-info → crawlo-1.3.3}/PKG-INFO +1020 -1020
  4. {crawlo-1.3.1 → crawlo-1.3.3}/README.md +969 -969
  5. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/__init__.py +63 -63
  6. crawlo-1.3.3/crawlo/__version__.py +1 -0
  7. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/cli.py +75 -75
  8. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/commands/__init__.py +14 -14
  9. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/commands/check.py +594 -594
  10. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/commands/genspider.py +151 -151
  11. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/commands/help.py +138 -138
  12. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/commands/list.py +155 -155
  13. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/commands/run.py +322 -314
  14. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/commands/startproject.py +436 -436
  15. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/commands/stats.py +187 -187
  16. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/commands/utils.py +196 -196
  17. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/config.py +312 -312
  18. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/config_validator.py +277 -277
  19. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/core/__init__.py +2 -2
  20. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/core/engine.py +365 -365
  21. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/core/processor.py +40 -40
  22. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/core/scheduler.py +256 -256
  23. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/crawler.py +1166 -1168
  24. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/data/__init__.py +5 -5
  25. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/data/user_agents.py +194 -194
  26. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/downloader/__init__.py +273 -273
  27. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/downloader/aiohttp_downloader.py +226 -226
  28. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/downloader/cffi_downloader.py +245 -245
  29. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/downloader/httpx_downloader.py +259 -259
  30. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/downloader/hybrid_downloader.py +212 -212
  31. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/downloader/playwright_downloader.py +402 -402
  32. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/downloader/selenium_downloader.py +472 -472
  33. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/event.py +11 -11
  34. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/exceptions.py +81 -81
  35. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/extension/__init__.py +39 -39
  36. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/extension/health_check.py +141 -141
  37. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/extension/log_interval.py +57 -57
  38. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/extension/log_stats.py +81 -81
  39. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/extension/logging_extension.py +52 -45
  40. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/extension/memory_monitor.py +104 -104
  41. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/extension/performance_profiler.py +133 -133
  42. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/extension/request_recorder.py +107 -107
  43. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/filters/__init__.py +154 -154
  44. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/filters/aioredis_filter.py +234 -234
  45. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/filters/memory_filter.py +269 -269
  46. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/items/__init__.py +23 -23
  47. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/items/base.py +21 -21
  48. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/items/fields.py +52 -52
  49. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/items/items.py +104 -104
  50. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/middleware/__init__.py +21 -21
  51. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/middleware/default_header.py +132 -132
  52. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/middleware/download_delay.py +104 -104
  53. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/middleware/middleware_manager.py +135 -135
  54. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/middleware/offsite.py +123 -115
  55. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/middleware/proxy.py +386 -386
  56. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/middleware/request_ignore.py +86 -86
  57. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/middleware/response_code.py +163 -163
  58. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/middleware/response_filter.py +136 -136
  59. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/middleware/retry.py +124 -124
  60. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/middleware/simple_proxy.py +65 -65
  61. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/mode_manager.py +187 -148
  62. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/network/__init__.py +21 -21
  63. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/network/request.py +379 -379
  64. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/network/response.py +359 -359
  65. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/pipelines/__init__.py +21 -21
  66. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/pipelines/bloom_dedup_pipeline.py +156 -156
  67. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/pipelines/console_pipeline.py +39 -39
  68. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/pipelines/csv_pipeline.py +316 -316
  69. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/pipelines/database_dedup_pipeline.py +222 -222
  70. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/pipelines/json_pipeline.py +218 -218
  71. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/pipelines/memory_dedup_pipeline.py +115 -115
  72. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/pipelines/mongo_pipeline.py +131 -131
  73. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/pipelines/mysql_pipeline.py +318 -318
  74. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/pipelines/pipeline_manager.py +75 -75
  75. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/pipelines/redis_dedup_pipeline.py +166 -166
  76. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/project.py +325 -297
  77. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/queue/pqueue.py +37 -37
  78. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/queue/queue_manager.py +379 -379
  79. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/queue/redis_priority_queue.py +306 -306
  80. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/settings/__init__.py +7 -7
  81. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/settings/default_settings.py +225 -225
  82. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/settings/setting_manager.py +198 -198
  83. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/spider/__init__.py +639 -639
  84. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/stats_collector.py +59 -59
  85. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/subscriber.py +129 -129
  86. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/task_manager.py +30 -30
  87. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/templates/crawlo.cfg.tmpl +10 -10
  88. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/templates/project/__init__.py.tmpl +3 -3
  89. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/templates/project/items.py.tmpl +17 -17
  90. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/templates/project/middlewares.py.tmpl +118 -118
  91. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/templates/project/pipelines.py.tmpl +96 -96
  92. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/templates/project/settings.py.tmpl +266 -261
  93. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/templates/project/settings_distributed.py.tmpl +179 -174
  94. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/templates/project/settings_gentle.py.tmpl +60 -95
  95. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/templates/project/settings_high_performance.py.tmpl +130 -125
  96. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/templates/project/settings_minimal.py.tmpl +34 -29
  97. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/templates/project/settings_simple.py.tmpl +101 -96
  98. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/templates/project/spiders/__init__.py.tmpl +5 -5
  99. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/templates/run.py.tmpl +38 -47
  100. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/templates/spider/spider.py.tmpl +143 -143
  101. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/tools/__init__.py +200 -200
  102. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/tools/anti_crawler.py +268 -268
  103. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/tools/authenticated_proxy.py +240 -240
  104. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/tools/data_formatter.py +225 -225
  105. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/tools/data_validator.py +180 -180
  106. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/tools/date_tools.py +289 -289
  107. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/tools/distributed_coordinator.py +388 -388
  108. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/tools/encoding_converter.py +127 -127
  109. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/tools/request_tools.py +82 -82
  110. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/tools/retry_mechanism.py +224 -224
  111. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/tools/scenario_adapter.py +262 -262
  112. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/tools/text_cleaner.py +232 -232
  113. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/utils/__init__.py +34 -34
  114. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/utils/batch_processor.py +259 -259
  115. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/utils/controlled_spider_mixin.py +439 -439
  116. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/utils/db_helper.py +343 -343
  117. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/utils/enhanced_error_handler.py +356 -356
  118. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/utils/env_config.py +142 -142
  119. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/utils/error_handler.py +123 -123
  120. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/utils/func_tools.py +82 -82
  121. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/utils/large_scale_config.py +286 -286
  122. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/utils/large_scale_helper.py +344 -344
  123. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/utils/log.py +199 -146
  124. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/utils/performance_monitor.py +285 -285
  125. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/utils/queue_helper.py +175 -175
  126. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/utils/redis_connection_pool.py +351 -351
  127. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/utils/redis_key_validator.py +198 -198
  128. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/utils/request.py +267 -267
  129. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/utils/request_serializer.py +218 -218
  130. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/utils/spider_loader.py +61 -61
  131. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/utils/system.py +11 -11
  132. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/utils/tools.py +4 -4
  133. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/utils/url.py +39 -39
  134. {crawlo-1.3.1 → crawlo-1.3.3/crawlo.egg-info}/PKG-INFO +1020 -1020
  135. {crawlo-1.3.1 → crawlo-1.3.3}/examples/__init__.py +7 -7
  136. {crawlo-1.3.1 → crawlo-1.3.3}/pyproject.toml +2 -2
  137. {crawlo-1.3.1 → crawlo-1.3.3}/requirements.txt +32 -32
  138. {crawlo-1.3.1 → crawlo-1.3.3}/setup.cfg +71 -71
  139. {crawlo-1.3.1 → crawlo-1.3.3}/tests/DOUBLE_CRAWLO_PREFIX_FIX_REPORT.md +81 -81
  140. {crawlo-1.3.1 → crawlo-1.3.3}/tests/__init__.py +7 -7
  141. {crawlo-1.3.1 → crawlo-1.3.3}/tests/advanced_tools_example.py +275 -275
  142. {crawlo-1.3.1 → crawlo-1.3.3}/tests/authenticated_proxy_example.py +107 -107
  143. {crawlo-1.3.1 → crawlo-1.3.3}/tests/cleaners_example.py +160 -160
  144. {crawlo-1.3.1 → crawlo-1.3.3}/tests/config_validation_demo.py +142 -142
  145. {crawlo-1.3.1 → crawlo-1.3.3}/tests/controlled_spider_example.py +205 -205
  146. {crawlo-1.3.1 → crawlo-1.3.3}/tests/date_tools_example.py +180 -180
  147. {crawlo-1.3.1 → crawlo-1.3.3}/tests/debug_pipelines.py +66 -66
  148. {crawlo-1.3.1 → crawlo-1.3.3}/tests/dynamic_loading_example.py +523 -523
  149. {crawlo-1.3.1 → crawlo-1.3.3}/tests/dynamic_loading_test.py +104 -104
  150. {crawlo-1.3.1 → crawlo-1.3.3}/tests/env_config_example.py +133 -133
  151. {crawlo-1.3.1 → crawlo-1.3.3}/tests/error_handling_example.py +171 -171
  152. {crawlo-1.3.1 → crawlo-1.3.3}/tests/redis_key_validation_demo.py +130 -130
  153. {crawlo-1.3.1 → crawlo-1.3.3}/tests/request_params_example.py +150 -150
  154. {crawlo-1.3.1 → crawlo-1.3.3}/tests/response_improvements_example.py +144 -144
  155. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_advanced_tools.py +148 -148
  156. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_all_redis_key_configs.py +145 -145
  157. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_authenticated_proxy.py +141 -141
  158. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_cleaners.py +54 -54
  159. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_comprehensive.py +146 -146
  160. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_config_consistency.py +80 -80
  161. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_config_merge.py +152 -152
  162. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_config_validator.py +182 -182
  163. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_crawlo_proxy_integration.py +108 -108
  164. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_date_tools.py +123 -123
  165. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_default_header_middleware.py +158 -158
  166. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_distributed.py +65 -65
  167. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_double_crawlo_fix.py +207 -207
  168. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_double_crawlo_fix_simple.py +124 -124
  169. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_download_delay_middleware.py +221 -221
  170. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_downloader_proxy_compatibility.py +268 -268
  171. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_dynamic_downloaders_proxy.py +124 -124
  172. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_dynamic_proxy.py +92 -92
  173. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_dynamic_proxy_config.py +146 -146
  174. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_dynamic_proxy_real.py +109 -109
  175. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_edge_cases.py +303 -303
  176. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_enhanced_error_handler.py +270 -270
  177. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_env_config.py +121 -121
  178. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_error_handler_compatibility.py +112 -112
  179. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_final_validation.py +153 -153
  180. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_framework_env_usage.py +103 -103
  181. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_integration.py +169 -169
  182. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_item_dedup_redis_key.py +122 -122
  183. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_mode_consistency.py +51 -51
  184. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_offsite_middleware.py +221 -221
  185. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_parsel.py +29 -29
  186. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_performance.py +327 -327
  187. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_proxy_api.py +264 -264
  188. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_proxy_health_check.py +32 -32
  189. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_proxy_middleware.py +121 -121
  190. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_proxy_middleware_enhanced.py +216 -216
  191. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_proxy_middleware_integration.py +136 -136
  192. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_proxy_middleware_refactored.py +184 -184
  193. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_proxy_providers.py +56 -56
  194. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_proxy_stats.py +19 -19
  195. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_proxy_strategies.py +59 -59
  196. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_queue_manager_double_crawlo.py +173 -173
  197. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_queue_manager_redis_key.py +176 -176
  198. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_random_user_agent.py +72 -72
  199. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_real_scenario_proxy.py +195 -195
  200. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_redis_config.py +28 -28
  201. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_redis_connection_pool.py +294 -294
  202. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_redis_key_naming.py +181 -181
  203. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_redis_key_validator.py +123 -123
  204. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_redis_queue.py +224 -224
  205. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_request_ignore_middleware.py +182 -182
  206. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_request_params.py +111 -111
  207. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_request_serialization.py +70 -70
  208. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_response_code_middleware.py +349 -349
  209. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_response_filter_middleware.py +427 -427
  210. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_response_improvements.py +152 -152
  211. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_retry_middleware.py +241 -241
  212. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_scheduler.py +252 -252
  213. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_scheduler_config_update.py +133 -133
  214. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_simple_response.py +61 -61
  215. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_telecom_spider_redis_key.py +205 -205
  216. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_template_content.py +87 -87
  217. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_template_redis_key.py +134 -134
  218. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_tools.py +159 -159
  219. {crawlo-1.3.1 → crawlo-1.3.3}/tests/test_user_agents.py +96 -96
  220. {crawlo-1.3.1 → crawlo-1.3.3}/tests/tools_example.py +260 -260
  221. {crawlo-1.3.1 → crawlo-1.3.3}/tests/verify_distributed.py +117 -117
  222. crawlo-1.3.1/crawlo/__version__.py +0 -1
  223. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo/queue/__init__.py +0 -0
  224. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo.egg-info/SOURCES.txt +0 -0
  225. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo.egg-info/dependency_links.txt +0 -0
  226. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo.egg-info/entry_points.txt +0 -0
  227. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo.egg-info/requires.txt +0 -0
  228. {crawlo-1.3.1 → crawlo-1.3.3}/crawlo.egg-info/top_level.txt +0 -0
@@ -1,23 +1,23 @@
1
- MIT License
2
-
3
- Modifications:
4
-
5
- Copyright (c) 2020 crawl-coder <2251018029@qq.com>
6
-
7
- Permission is hereby granted, free of charge, to any person obtaining a copy
8
- of this software and associated documentation files (the "Software"), to deal
9
- in the Software without restriction, including without limitation the rights
10
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
- copies of the Software, and to permit persons to whom the Software is
12
- furnished to do so, subject to the following conditions:
13
-
14
- The above copyright notice and this permission notice shall be included in all
15
- copies or substantial portions of the Software.
16
-
17
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1
+ MIT License
2
+
3
+ Modifications:
4
+
5
+ Copyright (c) 2020 crawl-coder <2251018029@qq.com>
6
+
7
+ Permission is hereby granted, free of charge, to any person obtaining a copy
8
+ of this software and associated documentation files (the "Software"), to deal
9
+ in the Software without restriction, including without limitation the rights
10
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
+ copies of the Software, and to permit persons to whom the Software is
12
+ furnished to do so, subject to the following conditions:
13
+
14
+ The above copyright notice and this permission notice shall be included in all
15
+ copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
23
  SOFTWARE.
@@ -1,17 +1,17 @@
1
- include README.md
2
- include LICENSE
3
- include requirements.txt # 如果根目录有全局requirements.txt
4
- include VERSION # 如果根目录有全局VERSION文件
5
-
6
- # 包内文件包含
7
- recursive-include crawlo/utils/js *
8
- recursive-include crawlo/templates *
9
-
10
- # 测试文件(如果需要在分发包中包含测试)
11
- recursive-include tests *
12
-
13
- # 排除项
14
- global-exclude __pycache__ *.py[cod] .DS_Store *.so
15
- global-exclude *.bak *.swp *.orig *.rej
16
- prune samples # 排除示例目录
1
+ include README.md
2
+ include LICENSE
3
+ include requirements.txt # 如果根目录有全局requirements.txt
4
+ include VERSION # 如果根目录有全局VERSION文件
5
+
6
+ # 包内文件包含
7
+ recursive-include crawlo/utils/js *
8
+ recursive-include crawlo/templates *
9
+
10
+ # 测试文件(如果需要在分发包中包含测试)
11
+ recursive-include tests *
12
+
13
+ # 排除项
14
+ global-exclude __pycache__ *.py[cod] .DS_Store *.so
15
+ global-exclude *.bak *.swp *.orig *.rej
16
+ prune samples # 排除示例目录
17
17
  prune docs # 排除文档目录