crawlo 1.2.7__tar.gz → 1.2.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crawlo might be problematic. Click here for more details.

Files changed (237)
  1. {crawlo-1.2.7 → crawlo-1.2.9}/LICENSE +22 -22
  2. {crawlo-1.2.7 → crawlo-1.2.9}/MANIFEST.in +16 -16
  3. {crawlo-1.2.7/crawlo.egg-info → crawlo-1.2.9}/PKG-INFO +1011 -764
  4. {crawlo-1.2.7 → crawlo-1.2.9}/README.md +960 -713
  5. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/__init__.py +63 -61
  6. crawlo-1.2.9/crawlo/__version__.py +1 -0
  7. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/cli.py +75 -75
  8. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/commands/__init__.py +14 -14
  9. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/commands/check.py +594 -594
  10. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/commands/genspider.py +151 -151
  11. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/commands/help.py +138 -138
  12. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/commands/list.py +155 -155
  13. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/commands/run.py +323 -323
  14. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/commands/startproject.py +436 -436
  15. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/commands/stats.py +187 -187
  16. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/commands/utils.py +186 -186
  17. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/config.py +312 -312
  18. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/config_validator.py +277 -251
  19. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/core/__init__.py +2 -2
  20. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/core/engine.py +366 -365
  21. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/core/processor.py +40 -40
  22. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/core/scheduler.py +256 -251
  23. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/crawler.py +1103 -1100
  24. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/data/__init__.py +5 -5
  25. crawlo-1.2.9/crawlo/data/user_agents.py +195 -0
  26. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/downloader/__init__.py +273 -266
  27. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/downloader/aiohttp_downloader.py +226 -228
  28. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/downloader/cffi_downloader.py +245 -256
  29. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/downloader/httpx_downloader.py +259 -259
  30. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/downloader/hybrid_downloader.py +212 -212
  31. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/downloader/playwright_downloader.py +402 -402
  32. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/downloader/selenium_downloader.py +472 -472
  33. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/event.py +11 -11
  34. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/exceptions.py +81 -81
  35. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/extension/__init__.py +39 -39
  36. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/extension/health_check.py +141 -141
  37. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/extension/log_interval.py +57 -57
  38. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/extension/log_stats.py +81 -81
  39. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/extension/logging_extension.py +43 -43
  40. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/extension/memory_monitor.py +104 -104
  41. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/extension/performance_profiler.py +133 -133
  42. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/extension/request_recorder.py +107 -107
  43. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/filters/__init__.py +154 -154
  44. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/filters/aioredis_filter.py +234 -234
  45. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/filters/memory_filter.py +269 -269
  46. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/items/__init__.py +23 -23
  47. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/items/base.py +21 -21
  48. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/items/fields.py +52 -52
  49. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/items/items.py +104 -104
  50. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/middleware/__init__.py +21 -21
  51. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/middleware/default_header.py +132 -132
  52. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/middleware/download_delay.py +104 -104
  53. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/middleware/middleware_manager.py +136 -136
  54. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/middleware/offsite.py +114 -114
  55. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/middleware/proxy.py +386 -368
  56. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/middleware/request_ignore.py +86 -86
  57. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/middleware/response_code.py +163 -163
  58. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/middleware/response_filter.py +136 -136
  59. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/middleware/retry.py +124 -124
  60. crawlo-1.2.9/crawlo/middleware/simple_proxy.py +65 -0
  61. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/mode_manager.py +211 -211
  62. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/network/__init__.py +21 -21
  63. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/network/request.py +379 -338
  64. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/network/response.py +359 -359
  65. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/pipelines/__init__.py +21 -21
  66. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/pipelines/bloom_dedup_pipeline.py +157 -157
  67. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/pipelines/console_pipeline.py +39 -39
  68. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/pipelines/csv_pipeline.py +316 -316
  69. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/pipelines/database_dedup_pipeline.py +223 -223
  70. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/pipelines/json_pipeline.py +218 -218
  71. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/pipelines/memory_dedup_pipeline.py +115 -115
  72. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/pipelines/mongo_pipeline.py +131 -131
  73. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/pipelines/mysql_pipeline.py +317 -317
  74. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/pipelines/pipeline_manager.py +62 -62
  75. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/pipelines/redis_dedup_pipeline.py +167 -167
  76. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/project.py +290 -315
  77. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/queue/pqueue.py +37 -37
  78. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/queue/queue_manager.py +379 -378
  79. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/queue/redis_priority_queue.py +306 -306
  80. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/settings/__init__.py +7 -7
  81. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/settings/default_settings.py +216 -220
  82. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/settings/setting_manager.py +163 -122
  83. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/spider/__init__.py +639 -639
  84. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/stats_collector.py +59 -59
  85. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/subscriber.py +129 -129
  86. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/task_manager.py +30 -30
  87. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/templates/crawlo.cfg.tmpl +10 -10
  88. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/templates/project/__init__.py.tmpl +3 -3
  89. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/templates/project/items.py.tmpl +17 -17
  90. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/templates/project/middlewares.py.tmpl +118 -118
  91. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/templates/project/pipelines.py.tmpl +96 -96
  92. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/templates/project/settings.py.tmpl +261 -288
  93. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/templates/project/settings_distributed.py.tmpl +174 -157
  94. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/templates/project/settings_gentle.py.tmpl +95 -100
  95. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/templates/project/settings_high_performance.py.tmpl +125 -134
  96. crawlo-1.2.9/crawlo/templates/project/settings_minimal.py.tmpl +30 -0
  97. crawlo-1.2.9/crawlo/templates/project/settings_simple.py.tmpl +97 -0
  98. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/templates/project/spiders/__init__.py.tmpl +5 -5
  99. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/templates/run.py.tmpl +47 -45
  100. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/templates/spider/spider.py.tmpl +143 -143
  101. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/tools/__init__.py +200 -182
  102. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/tools/anti_crawler.py +268 -268
  103. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/tools/authenticated_proxy.py +240 -240
  104. {crawlo-1.2.7/crawlo/cleaners → crawlo-1.2.9/crawlo/tools}/data_formatter.py +225 -225
  105. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/tools/data_validator.py +180 -180
  106. {crawlo-1.2.7/crawlo/utils → crawlo-1.2.9/crawlo/tools}/date_tools.py +290 -290
  107. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/tools/distributed_coordinator.py +388 -387
  108. {crawlo-1.2.7/crawlo/cleaners → crawlo-1.2.9/crawlo/tools}/encoding_converter.py +127 -126
  109. crawlo-1.2.9/crawlo/tools/request_tools.py +83 -0
  110. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/tools/retry_mechanism.py +224 -221
  111. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/tools/scenario_adapter.py +262 -262
  112. {crawlo-1.2.7/crawlo/cleaners → crawlo-1.2.9/crawlo/tools}/text_cleaner.py +232 -232
  113. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/utils/__init__.py +35 -35
  114. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/utils/batch_processor.py +259 -259
  115. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/utils/controlled_spider_mixin.py +439 -439
  116. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/utils/db_helper.py +343 -343
  117. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/utils/enhanced_error_handler.py +356 -356
  118. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/utils/env_config.py +142 -142
  119. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/utils/error_handler.py +123 -123
  120. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/utils/func_tools.py +82 -82
  121. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/utils/large_scale_config.py +286 -286
  122. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/utils/large_scale_helper.py +344 -344
  123. crawlo-1.2.9/crawlo/utils/log.py +188 -0
  124. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/utils/performance_monitor.py +285 -285
  125. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/utils/queue_helper.py +175 -175
  126. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/utils/redis_connection_pool.py +351 -351
  127. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/utils/redis_key_validator.py +198 -198
  128. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/utils/request.py +267 -267
  129. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/utils/request_serializer.py +218 -218
  130. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/utils/spider_loader.py +61 -61
  131. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/utils/system.py +11 -11
  132. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/utils/tools.py +4 -4
  133. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/utils/url.py +39 -39
  134. {crawlo-1.2.7 → crawlo-1.2.9/crawlo.egg-info}/PKG-INFO +1011 -764
  135. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo.egg-info/SOURCES.txt +16 -6
  136. {crawlo-1.2.7 → crawlo-1.2.9}/examples/__init__.py +7 -7
  137. {crawlo-1.2.7 → crawlo-1.2.9}/pyproject.toml +2 -2
  138. {crawlo-1.2.7 → crawlo-1.2.9}/requirements.txt +32 -32
  139. {crawlo-1.2.7 → crawlo-1.2.9}/setup.cfg +71 -71
  140. {crawlo-1.2.7 → crawlo-1.2.9}/tests/DOUBLE_CRAWLO_PREFIX_FIX_REPORT.md +81 -81
  141. {crawlo-1.2.7 → crawlo-1.2.9}/tests/__init__.py +7 -7
  142. {crawlo-1.2.7 → crawlo-1.2.9}/tests/advanced_tools_example.py +275 -275
  143. crawlo-1.2.9/tests/authenticated_proxy_example.py +107 -0
  144. {crawlo-1.2.7 → crawlo-1.2.9}/tests/cleaners_example.py +160 -160
  145. crawlo-1.2.9/tests/config_validation_demo.py +143 -0
  146. {crawlo-1.2.7 → crawlo-1.2.9}/tests/controlled_spider_example.py +205 -205
  147. {crawlo-1.2.7 → crawlo-1.2.9}/tests/date_tools_example.py +180 -180
  148. crawlo-1.2.9/tests/debug_pipelines.py +67 -0
  149. {crawlo-1.2.7 → crawlo-1.2.9}/tests/dynamic_loading_example.py +523 -523
  150. {crawlo-1.2.7 → crawlo-1.2.9}/tests/dynamic_loading_test.py +104 -104
  151. {crawlo-1.2.7 → crawlo-1.2.9}/tests/env_config_example.py +133 -133
  152. {crawlo-1.2.7 → crawlo-1.2.9}/tests/error_handling_example.py +171 -171
  153. {crawlo-1.2.7 → crawlo-1.2.9}/tests/redis_key_validation_demo.py +130 -130
  154. crawlo-1.2.9/tests/request_params_example.py +151 -0
  155. {crawlo-1.2.7 → crawlo-1.2.9}/tests/response_improvements_example.py +144 -144
  156. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_advanced_tools.py +148 -148
  157. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_all_redis_key_configs.py +145 -145
  158. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_authenticated_proxy.py +141 -141
  159. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_cleaners.py +54 -54
  160. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_comprehensive.py +146 -146
  161. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_config_consistency.py +80 -80
  162. crawlo-1.2.9/tests/test_config_merge.py +153 -0
  163. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_config_validator.py +182 -193
  164. crawlo-1.2.9/tests/test_crawlo_proxy_integration.py +109 -0
  165. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_date_tools.py +123 -123
  166. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_default_header_middleware.py +158 -158
  167. crawlo-1.2.9/tests/test_distributed.py +65 -0
  168. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_double_crawlo_fix.py +207 -207
  169. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_double_crawlo_fix_simple.py +124 -124
  170. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_download_delay_middleware.py +221 -221
  171. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_downloader_proxy_compatibility.py +268 -268
  172. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_dynamic_downloaders_proxy.py +124 -124
  173. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_dynamic_proxy.py +92 -92
  174. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_dynamic_proxy_config.py +146 -146
  175. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_dynamic_proxy_real.py +109 -109
  176. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_edge_cases.py +303 -303
  177. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_enhanced_error_handler.py +270 -270
  178. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_env_config.py +121 -121
  179. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_error_handler_compatibility.py +112 -112
  180. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_final_validation.py +153 -153
  181. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_framework_env_usage.py +103 -103
  182. crawlo-1.2.9/tests/test_integration.py +169 -0
  183. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_item_dedup_redis_key.py +122 -122
  184. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_mode_consistency.py +51 -51
  185. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_offsite_middleware.py +221 -221
  186. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_parsel.py +29 -29
  187. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_performance.py +327 -327
  188. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_proxy_api.py +264 -264
  189. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_proxy_health_check.py +32 -32
  190. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_proxy_middleware.py +121 -121
  191. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_proxy_middleware_enhanced.py +216 -216
  192. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_proxy_middleware_integration.py +136 -136
  193. crawlo-1.2.9/tests/test_proxy_middleware_refactored.py +185 -0
  194. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_proxy_providers.py +56 -56
  195. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_proxy_stats.py +19 -19
  196. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_proxy_strategies.py +59 -59
  197. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_queue_manager_double_crawlo.py +173 -173
  198. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_queue_manager_redis_key.py +176 -176
  199. crawlo-1.2.9/tests/test_random_user_agent.py +73 -0
  200. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_real_scenario_proxy.py +195 -195
  201. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_redis_config.py +28 -28
  202. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_redis_connection_pool.py +294 -294
  203. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_redis_key_naming.py +181 -181
  204. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_redis_key_validator.py +123 -123
  205. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_redis_queue.py +224 -224
  206. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_request_ignore_middleware.py +182 -182
  207. crawlo-1.2.9/tests/test_request_params.py +112 -0
  208. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_request_serialization.py +70 -70
  209. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_response_code_middleware.py +349 -349
  210. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_response_filter_middleware.py +427 -427
  211. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_response_improvements.py +152 -152
  212. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_retry_middleware.py +241 -241
  213. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_scheduler.py +252 -252
  214. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_scheduler_config_update.py +133 -133
  215. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_simple_response.py +61 -61
  216. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_telecom_spider_redis_key.py +205 -205
  217. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_template_content.py +87 -87
  218. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_template_redis_key.py +134 -134
  219. {crawlo-1.2.7 → crawlo-1.2.9}/tests/test_tools.py +159 -153
  220. crawlo-1.2.9/tests/test_user_agents.py +97 -0
  221. {crawlo-1.2.7 → crawlo-1.2.9}/tests/tools_example.py +260 -257
  222. crawlo-1.2.9/tests/verify_distributed.py +117 -0
  223. crawlo-1.2.7/crawlo/__version__.py +0 -1
  224. crawlo-1.2.7/crawlo/cleaners/__init__.py +0 -61
  225. crawlo-1.2.7/crawlo/data/user_agents.py +0 -108
  226. crawlo-1.2.7/crawlo/templates/project/settings_simple.py.tmpl +0 -99
  227. crawlo-1.2.7/crawlo/tools/date_tools.py +0 -36
  228. crawlo-1.2.7/crawlo/utils/log.py +0 -129
  229. crawlo-1.2.7/tests/authenticated_proxy_example.py +0 -237
  230. crawlo-1.2.7/tests/config_validation_demo.py +0 -103
  231. crawlo-1.2.7/tests/test_crawlo_proxy_integration.py +0 -173
  232. crawlo-1.2.7/tests/test_integration.py +0 -357
  233. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo/queue/__init__.py +0 -0
  234. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo.egg-info/dependency_links.txt +0 -0
  235. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo.egg-info/entry_points.txt +0 -0
  236. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo.egg-info/requires.txt +0 -0
  237. {crawlo-1.2.7 → crawlo-1.2.9}/crawlo.egg-info/top_level.txt +0 -0
@@ -1,23 +1,23 @@
1
- MIT License
2
-
3
- Modifications:
4
-
5
- Copyright (c) 2020 crawl-coder <2251018029@qq.com>
6
-
7
- Permission is hereby granted, free of charge, to any person obtaining a copy
8
- of this software and associated documentation files (the "Software"), to deal
9
- in the Software without restriction, including without limitation the rights
10
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
- copies of the Software, and to permit persons to whom the Software is
12
- furnished to do so, subject to the following conditions:
13
-
14
- The above copyright notice and this permission notice shall be included in all
15
- copies or substantial portions of the Software.
16
-
17
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1
+ MIT License
2
+
3
+ Modifications:
4
+
5
+ Copyright (c) 2020 crawl-coder <2251018029@qq.com>
6
+
7
+ Permission is hereby granted, free of charge, to any person obtaining a copy
8
+ of this software and associated documentation files (the "Software"), to deal
9
+ in the Software without restriction, including without limitation the rights
10
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
+ copies of the Software, and to permit persons to whom the Software is
12
+ furnished to do so, subject to the following conditions:
13
+
14
+ The above copyright notice and this permission notice shall be included in all
15
+ copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
23
  SOFTWARE.
@@ -1,17 +1,17 @@
1
- include README.md
2
- include LICENSE
3
- include requirements.txt # 如果根目录有全局requirements.txt
4
- include VERSION # 如果根目录有全局VERSION文件
5
-
6
- # 包内文件包含
7
- recursive-include crawlo/utils/js *
8
- recursive-include crawlo/templates *
9
-
10
- # 测试文件(如果需要在分发包中包含测试)
11
- recursive-include tests *
12
-
13
- # 排除项
14
- global-exclude __pycache__ *.py[cod] .DS_Store *.so
15
- global-exclude *.bak *.swp *.orig *.rej
16
- prune samples # 排除示例目录
1
+ include README.md
2
+ include LICENSE
3
+ include requirements.txt # 如果根目录有全局requirements.txt
4
+ include VERSION # 如果根目录有全局VERSION文件
5
+
6
+ # 包内文件包含
7
+ recursive-include crawlo/utils/js *
8
+ recursive-include crawlo/templates *
9
+
10
+ # 测试文件(如果需要在分发包中包含测试)
11
+ recursive-include tests *
12
+
13
+ # 排除项
14
+ global-exclude __pycache__ *.py[cod] .DS_Store *.so
15
+ global-exclude *.bak *.swp *.orig *.rej
16
+ prune samples # 排除示例目录
17
17
  prune docs # 排除文档目录