crawlo-1.2.3.tar.gz → crawlo-1.2.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crawlo might be problematic. Click here for more details.

Files changed (231)
  1. {crawlo-1.2.3 → crawlo-1.2.4}/LICENSE +22 -22
  2. {crawlo-1.2.3 → crawlo-1.2.4}/MANIFEST.in +16 -16
  3. {crawlo-1.2.3/crawlo.egg-info → crawlo-1.2.4}/PKG-INFO +764 -692
  4. {crawlo-1.2.3 → crawlo-1.2.4}/README.md +713 -641
  5. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/__init__.py +61 -61
  6. crawlo-1.2.4/crawlo/__version__.py +1 -0
  7. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/cleaners/__init__.py +60 -60
  8. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/cleaners/data_formatter.py +225 -225
  9. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/cleaners/encoding_converter.py +125 -125
  10. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/cleaners/text_cleaner.py +232 -232
  11. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/cli.py +81 -81
  12. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/commands/__init__.py +14 -14
  13. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/commands/check.py +594 -594
  14. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/commands/genspider.py +151 -151
  15. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/commands/help.py +144 -142
  16. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/commands/list.py +155 -155
  17. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/commands/run.py +323 -292
  18. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/commands/startproject.py +420 -417
  19. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/commands/stats.py +187 -187
  20. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/commands/utils.py +186 -186
  21. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/config.py +312 -312
  22. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/config_validator.py +251 -251
  23. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/core/__init__.py +2 -2
  24. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/core/engine.py +354 -354
  25. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/core/processor.py +40 -40
  26. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/core/scheduler.py +143 -143
  27. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/crawler.py +1110 -1027
  28. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/data/__init__.py +5 -5
  29. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/data/user_agents.py +107 -107
  30. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/downloader/__init__.py +266 -266
  31. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/downloader/aiohttp_downloader.py +220 -220
  32. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/downloader/cffi_downloader.py +256 -256
  33. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/downloader/httpx_downloader.py +259 -259
  34. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/downloader/hybrid_downloader.py +212 -212
  35. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/downloader/playwright_downloader.py +402 -402
  36. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/downloader/selenium_downloader.py +472 -472
  37. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/event.py +11 -11
  38. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/exceptions.py +81 -81
  39. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/extension/__init__.py +37 -37
  40. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/extension/health_check.py +141 -141
  41. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/extension/log_interval.py +57 -57
  42. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/extension/log_stats.py +81 -81
  43. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/extension/logging_extension.py +43 -43
  44. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/extension/memory_monitor.py +104 -104
  45. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/extension/performance_profiler.py +133 -133
  46. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/extension/request_recorder.py +107 -107
  47. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/filters/__init__.py +154 -154
  48. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/filters/aioredis_filter.py +280 -280
  49. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/filters/memory_filter.py +269 -269
  50. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/items/__init__.py +23 -23
  51. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/items/base.py +21 -21
  52. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/items/fields.py +52 -52
  53. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/items/items.py +104 -104
  54. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/middleware/__init__.py +21 -21
  55. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/middleware/default_header.py +131 -131
  56. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/middleware/download_delay.py +104 -104
  57. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/middleware/middleware_manager.py +135 -135
  58. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/middleware/offsite.py +114 -114
  59. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/middleware/proxy.py +367 -367
  60. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/middleware/request_ignore.py +86 -86
  61. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/middleware/response_code.py +163 -163
  62. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/middleware/response_filter.py +136 -136
  63. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/middleware/retry.py +124 -124
  64. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/mode_manager.py +211 -211
  65. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/network/__init__.py +21 -21
  66. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/network/request.py +338 -338
  67. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/network/response.py +359 -359
  68. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/pipelines/__init__.py +21 -21
  69. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/pipelines/bloom_dedup_pipeline.py +156 -156
  70. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/pipelines/console_pipeline.py +39 -39
  71. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/pipelines/csv_pipeline.py +316 -316
  72. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/pipelines/database_dedup_pipeline.py +222 -222
  73. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/pipelines/json_pipeline.py +218 -218
  74. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/pipelines/memory_dedup_pipeline.py +115 -115
  75. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/pipelines/mongo_pipeline.py +131 -131
  76. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/pipelines/mysql_pipeline.py +317 -317
  77. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/pipelines/pipeline_manager.py +61 -61
  78. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/pipelines/redis_dedup_pipeline.py +165 -165
  79. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/project.py +279 -187
  80. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/queue/pqueue.py +37 -37
  81. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/queue/queue_manager.py +337 -337
  82. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/queue/redis_priority_queue.py +298 -298
  83. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/settings/__init__.py +7 -7
  84. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/settings/default_settings.py +217 -226
  85. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/settings/setting_manager.py +122 -122
  86. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/spider/__init__.py +639 -639
  87. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/stats_collector.py +59 -59
  88. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/subscriber.py +129 -129
  89. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/task_manager.py +30 -30
  90. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/templates/crawlo.cfg.tmpl +10 -10
  91. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/templates/project/__init__.py.tmpl +3 -3
  92. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/templates/project/items.py.tmpl +17 -17
  93. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/templates/project/middlewares.py.tmpl +118 -118
  94. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/templates/project/pipelines.py.tmpl +96 -96
  95. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/templates/project/run.py.tmpl +47 -45
  96. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/templates/project/settings.py.tmpl +350 -325
  97. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/templates/project/settings_distributed.py.tmpl +160 -121
  98. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/templates/project/settings_gentle.py.tmpl +133 -94
  99. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/templates/project/settings_high_performance.py.tmpl +155 -151
  100. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/templates/project/settings_simple.py.tmpl +108 -68
  101. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/templates/project/spiders/__init__.py.tmpl +5 -5
  102. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/templates/spider/spider.py.tmpl +143 -143
  103. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/tools/__init__.py +182 -182
  104. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/tools/anti_crawler.py +268 -268
  105. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/tools/authenticated_proxy.py +240 -240
  106. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/tools/data_validator.py +180 -180
  107. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/tools/date_tools.py +35 -35
  108. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/tools/distributed_coordinator.py +386 -386
  109. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/tools/retry_mechanism.py +220 -220
  110. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/tools/scenario_adapter.py +262 -262
  111. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/utils/__init__.py +35 -35
  112. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/utils/batch_processor.py +259 -259
  113. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/utils/controlled_spider_mixin.py +439 -439
  114. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/utils/date_tools.py +290 -290
  115. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/utils/db_helper.py +343 -343
  116. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/utils/enhanced_error_handler.py +356 -356
  117. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/utils/env_config.py +105 -105
  118. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/utils/error_handler.py +123 -123
  119. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/utils/func_tools.py +82 -82
  120. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/utils/large_scale_config.py +286 -286
  121. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/utils/large_scale_helper.py +344 -344
  122. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/utils/log.py +128 -128
  123. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/utils/performance_monitor.py +285 -285
  124. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/utils/queue_helper.py +175 -175
  125. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/utils/redis_connection_pool.py +334 -334
  126. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/utils/redis_key_validator.py +198 -198
  127. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/utils/request.py +267 -267
  128. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/utils/request_serializer.py +218 -218
  129. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/utils/spider_loader.py +61 -61
  130. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/utils/system.py +11 -11
  131. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/utils/tools.py +4 -4
  132. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/utils/url.py +39 -39
  133. {crawlo-1.2.3 → crawlo-1.2.4/crawlo.egg-info}/PKG-INFO +764 -692
  134. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo.egg-info/SOURCES.txt +0 -16
  135. {crawlo-1.2.3 → crawlo-1.2.4}/examples/__init__.py +7 -7
  136. {crawlo-1.2.3 → crawlo-1.2.4}/pyproject.toml +2 -2
  137. {crawlo-1.2.3 → crawlo-1.2.4}/requirements.txt +32 -32
  138. {crawlo-1.2.3 → crawlo-1.2.4}/setup.cfg +71 -71
  139. {crawlo-1.2.3 → crawlo-1.2.4}/tests/DOUBLE_CRAWLO_PREFIX_FIX_REPORT.md +81 -81
  140. {crawlo-1.2.3 → crawlo-1.2.4}/tests/__init__.py +7 -7
  141. {crawlo-1.2.3 → crawlo-1.2.4}/tests/advanced_tools_example.py +275 -275
  142. {crawlo-1.2.3 → crawlo-1.2.4}/tests/authenticated_proxy_example.py +236 -236
  143. {crawlo-1.2.3 → crawlo-1.2.4}/tests/cleaners_example.py +160 -160
  144. {crawlo-1.2.3 → crawlo-1.2.4}/tests/config_validation_demo.py +102 -102
  145. {crawlo-1.2.3 → crawlo-1.2.4}/tests/controlled_spider_example.py +205 -205
  146. {crawlo-1.2.3 → crawlo-1.2.4}/tests/date_tools_example.py +180 -180
  147. {crawlo-1.2.3 → crawlo-1.2.4}/tests/dynamic_loading_example.py +523 -523
  148. {crawlo-1.2.3 → crawlo-1.2.4}/tests/dynamic_loading_test.py +104 -104
  149. {crawlo-1.2.3 → crawlo-1.2.4}/tests/env_config_example.py +133 -133
  150. {crawlo-1.2.3 → crawlo-1.2.4}/tests/error_handling_example.py +171 -171
  151. {crawlo-1.2.3 → crawlo-1.2.4}/tests/redis_key_validation_demo.py +130 -130
  152. {crawlo-1.2.3 → crawlo-1.2.4}/tests/response_improvements_example.py +144 -144
  153. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_advanced_tools.py +148 -148
  154. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_all_redis_key_configs.py +145 -145
  155. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_authenticated_proxy.py +141 -141
  156. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_cleaners.py +54 -54
  157. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_comprehensive.py +146 -146
  158. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_config_validator.py +193 -193
  159. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_crawlo_proxy_integration.py +172 -172
  160. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_date_tools.py +123 -123
  161. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_default_header_middleware.py +158 -158
  162. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_double_crawlo_fix.py +207 -207
  163. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_double_crawlo_fix_simple.py +124 -124
  164. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_download_delay_middleware.py +221 -221
  165. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_downloader_proxy_compatibility.py +268 -268
  166. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_dynamic_downloaders_proxy.py +124 -124
  167. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_dynamic_proxy.py +92 -92
  168. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_dynamic_proxy_config.py +146 -146
  169. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_dynamic_proxy_real.py +109 -109
  170. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_edge_cases.py +303 -303
  171. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_enhanced_error_handler.py +270 -270
  172. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_env_config.py +121 -121
  173. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_error_handler_compatibility.py +112 -112
  174. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_final_validation.py +153 -153
  175. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_framework_env_usage.py +103 -103
  176. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_integration.py +356 -356
  177. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_item_dedup_redis_key.py +122 -122
  178. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_offsite_middleware.py +221 -221
  179. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_parsel.py +29 -29
  180. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_performance.py +327 -327
  181. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_proxy_api.py +264 -264
  182. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_proxy_health_check.py +32 -32
  183. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_proxy_middleware.py +121 -121
  184. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_proxy_middleware_enhanced.py +216 -216
  185. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_proxy_middleware_integration.py +136 -136
  186. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_proxy_providers.py +56 -56
  187. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_proxy_stats.py +19 -19
  188. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_proxy_strategies.py +59 -59
  189. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_queue_manager_double_crawlo.py +173 -173
  190. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_queue_manager_redis_key.py +176 -176
  191. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_real_scenario_proxy.py +195 -195
  192. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_redis_config.py +28 -28
  193. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_redis_connection_pool.py +294 -294
  194. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_redis_key_naming.py +181 -181
  195. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_redis_key_validator.py +123 -123
  196. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_redis_queue.py +224 -224
  197. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_request_ignore_middleware.py +182 -182
  198. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_request_serialization.py +70 -70
  199. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_response_code_middleware.py +349 -349
  200. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_response_filter_middleware.py +427 -427
  201. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_response_improvements.py +152 -152
  202. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_retry_middleware.py +241 -241
  203. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_scheduler.py +241 -241
  204. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_simple_response.py +61 -61
  205. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_telecom_spider_redis_key.py +205 -205
  206. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_template_content.py +87 -87
  207. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_template_redis_key.py +134 -134
  208. {crawlo-1.2.3 → crawlo-1.2.4}/tests/test_tools.py +153 -153
  209. {crawlo-1.2.3 → crawlo-1.2.4}/tests/tools_example.py +257 -257
  210. crawlo-1.2.3/crawlo/__version__.py +0 -1
  211. crawlo-1.2.3/examples/aiohttp_settings.py +0 -42
  212. crawlo-1.2.3/examples/curl_cffi_settings.py +0 -41
  213. crawlo-1.2.3/examples/default_header_middleware_example.py +0 -107
  214. crawlo-1.2.3/examples/default_header_spider_example.py +0 -129
  215. crawlo-1.2.3/examples/download_delay_middleware_example.py +0 -160
  216. crawlo-1.2.3/examples/httpx_settings.py +0 -42
  217. crawlo-1.2.3/examples/multi_downloader_proxy_example.py +0 -81
  218. crawlo-1.2.3/examples/offsite_middleware_example.py +0 -55
  219. crawlo-1.2.3/examples/offsite_spider_example.py +0 -107
  220. crawlo-1.2.3/examples/proxy_spider_example.py +0 -166
  221. crawlo-1.2.3/examples/request_ignore_middleware_example.py +0 -51
  222. crawlo-1.2.3/examples/request_ignore_spider_example.py +0 -99
  223. crawlo-1.2.3/examples/response_code_middleware_example.py +0 -52
  224. crawlo-1.2.3/examples/response_filter_middleware_example.py +0 -67
  225. crawlo-1.2.3/examples/tong_hua_shun_settings.py +0 -62
  226. crawlo-1.2.3/examples/tong_hua_shun_spider.py +0 -170
  227. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo/queue/__init__.py +0 -0
  228. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo.egg-info/dependency_links.txt +0 -0
  229. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo.egg-info/entry_points.txt +0 -0
  230. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo.egg-info/requires.txt +0 -0
  231. {crawlo-1.2.3 → crawlo-1.2.4}/crawlo.egg-info/top_level.txt +0 -0
@@ -1,23 +1,23 @@
1
- MIT License
2
-
3
- Modifications:
4
-
5
- Copyright (c) 2020 crawl-coder <2251018029@qq.com>
6
-
7
- Permission is hereby granted, free of charge, to any person obtaining a copy
8
- of this software and associated documentation files (the "Software"), to deal
9
- in the Software without restriction, including without limitation the rights
10
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
- copies of the Software, and to permit persons to whom the Software is
12
- furnished to do so, subject to the following conditions:
13
-
14
- The above copyright notice and this permission notice shall be included in all
15
- copies or substantial portions of the Software.
16
-
17
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1
+ MIT License
2
+
3
+ Modifications:
4
+
5
+ Copyright (c) 2020 crawl-coder <2251018029@qq.com>
6
+
7
+ Permission is hereby granted, free of charge, to any person obtaining a copy
8
+ of this software and associated documentation files (the "Software"), to deal
9
+ in the Software without restriction, including without limitation the rights
10
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
+ copies of the Software, and to permit persons to whom the Software is
12
+ furnished to do so, subject to the following conditions:
13
+
14
+ The above copyright notice and this permission notice shall be included in all
15
+ copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
23
  SOFTWARE.
@@ -1,17 +1,17 @@
1
- include README.md
2
- include LICENSE
3
- include requirements.txt # 如果根目录有全局requirements.txt
4
- include VERSION # 如果根目录有全局VERSION文件
5
-
6
- # 包内文件包含
7
- recursive-include crawlo/utils/js *
8
- recursive-include crawlo/templates *
9
-
10
- # 测试文件(如果需要在分发包中包含测试)
11
- recursive-include tests *
12
-
13
- # 排除项
14
- global-exclude __pycache__ *.py[cod] .DS_Store *.so
15
- global-exclude *.bak *.swp *.orig *.rej
16
- prune samples # 排除示例目录
1
+ include README.md
2
+ include LICENSE
3
+ include requirements.txt # 如果根目录有全局requirements.txt
4
+ include VERSION # 如果根目录有全局VERSION文件
5
+
6
+ # 包内文件包含
7
+ recursive-include crawlo/utils/js *
8
+ recursive-include crawlo/templates *
9
+
10
+ # 测试文件(如果需要在分发包中包含测试)
11
+ recursive-include tests *
12
+
13
+ # 排除项
14
+ global-exclude __pycache__ *.py[cod] .DS_Store *.so
15
+ global-exclude *.bak *.swp *.orig *.rej
16
+ prune samples # 排除示例目录
17
17
  prune docs # 排除文档目录