crawlo 1.2.2__tar.gz → 1.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crawlo might be problematic. Click here for more details.

Files changed (231)
  1. {crawlo-1.2.2 → crawlo-1.2.4}/LICENSE +22 -22
  2. {crawlo-1.2.2 → crawlo-1.2.4}/MANIFEST.in +16 -16
  3. {crawlo-1.2.2/crawlo.egg-info → crawlo-1.2.4}/PKG-INFO +764 -692
  4. {crawlo-1.2.2 → crawlo-1.2.4}/README.md +713 -641
  5. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/__init__.py +61 -61
  6. crawlo-1.2.4/crawlo/__version__.py +1 -0
  7. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/cleaners/__init__.py +60 -60
  8. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/cleaners/data_formatter.py +225 -225
  9. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/cleaners/encoding_converter.py +125 -125
  10. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/cleaners/text_cleaner.py +232 -232
  11. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/cli.py +81 -81
  12. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/commands/__init__.py +14 -14
  13. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/commands/check.py +594 -594
  14. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/commands/genspider.py +151 -151
  15. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/commands/help.py +144 -142
  16. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/commands/list.py +155 -155
  17. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/commands/run.py +323 -292
  18. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/commands/startproject.py +420 -418
  19. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/commands/stats.py +188 -188
  20. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/commands/utils.py +186 -186
  21. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/config.py +312 -312
  22. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/config_validator.py +251 -252
  23. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/core/__init__.py +2 -2
  24. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/core/engine.py +354 -354
  25. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/core/processor.py +40 -40
  26. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/core/scheduler.py +143 -143
  27. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/crawler.py +1110 -1027
  28. crawlo-1.2.4/crawlo/data/__init__.py +6 -0
  29. crawlo-1.2.4/crawlo/data/user_agents.py +108 -0
  30. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/downloader/__init__.py +266 -266
  31. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/downloader/aiohttp_downloader.py +220 -220
  32. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/downloader/cffi_downloader.py +256 -256
  33. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/downloader/httpx_downloader.py +259 -259
  34. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/downloader/hybrid_downloader.py +212 -213
  35. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/downloader/playwright_downloader.py +402 -402
  36. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/downloader/selenium_downloader.py +472 -472
  37. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/event.py +11 -11
  38. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/exceptions.py +81 -81
  39. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/extension/__init__.py +37 -37
  40. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/extension/health_check.py +141 -141
  41. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/extension/log_interval.py +57 -57
  42. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/extension/log_stats.py +81 -81
  43. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/extension/logging_extension.py +43 -43
  44. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/extension/memory_monitor.py +104 -104
  45. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/extension/performance_profiler.py +133 -133
  46. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/extension/request_recorder.py +107 -107
  47. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/filters/__init__.py +154 -154
  48. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/filters/aioredis_filter.py +280 -280
  49. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/filters/memory_filter.py +269 -269
  50. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/items/__init__.py +23 -23
  51. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/items/base.py +21 -21
  52. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/items/fields.py +52 -53
  53. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/items/items.py +104 -104
  54. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/middleware/__init__.py +21 -21
  55. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/middleware/default_header.py +131 -131
  56. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/middleware/download_delay.py +104 -104
  57. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/middleware/middleware_manager.py +135 -135
  58. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/middleware/offsite.py +114 -115
  59. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/middleware/proxy.py +367 -366
  60. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/middleware/request_ignore.py +86 -87
  61. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/middleware/response_code.py +163 -164
  62. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/middleware/response_filter.py +136 -137
  63. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/middleware/retry.py +124 -124
  64. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/mode_manager.py +211 -211
  65. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/network/__init__.py +21 -21
  66. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/network/request.py +338 -338
  67. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/network/response.py +359 -359
  68. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/pipelines/__init__.py +21 -21
  69. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/pipelines/bloom_dedup_pipeline.py +156 -156
  70. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/pipelines/console_pipeline.py +39 -39
  71. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/pipelines/csv_pipeline.py +316 -316
  72. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/pipelines/database_dedup_pipeline.py +222 -224
  73. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/pipelines/json_pipeline.py +218 -218
  74. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/pipelines/memory_dedup_pipeline.py +115 -115
  75. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/pipelines/mongo_pipeline.py +131 -131
  76. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/pipelines/mysql_pipeline.py +317 -316
  77. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/pipelines/pipeline_manager.py +61 -61
  78. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/pipelines/redis_dedup_pipeline.py +165 -167
  79. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/project.py +279 -187
  80. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/queue/pqueue.py +37 -37
  81. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/queue/queue_manager.py +337 -337
  82. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/queue/redis_priority_queue.py +298 -298
  83. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/settings/__init__.py +7 -7
  84. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/settings/default_settings.py +217 -226
  85. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/settings/setting_manager.py +122 -122
  86. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/spider/__init__.py +639 -639
  87. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/stats_collector.py +59 -59
  88. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/subscriber.py +129 -130
  89. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/task_manager.py +30 -30
  90. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/templates/crawlo.cfg.tmpl +10 -10
  91. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/templates/project/__init__.py.tmpl +3 -3
  92. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/templates/project/items.py.tmpl +17 -17
  93. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/templates/project/middlewares.py.tmpl +118 -118
  94. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/templates/project/pipelines.py.tmpl +96 -96
  95. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/templates/project/run.py.tmpl +47 -45
  96. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/templates/project/settings.py.tmpl +350 -327
  97. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/templates/project/settings_distributed.py.tmpl +160 -119
  98. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/templates/project/settings_gentle.py.tmpl +133 -94
  99. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/templates/project/settings_high_performance.py.tmpl +155 -151
  100. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/templates/project/settings_simple.py.tmpl +108 -68
  101. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/templates/project/spiders/__init__.py.tmpl +5 -5
  102. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/templates/spider/spider.py.tmpl +143 -143
  103. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/tools/__init__.py +182 -182
  104. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/tools/anti_crawler.py +268 -268
  105. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/tools/authenticated_proxy.py +240 -240
  106. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/tools/data_validator.py +180 -180
  107. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/tools/date_tools.py +35 -35
  108. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/tools/distributed_coordinator.py +386 -386
  109. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/tools/retry_mechanism.py +220 -220
  110. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/tools/scenario_adapter.py +262 -262
  111. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/utils/__init__.py +35 -35
  112. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/utils/batch_processor.py +259 -260
  113. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/utils/controlled_spider_mixin.py +439 -439
  114. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/utils/date_tools.py +290 -290
  115. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/utils/db_helper.py +343 -343
  116. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/utils/enhanced_error_handler.py +356 -359
  117. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/utils/env_config.py +105 -105
  118. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/utils/error_handler.py +123 -125
  119. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/utils/func_tools.py +82 -82
  120. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/utils/large_scale_config.py +286 -286
  121. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/utils/large_scale_helper.py +344 -343
  122. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/utils/log.py +128 -128
  123. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/utils/performance_monitor.py +285 -284
  124. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/utils/queue_helper.py +175 -175
  125. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/utils/redis_connection_pool.py +334 -334
  126. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/utils/redis_key_validator.py +198 -199
  127. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/utils/request.py +267 -267
  128. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/utils/request_serializer.py +218 -219
  129. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/utils/spider_loader.py +61 -62
  130. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/utils/system.py +11 -11
  131. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/utils/tools.py +4 -4
  132. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/utils/url.py +39 -39
  133. {crawlo-1.2.2 → crawlo-1.2.4/crawlo.egg-info}/PKG-INFO +764 -692
  134. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo.egg-info/SOURCES.txt +2 -16
  135. {crawlo-1.2.2 → crawlo-1.2.4}/examples/__init__.py +7 -7
  136. {crawlo-1.2.2 → crawlo-1.2.4}/pyproject.toml +2 -2
  137. {crawlo-1.2.2 → crawlo-1.2.4}/requirements.txt +33 -29
  138. {crawlo-1.2.2 → crawlo-1.2.4}/setup.cfg +71 -71
  139. {crawlo-1.2.2 → crawlo-1.2.4}/tests/DOUBLE_CRAWLO_PREFIX_FIX_REPORT.md +81 -81
  140. {crawlo-1.2.2 → crawlo-1.2.4}/tests/__init__.py +7 -7
  141. {crawlo-1.2.2 → crawlo-1.2.4}/tests/advanced_tools_example.py +275 -275
  142. {crawlo-1.2.2 → crawlo-1.2.4}/tests/authenticated_proxy_example.py +236 -236
  143. {crawlo-1.2.2 → crawlo-1.2.4}/tests/cleaners_example.py +160 -160
  144. {crawlo-1.2.2 → crawlo-1.2.4}/tests/config_validation_demo.py +102 -102
  145. {crawlo-1.2.2 → crawlo-1.2.4}/tests/controlled_spider_example.py +205 -205
  146. {crawlo-1.2.2 → crawlo-1.2.4}/tests/date_tools_example.py +180 -180
  147. {crawlo-1.2.2 → crawlo-1.2.4}/tests/dynamic_loading_example.py +523 -523
  148. {crawlo-1.2.2 → crawlo-1.2.4}/tests/dynamic_loading_test.py +104 -104
  149. {crawlo-1.2.2 → crawlo-1.2.4}/tests/env_config_example.py +133 -133
  150. {crawlo-1.2.2 → crawlo-1.2.4}/tests/error_handling_example.py +171 -171
  151. {crawlo-1.2.2 → crawlo-1.2.4}/tests/redis_key_validation_demo.py +130 -130
  152. {crawlo-1.2.2 → crawlo-1.2.4}/tests/response_improvements_example.py +144 -144
  153. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_advanced_tools.py +148 -148
  154. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_all_redis_key_configs.py +145 -145
  155. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_authenticated_proxy.py +141 -141
  156. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_cleaners.py +54 -54
  157. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_comprehensive.py +146 -146
  158. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_config_validator.py +193 -193
  159. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_crawlo_proxy_integration.py +172 -172
  160. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_date_tools.py +123 -123
  161. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_default_header_middleware.py +158 -158
  162. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_double_crawlo_fix.py +207 -207
  163. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_double_crawlo_fix_simple.py +124 -124
  164. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_download_delay_middleware.py +221 -221
  165. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_downloader_proxy_compatibility.py +268 -268
  166. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_dynamic_downloaders_proxy.py +124 -124
  167. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_dynamic_proxy.py +92 -92
  168. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_dynamic_proxy_config.py +146 -146
  169. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_dynamic_proxy_real.py +109 -109
  170. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_edge_cases.py +303 -303
  171. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_enhanced_error_handler.py +270 -270
  172. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_env_config.py +121 -121
  173. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_error_handler_compatibility.py +112 -112
  174. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_final_validation.py +153 -153
  175. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_framework_env_usage.py +103 -103
  176. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_integration.py +356 -356
  177. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_item_dedup_redis_key.py +122 -122
  178. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_offsite_middleware.py +221 -221
  179. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_parsel.py +29 -29
  180. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_performance.py +327 -327
  181. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_proxy_api.py +264 -264
  182. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_proxy_health_check.py +32 -32
  183. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_proxy_middleware.py +121 -121
  184. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_proxy_middleware_enhanced.py +216 -216
  185. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_proxy_middleware_integration.py +136 -136
  186. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_proxy_providers.py +56 -56
  187. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_proxy_stats.py +19 -19
  188. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_proxy_strategies.py +59 -59
  189. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_queue_manager_double_crawlo.py +173 -173
  190. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_queue_manager_redis_key.py +176 -176
  191. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_real_scenario_proxy.py +195 -195
  192. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_redis_config.py +28 -28
  193. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_redis_connection_pool.py +294 -294
  194. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_redis_key_naming.py +181 -181
  195. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_redis_key_validator.py +123 -123
  196. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_redis_queue.py +224 -224
  197. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_request_ignore_middleware.py +182 -182
  198. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_request_serialization.py +70 -70
  199. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_response_code_middleware.py +349 -349
  200. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_response_filter_middleware.py +427 -427
  201. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_response_improvements.py +152 -152
  202. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_retry_middleware.py +241 -241
  203. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_scheduler.py +241 -241
  204. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_simple_response.py +61 -61
  205. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_telecom_spider_redis_key.py +205 -205
  206. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_template_content.py +87 -87
  207. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_template_redis_key.py +134 -134
  208. {crawlo-1.2.2 → crawlo-1.2.4}/tests/test_tools.py +153 -153
  209. {crawlo-1.2.2 → crawlo-1.2.4}/tests/tools_example.py +257 -257
  210. crawlo-1.2.2/crawlo/__version__.py +0 -1
  211. crawlo-1.2.2/examples/aiohttp_settings.py +0 -42
  212. crawlo-1.2.2/examples/curl_cffi_settings.py +0 -41
  213. crawlo-1.2.2/examples/default_header_middleware_example.py +0 -107
  214. crawlo-1.2.2/examples/default_header_spider_example.py +0 -129
  215. crawlo-1.2.2/examples/download_delay_middleware_example.py +0 -160
  216. crawlo-1.2.2/examples/httpx_settings.py +0 -42
  217. crawlo-1.2.2/examples/multi_downloader_proxy_example.py +0 -81
  218. crawlo-1.2.2/examples/offsite_middleware_example.py +0 -55
  219. crawlo-1.2.2/examples/offsite_spider_example.py +0 -107
  220. crawlo-1.2.2/examples/proxy_spider_example.py +0 -166
  221. crawlo-1.2.2/examples/request_ignore_middleware_example.py +0 -51
  222. crawlo-1.2.2/examples/request_ignore_spider_example.py +0 -99
  223. crawlo-1.2.2/examples/response_code_middleware_example.py +0 -52
  224. crawlo-1.2.2/examples/response_filter_middleware_example.py +0 -67
  225. crawlo-1.2.2/examples/tong_hua_shun_settings.py +0 -62
  226. crawlo-1.2.2/examples/tong_hua_shun_spider.py +0 -170
  227. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo/queue/__init__.py +0 -0
  228. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo.egg-info/dependency_links.txt +0 -0
  229. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo.egg-info/entry_points.txt +0 -0
  230. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo.egg-info/requires.txt +0 -0
  231. {crawlo-1.2.2 → crawlo-1.2.4}/crawlo.egg-info/top_level.txt +0 -0
--- crawlo-1.2.2/LICENSE
+++ crawlo-1.2.4/LICENSE
@@ -1,23 +1,23 @@
1
- MIT License
2
-
3
- Modifications:
4
-
5
- Copyright (c) 2020 crawl-coder <2251018029@qq.com>
6
-
7
- Permission is hereby granted, free of charge, to any person obtaining a copy
8
- of this software and associated documentation files (the "Software"), to deal
9
- in the Software without restriction, including without limitation the rights
10
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
- copies of the Software, and to permit persons to whom the Software is
12
- furnished to do so, subject to the following conditions:
13
-
14
- The above copyright notice and this permission notice shall be included in all
15
- copies or substantial portions of the Software.
16
-
17
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1
+ MIT License
2
+
3
+ Modifications:
4
+
5
+ Copyright (c) 2020 crawl-coder <2251018029@qq.com>
6
+
7
+ Permission is hereby granted, free of charge, to any person obtaining a copy
8
+ of this software and associated documentation files (the "Software"), to deal
9
+ in the Software without restriction, including without limitation the rights
10
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
+ copies of the Software, and to permit persons to whom the Software is
12
+ furnished to do so, subject to the following conditions:
13
+
14
+ The above copyright notice and this permission notice shall be included in all
15
+ copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
23
  SOFTWARE.
--- crawlo-1.2.2/MANIFEST.in
+++ crawlo-1.2.4/MANIFEST.in
@@ -1,17 +1,17 @@
1
- include README.md
2
- include LICENSE
3
- include requirements.txt # 如果根目录有全局requirements.txt
4
- include VERSION # 如果根目录有全局VERSION文件
5
-
6
- # 包内文件包含
7
- recursive-include crawlo/utils/js *
8
- recursive-include crawlo/templates *
9
-
10
- # 测试文件(如果需要在分发包中包含测试)
11
- recursive-include tests *
12
-
13
- # 排除项
14
- global-exclude __pycache__ *.py[cod] .DS_Store *.so
15
- global-exclude *.bak *.swp *.orig *.rej
16
- prune samples # 排除示例目录
1
+ include README.md
2
+ include LICENSE
3
+ include requirements.txt # 如果根目录有全局requirements.txt
4
+ include VERSION # 如果根目录有全局VERSION文件
5
+
6
+ # 包内文件包含
7
+ recursive-include crawlo/utils/js *
8
+ recursive-include crawlo/templates *
9
+
10
+ # 测试文件(如果需要在分发包中包含测试)
11
+ recursive-include tests *
12
+
13
+ # 排除项
14
+ global-exclude __pycache__ *.py[cod] .DS_Store *.so
15
+ global-exclude *.bak *.swp *.orig *.rej
16
+ prune samples # 排除示例目录
17
17
  prune docs # 排除文档目录