crawlo 1.4.4__py3-none-any.whl → 1.4.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crawlo might be problematic. Click here for more details.

Files changed (120) hide show
  1. crawlo/__init__.py +11 -15
  2. crawlo/__version__.py +1 -1
  3. crawlo/commands/startproject.py +24 -0
  4. crawlo/core/engine.py +2 -2
  5. crawlo/core/scheduler.py +4 -4
  6. crawlo/crawler.py +8 -7
  7. crawlo/downloader/__init__.py +5 -2
  8. crawlo/downloader/cffi_downloader.py +3 -1
  9. crawlo/extension/__init__.py +2 -2
  10. crawlo/filters/aioredis_filter.py +8 -1
  11. crawlo/filters/memory_filter.py +8 -1
  12. crawlo/initialization/built_in.py +13 -4
  13. crawlo/initialization/core.py +5 -4
  14. crawlo/interfaces.py +24 -0
  15. crawlo/middleware/__init__.py +7 -4
  16. crawlo/middleware/middleware_manager.py +15 -8
  17. crawlo/middleware/proxy.py +171 -348
  18. crawlo/mode_manager.py +45 -11
  19. crawlo/network/response.py +374 -69
  20. crawlo/pipelines/mysql_pipeline.py +340 -189
  21. crawlo/pipelines/pipeline_manager.py +2 -2
  22. crawlo/project.py +2 -4
  23. crawlo/settings/default_settings.py +42 -30
  24. crawlo/stats_collector.py +10 -1
  25. crawlo/task_manager.py +2 -2
  26. crawlo/templates/project/items.py.tmpl +2 -2
  27. crawlo/templates/project/middlewares.py.tmpl +9 -89
  28. crawlo/templates/project/pipelines.py.tmpl +8 -68
  29. crawlo/templates/project/settings.py.tmpl +10 -55
  30. crawlo/templates/project/settings_distributed.py.tmpl +20 -22
  31. crawlo/templates/project/settings_gentle.py.tmpl +5 -0
  32. crawlo/templates/project/settings_high_performance.py.tmpl +5 -0
  33. crawlo/templates/project/settings_minimal.py.tmpl +25 -1
  34. crawlo/templates/project/settings_simple.py.tmpl +5 -0
  35. crawlo/templates/run.py.tmpl +1 -8
  36. crawlo/templates/spider/spider.py.tmpl +5 -108
  37. crawlo/tools/__init__.py +0 -11
  38. crawlo/utils/__init__.py +17 -1
  39. crawlo/utils/db_helper.py +226 -319
  40. crawlo/utils/error_handler.py +313 -67
  41. crawlo/utils/fingerprint.py +3 -4
  42. crawlo/utils/misc.py +82 -0
  43. crawlo/utils/request.py +55 -66
  44. crawlo/utils/selector_helper.py +138 -0
  45. crawlo/utils/spider_loader.py +185 -45
  46. crawlo/utils/text_helper.py +95 -0
  47. crawlo-1.4.6.dist-info/METADATA +329 -0
  48. {crawlo-1.4.4.dist-info → crawlo-1.4.6.dist-info}/RECORD +110 -69
  49. tests/authenticated_proxy_example.py +10 -6
  50. tests/bug_check_test.py +251 -0
  51. tests/direct_selector_helper_test.py +97 -0
  52. tests/explain_mysql_update_behavior.py +77 -0
  53. tests/ofweek_scrapy/ofweek_scrapy/items.py +12 -0
  54. tests/ofweek_scrapy/ofweek_scrapy/middlewares.py +100 -0
  55. tests/ofweek_scrapy/ofweek_scrapy/pipelines.py +13 -0
  56. tests/ofweek_scrapy/ofweek_scrapy/settings.py +85 -0
  57. tests/ofweek_scrapy/ofweek_scrapy/spiders/__init__.py +4 -0
  58. tests/ofweek_scrapy/ofweek_scrapy/spiders/ofweek_spider.py +162 -0
  59. tests/ofweek_scrapy/scrapy.cfg +11 -0
  60. tests/performance_comparison.py +4 -5
  61. tests/simple_crawlo_test.py +1 -2
  62. tests/simple_follow_test.py +39 -0
  63. tests/simple_response_selector_test.py +95 -0
  64. tests/simple_selector_helper_test.py +155 -0
  65. tests/simple_selector_test.py +208 -0
  66. tests/simple_url_test.py +74 -0
  67. tests/simulate_mysql_update_test.py +140 -0
  68. tests/test_asyncmy_usage.py +57 -0
  69. tests/test_crawler_process_import.py +39 -0
  70. tests/test_crawler_process_spider_modules.py +48 -0
  71. tests/test_crawlo_proxy_integration.py +8 -2
  72. tests/test_downloader_proxy_compatibility.py +24 -20
  73. tests/test_edge_cases.py +7 -5
  74. tests/test_encoding_core.py +57 -0
  75. tests/test_encoding_detection.py +127 -0
  76. tests/test_factory_compatibility.py +197 -0
  77. tests/test_mysql_pipeline_config.py +165 -0
  78. tests/test_mysql_pipeline_error.py +99 -0
  79. tests/test_mysql_pipeline_init_log.py +83 -0
  80. tests/test_mysql_pipeline_integration.py +133 -0
  81. tests/test_mysql_pipeline_refactor.py +144 -0
  82. tests/test_mysql_pipeline_refactor_simple.py +86 -0
  83. tests/test_mysql_pipeline_robustness.py +196 -0
  84. tests/test_mysql_pipeline_types.py +89 -0
  85. tests/test_mysql_update_columns.py +94 -0
  86. tests/test_optimized_selector_naming.py +101 -0
  87. tests/test_priority_behavior.py +18 -18
  88. tests/test_proxy_middleware.py +104 -8
  89. tests/test_proxy_middleware_enhanced.py +1 -5
  90. tests/test_proxy_middleware_integration.py +7 -2
  91. tests/test_proxy_middleware_refactored.py +25 -2
  92. tests/test_proxy_only.py +84 -0
  93. tests/test_proxy_with_downloader.py +153 -0
  94. tests/test_real_scenario_proxy.py +17 -17
  95. tests/test_response_follow.py +105 -0
  96. tests/test_response_selector_methods.py +93 -0
  97. tests/test_response_url_methods.py +71 -0
  98. tests/test_response_urljoin.py +87 -0
  99. tests/test_scrapy_style_encoding.py +113 -0
  100. tests/test_selector_helper.py +101 -0
  101. tests/test_selector_optimizations.py +147 -0
  102. tests/test_spider_loader.py +50 -0
  103. tests/test_spider_loader_comprehensive.py +70 -0
  104. tests/test_spiders/__init__.py +1 -0
  105. tests/test_spiders/test_spider.py +10 -0
  106. tests/verify_mysql_warnings.py +110 -0
  107. crawlo/middleware/simple_proxy.py +0 -65
  108. crawlo/tools/anti_crawler.py +0 -269
  109. crawlo/utils/class_loader.py +0 -26
  110. crawlo/utils/enhanced_error_handler.py +0 -357
  111. crawlo-1.4.4.dist-info/METADATA +0 -190
  112. tests/simple_log_test.py +0 -58
  113. tests/simple_test.py +0 -48
  114. tests/test_framework_logger.py +0 -67
  115. tests/test_framework_startup.py +0 -65
  116. tests/test_mode_change.py +0 -73
  117. {crawlo-1.4.4.dist-info → crawlo-1.4.6.dist-info}/WHEEL +0 -0
  118. {crawlo-1.4.4.dist-info → crawlo-1.4.6.dist-info}/entry_points.txt +0 -0
  119. {crawlo-1.4.4.dist-info → crawlo-1.4.6.dist-info}/top_level.txt +0 -0
  120. /tests/{final_command_test_report.md → ofweek_scrapy/ofweek_scrapy/__init__.py} +0 -0
@@ -1,40 +1,41 @@
1
- crawlo/__init__.py,sha256=2Io5P9qJghOAjjD3YWdaiIq5laPLyLWVkEqgiVfUa3o,2381
2
- crawlo/__version__.py,sha256=2ik6wvURqg571WApVvR_ELhg_eclmC_WvbDLEPmoO4Q,23
1
+ crawlo/__init__.py,sha256=n5vFwi0iuYrpAIyoNJZzWHV1gvF-vh-Yze3jiuwEXqM,2180
2
+ crawlo/__version__.py,sha256=C1PbImXkZPhAW7rUcTV61OKrbIa2DpoQJ2Kmga3lWwM,23
3
3
  crawlo/cli.py,sha256=AQnAB5NMI-Ic1VPw_Jjng8L4AI4-wMozOwzE6CfXkZU,2402
4
4
  crawlo/config.py,sha256=EQIT7WpkXAlr2ocd5SYJYOKTSWUlQx2AkTHX7ErEWxw,9798
5
5
  crawlo/config_validator.py,sha256=oY4-2bwXUlwHAnGgkI-EznviDfML_dcxbWSGXNSxC2k,11516
6
- crawlo/crawler.py,sha256=E-fgYVtx6v2xEPixlQeWfNYVbW1oeWE0fQFZTQ6_K-I,27305
6
+ crawlo/crawler.py,sha256=6f9eDeUEZVfnUywaZ6CnL5R3bHO4sG82z-Syl3zZKvE,27360
7
7
  crawlo/event.py,sha256=ZhoPW5CglCEuZNFEwviSCBIw0pT5O6jT98bqYrDFd3E,324
8
8
  crawlo/exceptions.py,sha256=YVIDnC1bKSMv3fXH_6tinWMuD9HmKHIaUfO4_fkX5sY,1247
9
9
  crawlo/framework.py,sha256=9gP6VN4MHqutGXaxnwpNMSULfVYbNp906UdZiJGywlQ,9458
10
- crawlo/mode_manager.py,sha256=S4dUoeVZ4fMnd4pXWutcHwk5Zv68ZBTgo9taR9OkQiM,7768
11
- crawlo/project.py,sha256=nVRc0CIdd9g863NGfuItvajl8zlO5mEta4FQCx9_vZ8,14060
12
- crawlo/stats_collector.py,sha256=hIjlnX750jU4Oncyand1jBccfaX4Tu7egd2DBYu2N7A,2379
10
+ crawlo/interfaces.py,sha256=q1vwMSiZLfLpPhFa9Y0hAcjYEKvLkW2fZ2fmoAZ-5TE,653
11
+ crawlo/mode_manager.py,sha256=e8QmwsnndFx_hGME_7w-hazKo0GOYjUr-7FBf7dWxgc,8903
12
+ crawlo/project.py,sha256=9wnlHd-rYAC3TT1Fc1ftyUBx7mbDT6TQCqoaIP6N3iA,13998
13
+ crawlo/stats_collector.py,sha256=mzNHu628a31PwqpkBXN90PhD-xhMSunNNxAm-ney5JU,2803
13
14
  crawlo/subscriber.py,sha256=h8fx69NJZeWem0ZkCmfHAi2kgfDGFObHpwN0aGNUM6Y,5115
14
- crawlo/task_manager.py,sha256=I9h3Rl0VRAfwqp24CHT3TuEAapNdTbVghkmuJhtM7jg,5966
15
+ crawlo/task_manager.py,sha256=Ic6PFUqZOhLXuZ_UEk_8Neb9FmqYv8I2RzV3vLzFNSU,5966
15
16
  crawlo/commands/__init__.py,sha256=orvY6wLOBwGUEJKeF3h_T1fxj8AaQLjngBDd-3xKOE4,392
16
17
  crawlo/commands/check.py,sha256=TKDhI_sj7kErgiJpt2vCZ9QL-g6yWjrrPWKbgh8pgEU,23199
17
18
  crawlo/commands/genspider.py,sha256=JB4ZuFpKsYwtjx3DSsxugH7e3kqxhDWPG5ZKfvM0isI,6041
18
19
  crawlo/commands/help.py,sha256=8xPC0iNCg1rRBoK2bb6noAEANc1JwrdM35eF-j6yeZM,5111
19
20
  crawlo/commands/list.py,sha256=trzcd3kG6DhkOqYZADcl3yR7M8iJBgRw5fE-g9e0gVM,5877
20
21
  crawlo/commands/run.py,sha256=EjpIilgCTkXGVSV4rEISbJubdhqrok9nNe5-xDfDK5E,13169
21
- crawlo/commands/startproject.py,sha256=-Bo8vvDfIhqzGmWyhxMatBlPLhYpRwJC7l4fpbN8vVk,16506
22
+ crawlo/commands/startproject.py,sha256=boZrMyn6TgCi1jt3D3DQfui6hJitjwNO8mqlWKNOBns,17366
22
23
  crawlo/commands/stats.py,sha256=vlGJLyiXZtY0ASdzCK59JNereSsAel4W9JCGaOzCr-8,6201
23
24
  crawlo/commands/utils.py,sha256=YVNEEzlm_qNY3SVvU8h6o2lQMkVgypvoB4ZFrP4gln0,5578
24
25
  crawlo/core/__init__.py,sha256=BWkj3AqZwp2Bk73UzUlC_qsqv_MH_HNrzy4DY1hosj4,1330
25
- crawlo/core/engine.py,sha256=y9mj0nKHb3Ki4scXkxsMO6XoTIqxmbsD0WuryR_6iHg,19385
26
+ crawlo/core/engine.py,sha256=znJ0VDFBImYi6KkTD8GHNo-V9BDnPSv9iYfTYLPsVSc,19379
26
27
  crawlo/core/processor.py,sha256=hR5MrbeZvDUx0ShKntr4qwkeVZzjlPJ8EAKgIFkNVts,1555
27
- crawlo/core/scheduler.py,sha256=-6DBz7gUg8WwUl39DAbi6Ng2AJSswNBCDr_mV3sUZFs,14088
28
+ crawlo/core/scheduler.py,sha256=G9xtrvE1wsTSOTOFUKDEphJvy6Xk5icuCGXTScYy7nQ,14084
28
29
  crawlo/data/__init__.py,sha256=UPqgioMdu3imSUmpLWzVlpvoBnEfaPSAT-crCcWd7iw,121
29
30
  crawlo/data/user_agents.py,sha256=zjjFkldQkqtrn45j0WZplaZLannPxZDeAU0JofxQcBc,9891
30
- crawlo/downloader/__init__.py,sha256=VZG5HiSHOmimiH9okQN3MBwgXsCzxr2awflVz5UiboY,8897
31
+ crawlo/downloader/__init__.py,sha256=P5pl-BGYCkdKWgoIewcYPz7ocVLixVfYuCDFmYyuqIw,8966
31
32
  crawlo/downloader/aiohttp_downloader.py,sha256=-dIFucMOQhiiEmtgEpG2Lqh1vF-PvDddbIrZ8Hge0Ig,9556
32
- crawlo/downloader/cffi_downloader.py,sha256=QxoeocCE2DsQCnhZla6-BjhplaTZDWMbEJmNrghWSDA,10488
33
+ crawlo/downloader/cffi_downloader.py,sha256=aKmrooictEFNfsmM3t4dpkGEALI85E7eLOAxm4LPQAU,10585
33
34
  crawlo/downloader/httpx_downloader.py,sha256=MpgDeIdGqNsiSKLOEDBnr5Z0eUbhHnqVEmAuoIfJmFU,12296
34
35
  crawlo/downloader/hybrid_downloader.py,sha256=dNnFeegRnyLaOxTWI6XrWKqqVPx80AZBZNgmrcKRVBM,8240
35
36
  crawlo/downloader/playwright_downloader.py,sha256=L-TVzG7cYfuBlqW0XSZuz5C_r9fpJrmYNcoQ-cDEna4,16663
36
37
  crawlo/downloader/selenium_downloader.py,sha256=P8GuhEw6OYVeN3oeksuBLpUJCELXiu0mAR23X6IIOAA,21508
37
- crawlo/extension/__init__.py,sha256=-R4P9fklpgSB8cGEduMsjkbJZ7ReYSrZaYjApgYUm9U,2986
38
+ crawlo/extension/__init__.py,sha256=wwaTTWYUzbg5b84sQn2JvBlyuhVGkw-REkhVlR2vymA,2980
38
39
  crawlo/extension/health_check.py,sha256=stDpyP4gOzAdbBlPbSf0rge0QounAhF8CtrGq5fa_7c,5657
39
40
  crawlo/extension/log_interval.py,sha256=N25aNjFkjh9br6g3ViFqRrz06C2geAdfGas-OT2oZh8,4497
40
41
  crawlo/extension/log_stats.py,sha256=CWjMb_V1o8j8uwGFvh9SZ7EYLl_OYzmuIsOT5V-_BE4,2452
@@ -47,12 +48,12 @@ crawlo/factories/base.py,sha256=loB_vyc0CsQK0BgwRoSOFS8gPcmv-b9irtjC9UaBGA4,1832
47
48
  crawlo/factories/crawler.py,sha256=e9zl4qytByzsYbz66klY3cZTvQei0-9GjdFK4XCyXcg,3198
48
49
  crawlo/factories/registry.py,sha256=YU87CdsntOz609M0aQbGcCG9glPinUJxOn-_CaM4f-M,2595
49
50
  crawlo/filters/__init__.py,sha256=noSe07tp2Ip_zXwAbS021BojrqNRaObDO-2YV6DOQfc,4381
50
- crawlo/filters/aioredis_filter.py,sha256=unms0WaRsxbCL6VaAQMT-SsBHKyxR6-o118pf-3ErK0,9512
51
- crawlo/filters/memory_filter.py,sha256=ZojFhZ6gE76aQBC-rfImxSkSMwQtiotenx0pIcQOaFg,9561
51
+ crawlo/filters/aioredis_filter.py,sha256=WglGW-XLjsy8r_NDrNsXk_nzwaIq081MBnooHqCCQZA,9841
52
+ crawlo/filters/memory_filter.py,sha256=gIPXCw650v81XRiz0MhWXH-zcn24ERzDTzBQZRoy1YU,9890
52
53
  crawlo/initialization/__init__.py,sha256=uNRMm9GccMYZi51scpvo-CPx_3ayp3Y81psBHlUoDfw,1132
53
- crawlo/initialization/built_in.py,sha256=1uEEtYCTQlfx5uRW-s9oumlmIEJrjZa0QpOLAbZbZqI,15758
54
+ crawlo/initialization/built_in.py,sha256=DlZf4k9FlU52tnwlFtKqWHqlFZpo-VHB0qP61rVqJzo,16259
54
55
  crawlo/initialization/context.py,sha256=wG9t-M-Qttj7TN6gDumPX5Q5GHaPDUpLTZZDne2r3TE,4863
55
- crawlo/initialization/core.py,sha256=sMiSBueoaWoDohQJ50IFC_DSvj0EeYSB7G1MORlDtMc,6872
56
+ crawlo/initialization/core.py,sha256=GWc9QNSp2JmHlCAhgq1aqGDXHcO6QlxFAVfePKC1xeo,6872
56
57
  crawlo/initialization/phases.py,sha256=iWhGITh9eudfSmzf2G3DLPAIJkCDrv9TVBtnAoS1_3c,4176
57
58
  crawlo/initialization/registry.py,sha256=kKVegqWxtPCaZ1mTyVHN4yFecAGDOPFJfebkP-SoobE,4919
58
59
  crawlo/items/__init__.py,sha256=rFpx1qFBo0Ik7bSdnXC8EVTJUOQdoJYGVdhYjaH00nk,409
@@ -66,20 +67,19 @@ crawlo/logging/factory.py,sha256=b4Z0fBmP00GpvpJ7k4QxqYP32n_EqG5KD3ouUWU7L4U,665
66
67
  crawlo/logging/manager.py,sha256=aem7yla0q83rf2CpwQEyg5YMbey4TzkquBVWiKtcqdQ,3182
67
68
  crawlo/logging/monitor.py,sha256=mzZWm3rQ2mGUoAmkEJPUkBmR0VWK66l14aqqhQ0zwE8,4935
68
69
  crawlo/logging/sampler.py,sha256=1BoRMpusP3wbXRnet5xl9_Yb_3_-AUq9WJhK9gYg7v4,5292
69
- crawlo/middleware/__init__.py,sha256=PSwpRLdBUopaQzBp1S0zK_TZbrRagQ4yzvgyLy4tBk8,570
70
+ crawlo/middleware/__init__.py,sha256=khNCstVcYlL14SbLZ8ys9ub1-C8k4FIiMQ99Vw9wA-0,635
70
71
  crawlo/middleware/default_header.py,sha256=Pw-ev8ffi16GeCh84R5L3hAZgp3G1QXS-H5kV3JEp4Q,5164
71
72
  crawlo/middleware/download_delay.py,sha256=2iWnJFtWDlqDy5MsAob8TPiJQoiz9v21yatkBI0eptg,3542
72
- crawlo/middleware/middleware_manager.py,sha256=_Kgd6Ir4cRUiPCEHJELZPOkKNtmu-WAE59dRWKPpAU8,6415
73
+ crawlo/middleware/middleware_manager.py,sha256=H_o0nwo_xQ8aSRnnvEs2Ho3fS-3WNi_5AjChhqvRYnk,6645
73
74
  crawlo/middleware/offsite.py,sha256=4tUkPqXMMXsi1WwYnJ_e7wMd6sRgK19QHRCYq8-w8jk,4682
74
- crawlo/middleware/proxy.py,sha256=uKk5OSLIs7jv9bBgkZwsi1rIpthooxhMrGBC2BPRDCc,16022
75
+ crawlo/middleware/proxy.py,sha256=jfaM4gL78ga_F7LN891dULjjO2zqFmulwQMDs5eJD6k,9591
75
76
  crawlo/middleware/request_ignore.py,sha256=7qdX4zAimjSGwdod_aWUbOTfzLBWZ5KzLVFchGMCxCI,2663
76
77
  crawlo/middleware/response_code.py,sha256=d5t0hmP8QliuvvtFOqW-ogCBtZxg2eyjsOtlQAEUxM8,4533
77
78
  crawlo/middleware/response_filter.py,sha256=tVGr06bfJBR3xAHI2G5c3WimFsGHu8qoJtDcsVuCATU,4384
78
79
  crawlo/middleware/retry.py,sha256=Acfo95B9wF8fQTCQIqluZOS2hHdnknQu_FOHvpGKJp0,4248
79
- crawlo/middleware/simple_proxy.py,sha256=rQ4RkqewGvDRCw021nGrg8ngkBzg3wqrEVqvSmBgQ6M,2256
80
80
  crawlo/network/__init__.py,sha256=bvEnpEUBZJ79URfNZbsHhsBKna54hM2-x_BV8eotTA4,418
81
81
  crawlo/network/request.py,sha256=e6-YLgK7SU8D19n21mQwqt_b_aeRVJFOgWPIBPal2ys,14178
82
- crawlo/network/response.py,sha256=QwJhL3xJfPVy_gwtGrg61oAgaqCoCmjyj1Ug7Zju7Pg,13060
82
+ crawlo/network/response.py,sha256=-URnNc_J7qBSG19uJbfuF6A_14MHLOtY78FvcZDzbsI,23418
83
83
  crawlo/pipelines/__init__.py,sha256=FDe2Pr5tiHtV8hFlheElRO_O1aVKvSWlkTcAl9BXAKA,637
84
84
  crawlo/pipelines/bloom_dedup_pipeline.py,sha256=vIF_6noJAdpotrJpnCmrVXCi59gRmHHn28mYW6VukbM,5465
85
85
  crawlo/pipelines/console_pipeline.py,sha256=bwe5hZgaVSWmh3R8XpOaaeAjJme-Ttrpo6G6f1cnLIg,1287
@@ -88,34 +88,33 @@ crawlo/pipelines/database_dedup_pipeline.py,sha256=IxahtD_mhni-Y21_idOMX58_Htf46
88
88
  crawlo/pipelines/json_pipeline.py,sha256=wrCsh8YInmcPLAkhPrHObMx89VZfhf-c7qRrYsTixPE,8585
89
89
  crawlo/pipelines/memory_dedup_pipeline.py,sha256=lKkYPu6vkpPjfQ1-xOLvPFT4VdTI8QVx0yjqtVR0ZB0,3598
90
90
  crawlo/pipelines/mongo_pipeline.py,sha256=PohTKTGw3QRvuP-T6SrquwW3FAHSno8jQ2D2cH_d75U,5837
91
- crawlo/pipelines/mysql_pipeline.py,sha256=Kjgu6cks1KD4FPXwlTnFaos2LG-N8LLaBDyKZ_MEcsI,14196
92
- crawlo/pipelines/pipeline_manager.py,sha256=R6MRb5d-caOit7PZoglJLHa3qQ68U5YAQlwt8KcjRxo,4393
91
+ crawlo/pipelines/mysql_pipeline.py,sha256=jlTP1X5QMrSVZjLD4lMS1BUTz-x6bagUEODddvHI2Vg,23702
92
+ crawlo/pipelines/pipeline_manager.py,sha256=_DtWfxcTinIf5ApzUOVjZksd2tPbc7qeKi92IVd_kbs,4387
93
93
  crawlo/pipelines/redis_dedup_pipeline.py,sha256=RB1kXLr8ZuWNrgZKYwt--tlmnWsQTbuwTsSt3pafol8,6077
94
94
  crawlo/queue/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
95
95
  crawlo/queue/pqueue.py,sha256=bbgd3l1VfqYXfz-4VFaiWLmJit1LdB3qHalCtNqyrqI,1210
96
96
  crawlo/queue/queue_manager.py,sha256=8rKygMxr6DgSjnGsKFmvlTI5XAARvQIN_ENkAruHGXs,21532
97
97
  crawlo/queue/redis_priority_queue.py,sha256=vLvg2toKaRrXD1QyEdu1ZjTmANv7clFaBF7mCtstBmI,15995
98
98
  crawlo/settings/__init__.py,sha256=NgYFLfk_Bw7h6KSoepJn_lMBSqVbCHebjKxaE3_eMgw,130
99
- crawlo/settings/default_settings.py,sha256=IKh2eZ9WWXkAbHx5K5KX0whNtumATRpZ7ifFPZJFfBk,11827
99
+ crawlo/settings/default_settings.py,sha256=TvtXgLzgc9_j_ITt8_xYhag29k6dCJiPU0Yq-snMkt4,12704
100
100
  crawlo/settings/setting_manager.py,sha256=yI1tGaludevxKGGZO3Pn4aYofrg2cwYwvMZCFC5PPZw,8595
101
101
  crawlo/spider/__init__.py,sha256=QGhe_yNsnfnCF3G9nSoWEw23b8SkP5oSFU5W79C5DzI,21881
102
102
  crawlo/templates/crawlo.cfg.tmpl,sha256=lwiUVe5sFixJgHFEjn1OtbAeyWsECOrz37uheuVtulk,240
103
- crawlo/templates/run.py.tmpl,sha256=g8yst2hkqhKGNotR33fDxwmEsX6aEvhrXY_cfYos_vc,788
103
+ crawlo/templates/run.py.tmpl,sha256=1ge0XILc3O5u7S8rsyg_rpe2B2ULokJcrKRVHMwPKj0,511
104
104
  crawlo/templates/spiders_init.py.tmpl,sha256=p6UK8KWr8FDydNxiAh6Iz29MY5WmgXIkf2z-buOGhOM,354
105
105
  crawlo/templates/project/__init__.py.tmpl,sha256=aQnHaOjMSkTviOC8COUX0fKymuyf8lx2tGduxkMkXEE,61
106
- crawlo/templates/project/items.py.tmpl,sha256=8_3DBA8HrS2XbfHzsMZNJiZbFY6fDJUUMFoFti_obJk,314
107
- crawlo/templates/project/middlewares.py.tmpl,sha256=fxHqi-Sjec5GiHJciprOU-6SAUTzM728NlZckIqf9hM,4278
108
- crawlo/templates/project/pipelines.py.tmpl,sha256=j9oqEhCezmmHlBhMWgYtlgup4jhWnMlv6AEiAOHODkg,2704
109
- crawlo/templates/project/settings.py.tmpl,sha256=mL9_JAyz8R35r-ywRHi4T-dtal7oczU5kodEWxldw40,5265
110
- crawlo/templates/project/settings_distributed.py.tmpl,sha256=RHzfWZITv-0ErCR9OYEswAZHpA5d9fYil0ZoGCtFt8g,5459
111
- crawlo/templates/project/settings_gentle.py.tmpl,sha256=pmjrBLjnpGcR90RkcJrM5O8PsTrRhUB92QR3R4TJyko,5733
112
- crawlo/templates/project/settings_high_performance.py.tmpl,sha256=9QhXSzfxIsMPyq0kZY9h2YBllyXGpGE37bMEbSrs_Ag,5823
113
- crawlo/templates/project/settings_minimal.py.tmpl,sha256=1qUPhSdHtvLSHTpytUJ8K63sMROhTwkz8e4tVg1fYoM,2222
114
- crawlo/templates/project/settings_simple.py.tmpl,sha256=sIyrCIVXsHSKl8Yjj8HkGs-ppMFH26a5yp6egVNlT2Q,5585
106
+ crawlo/templates/project/items.py.tmpl,sha256=hpQ2AfUmhddnzMuKM5LF6t44dOfFXwJRAZlWFKUFOZw,343
107
+ crawlo/templates/project/middlewares.py.tmpl,sha256=eEobZl8g_0DtiwLYbirQULqOacH-yUrrs4PUrGcJ2UE,1098
108
+ crawlo/templates/project/pipelines.py.tmpl,sha256=7BeaQDMHbIjhKzRtzlCMiFlU8xgMzDs2PIHq3EVUAlQ,887
109
+ crawlo/templates/project/settings.py.tmpl,sha256=fYK2NCJOc_jVRraKkEzH8beyax16KgNa-9s6TsQrdpI,3606
110
+ crawlo/templates/project/settings_distributed.py.tmpl,sha256=ULXyi5GDsZggk1Z4SRkalm2g7kJQx9ul6bCARN2I-TM,5566
111
+ crawlo/templates/project/settings_gentle.py.tmpl,sha256=NZjSqAqWmYlNE15Zt6-wY4rtxp7ID6HFUafoOvt7VAE,6039
112
+ crawlo/templates/project/settings_high_performance.py.tmpl,sha256=QYN4hJqvGmL7oayJjLcx4Mr3jedqRSvdlWkivom2M2o,6129
113
+ crawlo/templates/project/settings_minimal.py.tmpl,sha256=8XS_ButRDJxYRQSRHTc_l8ej2DbUnR0j891m0j-gjTY,3122
114
+ crawlo/templates/project/settings_simple.py.tmpl,sha256=OmL4GCPpFseRIG0CgL7625IWipc6vG_Da5tefXv_MD0,5891
115
115
  crawlo/templates/project/spiders/__init__.py.tmpl,sha256=llhcIItXpm0TlEeumeLwp4fcYv2NHl8Iru7tLhDhxiE,216
116
- crawlo/templates/spider/spider.py.tmpl,sha256=KvU-9YpN6MifDE7XzejjyyQS7RUjLDLZ8zqJcLwSsu0,5198
117
- crawlo/tools/__init__.py,sha256=tOYfYPvZlrO8cmvnMWBjTma6UTLTFZN3qdC8pJwHrzI,4142
118
- crawlo/tools/anti_crawler.py,sha256=LwLC6BkxDSkxc5H1hQ6kY9j7O0PZGAMPZECr7gbqw2M,9431
116
+ crawlo/templates/spider/spider.py.tmpl,sha256=4E4DDoOfI0vN_zLjfmMX_QNmWCx8EbrOKWBg6zozVqs,1065
117
+ crawlo/tools/__init__.py,sha256=sXDMZNP6EwZIFivGcRthxqD1DFMMS8UOJvULAzHD-w4,3927
119
118
  crawlo/tools/authenticated_proxy.py,sha256=ULCK0Cc9F2rGhRqu6kzKBdxzK9v2n1CsatSQ_PMxpAg,7272
120
119
  crawlo/tools/data_formatter.py,sha256=iBDHpZBZvn9O7pLkTQilE1TzYJQEc3z3f6HXoVus0f0,7808
121
120
  crawlo/tools/data_validator.py,sha256=bLWnkpFdclJuqjtSAgMI5nznN4vAuPwE1YaiFWKWenM,5490
@@ -127,35 +126,37 @@ crawlo/tools/request_tools.py,sha256=oXrk4yWMACVa65fDQCQgzsg6a94FH4_lS7qNR53FHYU
127
126
  crawlo/tools/retry_mechanism.py,sha256=4AQ_HLuYt4hYMI9XHoKFk2GQKEiDJB5pAnsMCfjc6Bk,7777
128
127
  crawlo/tools/scenario_adapter.py,sha256=pzysL1B2uQ1ZSEncVHd9Hv2viHNgaxP44YAUcDcppfw,9660
129
128
  crawlo/tools/text_cleaner.py,sha256=UrMGcgRnJaufjmDKIDsRYKMA8znCAArHDgouttWPygk,6690
130
- crawlo/utils/__init__.py,sha256=8kMbOZf9bzOUjtvh2QvqXZmiZh3pYzxXH9YQhYcwcoY,597
129
+ crawlo/utils/__init__.py,sha256=nxLnfqcEGLnsfSEagoKNyu-pm2ByU9BwE5tLxcS71Qo,1003
131
130
  crawlo/utils/batch_processor.py,sha256=8LNy-K2SrQVUxmGEWxQyYw_j9M-erN4Ie7O4d3zpBvM,9142
132
- crawlo/utils/class_loader.py,sha256=kZRGfyA3OPAH2QsQ-beOKjw3JKKBs6OyJyJyXkvWDrc,675
133
131
  crawlo/utils/controlled_spider_mixin.py,sha256=8CuM3Cr2wQLHbaO_ohbCsPcImJnyfZHpERbSeMgQ-AQ,16936
134
- crawlo/utils/db_helper.py,sha256=ZqOt1d3mErVv4TOvoWlov0niUxORB9aHByTmMoNFIDw,10902
135
- crawlo/utils/enhanced_error_handler.py,sha256=fJC__rnYNKTNUHNbgjZtT846HoE31qyGbPft9bwyYLU,14214
132
+ crawlo/utils/db_helper.py,sha256=zFr4BpEMbaY86DrR5Ol5-hfvkSXcG66prl00LPHLl8E,8702
136
133
  crawlo/utils/env_config.py,sha256=W-VD_WF63DHxsyJysvp1eJwRh3L_pBRl_PitQAY3nQY,4079
137
- crawlo/utils/error_handler.py,sha256=vJ_4EVkuVn_TrM16VgN4doyhe_Pg7xWwVUWIENgJQAg,5455
138
- crawlo/utils/fingerprint.py,sha256=70Me5avs40HYbz6LQ9La56EVP2qRapYjX-zF7WQDGkM,3687
134
+ crawlo/utils/error_handler.py,sha256=e2LeUGT_OMcNKcjiX9Pp-NuQh5spsHBqIPBd7VxA2IQ,16247
135
+ crawlo/utils/fingerprint.py,sha256=3IbctH3zwyBjN_12SH7-vrFt-akA2WSo3iAzHc6u--s,3689
139
136
  crawlo/utils/func_tools.py,sha256=y-TYP9H3X67MS_foWy9Z2LIS6GP7Y4Cy3T168ulq3Jc,2451
140
137
  crawlo/utils/large_scale_config.py,sha256=NZMsDj4qbVx06Fu0aHqNKX1yzo6WFT7CgrhVnvw1ZFs,8372
141
138
  crawlo/utils/large_scale_helper.py,sha256=4ORkZcIrwJ0SlKOUh7l7WIuERORuRhNBgHCM71Rz0n0,12452
142
139
  crawlo/utils/log.py,sha256=KmUWVYq8t6fSGOC88nnYCDxwBUdoPWvaBmpOSHn2oWI,2914
140
+ crawlo/utils/misc.py,sha256=m_TbfMf4Aoe70zmkv7XWyFg8Rz0qOYPXepwB6EcYr7Y,2519
143
141
  crawlo/utils/performance_monitor.py,sha256=32KspSo7RWvCX_fl0ZFn4ScWWOqbVVwEhPRd921Ez6I,9832
144
142
  crawlo/utils/queue_helper.py,sha256=gFmkh1jKlIcN1rmo2Jl6vYcLP5ByUWlfHO9eNlZPBLs,4918
145
143
  crawlo/utils/redis_connection_pool.py,sha256=EsPZkmQctWkoYU2wcrqkgwnIWnE6nG4XCXECKn216JA,12575
146
144
  crawlo/utils/redis_key_validator.py,sha256=-UTTx0Ul184pzwSply8hVdH0lp-gkXXOc_gEHR_7VlU,5809
147
- crawlo/utils/request.py,sha256=ejdKpTwc-HE04HQybafhOVywzz57IV3pY0YMkSLyGUo,9065
145
+ crawlo/utils/request.py,sha256=RcINrLvShfZ5VHu1T_hJJRXp-viKWSo35C2JOgWyl2k,8641
148
146
  crawlo/utils/request_serializer.py,sha256=b5abcgjJk4IU6Wfg46AmOAU2wmzu_WqcpEbuAncRMGQ,8931
149
- crawlo/utils/spider_loader.py,sha256=xNzQb7qhQ7TqZsfFtCLpuVcsGi-USriZosU0YSBr9II,2233
147
+ crawlo/utils/selector_helper.py,sha256=BVczzsSzPL5zF5KHXK3hyuqEl9o0ADYEuCH7Aw8aj98,4332
148
+ crawlo/utils/spider_loader.py,sha256=oxifl0p4SOFhvvnD38Em4zGtC7sRr_pw4dki01MoAq0,7677
150
149
  crawlo/utils/system.py,sha256=24zGmtHNhDFMGVo7ftMV-Pqg6_5d63zsyNey9udvJJk,248
150
+ crawlo/utils/text_helper.py,sha256=TTZgQPayMFUOYj8syt47Gwa4AQVY15W1b56STJetAKE,2920
151
151
  crawlo/utils/tools.py,sha256=uy7qw5Z1BIhyEgiHENvtM7WoGCJxlS8EX3PmOA7ouCo,275
152
152
  crawlo/utils/url.py,sha256=RKe_iqdjafsNcp-P2GVLYpsL1qbxiuZLiFc-SqOQkcs,1521
153
153
  examples/__init__.py,sha256=NkRbV8_S1tb8S2AW6BE2U6P2-eGOPwMR1k0YQAwQpSE,130
154
154
  tests/__init__.py,sha256=409aRX8hsPffiZCVjOogtxwhACzBp8G2UTJyUQSxhK0,136
155
155
  tests/advanced_tools_example.py,sha256=1_iitECKCuWUYMNNGo61l3lmwMRrWdA8F_Xw56UaGZY,9340
156
- tests/authenticated_proxy_example.py,sha256=fKmHXXxIxCJXjEplttCWRh7PZhbxkBSxJF91Bx-qOME,3019
156
+ tests/authenticated_proxy_example.py,sha256=ZgLrU-1GaBhkJK1Wy0X93lHP1GT2sU2_wi3RI1CfrVc,3135
157
157
  tests/baidu_performance_test.py,sha256=wxdaI7UwKboMYH_qcaqZLxAStvndH60bvKGzD8F-jaI,3974
158
158
  tests/baidu_test.py,sha256=NKYnwDbPJX3tmKtRn7uQ_QWzUXiLTQC-Gdr1cQkJzEo,1874
159
+ tests/bug_check_test.py,sha256=EIDOUk_QgtBOWKuBLm_WHbgJ0fsDuJACJ-nuxnBIdkQ,8056
159
160
  tests/cleaners_example.py,sha256=blVqSJ7SeWUNd17JjHZJgVTzWH65XKevLyaMB_Wg8qA,5324
160
161
  tests/comprehensive_framework_test.py,sha256=_1N-OGbKvBTNachNvIjkL_izr4uv6OUybUkhxxz5MAk,5977
161
162
  tests/comprehensive_test.py,sha256=wypCaB56IV8w8nd5VA5LSXUQ3IgLf0AKKUiCci6yEJQ,2969
@@ -169,13 +170,14 @@ tests/debug_log_config.py,sha256=cPS6qOLnynYTFOxpjcy9OUgIqrkasWb9f2c_PASc2_E,371
169
170
  tests/debug_log_levels.py,sha256=CZWG3KGDq-hYJ5TPhoZTyjKFKkkM-AoK3oP1w-JC1sc,2168
170
171
  tests/debug_pipelines.py,sha256=FMb36bH9lQxBLb-nM579hBRK1S16Vxu1t_BC3Dj8O2w,2164
171
172
  tests/detailed_log_test.py,sha256=oTCFF_Un7Jq2gV4rpRDFOxlHJSthnQhvEf0CSItfB7I,7501
173
+ tests/direct_selector_helper_test.py,sha256=p7_x3x87JUnpKplmwYO4zN5ympcPJSPdHsviso-LmpI,2862
172
174
  tests/distributed_test.py,sha256=u6cEiymZzCItaTClKTxwVjNmOj9_PZii4_eGNAVMDW8,1825
173
175
  tests/distributed_test_debug.py,sha256=pUv6ZKEJ5pK9xOA7lgVk6WW3cBAtnb1bsuZzJ8oGLvY,2181
174
176
  tests/dynamic_loading_example.py,sha256=7LdeQZFevrb-U1_dgr4oX3aYo2Da4HvE_0KIf1fw4Ew,18786
175
177
  tests/dynamic_loading_test.py,sha256=dzDW7b66HeDsIYsYgvNRihE3V6b6gEbUGQpp-eJbcIM,3413
176
178
  tests/env_config_example.py,sha256=_ZRDh_LR23ZKpy9E--y_KM0QIOiZF5vRT98QTn52TY8,4951
177
179
  tests/error_handling_example.py,sha256=grTeo1X17rFz4lhgASb0g5yu4NWbmNz5neyuonnNR40,5294
178
- tests/final_command_test_report.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
180
+ tests/explain_mysql_update_behavior.py,sha256=uBrJwiYujTJF35oF1kYMRjYU5k5Y3YlqOfOni0oPQtY,2865
179
181
  tests/final_comprehensive_test.py,sha256=szTNbtwKfYNmE0kzDPCsE_kvnTG7FNKl2JERakGhKIk,4314
180
182
  tests/final_log_test.py,sha256=CpZ4ZvvuvFiBvz1a50qN599XIU086ett_I0bSX42BLU,9367
181
183
  tests/final_validation_test.py,sha256=4cuTr58i46JI6M4Tz54e7vrVFrOr3R7HSWgyQPKmM9M,5244
@@ -184,25 +186,30 @@ tests/framework_performance_test.py,sha256=Qp47VrsCK0ylEhDkFOm7lnD8rVkaJ7u1MopsE
184
186
  tests/log_buffering_test.py,sha256=0B5UY1yQuxnBU1pEyz3IBYweN__4fOkPXly-kYfOpNU,3226
185
187
  tests/log_generation_timing_test.py,sha256=zHb_m2FqlpRCYw-wqFWFn8cbVH8UR3VvXKSM6nNnbgo,4681
186
188
  tests/optimized_performance_test.py,sha256=bA0dN4j7ViyTSSiCJEjlkJ9Y7jspTFKs2xX7UXHE8Gs,7379
187
- tests/performance_comparison.py,sha256=CKGFbcwqsF3CAWIaOWF3Ca6o_OdEdZCgIfYb_m6CzIw,9302
189
+ tests/performance_comparison.py,sha256=UevHOM_9z2ILedf_xZ_8F8QiPjb_M8WTfGQrxzKtgco,9266
188
190
  tests/queue_blocking_test.py,sha256=hp-6hmTOO64oOAWVtlN8cFJ95GjbK3t9fj-4q_TKowk,3955
189
191
  tests/queue_test.py,sha256=HeBiBXqAgIAbUkLVQ3McS6NdRselA30m3lnuxNBvZbk,2689
190
192
  tests/redis_key_validation_demo.py,sha256=WD2jvuBwHhLYIb3lVFtvYSSnmXWn1EW4EPCEwFhfi6M,4467
191
193
  tests/request_params_example.py,sha256=J50NdsnK1sDrqG-5m3oA-mu1_wHwVwHIfsWxGeQpz7o,4250
192
194
  tests/response_improvements_example.py,sha256=t1cbG3nesp82bqog4_ku1GvQzNbhRyWa5EaKTmOPrSk,5402
193
195
  tests/simple_command_test.py,sha256=8TowzW45ukKTPeaNC5uij3RR7rqPULiBr2PguSSMdP8,3688
194
- tests/simple_crawlo_test.py,sha256=gfL910xt56HhrERAB9QhO_oxKWKw2j2IsifQkgYtwvE,4848
195
- tests/simple_log_test.py,sha256=xOEGH5UzRLsCpwqgi1VEoZ7NY3DoWckwy6Wy9lKQ6ZE,1757
196
+ tests/simple_crawlo_test.py,sha256=FYDn5cgAxHN81QSYa_wcJcxJit7aLnIopnkHKKr83dE,4801
197
+ tests/simple_follow_test.py,sha256=3vNT5Eqwza6fxAY9Xl_9xtFGdfrPwm6NnVHdRmJsH8A,1053
196
198
  tests/simple_log_test2.py,sha256=Rn3XerVlkT0M-vbQmrQL7bVIZG3REnJNmMvUvKr6C20,3944
197
199
  tests/simple_optimization_test.py,sha256=hflvaC81ra1ZrPOp-Z7rQrH95OnSADvAjy95BLulD6o,3678
198
200
  tests/simple_queue_type_test.py,sha256=wAf4XLKl9oS5BlfrRJ1SLY-kYmNq4YY0LdIC7HmW-yg,1193
201
+ tests/simple_response_selector_test.py,sha256=0naeRUX1n-oAW6VRj-12c6nre2D0RjJ0dD3Nx7BBTjY,2844
202
+ tests/simple_selector_helper_test.py,sha256=l9FsVhQ-z-ICqqetLIyeSaI8Dn6bXNCD8sLdr0tpvms,4438
203
+ tests/simple_selector_test.py,sha256=XzOYzpEzr0yaioLV6v-4XC60VZMd5jRthlyp7Ud02o4,6630
199
204
  tests/simple_spider_test.py,sha256=RzziJg-fbIVJ6_CgbismfkwrLwpJp4WWp2RLgG7Tpws,1168
200
- tests/simple_test.py,sha256=Pyxgg0YnBG_3_NRFla0HgJ21CYlfO4K-js3x6_-6ZEk,1258
205
+ tests/simple_url_test.py,sha256=g9RBn46V7fHZTU0BrB5pl5AGCbw6QuKOXClVACb-MEQ,2297
206
+ tests/simulate_mysql_update_test.py,sha256=7BEFdQkYjgCdLN5vnieTf-ByosCcSj2QJUMOUeYlLgQ,4597
201
207
  tests/spider_log_timing_test.py,sha256=pvYpKZemClr4mCR76xywhsiWbT5sPdzD_taZKFjlgvM,5573
202
208
  tests/test_advanced_tools.py,sha256=HT_TcwfFzli-CavIJSqQqnCxnBn5FDMX09zL7AJ5tNY,5398
203
209
  tests/test_all_commands.py,sha256=VgVa9SzU5Irvn5igHpC2W4E_6ZDWDt7jc-T4UPK_PFE,7718
204
210
  tests/test_all_pipeline_fingerprints.py,sha256=NDrBYr0f9CAhjmSezTS4NUrAdcotrSX3ElJTWqjXXbU,5308
205
211
  tests/test_all_redis_key_configs.py,sha256=dWc4Dsr07_vuSpb4hwkMpyy6XO8SI7vglVjGuGvXoa4,5710
212
+ tests/test_asyncmy_usage.py,sha256=gxENdxrcLlDG2m8V-j4ZnSJYFc3x6CvKvgPAhOC13DE,1688
206
213
  tests/test_authenticated_proxy.py,sha256=lnvmQwuf0zaZP_E05EzcNFR2VJbwTkLjOmZGNoJKaC4,4339
207
214
  tests/test_batch_processor.py,sha256=4_nYlu9R1JkDCFHq0bYc9LUNqsg41r7sQ879hkrhEts,7212
208
215
  tests/test_cleaners.py,sha256=HDK8_YU7GUj_3hGU415cxEeUR74mnDSk0yroLlgDI0I,1816
@@ -212,7 +219,9 @@ tests/test_config_consistency.py,sha256=RgSxyaypMpysltsGSh1vFMeOShiZZG0rmUKzEhNL
212
219
  tests/test_config_merge.py,sha256=ts1j-TIKkFS0EO5q1I4O7f4YUKR5MLTmRSqOpOlv094,5606
213
220
  tests/test_config_validator.py,sha256=Z4gBHkI0_fEx-xgiiG4T33F4BAuePuF81obpNTXfseY,6202
214
221
  tests/test_controlled_spider_mixin.py,sha256=AQ493ic6AxZAKd7QCgnUES92BBWCMNteTd5DjoQlhwo,2864
215
- tests/test_crawlo_proxy_integration.py,sha256=81DVwosMoiSMxj4V_jLzcL7aqvSv_8ucggkQyXsvzT0,2733
222
+ tests/test_crawler_process_import.py,sha256=iIPqSCpv2VRb_hWTu5euLME4PDFf7NwixeBypRuv39Y,1175
223
+ tests/test_crawler_process_spider_modules.py,sha256=uMr4esj6ascVBzt0WrPd3ZOQfKD00O6tJrNhuWOdvV0,1395
224
+ tests/test_crawlo_proxy_integration.py,sha256=JFBI82ILXMwAIJ29C8uhu5r-hH3UhMC50jKr5-jy6Ng,3059
216
225
  tests/test_date_tools.py,sha256=pcLDyhLrZ_jh-PhPm4CvLZEgNeH9kLMPKN5zacHwuWM,4053
217
226
  tests/test_dedup_fix.py,sha256=UFdm8lIi0ZIdp40W8ruxRD69bxzijuFUfNyJmB4Fwl0,8788
218
227
  tests/test_dedup_pipeline_consistency.py,sha256=dn5EAZSU5gQOV5EQwreHp76i5aQZ9tEdltSGO7dif5M,5176
@@ -221,23 +230,24 @@ tests/test_distributed.py,sha256=78Pn4HPLIaO8t1IiaSkckBmuEVTcnC8IDw7znf9_Zcw,179
221
230
  tests/test_double_crawlo_fix.py,sha256=lZwrT5ij6Jbh0EzZswhw05FXwgKaEZsSHekLTrJJajg,7856
222
231
  tests/test_double_crawlo_fix_simple.py,sha256=NDmCEeyvpf_D1tGQMA66iLPPKlAnSZcEg71e7GHYcjg,4768
223
232
  tests/test_download_delay_middleware.py,sha256=Idc6KzhL3hY3aDKgn1j_v5-mLIHz7dTnV5c4tJVZh5Q,9107
224
- tests/test_downloader_proxy_compatibility.py,sha256=0hgIzWXIqd92YXEB5sNneyp4Sk7PaG76up2cd6N9QQY,8903
233
+ tests/test_downloader_proxy_compatibility.py,sha256=NJJ-g_I665lHLsJZd7ONvKubHRxv82FADZR9WYzgyzA,9418
225
234
  tests/test_dynamic_downloaders_proxy.py,sha256=t_aWpxOHi4h3_fg2ImtIq7IIJ0r3PTHtnXiopPe2ZlM,4450
226
235
  tests/test_dynamic_proxy.py,sha256=zi7Ocbhc9GL1zCs0XhmG2NvBBeIZ2d2hPJVh18lH4Y0,3172
227
236
  tests/test_dynamic_proxy_config.py,sha256=C_9CEjCJtrr0SxIXCyLDhSIi88ujF7UAT1F-FAphd0w,5853
228
237
  tests/test_dynamic_proxy_real.py,sha256=krWnbFIH26mWNPhOfPMmx3ZxJfOreZxMZFGwVb_8-K8,3511
229
- tests/test_edge_cases.py,sha256=1RnFaCebYTDNNz_LK8M0MepiSwPvJUk_FBK4nQTCUbg,10729
238
+ tests/test_edge_cases.py,sha256=460JtYR6yuTo8J4wqJScMzDkrrDUE2Q8R425AaUycIQ,11127
239
+ tests/test_encoding_core.py,sha256=k5fZET0R1KInhAlbbHEJv4m9d6NuibOxxfIcR43TS7Y,1681
240
+ tests/test_encoding_detection.py,sha256=Zb1KkF2CR57qa0Hr_Iv8msompGJZT2EIL_2mGp0zX9Q,4245
230
241
  tests/test_enhanced_error_handler.py,sha256=Ku_86jv7iDe25v8ZxalcXxJJjIiIvQXWH8ZldbwdVm8,8581
231
242
  tests/test_enhanced_error_handler_comprehensive.py,sha256=j_cxyIPGks9A3untKhAdj5HU0hrLbbzOLu0uAtGUlJo,9369
232
243
  tests/test_env_config.py,sha256=Qu1sDeADs69dSr1x0QmEe8nJrMHneE_4JClt-N901e8,4867
233
244
  tests/test_error_handler_compatibility.py,sha256=xJ43cmCwfBGh-qBwCGiMDPPlfNDLw4ZrmlrHN9IojkU,4241
234
245
  tests/test_factories.py,sha256=wKFfr8YBXPs-AQ8YOFgDhINn5uivKqPBZQPUe5GL9Ig,8865
246
+ tests/test_factory_compatibility.py,sha256=zzTXd3ku3iedgxgB1DxTt3zfetiIl6wCjL9yXIUCpic,6260
235
247
  tests/test_final_validation.py,sha256=OuZI01O0E68Pao--bD-BFDTRZFPc_Mt4W-OXUzlt6ZA,4966
236
248
  tests/test_fingerprint_consistency.py,sha256=68V5u_2hNABI5pNWzXUrA1PJ08Xh9x3-JsMSNNjORMo,4956
237
249
  tests/test_fingerprint_simple.py,sha256=qiSba8gF3Zl91QO_ijJO7KstLdjATs30V_GZCNHShig,1626
238
250
  tests/test_framework_env_usage.py,sha256=bFb_ptdLeX2obdJWEqEHPWweiWR-wR2BpvEaJMQK7h4,4201
239
- tests/test_framework_logger.py,sha256=nAtL_N49L7OurthY329vZK_jBjJIcPcETEIiV0HGqt0,2560
240
- tests/test_framework_startup.py,sha256=I_ij7J6NO3DTBuHlh1Z4CJUcGxLEjRaIB6EdykiISEc,2267
241
251
  tests/test_get_component_logger.py,sha256=UKj5uT1F3L3atoJFmpk7QSDO2fZHgw-7Y84vMFbHRkM,2285
242
252
  tests/test_hash_performance.py,sha256=4eVPwbu66Oun0LVyTTNd9d2cj2V1xq0YZkRg8Z0TO-Q,3211
243
253
  tests/test_integration.py,sha256=lVEzKNAjFzFZHRNZAyJmXxa_5Ogf_qqL4APqs620o58,4839
@@ -249,28 +259,39 @@ tests/test_logging_final.py,sha256=K9vxyODslXza05hElVEcvzbXgzthYKK5CRj4UJTftIw,6
249
259
  tests/test_logging_integration.py,sha256=5WpExyt6BmYBZwrjqtQIGOw1Id64opJBAIahDk70Mlc,11131
250
260
  tests/test_logging_system.py,sha256=LGfK14ZEWzRtl3_VkBGz-AaVa_dDtuC5zu40m8FvmMo,9206
251
261
  tests/test_middleware_debug.py,sha256=gtiaWCxBSTcaNkdqXirM7CsThr_HfiCueBdQCpp7rqg,4572
252
- tests/test_mode_change.py,sha256=GT53CBdxcG3-evcKz_OOfH4PBiq_oqQyuDjRXrvv1UU,2665
253
262
  tests/test_mode_consistency.py,sha256=t72WX0etC_AayaL2AT6e2lIgbfP-zxTgYAiTARSN2Jk,1276
254
263
  tests/test_multi_directory.py,sha256=sH9Y3B-fuESlc7J1aICa-AlBcCW8HFR-Q5j2anUr8l0,2196
255
264
  tests/test_multiple_spider_modules.py,sha256=M0wPyQW7HMasbMIgn_R78wjZEj4A_DgqaGHp0qF9Y0c,2567
265
+ tests/test_mysql_pipeline_config.py,sha256=5Yveo4cPiGOG22EO5493QkC2m3ocKfv0Y2jK9m_4aZU,6793
266
+ tests/test_mysql_pipeline_error.py,sha256=htqZBnEIF3kIML53u8Sv4_PnyRep-0JJFApuD8FpkFQ,3529
267
+ tests/test_mysql_pipeline_init_log.py,sha256=-x9M2wqfa5g3jZ-y7iIPIOqEle0HouC28YECWfSE5OQ,2516
268
+ tests/test_mysql_pipeline_integration.py,sha256=fhBwU0ewH3nc1ol1JH4xpVTGrqlIttBghkqtxtOgMF0,4208
269
+ tests/test_mysql_pipeline_refactor.py,sha256=yJzBBgoIavQjXWQtivP0j8kAwmbb8zybypHqdLbfd_c,5804
270
+ tests/test_mysql_pipeline_refactor_simple.py,sha256=QmF2Zv-0FyWMs6SYNXQPC3GW1rVyPnKmM_2rGOtxCps,3724
271
+ tests/test_mysql_pipeline_robustness.py,sha256=cmjDOv9FX1OAFHJaY3WkveCSOTZiiZKu5ehjHaI-QW0,6138
272
+ tests/test_mysql_pipeline_types.py,sha256=dIs4aYlV9vsGfhvmDHOc-LCx-jDqUzoAkn-v8i2ae7Y,2474
273
+ tests/test_mysql_update_columns.py,sha256=CyEshc7b_yprIXcQtNOaWvCC2ZDb0kzjLOfmd8r3sOY,3458
256
274
  tests/test_offsite_middleware.py,sha256=njpXTdngOqBs60Wj6xgo5EEXlJnMHd7vtYGi9dVauW0,10602
257
275
  tests/test_offsite_middleware_simple.py,sha256=4MfDKSXGHcoFLYnnxCH2rmnzztWyN0xByYLoHtepyiA,7918
276
+ tests/test_optimized_selector_naming.py,sha256=fbmlB5S2kBwtQWpWoQ4lQ7rUQm2_DeWK-t6KqvIRTUQ,2787
258
277
  tests/test_parsel.py,sha256=wuZqRFIm9xx1tt6o3Xi_OjvwhT_MPmHiUEj2ax06zlo,701
259
278
  tests/test_performance.py,sha256=Lqs2iu3dmWipZkBPARcwIjDLXsqe42ntz1M4RzqqXKo,11457
260
279
  tests/test_performance_monitor.py,sha256=paW3HGg6ReHb9lwnOivGCrI8STwbwp_mbuhgfds1h3I,4187
261
280
  tests/test_pipeline_fingerprint_consistency.py,sha256=LL55oGSDGy0K8LxoyKa6ogNHXhJlZHe509vCFbibLkk,2847
262
- tests/test_priority_behavior.py,sha256=p04M0HIgBaXyuVHmp-ImITA9jGaKI_RPwZ3DPY_Trt4,9134
281
+ tests/test_priority_behavior.py,sha256=JQ5uv80cAUKV9Eh3S8j5zxYSSL-dmzhwhuKOINM26zU,9325
263
282
  tests/test_priority_consistency.py,sha256=rVX7nku5N_QpB_ffDu3xqREkCWPX5aNNiXy112o9wpA,5756
264
283
  tests/test_priority_consistency_fixed.py,sha256=MlYi5PIr5wxunC3Ku4ilnxOatKyRu2qIvhV7pjadkjg,10765
265
284
  tests/test_proxy_api.py,sha256=XnmklS-xU4ke_560gV6AIlBsRmG8YLQTGFAZrTUZuhc,11013
266
285
  tests/test_proxy_health_check.py,sha256=_tDlxa_6TdL3M5RLkHF82roXJ8WIuG5hELBp2GADyKQ,1123
267
- tests/test_proxy_middleware.py,sha256=EdQAfwwAJIBxw9JmUFTDEu_pdxapaTlcJr7KcrY6-AY,4021
268
- tests/test_proxy_middleware_enhanced.py,sha256=QR-p26F63N7MxNjZ2QJUeerh_xdnCDejkrGPIh7Fh4U,7035
269
- tests/test_proxy_middleware_integration.py,sha256=mTPK_XvbmLCV_QoVZzA3ybWOOX61493Ew78WfTp-bYQ,4441
270
- tests/test_proxy_middleware_refactored.py,sha256=VbkTWkmmomcyswobA_gf3p_bERl_eexY2e6ohJQS_A8,6960
286
+ tests/test_proxy_middleware.py,sha256=MC2Hg88Pdpv6i_gTAy4ocIWOOxQ8bF7hYtszwpOzilE,8716
287
+ tests/test_proxy_middleware_enhanced.py,sha256=N7Ly3koCH2uRYk6pxhEJwWpChKdIucdrj0nKvq_E4bw,6896
288
+ tests/test_proxy_middleware_integration.py,sha256=PQhJKM1uGtQTlBh7XlKWAMwNwQ6K8of-P15KHDF2dJg,4729
289
+ tests/test_proxy_middleware_refactored.py,sha256=Z4szCDqyjAwWtgDoddgfeNIVsVefPcrfsZP57gCMrJQ,8272
290
+ tests/test_proxy_only.py,sha256=OqF3An_s9VY4mfLX7kDRz_LMtLpNzC6LS2kQkEyiBRw,2563
271
291
  tests/test_proxy_providers.py,sha256=u_R2fhab90vqvQEaOAztpAOe9tJXvUMIdoDxmStmXJ4,1749
272
292
  tests/test_proxy_stats.py,sha256=ES00CEoDITYPFBGPk8pecFzD3ItYIv6NSpcqNd8-kvo,526
273
293
  tests/test_proxy_strategies.py,sha256=9Z1pXmTNyw-eIhGXlf2abZbJx6igLohYq-_3hldQ5uE,1868
294
+ tests/test_proxy_with_downloader.py,sha256=6OqyLcIM9nPMhL9bCYUIeSvYValKPw72XC-Up8jRri8,4597
274
295
  tests/test_queue_empty_check.py,sha256=ZJC6jOgZq0Wb0-ubrB1ZNcCaUiWeCxoNZmjkd6PY6t0,1182
275
296
  tests/test_queue_manager_double_crawlo.py,sha256=MijZ3JuyHMuqGbRC-8kclFr-4O7m_T8CqezP4qiWk-E,6957
276
297
  tests/test_queue_manager_redis_key.py,sha256=txHLq5XUZZN7h9HUlqlUCEVCTe2IXdf9r7F_P2zNVdY,7117
@@ -280,7 +301,7 @@ tests/test_queue_type_redis_config_consistency.py,sha256=1ew7Zp9CxH1DQ0RUmsZMV-n
280
301
  tests/test_random_headers_default.py,sha256=ulDb3_kRpnTCN1-TO3m6wVM-eMkZS_ezsSbd1ur8Xpg,12772
281
302
  tests/test_random_headers_necessity.py,sha256=SSbNQIE347oCQvuG6yaAambFU-3MyQzTV5jN1kArRGY,11741
282
303
  tests/test_random_user_agent.py,sha256=6HjU4iUcMk-J6bR2N5FhIkWDfnaFKAPNVyRzxmQQ14k,2302
283
- tests/test_real_scenario_proxy.py,sha256=clmLvBfap5OpsaCE08MAWap-78jhVrxYfVfDNyoa4Hg,8454
304
+ tests/test_real_scenario_proxy.py,sha256=L2Mfwt47pvs6dYJDcazeyupoQ_DuvhdulCz6-2GFR9Y,7527
284
305
  tests/test_redis_config.py,sha256=51_Fy1PqIhS0MMO2nR4q6oQjBFxfqcUPK_4NNf5s83g,903
285
306
  tests/test_redis_connection_pool.py,sha256=pKfXdE3Cm_L_fNqI9zqFmqiidCwR0t7hiM_Fu_V1cNI,9328
286
307
  tests/test_redis_key_naming.py,sha256=MTFk656JhiGVTsMctBDhBNOMFcBDZrsQA3UfPZ-Dgj4,6911
@@ -293,12 +314,21 @@ tests/test_request_params.py,sha256=l2etiDebqylPBym1e9DSDn4wxwTHv8DQHKq9AzlzlG0,
293
314
  tests/test_request_serialization.py,sha256=Ikgec8tt_sPCK6jcZyK8vRw84zRNE6nxQy9rba1WKmE,2332
294
315
  tests/test_response_code_middleware.py,sha256=wSe525bm-bk_iWMjPDzUu1LfOQrwJY8_MLKAspq2dzk,12193
295
316
  tests/test_response_filter_middleware.py,sha256=YWrGzJ7wmftTjJXcNTtJl3b3EdJsO4oR22ZLWwgErhg,16327
317
+ tests/test_response_follow.py,sha256=gjVZ_knsuHUaCDOjRPk-qG9HRCwReXlVrIx_KpveRHM,3738
296
318
  tests/test_response_improvements.py,sha256=vNqHKyoEoYeEGAUiRzdsff2V6yvJ9QnDwGg7gmN38Ow,6028
319
+ tests/test_response_selector_methods.py,sha256=6aS7q_PBx601MnXbCze-ZWNO-uCKFVjhxcCg9NJqKrI,2738
320
+ tests/test_response_url_methods.py,sha256=plOpSN3JLRI8-lbj4cva8-_jRFdDwmax9Gkv6O2Ac-s,2759
321
+ tests/test_response_urljoin.py,sha256=uXTWhFx8-XBb-Vaghn9YKJz5ThkwRuNykBWW4S7f3go,3379
297
322
  tests/test_retry_middleware.py,sha256=mi7s4HDAqmmd9nvyxs3ZgxdEKOYkCgDu3rDvU_9o8vQ,11133
298
323
  tests/test_retry_middleware_realistic.py,sha256=Sam5y4jCN8oeElU4xxeS5zjAyzS-P8siPV7OaifgsyU,9679
299
324
  tests/test_scheduler.py,sha256=1fCu35QgK5gzgrhD0aUZj5lxL0QbokzPav-yEJxz9Ig,8182
300
325
  tests/test_scheduler_config_update.py,sha256=LuxjEbt20QrPyVkjSFxvTnFtUxwMaHB6TcqjFyo8bow,4261
326
+ tests/test_scrapy_style_encoding.py,sha256=2K_0lHsYqop4qb5lO1U8g7hbae4nkMPrbEvVTl5TT9Y,3408
327
+ tests/test_selector_helper.py,sha256=-fw8p-uJixTKso7OLUBTVJ2oOjL8LIJA1WDetzthGO0,2818
328
+ tests/test_selector_optimizations.py,sha256=5t5RrDkcy0YtK2Es9DBfi3Cejfv6yV4dagulIQhmEho,4665
301
329
  tests/test_simple_response.py,sha256=_ui2PuVZvJcAuLY7HZ8xcsy_tDBimgBqX0ukj3kE5J0,1549
330
+ tests/test_spider_loader.py,sha256=-myi78LztwABeaCpJj-DzO2CxNEYW8lavtVuUreoHcI,1314
331
+ tests/test_spider_loader_comprehensive.py,sha256=gp6SWrDQcrg4RFNkLJQWDQ16NDfpdOlg0rCyJ86-F-8,2591
302
332
  tests/test_spider_modules.py,sha256=wxPs28FtpGnQTemMY6r7WxVrwYo3bHnAd5dq94qj1K4,2797
303
333
  tests/test_telecom_spider_redis_key.py,sha256=c-gfixPul2VlYMQJGf0H5ZgYJ461fQgSKbCPrbAU45M,7625
304
334
  tests/test_template_content.py,sha256=2RgCdOA3pMUSOqC_JbTGeW7KonbTqJ0ySYJNWegU-v0,2903
@@ -311,10 +341,21 @@ tests/untested_features_report.md,sha256=31aUlsw_1OKe0_ijAjeH85kJ7HJ8qzKLJdOHDjW
311
341
  tests/verify_debug.py,sha256=iQ4Efwg9bQTHscr73VYAAZ8rBIe1u6mQfeaEK5YgneY,1564
312
342
  tests/verify_distributed.py,sha256=0IolM4ymuPOz_uTfHSWFO3Vxzp7Lo6i0zhSbzJhHFtI,4045
313
343
  tests/verify_log_fix.py,sha256=7reyVl3MXTDASyChgU5BAYuzuxvFjSLG9HywAHso0qg,4336
344
+ tests/verify_mysql_warnings.py,sha256=TMPsB1yp7R_c3S6LllgPJ-n_4He6gHVygAC81zbeQrc,4106
345
+ tests/ofweek_scrapy/scrapy.cfg,sha256=D_8rsW65iTbH7nG1kI25jYTCpoQKBVa2shajrsC6fBw,280
346
+ tests/ofweek_scrapy/ofweek_scrapy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
347
+ tests/ofweek_scrapy/ofweek_scrapy/items.py,sha256=Y_TwwHPAgOXTuCTdnhRxil7vYPk1_rzj1ZatTq4AX-I,280
348
+ tests/ofweek_scrapy/ofweek_scrapy/middlewares.py,sha256=O4jVSXZgxtsRzU9O_O3YdkS7_QLndzv3uYP-Op8g254,3654
349
+ tests/ofweek_scrapy/ofweek_scrapy/pipelines.py,sha256=ZO6WqTqPpTwLvnwO7YL0E35OPp4zSfJ_GhMeshNRSow,379
350
+ tests/ofweek_scrapy/ofweek_scrapy/settings.py,sha256=X3Y6goZluAz0n2bepWAKEhZX0URFfe9_lBRBCPgtLPk,2933
351
+ tests/ofweek_scrapy/ofweek_scrapy/spiders/__init__.py,sha256=ULwecZkx3_NTphkz7y_qiazBeUoHFnCCWnKSjoDCZj0,161
352
+ tests/ofweek_scrapy/ofweek_scrapy/spiders/ofweek_spider.py,sha256=gcfKze-ipzP7JTDGCL3TgtjwIwfgI7dPL6GmdXVT0fs,6880
314
353
  tests/scrapy_comparison/ofweek_scrapy.py,sha256=rhVds_WjYum1bLuWWe90HtXE51fZXEqhhPSc822ZasQ,5790
315
354
  tests/scrapy_comparison/scrapy_test.py,sha256=-IsGUHPBgEL0TmXjeLZl-TUA01B7Dsc2nRo4JZbFwZA,5599
316
- crawlo-1.4.4.dist-info/METADATA,sha256=LAg9xmMfxLUwVUGPqw_p48hGJYZqsRC9Mc4KqDroAUQ,4848
317
- crawlo-1.4.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
318
- crawlo-1.4.4.dist-info/entry_points.txt,sha256=5HoVoTSPxI8SCa5B7pQYxLSrkOdiunyO9tqNsLMv52g,43
319
- crawlo-1.4.4.dist-info/top_level.txt,sha256=keG_67pbZ_wZL2dmDRA9RMaNHTaV_x_oxZ9DKNgwvR0,22
320
- crawlo-1.4.4.dist-info/RECORD,,
355
+ tests/test_spiders/__init__.py,sha256=Ws2DhfUA0Xh5Cxr9M46td7B6hyNoLTyAhZ60FnIh6D0,20
356
+ tests/test_spiders/test_spider.py,sha256=kNGEg80HMMFgzVseI1jJjljZEBy3QYKt_3SXGASffFM,168
357
+ crawlo-1.4.6.dist-info/METADATA,sha256=j66m-xE1oVuLE4WEnDbBjH6PXGbfbgM7yxSF616EOHo,9355
358
+ crawlo-1.4.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
359
+ crawlo-1.4.6.dist-info/entry_points.txt,sha256=5HoVoTSPxI8SCa5B7pQYxLSrkOdiunyO9tqNsLMv52g,43
360
+ crawlo-1.4.6.dist-info/top_level.txt,sha256=keG_67pbZ_wZL2dmDRA9RMaNHTaV_x_oxZ9DKNgwvR0,22
361
+ crawlo-1.4.6.dist-info/RECORD,,
@@ -74,14 +74,18 @@ async def main():
74
74
  config = CrawloConfig.standalone(
75
75
  concurrency=2,
76
76
  download_delay=1.0,
77
- PROXY_ENABLED=True,
78
- # 配置认证代理(请替换为实际的代理信息)
79
- PROXY_API_URL="http://your-proxy-provider.com/api/get", # 代理API地址
80
- # 如果使用固定代理,可以直接设置:
77
+ # 代理配置
78
+ # 高级代理配置(适用于ProxyMiddleware)
79
+ # 只要配置了代理API URL,中间件就会自动启用
80
+ PROXY_API_URL="http://proxy-api.example.com/get", # 代理API地址
81
+
82
+ # 代理配置(适用于ProxyMiddleware)
83
+ # 只要配置了代理列表,中间件就会自动启用
81
84
  # PROXY_LIST=[
82
- # "http://username:password@proxy1.example.com:8080",
83
- # "http://username:password@proxy2.example.com:8080",
85
+ # "http://user:pass@proxy1.example.com:8080",
86
+ # "http://user:pass@proxy2.example.com:8080"
84
87
  # ],
88
+
85
89
  LOG_LEVEL='INFO'
86
90
  )
87
91