crawlo 1.4.5__py3-none-any.whl → 1.4.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crawlo might be problematic. Click here for more details.

Files changed (44) hide show
  1. crawlo/__version__.py +1 -1
  2. crawlo/downloader/cffi_downloader.py +3 -1
  3. crawlo/middleware/proxy.py +171 -348
  4. crawlo/pipelines/mysql_pipeline.py +339 -188
  5. crawlo/settings/default_settings.py +38 -30
  6. crawlo/stats_collector.py +10 -1
  7. crawlo/templates/project/settings.py.tmpl +10 -55
  8. crawlo/templates/project/settings_distributed.py.tmpl +20 -22
  9. crawlo/templates/project/settings_gentle.py.tmpl +5 -0
  10. crawlo/templates/project/settings_high_performance.py.tmpl +5 -0
  11. crawlo/templates/project/settings_minimal.py.tmpl +25 -1
  12. crawlo/templates/project/settings_simple.py.tmpl +5 -0
  13. crawlo/templates/run.py.tmpl +1 -8
  14. crawlo/templates/spider/spider.py.tmpl +5 -108
  15. crawlo/utils/db_helper.py +11 -5
  16. {crawlo-1.4.5.dist-info → crawlo-1.4.6.dist-info}/METADATA +1 -1
  17. {crawlo-1.4.5.dist-info → crawlo-1.4.6.dist-info}/RECORD +43 -29
  18. tests/authenticated_proxy_example.py +10 -6
  19. tests/explain_mysql_update_behavior.py +77 -0
  20. tests/simulate_mysql_update_test.py +140 -0
  21. tests/test_asyncmy_usage.py +57 -0
  22. tests/test_crawlo_proxy_integration.py +8 -2
  23. tests/test_downloader_proxy_compatibility.py +24 -20
  24. tests/test_mysql_pipeline_config.py +165 -0
  25. tests/test_mysql_pipeline_error.py +99 -0
  26. tests/test_mysql_pipeline_init_log.py +83 -0
  27. tests/test_mysql_pipeline_integration.py +133 -0
  28. tests/test_mysql_pipeline_refactor.py +144 -0
  29. tests/test_mysql_pipeline_refactor_simple.py +86 -0
  30. tests/test_mysql_pipeline_robustness.py +196 -0
  31. tests/test_mysql_pipeline_types.py +89 -0
  32. tests/test_mysql_update_columns.py +94 -0
  33. tests/test_proxy_middleware.py +104 -8
  34. tests/test_proxy_middleware_enhanced.py +1 -5
  35. tests/test_proxy_middleware_integration.py +7 -2
  36. tests/test_proxy_middleware_refactored.py +25 -2
  37. tests/test_proxy_only.py +84 -0
  38. tests/test_proxy_with_downloader.py +153 -0
  39. tests/test_real_scenario_proxy.py +17 -17
  40. tests/verify_mysql_warnings.py +110 -0
  41. crawlo/middleware/simple_proxy.py +0 -65
  42. {crawlo-1.4.5.dist-info → crawlo-1.4.6.dist-info}/WHEEL +0 -0
  43. {crawlo-1.4.5.dist-info → crawlo-1.4.6.dist-info}/entry_points.txt +0 -0
  44. {crawlo-1.4.5.dist-info → crawlo-1.4.6.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,5 @@
1
1
  crawlo/__init__.py,sha256=n5vFwi0iuYrpAIyoNJZzWHV1gvF-vh-Yze3jiuwEXqM,2180
2
- crawlo/__version__.py,sha256=47Hd5fKyrYgSfmOfBF7ibw9EyAE1ctXOQOLg_x_Ld9w,23
2
+ crawlo/__version__.py,sha256=C1PbImXkZPhAW7rUcTV61OKrbIa2DpoQJ2Kmga3lWwM,23
3
3
  crawlo/cli.py,sha256=AQnAB5NMI-Ic1VPw_Jjng8L4AI4-wMozOwzE6CfXkZU,2402
4
4
  crawlo/config.py,sha256=EQIT7WpkXAlr2ocd5SYJYOKTSWUlQx2AkTHX7ErEWxw,9798
5
5
  crawlo/config_validator.py,sha256=oY4-2bwXUlwHAnGgkI-EznviDfML_dcxbWSGXNSxC2k,11516
@@ -10,7 +10,7 @@ crawlo/framework.py,sha256=9gP6VN4MHqutGXaxnwpNMSULfVYbNp906UdZiJGywlQ,9458
10
10
  crawlo/interfaces.py,sha256=q1vwMSiZLfLpPhFa9Y0hAcjYEKvLkW2fZ2fmoAZ-5TE,653
11
11
  crawlo/mode_manager.py,sha256=e8QmwsnndFx_hGME_7w-hazKo0GOYjUr-7FBf7dWxgc,8903
12
12
  crawlo/project.py,sha256=9wnlHd-rYAC3TT1Fc1ftyUBx7mbDT6TQCqoaIP6N3iA,13998
13
- crawlo/stats_collector.py,sha256=hIjlnX750jU4Oncyand1jBccfaX4Tu7egd2DBYu2N7A,2379
13
+ crawlo/stats_collector.py,sha256=mzNHu628a31PwqpkBXN90PhD-xhMSunNNxAm-ney5JU,2803
14
14
  crawlo/subscriber.py,sha256=h8fx69NJZeWem0ZkCmfHAi2kgfDGFObHpwN0aGNUM6Y,5115
15
15
  crawlo/task_manager.py,sha256=Ic6PFUqZOhLXuZ_UEk_8Neb9FmqYv8I2RzV3vLzFNSU,5966
16
16
  crawlo/commands/__init__.py,sha256=orvY6wLOBwGUEJKeF3h_T1fxj8AaQLjngBDd-3xKOE4,392
@@ -30,7 +30,7 @@ crawlo/data/__init__.py,sha256=UPqgioMdu3imSUmpLWzVlpvoBnEfaPSAT-crCcWd7iw,121
30
30
  crawlo/data/user_agents.py,sha256=zjjFkldQkqtrn45j0WZplaZLannPxZDeAU0JofxQcBc,9891
31
31
  crawlo/downloader/__init__.py,sha256=P5pl-BGYCkdKWgoIewcYPz7ocVLixVfYuCDFmYyuqIw,8966
32
32
  crawlo/downloader/aiohttp_downloader.py,sha256=-dIFucMOQhiiEmtgEpG2Lqh1vF-PvDddbIrZ8Hge0Ig,9556
33
- crawlo/downloader/cffi_downloader.py,sha256=QxoeocCE2DsQCnhZla6-BjhplaTZDWMbEJmNrghWSDA,10488
33
+ crawlo/downloader/cffi_downloader.py,sha256=aKmrooictEFNfsmM3t4dpkGEALI85E7eLOAxm4LPQAU,10585
34
34
  crawlo/downloader/httpx_downloader.py,sha256=MpgDeIdGqNsiSKLOEDBnr5Z0eUbhHnqVEmAuoIfJmFU,12296
35
35
  crawlo/downloader/hybrid_downloader.py,sha256=dNnFeegRnyLaOxTWI6XrWKqqVPx80AZBZNgmrcKRVBM,8240
36
36
  crawlo/downloader/playwright_downloader.py,sha256=L-TVzG7cYfuBlqW0XSZuz5C_r9fpJrmYNcoQ-cDEna4,16663
@@ -72,12 +72,11 @@ crawlo/middleware/default_header.py,sha256=Pw-ev8ffi16GeCh84R5L3hAZgp3G1QXS-H5kV
72
72
  crawlo/middleware/download_delay.py,sha256=2iWnJFtWDlqDy5MsAob8TPiJQoiz9v21yatkBI0eptg,3542
73
73
  crawlo/middleware/middleware_manager.py,sha256=H_o0nwo_xQ8aSRnnvEs2Ho3fS-3WNi_5AjChhqvRYnk,6645
74
74
  crawlo/middleware/offsite.py,sha256=4tUkPqXMMXsi1WwYnJ_e7wMd6sRgK19QHRCYq8-w8jk,4682
75
- crawlo/middleware/proxy.py,sha256=uKk5OSLIs7jv9bBgkZwsi1rIpthooxhMrGBC2BPRDCc,16022
75
+ crawlo/middleware/proxy.py,sha256=jfaM4gL78ga_F7LN891dULjjO2zqFmulwQMDs5eJD6k,9591
76
76
  crawlo/middleware/request_ignore.py,sha256=7qdX4zAimjSGwdod_aWUbOTfzLBWZ5KzLVFchGMCxCI,2663
77
77
  crawlo/middleware/response_code.py,sha256=d5t0hmP8QliuvvtFOqW-ogCBtZxg2eyjsOtlQAEUxM8,4533
78
78
  crawlo/middleware/response_filter.py,sha256=tVGr06bfJBR3xAHI2G5c3WimFsGHu8qoJtDcsVuCATU,4384
79
79
  crawlo/middleware/retry.py,sha256=Acfo95B9wF8fQTCQIqluZOS2hHdnknQu_FOHvpGKJp0,4248
80
- crawlo/middleware/simple_proxy.py,sha256=rQ4RkqewGvDRCw021nGrg8ngkBzg3wqrEVqvSmBgQ6M,2256
81
80
  crawlo/network/__init__.py,sha256=bvEnpEUBZJ79URfNZbsHhsBKna54hM2-x_BV8eotTA4,418
82
81
  crawlo/network/request.py,sha256=e6-YLgK7SU8D19n21mQwqt_b_aeRVJFOgWPIBPal2ys,14178
83
82
  crawlo/network/response.py,sha256=-URnNc_J7qBSG19uJbfuF6A_14MHLOtY78FvcZDzbsI,23418
@@ -89,7 +88,7 @@ crawlo/pipelines/database_dedup_pipeline.py,sha256=IxahtD_mhni-Y21_idOMX58_Htf46
89
88
  crawlo/pipelines/json_pipeline.py,sha256=wrCsh8YInmcPLAkhPrHObMx89VZfhf-c7qRrYsTixPE,8585
90
89
  crawlo/pipelines/memory_dedup_pipeline.py,sha256=lKkYPu6vkpPjfQ1-xOLvPFT4VdTI8QVx0yjqtVR0ZB0,3598
91
90
  crawlo/pipelines/mongo_pipeline.py,sha256=PohTKTGw3QRvuP-T6SrquwW3FAHSno8jQ2D2cH_d75U,5837
92
- crawlo/pipelines/mysql_pipeline.py,sha256=pLJQJUKqzWrrOxuO-eHXNq5xLza0DHeuGnpwX2Pc4NI,14186
91
+ crawlo/pipelines/mysql_pipeline.py,sha256=jlTP1X5QMrSVZjLD4lMS1BUTz-x6bagUEODddvHI2Vg,23702
93
92
  crawlo/pipelines/pipeline_manager.py,sha256=_DtWfxcTinIf5ApzUOVjZksd2tPbc7qeKi92IVd_kbs,4387
94
93
  crawlo/pipelines/redis_dedup_pipeline.py,sha256=RB1kXLr8ZuWNrgZKYwt--tlmnWsQTbuwTsSt3pafol8,6077
95
94
  crawlo/queue/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -97,24 +96,24 @@ crawlo/queue/pqueue.py,sha256=bbgd3l1VfqYXfz-4VFaiWLmJit1LdB3qHalCtNqyrqI,1210
97
96
  crawlo/queue/queue_manager.py,sha256=8rKygMxr6DgSjnGsKFmvlTI5XAARvQIN_ENkAruHGXs,21532
98
97
  crawlo/queue/redis_priority_queue.py,sha256=vLvg2toKaRrXD1QyEdu1ZjTmANv7clFaBF7mCtstBmI,15995
99
98
  crawlo/settings/__init__.py,sha256=NgYFLfk_Bw7h6KSoepJn_lMBSqVbCHebjKxaE3_eMgw,130
100
- crawlo/settings/default_settings.py,sha256=kBcE5PF-sfB12cjIxHeNPEvzSWSHYDu6saEgrTGXn5o,11970
99
+ crawlo/settings/default_settings.py,sha256=TvtXgLzgc9_j_ITt8_xYhag29k6dCJiPU0Yq-snMkt4,12704
101
100
  crawlo/settings/setting_manager.py,sha256=yI1tGaludevxKGGZO3Pn4aYofrg2cwYwvMZCFC5PPZw,8595
102
101
  crawlo/spider/__init__.py,sha256=QGhe_yNsnfnCF3G9nSoWEw23b8SkP5oSFU5W79C5DzI,21881
103
102
  crawlo/templates/crawlo.cfg.tmpl,sha256=lwiUVe5sFixJgHFEjn1OtbAeyWsECOrz37uheuVtulk,240
104
- crawlo/templates/run.py.tmpl,sha256=g8yst2hkqhKGNotR33fDxwmEsX6aEvhrXY_cfYos_vc,788
103
+ crawlo/templates/run.py.tmpl,sha256=1ge0XILc3O5u7S8rsyg_rpe2B2ULokJcrKRVHMwPKj0,511
105
104
  crawlo/templates/spiders_init.py.tmpl,sha256=p6UK8KWr8FDydNxiAh6Iz29MY5WmgXIkf2z-buOGhOM,354
106
105
  crawlo/templates/project/__init__.py.tmpl,sha256=aQnHaOjMSkTviOC8COUX0fKymuyf8lx2tGduxkMkXEE,61
107
106
  crawlo/templates/project/items.py.tmpl,sha256=hpQ2AfUmhddnzMuKM5LF6t44dOfFXwJRAZlWFKUFOZw,343
108
107
  crawlo/templates/project/middlewares.py.tmpl,sha256=eEobZl8g_0DtiwLYbirQULqOacH-yUrrs4PUrGcJ2UE,1098
109
108
  crawlo/templates/project/pipelines.py.tmpl,sha256=7BeaQDMHbIjhKzRtzlCMiFlU8xgMzDs2PIHq3EVUAlQ,887
110
- crawlo/templates/project/settings.py.tmpl,sha256=mL9_JAyz8R35r-ywRHi4T-dtal7oczU5kodEWxldw40,5265
111
- crawlo/templates/project/settings_distributed.py.tmpl,sha256=RHzfWZITv-0ErCR9OYEswAZHpA5d9fYil0ZoGCtFt8g,5459
112
- crawlo/templates/project/settings_gentle.py.tmpl,sha256=pmjrBLjnpGcR90RkcJrM5O8PsTrRhUB92QR3R4TJyko,5733
113
- crawlo/templates/project/settings_high_performance.py.tmpl,sha256=9QhXSzfxIsMPyq0kZY9h2YBllyXGpGE37bMEbSrs_Ag,5823
114
- crawlo/templates/project/settings_minimal.py.tmpl,sha256=1qUPhSdHtvLSHTpytUJ8K63sMROhTwkz8e4tVg1fYoM,2222
115
- crawlo/templates/project/settings_simple.py.tmpl,sha256=sIyrCIVXsHSKl8Yjj8HkGs-ppMFH26a5yp6egVNlT2Q,5585
109
+ crawlo/templates/project/settings.py.tmpl,sha256=fYK2NCJOc_jVRraKkEzH8beyax16KgNa-9s6TsQrdpI,3606
110
+ crawlo/templates/project/settings_distributed.py.tmpl,sha256=ULXyi5GDsZggk1Z4SRkalm2g7kJQx9ul6bCARN2I-TM,5566
111
+ crawlo/templates/project/settings_gentle.py.tmpl,sha256=NZjSqAqWmYlNE15Zt6-wY4rtxp7ID6HFUafoOvt7VAE,6039
112
+ crawlo/templates/project/settings_high_performance.py.tmpl,sha256=QYN4hJqvGmL7oayJjLcx4Mr3jedqRSvdlWkivom2M2o,6129
113
+ crawlo/templates/project/settings_minimal.py.tmpl,sha256=8XS_ButRDJxYRQSRHTc_l8ej2DbUnR0j891m0j-gjTY,3122
114
+ crawlo/templates/project/settings_simple.py.tmpl,sha256=OmL4GCPpFseRIG0CgL7625IWipc6vG_Da5tefXv_MD0,5891
116
115
  crawlo/templates/project/spiders/__init__.py.tmpl,sha256=llhcIItXpm0TlEeumeLwp4fcYv2NHl8Iru7tLhDhxiE,216
117
- crawlo/templates/spider/spider.py.tmpl,sha256=KvU-9YpN6MifDE7XzejjyyQS7RUjLDLZ8zqJcLwSsu0,5198
116
+ crawlo/templates/spider/spider.py.tmpl,sha256=4E4DDoOfI0vN_zLjfmMX_QNmWCx8EbrOKWBg6zozVqs,1065
118
117
  crawlo/tools/__init__.py,sha256=sXDMZNP6EwZIFivGcRthxqD1DFMMS8UOJvULAzHD-w4,3927
119
118
  crawlo/tools/authenticated_proxy.py,sha256=ULCK0Cc9F2rGhRqu6kzKBdxzK9v2n1CsatSQ_PMxpAg,7272
120
119
  crawlo/tools/data_formatter.py,sha256=iBDHpZBZvn9O7pLkTQilE1TzYJQEc3z3f6HXoVus0f0,7808
@@ -130,7 +129,7 @@ crawlo/tools/text_cleaner.py,sha256=UrMGcgRnJaufjmDKIDsRYKMA8znCAArHDgouttWPygk,
130
129
  crawlo/utils/__init__.py,sha256=nxLnfqcEGLnsfSEagoKNyu-pm2ByU9BwE5tLxcS71Qo,1003
131
130
  crawlo/utils/batch_processor.py,sha256=8LNy-K2SrQVUxmGEWxQyYw_j9M-erN4Ie7O4d3zpBvM,9142
132
131
  crawlo/utils/controlled_spider_mixin.py,sha256=8CuM3Cr2wQLHbaO_ohbCsPcImJnyfZHpERbSeMgQ-AQ,16936
133
- crawlo/utils/db_helper.py,sha256=xTgBTXSWTNXM19rLsypPtnsswO0HdDV1K7zn_wYk4s0,8137
132
+ crawlo/utils/db_helper.py,sha256=zFr4BpEMbaY86DrR5Ol5-hfvkSXcG66prl00LPHLl8E,8702
134
133
  crawlo/utils/env_config.py,sha256=W-VD_WF63DHxsyJysvp1eJwRh3L_pBRl_PitQAY3nQY,4079
135
134
  crawlo/utils/error_handler.py,sha256=e2LeUGT_OMcNKcjiX9Pp-NuQh5spsHBqIPBd7VxA2IQ,16247
136
135
  crawlo/utils/fingerprint.py,sha256=3IbctH3zwyBjN_12SH7-vrFt-akA2WSo3iAzHc6u--s,3689
@@ -154,7 +153,7 @@ crawlo/utils/url.py,sha256=RKe_iqdjafsNcp-P2GVLYpsL1qbxiuZLiFc-SqOQkcs,1521
154
153
  examples/__init__.py,sha256=NkRbV8_S1tb8S2AW6BE2U6P2-eGOPwMR1k0YQAwQpSE,130
155
154
  tests/__init__.py,sha256=409aRX8hsPffiZCVjOogtxwhACzBp8G2UTJyUQSxhK0,136
156
155
  tests/advanced_tools_example.py,sha256=1_iitECKCuWUYMNNGo61l3lmwMRrWdA8F_Xw56UaGZY,9340
157
- tests/authenticated_proxy_example.py,sha256=fKmHXXxIxCJXjEplttCWRh7PZhbxkBSxJF91Bx-qOME,3019
156
+ tests/authenticated_proxy_example.py,sha256=ZgLrU-1GaBhkJK1Wy0X93lHP1GT2sU2_wi3RI1CfrVc,3135
158
157
  tests/baidu_performance_test.py,sha256=wxdaI7UwKboMYH_qcaqZLxAStvndH60bvKGzD8F-jaI,3974
159
158
  tests/baidu_test.py,sha256=NKYnwDbPJX3tmKtRn7uQ_QWzUXiLTQC-Gdr1cQkJzEo,1874
160
159
  tests/bug_check_test.py,sha256=EIDOUk_QgtBOWKuBLm_WHbgJ0fsDuJACJ-nuxnBIdkQ,8056
@@ -178,6 +177,7 @@ tests/dynamic_loading_example.py,sha256=7LdeQZFevrb-U1_dgr4oX3aYo2Da4HvE_0KIf1fw
178
177
  tests/dynamic_loading_test.py,sha256=dzDW7b66HeDsIYsYgvNRihE3V6b6gEbUGQpp-eJbcIM,3413
179
178
  tests/env_config_example.py,sha256=_ZRDh_LR23ZKpy9E--y_KM0QIOiZF5vRT98QTn52TY8,4951
180
179
  tests/error_handling_example.py,sha256=grTeo1X17rFz4lhgASb0g5yu4NWbmNz5neyuonnNR40,5294
180
+ tests/explain_mysql_update_behavior.py,sha256=uBrJwiYujTJF35oF1kYMRjYU5k5Y3YlqOfOni0oPQtY,2865
181
181
  tests/final_comprehensive_test.py,sha256=szTNbtwKfYNmE0kzDPCsE_kvnTG7FNKl2JERakGhKIk,4314
182
182
  tests/final_log_test.py,sha256=CpZ4ZvvuvFiBvz1a50qN599XIU086ett_I0bSX42BLU,9367
183
183
  tests/final_validation_test.py,sha256=4cuTr58i46JI6M4Tz54e7vrVFrOr3R7HSWgyQPKmM9M,5244
@@ -203,11 +203,13 @@ tests/simple_selector_helper_test.py,sha256=l9FsVhQ-z-ICqqetLIyeSaI8Dn6bXNCD8sLd
203
203
  tests/simple_selector_test.py,sha256=XzOYzpEzr0yaioLV6v-4XC60VZMd5jRthlyp7Ud02o4,6630
204
204
  tests/simple_spider_test.py,sha256=RzziJg-fbIVJ6_CgbismfkwrLwpJp4WWp2RLgG7Tpws,1168
205
205
  tests/simple_url_test.py,sha256=g9RBn46V7fHZTU0BrB5pl5AGCbw6QuKOXClVACb-MEQ,2297
206
+ tests/simulate_mysql_update_test.py,sha256=7BEFdQkYjgCdLN5vnieTf-ByosCcSj2QJUMOUeYlLgQ,4597
206
207
  tests/spider_log_timing_test.py,sha256=pvYpKZemClr4mCR76xywhsiWbT5sPdzD_taZKFjlgvM,5573
207
208
  tests/test_advanced_tools.py,sha256=HT_TcwfFzli-CavIJSqQqnCxnBn5FDMX09zL7AJ5tNY,5398
208
209
  tests/test_all_commands.py,sha256=VgVa9SzU5Irvn5igHpC2W4E_6ZDWDt7jc-T4UPK_PFE,7718
209
210
  tests/test_all_pipeline_fingerprints.py,sha256=NDrBYr0f9CAhjmSezTS4NUrAdcotrSX3ElJTWqjXXbU,5308
210
211
  tests/test_all_redis_key_configs.py,sha256=dWc4Dsr07_vuSpb4hwkMpyy6XO8SI7vglVjGuGvXoa4,5710
212
+ tests/test_asyncmy_usage.py,sha256=gxENdxrcLlDG2m8V-j4ZnSJYFc3x6CvKvgPAhOC13DE,1688
211
213
  tests/test_authenticated_proxy.py,sha256=lnvmQwuf0zaZP_E05EzcNFR2VJbwTkLjOmZGNoJKaC4,4339
212
214
  tests/test_batch_processor.py,sha256=4_nYlu9R1JkDCFHq0bYc9LUNqsg41r7sQ879hkrhEts,7212
213
215
  tests/test_cleaners.py,sha256=HDK8_YU7GUj_3hGU415cxEeUR74mnDSk0yroLlgDI0I,1816
@@ -219,7 +221,7 @@ tests/test_config_validator.py,sha256=Z4gBHkI0_fEx-xgiiG4T33F4BAuePuF81obpNTXfse
219
221
  tests/test_controlled_spider_mixin.py,sha256=AQ493ic6AxZAKd7QCgnUES92BBWCMNteTd5DjoQlhwo,2864
220
222
  tests/test_crawler_process_import.py,sha256=iIPqSCpv2VRb_hWTu5euLME4PDFf7NwixeBypRuv39Y,1175
221
223
  tests/test_crawler_process_spider_modules.py,sha256=uMr4esj6ascVBzt0WrPd3ZOQfKD00O6tJrNhuWOdvV0,1395
222
- tests/test_crawlo_proxy_integration.py,sha256=81DVwosMoiSMxj4V_jLzcL7aqvSv_8ucggkQyXsvzT0,2733
224
+ tests/test_crawlo_proxy_integration.py,sha256=JFBI82ILXMwAIJ29C8uhu5r-hH3UhMC50jKr5-jy6Ng,3059
223
225
  tests/test_date_tools.py,sha256=pcLDyhLrZ_jh-PhPm4CvLZEgNeH9kLMPKN5zacHwuWM,4053
224
226
  tests/test_dedup_fix.py,sha256=UFdm8lIi0ZIdp40W8ruxRD69bxzijuFUfNyJmB4Fwl0,8788
225
227
  tests/test_dedup_pipeline_consistency.py,sha256=dn5EAZSU5gQOV5EQwreHp76i5aQZ9tEdltSGO7dif5M,5176
@@ -228,7 +230,7 @@ tests/test_distributed.py,sha256=78Pn4HPLIaO8t1IiaSkckBmuEVTcnC8IDw7znf9_Zcw,179
228
230
  tests/test_double_crawlo_fix.py,sha256=lZwrT5ij6Jbh0EzZswhw05FXwgKaEZsSHekLTrJJajg,7856
229
231
  tests/test_double_crawlo_fix_simple.py,sha256=NDmCEeyvpf_D1tGQMA66iLPPKlAnSZcEg71e7GHYcjg,4768
230
232
  tests/test_download_delay_middleware.py,sha256=Idc6KzhL3hY3aDKgn1j_v5-mLIHz7dTnV5c4tJVZh5Q,9107
231
- tests/test_downloader_proxy_compatibility.py,sha256=0hgIzWXIqd92YXEB5sNneyp4Sk7PaG76up2cd6N9QQY,8903
233
+ tests/test_downloader_proxy_compatibility.py,sha256=NJJ-g_I665lHLsJZd7ONvKubHRxv82FADZR9WYzgyzA,9418
232
234
  tests/test_dynamic_downloaders_proxy.py,sha256=t_aWpxOHi4h3_fg2ImtIq7IIJ0r3PTHtnXiopPe2ZlM,4450
233
235
  tests/test_dynamic_proxy.py,sha256=zi7Ocbhc9GL1zCs0XhmG2NvBBeIZ2d2hPJVh18lH4Y0,3172
234
236
  tests/test_dynamic_proxy_config.py,sha256=C_9CEjCJtrr0SxIXCyLDhSIi88ujF7UAT1F-FAphd0w,5853
@@ -260,6 +262,15 @@ tests/test_middleware_debug.py,sha256=gtiaWCxBSTcaNkdqXirM7CsThr_HfiCueBdQCpp7rq
260
262
  tests/test_mode_consistency.py,sha256=t72WX0etC_AayaL2AT6e2lIgbfP-zxTgYAiTARSN2Jk,1276
261
263
  tests/test_multi_directory.py,sha256=sH9Y3B-fuESlc7J1aICa-AlBcCW8HFR-Q5j2anUr8l0,2196
262
264
  tests/test_multiple_spider_modules.py,sha256=M0wPyQW7HMasbMIgn_R78wjZEj4A_DgqaGHp0qF9Y0c,2567
265
+ tests/test_mysql_pipeline_config.py,sha256=5Yveo4cPiGOG22EO5493QkC2m3ocKfv0Y2jK9m_4aZU,6793
266
+ tests/test_mysql_pipeline_error.py,sha256=htqZBnEIF3kIML53u8Sv4_PnyRep-0JJFApuD8FpkFQ,3529
267
+ tests/test_mysql_pipeline_init_log.py,sha256=-x9M2wqfa5g3jZ-y7iIPIOqEle0HouC28YECWfSE5OQ,2516
268
+ tests/test_mysql_pipeline_integration.py,sha256=fhBwU0ewH3nc1ol1JH4xpVTGrqlIttBghkqtxtOgMF0,4208
269
+ tests/test_mysql_pipeline_refactor.py,sha256=yJzBBgoIavQjXWQtivP0j8kAwmbb8zybypHqdLbfd_c,5804
270
+ tests/test_mysql_pipeline_refactor_simple.py,sha256=QmF2Zv-0FyWMs6SYNXQPC3GW1rVyPnKmM_2rGOtxCps,3724
271
+ tests/test_mysql_pipeline_robustness.py,sha256=cmjDOv9FX1OAFHJaY3WkveCSOTZiiZKu5ehjHaI-QW0,6138
272
+ tests/test_mysql_pipeline_types.py,sha256=dIs4aYlV9vsGfhvmDHOc-LCx-jDqUzoAkn-v8i2ae7Y,2474
273
+ tests/test_mysql_update_columns.py,sha256=CyEshc7b_yprIXcQtNOaWvCC2ZDb0kzjLOfmd8r3sOY,3458
263
274
  tests/test_offsite_middleware.py,sha256=njpXTdngOqBs60Wj6xgo5EEXlJnMHd7vtYGi9dVauW0,10602
264
275
  tests/test_offsite_middleware_simple.py,sha256=4MfDKSXGHcoFLYnnxCH2rmnzztWyN0xByYLoHtepyiA,7918
265
276
  tests/test_optimized_selector_naming.py,sha256=fbmlB5S2kBwtQWpWoQ4lQ7rUQm2_DeWK-t6KqvIRTUQ,2787
@@ -272,13 +283,15 @@ tests/test_priority_consistency.py,sha256=rVX7nku5N_QpB_ffDu3xqREkCWPX5aNNiXy112
272
283
  tests/test_priority_consistency_fixed.py,sha256=MlYi5PIr5wxunC3Ku4ilnxOatKyRu2qIvhV7pjadkjg,10765
273
284
  tests/test_proxy_api.py,sha256=XnmklS-xU4ke_560gV6AIlBsRmG8YLQTGFAZrTUZuhc,11013
274
285
  tests/test_proxy_health_check.py,sha256=_tDlxa_6TdL3M5RLkHF82roXJ8WIuG5hELBp2GADyKQ,1123
275
- tests/test_proxy_middleware.py,sha256=EdQAfwwAJIBxw9JmUFTDEu_pdxapaTlcJr7KcrY6-AY,4021
276
- tests/test_proxy_middleware_enhanced.py,sha256=QR-p26F63N7MxNjZ2QJUeerh_xdnCDejkrGPIh7Fh4U,7035
277
- tests/test_proxy_middleware_integration.py,sha256=mTPK_XvbmLCV_QoVZzA3ybWOOX61493Ew78WfTp-bYQ,4441
278
- tests/test_proxy_middleware_refactored.py,sha256=VbkTWkmmomcyswobA_gf3p_bERl_eexY2e6ohJQS_A8,6960
286
+ tests/test_proxy_middleware.py,sha256=MC2Hg88Pdpv6i_gTAy4ocIWOOxQ8bF7hYtszwpOzilE,8716
287
+ tests/test_proxy_middleware_enhanced.py,sha256=N7Ly3koCH2uRYk6pxhEJwWpChKdIucdrj0nKvq_E4bw,6896
288
+ tests/test_proxy_middleware_integration.py,sha256=PQhJKM1uGtQTlBh7XlKWAMwNwQ6K8of-P15KHDF2dJg,4729
289
+ tests/test_proxy_middleware_refactored.py,sha256=Z4szCDqyjAwWtgDoddgfeNIVsVefPcrfsZP57gCMrJQ,8272
290
+ tests/test_proxy_only.py,sha256=OqF3An_s9VY4mfLX7kDRz_LMtLpNzC6LS2kQkEyiBRw,2563
279
291
  tests/test_proxy_providers.py,sha256=u_R2fhab90vqvQEaOAztpAOe9tJXvUMIdoDxmStmXJ4,1749
280
292
  tests/test_proxy_stats.py,sha256=ES00CEoDITYPFBGPk8pecFzD3ItYIv6NSpcqNd8-kvo,526
281
293
  tests/test_proxy_strategies.py,sha256=9Z1pXmTNyw-eIhGXlf2abZbJx6igLohYq-_3hldQ5uE,1868
294
+ tests/test_proxy_with_downloader.py,sha256=6OqyLcIM9nPMhL9bCYUIeSvYValKPw72XC-Up8jRri8,4597
282
295
  tests/test_queue_empty_check.py,sha256=ZJC6jOgZq0Wb0-ubrB1ZNcCaUiWeCxoNZmjkd6PY6t0,1182
283
296
  tests/test_queue_manager_double_crawlo.py,sha256=MijZ3JuyHMuqGbRC-8kclFr-4O7m_T8CqezP4qiWk-E,6957
284
297
  tests/test_queue_manager_redis_key.py,sha256=txHLq5XUZZN7h9HUlqlUCEVCTe2IXdf9r7F_P2zNVdY,7117
@@ -288,7 +301,7 @@ tests/test_queue_type_redis_config_consistency.py,sha256=1ew7Zp9CxH1DQ0RUmsZMV-n
288
301
  tests/test_random_headers_default.py,sha256=ulDb3_kRpnTCN1-TO3m6wVM-eMkZS_ezsSbd1ur8Xpg,12772
289
302
  tests/test_random_headers_necessity.py,sha256=SSbNQIE347oCQvuG6yaAambFU-3MyQzTV5jN1kArRGY,11741
290
303
  tests/test_random_user_agent.py,sha256=6HjU4iUcMk-J6bR2N5FhIkWDfnaFKAPNVyRzxmQQ14k,2302
291
- tests/test_real_scenario_proxy.py,sha256=clmLvBfap5OpsaCE08MAWap-78jhVrxYfVfDNyoa4Hg,8454
304
+ tests/test_real_scenario_proxy.py,sha256=L2Mfwt47pvs6dYJDcazeyupoQ_DuvhdulCz6-2GFR9Y,7527
292
305
  tests/test_redis_config.py,sha256=51_Fy1PqIhS0MMO2nR4q6oQjBFxfqcUPK_4NNf5s83g,903
293
306
  tests/test_redis_connection_pool.py,sha256=pKfXdE3Cm_L_fNqI9zqFmqiidCwR0t7hiM_Fu_V1cNI,9328
294
307
  tests/test_redis_key_naming.py,sha256=MTFk656JhiGVTsMctBDhBNOMFcBDZrsQA3UfPZ-Dgj4,6911
@@ -328,6 +341,7 @@ tests/untested_features_report.md,sha256=31aUlsw_1OKe0_ijAjeH85kJ7HJ8qzKLJdOHDjW
328
341
  tests/verify_debug.py,sha256=iQ4Efwg9bQTHscr73VYAAZ8rBIe1u6mQfeaEK5YgneY,1564
329
342
  tests/verify_distributed.py,sha256=0IolM4ymuPOz_uTfHSWFO3Vxzp7Lo6i0zhSbzJhHFtI,4045
330
343
  tests/verify_log_fix.py,sha256=7reyVl3MXTDASyChgU5BAYuzuxvFjSLG9HywAHso0qg,4336
344
+ tests/verify_mysql_warnings.py,sha256=TMPsB1yp7R_c3S6LllgPJ-n_4He6gHVygAC81zbeQrc,4106
331
345
  tests/ofweek_scrapy/scrapy.cfg,sha256=D_8rsW65iTbH7nG1kI25jYTCpoQKBVa2shajrsC6fBw,280
332
346
  tests/ofweek_scrapy/ofweek_scrapy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
333
347
  tests/ofweek_scrapy/ofweek_scrapy/items.py,sha256=Y_TwwHPAgOXTuCTdnhRxil7vYPk1_rzj1ZatTq4AX-I,280
@@ -340,8 +354,8 @@ tests/scrapy_comparison/ofweek_scrapy.py,sha256=rhVds_WjYum1bLuWWe90HtXE51fZXEqh
340
354
  tests/scrapy_comparison/scrapy_test.py,sha256=-IsGUHPBgEL0TmXjeLZl-TUA01B7Dsc2nRo4JZbFwZA,5599
341
355
  tests/test_spiders/__init__.py,sha256=Ws2DhfUA0Xh5Cxr9M46td7B6hyNoLTyAhZ60FnIh6D0,20
342
356
  tests/test_spiders/test_spider.py,sha256=kNGEg80HMMFgzVseI1jJjljZEBy3QYKt_3SXGASffFM,168
343
- crawlo-1.4.5.dist-info/METADATA,sha256=o0MSsONyv_KU7dMNANtCZlkLpVdDUz8zGJKd5i2DM1g,9355
344
- crawlo-1.4.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
345
- crawlo-1.4.5.dist-info/entry_points.txt,sha256=5HoVoTSPxI8SCa5B7pQYxLSrkOdiunyO9tqNsLMv52g,43
346
- crawlo-1.4.5.dist-info/top_level.txt,sha256=keG_67pbZ_wZL2dmDRA9RMaNHTaV_x_oxZ9DKNgwvR0,22
347
- crawlo-1.4.5.dist-info/RECORD,,
357
+ crawlo-1.4.6.dist-info/METADATA,sha256=j66m-xE1oVuLE4WEnDbBjH6PXGbfbgM7yxSF616EOHo,9355
358
+ crawlo-1.4.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
359
+ crawlo-1.4.6.dist-info/entry_points.txt,sha256=5HoVoTSPxI8SCa5B7pQYxLSrkOdiunyO9tqNsLMv52g,43
360
+ crawlo-1.4.6.dist-info/top_level.txt,sha256=keG_67pbZ_wZL2dmDRA9RMaNHTaV_x_oxZ9DKNgwvR0,22
361
+ crawlo-1.4.6.dist-info/RECORD,,
@@ -74,14 +74,18 @@ async def main():
74
74
  config = CrawloConfig.standalone(
75
75
  concurrency=2,
76
76
  download_delay=1.0,
77
- PROXY_ENABLED=True,
78
- # 配置认证代理(请替换为实际的代理信息)
79
- PROXY_API_URL="http://your-proxy-provider.com/api/get", # 代理API地址
80
- # 如果使用固定代理,可以直接设置:
77
+ # 代理配置
78
+ # 高级代理配置(适用于ProxyMiddleware)
79
+ # 只要配置了代理API URL,中间件就会自动启用
80
+ PROXY_API_URL="http://proxy-api.example.com/get", # 代理API地址
81
+
82
+ # 代理配置(适用于ProxyMiddleware)
83
+ # 只要配置了代理列表,中间件就会自动启用
81
84
  # PROXY_LIST=[
82
- # "http://username:password@proxy1.example.com:8080",
83
- # "http://username:password@proxy2.example.com:8080",
85
+ # "http://user:pass@proxy1.example.com:8080",
86
+ # "http://user:pass@proxy2.example.com:8080"
84
87
  # ],
88
+
85
89
  LOG_LEVEL='INFO'
86
90
  )
87
91
 
@@ -0,0 +1,77 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ 解释 MySQL ON DUPLICATE KEY UPDATE 行为
4
+ 帮助理解为什么在使用 MYSQL_UPDATE_COLUMNS 时可能显示"未插入新记录"
5
+ """
6
+ import sys
7
+ import os
8
+
9
+ # 添加项目根目录到 Python 路径
10
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
11
+
12
+ from crawlo.utils.db_helper import SQLBuilder
13
+
14
+
15
+ def explain_mysql_behavior():  # Build one sample INSERT via SQLBuilder.make_insert and print a walkthrough of ON DUPLICATE KEY UPDATE affected-row counts; the visible code only prints
16
+ """解释 MySQL ON DUPLICATE KEY UPDATE 的行为"""  # Docstring (zh): "Explain the behavior of MySQL ON DUPLICATE KEY UPDATE"
17
+ print("=== MySQL ON DUPLICATE KEY UPDATE 行为解释 ===\n")
18
+
19
+ # Sample item that mimics data seen in real use
20
+ table = "news_items"
21
+ item_data = {
22
+ 'title': '新一代OLED屏下光谱颜色传感技术:解锁显示新密码,重塑视觉新体验',
23
+ 'publish_time': '2025-10-09 09:57',
24
+ 'url': 'https://ee.ofweek.com/2025-10/ART-8460-2806-30671544.html',
25
+ 'source': '',
26
+ 'content': '在全球智能手机市场竞争日趋白热化的当下,消费者对手机屏幕显示效果的要求愈发严苛...'
27
+ }
28
+
29
+ print("当使用 MYSQL_UPDATE_COLUMNS 配置时:")
30
+ print("MYSQL_UPDATE_COLUMNS = ('title', 'publish_time')")
31
+ print()
32
+
33
+ # Build the SQL statement (same update_columns as the setting printed above)
34
+ sql = SQLBuilder.make_insert(
35
+ table=table,
36
+ data=item_data,
37
+ auto_update=False,
38
+ update_columns=('title', 'publish_time'),
39
+ insert_ignore=False
40
+ )
41
+
42
+ print("生成的 SQL 语句:")
43
+ print(sql)
44
+ print()
45
+
46
+ print("MySQL 行为说明:")
47
+ print("1. 如果这是一条新记录(没有主键或唯一键冲突):")
48
+ print(" - MySQL 会正常插入记录")
49
+ print(" - 返回影响行数为 1")
50
+ print()
51
+ print("2. 如果遇到主键或唯一键冲突:")
52
+ print(" - MySQL 会执行 ON DUPLICATE KEY UPDATE 子句")
53
+ print(" - 更新指定的字段: title 和 publish_time")  # NOTE(review): the affected-row count of 2 for an update that changes the row is not mentioned in this walkthrough
54
+ print()
55
+ print("3. 关键点 - 如果更新的字段值与现有记录完全相同:")
56
+ print(" - MySQL 不会实际更新任何数据")
57
+ print(" - 返回影响行数为 0")
58
+ print(" - 这就是你看到 'SQL执行成功但未插入新记录' 的原因")
59
+ print()
60
+
61
+ print("如何验证是否真的更新了数据:")
62
+ print("1. 检查数据库中的记录是否发生变化")
63
+ print("2. 如果内容字段有变化但未在 update_columns 中指定,则不会更新")
64
+ print("3. 可以在 update_columns 中添加更多字段以确保更新")
65
+ print()
66
+
67
+ print("建议的配置:")
68
+ print("# 如果希望在冲突时更新更多字段,可以这样配置:")
69
+ print("MYSQL_UPDATE_COLUMNS = ('title', 'publish_time', 'content')")
70
+ print()
71
+ print("# 或者如果希望完全替换记录:")
72
+ print("MYSQL_AUTO_UPDATE = True")
73
+ print("MYSQL_UPDATE_COLUMNS = () # 空元组")
74
+
75
+
76
+ if __name__ == "__main__":
77
+ explain_mysql_behavior()
@@ -0,0 +1,140 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ 模拟 MySQL ON DUPLICATE KEY UPDATE 行为测试
4
+ 演示不同情况下的影响行数
5
+ """
6
+ import sys
7
+ import os
8
+
9
+ # 添加项目根目录到 Python 路径
10
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
11
+
12
+ from crawlo.utils.db_helper import SQLBuilder
13
+
14
+
15
+ def simulate_mysql_scenarios():  # Build five INSERT variants via SQLBuilder.make_insert and print the first 100 characters of each with the expected affected-row count; the visible code only prints
16
+ """模拟不同的 MySQL 场景"""  # Docstring (zh): "Simulate different MySQL scenarios"
17
+ print("=== MySQL 场景模拟测试 ===\n")
18
+
19
+ table = "news_items"
20
+
21
+ # Scenario 1: insert a new record
22
+ print("场景1: 插入新记录")
23
+ new_data = {
24
+ 'title': '新文章标题',
25
+ 'publish_time': '2025-10-09 10:00',
26
+ 'url': 'https://example.com/new-article',
27
+ 'source': '新来源',
28
+ 'content': '新文章内容'
29
+ }
30
+
31
+ sql1 = SQLBuilder.make_insert(
32
+ table=table,
33
+ data=new_data,
34
+ auto_update=False,
35
+ update_columns=('title', 'publish_time'),
36
+ insert_ignore=False
37
+ )
38
+
39
+ print(f"SQL: {sql1[:100]}...")
40
+ print("预期行为: 正常插入,影响行数 = 1")
41
+ print()
42
+
43
+ # Scenario 2: key conflict, but the updated column values are unchanged
44
+ print("场景2: 主键冲突,更新字段值相同")
45
+ duplicate_data = {
46
+ 'title': '已有文章标题', # assume a record with the same title already exists in the database
47
+ 'publish_time': '2025-10-09 09:00', # same publish time as the existing database record
48
+ 'url': 'https://example.com/existing-article',
49
+ 'source': '来源',
50
+ 'content': '文章内容'
51
+ }
52
+
53
+ sql2 = SQLBuilder.make_insert(
54
+ table=table,
55
+ data=duplicate_data,
56
+ auto_update=False,
57
+ update_columns=('title', 'publish_time'),
58
+ insert_ignore=False
59
+ )
60
+
61
+ print(f"SQL: {sql2[:100]}...")
62
+ print("预期行为: 触发 ON DUPLICATE KEY UPDATE,但字段值未变化,影响行数 = 0")
63
+ print()
64
+
65
+ # Scenario 3: key conflict and the updated column values differ
66
+ print("场景3: 主键冲突,更新字段值不同")
67
+ updated_data = {
68
+ 'title': '已有文章标题', # same as the existing database record
69
+ 'publish_time': '2025-10-09 11:00', # publish time differs from the existing database record
70
+ 'url': 'https://example.com/existing-article',
71
+ 'source': '来源',
72
+ 'content': '文章内容'
73
+ }
74
+
75
+ sql3 = SQLBuilder.make_insert(
76
+ table=table,
77
+ data=updated_data,
78
+ auto_update=False,
79
+ update_columns=('title', 'publish_time'),
80
+ insert_ignore=False
81
+ )
82
+
83
+ print(f"SQL: {sql3[:100]}...")
84
+ print("预期行为: 触发 ON DUPLICATE KEY UPDATE,字段值变化,影响行数 = 2")
85
+ print("(MySQL 5.7+ 版本中,更新操作返回的影响行数为 2)")  # NOTE(review): the MySQL manual documents the 2-row result for ON DUPLICATE KEY UPDATE without tying it to 5.7+ (it changes only under CLIENT_FOUND_ROWS) — confirm the version qualifier
86
+ print()
87
+
88
+ # Scenario 4: use INSERT IGNORE
89
+ print("场景4: 使用 INSERT IGNORE")
90
+ ignore_data = {
91
+ 'title': '忽略重复标题', # assume a record with the same title already exists in the database
92
+ 'publish_time': '2025-10-09 12:00',
93
+ 'url': 'https://example.com/ignore-article',
94
+ 'source': '忽略来源',
95
+ 'content': '忽略内容'
96
+ }
97
+
98
+ sql4 = SQLBuilder.make_insert(
99
+ table=table,
100
+ data=ignore_data,
101
+ auto_update=False,
102
+ update_columns=(),
103
+ insert_ignore=True
104
+ )
105
+
106
+ print(f"SQL: {sql4[:100]}...")
107
+ print("预期行为: 遇到重复记录时忽略插入,影响行数 = 0")
108
+ print()
109
+
110
+ # Scenario 5: use REPLACE INTO
111
+ print("场景5: 使用 REPLACE INTO")
112
+ replace_data = {
113
+ 'title': '替换文章标题', # assume a record with the same title already exists in the database
114
+ 'publish_time': '2025-10-09 13:00',
115
+ 'url': 'https://example.com/replace-article',
116
+ 'source': '替换来源',
117
+ 'content': '替换内容'
118
+ }
119
+
120
+ sql5 = SQLBuilder.make_insert(
121
+ table=table,
122
+ data=replace_data,
123
+ auto_update=True, # use REPLACE INTO
124
+ update_columns=(),
125
+ insert_ignore=False
126
+ )
127
+
128
+ print(f"SQL: {sql5[:100]}...")
129
+ print("预期行为: 删除旧记录并插入新记录,影响行数 = 2")
130
+ print()
131
+
132
+ print("=== 总结 ===")
133
+ print("1. 当使用 MYSQL_UPDATE_COLUMNS 时,影响行数为 0 并不表示错误")
134
+ print("2. 这可能意味着更新的字段值与现有记录相同")
135
+ print("3. 如果需要确保更新,可以在 update_columns 中包含更多字段")
136
+ print("4. 如果需要完全替换记录,使用 MYSQL_AUTO_UPDATE = True")
137
+
138
+
139
+ if __name__ == "__main__":
140
+ simulate_mysql_scenarios()
@@ -0,0 +1,57 @@
1
+ # -*- coding: utf-8 -*-
2
+ import asyncio
3
+ from asyncmy import create_pool
4
+
5
+ async def test_asyncmy_usage():  # Manual probe: connects to MySQL at 127.0.0.1:3306 and reports whether cursor.execute() returns an awaitable; any exception is printed with a traceback
6
+ """测试asyncmy库的正确使用方式"""  # Docstring (zh): "Test the correct way to use the asyncmy library"
7
+ try:
8
+ # Create the connection pool
9
+ pool = await create_pool(
10
+ host='127.0.0.1',
11
+ port=3306,
12
+ user='root',
13
+ password='123456',  # NOTE(review): hard-coded credentials for a local test database
14
+ db='test',
15
+ minsize=1,
16
+ maxsize=5
17
+ )
18
+
19
+ # Acquire a connection
20
+ conn = await pool.acquire()
21
+ try:
22
+ # Get a cursor
23
+ cursor = await conn.cursor()  # NOTE(review): confirm that asyncmy's Connection.cursor() returns an awaitable; if it is a plain method this await would fail
24
+ try:
25
+ # Execute SQL
26
+ result = cursor.execute("SELECT 1")  # not awaited here on purpose: the returned object is inspected below
27
+ print(f"execute返回类型: {type(result)}")
28
+ print(f"execute返回值: {result}")
29
+
30
+ # Check whether the result needs to be awaited
31
+ if hasattr(result, '__await__'):
32
+ print("execute返回的是协程对象,需要await")
33
+ result = await result
34
+ else:
35
+ print("execute返回的不是协程对象,不需要await")
36
+
37
+ # Commit the transaction
38
+ await conn.commit()
39
+
40
+ finally:
41
+ await cursor.close()
42
+ finally:
43
+ pool.release(conn)
44
+
45
+ # Close the connection pool
46
+ pool.close()  # NOTE(review): appears to be skipped when an exception propagates from the code above (indentation is lost in this diff) — confirm
47
+ await pool.wait_closed()
48
+
49
+ print("测试完成")
50
+
51
+ except Exception as e:
52
+ print(f"测试出错: {e}")
53
+ import traceback
54
+ traceback.print_exc()
55
+
56
+ if __name__ == "__main__":
57
+ asyncio.run(test_asyncmy_usage())
@@ -61,8 +61,14 @@ async def test_proxy_integration():
61
61
  config = CrawloConfig.standalone(
62
62
  concurrency=1,
63
63
  download_delay=0.1,
64
- PROXY_ENABLED=True,
65
- PROXY_API_URL="https://api.proxyprovider.com/get", # 模拟代理API
64
+ # 代理配置
65
+ # 高级代理配置(适用于ProxyMiddleware)
66
+ # 只要配置了代理API URL,中间件就会自动启用
67
+ PROXY_API_URL="https://proxy-api.example.com/get", # 模拟代理API
68
+
69
+ # 代理配置(适用于ProxyMiddleware)
70
+ # 只要配置了代理列表,中间件就会自动启用
71
+ # PROXY_LIST=["http://proxy1:8080", "http://proxy2:8080"],
66
72
  LOG_LEVEL='WARNING' # 减少日志输出
67
73
  )
68
74
 
@@ -35,7 +35,7 @@ class MockCrawler:
35
35
  self.spider = MockSpider(self) # 添加spider属性
36
36
 
37
37
 
38
- def create_test_settings(proxy_url=None):
38
+ def create_test_settings(proxy_url=None, proxy_list=None):
39
39
  """创建测试设置"""
40
40
  settings = SettingManager()
41
41
  settings.set("LOG_LEVEL", "DEBUG")
@@ -47,12 +47,13 @@ def create_test_settings(proxy_url=None):
47
47
 
48
48
  # 代理相关设置
49
49
  if proxy_url:
50
- settings.set("PROXY_ENABLED", True)
50
+ # 高级代理配置(适用于ProxyMiddleware)
51
+ # 只要配置了代理API URL,中间件就会自动启用
51
52
  settings.set("PROXY_API_URL", proxy_url)
52
- settings.set("PROXY_REFRESH_INTERVAL", 60)
53
- settings.set("PROXY_POOL_SIZE", 5)
54
- else:
55
- settings.set("PROXY_ENABLED", False)
53
+ elif proxy_list:
54
+ # 代理配置(适用于ProxyMiddleware)
55
+ # 只要配置了代理列表,中间件就会自动启用
56
+ settings.set("PROXY_LIST", proxy_list)
56
57
 
57
58
  return settings
58
59
 
@@ -65,7 +66,7 @@ async def test_aiohttp_with_proxy(proxy_url, target_url):
65
66
 
66
67
  try:
67
68
  # 创建设置
68
- settings = create_test_settings(proxy_url)
69
+ settings = create_test_settings(proxy_url=proxy_url)
69
70
  crawler = MockCrawler(settings)
70
71
 
71
72
  # 创建下载器
@@ -73,6 +74,7 @@ async def test_aiohttp_with_proxy(proxy_url, target_url):
73
74
  downloader.open()
74
75
 
75
76
  # 创建代理中间件
77
+ from crawlo.middleware.proxy import ProxyMiddleware
76
78
  proxy_middleware = ProxyMiddleware(settings, "DEBUG")
77
79
 
78
80
  # 创建请求
@@ -115,15 +117,15 @@ async def test_aiohttp_with_proxy(proxy_url, target_url):
115
117
  pass
116
118
 
117
119
 
118
- async def test_httpx_with_proxy_async(proxy_url, target_url):
120
+ async def test_httpx_with_proxy_async(proxy_list, target_url):
119
121
  """测试httpx下载器与代理的适配性"""
120
122
  print(f"\n=== 测试 httpx 下载器与代理 ===")
121
- print(f"代理URL: {proxy_url}")
123
+ print(f"代理列表: {proxy_list}")
122
124
  print(f"目标URL: {target_url}")
123
125
 
124
126
  try:
125
127
  # 创建设置
126
- settings = create_test_settings(proxy_url)
128
+ settings = create_test_settings(proxy_list=proxy_list)
127
129
  crawler = MockCrawler(settings)
128
130
 
129
131
  # 创建下载器
@@ -131,7 +133,8 @@ async def test_httpx_with_proxy_async(proxy_url, target_url):
131
133
  downloader.open()
132
134
 
133
135
  # 创建代理中间件
134
- proxy_middleware = ProxyMiddleware(settings, "DEBUG")
136
+ from crawlo.middleware.simple_proxy import SimpleProxyMiddleware
137
+ proxy_middleware = SimpleProxyMiddleware(settings, "DEBUG")
135
138
 
136
139
  # 创建请求
137
140
  request = Request(url=target_url)
@@ -168,7 +171,6 @@ async def test_httpx_with_proxy_async(proxy_url, target_url):
168
171
  # 清理资源
169
172
  try:
170
173
  await downloader.close()
171
- await proxy_middleware.close()
172
174
  except:
173
175
  pass
174
176
 
@@ -181,7 +183,7 @@ async def test_curl_cffi_with_proxy_async(proxy_url, target_url):
181
183
 
182
184
  try:
183
185
  # 创建设置
184
- settings = create_test_settings(proxy_url)
186
+ settings = create_test_settings(proxy_url=proxy_url)
185
187
  crawler = MockCrawler(settings)
186
188
 
187
189
  # 创建下载器
@@ -238,26 +240,28 @@ async def main():
238
240
  # 使用测试代理URL(这里使用一个公开的测试代理)
239
241
  # 注意:在实际使用中,您需要替换为有效的代理URL
240
242
  test_proxy_url = "http://test.proxy.api:8080/proxy/getitem/"
243
+ test_proxy_list = ["http://proxy1:8080", "http://proxy2:8080"]
241
244
  test_target_url = "https://httpbin.org/ip" # 一个返回IP信息的测试站点
242
245
 
243
246
  print(f"测试代理API: {test_proxy_url}")
247
+ print(f"测试代理列表: {test_proxy_list}")
244
248
  print(f"测试目标URL: {test_target_url}")
245
249
 
246
- # 测试aiohttp下载器
250
+ # 测试aiohttp下载器(使用高级代理)
247
251
  aiohttp_result = await test_aiohttp_with_proxy(test_proxy_url, test_target_url)
248
252
 
249
- # 测试httpx下载器
250
- httpx_result = await test_httpx_with_proxy_async(test_proxy_url, test_target_url)
253
+ # 测试httpx下载器(使用简化代理)
254
+ httpx_result = await test_httpx_with_proxy_async(test_proxy_list, test_target_url)
251
255
 
252
- # 测试curl-cffi下载器
256
+ # 测试curl-cffi下载器(使用高级代理)
253
257
  curl_cffi_result = await test_curl_cffi_with_proxy_async(test_proxy_url, test_target_url)
254
258
 
255
259
  # 汇总结果
256
260
  print("\n" + "="*50)
257
261
  print("测试结果汇总:")
258
- print(f"aiohttp 下载器: {'✓ 通过' if aiohttp_result else '✗ 失败'}")
259
- print(f"httpx 下载器: {'✓ 通过' if httpx_result else '✗ 失败'}")
260
- print(f"curl-cffi 下载器: {'✓ 通过' if curl_cffi_result else '✗ 失败'}")
262
+ print(f"aiohttp 下载器 (高级代理): {'✓ 通过' if aiohttp_result else '✗ 失败'}")
263
+ print(f"httpx 下载器 (简化代理): {'✓ 通过' if httpx_result else '✗ 失败'}")
264
+ print(f"curl-cffi 下载器 (高级代理): {'✓ 通过' if curl_cffi_result else '✗ 失败'}")
261
265
 
262
266
  overall_result = all([aiohttp_result, httpx_result, curl_cffi_result])
263
267
  print(f"\n总体结果: {'✓ 所有下载器都适配代理中间件' if overall_result else '✗ 部分下载器不兼容'}")