crawlo 1.4.3__py3-none-any.whl → 1.4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crawlo might be problematic. Click here for more details.

Files changed (107) hide show
  1. crawlo/__init__.py +11 -15
  2. crawlo/__version__.py +1 -1
  3. crawlo/commands/genspider.py +52 -17
  4. crawlo/commands/startproject.py +24 -0
  5. crawlo/core/engine.py +2 -2
  6. crawlo/core/scheduler.py +4 -4
  7. crawlo/crawler.py +13 -6
  8. crawlo/downloader/__init__.py +5 -2
  9. crawlo/extension/__init__.py +2 -2
  10. crawlo/filters/aioredis_filter.py +8 -1
  11. crawlo/filters/memory_filter.py +8 -1
  12. crawlo/initialization/built_in.py +13 -4
  13. crawlo/initialization/core.py +5 -4
  14. crawlo/interfaces.py +24 -0
  15. crawlo/middleware/__init__.py +7 -4
  16. crawlo/middleware/middleware_manager.py +15 -8
  17. crawlo/mode_manager.py +45 -11
  18. crawlo/network/response.py +374 -69
  19. crawlo/pipelines/mysql_pipeline.py +6 -6
  20. crawlo/pipelines/pipeline_manager.py +2 -2
  21. crawlo/project.py +2 -4
  22. crawlo/queue/pqueue.py +2 -6
  23. crawlo/queue/queue_manager.py +1 -2
  24. crawlo/settings/default_settings.py +15 -30
  25. crawlo/task_manager.py +2 -2
  26. crawlo/templates/project/items.py.tmpl +2 -2
  27. crawlo/templates/project/middlewares.py.tmpl +9 -89
  28. crawlo/templates/project/pipelines.py.tmpl +8 -68
  29. crawlo/templates/project/settings.py.tmpl +51 -65
  30. crawlo/templates/project/settings_distributed.py.tmpl +59 -67
  31. crawlo/templates/project/settings_gentle.py.tmpl +45 -40
  32. crawlo/templates/project/settings_high_performance.py.tmpl +45 -40
  33. crawlo/templates/project/settings_minimal.py.tmpl +37 -26
  34. crawlo/templates/project/settings_simple.py.tmpl +45 -40
  35. crawlo/templates/run.py.tmpl +3 -7
  36. crawlo/tools/__init__.py +0 -11
  37. crawlo/utils/__init__.py +17 -1
  38. crawlo/utils/db_helper.py +220 -319
  39. crawlo/utils/error_handler.py +313 -67
  40. crawlo/utils/fingerprint.py +3 -4
  41. crawlo/utils/misc.py +82 -0
  42. crawlo/utils/request.py +55 -66
  43. crawlo/utils/selector_helper.py +138 -0
  44. crawlo/utils/spider_loader.py +185 -45
  45. crawlo/utils/text_helper.py +95 -0
  46. crawlo-1.4.5.dist-info/METADATA +329 -0
  47. {crawlo-1.4.3.dist-info → crawlo-1.4.5.dist-info}/RECORD +89 -68
  48. tests/bug_check_test.py +251 -0
  49. tests/direct_selector_helper_test.py +97 -0
  50. tests/ofweek_scrapy/ofweek_scrapy/items.py +12 -0
  51. tests/ofweek_scrapy/ofweek_scrapy/middlewares.py +100 -0
  52. tests/ofweek_scrapy/ofweek_scrapy/pipelines.py +13 -0
  53. tests/ofweek_scrapy/ofweek_scrapy/settings.py +85 -0
  54. tests/ofweek_scrapy/ofweek_scrapy/spiders/__init__.py +4 -0
  55. tests/ofweek_scrapy/ofweek_scrapy/spiders/ofweek_spider.py +162 -0
  56. tests/ofweek_scrapy/scrapy.cfg +11 -0
  57. tests/performance_comparison.py +4 -5
  58. tests/simple_crawlo_test.py +1 -2
  59. tests/simple_follow_test.py +39 -0
  60. tests/simple_response_selector_test.py +95 -0
  61. tests/simple_selector_helper_test.py +155 -0
  62. tests/simple_selector_test.py +208 -0
  63. tests/simple_url_test.py +74 -0
  64. tests/test_crawler_process_import.py +39 -0
  65. tests/test_crawler_process_spider_modules.py +48 -0
  66. tests/test_edge_cases.py +7 -5
  67. tests/test_encoding_core.py +57 -0
  68. tests/test_encoding_detection.py +127 -0
  69. tests/test_factory_compatibility.py +197 -0
  70. tests/test_multi_directory.py +68 -0
  71. tests/test_multiple_spider_modules.py +81 -0
  72. tests/test_optimized_selector_naming.py +101 -0
  73. tests/test_priority_behavior.py +18 -18
  74. tests/test_response_follow.py +105 -0
  75. tests/test_response_selector_methods.py +93 -0
  76. tests/test_response_url_methods.py +71 -0
  77. tests/test_response_urljoin.py +87 -0
  78. tests/test_scrapy_style_encoding.py +113 -0
  79. tests/test_selector_helper.py +101 -0
  80. tests/test_selector_optimizations.py +147 -0
  81. tests/test_spider_loader.py +50 -0
  82. tests/test_spider_loader_comprehensive.py +70 -0
  83. tests/test_spider_modules.py +85 -0
  84. tests/test_spiders/__init__.py +1 -0
  85. tests/test_spiders/test_spider.py +10 -0
  86. crawlo/tools/anti_crawler.py +0 -269
  87. crawlo/utils/class_loader.py +0 -26
  88. crawlo/utils/enhanced_error_handler.py +0 -357
  89. crawlo-1.4.3.dist-info/METADATA +0 -190
  90. examples/test_project/__init__.py +0 -7
  91. examples/test_project/run.py +0 -35
  92. examples/test_project/test_project/__init__.py +0 -4
  93. examples/test_project/test_project/items.py +0 -18
  94. examples/test_project/test_project/middlewares.py +0 -119
  95. examples/test_project/test_project/pipelines.py +0 -97
  96. examples/test_project/test_project/settings.py +0 -170
  97. examples/test_project/test_project/spiders/__init__.py +0 -10
  98. examples/test_project/test_project/spiders/of_week_dis.py +0 -144
  99. tests/simple_log_test.py +0 -58
  100. tests/simple_test.py +0 -48
  101. tests/test_framework_logger.py +0 -67
  102. tests/test_framework_startup.py +0 -65
  103. tests/test_mode_change.py +0 -73
  104. {crawlo-1.4.3.dist-info → crawlo-1.4.5.dist-info}/WHEEL +0 -0
  105. {crawlo-1.4.3.dist-info → crawlo-1.4.5.dist-info}/entry_points.txt +0 -0
  106. {crawlo-1.4.3.dist-info → crawlo-1.4.5.dist-info}/top_level.txt +0 -0
  107. /tests/{final_command_test_report.md → ofweek_scrapy/ofweek_scrapy/__init__.py} +0 -0
@@ -1,40 +1,41 @@
1
- crawlo/__init__.py,sha256=2Io5P9qJghOAjjD3YWdaiIq5laPLyLWVkEqgiVfUa3o,2381
2
- crawlo/__version__.py,sha256=adPldP4cMp2T8pbTFGYTXV50hu_smS3hxWkk5kLXpZE,23
1
+ crawlo/__init__.py,sha256=n5vFwi0iuYrpAIyoNJZzWHV1gvF-vh-Yze3jiuwEXqM,2180
2
+ crawlo/__version__.py,sha256=47Hd5fKyrYgSfmOfBF7ibw9EyAE1ctXOQOLg_x_Ld9w,23
3
3
  crawlo/cli.py,sha256=AQnAB5NMI-Ic1VPw_Jjng8L4AI4-wMozOwzE6CfXkZU,2402
4
4
  crawlo/config.py,sha256=EQIT7WpkXAlr2ocd5SYJYOKTSWUlQx2AkTHX7ErEWxw,9798
5
5
  crawlo/config_validator.py,sha256=oY4-2bwXUlwHAnGgkI-EznviDfML_dcxbWSGXNSxC2k,11516
6
- crawlo/crawler.py,sha256=i4rc9beEOilKGK633nRh5UxCNgciil9Lyfj38xgIauU,26998
6
+ crawlo/crawler.py,sha256=6f9eDeUEZVfnUywaZ6CnL5R3bHO4sG82z-Syl3zZKvE,27360
7
7
  crawlo/event.py,sha256=ZhoPW5CglCEuZNFEwviSCBIw0pT5O6jT98bqYrDFd3E,324
8
8
  crawlo/exceptions.py,sha256=YVIDnC1bKSMv3fXH_6tinWMuD9HmKHIaUfO4_fkX5sY,1247
9
9
  crawlo/framework.py,sha256=9gP6VN4MHqutGXaxnwpNMSULfVYbNp906UdZiJGywlQ,9458
10
- crawlo/mode_manager.py,sha256=S4dUoeVZ4fMnd4pXWutcHwk5Zv68ZBTgo9taR9OkQiM,7768
11
- crawlo/project.py,sha256=nVRc0CIdd9g863NGfuItvajl8zlO5mEta4FQCx9_vZ8,14060
10
+ crawlo/interfaces.py,sha256=q1vwMSiZLfLpPhFa9Y0hAcjYEKvLkW2fZ2fmoAZ-5TE,653
11
+ crawlo/mode_manager.py,sha256=e8QmwsnndFx_hGME_7w-hazKo0GOYjUr-7FBf7dWxgc,8903
12
+ crawlo/project.py,sha256=9wnlHd-rYAC3TT1Fc1ftyUBx7mbDT6TQCqoaIP6N3iA,13998
12
13
  crawlo/stats_collector.py,sha256=hIjlnX750jU4Oncyand1jBccfaX4Tu7egd2DBYu2N7A,2379
13
14
  crawlo/subscriber.py,sha256=h8fx69NJZeWem0ZkCmfHAi2kgfDGFObHpwN0aGNUM6Y,5115
14
- crawlo/task_manager.py,sha256=I9h3Rl0VRAfwqp24CHT3TuEAapNdTbVghkmuJhtM7jg,5966
15
+ crawlo/task_manager.py,sha256=Ic6PFUqZOhLXuZ_UEk_8Neb9FmqYv8I2RzV3vLzFNSU,5966
15
16
  crawlo/commands/__init__.py,sha256=orvY6wLOBwGUEJKeF3h_T1fxj8AaQLjngBDd-3xKOE4,392
16
17
  crawlo/commands/check.py,sha256=TKDhI_sj7kErgiJpt2vCZ9QL-g6yWjrrPWKbgh8pgEU,23199
17
- crawlo/commands/genspider.py,sha256=7YGZdv12G341SWmkGbyDeMde2RgqGYxYXRExFy7KKNc,5088
18
+ crawlo/commands/genspider.py,sha256=JB4ZuFpKsYwtjx3DSsxugH7e3kqxhDWPG5ZKfvM0isI,6041
18
19
  crawlo/commands/help.py,sha256=8xPC0iNCg1rRBoK2bb6noAEANc1JwrdM35eF-j6yeZM,5111
19
20
  crawlo/commands/list.py,sha256=trzcd3kG6DhkOqYZADcl3yR7M8iJBgRw5fE-g9e0gVM,5877
20
21
  crawlo/commands/run.py,sha256=EjpIilgCTkXGVSV4rEISbJubdhqrok9nNe5-xDfDK5E,13169
21
- crawlo/commands/startproject.py,sha256=-Bo8vvDfIhqzGmWyhxMatBlPLhYpRwJC7l4fpbN8vVk,16506
22
+ crawlo/commands/startproject.py,sha256=boZrMyn6TgCi1jt3D3DQfui6hJitjwNO8mqlWKNOBns,17366
22
23
  crawlo/commands/stats.py,sha256=vlGJLyiXZtY0ASdzCK59JNereSsAel4W9JCGaOzCr-8,6201
23
24
  crawlo/commands/utils.py,sha256=YVNEEzlm_qNY3SVvU8h6o2lQMkVgypvoB4ZFrP4gln0,5578
24
25
  crawlo/core/__init__.py,sha256=BWkj3AqZwp2Bk73UzUlC_qsqv_MH_HNrzy4DY1hosj4,1330
25
- crawlo/core/engine.py,sha256=y9mj0nKHb3Ki4scXkxsMO6XoTIqxmbsD0WuryR_6iHg,19385
26
+ crawlo/core/engine.py,sha256=znJ0VDFBImYi6KkTD8GHNo-V9BDnPSv9iYfTYLPsVSc,19379
26
27
  crawlo/core/processor.py,sha256=hR5MrbeZvDUx0ShKntr4qwkeVZzjlPJ8EAKgIFkNVts,1555
27
- crawlo/core/scheduler.py,sha256=-6DBz7gUg8WwUl39DAbi6Ng2AJSswNBCDr_mV3sUZFs,14088
28
+ crawlo/core/scheduler.py,sha256=G9xtrvE1wsTSOTOFUKDEphJvy6Xk5icuCGXTScYy7nQ,14084
28
29
  crawlo/data/__init__.py,sha256=UPqgioMdu3imSUmpLWzVlpvoBnEfaPSAT-crCcWd7iw,121
29
30
  crawlo/data/user_agents.py,sha256=zjjFkldQkqtrn45j0WZplaZLannPxZDeAU0JofxQcBc,9891
30
- crawlo/downloader/__init__.py,sha256=VZG5HiSHOmimiH9okQN3MBwgXsCzxr2awflVz5UiboY,8897
31
+ crawlo/downloader/__init__.py,sha256=P5pl-BGYCkdKWgoIewcYPz7ocVLixVfYuCDFmYyuqIw,8966
31
32
  crawlo/downloader/aiohttp_downloader.py,sha256=-dIFucMOQhiiEmtgEpG2Lqh1vF-PvDddbIrZ8Hge0Ig,9556
32
33
  crawlo/downloader/cffi_downloader.py,sha256=QxoeocCE2DsQCnhZla6-BjhplaTZDWMbEJmNrghWSDA,10488
33
34
  crawlo/downloader/httpx_downloader.py,sha256=MpgDeIdGqNsiSKLOEDBnr5Z0eUbhHnqVEmAuoIfJmFU,12296
34
35
  crawlo/downloader/hybrid_downloader.py,sha256=dNnFeegRnyLaOxTWI6XrWKqqVPx80AZBZNgmrcKRVBM,8240
35
36
  crawlo/downloader/playwright_downloader.py,sha256=L-TVzG7cYfuBlqW0XSZuz5C_r9fpJrmYNcoQ-cDEna4,16663
36
37
  crawlo/downloader/selenium_downloader.py,sha256=P8GuhEw6OYVeN3oeksuBLpUJCELXiu0mAR23X6IIOAA,21508
37
- crawlo/extension/__init__.py,sha256=-R4P9fklpgSB8cGEduMsjkbJZ7ReYSrZaYjApgYUm9U,2986
38
+ crawlo/extension/__init__.py,sha256=wwaTTWYUzbg5b84sQn2JvBlyuhVGkw-REkhVlR2vymA,2980
38
39
  crawlo/extension/health_check.py,sha256=stDpyP4gOzAdbBlPbSf0rge0QounAhF8CtrGq5fa_7c,5657
39
40
  crawlo/extension/log_interval.py,sha256=N25aNjFkjh9br6g3ViFqRrz06C2geAdfGas-OT2oZh8,4497
40
41
  crawlo/extension/log_stats.py,sha256=CWjMb_V1o8j8uwGFvh9SZ7EYLl_OYzmuIsOT5V-_BE4,2452
@@ -47,12 +48,12 @@ crawlo/factories/base.py,sha256=loB_vyc0CsQK0BgwRoSOFS8gPcmv-b9irtjC9UaBGA4,1832
47
48
  crawlo/factories/crawler.py,sha256=e9zl4qytByzsYbz66klY3cZTvQei0-9GjdFK4XCyXcg,3198
48
49
  crawlo/factories/registry.py,sha256=YU87CdsntOz609M0aQbGcCG9glPinUJxOn-_CaM4f-M,2595
49
50
  crawlo/filters/__init__.py,sha256=noSe07tp2Ip_zXwAbS021BojrqNRaObDO-2YV6DOQfc,4381
50
- crawlo/filters/aioredis_filter.py,sha256=unms0WaRsxbCL6VaAQMT-SsBHKyxR6-o118pf-3ErK0,9512
51
- crawlo/filters/memory_filter.py,sha256=ZojFhZ6gE76aQBC-rfImxSkSMwQtiotenx0pIcQOaFg,9561
51
+ crawlo/filters/aioredis_filter.py,sha256=WglGW-XLjsy8r_NDrNsXk_nzwaIq081MBnooHqCCQZA,9841
52
+ crawlo/filters/memory_filter.py,sha256=gIPXCw650v81XRiz0MhWXH-zcn24ERzDTzBQZRoy1YU,9890
52
53
  crawlo/initialization/__init__.py,sha256=uNRMm9GccMYZi51scpvo-CPx_3ayp3Y81psBHlUoDfw,1132
53
- crawlo/initialization/built_in.py,sha256=1uEEtYCTQlfx5uRW-s9oumlmIEJrjZa0QpOLAbZbZqI,15758
54
+ crawlo/initialization/built_in.py,sha256=DlZf4k9FlU52tnwlFtKqWHqlFZpo-VHB0qP61rVqJzo,16259
54
55
  crawlo/initialization/context.py,sha256=wG9t-M-Qttj7TN6gDumPX5Q5GHaPDUpLTZZDne2r3TE,4863
55
- crawlo/initialization/core.py,sha256=sMiSBueoaWoDohQJ50IFC_DSvj0EeYSB7G1MORlDtMc,6872
56
+ crawlo/initialization/core.py,sha256=GWc9QNSp2JmHlCAhgq1aqGDXHcO6QlxFAVfePKC1xeo,6872
56
57
  crawlo/initialization/phases.py,sha256=iWhGITh9eudfSmzf2G3DLPAIJkCDrv9TVBtnAoS1_3c,4176
57
58
  crawlo/initialization/registry.py,sha256=kKVegqWxtPCaZ1mTyVHN4yFecAGDOPFJfebkP-SoobE,4919
58
59
  crawlo/items/__init__.py,sha256=rFpx1qFBo0Ik7bSdnXC8EVTJUOQdoJYGVdhYjaH00nk,409
@@ -66,10 +67,10 @@ crawlo/logging/factory.py,sha256=b4Z0fBmP00GpvpJ7k4QxqYP32n_EqG5KD3ouUWU7L4U,665
66
67
  crawlo/logging/manager.py,sha256=aem7yla0q83rf2CpwQEyg5YMbey4TzkquBVWiKtcqdQ,3182
67
68
  crawlo/logging/monitor.py,sha256=mzZWm3rQ2mGUoAmkEJPUkBmR0VWK66l14aqqhQ0zwE8,4935
68
69
  crawlo/logging/sampler.py,sha256=1BoRMpusP3wbXRnet5xl9_Yb_3_-AUq9WJhK9gYg7v4,5292
69
- crawlo/middleware/__init__.py,sha256=PSwpRLdBUopaQzBp1S0zK_TZbrRagQ4yzvgyLy4tBk8,570
70
+ crawlo/middleware/__init__.py,sha256=khNCstVcYlL14SbLZ8ys9ub1-C8k4FIiMQ99Vw9wA-0,635
70
71
  crawlo/middleware/default_header.py,sha256=Pw-ev8ffi16GeCh84R5L3hAZgp3G1QXS-H5kV3JEp4Q,5164
71
72
  crawlo/middleware/download_delay.py,sha256=2iWnJFtWDlqDy5MsAob8TPiJQoiz9v21yatkBI0eptg,3542
72
- crawlo/middleware/middleware_manager.py,sha256=_Kgd6Ir4cRUiPCEHJELZPOkKNtmu-WAE59dRWKPpAU8,6415
73
+ crawlo/middleware/middleware_manager.py,sha256=H_o0nwo_xQ8aSRnnvEs2Ho3fS-3WNi_5AjChhqvRYnk,6645
73
74
  crawlo/middleware/offsite.py,sha256=4tUkPqXMMXsi1WwYnJ_e7wMd6sRgK19QHRCYq8-w8jk,4682
74
75
  crawlo/middleware/proxy.py,sha256=uKk5OSLIs7jv9bBgkZwsi1rIpthooxhMrGBC2BPRDCc,16022
75
76
  crawlo/middleware/request_ignore.py,sha256=7qdX4zAimjSGwdod_aWUbOTfzLBWZ5KzLVFchGMCxCI,2663
@@ -79,7 +80,7 @@ crawlo/middleware/retry.py,sha256=Acfo95B9wF8fQTCQIqluZOS2hHdnknQu_FOHvpGKJp0,42
79
80
  crawlo/middleware/simple_proxy.py,sha256=rQ4RkqewGvDRCw021nGrg8ngkBzg3wqrEVqvSmBgQ6M,2256
80
81
  crawlo/network/__init__.py,sha256=bvEnpEUBZJ79URfNZbsHhsBKna54hM2-x_BV8eotTA4,418
81
82
  crawlo/network/request.py,sha256=e6-YLgK7SU8D19n21mQwqt_b_aeRVJFOgWPIBPal2ys,14178
82
- crawlo/network/response.py,sha256=QwJhL3xJfPVy_gwtGrg61oAgaqCoCmjyj1Ug7Zju7Pg,13060
83
+ crawlo/network/response.py,sha256=-URnNc_J7qBSG19uJbfuF6A_14MHLOtY78FvcZDzbsI,23418
83
84
  crawlo/pipelines/__init__.py,sha256=FDe2Pr5tiHtV8hFlheElRO_O1aVKvSWlkTcAl9BXAKA,637
84
85
  crawlo/pipelines/bloom_dedup_pipeline.py,sha256=vIF_6noJAdpotrJpnCmrVXCi59gRmHHn28mYW6VukbM,5465
85
86
  crawlo/pipelines/console_pipeline.py,sha256=bwe5hZgaVSWmh3R8XpOaaeAjJme-Ttrpo6G6f1cnLIg,1287
@@ -88,34 +89,33 @@ crawlo/pipelines/database_dedup_pipeline.py,sha256=IxahtD_mhni-Y21_idOMX58_Htf46
88
89
  crawlo/pipelines/json_pipeline.py,sha256=wrCsh8YInmcPLAkhPrHObMx89VZfhf-c7qRrYsTixPE,8585
89
90
  crawlo/pipelines/memory_dedup_pipeline.py,sha256=lKkYPu6vkpPjfQ1-xOLvPFT4VdTI8QVx0yjqtVR0ZB0,3598
90
91
  crawlo/pipelines/mongo_pipeline.py,sha256=PohTKTGw3QRvuP-T6SrquwW3FAHSno8jQ2D2cH_d75U,5837
91
- crawlo/pipelines/mysql_pipeline.py,sha256=Kjgu6cks1KD4FPXwlTnFaos2LG-N8LLaBDyKZ_MEcsI,14196
92
- crawlo/pipelines/pipeline_manager.py,sha256=R6MRb5d-caOit7PZoglJLHa3qQ68U5YAQlwt8KcjRxo,4393
92
+ crawlo/pipelines/mysql_pipeline.py,sha256=pLJQJUKqzWrrOxuO-eHXNq5xLza0DHeuGnpwX2Pc4NI,14186
93
+ crawlo/pipelines/pipeline_manager.py,sha256=_DtWfxcTinIf5ApzUOVjZksd2tPbc7qeKi92IVd_kbs,4387
93
94
  crawlo/pipelines/redis_dedup_pipeline.py,sha256=RB1kXLr8ZuWNrgZKYwt--tlmnWsQTbuwTsSt3pafol8,6077
94
95
  crawlo/queue/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
95
- crawlo/queue/pqueue.py,sha256=Q4H2ePag7-pBjzJ0a4S1P4z_IT_G08T6l1uZBkuxO5A,1262
96
- crawlo/queue/queue_manager.py,sha256=Y2K4P-il5ACdP72X5jgTT-65Fu94_kQZ7H4c7AdRt-c,21547
96
+ crawlo/queue/pqueue.py,sha256=bbgd3l1VfqYXfz-4VFaiWLmJit1LdB3qHalCtNqyrqI,1210
97
+ crawlo/queue/queue_manager.py,sha256=8rKygMxr6DgSjnGsKFmvlTI5XAARvQIN_ENkAruHGXs,21532
97
98
  crawlo/queue/redis_priority_queue.py,sha256=vLvg2toKaRrXD1QyEdu1ZjTmANv7clFaBF7mCtstBmI,15995
98
99
  crawlo/settings/__init__.py,sha256=NgYFLfk_Bw7h6KSoepJn_lMBSqVbCHebjKxaE3_eMgw,130
99
- crawlo/settings/default_settings.py,sha256=DdoMvB7M_OyGKlBOr0lY-T5TN4V1bo6PlPvu4ZcQXnY,12836
100
+ crawlo/settings/default_settings.py,sha256=kBcE5PF-sfB12cjIxHeNPEvzSWSHYDu6saEgrTGXn5o,11970
100
101
  crawlo/settings/setting_manager.py,sha256=yI1tGaludevxKGGZO3Pn4aYofrg2cwYwvMZCFC5PPZw,8595
101
102
  crawlo/spider/__init__.py,sha256=QGhe_yNsnfnCF3G9nSoWEw23b8SkP5oSFU5W79C5DzI,21881
102
103
  crawlo/templates/crawlo.cfg.tmpl,sha256=lwiUVe5sFixJgHFEjn1OtbAeyWsECOrz37uheuVtulk,240
103
- crawlo/templates/run.py.tmpl,sha256=Mt4gcw7RJu9ri4eBRnsAzf0EDt2giFWzpX36OTqEUpQ,961
104
+ crawlo/templates/run.py.tmpl,sha256=g8yst2hkqhKGNotR33fDxwmEsX6aEvhrXY_cfYos_vc,788
104
105
  crawlo/templates/spiders_init.py.tmpl,sha256=p6UK8KWr8FDydNxiAh6Iz29MY5WmgXIkf2z-buOGhOM,354
105
106
  crawlo/templates/project/__init__.py.tmpl,sha256=aQnHaOjMSkTviOC8COUX0fKymuyf8lx2tGduxkMkXEE,61
106
- crawlo/templates/project/items.py.tmpl,sha256=8_3DBA8HrS2XbfHzsMZNJiZbFY6fDJUUMFoFti_obJk,314
107
- crawlo/templates/project/middlewares.py.tmpl,sha256=fxHqi-Sjec5GiHJciprOU-6SAUTzM728NlZckIqf9hM,4278
108
- crawlo/templates/project/pipelines.py.tmpl,sha256=j9oqEhCezmmHlBhMWgYtlgup4jhWnMlv6AEiAOHODkg,2704
109
- crawlo/templates/project/settings.py.tmpl,sha256=gGYwPf-RCuw1pWyBZRUvL1BHzgoLYcIQ1sXUmWDGp1k,7050
110
- crawlo/templates/project/settings_distributed.py.tmpl,sha256=YSN7YcA05MT2yO5mg5MpryVyeEDhcaJEp9TU2NAUPS8,7147
111
- crawlo/templates/project/settings_gentle.py.tmpl,sha256=HS1qumqDciXFBTZYm-RS__ldphDVbuo6Poz8pTCHNEg,6797
112
- crawlo/templates/project/settings_high_performance.py.tmpl,sha256=u6n7S0KzSgJU23pkRifXnsS7AdQplJXefRyeG51v_QI,6887
113
- crawlo/templates/project/settings_minimal.py.tmpl,sha256=GJG44CJlop0_kDwtqhbKJlG_42VThHfICGVUNijkkRs,2550
114
- crawlo/templates/project/settings_simple.py.tmpl,sha256=fmy5PgrS_uVlXnIwXQFQ1Q28Ls121lbVg21Dbxrn6B4,6649
107
+ crawlo/templates/project/items.py.tmpl,sha256=hpQ2AfUmhddnzMuKM5LF6t44dOfFXwJRAZlWFKUFOZw,343
108
+ crawlo/templates/project/middlewares.py.tmpl,sha256=eEobZl8g_0DtiwLYbirQULqOacH-yUrrs4PUrGcJ2UE,1098
109
+ crawlo/templates/project/pipelines.py.tmpl,sha256=7BeaQDMHbIjhKzRtzlCMiFlU8xgMzDs2PIHq3EVUAlQ,887
110
+ crawlo/templates/project/settings.py.tmpl,sha256=mL9_JAyz8R35r-ywRHi4T-dtal7oczU5kodEWxldw40,5265
111
+ crawlo/templates/project/settings_distributed.py.tmpl,sha256=RHzfWZITv-0ErCR9OYEswAZHpA5d9fYil0ZoGCtFt8g,5459
112
+ crawlo/templates/project/settings_gentle.py.tmpl,sha256=pmjrBLjnpGcR90RkcJrM5O8PsTrRhUB92QR3R4TJyko,5733
113
+ crawlo/templates/project/settings_high_performance.py.tmpl,sha256=9QhXSzfxIsMPyq0kZY9h2YBllyXGpGE37bMEbSrs_Ag,5823
114
+ crawlo/templates/project/settings_minimal.py.tmpl,sha256=1qUPhSdHtvLSHTpytUJ8K63sMROhTwkz8e4tVg1fYoM,2222
115
+ crawlo/templates/project/settings_simple.py.tmpl,sha256=sIyrCIVXsHSKl8Yjj8HkGs-ppMFH26a5yp6egVNlT2Q,5585
115
116
  crawlo/templates/project/spiders/__init__.py.tmpl,sha256=llhcIItXpm0TlEeumeLwp4fcYv2NHl8Iru7tLhDhxiE,216
116
117
  crawlo/templates/spider/spider.py.tmpl,sha256=KvU-9YpN6MifDE7XzejjyyQS7RUjLDLZ8zqJcLwSsu0,5198
117
- crawlo/tools/__init__.py,sha256=tOYfYPvZlrO8cmvnMWBjTma6UTLTFZN3qdC8pJwHrzI,4142
118
- crawlo/tools/anti_crawler.py,sha256=LwLC6BkxDSkxc5H1hQ6kY9j7O0PZGAMPZECr7gbqw2M,9431
118
+ crawlo/tools/__init__.py,sha256=sXDMZNP6EwZIFivGcRthxqD1DFMMS8UOJvULAzHD-w4,3927
119
119
  crawlo/tools/authenticated_proxy.py,sha256=ULCK0Cc9F2rGhRqu6kzKBdxzK9v2n1CsatSQ_PMxpAg,7272
120
120
  crawlo/tools/data_formatter.py,sha256=iBDHpZBZvn9O7pLkTQilE1TzYJQEc3z3f6HXoVus0f0,7808
121
121
  crawlo/tools/data_validator.py,sha256=bLWnkpFdclJuqjtSAgMI5nznN4vAuPwE1YaiFWKWenM,5490
@@ -127,44 +127,37 @@ crawlo/tools/request_tools.py,sha256=oXrk4yWMACVa65fDQCQgzsg6a94FH4_lS7qNR53FHYU
127
127
  crawlo/tools/retry_mechanism.py,sha256=4AQ_HLuYt4hYMI9XHoKFk2GQKEiDJB5pAnsMCfjc6Bk,7777
128
128
  crawlo/tools/scenario_adapter.py,sha256=pzysL1B2uQ1ZSEncVHd9Hv2viHNgaxP44YAUcDcppfw,9660
129
129
  crawlo/tools/text_cleaner.py,sha256=UrMGcgRnJaufjmDKIDsRYKMA8znCAArHDgouttWPygk,6690
130
- crawlo/utils/__init__.py,sha256=8kMbOZf9bzOUjtvh2QvqXZmiZh3pYzxXH9YQhYcwcoY,597
130
+ crawlo/utils/__init__.py,sha256=nxLnfqcEGLnsfSEagoKNyu-pm2ByU9BwE5tLxcS71Qo,1003
131
131
  crawlo/utils/batch_processor.py,sha256=8LNy-K2SrQVUxmGEWxQyYw_j9M-erN4Ie7O4d3zpBvM,9142
132
- crawlo/utils/class_loader.py,sha256=kZRGfyA3OPAH2QsQ-beOKjw3JKKBs6OyJyJyXkvWDrc,675
133
132
  crawlo/utils/controlled_spider_mixin.py,sha256=8CuM3Cr2wQLHbaO_ohbCsPcImJnyfZHpERbSeMgQ-AQ,16936
134
- crawlo/utils/db_helper.py,sha256=ZqOt1d3mErVv4TOvoWlov0niUxORB9aHByTmMoNFIDw,10902
135
- crawlo/utils/enhanced_error_handler.py,sha256=fJC__rnYNKTNUHNbgjZtT846HoE31qyGbPft9bwyYLU,14214
133
+ crawlo/utils/db_helper.py,sha256=xTgBTXSWTNXM19rLsypPtnsswO0HdDV1K7zn_wYk4s0,8137
136
134
  crawlo/utils/env_config.py,sha256=W-VD_WF63DHxsyJysvp1eJwRh3L_pBRl_PitQAY3nQY,4079
137
- crawlo/utils/error_handler.py,sha256=vJ_4EVkuVn_TrM16VgN4doyhe_Pg7xWwVUWIENgJQAg,5455
138
- crawlo/utils/fingerprint.py,sha256=70Me5avs40HYbz6LQ9La56EVP2qRapYjX-zF7WQDGkM,3687
135
+ crawlo/utils/error_handler.py,sha256=e2LeUGT_OMcNKcjiX9Pp-NuQh5spsHBqIPBd7VxA2IQ,16247
136
+ crawlo/utils/fingerprint.py,sha256=3IbctH3zwyBjN_12SH7-vrFt-akA2WSo3iAzHc6u--s,3689
139
137
  crawlo/utils/func_tools.py,sha256=y-TYP9H3X67MS_foWy9Z2LIS6GP7Y4Cy3T168ulq3Jc,2451
140
138
  crawlo/utils/large_scale_config.py,sha256=NZMsDj4qbVx06Fu0aHqNKX1yzo6WFT7CgrhVnvw1ZFs,8372
141
139
  crawlo/utils/large_scale_helper.py,sha256=4ORkZcIrwJ0SlKOUh7l7WIuERORuRhNBgHCM71Rz0n0,12452
142
140
  crawlo/utils/log.py,sha256=KmUWVYq8t6fSGOC88nnYCDxwBUdoPWvaBmpOSHn2oWI,2914
141
+ crawlo/utils/misc.py,sha256=m_TbfMf4Aoe70zmkv7XWyFg8Rz0qOYPXepwB6EcYr7Y,2519
143
142
  crawlo/utils/performance_monitor.py,sha256=32KspSo7RWvCX_fl0ZFn4ScWWOqbVVwEhPRd921Ez6I,9832
144
143
  crawlo/utils/queue_helper.py,sha256=gFmkh1jKlIcN1rmo2Jl6vYcLP5ByUWlfHO9eNlZPBLs,4918
145
144
  crawlo/utils/redis_connection_pool.py,sha256=EsPZkmQctWkoYU2wcrqkgwnIWnE6nG4XCXECKn216JA,12575
146
145
  crawlo/utils/redis_key_validator.py,sha256=-UTTx0Ul184pzwSply8hVdH0lp-gkXXOc_gEHR_7VlU,5809
147
- crawlo/utils/request.py,sha256=ejdKpTwc-HE04HQybafhOVywzz57IV3pY0YMkSLyGUo,9065
146
+ crawlo/utils/request.py,sha256=RcINrLvShfZ5VHu1T_hJJRXp-viKWSo35C2JOgWyl2k,8641
148
147
  crawlo/utils/request_serializer.py,sha256=b5abcgjJk4IU6Wfg46AmOAU2wmzu_WqcpEbuAncRMGQ,8931
149
- crawlo/utils/spider_loader.py,sha256=xNzQb7qhQ7TqZsfFtCLpuVcsGi-USriZosU0YSBr9II,2233
148
+ crawlo/utils/selector_helper.py,sha256=BVczzsSzPL5zF5KHXK3hyuqEl9o0ADYEuCH7Aw8aj98,4332
149
+ crawlo/utils/spider_loader.py,sha256=oxifl0p4SOFhvvnD38Em4zGtC7sRr_pw4dki01MoAq0,7677
150
150
  crawlo/utils/system.py,sha256=24zGmtHNhDFMGVo7ftMV-Pqg6_5d63zsyNey9udvJJk,248
151
+ crawlo/utils/text_helper.py,sha256=TTZgQPayMFUOYj8syt47Gwa4AQVY15W1b56STJetAKE,2920
151
152
  crawlo/utils/tools.py,sha256=uy7qw5Z1BIhyEgiHENvtM7WoGCJxlS8EX3PmOA7ouCo,275
152
153
  crawlo/utils/url.py,sha256=RKe_iqdjafsNcp-P2GVLYpsL1qbxiuZLiFc-SqOQkcs,1521
153
154
  examples/__init__.py,sha256=NkRbV8_S1tb8S2AW6BE2U6P2-eGOPwMR1k0YQAwQpSE,130
154
- examples/test_project/__init__.py,sha256=BQ6FVVDjB00-Vyib8h2I3P-LV5tjsFzTSLC2rFDe7Gw,136
155
- examples/test_project/run.py,sha256=HHMFVY8D9ouWytp-xNvFDKB8NphrZGuFbVrah6-afG8,953
156
- examples/test_project/test_project/__init__.py,sha256=RU5IUwU1oeDABAj3ZsLdMo53r3XG4saYIYgO1SXo13g,57
157
- examples/test_project/test_project/items.py,sha256=L_bnTuJLW7xtuja4AaqYuSsQvVdaqMunZWdh1OTz4HY,310
158
- examples/test_project/test_project/middlewares.py,sha256=fE7AUI2Yb9_ZwA4vC4ngn-s231FtEYeYWjXOwd5mkzE,4270
159
- examples/test_project/test_project/pipelines.py,sha256=7EDGrvuNH6JY0XINRwzgPmB-X4Ax6J790jtxbkL2O_U,2696
160
- examples/test_project/test_project/settings.py,sha256=imfrkrwSJ1V3jINe2ZhynlT4_w5bUBkeI9DFK7DS2g0,7115
161
- examples/test_project/test_project/spiders/__init__.py,sha256=Lx_To88ShpAR6Pyd9PUMjct690MZH0gETxz6knRqPRY,212
162
- examples/test_project/test_project/spiders/of_week_dis.py,sha256=-gDLaKGoF5birxCoLL_CX82bYopXjo4QmOV6a7I-Ci0,5178
163
155
  tests/__init__.py,sha256=409aRX8hsPffiZCVjOogtxwhACzBp8G2UTJyUQSxhK0,136
164
156
  tests/advanced_tools_example.py,sha256=1_iitECKCuWUYMNNGo61l3lmwMRrWdA8F_Xw56UaGZY,9340
165
157
  tests/authenticated_proxy_example.py,sha256=fKmHXXxIxCJXjEplttCWRh7PZhbxkBSxJF91Bx-qOME,3019
166
158
  tests/baidu_performance_test.py,sha256=wxdaI7UwKboMYH_qcaqZLxAStvndH60bvKGzD8F-jaI,3974
167
159
  tests/baidu_test.py,sha256=NKYnwDbPJX3tmKtRn7uQ_QWzUXiLTQC-Gdr1cQkJzEo,1874
160
+ tests/bug_check_test.py,sha256=EIDOUk_QgtBOWKuBLm_WHbgJ0fsDuJACJ-nuxnBIdkQ,8056
168
161
  tests/cleaners_example.py,sha256=blVqSJ7SeWUNd17JjHZJgVTzWH65XKevLyaMB_Wg8qA,5324
169
162
  tests/comprehensive_framework_test.py,sha256=_1N-OGbKvBTNachNvIjkL_izr4uv6OUybUkhxxz5MAk,5977
170
163
  tests/comprehensive_test.py,sha256=wypCaB56IV8w8nd5VA5LSXUQ3IgLf0AKKUiCci6yEJQ,2969
@@ -178,13 +171,13 @@ tests/debug_log_config.py,sha256=cPS6qOLnynYTFOxpjcy9OUgIqrkasWb9f2c_PASc2_E,371
178
171
  tests/debug_log_levels.py,sha256=CZWG3KGDq-hYJ5TPhoZTyjKFKkkM-AoK3oP1w-JC1sc,2168
179
172
  tests/debug_pipelines.py,sha256=FMb36bH9lQxBLb-nM579hBRK1S16Vxu1t_BC3Dj8O2w,2164
180
173
  tests/detailed_log_test.py,sha256=oTCFF_Un7Jq2gV4rpRDFOxlHJSthnQhvEf0CSItfB7I,7501
174
+ tests/direct_selector_helper_test.py,sha256=p7_x3x87JUnpKplmwYO4zN5ympcPJSPdHsviso-LmpI,2862
181
175
  tests/distributed_test.py,sha256=u6cEiymZzCItaTClKTxwVjNmOj9_PZii4_eGNAVMDW8,1825
182
176
  tests/distributed_test_debug.py,sha256=pUv6ZKEJ5pK9xOA7lgVk6WW3cBAtnb1bsuZzJ8oGLvY,2181
183
177
  tests/dynamic_loading_example.py,sha256=7LdeQZFevrb-U1_dgr4oX3aYo2Da4HvE_0KIf1fw4Ew,18786
184
178
  tests/dynamic_loading_test.py,sha256=dzDW7b66HeDsIYsYgvNRihE3V6b6gEbUGQpp-eJbcIM,3413
185
179
  tests/env_config_example.py,sha256=_ZRDh_LR23ZKpy9E--y_KM0QIOiZF5vRT98QTn52TY8,4951
186
180
  tests/error_handling_example.py,sha256=grTeo1X17rFz4lhgASb0g5yu4NWbmNz5neyuonnNR40,5294
187
- tests/final_command_test_report.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
188
181
  tests/final_comprehensive_test.py,sha256=szTNbtwKfYNmE0kzDPCsE_kvnTG7FNKl2JERakGhKIk,4314
189
182
  tests/final_log_test.py,sha256=CpZ4ZvvuvFiBvz1a50qN599XIU086ett_I0bSX42BLU,9367
190
183
  tests/final_validation_test.py,sha256=4cuTr58i46JI6M4Tz54e7vrVFrOr3R7HSWgyQPKmM9M,5244
@@ -193,20 +186,23 @@ tests/framework_performance_test.py,sha256=Qp47VrsCK0ylEhDkFOm7lnD8rVkaJ7u1MopsE
193
186
  tests/log_buffering_test.py,sha256=0B5UY1yQuxnBU1pEyz3IBYweN__4fOkPXly-kYfOpNU,3226
194
187
  tests/log_generation_timing_test.py,sha256=zHb_m2FqlpRCYw-wqFWFn8cbVH8UR3VvXKSM6nNnbgo,4681
195
188
  tests/optimized_performance_test.py,sha256=bA0dN4j7ViyTSSiCJEjlkJ9Y7jspTFKs2xX7UXHE8Gs,7379
196
- tests/performance_comparison.py,sha256=CKGFbcwqsF3CAWIaOWF3Ca6o_OdEdZCgIfYb_m6CzIw,9302
189
+ tests/performance_comparison.py,sha256=UevHOM_9z2ILedf_xZ_8F8QiPjb_M8WTfGQrxzKtgco,9266
197
190
  tests/queue_blocking_test.py,sha256=hp-6hmTOO64oOAWVtlN8cFJ95GjbK3t9fj-4q_TKowk,3955
198
191
  tests/queue_test.py,sha256=HeBiBXqAgIAbUkLVQ3McS6NdRselA30m3lnuxNBvZbk,2689
199
192
  tests/redis_key_validation_demo.py,sha256=WD2jvuBwHhLYIb3lVFtvYSSnmXWn1EW4EPCEwFhfi6M,4467
200
193
  tests/request_params_example.py,sha256=J50NdsnK1sDrqG-5m3oA-mu1_wHwVwHIfsWxGeQpz7o,4250
201
194
  tests/response_improvements_example.py,sha256=t1cbG3nesp82bqog4_ku1GvQzNbhRyWa5EaKTmOPrSk,5402
202
195
  tests/simple_command_test.py,sha256=8TowzW45ukKTPeaNC5uij3RR7rqPULiBr2PguSSMdP8,3688
203
- tests/simple_crawlo_test.py,sha256=gfL910xt56HhrERAB9QhO_oxKWKw2j2IsifQkgYtwvE,4848
204
- tests/simple_log_test.py,sha256=xOEGH5UzRLsCpwqgi1VEoZ7NY3DoWckwy6Wy9lKQ6ZE,1757
196
+ tests/simple_crawlo_test.py,sha256=FYDn5cgAxHN81QSYa_wcJcxJit7aLnIopnkHKKr83dE,4801
197
+ tests/simple_follow_test.py,sha256=3vNT5Eqwza6fxAY9Xl_9xtFGdfrPwm6NnVHdRmJsH8A,1053
205
198
  tests/simple_log_test2.py,sha256=Rn3XerVlkT0M-vbQmrQL7bVIZG3REnJNmMvUvKr6C20,3944
206
199
  tests/simple_optimization_test.py,sha256=hflvaC81ra1ZrPOp-Z7rQrH95OnSADvAjy95BLulD6o,3678
207
200
  tests/simple_queue_type_test.py,sha256=wAf4XLKl9oS5BlfrRJ1SLY-kYmNq4YY0LdIC7HmW-yg,1193
201
+ tests/simple_response_selector_test.py,sha256=0naeRUX1n-oAW6VRj-12c6nre2D0RjJ0dD3Nx7BBTjY,2844
202
+ tests/simple_selector_helper_test.py,sha256=l9FsVhQ-z-ICqqetLIyeSaI8Dn6bXNCD8sLdr0tpvms,4438
203
+ tests/simple_selector_test.py,sha256=XzOYzpEzr0yaioLV6v-4XC60VZMd5jRthlyp7Ud02o4,6630
208
204
  tests/simple_spider_test.py,sha256=RzziJg-fbIVJ6_CgbismfkwrLwpJp4WWp2RLgG7Tpws,1168
209
- tests/simple_test.py,sha256=Pyxgg0YnBG_3_NRFla0HgJ21CYlfO4K-js3x6_-6ZEk,1258
205
+ tests/simple_url_test.py,sha256=g9RBn46V7fHZTU0BrB5pl5AGCbw6QuKOXClVACb-MEQ,2297
210
206
  tests/spider_log_timing_test.py,sha256=pvYpKZemClr4mCR76xywhsiWbT5sPdzD_taZKFjlgvM,5573
211
207
  tests/test_advanced_tools.py,sha256=HT_TcwfFzli-CavIJSqQqnCxnBn5FDMX09zL7AJ5tNY,5398
212
208
  tests/test_all_commands.py,sha256=VgVa9SzU5Irvn5igHpC2W4E_6ZDWDt7jc-T4UPK_PFE,7718
@@ -221,6 +217,8 @@ tests/test_config_consistency.py,sha256=RgSxyaypMpysltsGSh1vFMeOShiZZG0rmUKzEhNL
221
217
  tests/test_config_merge.py,sha256=ts1j-TIKkFS0EO5q1I4O7f4YUKR5MLTmRSqOpOlv094,5606
222
218
  tests/test_config_validator.py,sha256=Z4gBHkI0_fEx-xgiiG4T33F4BAuePuF81obpNTXfseY,6202
223
219
  tests/test_controlled_spider_mixin.py,sha256=AQ493ic6AxZAKd7QCgnUES92BBWCMNteTd5DjoQlhwo,2864
220
+ tests/test_crawler_process_import.py,sha256=iIPqSCpv2VRb_hWTu5euLME4PDFf7NwixeBypRuv39Y,1175
221
+ tests/test_crawler_process_spider_modules.py,sha256=uMr4esj6ascVBzt0WrPd3ZOQfKD00O6tJrNhuWOdvV0,1395
224
222
  tests/test_crawlo_proxy_integration.py,sha256=81DVwosMoiSMxj4V_jLzcL7aqvSv_8ucggkQyXsvzT0,2733
225
223
  tests/test_date_tools.py,sha256=pcLDyhLrZ_jh-PhPm4CvLZEgNeH9kLMPKN5zacHwuWM,4053
226
224
  tests/test_dedup_fix.py,sha256=UFdm8lIi0ZIdp40W8ruxRD69bxzijuFUfNyJmB4Fwl0,8788
@@ -235,18 +233,19 @@ tests/test_dynamic_downloaders_proxy.py,sha256=t_aWpxOHi4h3_fg2ImtIq7IIJ0r3PTHtn
235
233
  tests/test_dynamic_proxy.py,sha256=zi7Ocbhc9GL1zCs0XhmG2NvBBeIZ2d2hPJVh18lH4Y0,3172
236
234
  tests/test_dynamic_proxy_config.py,sha256=C_9CEjCJtrr0SxIXCyLDhSIi88ujF7UAT1F-FAphd0w,5853
237
235
  tests/test_dynamic_proxy_real.py,sha256=krWnbFIH26mWNPhOfPMmx3ZxJfOreZxMZFGwVb_8-K8,3511
238
- tests/test_edge_cases.py,sha256=1RnFaCebYTDNNz_LK8M0MepiSwPvJUk_FBK4nQTCUbg,10729
236
+ tests/test_edge_cases.py,sha256=460JtYR6yuTo8J4wqJScMzDkrrDUE2Q8R425AaUycIQ,11127
237
+ tests/test_encoding_core.py,sha256=k5fZET0R1KInhAlbbHEJv4m9d6NuibOxxfIcR43TS7Y,1681
238
+ tests/test_encoding_detection.py,sha256=Zb1KkF2CR57qa0Hr_Iv8msompGJZT2EIL_2mGp0zX9Q,4245
239
239
  tests/test_enhanced_error_handler.py,sha256=Ku_86jv7iDe25v8ZxalcXxJJjIiIvQXWH8ZldbwdVm8,8581
240
240
  tests/test_enhanced_error_handler_comprehensive.py,sha256=j_cxyIPGks9A3untKhAdj5HU0hrLbbzOLu0uAtGUlJo,9369
241
241
  tests/test_env_config.py,sha256=Qu1sDeADs69dSr1x0QmEe8nJrMHneE_4JClt-N901e8,4867
242
242
  tests/test_error_handler_compatibility.py,sha256=xJ43cmCwfBGh-qBwCGiMDPPlfNDLw4ZrmlrHN9IojkU,4241
243
243
  tests/test_factories.py,sha256=wKFfr8YBXPs-AQ8YOFgDhINn5uivKqPBZQPUe5GL9Ig,8865
244
+ tests/test_factory_compatibility.py,sha256=zzTXd3ku3iedgxgB1DxTt3zfetiIl6wCjL9yXIUCpic,6260
244
245
  tests/test_final_validation.py,sha256=OuZI01O0E68Pao--bD-BFDTRZFPc_Mt4W-OXUzlt6ZA,4966
245
246
  tests/test_fingerprint_consistency.py,sha256=68V5u_2hNABI5pNWzXUrA1PJ08Xh9x3-JsMSNNjORMo,4956
246
247
  tests/test_fingerprint_simple.py,sha256=qiSba8gF3Zl91QO_ijJO7KstLdjATs30V_GZCNHShig,1626
247
248
  tests/test_framework_env_usage.py,sha256=bFb_ptdLeX2obdJWEqEHPWweiWR-wR2BpvEaJMQK7h4,4201
248
- tests/test_framework_logger.py,sha256=nAtL_N49L7OurthY329vZK_jBjJIcPcETEIiV0HGqt0,2560
249
- tests/test_framework_startup.py,sha256=I_ij7J6NO3DTBuHlh1Z4CJUcGxLEjRaIB6EdykiISEc,2267
250
249
  tests/test_get_component_logger.py,sha256=UKj5uT1F3L3atoJFmpk7QSDO2fZHgw-7Y84vMFbHRkM,2285
251
250
  tests/test_hash_performance.py,sha256=4eVPwbu66Oun0LVyTTNd9d2cj2V1xq0YZkRg8Z0TO-Q,3211
252
251
  tests/test_integration.py,sha256=lVEzKNAjFzFZHRNZAyJmXxa_5Ogf_qqL4APqs620o58,4839
@@ -258,15 +257,17 @@ tests/test_logging_final.py,sha256=K9vxyODslXza05hElVEcvzbXgzthYKK5CRj4UJTftIw,6
258
257
  tests/test_logging_integration.py,sha256=5WpExyt6BmYBZwrjqtQIGOw1Id64opJBAIahDk70Mlc,11131
259
258
  tests/test_logging_system.py,sha256=LGfK14ZEWzRtl3_VkBGz-AaVa_dDtuC5zu40m8FvmMo,9206
260
259
  tests/test_middleware_debug.py,sha256=gtiaWCxBSTcaNkdqXirM7CsThr_HfiCueBdQCpp7rqg,4572
261
- tests/test_mode_change.py,sha256=GT53CBdxcG3-evcKz_OOfH4PBiq_oqQyuDjRXrvv1UU,2665
262
260
  tests/test_mode_consistency.py,sha256=t72WX0etC_AayaL2AT6e2lIgbfP-zxTgYAiTARSN2Jk,1276
261
+ tests/test_multi_directory.py,sha256=sH9Y3B-fuESlc7J1aICa-AlBcCW8HFR-Q5j2anUr8l0,2196
262
+ tests/test_multiple_spider_modules.py,sha256=M0wPyQW7HMasbMIgn_R78wjZEj4A_DgqaGHp0qF9Y0c,2567
263
263
  tests/test_offsite_middleware.py,sha256=njpXTdngOqBs60Wj6xgo5EEXlJnMHd7vtYGi9dVauW0,10602
264
264
  tests/test_offsite_middleware_simple.py,sha256=4MfDKSXGHcoFLYnnxCH2rmnzztWyN0xByYLoHtepyiA,7918
265
+ tests/test_optimized_selector_naming.py,sha256=fbmlB5S2kBwtQWpWoQ4lQ7rUQm2_DeWK-t6KqvIRTUQ,2787
265
266
  tests/test_parsel.py,sha256=wuZqRFIm9xx1tt6o3Xi_OjvwhT_MPmHiUEj2ax06zlo,701
266
267
  tests/test_performance.py,sha256=Lqs2iu3dmWipZkBPARcwIjDLXsqe42ntz1M4RzqqXKo,11457
267
268
  tests/test_performance_monitor.py,sha256=paW3HGg6ReHb9lwnOivGCrI8STwbwp_mbuhgfds1h3I,4187
268
269
  tests/test_pipeline_fingerprint_consistency.py,sha256=LL55oGSDGy0K8LxoyKa6ogNHXhJlZHe509vCFbibLkk,2847
269
- tests/test_priority_behavior.py,sha256=p04M0HIgBaXyuVHmp-ImITA9jGaKI_RPwZ3DPY_Trt4,9134
270
+ tests/test_priority_behavior.py,sha256=JQ5uv80cAUKV9Eh3S8j5zxYSSL-dmzhwhuKOINM26zU,9325
270
271
  tests/test_priority_consistency.py,sha256=rVX7nku5N_QpB_ffDu3xqREkCWPX5aNNiXy112o9wpA,5756
271
272
  tests/test_priority_consistency_fixed.py,sha256=MlYi5PIr5wxunC3Ku4ilnxOatKyRu2qIvhV7pjadkjg,10765
272
273
  tests/test_proxy_api.py,sha256=XnmklS-xU4ke_560gV6AIlBsRmG8YLQTGFAZrTUZuhc,11013
@@ -300,12 +301,22 @@ tests/test_request_params.py,sha256=l2etiDebqylPBym1e9DSDn4wxwTHv8DQHKq9AzlzlG0,
300
301
  tests/test_request_serialization.py,sha256=Ikgec8tt_sPCK6jcZyK8vRw84zRNE6nxQy9rba1WKmE,2332
301
302
  tests/test_response_code_middleware.py,sha256=wSe525bm-bk_iWMjPDzUu1LfOQrwJY8_MLKAspq2dzk,12193
302
303
  tests/test_response_filter_middleware.py,sha256=YWrGzJ7wmftTjJXcNTtJl3b3EdJsO4oR22ZLWwgErhg,16327
304
+ tests/test_response_follow.py,sha256=gjVZ_knsuHUaCDOjRPk-qG9HRCwReXlVrIx_KpveRHM,3738
303
305
  tests/test_response_improvements.py,sha256=vNqHKyoEoYeEGAUiRzdsff2V6yvJ9QnDwGg7gmN38Ow,6028
306
+ tests/test_response_selector_methods.py,sha256=6aS7q_PBx601MnXbCze-ZWNO-uCKFVjhxcCg9NJqKrI,2738
307
+ tests/test_response_url_methods.py,sha256=plOpSN3JLRI8-lbj4cva8-_jRFdDwmax9Gkv6O2Ac-s,2759
308
+ tests/test_response_urljoin.py,sha256=uXTWhFx8-XBb-Vaghn9YKJz5ThkwRuNykBWW4S7f3go,3379
304
309
  tests/test_retry_middleware.py,sha256=mi7s4HDAqmmd9nvyxs3ZgxdEKOYkCgDu3rDvU_9o8vQ,11133
305
310
  tests/test_retry_middleware_realistic.py,sha256=Sam5y4jCN8oeElU4xxeS5zjAyzS-P8siPV7OaifgsyU,9679
306
311
  tests/test_scheduler.py,sha256=1fCu35QgK5gzgrhD0aUZj5lxL0QbokzPav-yEJxz9Ig,8182
307
312
  tests/test_scheduler_config_update.py,sha256=LuxjEbt20QrPyVkjSFxvTnFtUxwMaHB6TcqjFyo8bow,4261
313
+ tests/test_scrapy_style_encoding.py,sha256=2K_0lHsYqop4qb5lO1U8g7hbae4nkMPrbEvVTl5TT9Y,3408
314
+ tests/test_selector_helper.py,sha256=-fw8p-uJixTKso7OLUBTVJ2oOjL8LIJA1WDetzthGO0,2818
315
+ tests/test_selector_optimizations.py,sha256=5t5RrDkcy0YtK2Es9DBfi3Cejfv6yV4dagulIQhmEho,4665
308
316
  tests/test_simple_response.py,sha256=_ui2PuVZvJcAuLY7HZ8xcsy_tDBimgBqX0ukj3kE5J0,1549
317
+ tests/test_spider_loader.py,sha256=-myi78LztwABeaCpJj-DzO2CxNEYW8lavtVuUreoHcI,1314
318
+ tests/test_spider_loader_comprehensive.py,sha256=gp6SWrDQcrg4RFNkLJQWDQ16NDfpdOlg0rCyJ86-F-8,2591
319
+ tests/test_spider_modules.py,sha256=wxPs28FtpGnQTemMY6r7WxVrwYo3bHnAd5dq94qj1K4,2797
309
320
  tests/test_telecom_spider_redis_key.py,sha256=c-gfixPul2VlYMQJGf0H5ZgYJ461fQgSKbCPrbAU45M,7625
310
321
  tests/test_template_content.py,sha256=2RgCdOA3pMUSOqC_JbTGeW7KonbTqJ0ySYJNWegU-v0,2903
311
322
  tests/test_template_redis_key.py,sha256=99-s0_-8MFJbIvGG_X__sH0qkXWTtJv8fdTdlftsq4I,4876
@@ -317,10 +328,20 @@ tests/untested_features_report.md,sha256=31aUlsw_1OKe0_ijAjeH85kJ7HJ8qzKLJdOHDjW
317
328
  tests/verify_debug.py,sha256=iQ4Efwg9bQTHscr73VYAAZ8rBIe1u6mQfeaEK5YgneY,1564
318
329
  tests/verify_distributed.py,sha256=0IolM4ymuPOz_uTfHSWFO3Vxzp7Lo6i0zhSbzJhHFtI,4045
319
330
  tests/verify_log_fix.py,sha256=7reyVl3MXTDASyChgU5BAYuzuxvFjSLG9HywAHso0qg,4336
331
+ tests/ofweek_scrapy/scrapy.cfg,sha256=D_8rsW65iTbH7nG1kI25jYTCpoQKBVa2shajrsC6fBw,280
332
+ tests/ofweek_scrapy/ofweek_scrapy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
333
+ tests/ofweek_scrapy/ofweek_scrapy/items.py,sha256=Y_TwwHPAgOXTuCTdnhRxil7vYPk1_rzj1ZatTq4AX-I,280
334
+ tests/ofweek_scrapy/ofweek_scrapy/middlewares.py,sha256=O4jVSXZgxtsRzU9O_O3YdkS7_QLndzv3uYP-Op8g254,3654
335
+ tests/ofweek_scrapy/ofweek_scrapy/pipelines.py,sha256=ZO6WqTqPpTwLvnwO7YL0E35OPp4zSfJ_GhMeshNRSow,379
336
+ tests/ofweek_scrapy/ofweek_scrapy/settings.py,sha256=X3Y6goZluAz0n2bepWAKEhZX0URFfe9_lBRBCPgtLPk,2933
337
+ tests/ofweek_scrapy/ofweek_scrapy/spiders/__init__.py,sha256=ULwecZkx3_NTphkz7y_qiazBeUoHFnCCWnKSjoDCZj0,161
338
+ tests/ofweek_scrapy/ofweek_scrapy/spiders/ofweek_spider.py,sha256=gcfKze-ipzP7JTDGCL3TgtjwIwfgI7dPL6GmdXVT0fs,6880
320
339
  tests/scrapy_comparison/ofweek_scrapy.py,sha256=rhVds_WjYum1bLuWWe90HtXE51fZXEqhhPSc822ZasQ,5790
321
340
  tests/scrapy_comparison/scrapy_test.py,sha256=-IsGUHPBgEL0TmXjeLZl-TUA01B7Dsc2nRo4JZbFwZA,5599
322
- crawlo-1.4.3.dist-info/METADATA,sha256=PTbW7GP9xgBjFEX1adwjxeJHqM_x9VRU_ZLZsezaydU,4848
323
- crawlo-1.4.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
324
- crawlo-1.4.3.dist-info/entry_points.txt,sha256=5HoVoTSPxI8SCa5B7pQYxLSrkOdiunyO9tqNsLMv52g,43
325
- crawlo-1.4.3.dist-info/top_level.txt,sha256=keG_67pbZ_wZL2dmDRA9RMaNHTaV_x_oxZ9DKNgwvR0,22
326
- crawlo-1.4.3.dist-info/RECORD,,
341
+ tests/test_spiders/__init__.py,sha256=Ws2DhfUA0Xh5Cxr9M46td7B6hyNoLTyAhZ60FnIh6D0,20
342
+ tests/test_spiders/test_spider.py,sha256=kNGEg80HMMFgzVseI1jJjljZEBy3QYKt_3SXGASffFM,168
343
+ crawlo-1.4.5.dist-info/METADATA,sha256=o0MSsONyv_KU7dMNANtCZlkLpVdDUz8zGJKd5i2DM1g,9355
344
+ crawlo-1.4.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
345
+ crawlo-1.4.5.dist-info/entry_points.txt,sha256=5HoVoTSPxI8SCa5B7pQYxLSrkOdiunyO9tqNsLMv52g,43
346
+ crawlo-1.4.5.dist-info/top_level.txt,sha256=keG_67pbZ_wZL2dmDRA9RMaNHTaV_x_oxZ9DKNgwvR0,22
347
+ crawlo-1.4.5.dist-info/RECORD,,
@@ -0,0 +1,251 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ 框架bug检查测试脚本
5
+ 全面测试框架的核心功能,查找潜在的bug
6
+ """
7
+
8
+ import sys
9
+ import os
10
+ import asyncio
11
+ import traceback
12
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
13
+
14
+ from crawlo.spider import Spider
15
+ from crawlo import Request
16
+ from crawlo.network.response import Response
17
+
18
+
19
+ class TestSpider(Spider):
20
+ """测试爬虫"""
21
+ name = 'bug_check_spider'
22
+
23
+ def start_requests(self):
24
+ """发起测试请求"""
25
+ # 生成一些测试请求
26
+ yield Request('https://httpbin.org/get', callback=self.parse)
27
+
28
+ def parse(self, response):
29
+ """解析响应"""
30
+ print(f"成功获取响应: {response.url}")
31
+ print(f"状态码: {response.status_code}")
32
+ # 测试Response的各种方法
33
+ self.test_response_methods(response)
34
+ return []
35
+
36
+ def test_response_methods(self, response):
37
+ """测试Response的各种方法"""
38
+ print("测试Response方法...")
39
+
40
+ # 测试URL处理方法
41
+ try:
42
+ joined_url = response.urljoin('/test')
43
+ print(f" urljoin测试通过: {joined_url}")
44
+ except Exception as e:
45
+ print(f" urljoin测试失败: {e}")
46
+
47
+ try:
48
+ parsed = response.urlparse()
49
+ print(f" urlparse测试通过: {parsed}")
50
+ except Exception as e:
51
+ print(f" urlparse测试失败: {e}")
52
+
53
+ # 测试选择器方法
54
+ try:
55
+ # 测试提取文本
56
+ text = response.extract_text('title', default='No title')
57
+ print(f" extract_text测试通过: {text}")
58
+ except Exception as e:
59
+ print(f" extract_text测试失败: {e}")
60
+
61
+ try:
62
+ # 测试提取多个文本
63
+ texts = response.extract_texts('h1,h2,h3', default=[])
64
+ print(f" extract_texts测试通过: {texts}")
65
+ except Exception as e:
66
+ print(f" extract_texts测试失败: {e}")
67
+
68
+ try:
69
+ # 测试提取属性
70
+ attr = response.extract_attr('a', 'href', default='')
71
+ print(f" extract_attr测试通过: {attr}")
72
+ except Exception as e:
73
+ print(f" extract_attr测试失败: {e}")
74
+
75
+ try:
76
+ # 测试提取多个属性
77
+ attrs = response.extract_attrs('a', 'href', default=[])
78
+ print(f" extract_attrs测试通过: {attrs}")
79
+ except Exception as e:
80
+ print(f" extract_attrs测试失败: {e}")
81
+
82
+
83
+ async def test_request_serialization():
84
+ """测试Request序列化"""
85
+ print("测试Request序列化...")
86
+
87
+ try:
88
+ # 创建一个复杂的Request对象
89
+ request = Request(
90
+ url='https://example.com/test',
91
+ method='POST',
92
+ headers={'User-Agent': 'Test'},
93
+ json_body={'key': 'value'},
94
+ meta={'test': 'data'},
95
+ priority=5
96
+ )
97
+
98
+ # 测试pickle序列化
99
+ import pickle
100
+ serialized = pickle.dumps(request)
101
+ deserialized = pickle.loads(serialized)
102
+ print(f" Request序列化测试通过: {deserialized.url}")
103
+ return True
104
+ except Exception as e:
105
+ print(f" Request序列化测试失败: {e}")
106
+ traceback.print_exc()
107
+ return False
108
+
109
+
110
+ async def test_queue_operations():
111
+ """测试队列操作"""
112
+ print("测试队列操作...")
113
+
114
+ try:
115
+ from crawlo.queue.queue_manager import QueueConfig, QueueManager, QueueType
116
+
117
+ # 创建内存队列配置
118
+ queue_config = QueueConfig(
119
+ queue_type=QueueType.MEMORY,
120
+ max_queue_size=10
121
+ )
122
+
123
+ # 创建队列管理器
124
+ queue_manager = QueueManager(queue_config)
125
+ await queue_manager.initialize()
126
+
127
+ # 测试添加请求
128
+ request = Request('https://example.com/test')
129
+ success = await queue_manager.put(request)
130
+ print(f" 队列添加请求: {'成功' if success else '失败'}")
131
+
132
+ # 测试获取请求
133
+ retrieved = await queue_manager.get(timeout=1.0)
134
+ print(f" 队列获取请求: {'成功' if retrieved else '失败'}")
135
+
136
+ # 关闭队列
137
+ await queue_manager.close()
138
+ print(" 队列操作测试完成")
139
+ return True
140
+ except Exception as e:
141
+ print(f" 队列操作测试失败: {e}")
142
+ traceback.print_exc()
143
+ return False
144
+
145
+
146
+ async def test_redis_queue_operations():
147
+ """测试Redis队列操作"""
148
+ print("测试Redis队列操作...")
149
+
150
+ try:
151
+ from crawlo.queue.queue_manager import QueueConfig, QueueManager, QueueType
152
+
153
+ # 创建Redis队列配置
154
+ queue_config = QueueConfig(
155
+ queue_type=QueueType.REDIS,
156
+ redis_url='redis://127.0.0.1:6379/15', # 使用测试数据库
157
+ queue_name='test:bug_check',
158
+ max_queue_size=10
159
+ )
160
+
161
+ # 创建队列管理器
162
+ queue_manager = QueueManager(queue_config)
163
+ initialized = await queue_manager.initialize()
164
+ if not initialized:
165
+ print(" Redis队列初始化失败,跳过测试")
166
+ return True
167
+
168
+ # 测试添加请求
169
+ request = Request('https://example.com/test_redis')
170
+ success = await queue_manager.put(request)
171
+ print(f" Redis队列添加请求: {'成功' if success else '失败'}")
172
+
173
+ # 测试获取请求
174
+ retrieved = await queue_manager.get(timeout=1.0)
175
+ print(f" Redis队列获取请求: {'成功' if retrieved else '失败'}")
176
+
177
+ # 关闭队列
178
+ await queue_manager.close()
179
+ print(" Redis队列操作测试完成")
180
+ return True
181
+ except Exception as e:
182
+ print(f" Redis队列操作测试失败: {e}")
183
+ # 不将Redis连接失败视为测试失败
184
+ return True
185
+
186
+
187
+ async def test_spider_registration():
188
+ """测试爬虫注册"""
189
+ print("测试爬虫注册...")
190
+
191
+ try:
192
+ from crawlo.spider import get_spider_by_name, get_spider_names
193
+
194
+ # 检查测试爬虫是否已注册
195
+ spider_class = get_spider_by_name('bug_check_spider')
196
+ if spider_class:
197
+ print(f" 爬虫注册测试通过: {spider_class.__name__}")
198
+ else:
199
+ print(" 爬虫注册测试失败: 未找到注册的爬虫")
200
+ return False
201
+
202
+ # 检查所有注册的爬虫
203
+ spider_names = get_spider_names()
204
+ print(f" 已注册的爬虫: {spider_names}")
205
+ return True
206
+ except Exception as e:
207
+ print(f" 爬虫注册测试失败: {e}")
208
+ traceback.print_exc()
209
+ return False
210
+
211
+
212
+ async def main():
213
+ """主函数"""
214
+ print("开始框架bug检查测试...")
215
+ print("=" * 50)
216
+
217
+ tests = [
218
+ test_request_serialization,
219
+ test_queue_operations,
220
+ test_redis_queue_operations,
221
+ test_spider_registration,
222
+ ]
223
+
224
+ passed = 0
225
+ total = len(tests)
226
+
227
+ for test_func in tests:
228
+ try:
229
+ if await test_func():
230
+ passed += 1
231
+ print(f"✓ {test_func.__name__} 通过")
232
+ else:
233
+ print(f"✗ {test_func.__name__} 失败")
234
+ except Exception as e:
235
+ print(f"✗ {test_func.__name__} 异常: {e}")
236
+ print()
237
+
238
+ print("=" * 50)
239
+ print(f"测试结果: {passed}/{total} 通过")
240
+
241
+ if passed == total:
242
+ print("所有测试通过!框架核心功能正常。")
243
+ return 0
244
+ else:
245
+ print("部分测试失败,请检查框架实现。")
246
+ return 1
247
+
248
+
249
+ if __name__ == "__main__":
250
+ exit_code = asyncio.run(main())
251
+ exit(exit_code)