cobweb-launcher 1.2.49__py3-none-any.whl → 1.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. cobweb/base/__init__.py +141 -4
  2. cobweb/base/basic.py +28 -82
  3. cobweb/base/common_queue.py +13 -0
  4. cobweb/base/dotting.py +1 -1
  5. cobweb/base/request.py +14 -2
  6. cobweb/base/seed.py +10 -6
  7. cobweb/constant.py +16 -0
  8. cobweb/crawlers/crawler.py +51 -181
  9. cobweb/db/redis_db.py +28 -0
  10. cobweb/launchers/__init__.py +2 -2
  11. cobweb/launchers/launcher.py +110 -141
  12. cobweb/launchers/launcher_api.py +66 -114
  13. cobweb/launchers/launcher_pro.py +76 -194
  14. cobweb/pipelines/base_pipeline.py +54 -0
  15. cobweb/pipelines/loghub_pipeline.py +34 -0
  16. cobweb/pipelines/pipeline.py +25 -49
  17. cobweb/schedulers/__init__.py +0 -2
  18. cobweb/schedulers/scheduler_redis.py +5 -8
  19. cobweb/setting.py +29 -6
  20. cobweb/utils/dotting.py +10 -42
  21. cobweb_/__init__.py +2 -0
  22. cobweb_/base/__init__.py +9 -0
  23. cobweb_/base/common_queue.py +30 -0
  24. cobweb_/base/decorators.py +40 -0
  25. cobweb_/base/item.py +46 -0
  26. cobweb_/base/log.py +94 -0
  27. cobweb_/base/request.py +82 -0
  28. cobweb_/base/response.py +23 -0
  29. cobweb_/base/seed.py +114 -0
  30. cobweb_/constant.py +94 -0
  31. cobweb_/crawlers/__init__.py +1 -0
  32. cobweb_/crawlers/crawler.py +184 -0
  33. cobweb_/db/__init__.py +2 -0
  34. cobweb_/db/api_db.py +82 -0
  35. cobweb_/db/redis_db.py +130 -0
  36. cobweb_/exceptions/__init__.py +1 -0
  37. cobweb_/exceptions/oss_db_exception.py +28 -0
  38. cobweb_/launchers/__init__.py +3 -0
  39. cobweb_/launchers/launcher.py +235 -0
  40. cobweb_/launchers/launcher_air.py +88 -0
  41. cobweb_/launchers/launcher_api.py +221 -0
  42. cobweb_/launchers/launcher_pro.py +222 -0
  43. cobweb_/pipelines/__init__.py +3 -0
  44. cobweb_/pipelines/pipeline.py +69 -0
  45. cobweb_/pipelines/pipeline_console.py +22 -0
  46. cobweb_/pipelines/pipeline_loghub.py +34 -0
  47. cobweb_/setting.py +74 -0
  48. cobweb_/utils/__init__.py +5 -0
  49. cobweb_/utils/bloom.py +58 -0
  50. cobweb_/utils/dotting.py +32 -0
  51. cobweb_/utils/oss.py +94 -0
  52. cobweb_/utils/tools.py +42 -0
  53. {cobweb_launcher-1.2.49.dist-info → cobweb_launcher-1.3.2.dist-info}/METADATA +1 -1
  54. cobweb_launcher-1.3.2.dist-info/RECORD +110 -0
  55. cobweb_launcher-1.3.2.dist-info/top_level.txt +2 -0
  56. cobweb_new/__init__.py +2 -0
  57. cobweb_new/base/__init__.py +72 -0
  58. cobweb_new/base/common_queue.py +53 -0
  59. cobweb_new/base/decorators.py +72 -0
  60. cobweb_new/base/item.py +46 -0
  61. cobweb_new/base/log.py +94 -0
  62. cobweb_new/base/request.py +82 -0
  63. cobweb_new/base/response.py +23 -0
  64. cobweb_new/base/seed.py +118 -0
  65. cobweb_new/constant.py +105 -0
  66. cobweb_new/crawlers/__init__.py +1 -0
  67. cobweb_new/crawlers/crawler-new.py +85 -0
  68. cobweb_new/crawlers/crawler.py +170 -0
  69. cobweb_new/db/__init__.py +2 -0
  70. cobweb_new/db/api_db.py +82 -0
  71. cobweb_new/db/redis_db.py +158 -0
  72. cobweb_new/exceptions/__init__.py +1 -0
  73. cobweb_new/exceptions/oss_db_exception.py +28 -0
  74. cobweb_new/launchers/__init__.py +3 -0
  75. cobweb_new/launchers/launcher.py +237 -0
  76. cobweb_new/launchers/launcher_air.py +88 -0
  77. cobweb_new/launchers/launcher_api.py +161 -0
  78. cobweb_new/launchers/launcher_pro.py +96 -0
  79. cobweb_new/launchers/tesss.py +47 -0
  80. cobweb_new/pipelines/__init__.py +3 -0
  81. cobweb_new/pipelines/pipeline.py +68 -0
  82. cobweb_new/pipelines/pipeline_console.py +22 -0
  83. cobweb_new/pipelines/pipeline_loghub.py +34 -0
  84. cobweb_new/setting.py +95 -0
  85. cobweb_new/utils/__init__.py +5 -0
  86. cobweb_new/utils/bloom.py +58 -0
  87. cobweb_new/utils/oss.py +94 -0
  88. cobweb_new/utils/tools.py +42 -0
  89. cobweb/schedulers/scheduler_api.py +0 -72
  90. cobweb_launcher-1.2.49.dist-info/RECORD +0 -44
  91. cobweb_launcher-1.2.49.dist-info/top_level.txt +0 -1
  92. {cobweb_launcher-1.2.49.dist-info → cobweb_launcher-1.3.2.dist-info}/LICENSE +0 -0
  93. {cobweb_launcher-1.2.49.dist-info → cobweb_launcher-1.3.2.dist-info}/WHEEL +0 -0
@@ -0,0 +1,110 @@
1
+ cobweb/__init__.py,sha256=CBd2oByCfc5EmH2dCZYVHkxXYZG-oWrLyTtZU5sEoP0,96
2
+ cobweb/constant.py,sha256=eofONAntk9O6S-cb4KbYGYHL_u7nBlOqqFOw_HzJHAU,3588
3
+ cobweb/setting.py,sha256=pY6LKsgWI3164GiGA1z_y26LVf5-3mpiEgmm86mKRdY,3135
4
+ cobweb/base/__init__.py,sha256=yrf__ULml-wN2CuLDgu7o1slbFhiSJwvOu_dsYstPLU,4849
5
+ cobweb/base/basic.py,sha256=ukWBLDdBgJhV15pO13ZxcYYXVLmnWFctOyHuDvZQh4M,6390
6
+ cobweb/base/common_queue.py,sha256=Gor7sR3h1hlZWaI0XcNAbf0S15Ftjr3DFRWNTGL13uU,1137
7
+ cobweb/base/decorators.py,sha256=wDCaQ94aAZGxks9Ljc0aXq6omDXT1_yzFy83ZW6VbVI,930
8
+ cobweb/base/dotting.py,sha256=0SH8F2uAGWZjfODpTAXngYHz8JgfCm-RqpmQbfQ3NCY,1233
9
+ cobweb/base/item.py,sha256=hYheVTV2Bozp4iciJpE2ZwBIXkaqBg4QQkRccP8yoVk,1049
10
+ cobweb/base/log.py,sha256=L01hXdk3L2qEm9X1FOXQ9VmWIoHSELe0cyZvrdAN61A,2003
11
+ cobweb/base/request.py,sha256=acGm3OzxsPed5VUTk7D9eeHZPMh7KUNQRUv44G5znZg,2659
12
+ cobweb/base/response.py,sha256=eB1DWMXFCpn3cJ3yzgCRU1WeZAdayGDohRgdjdMUFN4,406
13
+ cobweb/base/seed.py,sha256=PN5J4gKPEXylwyQeSGOBfauxHktxFr7RJe8nVX1hBw4,2987
14
+ cobweb/crawlers/__init__.py,sha256=msvkB9mTpsgyj8JfNMsmwAcpy5kWk_2NrO1Adw2Hkw0,29
15
+ cobweb/crawlers/base_crawler.py,sha256=ee_WSDnPQpPTk6wlFuY2UEx5L3hcsAZFcr6i3GLSry8,5751
16
+ cobweb/crawlers/crawler.py,sha256=rdXLgY91kKDSjOvDml3CUqJTR9RFifcLEYNiF6-gXEs,2964
17
+ cobweb/crawlers/file_crawler.py,sha256=2Sjbdgxzqd41WykKUQE3QQlGai3T8k-pmHNmPlTchjQ,4454
18
+ cobweb/db/__init__.py,sha256=uZwSkd105EAwYo95oZQXAfofUKHVIAZZIPpNMy-hm2Q,56
19
+ cobweb/db/api_db.py,sha256=bDc5dJQxq4z04h70KUTHd0OqUOEY7Cm3wcNJZtTvJIM,3015
20
+ cobweb/db/redis_db.py,sha256=FvMzckJtmhwKhZqKoS23iXmJti5P2dnMVD5rJ__5LUw,5139
21
+ cobweb/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk,32
22
+ cobweb/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
23
+ cobweb/launchers/__init__.py,sha256=uzfPkLbY2m0wsIR_s93VFxmO0U49GgUG7hXPzMYdye0,118
24
+ cobweb/launchers/launcher.py,sha256=0BCXzZ05PFB6aQejwVBuMPwoxhlc2BLYL541HO5yiYM,7238
25
+ cobweb/launchers/launcher_air.py,sha256=KAk_M8F3029cXYe7m4nn3Nzyi89lbxJ2cqZjqW8iZ0E,2832
26
+ cobweb/launchers/launcher_api.py,sha256=qPazoC7U-UmgebbiTkhl6f4yQmN34XMl6HawekhAhEo,5789
27
+ cobweb/launchers/launcher_pro.py,sha256=np6cY7UmW7-649mORdsCvbLy2_mkoXhNZ3OLtev555I,3409
28
+ cobweb/pipelines/__init__.py,sha256=zSUsGtx6smbs2iXBXvYynReKSgky-3gjqaAtKVnA_OU,105
29
+ cobweb/pipelines/base_pipeline.py,sha256=fYnWf79GmhufXpcnMa3te18SbmnVeYLwxfyo-zLd9CY,1577
30
+ cobweb/pipelines/loghub_pipeline.py,sha256=cjPO6w6UJ0jNw2fVvdX0BCdlm58T7dmYXlxzXOBpvfY,1027
31
+ cobweb/pipelines/pipeline.py,sha256=FTl2JOoF3ysWa-mxb1pbmayEM4IXQH5a4L5CymlBQNU,1293
32
+ cobweb/pipelines/pipeline_console.py,sha256=NEh-4zhuVAQOqwXLsqeb-rcNZ9_KXFUpL3otUTL5qBs,754
33
+ cobweb/pipelines/pipeline_loghub.py,sha256=xZ6D55BGdiM71WUv83jyLGbEyUwhBHLJRZoXthBxxTs,1019
34
+ cobweb/schedulers/__init__.py,sha256=WJgBXsPE8zpJQ9L_-bHIUpBcaB2G4HmutDdWF3ud1Bs,44
35
+ cobweb/schedulers/scheduler_redis.py,sha256=Aw7de0sXigRAxJgqUhHWu30hMBzgEWjkj-3OXXqmldg,2118
36
+ cobweb/utils/__init__.py,sha256=Ev2LZZ1-S56iQYDqFZrqadizEv4Gk8Of-DraH-_WnKY,109
37
+ cobweb/utils/bloom.py,sha256=vng-YbKgh9HbtpAWYf_nkUSbfVTOj40aqUUejRYlsCU,1752
38
+ cobweb/utils/dotting.py,sha256=fZ-16TAf51dovGIKaHsQthgTkbSEMAteaJPluZhniJI,879
39
+ cobweb/utils/oss.py,sha256=gyt8-UB07tVphZLQXMOf-JTJwU-mWq8KZkOXKkAf3uk,3513
40
+ cobweb/utils/tools.py,sha256=5JEaaAwYoV9Sdla2UBIJn6faUBuXmxUMagm9ck6FVqs,1253
41
+ cobweb_/__init__.py,sha256=CBd2oByCfc5EmH2dCZYVHkxXYZG-oWrLyTtZU5sEoP0,96
42
+ cobweb_/constant.py,sha256=zy3XYsc1qp2B76_Fn_hVQ8eGHlPBd3OFlZK2cryE6FY,2839
43
+ cobweb_/setting.py,sha256=47HZsw40HLpsmOmvij1lyQALPQQCN_tWlKZ0wbn2MtM,2216
44
+ cobweb_/base/__init__.py,sha256=4gwWWQ0Q8cYG9cD7Lwf4XMqRGc5M_mapS3IczR6zeCE,222
45
+ cobweb_/base/common_queue.py,sha256=W7PPZZFl52j3Mc916T0imHj7oAUelA6aKJwW-FecDPE,872
46
+ cobweb_/base/decorators.py,sha256=wDCaQ94aAZGxks9Ljc0aXq6omDXT1_yzFy83ZW6VbVI,930
47
+ cobweb_/base/item.py,sha256=hYheVTV2Bozp4iciJpE2ZwBIXkaqBg4QQkRccP8yoVk,1049
48
+ cobweb_/base/log.py,sha256=L01hXdk3L2qEm9X1FOXQ9VmWIoHSELe0cyZvrdAN61A,2003
49
+ cobweb_/base/request.py,sha256=tEkgMVUfdQI-kZuzWuiit9P_q4Q9-_RZh9aXXpc0314,2352
50
+ cobweb_/base/response.py,sha256=eB1DWMXFCpn3cJ3yzgCRU1WeZAdayGDohRgdjdMUFN4,406
51
+ cobweb_/base/seed.py,sha256=Uz_VBRlAxNYQcFHk3tsZFMlU96yPOedHaWGTvk-zKd8,2908
52
+ cobweb_/crawlers/__init__.py,sha256=msvkB9mTpsgyj8JfNMsmwAcpy5kWk_2NrO1Adw2Hkw0,29
53
+ cobweb_/crawlers/crawler.py,sha256=mPRc9GBfWi5AoSxB1jlARxvG_AzsPVRFil5O8RnOxCY,7018
54
+ cobweb_/db/__init__.py,sha256=uZwSkd105EAwYo95oZQXAfofUKHVIAZZIPpNMy-hm2Q,56
55
+ cobweb_/db/api_db.py,sha256=bDc5dJQxq4z04h70KUTHd0OqUOEY7Cm3wcNJZtTvJIM,3015
56
+ cobweb_/db/redis_db.py,sha256=fumNZJiio-uQqRcSrymx8eJ1PqsdOwITe_Y-9JOXxrQ,4298
57
+ cobweb_/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk,32
58
+ cobweb_/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
59
+ cobweb_/launchers/__init__.py,sha256=qMuVlQcjErVK67HyKFZEsXf_rfZD5ODjx1QucSCKMOM,114
60
+ cobweb_/launchers/launcher.py,sha256=sPts-xlgxoeIfl1fn1XR2XVZxLzt7He9xrYDfTHRAGo,7029
61
+ cobweb_/launchers/launcher_air.py,sha256=KAk_M8F3029cXYe7m4nn3Nzyi89lbxJ2cqZjqW8iZ0E,2832
62
+ cobweb_/launchers/launcher_api.py,sha256=Ih8f5xDcFlGBn6VSnlrpxcchMB48ugsj2NTWYgGYWfY,8669
63
+ cobweb_/launchers/launcher_pro.py,sha256=NBJstQuB0o_jMiySJ14lk0Y3WAxxiScaQvXa1qtTSo4,8683
64
+ cobweb_/pipelines/__init__.py,sha256=zSUsGtx6smbs2iXBXvYynReKSgky-3gjqaAtKVnA_OU,105
65
+ cobweb_/pipelines/pipeline.py,sha256=4TJLX0sUHRxYndF5A4Vs5btUGI-wigkOcFvhTW1hLXI,2009
66
+ cobweb_/pipelines/pipeline_console.py,sha256=NEh-4zhuVAQOqwXLsqeb-rcNZ9_KXFUpL3otUTL5qBs,754
67
+ cobweb_/pipelines/pipeline_loghub.py,sha256=xZ6D55BGdiM71WUv83jyLGbEyUwhBHLJRZoXthBxxTs,1019
68
+ cobweb_/utils/__init__.py,sha256=Ev2LZZ1-S56iQYDqFZrqadizEv4Gk8Of-DraH-_WnKY,109
69
+ cobweb_/utils/bloom.py,sha256=vng-YbKgh9HbtpAWYf_nkUSbfVTOj40aqUUejRYlsCU,1752
70
+ cobweb_/utils/dotting.py,sha256=fZ-16TAf51dovGIKaHsQthgTkbSEMAteaJPluZhniJI,879
71
+ cobweb_/utils/oss.py,sha256=gyt8-UB07tVphZLQXMOf-JTJwU-mWq8KZkOXKkAf3uk,3513
72
+ cobweb_/utils/tools.py,sha256=5JEaaAwYoV9Sdla2UBIJn6faUBuXmxUMagm9ck6FVqs,1253
73
+ cobweb_new/__init__.py,sha256=CBd2oByCfc5EmH2dCZYVHkxXYZG-oWrLyTtZU5sEoP0,96
74
+ cobweb_new/constant.py,sha256=wy2bLpGZrl1MtgVv-Z1Tmtj5uWh-KGzDrrGKdVStxV4,3236
75
+ cobweb_new/setting.py,sha256=Ya3X4HbvDfSmMF2kSJwaaP1naxrWETTFW88T11agP7k,3035
76
+ cobweb_new/base/__init__.py,sha256=L74KN3qZn6s33EXyxQ_vB3FF8mA7pZJd_ekkWiUKd5Y,2229
77
+ cobweb_new/base/common_queue.py,sha256=Po6yY8HqpC6Wt6csd3Co3lBd7ygN2vmEECczgyc_sM8,1292
78
+ cobweb_new/base/decorators.py,sha256=8VDpANSIhxhrFnwgQzAxM_8ZyDXKdn3zTH0oZIXqRPE,1801
79
+ cobweb_new/base/item.py,sha256=hYheVTV2Bozp4iciJpE2ZwBIXkaqBg4QQkRccP8yoVk,1049
80
+ cobweb_new/base/log.py,sha256=L01hXdk3L2qEm9X1FOXQ9VmWIoHSELe0cyZvrdAN61A,2003
81
+ cobweb_new/base/request.py,sha256=tEkgMVUfdQI-kZuzWuiit9P_q4Q9-_RZh9aXXpc0314,2352
82
+ cobweb_new/base/response.py,sha256=eB1DWMXFCpn3cJ3yzgCRU1WeZAdayGDohRgdjdMUFN4,406
83
+ cobweb_new/base/seed.py,sha256=KBVxVU4jMB6oiw8HPtu-nDUVUZ6jiTjzR917jTYGCZs,2977
84
+ cobweb_new/crawlers/__init__.py,sha256=msvkB9mTpsgyj8JfNMsmwAcpy5kWk_2NrO1Adw2Hkw0,29
85
+ cobweb_new/crawlers/crawler-new.py,sha256=TAYMH2E3BTkjU6bFLlIMVfsR3cV2ggjA0moUpaXOe1Y,2762
86
+ cobweb_new/crawlers/crawler.py,sha256=xiFNM0t69f5xlm59hPbO2MpqtdirVAUhD84-CLpyHPM,6349
87
+ cobweb_new/db/__init__.py,sha256=uZwSkd105EAwYo95oZQXAfofUKHVIAZZIPpNMy-hm2Q,56
88
+ cobweb_new/db/api_db.py,sha256=bDc5dJQxq4z04h70KUTHd0OqUOEY7Cm3wcNJZtTvJIM,3015
89
+ cobweb_new/db/redis_db.py,sha256=FvMzckJtmhwKhZqKoS23iXmJti5P2dnMVD5rJ__5LUw,5139
90
+ cobweb_new/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk,32
91
+ cobweb_new/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
92
+ cobweb_new/launchers/__init__.py,sha256=qMuVlQcjErVK67HyKFZEsXf_rfZD5ODjx1QucSCKMOM,114
93
+ cobweb_new/launchers/launcher.py,sha256=87P_2rRjzqyQXcG_EJ5Y6lMAk7saM8k1WBJcl9ANX6k,8309
94
+ cobweb_new/launchers/launcher_air.py,sha256=KAk_M8F3029cXYe7m4nn3Nzyi89lbxJ2cqZjqW8iZ0E,2832
95
+ cobweb_new/launchers/launcher_api.py,sha256=qPazoC7U-UmgebbiTkhl6f4yQmN34XMl6HawekhAhEo,5789
96
+ cobweb_new/launchers/launcher_pro.py,sha256=QLjAiN8qMk4NklSY7ldBAR5OEEUB8sECuCCwRrFEC68,3414
97
+ cobweb_new/launchers/tesss.py,sha256=pDe0wwhXbdjjmtfc7JLPfVOvs9yuc7Y8wLT1b1ueeEs,912
98
+ cobweb_new/pipelines/__init__.py,sha256=zSUsGtx6smbs2iXBXvYynReKSgky-3gjqaAtKVnA_OU,105
99
+ cobweb_new/pipelines/pipeline.py,sha256=3IRHHqrHblZ_18Cps2bGK6iugDjs-dde7p3AbarfiN8,1958
100
+ cobweb_new/pipelines/pipeline_console.py,sha256=NEh-4zhuVAQOqwXLsqeb-rcNZ9_KXFUpL3otUTL5qBs,754
101
+ cobweb_new/pipelines/pipeline_loghub.py,sha256=xZ6D55BGdiM71WUv83jyLGbEyUwhBHLJRZoXthBxxTs,1019
102
+ cobweb_new/utils/__init__.py,sha256=c9macpjc15hrCUCdzO5RR_sgK_B9kvJKreSGprZ1ld4,112
103
+ cobweb_new/utils/bloom.py,sha256=vng-YbKgh9HbtpAWYf_nkUSbfVTOj40aqUUejRYlsCU,1752
104
+ cobweb_new/utils/oss.py,sha256=gyt8-UB07tVphZLQXMOf-JTJwU-mWq8KZkOXKkAf3uk,3513
105
+ cobweb_new/utils/tools.py,sha256=5JEaaAwYoV9Sdla2UBIJn6faUBuXmxUMagm9ck6FVqs,1253
106
+ cobweb_launcher-1.3.2.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
107
+ cobweb_launcher-1.3.2.dist-info/METADATA,sha256=bnweVxLU9zIGyMqQ7mvKdMvzTYjVTIhJ1ei0tm35SOg,6509
108
+ cobweb_launcher-1.3.2.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
109
+ cobweb_launcher-1.3.2.dist-info/top_level.txt,sha256=A0GPGeX6QtxXg7AJno3SVRTHtVCCqeRIOrpwDoXg9qs,15
110
+ cobweb_launcher-1.3.2.dist-info/RECORD,,
@@ -0,0 +1,2 @@
1
+ cobweb
2
+ cobweb_
cobweb_new/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ from .launchers import LauncherAir, LauncherPro, LauncherApi
2
+ from .constant import CrawlerModel
@@ -0,0 +1,72 @@
1
+ import time
2
+ from inspect import isgenerator
3
+ from typing import Callable, Union
4
+
5
+ from .common_queue import Queue
6
+ from .response import Response
7
+ from .request import Request
8
+ from .item import BaseItem, ConsoleItem
9
+ from .seed import Seed
10
+
11
+ from .log import logger
12
+ # from .decorators import decorator_oss_db, stop, pause
13
+ import decorators
14
+
15
+
16
class TaskQueue:
    """Shared in-memory queues, one per task stage.

    All queues are class attributes, so every ``TaskQueue`` instance and the
    static helpers below operate on the same queue objects.
    """

    SEED = Queue()  # seeds waiting to be added as tasks
    TODO = Queue()  # task seeds
    REQUEST = Queue()  # requests

    DOWNLOAD = Queue()  # download tasks
    RESPONSE = Queue()  # responses
    DONE = Queue()  # finished downloads

    UPLOAD = Queue()  # tasks waiting to be uploaded

    DELETE = Queue()  # tasks waiting to be deleted

    def __init__(self, db):
        """Store *db* on the instance; nothing in this class uses it yet."""
        self.db = db

    @staticmethod
    def is_empty():
        """Return True when every shared queue has length 0."""
        queues = (
            TaskQueue.SEED,
            TaskQueue.TODO,
            TaskQueue.REQUEST,
            TaskQueue.DOWNLOAD,
            TaskQueue.RESPONSE,
            TaskQueue.UPLOAD,
            TaskQueue.DONE,
            TaskQueue.DELETE,
        )
        return not any(queue.length for queue in queues)

    @staticmethod
    def process_task(it: Union[Seed, Request, Response, BaseItem], crawler_func: Callable):
        """Run ``crawler_func(it)`` and route everything it yields to a queue.

        ``crawler_func`` must return a generator. Yielded objects are routed
        by type: ``Request`` -> DOWNLOAD, ``Response`` -> RESPONSE,
        ``BaseItem`` -> UPLOAD, ``Seed`` -> SEED. Anything else is ignored.

        No exception propagates to the caller. On failure the error is
        logged, the retry counter of the task's seed is incremented (unless
        *it* is a ``BaseItem``), and the calling thread sleeps for 5 seconds.
        """
        try:
            produced = crawler_func(it)
            if not isgenerator(produced):
                raise TypeError(f"{crawler_func.__name__} function isn't a generator")
            for tk in produced:
                if isinstance(tk, Request):
                    TaskQueue.DOWNLOAD.push(tk)
                elif isinstance(tk, Response):
                    TaskQueue.RESPONSE.push(tk)
                elif isinstance(tk, BaseItem):
                    TaskQueue.UPLOAD.push(tk)
                elif isinstance(tk, Seed):
                    TaskQueue.SEED.push(tk)
        except Exception:
            # Previously the error vanished without a trace; keep the
            # best-effort behaviour but leave a record with the traceback.
            logger.exception(
                "process_task failed in %s",
                getattr(crawler_func, "__name__", crawler_func),
            )
            if not isinstance(it, BaseItem):
                it.seed.params.retry += 1

            # NOTE(review): the original indentation was lost; this back-off
            # is assumed to belong to the failure path - confirm.
            time.sleep(5)
67
+
68
+
69
class Distribute:
    """
    Data dispatcher: distributes data into the individual queues.

    NOTE(review): only this docstring is visible here; the class has no
    members yet.
    """
@@ -0,0 +1,53 @@
1
+ import time
2
+ from collections import deque
3
+
4
+
5
class Queue:
    """Small wrapper around ``collections.deque`` with forgiving accessors."""

    def __init__(self):
        self._queue = deque()

    @property
    def length(self) -> int:
        """Number of elements currently stored."""
        return len(self._queue)

    def push(self, data, left: bool = False, direct_insertion: bool = False):
        """Add *data* to the queue; falsy *data* is ignored.

        A list or tuple is unpacked element by element unless
        ``direct_insertion`` is true, in which case it is stored as a single
        element. ``left`` selects the left end; note that unpacking on the
        left reverses the element order (``deque.extendleft``).
        """
        try:
            if not data:
                return None
            unpack = isinstance(data, (list, tuple)) and not direct_insertion
            if unpack:
                insert = self._queue.extendleft if left else self._queue.extend
            else:
                insert = self._queue.appendleft if left else self._queue.append
            insert(data)
        except AttributeError:
            pass

    def pop(self, left: bool = True):
        """Remove and return one element, or None when nothing can be popped."""
        try:
            if left:
                return self._queue.popleft()
            return self._queue.pop()
        except (IndexError, AttributeError):
            return None

    def clear(self):
        """Drop every queued element."""
        self._queue.clear()

    def get(self):
        """Generator yielding exactly one value: the leftmost element.

        When the queue is empty it sleeps for one second and yields None.
        """
        try:
            yield self._queue.popleft()
        except IndexError:
            time.sleep(1)
            yield None
        except AttributeError:
            yield None
44
+
45
+
46
class RedisQueue(Queue):
    """Queue subclass that additionally keeps a ``db`` handle.

    NOTE(review): presumably ``db`` is a Redis client - confirm with the
    caller; nothing visible here uses it.
    """

    def __init__(self, db):
        super().__init__()
        self.db = db  # only stored on the instance

    def pop(self, left: bool = True):
        # Placeholder override: the body is a bare ``...``, so it always
        # returns None and never touches the inherited deque.
        ...
@@ -0,0 +1,72 @@
1
+ import time
2
+ import threading
3
+ from functools import wraps
4
+
5
+
6
def add_thread(num=1):
    """Decorator factory that turns a method call into thread registration.

    Calling the decorated method does not run it. It creates ``num``
    ``threading.Thread`` objects that target the original function with the
    same positional arguments, and appends them (unstarted) to
    ``self._threads``. Threads are named after the function, with an
    ``_<index>`` suffix when ``num`` is greater than 1.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(self, *args):
            call_args = (self, *args)
            for index in range(num):
                if num > 1:
                    thread_name = f"{func.__name__}_{index}"
                else:
                    thread_name = func.__name__
                worker = threading.Thread(name=thread_name, target=func, args=call_args)
                self._threads.append(worker)
        return wrapper

    return decorator
16
+
17
+
18
def pause(func):
    """Decorator: call *func* in a loop until ``self.pause`` is set.

    ``self.pause`` must expose ``is_set()`` (e.g. ``threading.Event``).
    An exception raised by *func* does not end the loop; it is logged at
    debug level with its traceback instead of being dropped silently.
    After every call, successful or not, the loop sleeps for 0.1 s.
    The wrapper returns None.
    """
    import logging  # local import: this module does not import logging at top level

    log = logging.getLogger(__name__)

    @wraps(func)
    def wrapper(self, *args, **kwargs):
        while not self.pause.is_set():
            try:
                func(self, *args, **kwargs)
            except Exception:
                log.debug("%s raised; loop continues", func.__name__, exc_info=True)
            finally:
                time.sleep(0.1)

    return wrapper
31
+
32
+
33
def stop(func):
    """Decorator: call *func* in a loop until ``self.stop`` is set.

    ``self.stop`` must expose ``is_set()`` (e.g. ``threading.Event``).
    An exception raised by *func* does not end the loop; it is logged at
    debug level with its traceback instead of being dropped silently.
    After every call, successful or not, the loop sleeps for 0.1 s.
    The wrapper returns None.
    """
    import logging  # local import: this module does not import logging at top level

    log = logging.getLogger(__name__)

    @wraps(func)
    def wrapper(self, *args, **kwargs):
        while not self.stop.is_set():
            try:
                func(self, *args, **kwargs)
            except Exception:
                log.debug("%s raised; loop continues", func.__name__, exc_info=True)
            finally:
                time.sleep(0.1)

    return wrapper
46
+
47
+
48
def decorator_oss_db(exception, retries=3):
    """Decorator factory: retry the wrapped call and re-raise as *exception*.

    The wrapped function is attempted up to ``retries`` times. The first
    successful return value is returned immediately. If every attempt
    raises, ``exception(last_error)`` is raised, chained to the last error.

    The original code compared the attempt index with a hard-coded ``2``,
    so it only behaved as intended for ``retries == 3``: smaller values
    swallowed the error and returned None, larger values gave up after
    three attempts. With ``retries <= 0`` the function is never called and
    None is returned, as before.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(callback_func, *args, **kwargs):
            last_error = None
            for _ in range(retries):
                try:
                    return func(callback_func, *args, **kwargs)
                except Exception as e:
                    last_error = e

            if last_error is not None:
                raise exception(last_error) from last_error
            return None

        return wrapper

    return decorator
70
+
71
+
72
+
@@ -0,0 +1,46 @@
1
+ from .seed import Seed
2
+ from collections import namedtuple
3
+
4
+
5
class Item(type):
    """Metaclass that attaches a ``Data`` namedtuple type to item classes.

    For every class it creates, except one named ``"BaseItem"``, it reads the
    class attributes ``__TABLE__`` and ``__FIELDS__`` and stores
    ``namedtuple(__TABLE__, __FIELDS__)`` on the class as ``Data``.
    """

    def __new__(cls, name, bases, dct):
        klass = type.__new__(cls, name, bases, dct)
        if name == "BaseItem":
            # The base class has no real table, so it gets no Data type.
            return klass
        table_name = klass.__TABLE__
        field_spec = klass.__FIELDS__
        klass.Data = namedtuple(table_name, field_spec)
        return klass
14
+
15
+
16
class BaseItem(metaclass=Item):
    """Base class for result items built on the ``Item`` metaclass.

    Subclasses set ``__TABLE__`` and ``__FIELDS__``; the metaclass turns
    them into the namedtuple type ``Data``. ``BaseItem`` itself gets no
    ``Data`` and therefore cannot be instantiated directly.
    """

    __TABLE__ = ""
    __FIELDS__ = ""

    def __init__(self, seed: Seed, **kwargs):
        """Split *kwargs* into record fields and plain instance attributes.

        Keyword arguments whose name is a field of ``Data`` become the
        record stored in ``self.data``; all others are set as attributes.
        Raises TypeError (from the namedtuple constructor) when a declared
        field is missing.
        """
        self.seed = seed

        # Compare against the parsed field names. ``key in self.__FIELDS__``
        # is a substring test when __FIELDS__ is a string, so "at" used to
        # count as a field of "data" and then broke ``Data(**data)``.
        field_names = self.Data._fields

        data = {}
        for key, value in kwargs.items():
            if key in field_names:
                data[key] = value
            else:
                setattr(self, key, value)

        self.data = self.Data(**data)

    @property
    def to_dict(self):
        """The record fields as a dict (``namedtuple._asdict``)."""
        return self.data._asdict()

    @property
    def table(self):
        """Name of the ``Data`` type, i.e. the class's ``__TABLE__`` value."""
        return self.Data.__name__
40
+
41
+
42
class ConsoleItem(BaseItem):
    """Item whose record type is named "console" and has one field, "data"."""

    __TABLE__ = "console"  # name passed to namedtuple by the Item metaclass
    __FIELDS__ = "data"  # field spec passed to namedtuple by the Item metaclass
46
+
cobweb_new/base/log.py ADDED
@@ -0,0 +1,94 @@
1
+ import logging
2
+
3
+
4
class ColorCodes:
    """Named ANSI escape sequences for terminal text colors and styles.

    Pure constant table; nothing else visible in this file reads it.
    """

    # Text Reset
    RESET = "\033[0m"

    # Regular Colors
    RED = "\033[31m"
    GREEN = "\033[32m"
    YELLOW = "\033[33m"
    BLUE = "\033[34m"
    PURPLE = "\033[35m"
    CYAN = "\033[36m"
    WHITE = "\033[37m"

    # Bright Colors
    BRIGHT_RED = "\033[91m"
    BRIGHT_GREEN = "\033[92m"
    BRIGHT_YELLOW = "\033[93m"
    BRIGHT_BLUE = "\033[94m"
    BRIGHT_PURPLE = "\033[95m"
    BRIGHT_CYAN = "\033[96m"
    BRIGHT_WHITE = "\033[97m"

    # Background Colors
    BG_RED = "\033[41m"
    BG_GREEN = "\033[42m"
    BG_YELLOW = "\033[43m"
    BG_BLUE = "\033[44m"
    BG_PURPLE = "\033[45m"
    BG_CYAN = "\033[46m"
    BG_WHITE = "\033[47m"

    # Bright Background Colors
    BG_BRIGHT_RED = "\033[101m"
    BG_BRIGHT_GREEN = "\033[102m"
    BG_BRIGHT_YELLOW = "\033[103m"
    BG_BRIGHT_BLUE = "\033[104m"
    BG_BRIGHT_PURPLE = "\033[105m"
    BG_BRIGHT_CYAN = "\033[106m"
    BG_BRIGHT_WHITE = "\033[107m"

    # Text Styles
    BOLD = "\033[1m"
    DIM = "\033[2m"
    ITALIC = "\033[3m"
    UNDERLINE = "\033[4m"
    BLINK = "\033[5m"
    REVERSE = "\033[7m"
    HIDDEN = "\033[8m"
52
+
53
+
54
class Log:
    """Facade over one shared ``logging.Logger`` kept on the class.

    Defining the class has side effects: the ``oss2.api`` logger is raised
    to WARNING and ``logging.basicConfig`` is called with level INFO. The
    level-named properties return the bound logging methods of the shared
    logger, so ``logger.info("msg")`` logs through it.
    """

    logging.getLogger('oss2.api').setLevel(logging.WARNING)
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(name)s [%(filename)s:%(lineno)d %(funcName)s]'
               ' %(levelname)s -> %(message)s'
    )
    log = logging.getLogger()

    def set_log_name(self, name):
        """Switch the shared logger, for every ``Log`` instance, to *name*."""
        type(self).log = logging.getLogger(name)

    @property
    def debug(self):
        shared = type(self).log
        return shared.debug

    @property
    def info(self):
        shared = type(self).log
        return shared.info

    @property
    def warning(self):
        shared = type(self).log
        return shared.warning

    @property
    def exception(self):
        shared = type(self).log
        return shared.exception

    @property
    def error(self):
        shared = type(self).log
        return shared.error

    @property
    def critical(self):
        shared = type(self).log
        return shared.critical


# Module-level instance used as the package logger.
logger = Log()
92
+
93
+
94
+
@@ -0,0 +1,82 @@
1
+ import random
2
+ import requests
3
+
4
+
5
class Request:
    """Description of one HTTP request, executed with ``requests.request``.

    Keyword arguments named in ``__REQUEST_ATTRS__`` are collected in
    ``request_setting`` and forwarded to ``requests.request``; any other
    keyword argument becomes a plain instance attribute.
    """

    __REQUEST_ATTRS__ = {
        "params",
        "headers",
        "cookies",
        "data",
        "json",
        "files",
        "auth",
        "timeout",
        "proxies",
        "hooks",
        "stream",
        "verify",
        "cert",
        "allow_redirects",
    }

    def __init__(
            self,
            url,
            seed,
            random_ua=True,
            check_status_code=True,
            **kwargs
    ):
        """
        :param url: target URL.
        :param seed: the seed this request belongs to (stored as-is).
        :param random_ua: when true, add a random user-agent header unless
            the given headers already contain one.
        :param check_status_code: when true, ``download`` calls
            ``raise_for_status`` on the response.
        :param kwargs: request options (see ``__REQUEST_ATTRS__``) and
            arbitrary extra attributes, e.g. ``method``.
        """
        self.url = url
        self.seed = seed
        self.check_status_code = check_status_code
        self.request_setting = {}

        for k, v in kwargs.items():
            if k in self.__class__.__REQUEST_ATTRS__:
                self.request_setting[k] = v
            else:
                setattr(self, k, v)

        if not getattr(self, "method", None):
            # No explicit method: a request carrying a body defaults to POST.
            has_body = self.request_setting.get("data") or self.request_setting.get("json")
            self.method = "POST" if has_body else "GET"

        if random_ua:
            self._build_header()

    @property
    def _random_ua(self) -> str:
        """Build a user-agent string with randomised version numbers."""
        v1 = random.randint(4, 15)
        v2 = random.randint(3, 11)
        v3 = random.randint(1, 16)
        v4 = random.randint(533, 605)
        v5 = random.randint(1000, 6000)
        v6 = random.randint(10, 80)
        user_agent = (f"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_{v1}_{v2}) AppleWebKit/{v4}.{v3} "
                      f"(KHTML, like Gecko) Chrome/105.0.0.0 Safari/{v4}.{v3} Edg/105.0.{v5}.{v6}")
        return user_agent

    def _build_header(self) -> None:
        """Make sure the request headers contain a user-agent.

        Missing or empty headers are replaced by a default pair. Headers that
        lack a user-agent (checked case-insensitively) are copied first, so
        the dict passed in by the caller is never modified. Otherwise a dict
        shared between requests would keep the first random user-agent and
        every later request would reuse it.
        """
        headers = self.request_setting.get("headers")
        if not headers:
            self.request_setting["headers"] = {"accept": "*/*", "user-agent": self._random_ua}
            return
        if any(key.lower() == "user-agent" for key in headers):
            return
        headers = dict(headers)
        headers["user-agent"] = self._random_ua
        self.request_setting["headers"] = headers

    def download(self) -> "requests.Response":
        """Send the request and return the response.

        Raises ``requests.HTTPError`` for 4xx/5xx responses when
        ``check_status_code`` is true.
        """
        # NOTE(review): no default timeout is applied; unless the caller
        # passed ``timeout=...`` this call can block indefinitely.
        response = requests.request(self.method, self.url, **self.request_setting)
        if self.check_status_code:
            response.raise_for_status()
        return response

    @property
    def to_dict(self):
        """Extra attributes only: everything except the core request fields."""
        core_fields = ("url", "seed", "check_status_code", "request_setting")
        return {k: v for k, v in self.__dict__.items() if k not in core_fields}
81
+
82
+
@@ -0,0 +1,23 @@
1
+
2
+
3
class Response:
    """Pairs a seed with the response obtained for it, plus extra attributes."""

    def __init__(
            self,
            seed,
            response,
            **kwargs
    ):
        """Store *seed* and *response*; every extra keyword becomes an attribute."""
        self.seed = seed
        self.response = response

        for attr_name, attr_value in kwargs.items():
            setattr(self, attr_name, attr_value)

    @property
    def to_dict(self):
        """The extra attributes only, without ``seed`` and ``response``."""
        extras = dict(self.__dict__)
        for core_key in ('seed', 'response'):
            extras.pop(core_key)
        return extras
23
+
@@ -0,0 +1,118 @@
1
+ import json
2
+ import time
3
+ import hashlib
4
+
5
+
6
class SeedParams:
    """Bookkeeping values attached to a seed.

    Falsy arguments fall back to defaults: ``retry`` to 0, ``priority`` to
    300 and ``seed_version`` to the current Unix time in whole seconds.
    ``seed_status`` is stored unchanged.
    """

    def __init__(self, retry, priority, seed_version, seed_status=None):
        self.retry = retry if retry else 0
        self.priority = priority if priority else 300
        if seed_version:
            self.seed_version = seed_version
        else:
            self.seed_version = int(time.time())
        self.seed_status = seed_status
13
+
14
+
15
class Seed:
    """A crawl task: free-form attributes, an id (``sid``) and ``params``.

    Attributes come from a JSON object, a dict, or a raw URL string.
    Reading an attribute that was never set returns None instead of raising.
    """

    __SEED_PARAMS__ = [
        "retry",
        "priority",
        "seed_version",
        "seed_status"
    ]

    def __init__(
            self,
            seed,
            sid=None,
            retry=None,
            priority=None,
            seed_version=None,
            seed_status=None,
            **kwargs
    ):
        """
        :param seed: a dict, a JSON object as str/bytes, or any other
            str/bytes, which is stored as the ``url`` attribute.
        :param sid: explicit id; when omitted and the seed data carries no
            ``sid``, the md5 of the compact JSON form is used.
        :param retry, priority, seed_version, seed_status: forwarded to
            ``SeedParams``.
        :param kwargs: additional attributes to set on the seed.
        :raises TypeError: if *seed* is not str, bytes or dict.
        """
        if isinstance(seed, (str, bytes)):
            try:
                item = json.loads(seed)
            except json.JSONDecodeError:
                item = None
            if isinstance(item, dict):
                self._init_seed(item)
            else:
                # Not a JSON object: either invalid JSON or a scalar/array
                # such as "123", which used to crash in _init_seed. Treat it
                # as a raw URL. Bytes are decoded so the seed stays JSON
                # serializable.
                self.url = seed.decode() if isinstance(seed, bytes) else seed
        elif isinstance(seed, dict):
            self._init_seed(seed)
        else:
            raise TypeError(f"seed type error, must be str or dict! seed: {seed}")

        if kwargs:
            # The names in __SEED_PARAMS__ are explicit parameters, so kwargs
            # can only hold ordinary seed attributes.
            self._init_seed(kwargs)

        # Compute the id before ``params`` exists, so the hash covers only
        # the seed data.
        if sid or not getattr(self, "sid", None):
            self._init_id(sid)

        self.params = SeedParams(
            retry=retry,
            priority=priority,
            seed_version=seed_version,
            seed_status=seed_status,
        )

    def __getattr__(self, name):
        # Unknown attributes read as None. Dunder names must still raise:
        # copy/pickle probe instances for optional hooks such as
        # __setstate__ and would otherwise try to call None.
        if name.startswith("__") and name.endswith("__"):
            raise AttributeError(name)
        return None

    def __setitem__(self, key, value):
        setattr(self, key, value)

    def __getitem__(self, item):
        return getattr(self, item)

    def __str__(self):
        """JSON of all attributes; ``params`` is rendered as a nested object."""
        payload = self.__dict__.copy()
        params = payload.get("params")
        if hasattr(params, "__dict__"):
            # A SeedParams instance is not JSON serializable by itself.
            payload["params"] = vars(params)
        return json.dumps(payload, ensure_ascii=False)

    def __repr__(self):
        chars = [f"{k}={v}" for k, v in self.__dict__.items()]
        return f'{self.__class__.__name__}({", ".join(chars)})'

    def _init_seed(self, seed_info: dict):
        """Copy every entry of *seed_info* onto the seed, except seed params."""
        for k, v in seed_info.items():
            if k not in self.__SEED_PARAMS__:
                setattr(self, k, v)

    def _init_id(self, sid):
        """Set ``sid``; a falsy *sid* is replaced by the md5 of ``to_string``."""
        if not sid:
            sid = hashlib.md5(self.to_string.encode()).hexdigest()
        self.sid = sid

    @property
    def to_dict(self) -> dict:
        """Copy of the seed attributes without ``params``."""
        seed = self.__dict__.copy()
        if seed.get("params"):
            del seed["params"]
        return seed

    @property
    def to_string(self) -> str:
        """Compact JSON form of ``to_dict``."""
        return json.dumps(
            self.to_dict,
            ensure_ascii=False,
            separators=(",", ":")
        )

    @property
    def seed(self):
        """Return the seed itself."""
        return self
118
+