re-common 10.0.19__py3-none-any.whl → 10.0.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. re_common/baselibrary/__init__.py +4 -4
  2. re_common/baselibrary/baseabs/__init__.py +6 -6
  3. re_common/baselibrary/baseabs/baseabs.py +26 -26
  4. re_common/baselibrary/database/mbuilder.py +132 -132
  5. re_common/baselibrary/database/moudle.py +93 -93
  6. re_common/baselibrary/database/msqlite3.py +194 -194
  7. re_common/baselibrary/database/mysql.py +169 -169
  8. re_common/baselibrary/database/sql_factory.py +26 -26
  9. re_common/baselibrary/mthread/MThreadingRun.py +486 -486
  10. re_common/baselibrary/mthread/MThreadingRunEvent.py +349 -349
  11. re_common/baselibrary/mthread/__init__.py +2 -2
  12. re_common/baselibrary/mthread/mythreading.py +695 -695
  13. re_common/baselibrary/pakge_other/socks.py +404 -404
  14. re_common/baselibrary/readconfig/config_factory.py +18 -18
  15. re_common/baselibrary/readconfig/ini_config.py +317 -317
  16. re_common/baselibrary/readconfig/toml_config.py +49 -49
  17. re_common/baselibrary/temporary/envdata.py +36 -36
  18. re_common/baselibrary/tools/all_requests/aiohttp_request.py +118 -118
  19. re_common/baselibrary/tools/all_requests/httpx_requet.py +102 -102
  20. re_common/baselibrary/tools/all_requests/mrequest.py +412 -412
  21. re_common/baselibrary/tools/all_requests/requests_request.py +81 -81
  22. re_common/baselibrary/tools/batch_compre/bijiao_batch.py +31 -31
  23. re_common/baselibrary/tools/contrast_db3.py +123 -123
  24. re_common/baselibrary/tools/copy_file.py +39 -39
  25. re_common/baselibrary/tools/db3_2_sizedb3.py +102 -102
  26. re_common/baselibrary/tools/foreachgz.py +39 -39
  27. re_common/baselibrary/tools/get_attr.py +10 -10
  28. re_common/baselibrary/tools/image_to_pdf.py +61 -61
  29. re_common/baselibrary/tools/java_code_deal.py +139 -139
  30. re_common/baselibrary/tools/javacode.py +79 -79
  31. re_common/baselibrary/tools/mdb_db3.py +48 -48
  32. re_common/baselibrary/tools/merge_file.py +171 -171
  33. re_common/baselibrary/tools/merge_gz_file.py +165 -165
  34. re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +42 -42
  35. re_common/baselibrary/tools/mhdfstools/hdfst.py +42 -42
  36. re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +38 -38
  37. re_common/baselibrary/tools/mongo_tools.py +50 -50
  38. re_common/baselibrary/tools/move_file.py +170 -170
  39. re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +63 -63
  40. re_common/baselibrary/tools/move_mongo/move_mongo_table.py +354 -354
  41. re_common/baselibrary/tools/move_mongo/use_mttf.py +18 -18
  42. re_common/baselibrary/tools/move_mongo/use_mv.py +93 -93
  43. re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +125 -125
  44. re_common/baselibrary/tools/mpandas/pandas_visualization.py +7 -7
  45. re_common/baselibrary/tools/myparsel.py +104 -104
  46. re_common/baselibrary/tools/rename_dir_file.py +37 -37
  47. re_common/baselibrary/tools/sequoiadb_utils.py +398 -398
  48. re_common/baselibrary/tools/split_line_to_many.py +25 -25
  49. re_common/baselibrary/tools/stringtodicts.py +33 -33
  50. re_common/baselibrary/tools/workwechant_bot.py +84 -84
  51. re_common/baselibrary/utils/baseaiohttp.py +296 -296
  52. re_common/baselibrary/utils/baseaiomysql.py +87 -87
  53. re_common/baselibrary/utils/baseallstep.py +191 -191
  54. re_common/baselibrary/utils/baseavro.py +19 -19
  55. re_common/baselibrary/utils/baseboto3.py +291 -291
  56. re_common/baselibrary/utils/basecsv.py +32 -32
  57. re_common/baselibrary/utils/basedict.py +133 -133
  58. re_common/baselibrary/utils/basedir.py +241 -241
  59. re_common/baselibrary/utils/baseencode.py +351 -351
  60. re_common/baselibrary/utils/baseencoding.py +28 -28
  61. re_common/baselibrary/utils/baseesdsl.py +86 -86
  62. re_common/baselibrary/utils/baseexcel.py +264 -264
  63. re_common/baselibrary/utils/baseexcept.py +109 -109
  64. re_common/baselibrary/utils/basefile.py +654 -654
  65. re_common/baselibrary/utils/baseftp.py +214 -214
  66. re_common/baselibrary/utils/basegzip.py +60 -60
  67. re_common/baselibrary/utils/basehdfs.py +135 -135
  68. re_common/baselibrary/utils/basehttpx.py +268 -268
  69. re_common/baselibrary/utils/baseip.py +87 -87
  70. re_common/baselibrary/utils/basejson.py +2 -2
  71. re_common/baselibrary/utils/baselist.py +32 -32
  72. re_common/baselibrary/utils/basemotor.py +190 -190
  73. re_common/baselibrary/utils/basemssql.py +98 -98
  74. re_common/baselibrary/utils/baseodbc.py +113 -113
  75. re_common/baselibrary/utils/basepandas.py +302 -302
  76. re_common/baselibrary/utils/basepeewee.py +11 -11
  77. re_common/baselibrary/utils/basepika.py +180 -180
  78. re_common/baselibrary/utils/basepydash.py +143 -143
  79. re_common/baselibrary/utils/basepymongo.py +230 -230
  80. re_common/baselibrary/utils/basequeue.py +22 -22
  81. re_common/baselibrary/utils/baserar.py +57 -57
  82. re_common/baselibrary/utils/baserequest.py +279 -279
  83. re_common/baselibrary/utils/baseset.py +8 -8
  84. re_common/baselibrary/utils/basesmb.py +403 -403
  85. re_common/baselibrary/utils/basestring.py +382 -382
  86. re_common/baselibrary/utils/basetime.py +320 -320
  87. re_common/baselibrary/utils/baseurl.py +121 -121
  88. re_common/baselibrary/utils/basezip.py +57 -57
  89. re_common/baselibrary/utils/core/__init__.py +7 -7
  90. re_common/baselibrary/utils/core/bottomutils.py +18 -18
  91. re_common/baselibrary/utils/core/mdeprecated.py +327 -327
  92. re_common/baselibrary/utils/core/mlamada.py +16 -16
  93. re_common/baselibrary/utils/core/msginfo.py +25 -25
  94. re_common/baselibrary/utils/core/requests_core.py +103 -103
  95. re_common/baselibrary/utils/fateadm.py +429 -429
  96. re_common/baselibrary/utils/importfun.py +123 -123
  97. re_common/baselibrary/utils/mfaker.py +57 -57
  98. re_common/baselibrary/utils/my_abc/__init__.py +3 -3
  99. re_common/baselibrary/utils/my_abc/better_abc.py +32 -32
  100. re_common/baselibrary/utils/mylogger.py +414 -414
  101. re_common/baselibrary/utils/myredisclient.py +861 -861
  102. re_common/baselibrary/utils/pipupgrade.py +21 -21
  103. re_common/baselibrary/utils/ringlist.py +85 -85
  104. re_common/baselibrary/utils/version_compare.py +36 -36
  105. re_common/baselibrary/utils/ydmhttp.py +126 -126
  106. re_common/facade/lazy_import.py +11 -11
  107. re_common/facade/loggerfacade.py +25 -25
  108. re_common/facade/mysqlfacade.py +467 -467
  109. re_common/facade/now.py +31 -31
  110. re_common/facade/sqlite3facade.py +257 -257
  111. re_common/facade/use/mq_use_facade.py +83 -83
  112. re_common/facade/use/proxy_use_facade.py +19 -19
  113. re_common/libtest/base_dict_test.py +19 -19
  114. re_common/libtest/baseavro_test.py +13 -13
  115. re_common/libtest/basefile_test.py +14 -14
  116. re_common/libtest/basemssql_test.py +77 -77
  117. re_common/libtest/baseodbc_test.py +7 -7
  118. re_common/libtest/basepandas_test.py +38 -38
  119. re_common/libtest/get_attr_test/get_attr_test_settings.py +14 -14
  120. re_common/libtest/get_attr_test/settings.py +54 -54
  121. re_common/libtest/idencode_test.py +53 -53
  122. re_common/libtest/iniconfig_test.py +35 -35
  123. re_common/libtest/ip_test.py +34 -34
  124. re_common/libtest/merge_file_test.py +20 -20
  125. re_common/libtest/mfaker_test.py +8 -8
  126. re_common/libtest/mm3_test.py +31 -31
  127. re_common/libtest/mylogger_test.py +88 -88
  128. re_common/libtest/myparsel_test.py +27 -27
  129. re_common/libtest/mysql_test.py +151 -151
  130. re_common/libtest/pymongo_test.py +21 -21
  131. re_common/libtest/split_test.py +11 -11
  132. re_common/libtest/sqlite3_merge_test.py +5 -5
  133. re_common/libtest/sqlite3_test.py +34 -34
  134. re_common/libtest/tomlconfig_test.py +30 -30
  135. re_common/libtest/use_tools_test/__init__.py +2 -2
  136. re_common/libtest/user/__init__.py +4 -4
  137. re_common/studio/__init__.py +4 -4
  138. re_common/studio/assignment_expressions.py +36 -36
  139. re_common/studio/mydash/test1.py +18 -18
  140. re_common/studio/pydashstudio/first.py +9 -9
  141. re_common/studio/streamlitstudio/first_app.py +65 -65
  142. re_common/studio/streamlitstudio/uber_pickups.py +23 -23
  143. re_common/studio/test.py +18 -18
  144. re_common/v2/baselibrary/decorators/utils.py +59 -59
  145. re_common/v2/baselibrary/s3object/baseboto3.py +230 -230
  146. re_common/v2/baselibrary/tools/WeChatRobot.py +79 -79
  147. re_common/v2/baselibrary/tools/ac_ahocorasick.py +75 -75
  148. re_common/v2/baselibrary/tools/dict_tools.py +37 -37
  149. re_common/v2/baselibrary/tools/dolphinscheduler.py +187 -187
  150. re_common/v2/baselibrary/tools/hdfs_data_processer.py +338 -338
  151. re_common/v2/baselibrary/tools/list_tools.py +65 -65
  152. re_common/v2/baselibrary/tools/search_hash_tools.py +54 -54
  153. re_common/v2/baselibrary/tools/text_matcher.py +326 -326
  154. re_common/v2/baselibrary/tools/unionfind_tools.py +60 -60
  155. re_common/v2/baselibrary/utils/BusinessStringUtil.py +196 -196
  156. re_common/v2/baselibrary/utils/author_smi.py +360 -360
  157. re_common/v2/baselibrary/utils/base_string_similarity.py +158 -158
  158. re_common/v2/baselibrary/utils/basedict.py +37 -37
  159. re_common/v2/baselibrary/utils/basehdfs.py +161 -127
  160. re_common/v2/baselibrary/utils/basepika.py +180 -180
  161. re_common/v2/baselibrary/utils/basetime.py +77 -0
  162. re_common/v2/baselibrary/utils/db.py +38 -38
  163. re_common/v2/baselibrary/utils/json_cls.py +16 -11
  164. re_common/v2/baselibrary/utils/mq.py +83 -83
  165. re_common/v2/baselibrary/utils/n_ary_expression_tree.py +243 -243
  166. re_common/v2/baselibrary/utils/string_bool.py +149 -149
  167. re_common/v2/baselibrary/utils/string_clear.py +204 -202
  168. re_common/v2/baselibrary/utils/string_smi.py +18 -18
  169. re_common/v2/baselibrary/utils/stringutils.py +213 -213
  170. re_common/vip/base_step_process.py +11 -11
  171. re_common/vip/baseencodeid.py +90 -90
  172. re_common/vip/changetaskname.py +28 -28
  173. re_common/vip/core_var.py +24 -24
  174. re_common/vip/mmh3Hash.py +89 -89
  175. re_common/vip/proxy/allproxys.py +127 -127
  176. re_common/vip/proxy/allproxys_thread.py +159 -159
  177. re_common/vip/proxy/cnki_proxy.py +153 -153
  178. re_common/vip/proxy/kuaidaili.py +87 -87
  179. re_common/vip/proxy/proxy_all.py +113 -113
  180. re_common/vip/proxy/update_kuaidaili_0.py +42 -42
  181. re_common/vip/proxy/wanfang_proxy.py +152 -152
  182. re_common/vip/proxy/wp_proxy_all.py +181 -181
  183. re_common/vip/read_rawid_to_txt.py +91 -91
  184. re_common/vip/title/__init__.py +5 -5
  185. re_common/vip/title/transform/TransformBookTitleToZt.py +125 -125
  186. re_common/vip/title/transform/TransformConferenceTitleToZt.py +139 -139
  187. re_common/vip/title/transform/TransformCstadTitleToZt.py +195 -195
  188. re_common/vip/title/transform/TransformJournalTitleToZt.py +203 -203
  189. re_common/vip/title/transform/TransformPatentTitleToZt.py +132 -132
  190. re_common/vip/title/transform/TransformRegulationTitleToZt.py +114 -114
  191. re_common/vip/title/transform/TransformStandardTitleToZt.py +135 -135
  192. re_common/vip/title/transform/TransformThesisTitleToZt.py +135 -135
  193. re_common/vip/title/transform/__init__.py +10 -10
  194. {re_common-10.0.19.dist-info → re_common-10.0.22.dist-info}/LICENSE +201 -201
  195. {re_common-10.0.19.dist-info → re_common-10.0.22.dist-info}/METADATA +16 -24
  196. re_common-10.0.22.dist-info/RECORD +227 -0
  197. {re_common-10.0.19.dist-info → re_common-10.0.22.dist-info}/WHEEL +1 -1
  198. re_common-10.0.19.dist-info/RECORD +0 -226
  199. {re_common-10.0.19.dist-info → re_common-10.0.22.dist-info}/top_level.txt +0 -0
@@ -1,127 +1,161 @@
1
- import gzip
2
- from io import BytesIO
3
-
4
- from hdfs import InsecureClient
5
-
6
-
7
- class HDFSUtils(object):
8
- """
9
- HDFS 工具类,封装常见的 HDFS 操作。
10
- """
11
-
12
- def __init__(self, hdfs_url, hdfs_user):
13
- """
14
- 初始化 HDFS 客户端。
15
- :param hdfs_url: HDFS 的 URL,例如 "http://namenode:50070"
16
- :param hdfs_user: HDFS 用户名
17
- """
18
- self.hdfs_url = hdfs_url
19
- self.hdfs_user = hdfs_user
20
- self.client = InsecureClient(hdfs_url, user=hdfs_user)
21
-
22
- def upload_file(self, local_path, hdfs_path, overwrite=False):
23
- """
24
- 将本地文件上传到 HDFS。
25
- :param local_path: 本地文件路径
26
- :param hdfs_path: HDFS 文件路径
27
- :param overwrite: 是否覆盖已存在的文件
28
- :return: None
29
- """
30
- self.client.upload(hdfs_path, local_path, overwrite=overwrite)
31
- print(f"文件上传成功: {local_path} -> {hdfs_path}")
32
-
33
- def download_file(self, hdfs_path, local_path, overwrite=False):
34
- """
35
- 从 HDFS 下载文件到本地。
36
- :param hdfs_path: HDFS 文件路径
37
- :param local_path: 本地文件路径
38
- :param overwrite: 是否覆盖已存在的文件
39
- :return: None
40
- """
41
- self.client.download(hdfs_path, local_path, overwrite=overwrite)
42
- print(f"文件下载成功: {hdfs_path} -> {local_path}")
43
-
44
- def delete_file(self, hdfs_path, recursive=False):
45
- """
46
- 删除 HDFS 上的文件或目录。
47
- :param hdfs_path: HDFS 文件或目录路径
48
- :param recursive: 是否递归删除目录
49
- :return: None
50
- """
51
- self.client.delete(hdfs_path, recursive=recursive)
52
- print(f"文件/目录删除成功: {hdfs_path}")
53
-
54
- def create_directory(self, hdfs_path):
55
- """
56
- 在 HDFS 上创建目录。
57
- :param hdfs_path: HDFS 目录路径
58
- :return: None
59
- """
60
- self.client.makedirs(hdfs_path)
61
- print(f"目录创建成功: {hdfs_path}")
62
-
63
- def list_files(self, hdfs_path):
64
- """
65
- 列出 HDFS 目录下的文件和子目录。
66
- :param hdfs_path: HDFS 目录路径
67
- :return: 文件/目录列表
68
- """
69
- files = self.client.list(hdfs_path)
70
- return files
71
-
72
- def read_file(self, hdfs_path):
73
- """
74
- 读取 HDFS 文件内容。
75
- :param hdfs_path: HDFS 文件路径
76
- :return: 文件内容
77
- """
78
- with self.client.read(hdfs_path) as reader:
79
- content = reader.read()
80
- print(f"文件读取成功: {hdfs_path}")
81
- return content
82
-
83
- def read_gz_file(self, hdfs_path, encoding='utf-8'):
84
- """
85
- 读取 HDFS 上的 .gz 文件内容。
86
- :param hdfs_path: HDFS 文件路径(必须以 .gz 结尾)
87
- :param encoding: 文件编码格式(默认 utf-8)
88
- :return: 文件内容
89
- """
90
- with self.client.read(hdfs_path) as reader: # 以二进制模式读取
91
- compressed_data = reader.read() # 读取压缩数据
92
- with gzip.GzipFile(fileobj=BytesIO(compressed_data)) as gz_file: # 解压缩
93
- content = gz_file.read().decode(encoding) # 解码为字符串
94
- print(f"文件读取成功: {hdfs_path}")
95
- return content
96
-
97
- def write_file(self, hdfs_path, content, overwrite=False, encoding='utf-8'):
98
- """
99
- 向 HDFS 文件写入内容。
100
- :param hdfs_path: HDFS 文件路径
101
- :param content: 要写入的内容
102
- :param overwrite: 是否覆盖已存在的文件
103
- :param encoding: 文件编码格式
104
- :return: None
105
- """
106
- with self.client.write(hdfs_path, overwrite=overwrite, encoding=encoding) as writer:
107
- writer.write(content)
108
- print(f"文件写入成功: {hdfs_path}")
109
-
110
- def file_exists(self, hdfs_path):
111
- """
112
- 检查 HDFS 文件或目录是否存在。
113
- :param hdfs_path: HDFS 文件或目录路径
114
- :return: 是否存在
115
- """
116
- status = self.client.status(hdfs_path, strict=False)
117
- return status is not None
118
-
119
- def rename_file(self, hdfs_src_path, hdfs_dst_path):
120
- """
121
- 重命名或移动 HDFS 文件/目录。
122
- :param hdfs_src_path: 源路径
123
- :param hdfs_dst_path: 目标路径
124
- :return: None
125
- """
126
- self.client.rename(hdfs_src_path, hdfs_dst_path)
127
- print(f"文件/目录重命名成功: {hdfs_src_path} -> {hdfs_dst_path}")
1
+ import gzip
2
+ from io import BytesIO
3
+
4
+ from hdfs import InsecureClient
5
+
6
+
7
+ class HDFSUtils(object):
8
+ """
9
+ HDFS 工具类,封装常见的 HDFS 操作。
10
+ """
11
+
12
+ def __init__(self, hdfs_url, hdfs_user):
13
+ """
14
+ 初始化 HDFS 客户端。
15
+ :param hdfs_url: HDFS 的 URL,例如 "http://namenode:50070"
16
+ :param hdfs_user: HDFS 用户名
17
+ """
18
+ self.hdfs_url = hdfs_url
19
+ self.hdfs_user = hdfs_user
20
+ self.client = InsecureClient(hdfs_url, user=hdfs_user)
21
+
22
+ def upload_file(self, local_path, hdfs_path, overwrite=False):
23
+ """
24
+ 将本地文件上传到 HDFS。
25
+ :param local_path: 本地文件路径
26
+ :param hdfs_path: HDFS 文件路径
27
+ :param overwrite: 是否覆盖已存在的文件
28
+ :return: None
29
+ """
30
+ self.client.upload(hdfs_path, local_path, overwrite=overwrite)
31
+ print(f"文件上传成功: {local_path} -> {hdfs_path}")
32
+
33
+ def download_file(self, hdfs_path, local_path, overwrite=False):
34
+ """
35
+ 从 HDFS 下载文件到本地。
36
+ :param hdfs_path: HDFS 文件路径
37
+ :param local_path: 本地文件路径
38
+ :param overwrite: 是否覆盖已存在的文件
39
+ :return: None
40
+ """
41
+ self.client.download(hdfs_path, local_path, overwrite=overwrite)
42
+ print(f"文件下载成功: {hdfs_path} -> {local_path}")
43
+
44
+ def delete_file(self, hdfs_path, recursive=False):
45
+ """
46
+ 删除 HDFS 上的文件或目录。
47
+ :param hdfs_path: HDFS 文件或目录路径
48
+ :param recursive: 是否递归删除目录
49
+ :return: None
50
+ """
51
+ self.client.delete(hdfs_path, recursive=recursive)
52
+ print(f"文件/目录删除成功: {hdfs_path}")
53
+
54
+ def create_directory(self, hdfs_path):
55
+ """
56
+ 在 HDFS 上创建目录。
57
+ :param hdfs_path: HDFS 目录路径
58
+ :return: None
59
+ """
60
+ self.client.makedirs(hdfs_path)
61
+ print(f"目录创建成功: {hdfs_path}")
62
+
63
+ def list_files(self, hdfs_path):
64
+ """
65
+ 列出 HDFS 目录下的文件和子目录。
66
+ :param hdfs_path: HDFS 目录路径
67
+ :return: 文件/目录列表
68
+ """
69
+ files = self.client.list(hdfs_path)
70
+ return files
71
+
72
+ def read_file(self, hdfs_path):
73
+ """
74
+ 读取 HDFS 文件内容。
75
+ :param hdfs_path: HDFS 文件路径
76
+ :return: 文件内容
77
+ """
78
+ with self.client.read(hdfs_path) as reader:
79
+ content = reader.read()
80
+ print(f"文件读取成功: {hdfs_path}")
81
+ return content
82
+
83
+ def read_gz_file(self, hdfs_path, encoding='utf-8'):
84
+ """
85
+ 读取 HDFS 上的 .gz 文件内容。
86
+ :param hdfs_path: HDFS 文件路径(必须以 .gz 结尾)
87
+ :param encoding: 文件编码格式(默认 utf-8)
88
+ :return: 文件内容
89
+ """
90
+ with self.client.read(hdfs_path) as reader: # 以二进制模式读取
91
+ compressed_data = reader.read() # 读取压缩数据
92
+ with gzip.GzipFile(fileobj=BytesIO(compressed_data)) as gz_file: # 解压缩
93
+ content = gz_file.read().decode(encoding) # 解码为字符串
94
+ print(f"文件读取成功: {hdfs_path}")
95
+ return content
96
+
97
+ def write_file(self, hdfs_path, content, overwrite=False, encoding='utf-8'):
98
+ """
99
+ 向 HDFS 文件写入内容。
100
+ :param hdfs_path: HDFS 文件路径
101
+ :param content: 要写入的内容
102
+ :param overwrite: 是否覆盖已存在的文件
103
+ :param encoding: 文件编码格式
104
+ :return: None
105
+ """
106
+ with self.client.write(hdfs_path, overwrite=overwrite, encoding=encoding) as writer:
107
+ writer.write(content)
108
+ print(f"文件写入成功: {hdfs_path}")
109
+
110
+ def write_file_kwargs(self, hdfs_path, content, **kwargs):
111
+ """
112
+ HDFS 文件写入内容
113
+ 自定义参数实现更大的灵活性
114
+ """
115
+ with self.client.write(hdfs_path, **kwargs) as writer:
116
+ writer.write(content)
117
+ print(f"文件写入成功: {hdfs_path}")
118
+
119
+ def safe_append_hdfs(self, hdfs_path, content):
120
+ """
121
+ 更安全的追加写入方式,显式检查文件是否存在
122
+
123
+ :param content: 要写入的内容
124
+ :param hdfs_path: HDFS文件路径
125
+ """
126
+ try:
127
+ # 检查文件是否存在
128
+ file_exists = self.client.status(hdfs_path, strict=False) is not None
129
+
130
+ if not file_exists:
131
+ print(f"文件 {hdfs_path} 不存在,将创建新文件")
132
+ # 第一次写入不使用append模式
133
+ with self.client.write(hdfs_path, encoding='utf-8') as writer:
134
+ writer.write(content)
135
+ else:
136
+ # 追加模式写入
137
+ with self.client.write(hdfs_path, encoding='utf-8', append=True) as writer:
138
+ writer.write(content)
139
+
140
+ except Exception as e:
141
+ print(f"文件操作失败: {str(e)}")
142
+ raise
143
+
144
+ def file_exists(self, hdfs_path):
145
+ """
146
+ 检查 HDFS 文件或目录是否存在。
147
+ :param hdfs_path: HDFS 文件或目录路径
148
+ :return: 是否存在
149
+ """
150
+ status = self.client.status(hdfs_path, strict=False)
151
+ return status is not None
152
+
153
+ def rename_file(self, hdfs_src_path, hdfs_dst_path):
154
+ """
155
+ 重命名或移动 HDFS 文件/目录。
156
+ :param hdfs_src_path: 源路径
157
+ :param hdfs_dst_path: 目标路径
158
+ :return: None
159
+ """
160
+ self.client.rename(hdfs_src_path, hdfs_dst_path)
161
+ print(f"文件/目录重命名成功: {hdfs_src_path} -> {hdfs_dst_path}")
@@ -1,180 +1,180 @@
1
- import pika
2
-
3
-
4
- # https://blog.csdn.net/songfreeman/article/details/50943603
5
- class BasePika(object):
6
-
7
- def __init__(self, username=None, password=None, mqhost=None, virtual_host=None):
8
- self.username = username
9
- self.password = password
10
- self.conn = None
11
- self.host = mqhost
12
- self.virtual_host = virtual_host
13
- self.auto_ack = True
14
-
15
- def set_default(self):
16
- self.host = "192.168.31.79"
17
- self.virtual_host = "vhost_NetDataGather"
18
- self.username = "vip"
19
- self.password = "piv$*123"
20
-
21
- def connect_str(self,amqp_str):
22
- parameters = pika.URLParameters(amqp_str)
23
- self.conn = pika.BlockingConnection(parameters)
24
-
25
- def connect(self):
26
- """
27
- 设置用户名 密码 进行连接
28
- :return:
29
- """
30
- credentials = pika.PlainCredentials(self.username, self.password)
31
- # parameters = pika.URLParameters('amqp://guest:guest@rabbit-server1:5672/%2F')
32
- # 可以通过将 heartbeat 设为 0,关闭 rabbitmq 的心跳检测
33
- parameters = pika.ConnectionParameters(host=self.host,
34
- virtual_host=self.virtual_host,
35
- credentials=credentials,
36
- heartbeat=0)
37
- self.conn = pika.BlockingConnection(parameters)
38
-
39
- def close(self):
40
- # 关闭消息队列
41
- self.conn.close()
42
-
43
- def create_channel(self):
44
- self.channel = self.conn.channel()
45
-
46
- def __del__(self):
47
- self.channel.close()
48
- self.conn.close()
49
-
50
- def random_queue_declare(self):
51
- """
52
- 这样, result.method.queue 包含一个随机的队列名, 比如:看起来像 amq.gen-JzTY20BRgKO-HjmUJj0wLg.
53
- 其次:
54
- 一旦我们断开consumer连接,这个队列名将自动删除。这里有一个标识设置:
55
- :return:
56
- """
57
- return self.channel.queue_declare("", exclusive=True)
58
-
59
- def queue_declare(self, queue="hello", durable=False):
60
- """
61
- 创建目的地队列hello 取消息时也可以调用
62
- 取消息和发送消息都调用 保证队列存在,也保证了不管服务端还是客户端先启动都有队列
63
- durable True 为持久化
64
- :return:
65
- """
66
- return self.channel.queue_declare(queue=queue, durable=durable)
67
-
68
- def get_queue_size(self, queue="hello"):
69
- """
70
- 获取某个队列的长度
71
- :param queue:
72
- :return:
73
- """
74
- queue = self.queue_declare(queue=queue, durable=True)
75
- return queue.method.message_count
76
-
77
- def get_properties(self):
78
- """
79
- 与 queue_declare里的 durable = True 配合使用,
80
- 设置给 easy_send_msg的properties
81
- :return:
82
- """
83
- return pika.BasicProperties(
84
- delivery_mode=2, # 设置消息为持久化的
85
- )
86
-
87
- def easy_send_msg(self, exchange="", routing_key="hello", body="hello world", properties=None):
88
- """
89
- 空字符串标识默认的或者匿名的exchange,如果存在routing_key, 消息路由到routing_key指定的队列中。
90
- routing_key 标识发送到哪个队列,就是服务器上的队列名
91
- body 发送的消息
92
-
93
- basic_publish 如果 exchange 不是"" 但没有绑定队列 消息会消失
94
- :return:
95
- """
96
- self.channel.basic_publish(exchange=exchange,
97
- routing_key=routing_key,
98
- body=body,
99
- properties=properties)
100
-
101
- def basic_ack(self, ch, method):
102
- """
103
- callback的消息确认
104
- :param ch:
105
- :param method:
106
- :return:
107
- """
108
- ch.basic_ack(delivery_tag=method.delivery_tag)
109
-
110
- def callback(self, ch, method, properties, body):
111
- """
112
- 从队列接收消息要更复杂一些,它需要为队列订阅一个 callback 函数来进行接收。
113
- 当我们接收一个消息后,这个 callback 函数将会被 pika函数库自动调用,
114
- 在我们的这个实例里面这个函数将用来打印接收的消息内容到屏幕
115
- :param method:
116
- :param properties:
117
- :param body:
118
- :return:
119
- """
120
- print(type(body))
121
- print(" [x] Received %r" % body)
122
- if self.auto_ack is False:
123
- self.basic_ack(ch, method)
124
-
125
- def set_get_msg_callback(self, routing_key="hello", callback=None, auto_ack=True):
126
- """
127
- 设置取消息的callback
128
- no_ack 如果设置为True,将使用自动确认模式
129
- no_ack 如果设置为False,在callback中确认
130
- :return:
131
- """
132
- self.auto_ack = auto_ack
133
- if callback is None:
134
- callback = self.callback
135
- self.channel.basic_consume(routing_key,
136
- callback,
137
- auto_ack=auto_ack)
138
-
139
- def start_get_msg(self):
140
- """
141
- 开始取消息,会循环不停的取消息
142
- :return:
143
- """
144
- self.channel.start_consuming()
145
-
146
- def basic_qos(self, prefetch_count=1):
147
- """
148
- 可以提前发送几个消息来,当auto_ack=True时无效
149
- prefetch_count==1 消息未处理完前不要发送信息的消息
150
- :return:
151
- """
152
- self.channel.basic_qos(prefetch_count=prefetch_count)
153
-
154
- def exchange_declare(self, exchangename="logs", type="fanout"):
155
- """
156
- fanout exchange非常简单,你从这个名字中就能猜出来,它将从Producer方收到的消息广播给所有他知道的receiver方。而这正是我们的logger记录所需要的消息。
157
- 交换的类型
158
- 直接交换(direct exchange)的路由算法很简单 -- 消息发送到绑定键值(binding key) 刚好完全符合路由键值( routing key) 的消息队列中。
159
-
160
- 消息发送到一个 topic交换不能是一个任意的 routing_key -- 它必须是一个用小数点 分割的单词列表。 这个字符可以是任何单词,但是通常是指定一些连接特定消息的功能。一些有效的路由键(routing key)比如:“stock.usd.nyse",
161
- topic 是 直接交换的升级版
162
-
163
- headers Exchange :headers交换器允许你匹配AMQP消息的header而非路由键。除此之外,headers交换器和direct交换器完全一致,但性能会差很多。因此它并不太实用,而且几乎再也用不到了。
164
- exchangename接下来会与队列绑定
165
- direct , topic , headers 和 fanout
166
- :return:
167
- """
168
- return self.channel.exchange_declare(exchange=exchangename,
169
- exchange_type=type)
170
-
171
- def queue_bind(self, exchange="logs", queue="", routing_key=""):
172
- """
173
- queue 临时队列获取 self.random_queue_declare().method.queue
174
- :param exchange:
175
- :param queue:
176
- :return:
177
- """
178
- self.channel.queue_bind(exchange=exchange,
179
- queue=queue,
180
- routing_key=routing_key)
1
+ import pika
2
+
3
+
4
+ # https://blog.csdn.net/songfreeman/article/details/50943603
5
+ class BasePika(object):
6
+
7
+ def __init__(self, username=None, password=None, mqhost=None, virtual_host=None):
8
+ self.username = username
9
+ self.password = password
10
+ self.conn = None
11
+ self.host = mqhost
12
+ self.virtual_host = virtual_host
13
+ self.auto_ack = True
14
+
15
+ def set_default(self):
16
+ self.host = "192.168.31.79"
17
+ self.virtual_host = "vhost_NetDataGather"
18
+ self.username = "vip"
19
+ self.password = "piv$*123"
20
+
21
+ def connect_str(self,amqp_str):
22
+ parameters = pika.URLParameters(amqp_str)
23
+ self.conn = pika.BlockingConnection(parameters)
24
+
25
+ def connect(self):
26
+ """
27
+ 设置用户名 密码 进行连接
28
+ :return:
29
+ """
30
+ credentials = pika.PlainCredentials(self.username, self.password)
31
+ # parameters = pika.URLParameters('amqp://guest:guest@rabbit-server1:5672/%2F')
32
+ # 可以通过将 heartbeat 设为 0,关闭 rabbitmq 的心跳检测
33
+ parameters = pika.ConnectionParameters(host=self.host,
34
+ virtual_host=self.virtual_host,
35
+ credentials=credentials,
36
+ heartbeat=0)
37
+ self.conn = pika.BlockingConnection(parameters)
38
+
39
+ def close(self):
40
+ # 关闭消息队列
41
+ self.conn.close()
42
+
43
+ def create_channel(self):
44
+ self.channel = self.conn.channel()
45
+
46
+ def __del__(self):
47
+ self.channel.close()
48
+ self.conn.close()
49
+
50
+ def random_queue_declare(self):
51
+ """
52
+ 这样, result.method.queue 包含一个随机的队列名, 比如:看起来像 amq.gen-JzTY20BRgKO-HjmUJj0wLg.
53
+ 其次:
54
+ 一旦我们断开consumer连接,这个队列名将自动删除。这里有一个标识设置:
55
+ :return:
56
+ """
57
+ return self.channel.queue_declare("", exclusive=True)
58
+
59
+ def queue_declare(self, queue="hello", durable=False):
60
+ """
61
+ 创建目的地队列hello 取消息时也可以调用
62
+ 取消息和发送消息都调用 保证队列存在,也保证了不管服务端还是客户端先启动都有队列
63
+ durable True 为持久化
64
+ :return:
65
+ """
66
+ return self.channel.queue_declare(queue=queue, durable=durable)
67
+
68
+ def get_queue_size(self, queue="hello"):
69
+ """
70
+ 获取某个队列的长度
71
+ :param queue:
72
+ :return:
73
+ """
74
+ queue = self.queue_declare(queue=queue, durable=True)
75
+ return queue.method.message_count
76
+
77
+ def get_properties(self):
78
+ """
79
+ 与 queue_declare里的 durable = True 配合使用,
80
+ 设置给 easy_send_msg的properties
81
+ :return:
82
+ """
83
+ return pika.BasicProperties(
84
+ delivery_mode=2, # 设置消息为持久化的
85
+ )
86
+
87
+ def easy_send_msg(self, exchange="", routing_key="hello", body="hello world", properties=None):
88
+ """
89
+ 空字符串标识默认的或者匿名的exchange,如果存在routing_key, 消息路由到routing_key指定的队列中。
90
+ routing_key 标识发送到哪个队列,就是服务器上的队列名
91
+ body 发送的消息
92
+
93
+ basic_publish 如果 exchange 不是"" 但没有绑定队列 消息会消失
94
+ :return:
95
+ """
96
+ self.channel.basic_publish(exchange=exchange,
97
+ routing_key=routing_key,
98
+ body=body,
99
+ properties=properties)
100
+
101
+ def basic_ack(self, ch, method):
102
+ """
103
+ callback的消息确认
104
+ :param ch:
105
+ :param method:
106
+ :return:
107
+ """
108
+ ch.basic_ack(delivery_tag=method.delivery_tag)
109
+
110
+ def callback(self, ch, method, properties, body):
111
+ """
112
+ 从队列接收消息要更复杂一些,它需要为队列订阅一个 callback 函数来进行接收。
113
+ 当我们接收一个消息后,这个 callback 函数将会被 pika函数库自动调用,
114
+ 在我们的这个实例里面这个函数将用来打印接收的消息内容到屏幕
115
+ :param method:
116
+ :param properties:
117
+ :param body:
118
+ :return:
119
+ """
120
+ print(type(body))
121
+ print(" [x] Received %r" % body)
122
+ if self.auto_ack is False:
123
+ self.basic_ack(ch, method)
124
+
125
+ def set_get_msg_callback(self, routing_key="hello", callback=None, auto_ack=True):
126
+ """
127
+ 设置取消息的callback
128
+ no_ack 如果设置为True,将使用自动确认模式
129
+ no_ack 如果设置为False,在callback中确认
130
+ :return:
131
+ """
132
+ self.auto_ack = auto_ack
133
+ if callback is None:
134
+ callback = self.callback
135
+ self.channel.basic_consume(routing_key,
136
+ callback,
137
+ auto_ack=auto_ack)
138
+
139
+ def start_get_msg(self):
140
+ """
141
+ 开始取消息,会循环不停的取消息
142
+ :return:
143
+ """
144
+ self.channel.start_consuming()
145
+
146
+ def basic_qos(self, prefetch_count=1):
147
+ """
148
+ 可以提前发送几个消息来,当auto_ack=True时无效
149
+ prefetch_count==1 消息未处理完前不要发送信息的消息
150
+ :return:
151
+ """
152
+ self.channel.basic_qos(prefetch_count=prefetch_count)
153
+
154
+ def exchange_declare(self, exchangename="logs", type="fanout"):
155
+ """
156
+ fanout exchange非常简单,你从这个名字中就能猜出来,它将从Producer方收到的消息广播给所有他知道的receiver方。而这正是我们的logger记录所需要的消息。
157
+ 交换的类型
158
+ 直接交换(direct exchange)的路由算法很简单 -- 消息发送到绑定键值(binding key) 刚好完全符合路由键值( routing key) 的消息队列中。
159
+
160
+ 消息发送到一个 topic交换不能是一个任意的 routing_key -- 它必须是一个用小数点 分割的单词列表。 这个字符可以是任何单词,但是通常是指定一些连接特定消息的功能。一些有效的路由键(routing key)比如:“stock.usd.nyse",
161
+ topic 是 直接交换的升级版
162
+
163
+ headers Exchange :headers交换器允许你匹配AMQP消息的header而非路由键。除此之外,headers交换器和direct交换器完全一致,但性能会差很多。因此它并不太实用,而且几乎再也用不到了。
164
+ exchangename接下来会与队列绑定
165
+ direct , topic , headers 和 fanout
166
+ :return:
167
+ """
168
+ return self.channel.exchange_declare(exchange=exchangename,
169
+ exchange_type=type)
170
+
171
+ def queue_bind(self, exchange="logs", queue="", routing_key=""):
172
+ """
173
+ queue 临时队列获取 self.random_queue_declare().method.queue
174
+ :param exchange:
175
+ :param queue:
176
+ :return:
177
+ """
178
+ self.channel.queue_bind(exchange=exchange,
179
+ queue=queue,
180
+ routing_key=routing_key)