re-common 10.0.22__py3-none-any.whl → 10.0.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202):
  1. re_common/baselibrary/__init__.py +4 -4
  2. re_common/baselibrary/baseabs/__init__.py +6 -6
  3. re_common/baselibrary/baseabs/baseabs.py +26 -26
  4. re_common/baselibrary/database/mbuilder.py +132 -132
  5. re_common/baselibrary/database/moudle.py +93 -93
  6. re_common/baselibrary/database/msqlite3.py +194 -194
  7. re_common/baselibrary/database/mysql.py +169 -169
  8. re_common/baselibrary/database/sql_factory.py +26 -26
  9. re_common/baselibrary/mthread/MThreadingRun.py +486 -486
  10. re_common/baselibrary/mthread/MThreadingRunEvent.py +349 -349
  11. re_common/baselibrary/mthread/__init__.py +2 -2
  12. re_common/baselibrary/mthread/mythreading.py +695 -695
  13. re_common/baselibrary/pakge_other/socks.py +404 -404
  14. re_common/baselibrary/readconfig/config_factory.py +18 -18
  15. re_common/baselibrary/readconfig/ini_config.py +317 -317
  16. re_common/baselibrary/readconfig/toml_config.py +49 -49
  17. re_common/baselibrary/temporary/envdata.py +36 -36
  18. re_common/baselibrary/tools/all_requests/aiohttp_request.py +118 -118
  19. re_common/baselibrary/tools/all_requests/httpx_requet.py +102 -102
  20. re_common/baselibrary/tools/all_requests/mrequest.py +412 -412
  21. re_common/baselibrary/tools/all_requests/requests_request.py +81 -81
  22. re_common/baselibrary/tools/batch_compre/bijiao_batch.py +31 -31
  23. re_common/baselibrary/tools/contrast_db3.py +123 -123
  24. re_common/baselibrary/tools/copy_file.py +39 -39
  25. re_common/baselibrary/tools/db3_2_sizedb3.py +102 -102
  26. re_common/baselibrary/tools/foreachgz.py +39 -39
  27. re_common/baselibrary/tools/get_attr.py +10 -10
  28. re_common/baselibrary/tools/image_to_pdf.py +61 -61
  29. re_common/baselibrary/tools/java_code_deal.py +139 -139
  30. re_common/baselibrary/tools/javacode.py +79 -79
  31. re_common/baselibrary/tools/mdb_db3.py +48 -48
  32. re_common/baselibrary/tools/merge_file.py +171 -171
  33. re_common/baselibrary/tools/merge_gz_file.py +165 -165
  34. re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +42 -42
  35. re_common/baselibrary/tools/mhdfstools/hdfst.py +42 -42
  36. re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +38 -38
  37. re_common/baselibrary/tools/mongo_tools.py +50 -50
  38. re_common/baselibrary/tools/move_file.py +170 -170
  39. re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +63 -63
  40. re_common/baselibrary/tools/move_mongo/move_mongo_table.py +354 -354
  41. re_common/baselibrary/tools/move_mongo/use_mttf.py +18 -18
  42. re_common/baselibrary/tools/move_mongo/use_mv.py +93 -93
  43. re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +125 -125
  44. re_common/baselibrary/tools/mpandas/pandas_visualization.py +7 -7
  45. re_common/baselibrary/tools/myparsel.py +104 -104
  46. re_common/baselibrary/tools/rename_dir_file.py +37 -37
  47. re_common/baselibrary/tools/sequoiadb_utils.py +398 -398
  48. re_common/baselibrary/tools/split_line_to_many.py +25 -25
  49. re_common/baselibrary/tools/stringtodicts.py +33 -33
  50. re_common/baselibrary/tools/workwechant_bot.py +84 -84
  51. re_common/baselibrary/utils/baseaiohttp.py +296 -296
  52. re_common/baselibrary/utils/baseaiomysql.py +87 -87
  53. re_common/baselibrary/utils/baseallstep.py +191 -191
  54. re_common/baselibrary/utils/baseavro.py +19 -19
  55. re_common/baselibrary/utils/baseboto3.py +291 -291
  56. re_common/baselibrary/utils/basecsv.py +32 -32
  57. re_common/baselibrary/utils/basedict.py +133 -133
  58. re_common/baselibrary/utils/basedir.py +241 -241
  59. re_common/baselibrary/utils/baseencode.py +351 -351
  60. re_common/baselibrary/utils/baseencoding.py +28 -28
  61. re_common/baselibrary/utils/baseesdsl.py +86 -86
  62. re_common/baselibrary/utils/baseexcel.py +264 -264
  63. re_common/baselibrary/utils/baseexcept.py +109 -109
  64. re_common/baselibrary/utils/basefile.py +654 -654
  65. re_common/baselibrary/utils/baseftp.py +214 -214
  66. re_common/baselibrary/utils/basegzip.py +60 -60
  67. re_common/baselibrary/utils/basehdfs.py +135 -135
  68. re_common/baselibrary/utils/basehttpx.py +268 -268
  69. re_common/baselibrary/utils/baseip.py +87 -87
  70. re_common/baselibrary/utils/basejson.py +2 -2
  71. re_common/baselibrary/utils/baselist.py +32 -32
  72. re_common/baselibrary/utils/basemotor.py +190 -190
  73. re_common/baselibrary/utils/basemssql.py +98 -98
  74. re_common/baselibrary/utils/baseodbc.py +113 -113
  75. re_common/baselibrary/utils/basepandas.py +302 -302
  76. re_common/baselibrary/utils/basepeewee.py +11 -11
  77. re_common/baselibrary/utils/basepika.py +180 -180
  78. re_common/baselibrary/utils/basepydash.py +143 -143
  79. re_common/baselibrary/utils/basepymongo.py +230 -230
  80. re_common/baselibrary/utils/basequeue.py +22 -22
  81. re_common/baselibrary/utils/baserar.py +57 -57
  82. re_common/baselibrary/utils/baserequest.py +279 -279
  83. re_common/baselibrary/utils/baseset.py +8 -8
  84. re_common/baselibrary/utils/basesmb.py +403 -403
  85. re_common/baselibrary/utils/basestring.py +382 -382
  86. re_common/baselibrary/utils/basetime.py +320 -320
  87. re_common/baselibrary/utils/baseurl.py +121 -121
  88. re_common/baselibrary/utils/basezip.py +57 -57
  89. re_common/baselibrary/utils/core/__init__.py +7 -7
  90. re_common/baselibrary/utils/core/bottomutils.py +18 -18
  91. re_common/baselibrary/utils/core/mdeprecated.py +327 -327
  92. re_common/baselibrary/utils/core/mlamada.py +16 -16
  93. re_common/baselibrary/utils/core/msginfo.py +25 -25
  94. re_common/baselibrary/utils/core/requests_core.py +103 -103
  95. re_common/baselibrary/utils/fateadm.py +429 -429
  96. re_common/baselibrary/utils/importfun.py +123 -123
  97. re_common/baselibrary/utils/mfaker.py +57 -57
  98. re_common/baselibrary/utils/my_abc/__init__.py +3 -3
  99. re_common/baselibrary/utils/my_abc/better_abc.py +32 -32
  100. re_common/baselibrary/utils/mylogger.py +414 -414
  101. re_common/baselibrary/utils/myredisclient.py +861 -861
  102. re_common/baselibrary/utils/pipupgrade.py +21 -21
  103. re_common/baselibrary/utils/ringlist.py +85 -85
  104. re_common/baselibrary/utils/version_compare.py +36 -36
  105. re_common/baselibrary/utils/ydmhttp.py +126 -126
  106. re_common/facade/lazy_import.py +11 -11
  107. re_common/facade/loggerfacade.py +25 -25
  108. re_common/facade/mysqlfacade.py +467 -467
  109. re_common/facade/now.py +31 -31
  110. re_common/facade/sqlite3facade.py +257 -257
  111. re_common/facade/use/mq_use_facade.py +83 -83
  112. re_common/facade/use/proxy_use_facade.py +19 -19
  113. re_common/libtest/base_dict_test.py +19 -19
  114. re_common/libtest/baseavro_test.py +13 -13
  115. re_common/libtest/basefile_test.py +14 -14
  116. re_common/libtest/basemssql_test.py +77 -77
  117. re_common/libtest/baseodbc_test.py +7 -7
  118. re_common/libtest/basepandas_test.py +38 -38
  119. re_common/libtest/get_attr_test/get_attr_test_settings.py +14 -14
  120. re_common/libtest/get_attr_test/settings.py +54 -54
  121. re_common/libtest/idencode_test.py +53 -53
  122. re_common/libtest/iniconfig_test.py +35 -35
  123. re_common/libtest/ip_test.py +34 -34
  124. re_common/libtest/merge_file_test.py +20 -20
  125. re_common/libtest/mfaker_test.py +8 -8
  126. re_common/libtest/mm3_test.py +31 -31
  127. re_common/libtest/mylogger_test.py +88 -88
  128. re_common/libtest/myparsel_test.py +27 -27
  129. re_common/libtest/mysql_test.py +151 -151
  130. re_common/libtest/pymongo_test.py +21 -21
  131. re_common/libtest/split_test.py +11 -11
  132. re_common/libtest/sqlite3_merge_test.py +5 -5
  133. re_common/libtest/sqlite3_test.py +34 -34
  134. re_common/libtest/tomlconfig_test.py +30 -30
  135. re_common/libtest/use_tools_test/__init__.py +2 -2
  136. re_common/libtest/user/__init__.py +4 -4
  137. re_common/studio/__init__.py +4 -4
  138. re_common/studio/assignment_expressions.py +36 -36
  139. re_common/studio/mydash/test1.py +18 -18
  140. re_common/studio/pydashstudio/first.py +9 -9
  141. re_common/studio/streamlitstudio/first_app.py +65 -65
  142. re_common/studio/streamlitstudio/uber_pickups.py +23 -23
  143. re_common/studio/test.py +18 -18
  144. re_common/v2/baselibrary/business_utils/BusinessStringUtil.py +195 -0
  145. re_common/v2/baselibrary/business_utils/__init__.py +0 -0
  146. re_common/v2/baselibrary/business_utils/rel_tools.py +6 -0
  147. re_common/v2/baselibrary/decorators/utils.py +59 -59
  148. re_common/v2/baselibrary/s3object/baseboto3.py +230 -230
  149. re_common/v2/baselibrary/tools/WeChatRobot.py +95 -79
  150. re_common/v2/baselibrary/tools/ac_ahocorasick.py +75 -75
  151. re_common/v2/baselibrary/tools/dict_tools.py +37 -37
  152. re_common/v2/baselibrary/tools/dolphinscheduler.py +187 -187
  153. re_common/v2/baselibrary/tools/hdfs_data_processer.py +338 -338
  154. re_common/v2/baselibrary/tools/list_tools.py +65 -65
  155. re_common/v2/baselibrary/tools/search_hash_tools.py +54 -54
  156. re_common/v2/baselibrary/tools/text_matcher.py +326 -326
  157. re_common/v2/baselibrary/tools/unionfind_tools.py +60 -60
  158. re_common/v2/baselibrary/utils/BusinessStringUtil.py +196 -196
  159. re_common/v2/baselibrary/utils/author_smi.py +360 -360
  160. re_common/v2/baselibrary/utils/base_string_similarity.py +158 -158
  161. re_common/v2/baselibrary/utils/basedict.py +37 -37
  162. re_common/v2/baselibrary/utils/basehdfs.py +161 -161
  163. re_common/v2/baselibrary/utils/basepika.py +180 -180
  164. re_common/v2/baselibrary/utils/basetime.py +77 -77
  165. re_common/v2/baselibrary/utils/db.py +38 -38
  166. re_common/v2/baselibrary/utils/json_cls.py +16 -16
  167. re_common/v2/baselibrary/utils/mq.py +83 -83
  168. re_common/v2/baselibrary/utils/n_ary_expression_tree.py +243 -243
  169. re_common/v2/baselibrary/utils/string_bool.py +186 -149
  170. re_common/v2/baselibrary/utils/string_clear.py +227 -204
  171. re_common/v2/baselibrary/utils/string_smi.py +18 -18
  172. re_common/v2/baselibrary/utils/stringutils.py +213 -213
  173. re_common/vip/base_step_process.py +11 -11
  174. re_common/vip/baseencodeid.py +90 -90
  175. re_common/vip/changetaskname.py +28 -28
  176. re_common/vip/core_var.py +24 -24
  177. re_common/vip/mmh3Hash.py +89 -89
  178. re_common/vip/proxy/allproxys.py +127 -127
  179. re_common/vip/proxy/allproxys_thread.py +159 -159
  180. re_common/vip/proxy/cnki_proxy.py +153 -153
  181. re_common/vip/proxy/kuaidaili.py +87 -87
  182. re_common/vip/proxy/proxy_all.py +113 -113
  183. re_common/vip/proxy/update_kuaidaili_0.py +42 -42
  184. re_common/vip/proxy/wanfang_proxy.py +152 -152
  185. re_common/vip/proxy/wp_proxy_all.py +181 -181
  186. re_common/vip/read_rawid_to_txt.py +91 -91
  187. re_common/vip/title/__init__.py +5 -5
  188. re_common/vip/title/transform/TransformBookTitleToZt.py +125 -125
  189. re_common/vip/title/transform/TransformConferenceTitleToZt.py +139 -139
  190. re_common/vip/title/transform/TransformCstadTitleToZt.py +195 -195
  191. re_common/vip/title/transform/TransformJournalTitleToZt.py +203 -203
  192. re_common/vip/title/transform/TransformPatentTitleToZt.py +132 -132
  193. re_common/vip/title/transform/TransformRegulationTitleToZt.py +114 -114
  194. re_common/vip/title/transform/TransformStandardTitleToZt.py +135 -135
  195. re_common/vip/title/transform/TransformThesisTitleToZt.py +135 -135
  196. re_common/vip/title/transform/__init__.py +10 -10
  197. {re_common-10.0.22.dist-info → re_common-10.0.24.dist-info}/LICENSE +201 -201
  198. {re_common-10.0.22.dist-info → re_common-10.0.24.dist-info}/METADATA +16 -16
  199. re_common-10.0.24.dist-info/RECORD +230 -0
  200. {re_common-10.0.22.dist-info → re_common-10.0.24.dist-info}/WHEEL +1 -1
  201. re_common-10.0.22.dist-info/RECORD +0 -227
  202. {re_common-10.0.22.dist-info → re_common-10.0.24.dist-info}/top_level.txt +0 -0
@@ -1,161 +1,161 @@
1
- import gzip
2
- from io import BytesIO
3
-
4
- from hdfs import InsecureClient
5
-
6
-
7
- class HDFSUtils(object):
8
- """
9
- HDFS 工具类,封装常见的 HDFS 操作。
10
- """
11
-
12
- def __init__(self, hdfs_url, hdfs_user):
13
- """
14
- 初始化 HDFS 客户端。
15
- :param hdfs_url: HDFS 的 URL,例如 "http://namenode:50070"
16
- :param hdfs_user: HDFS 用户名
17
- """
18
- self.hdfs_url = hdfs_url
19
- self.hdfs_user = hdfs_user
20
- self.client = InsecureClient(hdfs_url, user=hdfs_user)
21
-
22
- def upload_file(self, local_path, hdfs_path, overwrite=False):
23
- """
24
- 将本地文件上传到 HDFS。
25
- :param local_path: 本地文件路径
26
- :param hdfs_path: HDFS 文件路径
27
- :param overwrite: 是否覆盖已存在的文件
28
- :return: None
29
- """
30
- self.client.upload(hdfs_path, local_path, overwrite=overwrite)
31
- print(f"文件上传成功: {local_path} -> {hdfs_path}")
32
-
33
- def download_file(self, hdfs_path, local_path, overwrite=False):
34
- """
35
- 从 HDFS 下载文件到本地。
36
- :param hdfs_path: HDFS 文件路径
37
- :param local_path: 本地文件路径
38
- :param overwrite: 是否覆盖已存在的文件
39
- :return: None
40
- """
41
- self.client.download(hdfs_path, local_path, overwrite=overwrite)
42
- print(f"文件下载成功: {hdfs_path} -> {local_path}")
43
-
44
- def delete_file(self, hdfs_path, recursive=False):
45
- """
46
- 删除 HDFS 上的文件或目录。
47
- :param hdfs_path: HDFS 文件或目录路径
48
- :param recursive: 是否递归删除目录
49
- :return: None
50
- """
51
- self.client.delete(hdfs_path, recursive=recursive)
52
- print(f"文件/目录删除成功: {hdfs_path}")
53
-
54
- def create_directory(self, hdfs_path):
55
- """
56
- 在 HDFS 上创建目录。
57
- :param hdfs_path: HDFS 目录路径
58
- :return: None
59
- """
60
- self.client.makedirs(hdfs_path)
61
- print(f"目录创建成功: {hdfs_path}")
62
-
63
- def list_files(self, hdfs_path):
64
- """
65
- 列出 HDFS 目录下的文件和子目录。
66
- :param hdfs_path: HDFS 目录路径
67
- :return: 文件/目录列表
68
- """
69
- files = self.client.list(hdfs_path)
70
- return files
71
-
72
- def read_file(self, hdfs_path):
73
- """
74
- 读取 HDFS 文件内容。
75
- :param hdfs_path: HDFS 文件路径
76
- :return: 文件内容
77
- """
78
- with self.client.read(hdfs_path) as reader:
79
- content = reader.read()
80
- print(f"文件读取成功: {hdfs_path}")
81
- return content
82
-
83
- def read_gz_file(self, hdfs_path, encoding='utf-8'):
84
- """
85
- 读取 HDFS 上的 .gz 文件内容。
86
- :param hdfs_path: HDFS 文件路径(必须以 .gz 结尾)
87
- :param encoding: 文件编码格式(默认 utf-8)
88
- :return: 文件内容
89
- """
90
- with self.client.read(hdfs_path) as reader: # 以二进制模式读取
91
- compressed_data = reader.read() # 读取压缩数据
92
- with gzip.GzipFile(fileobj=BytesIO(compressed_data)) as gz_file: # 解压缩
93
- content = gz_file.read().decode(encoding) # 解码为字符串
94
- print(f"文件读取成功: {hdfs_path}")
95
- return content
96
-
97
- def write_file(self, hdfs_path, content, overwrite=False, encoding='utf-8'):
98
- """
99
- 向 HDFS 文件写入内容。
100
- :param hdfs_path: HDFS 文件路径
101
- :param content: 要写入的内容
102
- :param overwrite: 是否覆盖已存在的文件
103
- :param encoding: 文件编码格式
104
- :return: None
105
- """
106
- with self.client.write(hdfs_path, overwrite=overwrite, encoding=encoding) as writer:
107
- writer.write(content)
108
- print(f"文件写入成功: {hdfs_path}")
109
-
110
- def write_file_kwargs(self, hdfs_path, content, **kwargs):
111
- """
112
- 向 HDFS 文件写入内容
113
- 自定义参数实现更大的灵活性
114
- """
115
- with self.client.write(hdfs_path, **kwargs) as writer:
116
- writer.write(content)
117
- print(f"文件写入成功: {hdfs_path}")
118
-
119
- def safe_append_hdfs(self, hdfs_path, content):
120
- """
121
- 更安全的追加写入方式,显式检查文件是否存在
122
-
123
- :param content: 要写入的内容
124
- :param hdfs_path: HDFS文件路径
125
- """
126
- try:
127
- # 检查文件是否存在
128
- file_exists = self.client.status(hdfs_path, strict=False) is not None
129
-
130
- if not file_exists:
131
- print(f"文件 {hdfs_path} 不存在,将创建新文件")
132
- # 第一次写入不使用append模式
133
- with self.client.write(hdfs_path, encoding='utf-8') as writer:
134
- writer.write(content)
135
- else:
136
- # 追加模式写入
137
- with self.client.write(hdfs_path, encoding='utf-8', append=True) as writer:
138
- writer.write(content)
139
-
140
- except Exception as e:
141
- print(f"文件操作失败: {str(e)}")
142
- raise
143
-
144
- def file_exists(self, hdfs_path):
145
- """
146
- 检查 HDFS 文件或目录是否存在。
147
- :param hdfs_path: HDFS 文件或目录路径
148
- :return: 是否存在
149
- """
150
- status = self.client.status(hdfs_path, strict=False)
151
- return status is not None
152
-
153
- def rename_file(self, hdfs_src_path, hdfs_dst_path):
154
- """
155
- 重命名或移动 HDFS 文件/目录。
156
- :param hdfs_src_path: 源路径
157
- :param hdfs_dst_path: 目标路径
158
- :return: None
159
- """
160
- self.client.rename(hdfs_src_path, hdfs_dst_path)
161
- print(f"文件/目录重命名成功: {hdfs_src_path} -> {hdfs_dst_path}")
1
+ import gzip
2
+ from io import BytesIO
3
+
4
+ from hdfs import InsecureClient
5
+
6
+
7
+ class HDFSUtils(object):
8
+ """
9
+ HDFS 工具类,封装常见的 HDFS 操作。
10
+ """
11
+
12
+ def __init__(self, hdfs_url, hdfs_user):
13
+ """
14
+ 初始化 HDFS 客户端。
15
+ :param hdfs_url: HDFS 的 URL,例如 "http://namenode:50070"
16
+ :param hdfs_user: HDFS 用户名
17
+ """
18
+ self.hdfs_url = hdfs_url
19
+ self.hdfs_user = hdfs_user
20
+ self.client = InsecureClient(hdfs_url, user=hdfs_user)
21
+
22
+ def upload_file(self, local_path, hdfs_path, overwrite=False):
23
+ """
24
+ 将本地文件上传到 HDFS。
25
+ :param local_path: 本地文件路径
26
+ :param hdfs_path: HDFS 文件路径
27
+ :param overwrite: 是否覆盖已存在的文件
28
+ :return: None
29
+ """
30
+ self.client.upload(hdfs_path, local_path, overwrite=overwrite)
31
+ print(f"文件上传成功: {local_path} -> {hdfs_path}")
32
+
33
+ def download_file(self, hdfs_path, local_path, overwrite=False):
34
+ """
35
+ 从 HDFS 下载文件到本地。
36
+ :param hdfs_path: HDFS 文件路径
37
+ :param local_path: 本地文件路径
38
+ :param overwrite: 是否覆盖已存在的文件
39
+ :return: None
40
+ """
41
+ self.client.download(hdfs_path, local_path, overwrite=overwrite)
42
+ print(f"文件下载成功: {hdfs_path} -> {local_path}")
43
+
44
+ def delete_file(self, hdfs_path, recursive=False):
45
+ """
46
+ 删除 HDFS 上的文件或目录。
47
+ :param hdfs_path: HDFS 文件或目录路径
48
+ :param recursive: 是否递归删除目录
49
+ :return: None
50
+ """
51
+ self.client.delete(hdfs_path, recursive=recursive)
52
+ print(f"文件/目录删除成功: {hdfs_path}")
53
+
54
+ def create_directory(self, hdfs_path):
55
+ """
56
+ 在 HDFS 上创建目录。
57
+ :param hdfs_path: HDFS 目录路径
58
+ :return: None
59
+ """
60
+ self.client.makedirs(hdfs_path)
61
+ print(f"目录创建成功: {hdfs_path}")
62
+
63
+ def list_files(self, hdfs_path):
64
+ """
65
+ 列出 HDFS 目录下的文件和子目录。
66
+ :param hdfs_path: HDFS 目录路径
67
+ :return: 文件/目录列表
68
+ """
69
+ files = self.client.list(hdfs_path)
70
+ return files
71
+
72
+ def read_file(self, hdfs_path):
73
+ """
74
+ 读取 HDFS 文件内容。
75
+ :param hdfs_path: HDFS 文件路径
76
+ :return: 文件内容
77
+ """
78
+ with self.client.read(hdfs_path) as reader:
79
+ content = reader.read()
80
+ print(f"文件读取成功: {hdfs_path}")
81
+ return content
82
+
83
+ def read_gz_file(self, hdfs_path, encoding='utf-8'):
84
+ """
85
+ 读取 HDFS 上的 .gz 文件内容。
86
+ :param hdfs_path: HDFS 文件路径(必须以 .gz 结尾)
87
+ :param encoding: 文件编码格式(默认 utf-8)
88
+ :return: 文件内容
89
+ """
90
+ with self.client.read(hdfs_path) as reader: # 以二进制模式读取
91
+ compressed_data = reader.read() # 读取压缩数据
92
+ with gzip.GzipFile(fileobj=BytesIO(compressed_data)) as gz_file: # 解压缩
93
+ content = gz_file.read().decode(encoding) # 解码为字符串
94
+ print(f"文件读取成功: {hdfs_path}")
95
+ return content
96
+
97
+ def write_file(self, hdfs_path, content, overwrite=False, encoding='utf-8'):
98
+ """
99
+ 向 HDFS 文件写入内容。
100
+ :param hdfs_path: HDFS 文件路径
101
+ :param content: 要写入的内容
102
+ :param overwrite: 是否覆盖已存在的文件
103
+ :param encoding: 文件编码格式
104
+ :return: None
105
+ """
106
+ with self.client.write(hdfs_path, overwrite=overwrite, encoding=encoding) as writer:
107
+ writer.write(content)
108
+ print(f"文件写入成功: {hdfs_path}")
109
+
110
+ def write_file_kwargs(self, hdfs_path, content, **kwargs):
111
+ """
112
+ 向 HDFS 文件写入内容
113
+ 自定义参数实现更大的灵活性
114
+ """
115
+ with self.client.write(hdfs_path, **kwargs) as writer:
116
+ writer.write(content)
117
+ print(f"文件写入成功: {hdfs_path}")
118
+
119
+ def safe_append_hdfs(self, hdfs_path, content):
120
+ """
121
+ 更安全的追加写入方式,显式检查文件是否存在
122
+
123
+ :param content: 要写入的内容
124
+ :param hdfs_path: HDFS文件路径
125
+ """
126
+ try:
127
+ # 检查文件是否存在
128
+ file_exists = self.client.status(hdfs_path, strict=False) is not None
129
+
130
+ if not file_exists:
131
+ print(f"文件 {hdfs_path} 不存在,将创建新文件")
132
+ # 第一次写入不使用append模式
133
+ with self.client.write(hdfs_path, encoding='utf-8') as writer:
134
+ writer.write(content)
135
+ else:
136
+ # 追加模式写入
137
+ with self.client.write(hdfs_path, encoding='utf-8', append=True) as writer:
138
+ writer.write(content)
139
+
140
+ except Exception as e:
141
+ print(f"文件操作失败: {str(e)}")
142
+ raise
143
+
144
+ def file_exists(self, hdfs_path):
145
+ """
146
+ 检查 HDFS 文件或目录是否存在。
147
+ :param hdfs_path: HDFS 文件或目录路径
148
+ :return: 是否存在
149
+ """
150
+ status = self.client.status(hdfs_path, strict=False)
151
+ return status is not None
152
+
153
+ def rename_file(self, hdfs_src_path, hdfs_dst_path):
154
+ """
155
+ 重命名或移动 HDFS 文件/目录。
156
+ :param hdfs_src_path: 源路径
157
+ :param hdfs_dst_path: 目标路径
158
+ :return: None
159
+ """
160
+ self.client.rename(hdfs_src_path, hdfs_dst_path)
161
+ print(f"文件/目录重命名成功: {hdfs_src_path} -> {hdfs_dst_path}")