re-common 10.0.37__py3-none-any.whl → 10.0.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (217)
  1. re_common/baselibrary/__init__.py +4 -4
  2. re_common/baselibrary/baseabs/__init__.py +6 -6
  3. re_common/baselibrary/baseabs/baseabs.py +26 -26
  4. re_common/baselibrary/database/mbuilder.py +132 -132
  5. re_common/baselibrary/database/moudle.py +93 -93
  6. re_common/baselibrary/database/msqlite3.py +194 -194
  7. re_common/baselibrary/database/mysql.py +169 -169
  8. re_common/baselibrary/database/sql_factory.py +26 -26
  9. re_common/baselibrary/mthread/MThreadingRun.py +486 -486
  10. re_common/baselibrary/mthread/MThreadingRunEvent.py +349 -349
  11. re_common/baselibrary/mthread/__init__.py +2 -2
  12. re_common/baselibrary/mthread/mythreading.py +695 -695
  13. re_common/baselibrary/pakge_other/socks.py +404 -404
  14. re_common/baselibrary/readconfig/config_factory.py +18 -18
  15. re_common/baselibrary/readconfig/ini_config.py +317 -317
  16. re_common/baselibrary/readconfig/toml_config.py +49 -49
  17. re_common/baselibrary/temporary/envdata.py +36 -36
  18. re_common/baselibrary/tools/all_requests/aiohttp_request.py +118 -118
  19. re_common/baselibrary/tools/all_requests/httpx_requet.py +102 -102
  20. re_common/baselibrary/tools/all_requests/mrequest.py +412 -412
  21. re_common/baselibrary/tools/all_requests/requests_request.py +81 -81
  22. re_common/baselibrary/tools/batch_compre/bijiao_batch.py +31 -31
  23. re_common/baselibrary/tools/contrast_db3.py +123 -123
  24. re_common/baselibrary/tools/copy_file.py +39 -39
  25. re_common/baselibrary/tools/db3_2_sizedb3.py +102 -102
  26. re_common/baselibrary/tools/foreachgz.py +39 -39
  27. re_common/baselibrary/tools/get_attr.py +10 -10
  28. re_common/baselibrary/tools/image_to_pdf.py +61 -61
  29. re_common/baselibrary/tools/java_code_deal.py +139 -139
  30. re_common/baselibrary/tools/javacode.py +79 -79
  31. re_common/baselibrary/tools/mdb_db3.py +48 -48
  32. re_common/baselibrary/tools/merge_file.py +171 -171
  33. re_common/baselibrary/tools/merge_gz_file.py +165 -165
  34. re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +42 -42
  35. re_common/baselibrary/tools/mhdfstools/hdfst.py +42 -42
  36. re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +38 -38
  37. re_common/baselibrary/tools/mongo_tools.py +50 -50
  38. re_common/baselibrary/tools/move_file.py +170 -170
  39. re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +63 -63
  40. re_common/baselibrary/tools/move_mongo/move_mongo_table.py +354 -354
  41. re_common/baselibrary/tools/move_mongo/use_mttf.py +18 -18
  42. re_common/baselibrary/tools/move_mongo/use_mv.py +93 -93
  43. re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +125 -125
  44. re_common/baselibrary/tools/mpandas/pandas_visualization.py +7 -7
  45. re_common/baselibrary/tools/myparsel.py +104 -104
  46. re_common/baselibrary/tools/rename_dir_file.py +37 -37
  47. re_common/baselibrary/tools/sequoiadb_utils.py +398 -398
  48. re_common/baselibrary/tools/split_line_to_many.py +25 -25
  49. re_common/baselibrary/tools/stringtodicts.py +33 -33
  50. re_common/baselibrary/tools/workwechant_bot.py +84 -84
  51. re_common/baselibrary/utils/baseaiohttp.py +296 -296
  52. re_common/baselibrary/utils/baseaiomysql.py +87 -87
  53. re_common/baselibrary/utils/baseallstep.py +191 -191
  54. re_common/baselibrary/utils/baseavro.py +19 -19
  55. re_common/baselibrary/utils/baseboto3.py +291 -291
  56. re_common/baselibrary/utils/basecsv.py +32 -32
  57. re_common/baselibrary/utils/basedict.py +133 -133
  58. re_common/baselibrary/utils/basedir.py +241 -241
  59. re_common/baselibrary/utils/baseencode.py +351 -351
  60. re_common/baselibrary/utils/baseencoding.py +28 -28
  61. re_common/baselibrary/utils/baseesdsl.py +86 -86
  62. re_common/baselibrary/utils/baseexcel.py +264 -264
  63. re_common/baselibrary/utils/baseexcept.py +109 -109
  64. re_common/baselibrary/utils/basefile.py +654 -654
  65. re_common/baselibrary/utils/baseftp.py +214 -214
  66. re_common/baselibrary/utils/basegzip.py +60 -60
  67. re_common/baselibrary/utils/basehdfs.py +135 -135
  68. re_common/baselibrary/utils/basehttpx.py +268 -268
  69. re_common/baselibrary/utils/baseip.py +87 -87
  70. re_common/baselibrary/utils/basejson.py +2 -2
  71. re_common/baselibrary/utils/baselist.py +32 -32
  72. re_common/baselibrary/utils/basemotor.py +190 -190
  73. re_common/baselibrary/utils/basemssql.py +98 -98
  74. re_common/baselibrary/utils/baseodbc.py +113 -113
  75. re_common/baselibrary/utils/basepandas.py +302 -302
  76. re_common/baselibrary/utils/basepeewee.py +11 -11
  77. re_common/baselibrary/utils/basepika.py +180 -180
  78. re_common/baselibrary/utils/basepydash.py +143 -143
  79. re_common/baselibrary/utils/basepymongo.py +230 -230
  80. re_common/baselibrary/utils/basequeue.py +22 -22
  81. re_common/baselibrary/utils/baserar.py +57 -57
  82. re_common/baselibrary/utils/baserequest.py +279 -279
  83. re_common/baselibrary/utils/baseset.py +8 -8
  84. re_common/baselibrary/utils/basesmb.py +403 -403
  85. re_common/baselibrary/utils/basestring.py +382 -382
  86. re_common/baselibrary/utils/basetime.py +320 -320
  87. re_common/baselibrary/utils/baseurl.py +121 -121
  88. re_common/baselibrary/utils/basezip.py +57 -57
  89. re_common/baselibrary/utils/core/__init__.py +7 -7
  90. re_common/baselibrary/utils/core/bottomutils.py +18 -18
  91. re_common/baselibrary/utils/core/mdeprecated.py +327 -327
  92. re_common/baselibrary/utils/core/mlamada.py +16 -16
  93. re_common/baselibrary/utils/core/msginfo.py +25 -25
  94. re_common/baselibrary/utils/core/requests_core.py +103 -103
  95. re_common/baselibrary/utils/fateadm.py +429 -429
  96. re_common/baselibrary/utils/importfun.py +123 -123
  97. re_common/baselibrary/utils/mfaker.py +57 -57
  98. re_common/baselibrary/utils/my_abc/__init__.py +3 -3
  99. re_common/baselibrary/utils/my_abc/better_abc.py +32 -32
  100. re_common/baselibrary/utils/mylogger.py +414 -414
  101. re_common/baselibrary/utils/myredisclient.py +861 -861
  102. re_common/baselibrary/utils/pipupgrade.py +21 -21
  103. re_common/baselibrary/utils/ringlist.py +85 -85
  104. re_common/baselibrary/utils/version_compare.py +36 -36
  105. re_common/baselibrary/utils/ydmhttp.py +126 -126
  106. re_common/facade/lazy_import.py +11 -11
  107. re_common/facade/loggerfacade.py +25 -25
  108. re_common/facade/mysqlfacade.py +467 -467
  109. re_common/facade/now.py +31 -31
  110. re_common/facade/sqlite3facade.py +257 -257
  111. re_common/facade/use/mq_use_facade.py +83 -83
  112. re_common/facade/use/proxy_use_facade.py +19 -19
  113. re_common/libtest/base_dict_test.py +19 -19
  114. re_common/libtest/baseavro_test.py +13 -13
  115. re_common/libtest/basefile_test.py +14 -14
  116. re_common/libtest/basemssql_test.py +77 -77
  117. re_common/libtest/baseodbc_test.py +7 -7
  118. re_common/libtest/basepandas_test.py +38 -38
  119. re_common/libtest/get_attr_test/get_attr_test_settings.py +14 -14
  120. re_common/libtest/get_attr_test/settings.py +54 -54
  121. re_common/libtest/idencode_test.py +53 -53
  122. re_common/libtest/iniconfig_test.py +35 -35
  123. re_common/libtest/ip_test.py +34 -34
  124. re_common/libtest/merge_file_test.py +20 -20
  125. re_common/libtest/mfaker_test.py +8 -8
  126. re_common/libtest/mm3_test.py +31 -31
  127. re_common/libtest/mylogger_test.py +88 -88
  128. re_common/libtest/myparsel_test.py +27 -27
  129. re_common/libtest/mysql_test.py +151 -151
  130. re_common/libtest/pymongo_test.py +21 -21
  131. re_common/libtest/split_test.py +11 -11
  132. re_common/libtest/sqlite3_merge_test.py +5 -5
  133. re_common/libtest/sqlite3_test.py +34 -34
  134. re_common/libtest/tomlconfig_test.py +30 -30
  135. re_common/libtest/use_tools_test/__init__.py +2 -2
  136. re_common/libtest/user/__init__.py +4 -4
  137. re_common/studio/__init__.py +4 -4
  138. re_common/studio/assignment_expressions.py +36 -36
  139. re_common/studio/mydash/test1.py +18 -18
  140. re_common/studio/pydashstudio/first.py +9 -9
  141. re_common/studio/streamlitstudio/first_app.py +65 -65
  142. re_common/studio/streamlitstudio/uber_pickups.py +23 -23
  143. re_common/studio/test.py +18 -18
  144. re_common/v2/baselibrary/business_utils/BusinessStringUtil.py +219 -219
  145. re_common/v2/baselibrary/business_utils/baseencodeid.py +100 -100
  146. re_common/v2/baselibrary/business_utils/full_doi_path.py +116 -116
  147. re_common/v2/baselibrary/business_utils/rel_tools.py +6 -6
  148. re_common/v2/baselibrary/decorators/utils.py +59 -59
  149. re_common/v2/baselibrary/helpers/search_packge/NearestNeighbors_test.py +105 -105
  150. re_common/v2/baselibrary/helpers/search_packge/fit_text_match.py +253 -253
  151. re_common/v2/baselibrary/helpers/search_packge/scikit_learn_text_matcher.py +260 -260
  152. re_common/v2/baselibrary/helpers/search_packge/test.py +1 -1
  153. re_common/v2/baselibrary/s3object/baseboto3.py +230 -230
  154. re_common/v2/baselibrary/tools/WeChatRobot.py +95 -95
  155. re_common/v2/baselibrary/tools/ac_ahocorasick.py +75 -75
  156. re_common/v2/baselibrary/tools/concurrency.py +35 -35
  157. re_common/v2/baselibrary/tools/data_processer/base.py +53 -53
  158. re_common/v2/baselibrary/tools/data_processer/data_processer.py +508 -508
  159. re_common/v2/baselibrary/tools/data_processer/data_reader.py +187 -187
  160. re_common/v2/baselibrary/tools/data_processer/data_writer.py +38 -38
  161. re_common/v2/baselibrary/tools/dict_tools.py +44 -44
  162. re_common/v2/baselibrary/tools/dolphinscheduler.py +187 -187
  163. re_common/v2/baselibrary/tools/hdfs_base_processor.py +204 -204
  164. re_common/v2/baselibrary/tools/hdfs_bulk_processor.py +67 -67
  165. re_common/v2/baselibrary/tools/hdfs_data_processer.py +338 -338
  166. re_common/v2/baselibrary/tools/hdfs_line_processor.py +74 -74
  167. re_common/v2/baselibrary/tools/list_tools.py +69 -69
  168. re_common/v2/baselibrary/tools/resume_tracker.py +94 -94
  169. re_common/v2/baselibrary/tools/search_hash_tools.py +54 -54
  170. re_common/v2/baselibrary/tools/text_matcher.py +326 -326
  171. re_common/v2/baselibrary/tools/unionfind_tools.py +60 -60
  172. re_common/v2/baselibrary/utils/BusinessStringUtil.py +196 -196
  173. re_common/v2/baselibrary/utils/api_net_utils.py +270 -270
  174. re_common/v2/baselibrary/utils/author_smi.py +361 -361
  175. re_common/v2/baselibrary/utils/base_string_similarity.py +158 -158
  176. re_common/v2/baselibrary/utils/basedict.py +37 -37
  177. re_common/v2/baselibrary/utils/basehdfs.py +163 -163
  178. re_common/v2/baselibrary/utils/basepika.py +180 -180
  179. re_common/v2/baselibrary/utils/basetime.py +77 -77
  180. re_common/v2/baselibrary/utils/db.py +156 -156
  181. re_common/v2/baselibrary/utils/json_cls.py +16 -16
  182. re_common/v2/baselibrary/utils/mq.py +83 -83
  183. re_common/v2/baselibrary/utils/n_ary_expression_tree.py +243 -243
  184. re_common/v2/baselibrary/utils/string_bool.py +186 -186
  185. re_common/v2/baselibrary/utils/string_clear.py +246 -246
  186. re_common/v2/baselibrary/utils/string_smi.py +18 -18
  187. re_common/v2/baselibrary/utils/stringutils.py +271 -278
  188. re_common/vip/base_step_process.py +11 -11
  189. re_common/vip/baseencodeid.py +90 -90
  190. re_common/vip/changetaskname.py +28 -28
  191. re_common/vip/core_var.py +24 -24
  192. re_common/vip/mmh3Hash.py +89 -89
  193. re_common/vip/proxy/allproxys.py +127 -127
  194. re_common/vip/proxy/allproxys_thread.py +159 -159
  195. re_common/vip/proxy/cnki_proxy.py +153 -153
  196. re_common/vip/proxy/kuaidaili.py +87 -87
  197. re_common/vip/proxy/proxy_all.py +113 -113
  198. re_common/vip/proxy/update_kuaidaili_0.py +42 -42
  199. re_common/vip/proxy/wanfang_proxy.py +152 -152
  200. re_common/vip/proxy/wp_proxy_all.py +181 -181
  201. re_common/vip/read_rawid_to_txt.py +91 -91
  202. re_common/vip/title/__init__.py +5 -5
  203. re_common/vip/title/transform/TransformBookTitleToZt.py +125 -125
  204. re_common/vip/title/transform/TransformConferenceTitleToZt.py +139 -139
  205. re_common/vip/title/transform/TransformCstadTitleToZt.py +195 -195
  206. re_common/vip/title/transform/TransformJournalTitleToZt.py +203 -203
  207. re_common/vip/title/transform/TransformPatentTitleToZt.py +132 -132
  208. re_common/vip/title/transform/TransformRegulationTitleToZt.py +114 -114
  209. re_common/vip/title/transform/TransformStandardTitleToZt.py +135 -135
  210. re_common/vip/title/transform/TransformThesisTitleToZt.py +135 -135
  211. re_common/vip/title/transform/__init__.py +10 -10
  212. {re_common-10.0.37.dist-info → re_common-10.0.39.dist-info}/LICENSE +201 -201
  213. {re_common-10.0.37.dist-info → re_common-10.0.39.dist-info}/METADATA +16 -16
  214. re_common-10.0.39.dist-info/RECORD +248 -0
  215. {re_common-10.0.37.dist-info → re_common-10.0.39.dist-info}/WHEEL +1 -1
  216. re_common-10.0.37.dist-info/RECORD +0 -248
  217. {re_common-10.0.37.dist-info → re_common-10.0.39.dist-info}/top_level.txt +0 -0
@@ -1,163 +1,163 @@
1
- import gzip
2
- from io import BytesIO
3
-
4
- from hdfs import InsecureClient
5
-
6
-
7
- class HDFSUtils(object):
8
- """
9
- HDFS 工具类,封装常见的 HDFS 操作。
10
-
11
- InsecureClient: 缺陷 写大文件数据时无法写入不报错
12
- """
13
-
14
- def __init__(self, hdfs_url, hdfs_user):
15
- """
16
- 初始化 HDFS 客户端。
17
- :param hdfs_url: HDFS 的 URL,例如 "http://namenode:50070"
18
- :param hdfs_user: HDFS 用户名
19
- """
20
- self.hdfs_url = hdfs_url
21
- self.hdfs_user = hdfs_user
22
- self.client = InsecureClient(hdfs_url, user=hdfs_user)
23
-
24
- def upload_file(self, local_path, hdfs_path, overwrite=False):
25
- """
26
- 将本地文件上传到 HDFS。
27
- :param local_path: 本地文件路径
28
- :param hdfs_path: HDFS 文件路径
29
- :param overwrite: 是否覆盖已存在的文件
30
- :return: None
31
- """
32
- self.client.upload(hdfs_path, local_path, overwrite=overwrite)
33
- print(f"文件上传成功: {local_path} -> {hdfs_path}")
34
-
35
- def download_file(self, hdfs_path, local_path, overwrite=False):
36
- """
37
- 从 HDFS 下载文件到本地。
38
- :param hdfs_path: HDFS 文件路径
39
- :param local_path: 本地文件路径
40
- :param overwrite: 是否覆盖已存在的文件
41
- :return: None
42
- """
43
- self.client.download(hdfs_path, local_path, overwrite=overwrite)
44
- print(f"文件下载成功: {hdfs_path} -> {local_path}")
45
-
46
- def delete_file(self, hdfs_path, recursive=False):
47
- """
48
- 删除 HDFS 上的文件或目录。
49
- :param hdfs_path: HDFS 文件或目录路径
50
- :param recursive: 是否递归删除目录
51
- :return: None
52
- """
53
- self.client.delete(hdfs_path, recursive=recursive)
54
- print(f"文件/目录删除成功: {hdfs_path}")
55
-
56
- def create_directory(self, hdfs_path):
57
- """
58
- 在 HDFS 上创建目录。
59
- :param hdfs_path: HDFS 目录路径
60
- :return: None
61
- """
62
- self.client.makedirs(hdfs_path)
63
- print(f"目录创建成功: {hdfs_path}")
64
-
65
- def list_files(self, hdfs_path):
66
- """
67
- 列出 HDFS 目录下的文件和子目录。
68
- :param hdfs_path: HDFS 目录路径
69
- :return: 文件/目录列表
70
- """
71
- files = self.client.list(hdfs_path)
72
- return files
73
-
74
- def read_file(self, hdfs_path):
75
- """
76
- 读取 HDFS 文件内容。
77
- :param hdfs_path: HDFS 文件路径
78
- :return: 文件内容
79
- """
80
- with self.client.read(hdfs_path) as reader:
81
- content = reader.read()
82
- print(f"文件读取成功: {hdfs_path}")
83
- return content
84
-
85
- def read_gz_file(self, hdfs_path, encoding='utf-8'):
86
- """
87
- 读取 HDFS 上的 .gz 文件内容。
88
- :param hdfs_path: HDFS 文件路径(必须以 .gz 结尾)
89
- :param encoding: 文件编码格式(默认 utf-8)
90
- :return: 文件内容
91
- """
92
- with self.client.read(hdfs_path) as reader: # 以二进制模式读取
93
- compressed_data = reader.read() # 读取压缩数据
94
- with gzip.GzipFile(fileobj=BytesIO(compressed_data)) as gz_file: # 解压缩
95
- content = gz_file.read().decode(encoding) # 解码为字符串
96
- print(f"文件读取成功: {hdfs_path}")
97
- return content
98
-
99
- def write_file(self, hdfs_path, content, overwrite=False, encoding='utf-8'):
100
- """
101
- 向 HDFS 文件写入内容。
102
- :param hdfs_path: HDFS 文件路径
103
- :param content: 要写入的内容
104
- :param overwrite: 是否覆盖已存在的文件
105
- :param encoding: 文件编码格式
106
- :return: None
107
- """
108
- with self.client.write(hdfs_path, overwrite=overwrite, encoding=encoding) as writer:
109
- writer.write(content)
110
- print(f"文件写入成功: {hdfs_path}")
111
-
112
- def write_file_kwargs(self, hdfs_path, content, **kwargs):
113
- """
114
- 向 HDFS 文件写入内容
115
- 自定义参数实现更大的灵活性
116
- """
117
- with self.client.write(hdfs_path, **kwargs) as writer:
118
- writer.write(content)
119
- print(f"文件写入成功: {hdfs_path}")
120
-
121
- def safe_append_hdfs(self, hdfs_path, content):
122
- """
123
- 更安全的追加写入方式,显式检查文件是否存在
124
-
125
- :param content: 要写入的内容
126
- :param hdfs_path: HDFS文件路径
127
- """
128
- try:
129
- # 检查文件是否存在
130
- file_exists = self.client.status(hdfs_path, strict=False) is not None
131
-
132
- if not file_exists:
133
- print(f"文件 {hdfs_path} 不存在,将创建新文件")
134
- # 第一次写入不使用append模式
135
- with self.client.write(hdfs_path, encoding='utf-8') as writer:
136
- writer.write(content)
137
- else:
138
- # 追加模式写入
139
- with self.client.write(hdfs_path, encoding='utf-8', append=True) as writer:
140
- writer.write(content)
141
-
142
- except Exception as e:
143
- print(f"文件操作失败: {str(e)}")
144
- raise
145
-
146
- def file_exists(self, hdfs_path):
147
- """
148
- 检查 HDFS 文件或目录是否存在。
149
- :param hdfs_path: HDFS 文件或目录路径
150
- :return: 是否存在
151
- """
152
- status = self.client.status(hdfs_path, strict=False)
153
- return status is not None
154
-
155
- def rename_file(self, hdfs_src_path, hdfs_dst_path):
156
- """
157
- 重命名或移动 HDFS 文件/目录。
158
- :param hdfs_src_path: 源路径
159
- :param hdfs_dst_path: 目标路径
160
- :return: None
161
- """
162
- self.client.rename(hdfs_src_path, hdfs_dst_path)
163
- print(f"文件/目录重命名成功: {hdfs_src_path} -> {hdfs_dst_path}")
1
+ import gzip
2
+ from io import BytesIO
3
+
4
+ from hdfs import InsecureClient
5
+
6
+
7
+ class HDFSUtils(object):
8
+ """
9
+ HDFS 工具类,封装常见的 HDFS 操作。
10
+
11
+ InsecureClient: 缺陷 写大文件数据时无法写入不报错
12
+ """
13
+
14
+ def __init__(self, hdfs_url, hdfs_user):
15
+ """
16
+ 初始化 HDFS 客户端。
17
+ :param hdfs_url: HDFS 的 URL,例如 "http://namenode:50070"
18
+ :param hdfs_user: HDFS 用户名
19
+ """
20
+ self.hdfs_url = hdfs_url
21
+ self.hdfs_user = hdfs_user
22
+ self.client = InsecureClient(hdfs_url, user=hdfs_user)
23
+
24
+ def upload_file(self, local_path, hdfs_path, overwrite=False):
25
+ """
26
+ 将本地文件上传到 HDFS。
27
+ :param local_path: 本地文件路径
28
+ :param hdfs_path: HDFS 文件路径
29
+ :param overwrite: 是否覆盖已存在的文件
30
+ :return: None
31
+ """
32
+ self.client.upload(hdfs_path, local_path, overwrite=overwrite)
33
+ print(f"文件上传成功: {local_path} -> {hdfs_path}")
34
+
35
+ def download_file(self, hdfs_path, local_path, overwrite=False):
36
+ """
37
+ 从 HDFS 下载文件到本地。
38
+ :param hdfs_path: HDFS 文件路径
39
+ :param local_path: 本地文件路径
40
+ :param overwrite: 是否覆盖已存在的文件
41
+ :return: None
42
+ """
43
+ self.client.download(hdfs_path, local_path, overwrite=overwrite)
44
+ print(f"文件下载成功: {hdfs_path} -> {local_path}")
45
+
46
+ def delete_file(self, hdfs_path, recursive=False):
47
+ """
48
+ 删除 HDFS 上的文件或目录。
49
+ :param hdfs_path: HDFS 文件或目录路径
50
+ :param recursive: 是否递归删除目录
51
+ :return: None
52
+ """
53
+ self.client.delete(hdfs_path, recursive=recursive)
54
+ print(f"文件/目录删除成功: {hdfs_path}")
55
+
56
+ def create_directory(self, hdfs_path):
57
+ """
58
+ 在 HDFS 上创建目录。
59
+ :param hdfs_path: HDFS 目录路径
60
+ :return: None
61
+ """
62
+ self.client.makedirs(hdfs_path)
63
+ print(f"目录创建成功: {hdfs_path}")
64
+
65
+ def list_files(self, hdfs_path):
66
+ """
67
+ 列出 HDFS 目录下的文件和子目录。
68
+ :param hdfs_path: HDFS 目录路径
69
+ :return: 文件/目录列表
70
+ """
71
+ files = self.client.list(hdfs_path)
72
+ return files
73
+
74
+ def read_file(self, hdfs_path):
75
+ """
76
+ 读取 HDFS 文件内容。
77
+ :param hdfs_path: HDFS 文件路径
78
+ :return: 文件内容
79
+ """
80
+ with self.client.read(hdfs_path) as reader:
81
+ content = reader.read()
82
+ print(f"文件读取成功: {hdfs_path}")
83
+ return content
84
+
85
+ def read_gz_file(self, hdfs_path, encoding='utf-8'):
86
+ """
87
+ 读取 HDFS 上的 .gz 文件内容。
88
+ :param hdfs_path: HDFS 文件路径(必须以 .gz 结尾)
89
+ :param encoding: 文件编码格式(默认 utf-8)
90
+ :return: 文件内容
91
+ """
92
+ with self.client.read(hdfs_path) as reader: # 以二进制模式读取
93
+ compressed_data = reader.read() # 读取压缩数据
94
+ with gzip.GzipFile(fileobj=BytesIO(compressed_data)) as gz_file: # 解压缩
95
+ content = gz_file.read().decode(encoding) # 解码为字符串
96
+ print(f"文件读取成功: {hdfs_path}")
97
+ return content
98
+
99
+ def write_file(self, hdfs_path, content, overwrite=False, encoding='utf-8'):
100
+ """
101
+ 向 HDFS 文件写入内容。
102
+ :param hdfs_path: HDFS 文件路径
103
+ :param content: 要写入的内容
104
+ :param overwrite: 是否覆盖已存在的文件
105
+ :param encoding: 文件编码格式
106
+ :return: None
107
+ """
108
+ with self.client.write(hdfs_path, overwrite=overwrite, encoding=encoding) as writer:
109
+ writer.write(content)
110
+ print(f"文件写入成功: {hdfs_path}")
111
+
112
+ def write_file_kwargs(self, hdfs_path, content, **kwargs):
113
+ """
114
+ 向 HDFS 文件写入内容
115
+ 自定义参数实现更大的灵活性
116
+ """
117
+ with self.client.write(hdfs_path, **kwargs) as writer:
118
+ writer.write(content)
119
+ print(f"文件写入成功: {hdfs_path}")
120
+
121
+ def safe_append_hdfs(self, hdfs_path, content):
122
+ """
123
+ 更安全的追加写入方式,显式检查文件是否存在
124
+
125
+ :param content: 要写入的内容
126
+ :param hdfs_path: HDFS文件路径
127
+ """
128
+ try:
129
+ # 检查文件是否存在
130
+ file_exists = self.client.status(hdfs_path, strict=False) is not None
131
+
132
+ if not file_exists:
133
+ print(f"文件 {hdfs_path} 不存在,将创建新文件")
134
+ # 第一次写入不使用append模式
135
+ with self.client.write(hdfs_path, encoding='utf-8') as writer:
136
+ writer.write(content)
137
+ else:
138
+ # 追加模式写入
139
+ with self.client.write(hdfs_path, encoding='utf-8', append=True) as writer:
140
+ writer.write(content)
141
+
142
+ except Exception as e:
143
+ print(f"文件操作失败: {str(e)}")
144
+ raise
145
+
146
+ def file_exists(self, hdfs_path):
147
+ """
148
+ 检查 HDFS 文件或目录是否存在。
149
+ :param hdfs_path: HDFS 文件或目录路径
150
+ :return: 是否存在
151
+ """
152
+ status = self.client.status(hdfs_path, strict=False)
153
+ return status is not None
154
+
155
+ def rename_file(self, hdfs_src_path, hdfs_dst_path):
156
+ """
157
+ 重命名或移动 HDFS 文件/目录。
158
+ :param hdfs_src_path: 源路径
159
+ :param hdfs_dst_path: 目标路径
160
+ :return: None
161
+ """
162
+ self.client.rename(hdfs_src_path, hdfs_dst_path)
163
+ print(f"文件/目录重命名成功: {hdfs_src_path} -> {hdfs_dst_path}")