re-common 10.0.21__py3-none-any.whl → 10.0.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199)
  1. re_common/baselibrary/__init__.py +4 -4
  2. re_common/baselibrary/baseabs/__init__.py +6 -6
  3. re_common/baselibrary/baseabs/baseabs.py +26 -26
  4. re_common/baselibrary/database/mbuilder.py +132 -132
  5. re_common/baselibrary/database/moudle.py +93 -93
  6. re_common/baselibrary/database/msqlite3.py +194 -194
  7. re_common/baselibrary/database/mysql.py +169 -169
  8. re_common/baselibrary/database/sql_factory.py +26 -26
  9. re_common/baselibrary/mthread/MThreadingRun.py +486 -486
  10. re_common/baselibrary/mthread/MThreadingRunEvent.py +349 -349
  11. re_common/baselibrary/mthread/__init__.py +2 -2
  12. re_common/baselibrary/mthread/mythreading.py +695 -695
  13. re_common/baselibrary/pakge_other/socks.py +404 -404
  14. re_common/baselibrary/readconfig/config_factory.py +18 -18
  15. re_common/baselibrary/readconfig/ini_config.py +317 -317
  16. re_common/baselibrary/readconfig/toml_config.py +49 -49
  17. re_common/baselibrary/temporary/envdata.py +36 -36
  18. re_common/baselibrary/tools/all_requests/aiohttp_request.py +118 -118
  19. re_common/baselibrary/tools/all_requests/httpx_requet.py +102 -102
  20. re_common/baselibrary/tools/all_requests/mrequest.py +412 -412
  21. re_common/baselibrary/tools/all_requests/requests_request.py +81 -81
  22. re_common/baselibrary/tools/batch_compre/bijiao_batch.py +31 -31
  23. re_common/baselibrary/tools/contrast_db3.py +123 -123
  24. re_common/baselibrary/tools/copy_file.py +39 -39
  25. re_common/baselibrary/tools/db3_2_sizedb3.py +102 -102
  26. re_common/baselibrary/tools/foreachgz.py +39 -39
  27. re_common/baselibrary/tools/get_attr.py +10 -10
  28. re_common/baselibrary/tools/image_to_pdf.py +61 -61
  29. re_common/baselibrary/tools/java_code_deal.py +139 -139
  30. re_common/baselibrary/tools/javacode.py +79 -79
  31. re_common/baselibrary/tools/mdb_db3.py +48 -48
  32. re_common/baselibrary/tools/merge_file.py +171 -171
  33. re_common/baselibrary/tools/merge_gz_file.py +165 -165
  34. re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +42 -42
  35. re_common/baselibrary/tools/mhdfstools/hdfst.py +42 -42
  36. re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +38 -38
  37. re_common/baselibrary/tools/mongo_tools.py +50 -50
  38. re_common/baselibrary/tools/move_file.py +170 -170
  39. re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +63 -63
  40. re_common/baselibrary/tools/move_mongo/move_mongo_table.py +354 -354
  41. re_common/baselibrary/tools/move_mongo/use_mttf.py +18 -18
  42. re_common/baselibrary/tools/move_mongo/use_mv.py +93 -93
  43. re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +125 -125
  44. re_common/baselibrary/tools/mpandas/pandas_visualization.py +7 -7
  45. re_common/baselibrary/tools/myparsel.py +104 -104
  46. re_common/baselibrary/tools/rename_dir_file.py +37 -37
  47. re_common/baselibrary/tools/sequoiadb_utils.py +398 -398
  48. re_common/baselibrary/tools/split_line_to_many.py +25 -25
  49. re_common/baselibrary/tools/stringtodicts.py +33 -33
  50. re_common/baselibrary/tools/workwechant_bot.py +84 -84
  51. re_common/baselibrary/utils/baseaiohttp.py +296 -296
  52. re_common/baselibrary/utils/baseaiomysql.py +87 -87
  53. re_common/baselibrary/utils/baseallstep.py +191 -191
  54. re_common/baselibrary/utils/baseavro.py +19 -19
  55. re_common/baselibrary/utils/baseboto3.py +291 -291
  56. re_common/baselibrary/utils/basecsv.py +32 -32
  57. re_common/baselibrary/utils/basedict.py +133 -133
  58. re_common/baselibrary/utils/basedir.py +241 -241
  59. re_common/baselibrary/utils/baseencode.py +351 -351
  60. re_common/baselibrary/utils/baseencoding.py +28 -28
  61. re_common/baselibrary/utils/baseesdsl.py +86 -86
  62. re_common/baselibrary/utils/baseexcel.py +264 -264
  63. re_common/baselibrary/utils/baseexcept.py +109 -109
  64. re_common/baselibrary/utils/basefile.py +654 -654
  65. re_common/baselibrary/utils/baseftp.py +214 -214
  66. re_common/baselibrary/utils/basegzip.py +60 -60
  67. re_common/baselibrary/utils/basehdfs.py +135 -135
  68. re_common/baselibrary/utils/basehttpx.py +268 -268
  69. re_common/baselibrary/utils/baseip.py +87 -87
  70. re_common/baselibrary/utils/basejson.py +2 -2
  71. re_common/baselibrary/utils/baselist.py +32 -32
  72. re_common/baselibrary/utils/basemotor.py +190 -190
  73. re_common/baselibrary/utils/basemssql.py +98 -98
  74. re_common/baselibrary/utils/baseodbc.py +113 -113
  75. re_common/baselibrary/utils/basepandas.py +302 -302
  76. re_common/baselibrary/utils/basepeewee.py +11 -11
  77. re_common/baselibrary/utils/basepika.py +180 -180
  78. re_common/baselibrary/utils/basepydash.py +143 -143
  79. re_common/baselibrary/utils/basepymongo.py +230 -230
  80. re_common/baselibrary/utils/basequeue.py +22 -22
  81. re_common/baselibrary/utils/baserar.py +57 -57
  82. re_common/baselibrary/utils/baserequest.py +279 -279
  83. re_common/baselibrary/utils/baseset.py +8 -8
  84. re_common/baselibrary/utils/basesmb.py +403 -403
  85. re_common/baselibrary/utils/basestring.py +382 -382
  86. re_common/baselibrary/utils/basetime.py +320 -320
  87. re_common/baselibrary/utils/baseurl.py +121 -121
  88. re_common/baselibrary/utils/basezip.py +57 -57
  89. re_common/baselibrary/utils/core/__init__.py +7 -7
  90. re_common/baselibrary/utils/core/bottomutils.py +18 -18
  91. re_common/baselibrary/utils/core/mdeprecated.py +327 -327
  92. re_common/baselibrary/utils/core/mlamada.py +16 -16
  93. re_common/baselibrary/utils/core/msginfo.py +25 -25
  94. re_common/baselibrary/utils/core/requests_core.py +103 -103
  95. re_common/baselibrary/utils/fateadm.py +429 -429
  96. re_common/baselibrary/utils/importfun.py +123 -123
  97. re_common/baselibrary/utils/mfaker.py +57 -57
  98. re_common/baselibrary/utils/my_abc/__init__.py +3 -3
  99. re_common/baselibrary/utils/my_abc/better_abc.py +32 -32
  100. re_common/baselibrary/utils/mylogger.py +414 -414
  101. re_common/baselibrary/utils/myredisclient.py +861 -861
  102. re_common/baselibrary/utils/pipupgrade.py +21 -21
  103. re_common/baselibrary/utils/ringlist.py +85 -85
  104. re_common/baselibrary/utils/version_compare.py +36 -36
  105. re_common/baselibrary/utils/ydmhttp.py +126 -126
  106. re_common/facade/lazy_import.py +11 -11
  107. re_common/facade/loggerfacade.py +25 -25
  108. re_common/facade/mysqlfacade.py +467 -467
  109. re_common/facade/now.py +31 -31
  110. re_common/facade/sqlite3facade.py +257 -257
  111. re_common/facade/use/mq_use_facade.py +83 -83
  112. re_common/facade/use/proxy_use_facade.py +19 -19
  113. re_common/libtest/base_dict_test.py +19 -19
  114. re_common/libtest/baseavro_test.py +13 -13
  115. re_common/libtest/basefile_test.py +14 -14
  116. re_common/libtest/basemssql_test.py +77 -77
  117. re_common/libtest/baseodbc_test.py +7 -7
  118. re_common/libtest/basepandas_test.py +38 -38
  119. re_common/libtest/get_attr_test/get_attr_test_settings.py +14 -14
  120. re_common/libtest/get_attr_test/settings.py +54 -54
  121. re_common/libtest/idencode_test.py +53 -53
  122. re_common/libtest/iniconfig_test.py +35 -35
  123. re_common/libtest/ip_test.py +34 -34
  124. re_common/libtest/merge_file_test.py +20 -20
  125. re_common/libtest/mfaker_test.py +8 -8
  126. re_common/libtest/mm3_test.py +31 -31
  127. re_common/libtest/mylogger_test.py +88 -88
  128. re_common/libtest/myparsel_test.py +27 -27
  129. re_common/libtest/mysql_test.py +151 -151
  130. re_common/libtest/pymongo_test.py +21 -21
  131. re_common/libtest/split_test.py +11 -11
  132. re_common/libtest/sqlite3_merge_test.py +5 -5
  133. re_common/libtest/sqlite3_test.py +34 -34
  134. re_common/libtest/tomlconfig_test.py +30 -30
  135. re_common/libtest/use_tools_test/__init__.py +2 -2
  136. re_common/libtest/user/__init__.py +4 -4
  137. re_common/studio/__init__.py +4 -4
  138. re_common/studio/assignment_expressions.py +36 -36
  139. re_common/studio/mydash/test1.py +18 -18
  140. re_common/studio/pydashstudio/first.py +9 -9
  141. re_common/studio/streamlitstudio/first_app.py +65 -65
  142. re_common/studio/streamlitstudio/uber_pickups.py +23 -23
  143. re_common/studio/test.py +18 -18
  144. re_common/v2/baselibrary/decorators/utils.py +59 -59
  145. re_common/v2/baselibrary/s3object/baseboto3.py +230 -230
  146. re_common/v2/baselibrary/tools/WeChatRobot.py +79 -79
  147. re_common/v2/baselibrary/tools/ac_ahocorasick.py +75 -75
  148. re_common/v2/baselibrary/tools/dict_tools.py +37 -37
  149. re_common/v2/baselibrary/tools/dolphinscheduler.py +187 -187
  150. re_common/v2/baselibrary/tools/hdfs_data_processer.py +338 -338
  151. re_common/v2/baselibrary/tools/list_tools.py +65 -65
  152. re_common/v2/baselibrary/tools/search_hash_tools.py +54 -54
  153. re_common/v2/baselibrary/tools/text_matcher.py +326 -326
  154. re_common/v2/baselibrary/tools/unionfind_tools.py +60 -60
  155. re_common/v2/baselibrary/utils/BusinessStringUtil.py +196 -196
  156. re_common/v2/baselibrary/utils/author_smi.py +360 -360
  157. re_common/v2/baselibrary/utils/base_string_similarity.py +158 -158
  158. re_common/v2/baselibrary/utils/basedict.py +37 -37
  159. re_common/v2/baselibrary/utils/basehdfs.py +161 -161
  160. re_common/v2/baselibrary/utils/basepika.py +180 -180
  161. re_common/v2/baselibrary/utils/basetime.py +77 -77
  162. re_common/v2/baselibrary/utils/db.py +38 -38
  163. re_common/v2/baselibrary/utils/json_cls.py +16 -16
  164. re_common/v2/baselibrary/utils/mq.py +83 -83
  165. re_common/v2/baselibrary/utils/n_ary_expression_tree.py +243 -243
  166. re_common/v2/baselibrary/utils/string_bool.py +149 -149
  167. re_common/v2/baselibrary/utils/string_clear.py +204 -202
  168. re_common/v2/baselibrary/utils/string_smi.py +18 -18
  169. re_common/v2/baselibrary/utils/stringutils.py +213 -213
  170. re_common/vip/base_step_process.py +11 -11
  171. re_common/vip/baseencodeid.py +90 -90
  172. re_common/vip/changetaskname.py +28 -28
  173. re_common/vip/core_var.py +24 -24
  174. re_common/vip/mmh3Hash.py +89 -89
  175. re_common/vip/proxy/allproxys.py +127 -127
  176. re_common/vip/proxy/allproxys_thread.py +159 -159
  177. re_common/vip/proxy/cnki_proxy.py +153 -153
  178. re_common/vip/proxy/kuaidaili.py +87 -87
  179. re_common/vip/proxy/proxy_all.py +113 -113
  180. re_common/vip/proxy/update_kuaidaili_0.py +42 -42
  181. re_common/vip/proxy/wanfang_proxy.py +152 -152
  182. re_common/vip/proxy/wp_proxy_all.py +181 -181
  183. re_common/vip/read_rawid_to_txt.py +91 -91
  184. re_common/vip/title/__init__.py +5 -5
  185. re_common/vip/title/transform/TransformBookTitleToZt.py +125 -125
  186. re_common/vip/title/transform/TransformConferenceTitleToZt.py +139 -139
  187. re_common/vip/title/transform/TransformCstadTitleToZt.py +195 -195
  188. re_common/vip/title/transform/TransformJournalTitleToZt.py +203 -203
  189. re_common/vip/title/transform/TransformPatentTitleToZt.py +132 -132
  190. re_common/vip/title/transform/TransformRegulationTitleToZt.py +114 -114
  191. re_common/vip/title/transform/TransformStandardTitleToZt.py +135 -135
  192. re_common/vip/title/transform/TransformThesisTitleToZt.py +135 -135
  193. re_common/vip/title/transform/__init__.py +10 -10
  194. {re_common-10.0.21.dist-info → re_common-10.0.22.dist-info}/LICENSE +201 -201
  195. {re_common-10.0.21.dist-info → re_common-10.0.22.dist-info}/METADATA +16 -16
  196. re_common-10.0.22.dist-info/RECORD +227 -0
  197. {re_common-10.0.21.dist-info → re_common-10.0.22.dist-info}/WHEEL +1 -1
  198. re_common-10.0.21.dist-info/RECORD +0 -227
  199. {re_common-10.0.21.dist-info → re_common-10.0.22.dist-info}/top_level.txt +0 -0
@@ -1,291 +1,291 @@
1
- import boto3
2
- import botocore
3
- from boto3.session import Session
4
-
5
- # boto3 该开发工具包由两个关键的 Python 包组成:
6
- # Botocore(提供在 Python 开发工具包和 AWS CLI 之间共享的低级功能的库)
7
- # 和 Boto3(实现 Python 开发工具包本身的包)
8
-
9
-
10
- """
11
- aws_access_key_id = 'minioa'
12
- aws_secret_access_key = 'minio123'
13
- endpoint_url = 'http://192.168.31.164:9000'
14
- bbt = BaseBoto3(aws_access_key_id=aws_access_key_id,
15
- aws_secret_access_key=aws_secret_access_key,
16
- endpoint_url=endpoint_url)
17
- bbt.conn_session()
18
- bbt.set_is_low_level(False)
19
- bbt.get_client()
20
- print("**********************")
21
- print(bbt.delete_buckets("test1"))
22
- # bbt.set_is_low_level(False)
23
- # bbt.get_client()
24
- # print("**********************")
25
- # print(bbt.create_buckets("create2"))
26
- """
27
-
28
-
29
- class BaseBoto3(object):
30
-
31
- def __init__(self, aws_access_key_id="", aws_secret_access_key="", endpoint_url=""):
32
- self.aws_access_key_id = aws_access_key_id
33
- self.aws_secret_access_key = aws_secret_access_key
34
- self.endpoint_url = endpoint_url
35
- self.session = None
36
- self.client = None
37
- self.is_low_level = False
38
- self.bucket = None
39
-
40
- def set_is_low_level(self, is_low_level):
41
- self.is_low_level = is_low_level
42
- return self
43
-
44
- def set_aws_access_key_id(self, aws_access_key_id):
45
- self.aws_access_key_id = aws_access_key_id
46
- return self
47
-
48
- def set_aws_secret_access_key(self, aws_secret_access_key):
49
- self.aws_secret_access_key = aws_secret_access_key
50
- return self
51
-
52
- def set_endpoint_url(self, endpoint_url):
53
- self.endpoint_url = endpoint_url
54
- return self
55
-
56
- def conn_session(self):
57
- self.session = Session(aws_access_key_id=self.aws_access_key_id,
58
- aws_secret_access_key=self.aws_secret_access_key)
59
- return self.session
60
-
61
- def get_client(self):
62
- assert self.session is not None
63
- if self.is_low_level:
64
- # 根据名称创建低级服务客户端
65
- # botocore.client.S3
66
- self.client = self.session.client('s3', endpoint_url=self.endpoint_url)
67
- print(type(self.client))
68
- else:
69
- # boto3.resources.factory.s3.ServiceResource
70
- # 按名称创建资源服务客户端
71
- self.client = self.session.resource('s3', endpoint_url=self.endpoint_url)
72
- print(type(self.client))
73
- return self
74
-
75
- def get_all_buckets(self):
76
- """
77
- 获取所有的桶信息
78
- :return:
79
- """
80
- if self.is_low_level is False:
81
- return self.client.buckets.all()
82
- else:
83
- return self.client.list_buckets()
84
-
85
- def create_buckets(self, buckets_name):
86
- """
87
-
88
- :param buckets_name:
89
- :return:
90
- 如果get_client 使用 client 返回
91
- {'ResponseMetadata': {'RequestId': '16BC90EED4A433C4', 'HostId': '', 'HTTPStatusCode': 200, 'HTTPHeaders': {'accept-ranges': 'bytes', 'content-length': '0', 'content-security-policy': 'block-all-mixed-content', 'location': '/create1', 'server': 'MinIO', 'strict-transport-security': 'max-age=31536000; includeSubDomains', 'vary': 'Origin, Accept-Encoding', 'x-amz-request-id': '16BC90EED4A433C4', 'x-content-type-options': 'nosniff', 'x-xss-protection': '1; mode=block', 'date': 'Wed, 01 Dec 2021 07:28:39 GMT'}, 'RetryAttempts': 0}, 'Location': '/create1'}
92
- 如果resource 使用 client 返回
93
- s3.Bucket(name='create2')
94
- """
95
- assert buckets_name.find("_") == -1, "新建一个bucket桶(bucket name 中不能有_下划线)"
96
- # 新建一个bucket桶(bucket name 中不能有_下划线)
97
- return self.client.create_bucket(Bucket=buckets_name)
98
-
99
- def delete_buckets(self, bucket_name):
100
- """
101
- 删除桶 删除bucket(只能删除空的bucket)
102
- :return:
103
- """
104
- if self.is_low_level is False:
105
- bucket = self.client.Bucket(bucket_name)
106
- response = bucket.delete()
107
- else:
108
- response = self.client.delete_bucket(Bucket=bucket_name)
109
- return response
110
-
111
- def get_bucket(self, bucket_name):
112
- """
113
- 获取 bucket 对象
114
- :param bucket_name:
115
- :return:
116
- """
117
- if self.is_low_level is False:
118
- self.bucket = self.client.Bucket(bucket_name)
119
- return self.bucket
120
- else:
121
- raise Exception("无实现方法")
122
-
123
- def get_all_obs_filter(self, Prefix):
124
- """
125
- Prefix 为匹配模式
126
- 例:列出前缀为 haha 的文件
127
- Prefix='haha'
128
- :param Prefix:
129
- :return: 可以调用 obj.key
130
- """
131
- if not self.is_low_level:
132
- for obj in self.bucket.objects.filter(Prefix=Prefix):
133
- yield obj
134
- else:
135
- raise Exception("请设置 is_low_level 为 False")
136
-
137
- def get_object(self, bucket_name):
138
- """
139
- 会返回包括目录在内的所有对象
140
- :param bucket_name:
141
- :return:
142
- """
143
- if self.is_low_level is False:
144
- bucket = self.client.Bucket(bucket_name)
145
- # boto3.resources.collection.s3.Bucket.objectsCollection
146
- all_obj = bucket.objects.all()
147
- return all_obj
148
- # for obj in bucket.objects.all():
149
- # print('obj name:%s' % obj.key)
150
- else:
151
- return self.client.list_objects(Bucket=bucket_name)
152
-
153
- def upload_file(self, local_file, bucket_name, key):
154
- """
155
- # key 桶中的位置 test1/test.pdf
156
- :param local_file: 本地文件路径
157
- :param bucket_name: 桶名
158
- :param key: 远程文件路径
159
- :return:
160
- """
161
-
162
- if self.is_low_level is False:
163
- self.client.Bucket(bucket_name).upload_file(local_file, key)
164
- else:
165
- self.client.upload_file(local_file, bucket_name, key)
166
-
167
- def upload_fileobj(self, fileobj, bucket_name, key):
168
- # fileobj 字节流
169
- if self.is_low_level is False:
170
- self.client.Bucket(bucket_name).upload_fileobj(fileobj, key)
171
- else:
172
- self.client.upload_fileobj(fileobj, bucket_name, key)
173
-
174
- def download_file(self, local_file, bucket_name, key):
175
- if self.is_low_level is False:
176
- self.client.Bucket(bucket_name).download_file(key, local_file)
177
- else:
178
- self.client.download_file(bucket_name, key, local_file)
179
-
180
- def download_fileobj(self, fileobj, bucket_name, key):
181
- if self.is_low_level is False:
182
- self.client.Bucket(bucket_name).download_fileobj(key, fileobj)
183
- else:
184
- self.client.download_fileobj(bucket_name, key, fileobj)
185
-
186
- def check_exist(self, bucket_name, key):
187
- """
188
- if self.is_low_level:
189
- {'ResponseMetadata': {'RequestId': '17E6A65A2B299D3B', 'HostId': '', 'HTTPStatusCode': 200, 'HTTPHeaders': {'accept-ranges': 'bytes', 'content-length': '117', 'content-security-policy': 'block-all-mixed-content', 'content-type': 'binary/octet-stream', 'etag': '"2237a934f176003e41abf3d733291079"', 'last-modified': 'Thu, 25 Jul 2024 05:49:43 GMT', 'server': 'MinIO', 'strict-transport-security': 'max-age=31536000; includeSubDomains', 'vary': 'Origin, Accept-Encoding', 'x-amz-request-id': '17E6A65A2B299D3B', 'x-content-type-options': 'nosniff', 'x-xss-protection': '1; mode=block', 'date': 'Mon, 29 Jul 2024 09:53:33 GMT'}, 'RetryAttempts': 0}, 'AcceptRanges': 'bytes', 'LastModified': datetime.datetime(2024, 7, 25, 5, 49, 43, tzinfo=tzutc()), 'ContentLength': 117, 'ETag': '"2237a934f176003e41abf3d733291079"', 'ContentType': 'binary/octet-stream', 'Metadata': {}}
190
- 判定文件是否存在,
191
- :param bucket_name: 桶名
192
- :param key: 文件key
193
- :return:
194
- """
195
-
196
- if self.is_low_level:
197
- try:
198
- obj_info = self.client.head_object(
199
- Bucket=bucket_name,
200
- Key=key
201
- )
202
- return obj_info
203
- except:
204
- return None
205
- else:
206
- # 获取指定存储桶
207
- bucket = self.client.Bucket(bucket_name)
208
- # 检查 key 是否存在
209
- objs = list(bucket.objects.filter(Prefix=key))
210
- if len(objs) > 0 and objs[0].key == key:
211
- # [s3.ObjectSummary(bucket_name='crawl.dc.cqvip.com', key='foreign/organ/parsel_organ_log.txt')]
212
- return objs[0]
213
- else:
214
- return None
215
-
216
- def get_prefix_obj(self, bucket, prefix, delimiter):
217
- """
218
- Bucket="crawl.dc.cqvip.com",
219
- Prefix="foreign/organ/ei/",
220
- Delimiter='/'
221
-
222
- Returns:
223
-
224
- """
225
- if self.is_low_level:
226
- # for common_prefix in response.get('CommonPrefixes', []): 获取子目录
227
- return self.client.list_objects_v2(Bucket=bucket,
228
- Prefix=prefix,
229
- Delimiter=delimiter) # 设置 Delimiter='/' 可以确保只列出目录而不是所有对象。
230
- else:
231
- # 该方法只能列出文件 没办法列出目录
232
- # bucket_resource = self.client.Bucket(bucket)
233
- # objects = bucket_resource.objects.filter(Prefix=prefix, Delimiter=delimiter)
234
- # return list(objects)
235
-
236
- bucket_resource = self.client.Bucket(bucket)
237
- return bucket_resource.meta.client.list_objects_v2(Bucket=bucket,
238
- Prefix=prefix,
239
- Delimiter=delimiter)
240
-
241
- def get_object_value(self, bucket_name, file_key, encoding='utf-8'):
242
- """
243
- 读取文本数据
244
- Returns:
245
-
246
- """
247
- if self.is_low_level:
248
- obj = self.client.get_object(Bucket=bucket_name, Key=file_key)
249
- body = obj['Body'].read().decode(encoding)
250
- return body
251
- else:
252
- bucket_resource = self.client.Bucket(bucket_name)
253
- obj = bucket_resource.Object(file_key)
254
- body = obj.get()['Body'].read().decode(encoding)
255
- return body
256
-
257
- def put_object(self, bucket_name, key, body):
258
- """
259
- 直接写内容到文件
260
- Args:
261
- bucket_name:
262
- key:
263
- body: 需要 编码 .encode('utf-8')
264
-
265
- Returns:
266
-
267
- """
268
- if self.is_low_level:
269
- self.client.put_object(Bucket=bucket_name,
270
- Key=key,
271
- Body=body)
272
- else:
273
- # 获取 Bucket 对象
274
- bucket_resource = self.client.Bucket(bucket_name)
275
-
276
- # 将内容写入文件
277
- bucket_resource.put_object(Key=key, Body=body)
278
-
279
-
280
- bb = BaseBoto3(aws_access_key_id="UM51J2G5ZG0FE5CCERB9",
281
- aws_secret_access_key="u+OEmhE2fahF2L+oXB+HXe8IJs22Lo38icvlF+Yq",
282
- endpoint_url="http://192.168.31.135:9000"
283
- )
284
- bb.conn_session()
285
- bb.set_is_low_level(False)
286
- bb.get_client()
287
-
288
- result = bb.check_exist("crawl.dc.cqvip.com",
289
- "foreign/organ/parsel_organ_log.txt")
290
-
291
- print(result)
1
+ import boto3
2
+ import botocore
3
+ from boto3.session import Session
4
+
5
+ # boto3 该开发工具包由两个关键的 Python 包组成:
6
+ # Botocore(提供在 Python 开发工具包和 AWS CLI 之间共享的低级功能的库)
7
+ # 和 Boto3(实现 Python 开发工具包本身的包)
8
+
9
+
10
+ """
11
+ aws_access_key_id = 'minioa'
12
+ aws_secret_access_key = 'minio123'
13
+ endpoint_url = 'http://192.168.31.164:9000'
14
+ bbt = BaseBoto3(aws_access_key_id=aws_access_key_id,
15
+ aws_secret_access_key=aws_secret_access_key,
16
+ endpoint_url=endpoint_url)
17
+ bbt.conn_session()
18
+ bbt.set_is_low_level(False)
19
+ bbt.get_client()
20
+ print("**********************")
21
+ print(bbt.delete_buckets("test1"))
22
+ # bbt.set_is_low_level(False)
23
+ # bbt.get_client()
24
+ # print("**********************")
25
+ # print(bbt.create_buckets("create2"))
26
+ """
27
+
28
+
29
class BaseBoto3(object):
    """Convenience wrapper around a boto3 S3 *client* or S3 *resource*.

    Typical usage::

        bbt = BaseBoto3(aws_access_key_id=..., aws_secret_access_key=...,
                        endpoint_url="http://host:9000")
        bbt.conn_session()
        bbt.set_is_low_level(False).get_client()
        bbt.check_exist("bucket", "path/to/key")

    ``is_low_level`` selects which boto3 object ``get_client`` builds and which
    API every other method talks to: truthy means ``session.client('s3')``,
    falsy means ``session.resource('s3')``. The flag is always evaluated by
    truthiness so that every method agrees with ``get_client`` on the mode.
    Set the mode before calling ``get_client``; the methods do not rebuild
    ``self.client`` when the flag changes afterwards.
    """

    def __init__(self, aws_access_key_id="", aws_secret_access_key="", endpoint_url=""):
        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key
        self.endpoint_url = endpoint_url
        self.session = None  # boto3 Session, created by conn_session()
        self.client = None  # S3 client or S3 resource, created by get_client()
        self.is_low_level = False  # falsy -> resource API, truthy -> client API
        self.bucket = None  # Bucket resource remembered by get_bucket()

    def set_is_low_level(self, is_low_level):
        """Choose the client (truthy) or resource (falsy) API; returns ``self``."""
        self.is_low_level = is_low_level
        return self

    def set_aws_access_key_id(self, aws_access_key_id):
        """Replace the access key id used by ``conn_session``; returns ``self``."""
        self.aws_access_key_id = aws_access_key_id
        return self

    def set_aws_secret_access_key(self, aws_secret_access_key):
        """Replace the secret key used by ``conn_session``; returns ``self``."""
        self.aws_secret_access_key = aws_secret_access_key
        return self

    def set_endpoint_url(self, endpoint_url):
        """Replace the endpoint URL used by ``get_client``; returns ``self``."""
        self.endpoint_url = endpoint_url
        return self

    def conn_session(self):
        """Create the boto3 ``Session`` from the stored credentials and return it."""
        self.session = Session(aws_access_key_id=self.aws_access_key_id,
                               aws_secret_access_key=self.aws_secret_access_key)
        return self.session

    def get_client(self):
        """Build ``self.client`` for the selected mode and return ``self``.

        :raises AssertionError: if ``conn_session`` has not been called yet.
        """
        # Raised explicitly (instead of `assert`) so the check survives `python -O`;
        # the exception type is kept for callers that already catch AssertionError.
        if self.session is None:
            raise AssertionError("call conn_session() before get_client()")
        if self.is_low_level:
            # Low-level service client (botocore.client.S3).
            self.client = self.session.client('s3', endpoint_url=self.endpoint_url)
        else:
            # Resource-style client (boto3.resources.factory.s3.ServiceResource).
            self.client = self.session.resource('s3', endpoint_url=self.endpoint_url)
        return self

    def get_all_buckets(self):
        """Return all buckets.

        :return: ``client.list_buckets()`` response in client mode, otherwise the
            resource's ``buckets.all()`` collection.
        """
        if self.is_low_level:
            return self.client.list_buckets()
        return self.client.buckets.all()

    def create_buckets(self, buckets_name):
        """Create a bucket; the name must not contain an underscore.

        :param buckets_name: name of the bucket to create
        :return: whatever ``create_bucket`` returns for the active mode (the
            original notes show a response dict with ``ResponseMetadata`` and
            ``Location`` in client mode and ``s3.Bucket(name=...)`` in resource mode)
        :raises AssertionError: if the name contains ``_``
        """
        # Explicit raise so the validation is not stripped under `python -O`.
        if "_" in buckets_name:
            raise AssertionError("新建一个bucket桶(bucket name 中不能有_下划线)")
        return self.client.create_bucket(Bucket=buckets_name)

    def delete_buckets(self, bucket_name):
        """Delete a bucket (per the original note, only empty buckets can be deleted).

        :return: the response of the underlying delete call
        """
        if self.is_low_level:
            return self.client.delete_bucket(Bucket=bucket_name)
        return self.client.Bucket(bucket_name).delete()

    def get_bucket(self, bucket_name):
        """Fetch a Bucket resource, remember it in ``self.bucket`` and return it.

        :raises Exception: in client mode, where this is not implemented
        """
        if self.is_low_level:
            raise Exception("无实现方法")
        self.bucket = self.client.Bucket(bucket_name)
        return self.bucket

    def get_all_obs_filter(self, Prefix):
        """Yield the objects of ``self.bucket`` whose key starts with ``Prefix``.

        Example: ``Prefix='haha'`` lists the objects whose key begins with ``haha``.
        ``get_bucket`` must have been called first so that ``self.bucket`` is set.
        This is a generator, so the mode check runs on first iteration.

        :return: iterator of object summaries (each exposes ``.key``)
        :raises Exception: in client mode
        """
        if self.is_low_level:
            raise Exception("请设置 is_low_level 为 False")
        yield from self.bucket.objects.filter(Prefix=Prefix)

    def get_object(self, bucket_name):
        """List every object in a bucket (per the original note, directories included).

        :return: ``bucket.objects.all()`` collection in resource mode, otherwise
            the ``list_objects`` response
        """
        if self.is_low_level:
            return self.client.list_objects(Bucket=bucket_name)
        return self.client.Bucket(bucket_name).objects.all()

    def upload_file(self, local_file, bucket_name, key):
        """Upload a local file.

        :param local_file: path of the local file
        :param bucket_name: target bucket
        :param key: destination key inside the bucket, e.g. ``test1/test.pdf``
        """
        if self.is_low_level:
            self.client.upload_file(local_file, bucket_name, key)
        else:
            self.client.Bucket(bucket_name).upload_file(local_file, key)

    def upload_fileobj(self, fileobj, bucket_name, key):
        """Upload from a file-like object (the original notes call it a byte stream)."""
        if self.is_low_level:
            self.client.upload_fileobj(fileobj, bucket_name, key)
        else:
            self.client.Bucket(bucket_name).upload_fileobj(fileobj, key)

    def download_file(self, local_file, bucket_name, key):
        """Download ``key`` from ``bucket_name`` into the local path ``local_file``."""
        if self.is_low_level:
            self.client.download_file(bucket_name, key, local_file)
        else:
            self.client.Bucket(bucket_name).download_file(key, local_file)

    def download_fileobj(self, fileobj, bucket_name, key):
        """Download ``key`` from ``bucket_name`` into the file-like object ``fileobj``."""
        if self.is_low_level:
            self.client.download_fileobj(bucket_name, key, fileobj)
        else:
            self.client.Bucket(bucket_name).download_fileobj(key, fileobj)

    def check_exist(self, bucket_name, key):
        """Check whether an object exists.

        :param bucket_name: bucket name
        :param key: object key
        :return: client mode: the ``head_object`` response dict, or ``None`` when
            the service answers with an error response; resource mode: the
            matching object summary, or ``None``
        """
        if self.is_low_level:
            try:
                return self.client.head_object(Bucket=bucket_name, Key=key)
            except botocore.exceptions.ClientError:
                # Only service error responses count as "not there"; connection
                # problems and KeyboardInterrupt are no longer swallowed.
                return None

        bucket = self.client.Bucket(bucket_name)
        # The code relies on an exact match, if any, being the first key listed for
        # this prefix; limit(1) stops after that one instead of walking every
        # object that merely shares the prefix.
        first = next(iter(bucket.objects.filter(Prefix=key).limit(1)), None)
        if first is not None and first.key == key:
            return first
        return None

    def get_prefix_obj(self, bucket, prefix, delimiter):
        """Run ``list_objects_v2`` with a prefix and delimiter.

        Example arguments: ``bucket="crawl.dc.cqvip.com"``,
        ``prefix="foreign/organ/ei/"``, ``delimiter='/'``. Per the original notes,
        sub-directories are read from the response's ``CommonPrefixes`` entry.

        :return: the raw ``list_objects_v2`` response
        """
        # The original notes say the resource-level objects.filter() only lists
        # files, not directories, so resource mode goes through the resource's
        # underlying low-level client as well.
        s3_client = self.client if self.is_low_level else self.client.meta.client
        return s3_client.list_objects_v2(Bucket=bucket,
                                         Prefix=prefix,
                                         Delimiter=delimiter)

    def get_object_value(self, bucket_name, file_key, encoding='utf-8'):
        """Read an object and return its content decoded as text.

        :param encoding: codec used to decode the object's bytes
        """
        if self.is_low_level:
            response = self.client.get_object(Bucket=bucket_name, Key=file_key)
        else:
            response = self.client.Bucket(bucket_name).Object(file_key).get()
        return response['Body'].read().decode(encoding)

    def put_object(self, bucket_name, key, body):
        """Write ``body`` directly to an object.

        :param bucket_name: target bucket
        :param key: destination key
        :param body: content to store; the original notes ask for already-encoded
            bytes, e.g. ``text.encode('utf-8')``
        """
        if self.is_low_level:
            self.client.put_object(Bucket=bucket_name,
                                   Key=key,
                                   Body=body)
        else:
            self.client.Bucket(bucket_name).put_object(Key=key, Body=body)
278
+
279
+
280
if __name__ == "__main__":
    # Manual smoke test. It used to run at import time with an access key and
    # secret hard-coded in the published package: every import issued a network
    # request, and the credentials were readable by anyone who downloaded the
    # wheel. It now runs only when the module is executed directly and reads the
    # credentials from the environment.
    # NOTE(review): the key pair that shipped in earlier releases should be
    # treated as leaked and rotated.
    import os

    bb = BaseBoto3(aws_access_key_id=os.environ.get("AWS_ACCESS_KEY_ID", ""),
                   aws_secret_access_key=os.environ.get("AWS_SECRET_ACCESS_KEY", ""),
                   endpoint_url=os.environ.get("S3_ENDPOINT_URL", "http://192.168.31.135:9000"))
    bb.conn_session()
    bb.set_is_low_level(False)
    bb.get_client()

    result = bb.check_exist("crawl.dc.cqvip.com",
                            "foreign/organ/parsel_organ_log.txt")

    print(result)
@@ -1,32 +1,32 @@
1
- import csv
2
-
3
-
4
- class BaseCsv(object):
5
-
6
- def __init__(self):
7
- pass
8
-
9
- def read_csv(self, filepath):
10
- """
11
- 根据文件路径逐行读取csv文件
12
- :param filepath:csv文件路径
13
- :return:csv每行记录
14
- """
15
- # mac_roman 原因
16
- # https://stackoverflow.com/questions/21504319/python-3-csv-file-giving-unicodedecodeerror-utf-8-codec-cant-decode-byte-err
17
- with open(filepath, "r",encoding='mac_roman') as f:
18
- reader = csv.reader(f)
19
- for row in reader:
20
- yield row
21
-
22
- def read_all_csv(self, filepath):
23
- """
24
- 根据文件路径读取csv文件所有行
25
- :param filepath:csv文件路径
26
- :return:csv所有行记录
27
- """
28
- with open(filepath, "r") as f:
29
- reader = csv.reader(f)
30
- result = list(reader)
31
- return result
32
-
1
+ import csv
2
+
3
+
4
class BaseCsv(object):
    """Small helper for reading CSV files with the standard ``csv`` module."""

    def read_csv(self, filepath, encoding='mac_roman'):
        """Lazily yield the rows of a CSV file, one list of strings per record.

        :param filepath: path of the CSV file
        :param encoding: text encoding used to open the file. The default stays
            ``mac_roman``, the original author's workaround for
            ``UnicodeDecodeError`` (see
            https://stackoverflow.com/questions/21504319/python-3-csv-file-giving-unicodedecodeerror-utf-8-codec-cant-decode-byte-err).
            It is a single-byte codec, so multi-byte UTF-8 text will not decode to
            the intended characters; pass ``encoding="utf-8"`` for such files.
        :return: generator of rows
        """
        # newline="" leaves line-ending handling to the csv module, so line breaks
        # embedded in quoted fields are preserved as written.
        with open(filepath, "r", encoding=encoding, newline="") as f:
            yield from csv.reader(f)

    def read_all_csv(self, filepath, encoding=None):
        """Read a whole CSV file and return all rows as a list.

        :param filepath: path of the CSV file
        :param encoding: text encoding used to open the file; ``None`` keeps the
            previous behaviour of using the platform default
        :return: list of rows, each a list of strings
        """
        with open(filepath, "r", encoding=encoding, newline="") as f:
            return list(csv.reader(f))
32
+