re-common 2.0.1__py3-none-any.whl → 10.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196)
  1. re_common/baselibrary/__init__.py +4 -0
  2. re_common/baselibrary/baseabs/__init__.py +7 -0
  3. re_common/baselibrary/baseabs/baseabs.py +26 -0
  4. re_common/baselibrary/database/__init__.py +0 -0
  5. re_common/baselibrary/database/mbuilder.py +132 -0
  6. re_common/baselibrary/database/moudle.py +93 -0
  7. re_common/baselibrary/database/msqlite3.py +194 -0
  8. re_common/baselibrary/database/mysql.py +169 -0
  9. re_common/baselibrary/database/sql_factory.py +26 -0
  10. re_common/baselibrary/mthread/MThreadingRun.py +486 -0
  11. re_common/baselibrary/mthread/MThreadingRunEvent.py +349 -0
  12. re_common/baselibrary/mthread/__init__.py +3 -0
  13. re_common/baselibrary/mthread/mythreading.py +695 -0
  14. re_common/baselibrary/pakge_other/__init__.py +0 -0
  15. re_common/baselibrary/pakge_other/socks.py +404 -0
  16. re_common/baselibrary/readconfig/__init__.py +0 -0
  17. re_common/baselibrary/readconfig/config_factory.py +18 -0
  18. re_common/baselibrary/readconfig/ini_config.py +317 -0
  19. re_common/baselibrary/readconfig/toml_config.py +49 -0
  20. re_common/baselibrary/temporary/__init__.py +0 -0
  21. re_common/baselibrary/temporary/envdata.py +36 -0
  22. re_common/baselibrary/tools/__init__.py +0 -0
  23. re_common/baselibrary/tools/all_requests/__init__.py +0 -0
  24. re_common/baselibrary/tools/all_requests/aiohttp_request.py +118 -0
  25. re_common/baselibrary/tools/all_requests/httpx_requet.py +102 -0
  26. re_common/baselibrary/tools/all_requests/mrequest.py +412 -0
  27. re_common/baselibrary/tools/all_requests/requests_request.py +81 -0
  28. re_common/baselibrary/tools/batch_compre/__init__.py +0 -0
  29. re_common/baselibrary/tools/batch_compre/bijiao_batch.py +31 -0
  30. re_common/baselibrary/tools/contrast_db3.py +123 -0
  31. re_common/baselibrary/tools/copy_file.py +39 -0
  32. re_common/baselibrary/tools/db3_2_sizedb3.py +102 -0
  33. re_common/baselibrary/tools/foreachgz.py +40 -0
  34. re_common/baselibrary/tools/get_attr.py +11 -0
  35. re_common/baselibrary/tools/image_to_pdf.py +62 -0
  36. re_common/baselibrary/tools/java_code_deal.py +139 -0
  37. re_common/baselibrary/tools/javacode.py +79 -0
  38. re_common/baselibrary/tools/mdb_db3.py +48 -0
  39. re_common/baselibrary/tools/merge_file.py +171 -0
  40. re_common/baselibrary/tools/merge_gz_file.py +165 -0
  41. re_common/baselibrary/tools/mhdfstools/__init__.py +0 -0
  42. re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +42 -0
  43. re_common/baselibrary/tools/mhdfstools/hdfst.py +42 -0
  44. re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +38 -0
  45. re_common/baselibrary/tools/mongo_tools.py +50 -0
  46. re_common/baselibrary/tools/move_file.py +170 -0
  47. re_common/baselibrary/tools/move_mongo/__init__.py +0 -0
  48. re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +63 -0
  49. re_common/baselibrary/tools/move_mongo/move_mongo_table.py +354 -0
  50. re_common/baselibrary/tools/move_mongo/use_mttf.py +18 -0
  51. re_common/baselibrary/tools/move_mongo/use_mv.py +93 -0
  52. re_common/baselibrary/tools/mpandas/__init__.py +0 -0
  53. re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +125 -0
  54. re_common/baselibrary/tools/mpandas/pandas_visualization.py +8 -0
  55. re_common/baselibrary/tools/myparsel.py +104 -0
  56. re_common/baselibrary/tools/rename_dir_file.py +37 -0
  57. re_common/baselibrary/tools/sequoiadb_utils.py +398 -0
  58. re_common/baselibrary/tools/split_line_to_many.py +25 -0
  59. re_common/baselibrary/tools/stringtodicts.py +33 -0
  60. re_common/baselibrary/tools/workwechant_bot.py +84 -0
  61. re_common/baselibrary/utils/__init__.py +0 -0
  62. re_common/baselibrary/utils/baseaiohttp.py +296 -0
  63. re_common/baselibrary/utils/baseaiomysql.py +87 -0
  64. re_common/baselibrary/utils/baseallstep.py +191 -0
  65. re_common/baselibrary/utils/baseavro.py +19 -0
  66. re_common/baselibrary/utils/baseboto3.py +291 -0
  67. re_common/baselibrary/utils/basecsv.py +32 -0
  68. re_common/baselibrary/utils/basedict.py +133 -0
  69. re_common/baselibrary/utils/basedir.py +241 -0
  70. re_common/baselibrary/utils/baseencode.py +351 -0
  71. re_common/baselibrary/utils/baseencoding.py +29 -0
  72. re_common/baselibrary/utils/baseesdsl.py +86 -0
  73. re_common/baselibrary/utils/baseexcel.py +264 -0
  74. re_common/baselibrary/utils/baseexcept.py +109 -0
  75. re_common/baselibrary/utils/basefile.py +654 -0
  76. re_common/baselibrary/utils/baseftp.py +214 -0
  77. re_common/baselibrary/utils/basegzip.py +60 -0
  78. re_common/baselibrary/utils/basehdfs.py +135 -0
  79. re_common/baselibrary/utils/basehttpx.py +268 -0
  80. re_common/baselibrary/utils/baseip.py +87 -0
  81. re_common/baselibrary/utils/basejson.py +2 -0
  82. re_common/baselibrary/utils/baselist.py +32 -0
  83. re_common/baselibrary/utils/basemotor.py +190 -0
  84. re_common/baselibrary/utils/basemssql.py +98 -0
  85. re_common/baselibrary/utils/baseodbc.py +113 -0
  86. re_common/baselibrary/utils/basepandas.py +302 -0
  87. re_common/baselibrary/utils/basepeewee.py +11 -0
  88. re_common/baselibrary/utils/basepika.py +180 -0
  89. re_common/baselibrary/utils/basepydash.py +143 -0
  90. re_common/baselibrary/utils/basepymongo.py +230 -0
  91. re_common/baselibrary/utils/basequeue.py +22 -0
  92. re_common/baselibrary/utils/baserar.py +57 -0
  93. re_common/baselibrary/utils/baserequest.py +279 -0
  94. re_common/baselibrary/utils/baseset.py +8 -0
  95. re_common/baselibrary/utils/basesmb.py +403 -0
  96. re_common/baselibrary/utils/basestring.py +382 -0
  97. re_common/baselibrary/utils/basetime.py +320 -0
  98. re_common/baselibrary/utils/basetuple.py +0 -0
  99. re_common/baselibrary/utils/baseurl.py +121 -0
  100. re_common/baselibrary/utils/basezip.py +57 -0
  101. re_common/baselibrary/utils/core/__init__.py +8 -0
  102. re_common/baselibrary/utils/core/bottomutils.py +18 -0
  103. re_common/baselibrary/utils/core/mdeprecated.py +327 -0
  104. re_common/baselibrary/utils/core/mlamada.py +16 -0
  105. re_common/baselibrary/utils/core/msginfo.py +25 -0
  106. re_common/baselibrary/utils/core/requests_core.py +103 -0
  107. re_common/baselibrary/utils/fateadm.py +429 -0
  108. re_common/baselibrary/utils/importfun.py +123 -0
  109. re_common/baselibrary/utils/mfaker.py +57 -0
  110. re_common/baselibrary/utils/my_abc/__init__.py +3 -0
  111. re_common/baselibrary/utils/my_abc/better_abc.py +32 -0
  112. re_common/baselibrary/utils/mylogger.py +414 -0
  113. re_common/baselibrary/utils/myredisclient.py +861 -0
  114. re_common/baselibrary/utils/pipupgrade.py +21 -0
  115. re_common/baselibrary/utils/ringlist.py +85 -0
  116. re_common/baselibrary/utils/version_compare.py +36 -0
  117. re_common/baselibrary/utils/ydmhttp.py +126 -0
  118. re_common/facade/__init__.py +1 -0
  119. re_common/facade/lazy_import.py +11 -0
  120. re_common/facade/loggerfacade.py +25 -0
  121. re_common/facade/mysqlfacade.py +467 -0
  122. re_common/facade/now.py +31 -0
  123. re_common/facade/sqlite3facade.py +257 -0
  124. re_common/facade/use/__init__.py +0 -0
  125. re_common/facade/use/mq_use_facade.py +83 -0
  126. re_common/facade/use/proxy_use_facade.py +20 -0
  127. re_common/libtest/__init__.py +0 -0
  128. re_common/libtest/base_dict_test.py +19 -0
  129. re_common/libtest/baseavro_test.py +13 -0
  130. re_common/libtest/basefile_test.py +14 -0
  131. re_common/libtest/basemssql_test.py +77 -0
  132. re_common/libtest/baseodbc_test.py +8 -0
  133. re_common/libtest/basepandas_test.py +38 -0
  134. re_common/libtest/get_attr_test/__init__.py +0 -0
  135. re_common/libtest/get_attr_test/get_attr_test_settings.py +14 -0
  136. re_common/libtest/get_attr_test/settings.py +55 -0
  137. re_common/libtest/idencode_test.py +54 -0
  138. re_common/libtest/iniconfig_test.py +35 -0
  139. re_common/libtest/ip_test.py +35 -0
  140. re_common/libtest/merge_file_test.py +20 -0
  141. re_common/libtest/mfaker_test.py +9 -0
  142. re_common/libtest/mm3_test.py +32 -0
  143. re_common/libtest/mylogger_test.py +89 -0
  144. re_common/libtest/myparsel_test.py +28 -0
  145. re_common/libtest/mysql_test.py +151 -0
  146. re_common/libtest/pymongo_test.py +21 -0
  147. re_common/libtest/split_test.py +12 -0
  148. re_common/libtest/sqlite3_merge_test.py +6 -0
  149. re_common/libtest/sqlite3_test.py +34 -0
  150. re_common/libtest/tomlconfig_test.py +30 -0
  151. re_common/libtest/use_tools_test/__init__.py +3 -0
  152. re_common/libtest/user/__init__.py +5 -0
  153. re_common/studio/__init__.py +5 -0
  154. re_common/studio/assignment_expressions.py +37 -0
  155. re_common/studio/mydash/__init__.py +0 -0
  156. re_common/studio/mydash/test1.py +19 -0
  157. re_common/studio/pydashstudio/__init__.py +0 -0
  158. re_common/studio/pydashstudio/first.py +9 -0
  159. re_common/studio/streamlitstudio/__init__.py +0 -0
  160. re_common/studio/streamlitstudio/first_app.py +66 -0
  161. re_common/studio/streamlitstudio/uber_pickups.py +24 -0
  162. re_common/studio/test.py +19 -0
  163. re_common/v2/baselibrary/utils/author_smi.py +14 -3
  164. re_common/v2/baselibrary/utils/stringutils.py +1 -0
  165. re_common/vip/__init__.py +0 -0
  166. re_common/vip/base_step_process.py +11 -0
  167. re_common/vip/baseencodeid.py +91 -0
  168. re_common/vip/changetaskname.py +28 -0
  169. re_common/vip/core_var.py +24 -0
  170. re_common/vip/mmh3Hash.py +90 -0
  171. re_common/vip/proxy/__init__.py +0 -0
  172. re_common/vip/proxy/allproxys.py +127 -0
  173. re_common/vip/proxy/allproxys_thread.py +159 -0
  174. re_common/vip/proxy/cnki_proxy.py +153 -0
  175. re_common/vip/proxy/kuaidaili.py +87 -0
  176. re_common/vip/proxy/proxy_all.py +113 -0
  177. re_common/vip/proxy/update_kuaidaili_0.py +42 -0
  178. re_common/vip/proxy/wanfang_proxy.py +152 -0
  179. re_common/vip/proxy/wp_proxy_all.py +182 -0
  180. re_common/vip/read_rawid_to_txt.py +92 -0
  181. re_common/vip/title/__init__.py +5 -0
  182. re_common/vip/title/transform/TransformBookTitleToZt.py +125 -0
  183. re_common/vip/title/transform/TransformConferenceTitleToZt.py +139 -0
  184. re_common/vip/title/transform/TransformCstadTitleToZt.py +196 -0
  185. re_common/vip/title/transform/TransformJournalTitleToZt.py +203 -0
  186. re_common/vip/title/transform/TransformPatentTitleToZt.py +132 -0
  187. re_common/vip/title/transform/TransformRegulationTitleToZt.py +114 -0
  188. re_common/vip/title/transform/TransformStandardTitleToZt.py +135 -0
  189. re_common/vip/title/transform/TransformThesisTitleToZt.py +135 -0
  190. re_common/vip/title/transform/__init__.py +11 -0
  191. {re_common-2.0.1.dist-info → re_common-10.0.1.dist-info}/METADATA +1 -1
  192. re_common-10.0.1.dist-info/RECORD +213 -0
  193. re_common-2.0.1.dist-info/RECORD +0 -25
  194. {re_common-2.0.1.dist-info → re_common-10.0.1.dist-info}/LICENSE +0 -0
  195. {re_common-2.0.1.dist-info → re_common-10.0.1.dist-info}/WHEEL +0 -0
  196. {re_common-2.0.1.dist-info → re_common-10.0.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,291 @@
1
+ import boto3
2
+ import botocore
3
+ from boto3.session import Session
4
+
5
+ # boto3 该开发工具包由两个关键的 Python 包组成:
6
+ # Botocore(提供在 Python 开发工具包和 AWS CLI 之间共享的低级功能的库)
7
+ # 和 Boto3(实现 Python 开发工具包本身的包)
8
+
9
+
10
+ """
11
+ aws_access_key_id = 'minioa'
12
+ aws_secret_access_key = 'minio123'
13
+ endpoint_url = 'http://192.168.31.164:9000'
14
+ bbt = BaseBoto3(aws_access_key_id=aws_access_key_id,
15
+ aws_secret_access_key=aws_secret_access_key,
16
+ endpoint_url=endpoint_url)
17
+ bbt.conn_session()
18
+ bbt.set_is_low_level(False)
19
+ bbt.get_client()
20
+ print("**********************")
21
+ print(bbt.delete_buckets("test1"))
22
+ # bbt.set_is_low_level(False)
23
+ # bbt.get_client()
24
+ # print("**********************")
25
+ # print(bbt.create_buckets("create2"))
26
+ """
27
+
28
+
29
class BaseBoto3(object):
    """Convenience wrapper around a boto3 S3 session.

    The wrapper works in one of two modes, selected with
    ``set_is_low_level`` *before* ``get_client`` is called:

    * ``is_low_level`` truthy  -- ``self.client`` is created with
      ``session.client('s3', ...)`` (low-level client API).
    * ``is_low_level`` falsy   -- ``self.client`` is created with
      ``session.resource('s3', ...)`` (resource API).  This is the default.

    Every method dispatches on the truthiness of ``is_low_level`` so that the
    object stored in ``self.client`` and the API used on it always agree.

    Typical usage::

        bbt = BaseBoto3(aws_access_key_id=..., aws_secret_access_key=...,
                        endpoint_url=...)
        bbt.conn_session()
        bbt.set_is_low_level(False)
        bbt.get_client()
    """

    def __init__(self, aws_access_key_id="", aws_secret_access_key="", endpoint_url=""):
        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key
        self.endpoint_url = endpoint_url
        # Populated by conn_session().
        self.session = None
        # Populated by get_client(); a client or a resource depending on mode.
        self.client = None
        self.is_low_level = False
        # Populated by get_bucket(); used by get_all_obs_filter().
        self.bucket = None

    def set_is_low_level(self, is_low_level):
        """Select client (truthy) or resource (falsy) mode; returns ``self``."""
        self.is_low_level = is_low_level
        return self

    def set_aws_access_key_id(self, aws_access_key_id):
        """Set the access key id; returns ``self`` for chaining."""
        self.aws_access_key_id = aws_access_key_id
        return self

    def set_aws_secret_access_key(self, aws_secret_access_key):
        """Set the secret access key; returns ``self`` for chaining."""
        self.aws_secret_access_key = aws_secret_access_key
        return self

    def set_endpoint_url(self, endpoint_url):
        """Set the endpoint URL; returns ``self`` for chaining."""
        self.endpoint_url = endpoint_url
        return self

    def conn_session(self):
        """Create the boto3 ``Session`` from the stored credentials and return it."""
        self.session = Session(aws_access_key_id=self.aws_access_key_id,
                               aws_secret_access_key=self.aws_secret_access_key)
        return self.session

    def get_client(self):
        """Create ``self.client`` for the current mode and return ``self``.

        :raises AssertionError: if ``conn_session`` has not been called yet.
        """
        # Explicit raise (instead of ``assert``) so the check survives ``python -O``.
        if self.session is None:
            raise AssertionError("conn_session() must be called before get_client()")
        # An empty endpoint string means "not configured": pass None so that
        # boto3 resolves its default endpoint instead of receiving "".
        endpoint_url = self.endpoint_url or None
        if self.is_low_level:
            # Low-level service client.
            self.client = self.session.client('s3', endpoint_url=endpoint_url)
        else:
            # Resource-style service object.
            self.client = self.session.resource('s3', endpoint_url=endpoint_url)
        return self

    def get_all_buckets(self):
        """Return all buckets.

        :return: ``client.list_buckets()`` in client mode, otherwise the
            ``buckets.all()`` collection of the resource.
        """
        if self.is_low_level:
            return self.client.list_buckets()
        return self.client.buckets.all()

    def create_buckets(self, buckets_name):
        """Create a bucket.

        :param buckets_name: bucket name; must not contain an underscore.
        :return: whatever ``create_bucket`` returns for the current mode
            (per the original author's notes: a response dict in client mode,
            an ``s3.Bucket`` object in resource mode).
        :raises AssertionError: if the name contains ``_``.
        """
        if "_" in buckets_name:
            raise AssertionError("新建一个bucket桶(bucket name 中不能有_下划线)")
        return self.client.create_bucket(Bucket=buckets_name)

    def delete_buckets(self, bucket_name):
        """Delete a bucket (per the original notes only empty buckets can be deleted).

        :return: the response of the delete call.
        """
        if self.is_low_level:
            return self.client.delete_bucket(Bucket=bucket_name)
        return self.client.Bucket(bucket_name).delete()

    def get_bucket(self, bucket_name):
        """Fetch a ``Bucket`` resource, remember it in ``self.bucket`` and return it.

        :raises Exception: in client mode, where this is not implemented.
        """
        if self.is_low_level:
            raise Exception("无实现方法")
        self.bucket = self.client.Bucket(bucket_name)
        return self.bucket

    def get_all_obs_filter(self, Prefix):
        """Yield the objects of ``self.bucket`` whose key starts with ``Prefix``.

        ``get_bucket`` must have been called first so that ``self.bucket`` is set.

        :param Prefix: key prefix to match, e.g. ``'haha'``.
        :return: generator of object summaries (each exposes ``.key``).
        :raises Exception: in client mode.
        """
        if self.is_low_level:
            raise Exception("请设置 is_low_level 为 False")
        for obj in self.bucket.objects.filter(Prefix=Prefix):
            yield obj

    def get_object(self, bucket_name):
        """List the objects of a bucket (per the original notes, directory entries included).

        :return: ``client.list_objects(...)`` response in client mode,
            otherwise the ``objects.all()`` collection of the bucket.
        """
        if self.is_low_level:
            return self.client.list_objects(Bucket=bucket_name)
        return self.client.Bucket(bucket_name).objects.all()

    def upload_file(self, local_file, bucket_name, key):
        """Upload a local file.

        :param local_file: path of the local file.
        :param bucket_name: target bucket.
        :param key: object key inside the bucket, e.g. ``test1/test.pdf``.
        """
        if self.is_low_level:
            self.client.upload_file(local_file, bucket_name, key)
        else:
            self.client.Bucket(bucket_name).upload_file(local_file, key)

    def upload_fileobj(self, fileobj, bucket_name, key):
        """Upload from a binary file-like object ``fileobj`` to ``key``."""
        if self.is_low_level:
            self.client.upload_fileobj(fileobj, bucket_name, key)
        else:
            self.client.Bucket(bucket_name).upload_fileobj(fileobj, key)

    def download_file(self, local_file, bucket_name, key):
        """Download object ``key`` of ``bucket_name`` to the path ``local_file``."""
        if self.is_low_level:
            self.client.download_file(bucket_name, key, local_file)
        else:
            self.client.Bucket(bucket_name).download_file(key, local_file)

    def download_fileobj(self, fileobj, bucket_name, key):
        """Download object ``key`` of ``bucket_name`` into the file-like ``fileobj``."""
        if self.is_low_level:
            self.client.download_fileobj(bucket_name, key, fileobj)
        else:
            self.client.Bucket(bucket_name).download_fileobj(key, fileobj)

    def check_exist(self, bucket_name, key):
        """Check whether an object exists.

        :param bucket_name: bucket name.
        :param key: object key.
        :return: ``None`` when the object is not found.  Otherwise the
            ``head_object`` response in client mode, or the matching object
            summary in resource mode.
        """
        if self.is_low_level:
            try:
                return self.client.head_object(Bucket=bucket_name, Key=key)
            except Exception:
                # Best-effort lookup: any failure is reported as "not found".
                # ``Exception`` (not a bare except) keeps KeyboardInterrupt and
                # SystemExit propagating.
                return None
        bucket = self.client.Bucket(bucket_name)
        # Only the first match is inspected, so do not materialise the whole
        # (possibly very large) listing for this prefix.
        first = next(iter(bucket.objects.filter(Prefix=key)), None)
        if first is not None and first.key == key:
            return first
        return None

    def get_prefix_obj(self, bucket, prefix, delimiter):
        """Call ``list_objects_v2`` for a prefix and return the raw response.

        Example arguments: ``bucket="crawl.dc.cqvip.com"``,
        ``prefix="foreign/organ/ei/"``, ``delimiter='/'``.  With a delimiter
        the sub-"directories" are available under ``CommonPrefixes`` in the
        response.

        The resource-style ``objects.filter`` can only list files, not
        directories, so resource mode also goes through the underlying
        low-level client.
        """
        client = self.client if self.is_low_level else self.client.meta.client
        return client.list_objects_v2(Bucket=bucket,
                                      Prefix=prefix,
                                      Delimiter=delimiter)

    def get_object_value(self, bucket_name, file_key, encoding='utf-8'):
        """Read an object and return its content decoded as text.

        :param encoding: codec used to decode the body bytes.
        """
        if self.is_low_level:
            obj = self.client.get_object(Bucket=bucket_name, Key=file_key)
        else:
            obj = self.client.Bucket(bucket_name).Object(file_key).get()
        return obj['Body'].read().decode(encoding)

    def put_object(self, bucket_name, key, body):
        """Write ``body`` directly to the object ``key``.

        :param bucket_name: target bucket.
        :param key: object key.
        :param body: content to store; the original notes ask for encoded
            bytes, e.g. ``text.encode('utf-8')``.
        """
        if self.is_low_level:
            self.client.put_object(Bucket=bucket_name,
                                   Key=key,
                                   Body=body)
        else:
            self.client.Bucket(bucket_name).put_object(Key=key, Body=body)
278
+
279
+
280
# Manual smoke test.  It is guarded so that merely importing this module does
# not open a connection to the (private-network) endpoint below; previously
# this ran at import time and any connection failure broke the import.
if __name__ == "__main__":
    # NOTE(security): credentials are hard-coded in a published package.
    # They should be rotated and loaded from the environment or a config file.
    bb = BaseBoto3(aws_access_key_id="UM51J2G5ZG0FE5CCERB9",
                   aws_secret_access_key="u+OEmhE2fahF2L+oXB+HXe8IJs22Lo38icvlF+Yq",
                   endpoint_url="http://192.168.31.135:9000"
                   )
    bb.conn_session()
    bb.set_is_low_level(False)
    bb.get_client()

    result = bb.check_exist("crawl.dc.cqvip.com",
                            "foreign/organ/parsel_organ_log.txt")

    print(result)
@@ -0,0 +1,32 @@
1
+ import csv
2
+
3
+
4
class BaseCsv(object):
    """Small helpers for reading CSV files with the standard ``csv`` module."""

    def __init__(self):
        pass

    def read_csv(self, filepath, encoding='mac_roman'):
        """Lazily yield the rows of a CSV file, one list of strings per record.

        :param filepath: path of the CSV file.
        :param encoding: text encoding used to open the file.  The default
            stays ``mac_roman``, chosen by the original author to avoid
            ``UnicodeDecodeError`` on files that are not valid UTF-8, see
            https://stackoverflow.com/questions/21504319/python-3-csv-file-giving-unicodedecodeerror-utf-8-codec-cant-decode-byte-err
            Pass the real encoding (e.g. ``'utf-8'``) when it is known.
        :return: generator of rows.
        """
        # newline="" is what the csv module expects, so that newlines embedded
        # in quoted fields are handed to the reader untouched.
        with open(filepath, "r", encoding=encoding, newline="") as f:
            yield from csv.reader(f)

    def read_all_csv(self, filepath, encoding=None):
        """Read a whole CSV file into memory.

        :param filepath: path of the CSV file.
        :param encoding: text encoding used to open the file; ``None``
            (the default) keeps the platform default, as before.
        :return: list of rows, each row a list of strings.
        """
        with open(filepath, "r", encoding=encoding, newline="") as f:
            return list(csv.reader(f))
32
+
@@ -0,0 +1,133 @@
1
+ import copy
2
+ import sys
3
+
4
+
5
class BaseDicts(object):
    """Stateless helpers for filtering, sorting and probing dictionaries."""

    @classmethod
    def removeDictsNone(cls, dicts: dict) -> dict:
        """Return a new dict without the entries whose value is ``None``."""
        return {key: value for key, value in dicts.items() if value is not None}

    @classmethod
    def removeDictsStringNull(cls, dicts: dict) -> dict:
        """Return a new dict without the entries whose value equals ``''``."""
        return {key: value for key, value in dicts.items() if value != ""}

    @classmethod
    def removeDictsAllNone(cls, dicts: dict) -> dict:
        """Return a new dict without the entries whose value is ``''`` or ``None``."""
        return {key: value for key, value in dicts.items() if value != "" and value is not None}

    @classmethod
    def sortkeys(cls, dicts, reverse=False):
        """Return a new dict ordered by key.

        :param reverse: ``False`` (default) for ascending, ``True`` for descending.
        """
        return {k: dicts[k] for k in sorted(dicts, reverse=reverse)}

    @classmethod
    def sortvalues(cls, dicts, reverse=False):
        """Return a new dict ordered by value.

        :param reverse: ``False`` (default) for ascending, ``True`` for descending.
        """
        return dict(sorted(dicts.items(), key=lambda pair: pair[1], reverse=reverse))

    @classmethod
    def is_key_have(cls, dicts, key):
        """Return ``True`` if ``key`` is a top-level key of ``dicts`` (one level only)."""
        return key in dicts

    @classmethod
    def is_more_key_have(cls, dicts, keys=()):
        """Check that every key in ``keys`` exists, following dotted paths.

        :param dicts: dictionary to probe.
        :param keys: iterable of keys; a string such as ``"a.b"`` is looked up
            as ``dicts["a"]["b"]``.
        :return: ``True`` if all keys/paths resolve, ``False`` otherwise.
        """
        for item in keys:
            if isinstance(item, str) and "." in item:
                # Walk the path with plain indexing (no eval): safe for keys
                # that contain quotes and for untrusted key names.
                current = dicts
                for part in item.split("."):
                    try:
                        current = current[part]
                    except (KeyError, IndexError, TypeError):
                        return False
            elif item not in dicts:
                return False
        return True

    @classmethod
    def get_recursive_dict(cls, dict_a, key, call_back):
        """Apply ``call_back(key, value)`` to the leaves of a nested structure.

        * dict: every value is replaced **in place** -- nested
          dict/list/tuple values by the recursive result, everything else by
          ``call_back(sub_key, value)`` -- and the same dict is returned.
        * list: a non-empty list made only of dicts is mapped element-wise
          (a new list is returned); any other list, including an empty one,
          is treated as a leaf and passed to ``call_back(key, the_list)``.
        * tuple: converted to a list and handled as above.

        :param dict_a: structure to walk.
        :param key: key under which ``dict_a`` is stored in its parent; it is
            forwarded to ``call_back`` for list leaves.
        :param call_back: callable ``(key, value) -> new_value``.
        :raises AssertionError: if ``dict_a`` is not a dict, list or tuple.
        """
        if isinstance(dict_a, dict):
            for sub_key, value in dict_a.items():
                if isinstance(value, (list, tuple, dict)):
                    dict_a[sub_key] = cls.get_recursive_dict(value, sub_key, call_back)
                else:
                    dict_a[sub_key] = call_back(sub_key, value)
            return dict_a
        if isinstance(dict_a, tuple):
            dict_a = list(dict_a)
        if isinstance(dict_a, list):
            # Recurse only when *every* element is a dict.  The previous check
            # looked at the last element only, so a mixed list ending with a
            # dict recursed into a scalar and failed.
            if dict_a and all(isinstance(element, dict) for element in dict_a):
                return [cls.get_recursive_dict(element, key, call_back) for element in dict_a]
            return call_back(key, dict_a)
        raise AssertionError("传入类型错误:{}".format(type(dict_a)))

    @classmethod
    def is_dict_exit_key(cls, dicts, key_name, default=""):
        """Return ``dicts[key_name]`` if the key exists, otherwise ``default``.

        ``default`` is also returned when ``dicts`` is ``None`` or not a dict.
        Stored falsy values such as ``0`` or ``False`` are returned as-is.
        """
        if isinstance(dicts, dict) and key_name in dicts:
            return dicts[key_name]
        return default
@@ -0,0 +1,241 @@
1
+ import os
2
+ import shutil
3
+ import warnings
4
+
5
+ from re_common.baselibrary.utils.basefile import BaseFile
6
+
7
+
8
class BaseDir(object):
    """Directory helpers built on ``os``, ``shutil`` and the project's ``BaseFile``."""

    @classmethod
    def get_dir_all_files(cls, path):
        """Yield the path of every file below ``path`` (directories are not yielded).

        The tree is walked top-down with ``os.walk``.

        :param path: directory to walk, e.g. ``C:/Users/XD/Pictures/``.
        :raises AssertionError: on first iteration, if ``path`` is not a directory.
        """
        # Explicit raise (instead of ``assert``) so the check survives
        # ``python -O``; the exception type and payload are unchanged.
        if not cls.is_dir(path):
            raise AssertionError(NotADirectoryError(path + " not is a dir please check"))
        for root, _dir_names, file_names in os.walk(path, topdown=True):
            for file_name in file_names:
                yield os.path.join(root, file_name)

    @classmethod
    def get_dir_all_dir(cls, s_path):
        """Yield the leaf directories (those without sub-directories) below ``s_path``.

        The tree is walked bottom-up (``topdown=False``), so deeper
        directories are visited before their parents.

        :param s_path: directory to walk.
        :raises AssertionError: on first iteration, if ``s_path`` is not a directory.
        """
        if not cls.is_dir(s_path):
            raise AssertionError(NotADirectoryError(s_path + " not is a dir please check"))
        for root, dir_names, _file_names in os.walk(s_path, topdown=False):
            for one_dir in dir_names:
                f_path = os.path.join(root, one_dir)
                # Only the first entry of the inner walk (f_path itself) is
                # needed to know whether f_path has sub-directories.
                first_entry = next(os.walk(f_path), None)
                if first_entry is not None and not first_entry[1]:
                    yield f_path

    @classmethod
    def is_dir_exists(cls, pathdir):
        """Return whether ``pathdir`` exists (uses ``os.path.exists``, so a file also counts)."""
        return os.path.exists(pathdir)

    @classmethod
    def create_dir(cls, pathdir):
        """Create ``pathdir`` including missing parents; do nothing if the path exists."""
        if not cls.is_dir_exists(pathdir):
            # exist_ok closes the race where the directory is created by
            # someone else between the check above and this call.
            os.makedirs(pathdir, exist_ok=True)

    @classmethod
    def get_upper_dir(cls, curPath: str, num: int) -> str:
        """Return ``curPath`` with its last ``abs(num)`` components removed.

        If the path includes a file name, ``num=-1`` gives the file's directory.

        :param curPath: path using ``os.sep`` as separator.
        :param num: zero or a negative integer; ``0`` returns ``curPath``
            unchanged.  A positive or non-integer value emits a warning and
            returns ``curPath`` unchanged.
        :return: the shortened path.  If ``num`` asks for more levels than the
            path has, a warning is emitted and the first component is returned.
        """
        if not isinstance(num, int) or num > 0:
            warnings.warn("this not int or num > 0 , The path is not processed, return curPath")
            return curPath
        pathlist = curPath.split(os.sep)
        keep = len(pathlist) + num
        if keep < 1:
            warnings.warn("this path length < num ,will return root dir ")
            keep = 1
        return os.sep.join(pathlist[:keep])

    @classmethod
    def get_file_dir_absolute(cls, file):
        """Return the absolute directory containing ``file`` (pass ``__file__``)."""
        return os.path.dirname(os.path.abspath(file))

    @classmethod
    def is_dir(cls, spath):
        """Return ``True`` if ``spath`` is an existing directory."""
        return os.path.isdir(spath)

    @classmethod
    def get_dir_size(cls, dirpath):
        """Return the total size in bytes of all files below ``dirpath``.

        NOTE(review): ``BaseFile.get_file_size`` is used as a plain number
        here, the same way ``get_dir_all_file_size`` uses it -- confirm
        against ``BaseFile``.
        """
        size = 0
        for filepath in cls.get_dir_all_files(dirpath):
            size += BaseFile.get_file_size(filepath)
        return size

    @classmethod
    def copy_file_to_dir(cls, filePath, tarDirPath):
        """Copy a file into a directory, creating the directory if needed.

        :param filePath: path of the file to copy.
        :param tarDirPath: target directory; it is created (with parents)
            when the path does not exist yet.
        :raises AssertionError: if ``filePath`` does not exist or is not a file.
        """
        if not BaseFile.is_file_exists(filePath):
            raise AssertionError(FileNotFoundError("需要copy的文件不存在"))
        if not BaseFile.is_file(filePath):
            raise AssertionError(FileNotFoundError("需要copy的不是一个文件"))
        cls.create_dir(tarDirPath)
        shutil.copy(filePath, tarDirPath)

    @classmethod
    def copy_dir_to_dir(cls, oldDir, newDir, moudle=0):
        """Copy the directory tree ``oldDir`` to ``newDir``.

        :param oldDir: existing source directory.
        :param newDir: target directory.
        :param moudle: copy mode.
            ``0`` -- ``newDir`` must not exist yet;
            ``1`` -- an existing ``newDir`` is deleted first.
            Any other value (``3`` "replace" and ``4`` "skip existing" were
            only planned) is not implemented: nothing is copied and a
            warning is emitted.
        :raises AssertionError: if ``oldDir`` is not a directory, or if
            ``newDir`` exists in mode ``0``.
        """
        if not cls.is_dir(oldDir):
            raise AssertionError(IsADirectoryError(oldDir + "目录不存在"))
        if moudle == 0:
            if cls.is_dir(newDir):
                raise AssertionError(IsADirectoryError(newDir + "目录存在,不能使用该模式,推荐使用模式2"))
            shutil.copytree(oldDir, newDir)
        elif moudle == 1:
            if cls.is_dir(newDir):
                cls.delete_dir(newDir)
            shutil.copytree(oldDir, newDir)
        else:
            # Previously a silent no-op; keep not copying but make it visible.
            warnings.warn("copy_dir_to_dir: mode {!r} is not implemented, nothing was copied".format(moudle))

    @classmethod
    def delete_dir(cls, dirpath):
        """Delete ``dirpath`` and everything below it (``shutil.rmtree``)."""
        shutil.rmtree(dirpath)

    @classmethod
    def replace_dir_special_string(cls, strings):
        """Return ``strings`` with the characters ``/ \\ : * " < > | ?`` removed.

        These are the characters that are illegal in Windows directory names.
        """
        return strings.translate(str.maketrans("", "", '/\\:*"<>|?'))

    @classmethod
    def remove_file_suf(cls, dirpath, suf):
        """Remove every file below ``dirpath`` whose path ends with ``suf``."""
        for file in cls.get_dir_all_files(dirpath):
            if file.endswith(suf):
                BaseFile.remove_file(file)

    @classmethod
    def is_null_dir(cls, dirpath):
        """Return ``True`` if the directory ``dirpath`` has no entries."""
        return not os.listdir(dirpath)

    @classmethod
    def get_dir_file_num(cls, dirpath):
        """Return the number of files directly inside ``dirpath`` (not recursive)."""
        return len([name for name in os.listdir(dirpath) if os.path.isfile(os.path.join(dirpath, name))])

    @classmethod
    def get_dir_all_file_num(cls, dirpath, ext=None):
        """Return the number of files below ``dirpath``, sub-directories included.

        :param ext: optional suffix (or tuple of suffixes); when given, only
            files whose path ends with it are counted.
        """
        count = 0
        for path in cls.get_dir_all_files(dirpath):
            if BaseFile.is_file(path) and (ext is None or path.endswith(ext)):
                count += 1
        return count

    @classmethod
    def get_dir_all_file_size(cls, dirpath, ext=None):
        """Return the summed size of the files below ``dirpath``, sub-directories included.

        :param ext: optional suffix (or tuple of suffixes); when given, only
            files whose path ends with it are summed.
        """
        total = 0
        for path in cls.get_dir_all_files(dirpath):
            if BaseFile.is_file(path) and (ext is None or path.endswith(ext)):
                total = total + BaseFile.get_file_size(path)
        return total