re-common 10.0.37__py3-none-any.whl → 10.0.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- re_common/baselibrary/__init__.py +4 -4
- re_common/baselibrary/baseabs/__init__.py +6 -6
- re_common/baselibrary/baseabs/baseabs.py +26 -26
- re_common/baselibrary/database/mbuilder.py +132 -132
- re_common/baselibrary/database/moudle.py +93 -93
- re_common/baselibrary/database/msqlite3.py +194 -194
- re_common/baselibrary/database/mysql.py +169 -169
- re_common/baselibrary/database/sql_factory.py +26 -26
- re_common/baselibrary/mthread/MThreadingRun.py +486 -486
- re_common/baselibrary/mthread/MThreadingRunEvent.py +349 -349
- re_common/baselibrary/mthread/__init__.py +2 -2
- re_common/baselibrary/mthread/mythreading.py +695 -695
- re_common/baselibrary/pakge_other/socks.py +404 -404
- re_common/baselibrary/readconfig/config_factory.py +18 -18
- re_common/baselibrary/readconfig/ini_config.py +317 -317
- re_common/baselibrary/readconfig/toml_config.py +49 -49
- re_common/baselibrary/temporary/envdata.py +36 -36
- re_common/baselibrary/tools/all_requests/aiohttp_request.py +118 -118
- re_common/baselibrary/tools/all_requests/httpx_requet.py +102 -102
- re_common/baselibrary/tools/all_requests/mrequest.py +412 -412
- re_common/baselibrary/tools/all_requests/requests_request.py +81 -81
- re_common/baselibrary/tools/batch_compre/bijiao_batch.py +31 -31
- re_common/baselibrary/tools/contrast_db3.py +123 -123
- re_common/baselibrary/tools/copy_file.py +39 -39
- re_common/baselibrary/tools/db3_2_sizedb3.py +102 -102
- re_common/baselibrary/tools/foreachgz.py +39 -39
- re_common/baselibrary/tools/get_attr.py +10 -10
- re_common/baselibrary/tools/image_to_pdf.py +61 -61
- re_common/baselibrary/tools/java_code_deal.py +139 -139
- re_common/baselibrary/tools/javacode.py +79 -79
- re_common/baselibrary/tools/mdb_db3.py +48 -48
- re_common/baselibrary/tools/merge_file.py +171 -171
- re_common/baselibrary/tools/merge_gz_file.py +165 -165
- re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +42 -42
- re_common/baselibrary/tools/mhdfstools/hdfst.py +42 -42
- re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +38 -38
- re_common/baselibrary/tools/mongo_tools.py +50 -50
- re_common/baselibrary/tools/move_file.py +170 -170
- re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +63 -63
- re_common/baselibrary/tools/move_mongo/move_mongo_table.py +354 -354
- re_common/baselibrary/tools/move_mongo/use_mttf.py +18 -18
- re_common/baselibrary/tools/move_mongo/use_mv.py +93 -93
- re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +125 -125
- re_common/baselibrary/tools/mpandas/pandas_visualization.py +7 -7
- re_common/baselibrary/tools/myparsel.py +104 -104
- re_common/baselibrary/tools/rename_dir_file.py +37 -37
- re_common/baselibrary/tools/sequoiadb_utils.py +398 -398
- re_common/baselibrary/tools/split_line_to_many.py +25 -25
- re_common/baselibrary/tools/stringtodicts.py +33 -33
- re_common/baselibrary/tools/workwechant_bot.py +84 -84
- re_common/baselibrary/utils/baseaiohttp.py +296 -296
- re_common/baselibrary/utils/baseaiomysql.py +87 -87
- re_common/baselibrary/utils/baseallstep.py +191 -191
- re_common/baselibrary/utils/baseavro.py +19 -19
- re_common/baselibrary/utils/baseboto3.py +291 -291
- re_common/baselibrary/utils/basecsv.py +32 -32
- re_common/baselibrary/utils/basedict.py +133 -133
- re_common/baselibrary/utils/basedir.py +241 -241
- re_common/baselibrary/utils/baseencode.py +351 -351
- re_common/baselibrary/utils/baseencoding.py +28 -28
- re_common/baselibrary/utils/baseesdsl.py +86 -86
- re_common/baselibrary/utils/baseexcel.py +264 -264
- re_common/baselibrary/utils/baseexcept.py +109 -109
- re_common/baselibrary/utils/basefile.py +654 -654
- re_common/baselibrary/utils/baseftp.py +214 -214
- re_common/baselibrary/utils/basegzip.py +60 -60
- re_common/baselibrary/utils/basehdfs.py +135 -135
- re_common/baselibrary/utils/basehttpx.py +268 -268
- re_common/baselibrary/utils/baseip.py +87 -87
- re_common/baselibrary/utils/basejson.py +2 -2
- re_common/baselibrary/utils/baselist.py +32 -32
- re_common/baselibrary/utils/basemotor.py +190 -190
- re_common/baselibrary/utils/basemssql.py +98 -98
- re_common/baselibrary/utils/baseodbc.py +113 -113
- re_common/baselibrary/utils/basepandas.py +302 -302
- re_common/baselibrary/utils/basepeewee.py +11 -11
- re_common/baselibrary/utils/basepika.py +180 -180
- re_common/baselibrary/utils/basepydash.py +143 -143
- re_common/baselibrary/utils/basepymongo.py +230 -230
- re_common/baselibrary/utils/basequeue.py +22 -22
- re_common/baselibrary/utils/baserar.py +57 -57
- re_common/baselibrary/utils/baserequest.py +279 -279
- re_common/baselibrary/utils/baseset.py +8 -8
- re_common/baselibrary/utils/basesmb.py +403 -403
- re_common/baselibrary/utils/basestring.py +382 -382
- re_common/baselibrary/utils/basetime.py +320 -320
- re_common/baselibrary/utils/baseurl.py +121 -121
- re_common/baselibrary/utils/basezip.py +57 -57
- re_common/baselibrary/utils/core/__init__.py +7 -7
- re_common/baselibrary/utils/core/bottomutils.py +18 -18
- re_common/baselibrary/utils/core/mdeprecated.py +327 -327
- re_common/baselibrary/utils/core/mlamada.py +16 -16
- re_common/baselibrary/utils/core/msginfo.py +25 -25
- re_common/baselibrary/utils/core/requests_core.py +103 -103
- re_common/baselibrary/utils/fateadm.py +429 -429
- re_common/baselibrary/utils/importfun.py +123 -123
- re_common/baselibrary/utils/mfaker.py +57 -57
- re_common/baselibrary/utils/my_abc/__init__.py +3 -3
- re_common/baselibrary/utils/my_abc/better_abc.py +32 -32
- re_common/baselibrary/utils/mylogger.py +414 -414
- re_common/baselibrary/utils/myredisclient.py +861 -861
- re_common/baselibrary/utils/pipupgrade.py +21 -21
- re_common/baselibrary/utils/ringlist.py +85 -85
- re_common/baselibrary/utils/version_compare.py +36 -36
- re_common/baselibrary/utils/ydmhttp.py +126 -126
- re_common/facade/lazy_import.py +11 -11
- re_common/facade/loggerfacade.py +25 -25
- re_common/facade/mysqlfacade.py +467 -467
- re_common/facade/now.py +31 -31
- re_common/facade/sqlite3facade.py +257 -257
- re_common/facade/use/mq_use_facade.py +83 -83
- re_common/facade/use/proxy_use_facade.py +19 -19
- re_common/libtest/base_dict_test.py +19 -19
- re_common/libtest/baseavro_test.py +13 -13
- re_common/libtest/basefile_test.py +14 -14
- re_common/libtest/basemssql_test.py +77 -77
- re_common/libtest/baseodbc_test.py +7 -7
- re_common/libtest/basepandas_test.py +38 -38
- re_common/libtest/get_attr_test/get_attr_test_settings.py +14 -14
- re_common/libtest/get_attr_test/settings.py +54 -54
- re_common/libtest/idencode_test.py +53 -53
- re_common/libtest/iniconfig_test.py +35 -35
- re_common/libtest/ip_test.py +34 -34
- re_common/libtest/merge_file_test.py +20 -20
- re_common/libtest/mfaker_test.py +8 -8
- re_common/libtest/mm3_test.py +31 -31
- re_common/libtest/mylogger_test.py +88 -88
- re_common/libtest/myparsel_test.py +27 -27
- re_common/libtest/mysql_test.py +151 -151
- re_common/libtest/pymongo_test.py +21 -21
- re_common/libtest/split_test.py +11 -11
- re_common/libtest/sqlite3_merge_test.py +5 -5
- re_common/libtest/sqlite3_test.py +34 -34
- re_common/libtest/tomlconfig_test.py +30 -30
- re_common/libtest/use_tools_test/__init__.py +2 -2
- re_common/libtest/user/__init__.py +4 -4
- re_common/studio/__init__.py +4 -4
- re_common/studio/assignment_expressions.py +36 -36
- re_common/studio/mydash/test1.py +18 -18
- re_common/studio/pydashstudio/first.py +9 -9
- re_common/studio/streamlitstudio/first_app.py +65 -65
- re_common/studio/streamlitstudio/uber_pickups.py +23 -23
- re_common/studio/test.py +18 -18
- re_common/v2/baselibrary/business_utils/BusinessStringUtil.py +219 -219
- re_common/v2/baselibrary/business_utils/baseencodeid.py +100 -100
- re_common/v2/baselibrary/business_utils/full_doi_path.py +116 -116
- re_common/v2/baselibrary/business_utils/rel_tools.py +6 -6
- re_common/v2/baselibrary/decorators/utils.py +59 -59
- re_common/v2/baselibrary/helpers/search_packge/NearestNeighbors_test.py +105 -105
- re_common/v2/baselibrary/helpers/search_packge/fit_text_match.py +253 -253
- re_common/v2/baselibrary/helpers/search_packge/scikit_learn_text_matcher.py +260 -260
- re_common/v2/baselibrary/helpers/search_packge/test.py +1 -1
- re_common/v2/baselibrary/s3object/baseboto3.py +230 -230
- re_common/v2/baselibrary/tools/WeChatRobot.py +95 -95
- re_common/v2/baselibrary/tools/ac_ahocorasick.py +75 -75
- re_common/v2/baselibrary/tools/concurrency.py +35 -35
- re_common/v2/baselibrary/tools/data_processer/base.py +53 -53
- re_common/v2/baselibrary/tools/data_processer/data_processer.py +508 -508
- re_common/v2/baselibrary/tools/data_processer/data_reader.py +187 -187
- re_common/v2/baselibrary/tools/data_processer/data_writer.py +38 -38
- re_common/v2/baselibrary/tools/dict_tools.py +44 -44
- re_common/v2/baselibrary/tools/dolphinscheduler.py +187 -187
- re_common/v2/baselibrary/tools/hdfs_base_processor.py +204 -204
- re_common/v2/baselibrary/tools/hdfs_bulk_processor.py +67 -67
- re_common/v2/baselibrary/tools/hdfs_data_processer.py +338 -338
- re_common/v2/baselibrary/tools/hdfs_line_processor.py +74 -74
- re_common/v2/baselibrary/tools/list_tools.py +69 -69
- re_common/v2/baselibrary/tools/resume_tracker.py +94 -94
- re_common/v2/baselibrary/tools/search_hash_tools.py +54 -54
- re_common/v2/baselibrary/tools/text_matcher.py +326 -326
- re_common/v2/baselibrary/tools/unionfind_tools.py +60 -60
- re_common/v2/baselibrary/utils/BusinessStringUtil.py +196 -196
- re_common/v2/baselibrary/utils/api_net_utils.py +270 -270
- re_common/v2/baselibrary/utils/author_smi.py +361 -361
- re_common/v2/baselibrary/utils/base_string_similarity.py +158 -158
- re_common/v2/baselibrary/utils/basedict.py +37 -37
- re_common/v2/baselibrary/utils/basehdfs.py +163 -163
- re_common/v2/baselibrary/utils/basepika.py +180 -180
- re_common/v2/baselibrary/utils/basetime.py +77 -77
- re_common/v2/baselibrary/utils/db.py +156 -156
- re_common/v2/baselibrary/utils/json_cls.py +16 -16
- re_common/v2/baselibrary/utils/mq.py +83 -83
- re_common/v2/baselibrary/utils/n_ary_expression_tree.py +243 -243
- re_common/v2/baselibrary/utils/string_bool.py +186 -186
- re_common/v2/baselibrary/utils/string_clear.py +246 -246
- re_common/v2/baselibrary/utils/string_smi.py +18 -18
- re_common/v2/baselibrary/utils/stringutils.py +271 -278
- re_common/vip/base_step_process.py +11 -11
- re_common/vip/baseencodeid.py +90 -90
- re_common/vip/changetaskname.py +28 -28
- re_common/vip/core_var.py +24 -24
- re_common/vip/mmh3Hash.py +89 -89
- re_common/vip/proxy/allproxys.py +127 -127
- re_common/vip/proxy/allproxys_thread.py +159 -159
- re_common/vip/proxy/cnki_proxy.py +153 -153
- re_common/vip/proxy/kuaidaili.py +87 -87
- re_common/vip/proxy/proxy_all.py +113 -113
- re_common/vip/proxy/update_kuaidaili_0.py +42 -42
- re_common/vip/proxy/wanfang_proxy.py +152 -152
- re_common/vip/proxy/wp_proxy_all.py +181 -181
- re_common/vip/read_rawid_to_txt.py +91 -91
- re_common/vip/title/__init__.py +5 -5
- re_common/vip/title/transform/TransformBookTitleToZt.py +125 -125
- re_common/vip/title/transform/TransformConferenceTitleToZt.py +139 -139
- re_common/vip/title/transform/TransformCstadTitleToZt.py +195 -195
- re_common/vip/title/transform/TransformJournalTitleToZt.py +203 -203
- re_common/vip/title/transform/TransformPatentTitleToZt.py +132 -132
- re_common/vip/title/transform/TransformRegulationTitleToZt.py +114 -114
- re_common/vip/title/transform/TransformStandardTitleToZt.py +135 -135
- re_common/vip/title/transform/TransformThesisTitleToZt.py +135 -135
- re_common/vip/title/transform/__init__.py +10 -10
- {re_common-10.0.37.dist-info → re_common-10.0.39.dist-info}/LICENSE +201 -201
- {re_common-10.0.37.dist-info → re_common-10.0.39.dist-info}/METADATA +16 -16
- re_common-10.0.39.dist-info/RECORD +248 -0
- {re_common-10.0.37.dist-info → re_common-10.0.39.dist-info}/WHEEL +1 -1
- re_common-10.0.37.dist-info/RECORD +0 -248
- {re_common-10.0.37.dist-info → re_common-10.0.39.dist-info}/top_level.txt +0 -0
|
@@ -1,291 +1,291 @@
|
|
|
1
|
-
import boto3
|
|
2
|
-
import botocore
|
|
3
|
-
from boto3.session import Session
|
|
4
|
-
|
|
5
|
-
# boto3 该开发工具包由两个关键的 Python 包组成:
|
|
6
|
-
# Botocore(提供在 Python 开发工具包和 AWS CLI 之间共享的低级功能的库)
|
|
7
|
-
# 和 Boto3(实现 Python 开发工具包本身的包)
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
"""
|
|
11
|
-
aws_access_key_id = 'minioa'
|
|
12
|
-
aws_secret_access_key = 'minio123'
|
|
13
|
-
endpoint_url = 'http://192.168.31.164:9000'
|
|
14
|
-
bbt = BaseBoto3(aws_access_key_id=aws_access_key_id,
|
|
15
|
-
aws_secret_access_key=aws_secret_access_key,
|
|
16
|
-
endpoint_url=endpoint_url)
|
|
17
|
-
bbt.conn_session()
|
|
18
|
-
bbt.set_is_low_level(False)
|
|
19
|
-
bbt.get_client()
|
|
20
|
-
print("**********************")
|
|
21
|
-
print(bbt.delete_buckets("test1"))
|
|
22
|
-
# bbt.set_is_low_level(False)
|
|
23
|
-
# bbt.get_client()
|
|
24
|
-
# print("**********************")
|
|
25
|
-
# print(bbt.create_buckets("create2"))
|
|
26
|
-
"""
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
class BaseBoto3(object):
|
|
30
|
-
|
|
31
|
-
def __init__(self, aws_access_key_id="", aws_secret_access_key="", endpoint_url=""):
|
|
32
|
-
self.aws_access_key_id = aws_access_key_id
|
|
33
|
-
self.aws_secret_access_key = aws_secret_access_key
|
|
34
|
-
self.endpoint_url = endpoint_url
|
|
35
|
-
self.session = None
|
|
36
|
-
self.client = None
|
|
37
|
-
self.is_low_level = False
|
|
38
|
-
self.bucket = None
|
|
39
|
-
|
|
40
|
-
def set_is_low_level(self, is_low_level):
|
|
41
|
-
self.is_low_level = is_low_level
|
|
42
|
-
return self
|
|
43
|
-
|
|
44
|
-
def set_aws_access_key_id(self, aws_access_key_id):
|
|
45
|
-
self.aws_access_key_id = aws_access_key_id
|
|
46
|
-
return self
|
|
47
|
-
|
|
48
|
-
def set_aws_secret_access_key(self, aws_secret_access_key):
|
|
49
|
-
self.aws_secret_access_key = aws_secret_access_key
|
|
50
|
-
return self
|
|
51
|
-
|
|
52
|
-
def set_endpoint_url(self, endpoint_url):
|
|
53
|
-
self.endpoint_url = endpoint_url
|
|
54
|
-
return self
|
|
55
|
-
|
|
56
|
-
def conn_session(self):
|
|
57
|
-
self.session = Session(aws_access_key_id=self.aws_access_key_id,
|
|
58
|
-
aws_secret_access_key=self.aws_secret_access_key)
|
|
59
|
-
return self.session
|
|
60
|
-
|
|
61
|
-
def get_client(self):
|
|
62
|
-
assert self.session is not None
|
|
63
|
-
if self.is_low_level:
|
|
64
|
-
# 根据名称创建低级服务客户端
|
|
65
|
-
# botocore.client.S3
|
|
66
|
-
self.client = self.session.client('s3', endpoint_url=self.endpoint_url)
|
|
67
|
-
print(type(self.client))
|
|
68
|
-
else:
|
|
69
|
-
# boto3.resources.factory.s3.ServiceResource
|
|
70
|
-
# 按名称创建资源服务客户端
|
|
71
|
-
self.client = self.session.resource('s3', endpoint_url=self.endpoint_url)
|
|
72
|
-
print(type(self.client))
|
|
73
|
-
return self
|
|
74
|
-
|
|
75
|
-
def get_all_buckets(self):
|
|
76
|
-
"""
|
|
77
|
-
获取所有的桶信息
|
|
78
|
-
:return:
|
|
79
|
-
"""
|
|
80
|
-
if self.is_low_level is False:
|
|
81
|
-
return self.client.buckets.all()
|
|
82
|
-
else:
|
|
83
|
-
return self.client.list_buckets()
|
|
84
|
-
|
|
85
|
-
def create_buckets(self, buckets_name):
|
|
86
|
-
"""
|
|
87
|
-
|
|
88
|
-
:param buckets_name:
|
|
89
|
-
:return:
|
|
90
|
-
如果get_client 使用 client 返回
|
|
91
|
-
{'ResponseMetadata': {'RequestId': '16BC90EED4A433C4', 'HostId': '', 'HTTPStatusCode': 200, 'HTTPHeaders': {'accept-ranges': 'bytes', 'content-length': '0', 'content-security-policy': 'block-all-mixed-content', 'location': '/create1', 'server': 'MinIO', 'strict-transport-security': 'max-age=31536000; includeSubDomains', 'vary': 'Origin, Accept-Encoding', 'x-amz-request-id': '16BC90EED4A433C4', 'x-content-type-options': 'nosniff', 'x-xss-protection': '1; mode=block', 'date': 'Wed, 01 Dec 2021 07:28:39 GMT'}, 'RetryAttempts': 0}, 'Location': '/create1'}
|
|
92
|
-
如果resource 使用 client 返回
|
|
93
|
-
s3.Bucket(name='create2')
|
|
94
|
-
"""
|
|
95
|
-
assert buckets_name.find("_") == -1, "新建一个bucket桶(bucket name 中不能有_下划线)"
|
|
96
|
-
# 新建一个bucket桶(bucket name 中不能有_下划线)
|
|
97
|
-
return self.client.create_bucket(Bucket=buckets_name)
|
|
98
|
-
|
|
99
|
-
def delete_buckets(self, bucket_name):
|
|
100
|
-
"""
|
|
101
|
-
删除桶 删除bucket(只能删除空的bucket)
|
|
102
|
-
:return:
|
|
103
|
-
"""
|
|
104
|
-
if self.is_low_level is False:
|
|
105
|
-
bucket = self.client.Bucket(bucket_name)
|
|
106
|
-
response = bucket.delete()
|
|
107
|
-
else:
|
|
108
|
-
response = self.client.delete_bucket(Bucket=bucket_name)
|
|
109
|
-
return response
|
|
110
|
-
|
|
111
|
-
def get_bucket(self, bucket_name):
|
|
112
|
-
"""
|
|
113
|
-
获取 bucket 对象
|
|
114
|
-
:param bucket_name:
|
|
115
|
-
:return:
|
|
116
|
-
"""
|
|
117
|
-
if self.is_low_level is False:
|
|
118
|
-
self.bucket = self.client.Bucket(bucket_name)
|
|
119
|
-
return self.bucket
|
|
120
|
-
else:
|
|
121
|
-
raise Exception("无实现方法")
|
|
122
|
-
|
|
123
|
-
def get_all_obs_filter(self, Prefix):
|
|
124
|
-
"""
|
|
125
|
-
Prefix 为匹配模式
|
|
126
|
-
例:列出前缀为 haha 的文件
|
|
127
|
-
Prefix='haha'
|
|
128
|
-
:param Prefix:
|
|
129
|
-
:return: 可以调用 obj.key
|
|
130
|
-
"""
|
|
131
|
-
if not self.is_low_level:
|
|
132
|
-
for obj in self.bucket.objects.filter(Prefix=Prefix):
|
|
133
|
-
yield obj
|
|
134
|
-
else:
|
|
135
|
-
raise Exception("请设置 is_low_level 为 False")
|
|
136
|
-
|
|
137
|
-
def get_object(self, bucket_name):
|
|
138
|
-
"""
|
|
139
|
-
会返回包括目录在内的所有对象
|
|
140
|
-
:param bucket_name:
|
|
141
|
-
:return:
|
|
142
|
-
"""
|
|
143
|
-
if self.is_low_level is False:
|
|
144
|
-
bucket = self.client.Bucket(bucket_name)
|
|
145
|
-
# boto3.resources.collection.s3.Bucket.objectsCollection
|
|
146
|
-
all_obj = bucket.objects.all()
|
|
147
|
-
return all_obj
|
|
148
|
-
# for obj in bucket.objects.all():
|
|
149
|
-
# print('obj name:%s' % obj.key)
|
|
150
|
-
else:
|
|
151
|
-
return self.client.list_objects(Bucket=bucket_name)
|
|
152
|
-
|
|
153
|
-
def upload_file(self, local_file, bucket_name, key):
|
|
154
|
-
"""
|
|
155
|
-
# key 桶中的位置 test1/test.pdf
|
|
156
|
-
:param local_file: 本地文件路径
|
|
157
|
-
:param bucket_name: 桶名
|
|
158
|
-
:param key: 远程文件路径
|
|
159
|
-
:return:
|
|
160
|
-
"""
|
|
161
|
-
|
|
162
|
-
if self.is_low_level is False:
|
|
163
|
-
self.client.Bucket(bucket_name).upload_file(local_file, key)
|
|
164
|
-
else:
|
|
165
|
-
self.client.upload_file(local_file, bucket_name, key)
|
|
166
|
-
|
|
167
|
-
def upload_fileobj(self, fileobj, bucket_name, key):
|
|
168
|
-
# fileobj 字节流
|
|
169
|
-
if self.is_low_level is False:
|
|
170
|
-
self.client.Bucket(bucket_name).upload_fileobj(fileobj, key)
|
|
171
|
-
else:
|
|
172
|
-
self.client.upload_fileobj(fileobj, bucket_name, key)
|
|
173
|
-
|
|
174
|
-
def download_file(self, local_file, bucket_name, key):
|
|
175
|
-
if self.is_low_level is False:
|
|
176
|
-
self.client.Bucket(bucket_name).download_file(key, local_file)
|
|
177
|
-
else:
|
|
178
|
-
self.client.download_file(bucket_name, key, local_file)
|
|
179
|
-
|
|
180
|
-
def download_fileobj(self, fileobj, bucket_name, key):
|
|
181
|
-
if self.is_low_level is False:
|
|
182
|
-
self.client.Bucket(bucket_name).download_fileobj(key, fileobj)
|
|
183
|
-
else:
|
|
184
|
-
self.client.download_fileobj(bucket_name, key, fileobj)
|
|
185
|
-
|
|
186
|
-
def check_exist(self, bucket_name, key):
|
|
187
|
-
"""
|
|
188
|
-
if self.is_low_level:
|
|
189
|
-
{'ResponseMetadata': {'RequestId': '17E6A65A2B299D3B', 'HostId': '', 'HTTPStatusCode': 200, 'HTTPHeaders': {'accept-ranges': 'bytes', 'content-length': '117', 'content-security-policy': 'block-all-mixed-content', 'content-type': 'binary/octet-stream', 'etag': '"2237a934f176003e41abf3d733291079"', 'last-modified': 'Thu, 25 Jul 2024 05:49:43 GMT', 'server': 'MinIO', 'strict-transport-security': 'max-age=31536000; includeSubDomains', 'vary': 'Origin, Accept-Encoding', 'x-amz-request-id': '17E6A65A2B299D3B', 'x-content-type-options': 'nosniff', 'x-xss-protection': '1; mode=block', 'date': 'Mon, 29 Jul 2024 09:53:33 GMT'}, 'RetryAttempts': 0}, 'AcceptRanges': 'bytes', 'LastModified': datetime.datetime(2024, 7, 25, 5, 49, 43, tzinfo=tzutc()), 'ContentLength': 117, 'ETag': '"2237a934f176003e41abf3d733291079"', 'ContentType': 'binary/octet-stream', 'Metadata': {}}
|
|
190
|
-
判定文件是否存在,
|
|
191
|
-
:param bucket_name: 桶名
|
|
192
|
-
:param key: 文件key
|
|
193
|
-
:return:
|
|
194
|
-
"""
|
|
195
|
-
|
|
196
|
-
if self.is_low_level:
|
|
197
|
-
try:
|
|
198
|
-
obj_info = self.client.head_object(
|
|
199
|
-
Bucket=bucket_name,
|
|
200
|
-
Key=key
|
|
201
|
-
)
|
|
202
|
-
return obj_info
|
|
203
|
-
except:
|
|
204
|
-
return None
|
|
205
|
-
else:
|
|
206
|
-
# 获取指定存储桶
|
|
207
|
-
bucket = self.client.Bucket(bucket_name)
|
|
208
|
-
# 检查 key 是否存在
|
|
209
|
-
objs = list(bucket.objects.filter(Prefix=key))
|
|
210
|
-
if len(objs) > 0 and objs[0].key == key:
|
|
211
|
-
# [s3.ObjectSummary(bucket_name='crawl.dc.cqvip.com', key='foreign/organ/parsel_organ_log.txt')]
|
|
212
|
-
return objs[0]
|
|
213
|
-
else:
|
|
214
|
-
return None
|
|
215
|
-
|
|
216
|
-
def get_prefix_obj(self, bucket, prefix, delimiter):
|
|
217
|
-
"""
|
|
218
|
-
Bucket="crawl.dc.cqvip.com",
|
|
219
|
-
Prefix="foreign/organ/ei/",
|
|
220
|
-
Delimiter='/'
|
|
221
|
-
|
|
222
|
-
Returns:
|
|
223
|
-
|
|
224
|
-
"""
|
|
225
|
-
if self.is_low_level:
|
|
226
|
-
# for common_prefix in response.get('CommonPrefixes', []): 获取子目录
|
|
227
|
-
return self.client.list_objects_v2(Bucket=bucket,
|
|
228
|
-
Prefix=prefix,
|
|
229
|
-
Delimiter=delimiter) # 设置 Delimiter='/' 可以确保只列出目录而不是所有对象。
|
|
230
|
-
else:
|
|
231
|
-
# 该方法只能列出文件 没办法列出目录
|
|
232
|
-
# bucket_resource = self.client.Bucket(bucket)
|
|
233
|
-
# objects = bucket_resource.objects.filter(Prefix=prefix, Delimiter=delimiter)
|
|
234
|
-
# return list(objects)
|
|
235
|
-
|
|
236
|
-
bucket_resource = self.client.Bucket(bucket)
|
|
237
|
-
return bucket_resource.meta.client.list_objects_v2(Bucket=bucket,
|
|
238
|
-
Prefix=prefix,
|
|
239
|
-
Delimiter=delimiter)
|
|
240
|
-
|
|
241
|
-
def get_object_value(self, bucket_name, file_key, encoding='utf-8'):
|
|
242
|
-
"""
|
|
243
|
-
读取文本数据
|
|
244
|
-
Returns:
|
|
245
|
-
|
|
246
|
-
"""
|
|
247
|
-
if self.is_low_level:
|
|
248
|
-
obj = self.client.get_object(Bucket=bucket_name, Key=file_key)
|
|
249
|
-
body = obj['Body'].read().decode(encoding)
|
|
250
|
-
return body
|
|
251
|
-
else:
|
|
252
|
-
bucket_resource = self.client.Bucket(bucket_name)
|
|
253
|
-
obj = bucket_resource.Object(file_key)
|
|
254
|
-
body = obj.get()['Body'].read().decode(encoding)
|
|
255
|
-
return body
|
|
256
|
-
|
|
257
|
-
def put_object(self, bucket_name, key, body):
|
|
258
|
-
"""
|
|
259
|
-
直接写内容到文件
|
|
260
|
-
Args:
|
|
261
|
-
bucket_name:
|
|
262
|
-
key:
|
|
263
|
-
body: 需要 编码 .encode('utf-8')
|
|
264
|
-
|
|
265
|
-
Returns:
|
|
266
|
-
|
|
267
|
-
"""
|
|
268
|
-
if self.is_low_level:
|
|
269
|
-
self.client.put_object(Bucket=bucket_name,
|
|
270
|
-
Key=key,
|
|
271
|
-
Body=body)
|
|
272
|
-
else:
|
|
273
|
-
# 获取 Bucket 对象
|
|
274
|
-
bucket_resource = self.client.Bucket(bucket_name)
|
|
275
|
-
|
|
276
|
-
# 将内容写入文件
|
|
277
|
-
bucket_resource.put_object(Key=key, Body=body)
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
bb = BaseBoto3(aws_access_key_id="UM51J2G5ZG0FE5CCERB9",
|
|
281
|
-
aws_secret_access_key="u+OEmhE2fahF2L+oXB+HXe8IJs22Lo38icvlF+Yq",
|
|
282
|
-
endpoint_url="http://192.168.31.135:9000"
|
|
283
|
-
)
|
|
284
|
-
bb.conn_session()
|
|
285
|
-
bb.set_is_low_level(False)
|
|
286
|
-
bb.get_client()
|
|
287
|
-
|
|
288
|
-
result = bb.check_exist("crawl.dc.cqvip.com",
|
|
289
|
-
"foreign/organ/parsel_organ_log.txt")
|
|
290
|
-
|
|
291
|
-
print(result)
|
|
1
|
+
import boto3
|
|
2
|
+
import botocore
|
|
3
|
+
from boto3.session import Session
|
|
4
|
+
|
|
5
|
+
# boto3 该开发工具包由两个关键的 Python 包组成:
|
|
6
|
+
# Botocore(提供在 Python 开发工具包和 AWS CLI 之间共享的低级功能的库)
|
|
7
|
+
# 和 Boto3(实现 Python 开发工具包本身的包)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
"""
|
|
11
|
+
aws_access_key_id = 'minioa'
|
|
12
|
+
aws_secret_access_key = 'minio123'
|
|
13
|
+
endpoint_url = 'http://192.168.31.164:9000'
|
|
14
|
+
bbt = BaseBoto3(aws_access_key_id=aws_access_key_id,
|
|
15
|
+
aws_secret_access_key=aws_secret_access_key,
|
|
16
|
+
endpoint_url=endpoint_url)
|
|
17
|
+
bbt.conn_session()
|
|
18
|
+
bbt.set_is_low_level(False)
|
|
19
|
+
bbt.get_client()
|
|
20
|
+
print("**********************")
|
|
21
|
+
print(bbt.delete_buckets("test1"))
|
|
22
|
+
# bbt.set_is_low_level(False)
|
|
23
|
+
# bbt.get_client()
|
|
24
|
+
# print("**********************")
|
|
25
|
+
# print(bbt.create_buckets("create2"))
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class BaseBoto3(object):
|
|
30
|
+
|
|
31
|
+
def __init__(self, aws_access_key_id="", aws_secret_access_key="", endpoint_url=""):
|
|
32
|
+
self.aws_access_key_id = aws_access_key_id
|
|
33
|
+
self.aws_secret_access_key = aws_secret_access_key
|
|
34
|
+
self.endpoint_url = endpoint_url
|
|
35
|
+
self.session = None
|
|
36
|
+
self.client = None
|
|
37
|
+
self.is_low_level = False
|
|
38
|
+
self.bucket = None
|
|
39
|
+
|
|
40
|
+
def set_is_low_level(self, is_low_level):
|
|
41
|
+
self.is_low_level = is_low_level
|
|
42
|
+
return self
|
|
43
|
+
|
|
44
|
+
def set_aws_access_key_id(self, aws_access_key_id):
|
|
45
|
+
self.aws_access_key_id = aws_access_key_id
|
|
46
|
+
return self
|
|
47
|
+
|
|
48
|
+
def set_aws_secret_access_key(self, aws_secret_access_key):
|
|
49
|
+
self.aws_secret_access_key = aws_secret_access_key
|
|
50
|
+
return self
|
|
51
|
+
|
|
52
|
+
def set_endpoint_url(self, endpoint_url):
|
|
53
|
+
self.endpoint_url = endpoint_url
|
|
54
|
+
return self
|
|
55
|
+
|
|
56
|
+
def conn_session(self):
|
|
57
|
+
self.session = Session(aws_access_key_id=self.aws_access_key_id,
|
|
58
|
+
aws_secret_access_key=self.aws_secret_access_key)
|
|
59
|
+
return self.session
|
|
60
|
+
|
|
61
|
+
def get_client(self):
|
|
62
|
+
assert self.session is not None
|
|
63
|
+
if self.is_low_level:
|
|
64
|
+
# 根据名称创建低级服务客户端
|
|
65
|
+
# botocore.client.S3
|
|
66
|
+
self.client = self.session.client('s3', endpoint_url=self.endpoint_url)
|
|
67
|
+
print(type(self.client))
|
|
68
|
+
else:
|
|
69
|
+
# boto3.resources.factory.s3.ServiceResource
|
|
70
|
+
# 按名称创建资源服务客户端
|
|
71
|
+
self.client = self.session.resource('s3', endpoint_url=self.endpoint_url)
|
|
72
|
+
print(type(self.client))
|
|
73
|
+
return self
|
|
74
|
+
|
|
75
|
+
def get_all_buckets(self):
|
|
76
|
+
"""
|
|
77
|
+
获取所有的桶信息
|
|
78
|
+
:return:
|
|
79
|
+
"""
|
|
80
|
+
if self.is_low_level is False:
|
|
81
|
+
return self.client.buckets.all()
|
|
82
|
+
else:
|
|
83
|
+
return self.client.list_buckets()
|
|
84
|
+
|
|
85
|
+
def create_buckets(self, buckets_name):
|
|
86
|
+
"""
|
|
87
|
+
|
|
88
|
+
:param buckets_name:
|
|
89
|
+
:return:
|
|
90
|
+
如果get_client 使用 client 返回
|
|
91
|
+
{'ResponseMetadata': {'RequestId': '16BC90EED4A433C4', 'HostId': '', 'HTTPStatusCode': 200, 'HTTPHeaders': {'accept-ranges': 'bytes', 'content-length': '0', 'content-security-policy': 'block-all-mixed-content', 'location': '/create1', 'server': 'MinIO', 'strict-transport-security': 'max-age=31536000; includeSubDomains', 'vary': 'Origin, Accept-Encoding', 'x-amz-request-id': '16BC90EED4A433C4', 'x-content-type-options': 'nosniff', 'x-xss-protection': '1; mode=block', 'date': 'Wed, 01 Dec 2021 07:28:39 GMT'}, 'RetryAttempts': 0}, 'Location': '/create1'}
|
|
92
|
+
如果resource 使用 client 返回
|
|
93
|
+
s3.Bucket(name='create2')
|
|
94
|
+
"""
|
|
95
|
+
assert buckets_name.find("_") == -1, "新建一个bucket桶(bucket name 中不能有_下划线)"
|
|
96
|
+
# 新建一个bucket桶(bucket name 中不能有_下划线)
|
|
97
|
+
return self.client.create_bucket(Bucket=buckets_name)
|
|
98
|
+
|
|
99
|
+
def delete_buckets(self, bucket_name):
|
|
100
|
+
"""
|
|
101
|
+
删除桶 删除bucket(只能删除空的bucket)
|
|
102
|
+
:return:
|
|
103
|
+
"""
|
|
104
|
+
if self.is_low_level is False:
|
|
105
|
+
bucket = self.client.Bucket(bucket_name)
|
|
106
|
+
response = bucket.delete()
|
|
107
|
+
else:
|
|
108
|
+
response = self.client.delete_bucket(Bucket=bucket_name)
|
|
109
|
+
return response
|
|
110
|
+
|
|
111
|
+
def get_bucket(self, bucket_name):
|
|
112
|
+
"""
|
|
113
|
+
获取 bucket 对象
|
|
114
|
+
:param bucket_name:
|
|
115
|
+
:return:
|
|
116
|
+
"""
|
|
117
|
+
if self.is_low_level is False:
|
|
118
|
+
self.bucket = self.client.Bucket(bucket_name)
|
|
119
|
+
return self.bucket
|
|
120
|
+
else:
|
|
121
|
+
raise Exception("无实现方法")
|
|
122
|
+
|
|
123
|
+
def get_all_obs_filter(self, Prefix):
|
|
124
|
+
"""
|
|
125
|
+
Prefix 为匹配模式
|
|
126
|
+
例:列出前缀为 haha 的文件
|
|
127
|
+
Prefix='haha'
|
|
128
|
+
:param Prefix:
|
|
129
|
+
:return: 可以调用 obj.key
|
|
130
|
+
"""
|
|
131
|
+
if not self.is_low_level:
|
|
132
|
+
for obj in self.bucket.objects.filter(Prefix=Prefix):
|
|
133
|
+
yield obj
|
|
134
|
+
else:
|
|
135
|
+
raise Exception("请设置 is_low_level 为 False")
|
|
136
|
+
|
|
137
|
+
def get_object(self, bucket_name):
|
|
138
|
+
"""
|
|
139
|
+
会返回包括目录在内的所有对象
|
|
140
|
+
:param bucket_name:
|
|
141
|
+
:return:
|
|
142
|
+
"""
|
|
143
|
+
if self.is_low_level is False:
|
|
144
|
+
bucket = self.client.Bucket(bucket_name)
|
|
145
|
+
# boto3.resources.collection.s3.Bucket.objectsCollection
|
|
146
|
+
all_obj = bucket.objects.all()
|
|
147
|
+
return all_obj
|
|
148
|
+
# for obj in bucket.objects.all():
|
|
149
|
+
# print('obj name:%s' % obj.key)
|
|
150
|
+
else:
|
|
151
|
+
return self.client.list_objects(Bucket=bucket_name)
|
|
152
|
+
|
|
153
|
+
def upload_file(self, local_file, bucket_name, key):
|
|
154
|
+
"""
|
|
155
|
+
# key 桶中的位置 test1/test.pdf
|
|
156
|
+
:param local_file: 本地文件路径
|
|
157
|
+
:param bucket_name: 桶名
|
|
158
|
+
:param key: 远程文件路径
|
|
159
|
+
:return:
|
|
160
|
+
"""
|
|
161
|
+
|
|
162
|
+
if self.is_low_level is False:
|
|
163
|
+
self.client.Bucket(bucket_name).upload_file(local_file, key)
|
|
164
|
+
else:
|
|
165
|
+
self.client.upload_file(local_file, bucket_name, key)
|
|
166
|
+
|
|
167
|
+
def upload_fileobj(self, fileobj, bucket_name, key):
|
|
168
|
+
# fileobj 字节流
|
|
169
|
+
if self.is_low_level is False:
|
|
170
|
+
self.client.Bucket(bucket_name).upload_fileobj(fileobj, key)
|
|
171
|
+
else:
|
|
172
|
+
self.client.upload_fileobj(fileobj, bucket_name, key)
|
|
173
|
+
|
|
174
|
+
def download_file(self, local_file, bucket_name, key):
|
|
175
|
+
if self.is_low_level is False:
|
|
176
|
+
self.client.Bucket(bucket_name).download_file(key, local_file)
|
|
177
|
+
else:
|
|
178
|
+
self.client.download_file(bucket_name, key, local_file)
|
|
179
|
+
|
|
180
|
+
def download_fileobj(self, fileobj, bucket_name, key):
|
|
181
|
+
if self.is_low_level is False:
|
|
182
|
+
self.client.Bucket(bucket_name).download_fileobj(key, fileobj)
|
|
183
|
+
else:
|
|
184
|
+
self.client.download_fileobj(bucket_name, key, fileobj)
|
|
185
|
+
|
|
186
|
+
def check_exist(self, bucket_name, key):
    """
    Check whether an object exists in a bucket.

    :param bucket_name: bucket name
    :param key: object key
    :return: None when the object is not found. Otherwise, with the
        low-level client, the ``head_object`` response dict (a sample
        response carried 'ResponseMetadata', 'AcceptRanges',
        'LastModified', 'ContentLength', 'ETag', 'ContentType' and
        'Metadata'); with the resource API, the matching object summary,
        e.g. ``s3.ObjectSummary(bucket_name='crawl.dc.cqvip.com',
        key='foreign/organ/parsel_organ_log.txt')``.
    """
    if self.is_low_level:
        try:
            return self.client.head_object(Bucket=bucket_name, Key=key)
        except Exception:
            # Best effort: any failure of the HEAD request is reported as
            # "not found". Unlike a bare ``except:``, this still lets
            # KeyboardInterrupt and SystemExit propagate.
            return None

    # Resource API: look the key up through a prefix listing.
    bucket = self.client.Bucket(bucket_name)
    # Only the first listed entry is ever inspected, so take just that one
    # instead of building a list of every object under the prefix.
    first = next(iter(bucket.objects.filter(Prefix=key)), None)
    if first is not None and first.key == key:
        return first
    return None
|
|
215
|
+
|
|
216
|
+
def get_prefix_obj(self, bucket, prefix, delimiter):
    """
    List the entries under a prefix via ``list_objects_v2``.

    Example arguments::

        Bucket="crawl.dc.cqvip.com",
        Prefix="foreign/organ/ei/",
        Delimiter='/'

    Returns:
        The raw ``list_objects_v2`` response; sub-directories can be read
        from it with ``response.get('CommonPrefixes', [])``.
    """
    if self.is_low_level:
        s3_client = self.client
    else:
        # ``bucket.objects.filter(Prefix=..., Delimiter=...)`` can only list
        # files, not directories, so use the client behind the resource.
        s3_client = self.client.Bucket(bucket).meta.client

    # Setting Delimiter='/' makes the call list directories only rather
    # than every object.
    return s3_client.list_objects_v2(Bucket=bucket,
                                     Prefix=prefix,
                                     Delimiter=delimiter)
|
|
240
|
+
|
|
241
|
+
def get_object_value(self, bucket_name, file_key, encoding='utf-8'):
    """
    Read an object's content as text.

    Args:
        bucket_name: bucket name
        file_key: key of the object to read
        encoding: codec used to decode the object's bytes

    Returns:
        The object body decoded with ``encoding``.
    """
    if self.is_low_level:
        response = self.client.get_object(Bucket=bucket_name, Key=file_key)
    else:
        response = self.client.Bucket(bucket_name).Object(file_key).get()

    raw = response['Body'].read()
    return raw.decode(encoding)
|
|
256
|
+
|
|
257
|
+
def put_object(self, bucket_name, key, body):
    """
    Write content directly to an object.

    Args:
        bucket_name: bucket name
        key: key of the object to write
        body: content to store; needs to be encoded first,
            e.g. ``.encode('utf-8')``

    Returns:
        None
    """
    if not self.is_low_level:
        # Resource API: fetch the Bucket object and write through it.
        self.client.Bucket(bucket_name).put_object(Key=key, Body=body)
        return

    self.client.put_object(Bucket=bucket_name, Key=key, Body=body)
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
if __name__ == "__main__":
    # Manual smoke test for BaseBoto3.check_exist. It is guarded so that
    # importing this module no longer builds a client and calls the storage
    # endpoint as a side effect.
    import os

    # Credentials are read from the environment instead of being committed
    # to the source; the endpoint can be overridden with S3_ENDPOINT_URL.
    bb = BaseBoto3(aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
                   aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
                   endpoint_url=os.environ.get("S3_ENDPOINT_URL",
                                               "http://192.168.31.135:9000")
                   )
    bb.conn_session()
    bb.set_is_low_level(False)
    bb.get_client()

    result = bb.check_exist("crawl.dc.cqvip.com",
                            "foreign/organ/parsel_organ_log.txt")

    print(result)
|
|
@@ -1,32 +1,32 @@
|
|
|
1
|
-
import csv
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
class BaseCsv(object):
    """Thin helper around the standard library ``csv`` reader."""

    def __init__(self):
        pass

    def read_csv(self, filepath, encoding='mac_roman'):
        """
        Read a CSV file lazily, one row at a time.

        :param filepath: path of the CSV file
        :param encoding: text encoding used to open the file. The
            'mac_roman' default was chosen because of the
            UnicodeDecodeError discussed at
            https://stackoverflow.com/questions/21504319/python-3-csv-file-giving-unicodedecodeerror-utf-8-codec-cant-decode-byte-err
        :return: generator yielding each row as a list of strings
        """
        # newline='' leaves line-ending handling to the csv module, so line
        # breaks inside quoted fields are returned exactly as written.
        with open(filepath, "r", encoding=encoding, newline='') as f:
            yield from csv.reader(f)

    def read_all_csv(self, filepath, encoding=None):
        """
        Read every row of a CSV file at once.

        :param filepath: path of the CSV file
        :param encoding: text encoding used to open the file; None keeps
            the platform default that ``open`` picks
        :return: list of all rows, each a list of strings
        """
        # newline='' for the same reason as in read_csv.
        with open(filepath, "r", encoding=encoding, newline='') as f:
            return list(csv.reader(f))
|
|
32
|
-
|
|
1
|
+
import csv
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class BaseCsv(object):
    """Thin helper around the standard library ``csv`` reader."""

    def __init__(self):
        pass

    def read_csv(self, filepath, encoding='mac_roman'):
        """
        Read a CSV file lazily, one row at a time.

        :param filepath: path of the CSV file
        :param encoding: text encoding used to open the file. The
            'mac_roman' default was chosen because of the
            UnicodeDecodeError discussed at
            https://stackoverflow.com/questions/21504319/python-3-csv-file-giving-unicodedecodeerror-utf-8-codec-cant-decode-byte-err
        :return: generator yielding each row as a list of strings
        """
        # newline='' leaves line-ending handling to the csv module, so line
        # breaks inside quoted fields are returned exactly as written.
        with open(filepath, "r", encoding=encoding, newline='') as f:
            yield from csv.reader(f)

    def read_all_csv(self, filepath, encoding=None):
        """
        Read every row of a CSV file at once.

        :param filepath: path of the CSV file
        :param encoding: text encoding used to open the file; None keeps
            the platform default that ``open`` picks
        :return: list of all rows, each a list of strings
        """
        # newline='' for the same reason as in read_csv.
        with open(filepath, "r", encoding=encoding, newline='') as f:
            return list(csv.reader(f))
|
|
32
|
+
|