orbitkit 0.8.46__tar.gz → 0.8.47__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {orbitkit-0.8.46/orbitkit.egg-info → orbitkit-0.8.47}/PKG-INFO +28 -6
- orbitkit-0.8.47/orbitkit/VERSION +1 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/audio_transcoder/netmind_extract_v1.py +5 -4
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/util/__init__.py +1 -0
- orbitkit-0.8.47/orbitkit/util/util_aws_s3_wrapper.py +369 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47/orbitkit.egg-info}/PKG-INFO +28 -6
- orbitkit-0.8.47/orbitkit.egg-info/requires.txt +8 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/setup.py +13 -4
- orbitkit-0.8.46/orbitkit/VERSION +0 -1
- orbitkit-0.8.46/orbitkit/util/util_aws_s3_wrapper.py +0 -154
- orbitkit-0.8.46/orbitkit.egg-info/requires.txt +0 -6
- {orbitkit-0.8.46 → orbitkit-0.8.47}/LICENSE +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/MANIFEST.in +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/README.md +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/__init__.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/airflow_handler/__init__.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/airflow_handler/data_preprocessing.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/airflow_handler/file_flow_entry_process.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/airflow_handler/file_flow_exit_process.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/airflow_handler/file_handler.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/airflow_handler/file_handler_v2.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/audio_transcoder/__init__.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/constant/__init__.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/constant/report_schema.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/id_srv/__init__.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/id_srv/id_gen.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/id_srv/id_perm_like.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/lark_send/__init__.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/lark_send/lark.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/llm_tools/__init__.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/llm_tools/quick_rag_chat.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/orbit_type/__init__.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/orbit_type/doc_4_compile_rule.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/orbit_type/orbit_type_simple.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/orbit_type/tools.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/pdf_embedding/__init__.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/pdf_embedding/pdf_txt_embedding.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/pdf_embedding/pdf_txt_embedding_v2.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/pdf_extractor/__init__.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/pdf_extractor/a_stock_extractor_v1.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/pdf_extractor/exceptions.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/pdf_extractor/pdf_block_extractor_base.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/pdf_extractor/pdf_block_extractor_v1.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/pdf_extractor/pdf_block_extractor_v2.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/pdf_extractor/pdf_extractor_azure.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/pdf_extractor/pdf_extractor_minerU_v1.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/pdf_extractor/pdf_extractor_netmind_v1.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/pdf_extractor/pdf_extractor_netmind_v2.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/pdf_extractor/pdf_extractor_netmind_v3.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/pdf_extractor/pdf_extractor_orbit.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/pdf_extractor_simple/__init__.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/pdf_extractor_simple/base.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/pdf_extractor_simple/cloud_provider.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/pdf_extractor_simple/core.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/pdf_extractor_simple/exceptions.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/pdf_extractor_simple/extractors.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/pdf_extractor_simple/utils.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/pdf_writer/__init__.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/pdf_writer/pdf_writer_simple.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/util/cache_asset_downloader.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/util/common.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/util/customize_regix_manager.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/util/secret_manager.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/util/util_aliyun.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/util/util_aliyun_oss_simple.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/util/util_aws.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/util/util_date.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/util/util_html.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/util/util_kafka.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/util/util_md5.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/util/util_selenium.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/util/util_simple_timer.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/util/util_str.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/util/util_type_mapping.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit/util/util_url.py +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit.egg-info/SOURCES.txt +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit.egg-info/dependency_links.txt +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit.egg-info/not-zip-safe +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/orbitkit.egg-info/top_level.txt +0 -0
- {orbitkit-0.8.46 → orbitkit-0.8.47}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: orbitkit
|
|
3
|
-
Version: 0.8.46
|
|
3
|
+
Version: 0.8.47
|
|
4
4
|
Summary: This project is only for Orbit Tech internal use.
|
|
5
5
|
Home-page: https://github.com/clown-0726/orbitkit
|
|
6
6
|
Author: Lilu Cao
|
|
@@ -19,15 +19,37 @@ Classifier: Programming Language :: Python :: 3.4
|
|
|
19
19
|
Classifier: Programming Language :: Python :: 3.5
|
|
20
20
|
Classifier: Programming Language :: Python :: 3.6
|
|
21
21
|
Classifier: Programming Language :: Python :: 3.7
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
26
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
27
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
28
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
22
29
|
Classifier: Topic :: Software Development :: Libraries
|
|
23
30
|
Description-Content-Type: text/markdown
|
|
24
31
|
License-File: LICENSE
|
|
25
|
-
Requires-Dist: boto3>=1.
|
|
26
|
-
Requires-Dist:
|
|
27
|
-
Requires-Dist:
|
|
28
|
-
Requires-Dist:
|
|
32
|
+
Requires-Dist: boto3>=1.40.46
|
|
33
|
+
Requires-Dist: aioboto3>=15.5.0
|
|
34
|
+
Requires-Dist: aiofiles>=25.1.0
|
|
35
|
+
Requires-Dist: requests>=2.32.5
|
|
36
|
+
Requires-Dist: prettytable>=3.17.0
|
|
37
|
+
Requires-Dist: pytz>=2025.2
|
|
29
38
|
Requires-Dist: Deprecated
|
|
30
39
|
Requires-Dist: func_timeout
|
|
40
|
+
Dynamic: author
|
|
41
|
+
Dynamic: author-email
|
|
42
|
+
Dynamic: classifier
|
|
43
|
+
Dynamic: description
|
|
44
|
+
Dynamic: description-content-type
|
|
45
|
+
Dynamic: home-page
|
|
46
|
+
Dynamic: license
|
|
47
|
+
Dynamic: license-file
|
|
48
|
+
Dynamic: maintainer
|
|
49
|
+
Dynamic: maintainer-email
|
|
50
|
+
Dynamic: platform
|
|
51
|
+
Dynamic: requires-dist
|
|
52
|
+
Dynamic: summary
|
|
31
53
|
|
|
32
54
|
# orbitkit
|
|
33
55
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0.8.47
|
|
@@ -74,11 +74,11 @@ class Translate:
|
|
|
74
74
|
# "start": 0.031, "end": 2.921},
|
|
75
75
|
# {"speaker": "SPEAKER_01", "text": "We just wanted to take a minute to thank you.", "start": 3.507, "end": 4.962}],
|
|
76
76
|
# "id": "0ca63ef01e224adca4865b3cec94c1a2", "model": "WhisperX"}
|
|
77
|
-
def text_processing(netmind_data, lang):
|
|
77
|
+
def text_processing(netmind_data, lang, translate_model='gpt-4.1-mini'):
|
|
78
78
|
import fasttext
|
|
79
79
|
from urllib.request import urlretrieve
|
|
80
80
|
model_path = "lid.176.bin"
|
|
81
|
-
tran = Translate()
|
|
81
|
+
tran = Translate(model=translate_model)
|
|
82
82
|
# 如果模型不存在,则下载
|
|
83
83
|
if not os.path.exists(model_path):
|
|
84
84
|
logger.info("Downloading fasttext language detection model...")
|
|
@@ -145,6 +145,7 @@ def send_request_to_stream(file_steam, **kwargs):
|
|
|
145
145
|
def request_wav_from_netmind(s3_client, s3_path=None, file_steam=None, **kwargs):
|
|
146
146
|
lang = kwargs.get('lang', 'en')
|
|
147
147
|
folder = kwargs.get('folder', '')
|
|
148
|
+
translate_model = kwargs.get('translate_model', 'gpt-4.1-mini')
|
|
148
149
|
if s3_path:
|
|
149
150
|
s3_path_obj = s3_split_path(s3_path)
|
|
150
151
|
# 开始尝试提取...
|
|
@@ -165,10 +166,10 @@ def request_wav_from_netmind(s3_client, s3_path=None, file_steam=None, **kwargs)
|
|
|
165
166
|
with open(json_netmind_wav_path, 'w', encoding='utf-8') as json_file:
|
|
166
167
|
json.dump(data, json_file, ensure_ascii=False, indent=4)
|
|
167
168
|
|
|
168
|
-
net_process = text_processing(data, lang)
|
|
169
|
+
net_process = text_processing(data, lang, translate_model=translate_model)
|
|
169
170
|
|
|
170
171
|
# 翻译接口处理
|
|
171
172
|
json_netmind_lang_wav_path = os.path.join(folder, 'netmind_lang_wav.json')
|
|
172
173
|
with open(json_netmind_lang_wav_path, 'w', encoding='utf-8') as json_file:
|
|
173
174
|
json.dump(net_process, json_file, ensure_ascii=False, indent=4)
|
|
174
|
-
return json_netmind_wav_path, json_netmind_lang_wav_path
|
|
175
|
+
return json_netmind_wav_path, json_netmind_lang_wav_path
|
|
@@ -0,0 +1,369 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os.path
|
|
3
|
+
from typing import Optional
|
|
4
|
+
import boto3
|
|
5
|
+
from orbitkit.util import get_from_dict_or_env, s3_split_path, get_content_type_4_filename
|
|
6
|
+
import botocore
|
|
7
|
+
from botocore.exceptions import ClientError
|
|
8
|
+
import aioboto3
|
|
9
|
+
import aiofiles
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class AwsS3Wrapper:
|
|
15
|
+
"""Encapsulates Amazon s3 actions for Orbitfin"""
|
|
16
|
+
|
|
17
|
+
def __init__(self, s3_resource, s3_client):
|
|
18
|
+
"""
|
|
19
|
+
:param s3_resource: boto3.resource('s3')
|
|
20
|
+
:param s3_client: boto3.client('s3')
|
|
21
|
+
"""
|
|
22
|
+
self.s3_resource = s3_resource
|
|
23
|
+
self.s3_client = s3_client
|
|
24
|
+
|
|
25
|
+
@classmethod
|
|
26
|
+
def from_s3(cls, *args, **kwargs):
|
|
27
|
+
# Try to get key aws pair
|
|
28
|
+
aws_access_key_id = get_from_dict_or_env(
|
|
29
|
+
kwargs, "aws_access_key_id", "AWS_ACCESS_KEY_ID",
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
aws_secret_access_key = get_from_dict_or_env(
|
|
33
|
+
kwargs, "aws_secret_access_key", "AWS_SECRET_ACCESS_KEY",
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
s3_resource = boto3.resource('s3', aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key)
|
|
37
|
+
s3_client = boto3.client('s3', aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key)
|
|
38
|
+
|
|
39
|
+
return cls(s3_resource, s3_client)
|
|
40
|
+
|
|
41
|
+
def get_s3_resource(self):
|
|
42
|
+
return self.s3_resource
|
|
43
|
+
|
|
44
|
+
def get_s3_client(self):
|
|
45
|
+
return self.s3_client
|
|
46
|
+
|
|
47
|
+
def check_file_exist(self, s3_path: str) -> bool:
|
|
48
|
+
"""
|
|
49
|
+
:param s3_path: Target store path for s3.
|
|
50
|
+
:return:
|
|
51
|
+
"""
|
|
52
|
+
|
|
53
|
+
s3_path_obj = s3_split_path(s3_path)
|
|
54
|
+
try:
|
|
55
|
+
self.s3_resource.Object(s3_path_obj["bucket"], s3_path_obj["store_path"]).load()
|
|
56
|
+
except botocore.exceptions.ClientError as e:
|
|
57
|
+
if e.response['Error']['Code'] == "404":
|
|
58
|
+
# The object does not exist.
|
|
59
|
+
return False
|
|
60
|
+
else:
|
|
61
|
+
# Something else has gone wrong.
|
|
62
|
+
raise Exception("Check s3 file exist unknown error...")
|
|
63
|
+
else:
|
|
64
|
+
# The object does exist.
|
|
65
|
+
return True
|
|
66
|
+
|
|
67
|
+
def copy_file(self, source_path: str, target_path: str):
|
|
68
|
+
"""
|
|
69
|
+
:param source_path: Source s3 path location
|
|
70
|
+
:param target_path: Target s3 path location
|
|
71
|
+
:return:
|
|
72
|
+
"""
|
|
73
|
+
source_path_obj = s3_split_path(source_path)
|
|
74
|
+
target_path_obj = s3_split_path(target_path)
|
|
75
|
+
|
|
76
|
+
self.s3_resource.Object(target_path_obj["bucket"], target_path_obj["store_path"]).copy_from(
|
|
77
|
+
CopySource=source_path_obj["bucket"] + '/' + source_path_obj["store_path"],
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
def delete_file(self, s3_path: str):
|
|
81
|
+
"""
|
|
82
|
+
:param s3_path: Target store path for s3.
|
|
83
|
+
:return:
|
|
84
|
+
"""
|
|
85
|
+
s3_path_obj = s3_split_path(s3_path)
|
|
86
|
+
self.s3_resource.Object(s3_path_obj["bucket"], s3_path_obj["store_path"]).delete()
|
|
87
|
+
|
|
88
|
+
def download_file(self, s3_path: str, local_path: str, filename: str):
|
|
89
|
+
"""
|
|
90
|
+
:param s3_path: Target store path for s3.
|
|
91
|
+
:param local_path: Local path
|
|
92
|
+
:param filename: File name
|
|
93
|
+
:return:
|
|
94
|
+
"""
|
|
95
|
+
if not os.path.exists(local_path):
|
|
96
|
+
os.makedirs(local_path)
|
|
97
|
+
|
|
98
|
+
s3_path_obj = s3_split_path(s3_path)
|
|
99
|
+
self.s3_resource.Bucket(s3_path_obj["bucket"]).download_file(s3_path_obj["store_path"], os.path.join(local_path, filename))
|
|
100
|
+
|
|
101
|
+
def upload_by_local_path(self, s3_path: str, local_path: str, text_with_utf8: bool = True):
|
|
102
|
+
"""
|
|
103
|
+
:param s3_path: Target store path for s3.
|
|
104
|
+
:param local_path: Local file path.
|
|
105
|
+
:param text_with_utf8: If content-type start with "text/" then put ;charset=utf-8 after.
|
|
106
|
+
:return:
|
|
107
|
+
"""
|
|
108
|
+
if not os.path.exists(local_path):
|
|
109
|
+
raise Exception("Local file doesn't exist!")
|
|
110
|
+
|
|
111
|
+
content_type = get_content_type_4_filename(s3_path, text_with_utf8)
|
|
112
|
+
s3_path_obj = s3_split_path(s3_path)
|
|
113
|
+
|
|
114
|
+
self.s3_client.upload_file(local_path,
|
|
115
|
+
s3_path_obj["bucket"],
|
|
116
|
+
s3_path_obj["store_path"],
|
|
117
|
+
ExtraArgs={'ContentType': content_type})
|
|
118
|
+
|
|
119
|
+
def upload_file(self, s3_path: str, content: bytes, metadata: Optional[dict] = None, text_with_utf8: bool = True):
|
|
120
|
+
"""
|
|
121
|
+
:param s3_path: Target store path for s3.
|
|
122
|
+
:param content: The content of file, if text-like use content.encode("utf-8"), if binary then put directly.
|
|
123
|
+
:param metadata: Custom metadata for file.
|
|
124
|
+
:param text_with_utf8: If content-type start with "text/" then put ;charset=utf-8 after.
|
|
125
|
+
:return:
|
|
126
|
+
"""
|
|
127
|
+
s3_path_obj = s3_split_path(s3_path)
|
|
128
|
+
content_type = get_content_type_4_filename(s3_path, text_with_utf8)
|
|
129
|
+
|
|
130
|
+
object_put = self.s3_resource.Object(s3_path_obj["bucket"], s3_path_obj["store_path"])
|
|
131
|
+
if metadata:
|
|
132
|
+
object_put.put(Body=content, ContentType=content_type, Metadata=metadata)
|
|
133
|
+
else:
|
|
134
|
+
object_put.put(Body=content, ContentType=content_type)
|
|
135
|
+
|
|
136
|
+
def get_file_meta_info(self, s3_path: str) -> dict:
|
|
137
|
+
"""
|
|
138
|
+
:param s3_path: Target store path for s3.
|
|
139
|
+
:return:
|
|
140
|
+
"""
|
|
141
|
+
s3_path_obj = s3_split_path(s3_path)
|
|
142
|
+
response = self.s3_client.head_object(Bucket=s3_path_obj["bucket"], Key=s3_path_obj["store_path"])
|
|
143
|
+
return {
|
|
144
|
+
"content_type": response['ContentType'],
|
|
145
|
+
"metadata": response['Metadata'],
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
def read_text_like_file(self, s3_path: str, decoding: str = "utf-8") -> str:
|
|
149
|
+
"""
|
|
150
|
+
:param s3_path: Target store path for s3.
|
|
151
|
+
:param decoding: decoding, default is "utf-8".
|
|
152
|
+
:return:
|
|
153
|
+
"""
|
|
154
|
+
s3_path_obj = s3_split_path(s3_path)
|
|
155
|
+
obj = self.s3_client.get_object(Bucket=s3_path_obj["bucket"], Key=s3_path_obj["store_path"])
|
|
156
|
+
return obj['Body'].read().decode(decoding)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
class AwsS3WrapperAsync:
|
|
160
|
+
"""Encapsulates Amazon S3 async actions for Orbitfin using aioboto3"""
|
|
161
|
+
|
|
162
|
+
def __init__(self, aws_access_key_id: Optional[str] = None, aws_secret_access_key: Optional[str] = None):
|
|
163
|
+
"""
|
|
164
|
+
初始化异步 S3 包装器
|
|
165
|
+
|
|
166
|
+
:param aws_access_key_id: AWS access key ID(可选,不提供则使用 AWS CLI 配置或环境变量)
|
|
167
|
+
:param aws_secret_access_key: AWS secret access key(可选)
|
|
168
|
+
|
|
169
|
+
凭证获取顺序:
|
|
170
|
+
1. 直接传入的参数
|
|
171
|
+
2. 环境变量(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
|
|
172
|
+
3. AWS CLI 配置文件(~/.aws/credentials)
|
|
173
|
+
4. IAM 角色(如果在 EC2/ECS 上运行)
|
|
174
|
+
"""
|
|
175
|
+
self.aws_access_key_id = aws_access_key_id
|
|
176
|
+
self.aws_secret_access_key = aws_secret_access_key
|
|
177
|
+
|
|
178
|
+
# 如果提供了凭证,使用指定凭证;否则使用默认凭证链
|
|
179
|
+
if aws_access_key_id and aws_secret_access_key:
|
|
180
|
+
self.session = aioboto3.Session(
|
|
181
|
+
aws_access_key_id=aws_access_key_id,
|
|
182
|
+
aws_secret_access_key=aws_secret_access_key
|
|
183
|
+
)
|
|
184
|
+
else:
|
|
185
|
+
# 使用默认凭证链(环境变量、AWS CLI 配置、IAM 角色等)
|
|
186
|
+
self.session = aioboto3.Session()
|
|
187
|
+
|
|
188
|
+
@classmethod
|
|
189
|
+
def from_s3(cls, *args, **kwargs):
|
|
190
|
+
"""
|
|
191
|
+
创建 AwsS3WrapperAsync 实例
|
|
192
|
+
|
|
193
|
+
支持从以下来源获取 AWS 凭证(按优先级):
|
|
194
|
+
1. kwargs 参数(aws_access_key_id, aws_secret_access_key)
|
|
195
|
+
2. 环境变量(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
|
|
196
|
+
3. AWS CLI 配置文件(~/.aws/credentials)
|
|
197
|
+
4. IAM 角色
|
|
198
|
+
|
|
199
|
+
示例:
|
|
200
|
+
# 使用默认凭证链(推荐)
|
|
201
|
+
wrapper = AwsS3WrapperAsync.from_s3()
|
|
202
|
+
|
|
203
|
+
# 显式指定凭证
|
|
204
|
+
wrapper = AwsS3WrapperAsync.from_s3(
|
|
205
|
+
aws_access_key_id="xxx",
|
|
206
|
+
aws_secret_access_key="yyy"
|
|
207
|
+
)
|
|
208
|
+
"""
|
|
209
|
+
# 尝试从 kwargs 或环境变量获取凭证(可选)
|
|
210
|
+
aws_access_key_id = kwargs.get("aws_access_key_id") or os.environ.get("AWS_ACCESS_KEY_ID")
|
|
211
|
+
aws_secret_access_key = kwargs.get("aws_secret_access_key") or os.environ.get("AWS_SECRET_ACCESS_KEY")
|
|
212
|
+
|
|
213
|
+
return cls(aws_access_key_id, aws_secret_access_key)
|
|
214
|
+
|
|
215
|
+
async def check_file_exist(self, s3_path: str) -> bool:
|
|
216
|
+
"""
|
|
217
|
+
检查 S3 文件是否存在
|
|
218
|
+
:param s3_path: S3 路径
|
|
219
|
+
:return: 文件是否存在
|
|
220
|
+
"""
|
|
221
|
+
s3_path_obj = s3_split_path(s3_path)
|
|
222
|
+
|
|
223
|
+
async with self.session.client('s3') as s3_client:
|
|
224
|
+
try:
|
|
225
|
+
await s3_client.head_object(
|
|
226
|
+
Bucket=s3_path_obj["bucket"],
|
|
227
|
+
Key=s3_path_obj["store_path"]
|
|
228
|
+
)
|
|
229
|
+
return True
|
|
230
|
+
except ClientError as e:
|
|
231
|
+
if e.response['Error']['Code'] == "404":
|
|
232
|
+
return False
|
|
233
|
+
else:
|
|
234
|
+
raise Exception("Check s3 file exist unknown error...")
|
|
235
|
+
|
|
236
|
+
async def copy_file(self, source_path: str, target_path: str):
|
|
237
|
+
"""
|
|
238
|
+
复制 S3 文件
|
|
239
|
+
:param source_path: 源 S3 路径
|
|
240
|
+
:param target_path: 目标 S3 路径
|
|
241
|
+
"""
|
|
242
|
+
source_path_obj = s3_split_path(source_path)
|
|
243
|
+
target_path_obj = s3_split_path(target_path)
|
|
244
|
+
|
|
245
|
+
async with self.session.client('s3') as s3_client:
|
|
246
|
+
copy_source = {
|
|
247
|
+
'Bucket': source_path_obj["bucket"],
|
|
248
|
+
'Key': source_path_obj["store_path"]
|
|
249
|
+
}
|
|
250
|
+
await s3_client.copy_object(
|
|
251
|
+
CopySource=copy_source,
|
|
252
|
+
Bucket=target_path_obj["bucket"],
|
|
253
|
+
Key=target_path_obj["store_path"]
|
|
254
|
+
)
|
|
255
|
+
|
|
256
|
+
async def delete_file(self, s3_path: str):
|
|
257
|
+
"""
|
|
258
|
+
删除 S3 文件
|
|
259
|
+
:param s3_path: S3 路径
|
|
260
|
+
"""
|
|
261
|
+
s3_path_obj = s3_split_path(s3_path)
|
|
262
|
+
|
|
263
|
+
async with self.session.client('s3') as s3_client:
|
|
264
|
+
await s3_client.delete_object(
|
|
265
|
+
Bucket=s3_path_obj["bucket"],
|
|
266
|
+
Key=s3_path_obj["store_path"]
|
|
267
|
+
)
|
|
268
|
+
|
|
269
|
+
async def download_file(self, s3_path: str, local_path: str, filename: str):
|
|
270
|
+
"""
|
|
271
|
+
从 S3 下载文件到本地
|
|
272
|
+
:param s3_path: S3 路径
|
|
273
|
+
:param local_path: 本地目录路径
|
|
274
|
+
:param filename: 文件名
|
|
275
|
+
"""
|
|
276
|
+
if not os.path.exists(local_path):
|
|
277
|
+
os.makedirs(local_path)
|
|
278
|
+
|
|
279
|
+
s3_path_obj = s3_split_path(s3_path)
|
|
280
|
+
local_file_path = os.path.join(local_path, filename)
|
|
281
|
+
|
|
282
|
+
async with self.session.client('s3') as s3_client:
|
|
283
|
+
response = await s3_client.get_object(
|
|
284
|
+
Bucket=s3_path_obj["bucket"],
|
|
285
|
+
Key=s3_path_obj["store_path"]
|
|
286
|
+
)
|
|
287
|
+
async with aiofiles.open(local_file_path, 'wb') as f:
|
|
288
|
+
await f.write(await response['Body'].read())
|
|
289
|
+
|
|
290
|
+
async def upload_by_local_path(self, s3_path: str, local_path: str, text_with_utf8: bool = True):
|
|
291
|
+
"""
|
|
292
|
+
从本地路径上传文件到 S3
|
|
293
|
+
:param s3_path: S3 目标路径
|
|
294
|
+
:param local_path: 本地文件路径
|
|
295
|
+
:param text_with_utf8: 如果是文本文件,是否添加 utf-8 编码标识
|
|
296
|
+
"""
|
|
297
|
+
if not os.path.exists(local_path):
|
|
298
|
+
raise Exception("Local file doesn't exist!")
|
|
299
|
+
|
|
300
|
+
content_type = get_content_type_4_filename(s3_path, text_with_utf8)
|
|
301
|
+
s3_path_obj = s3_split_path(s3_path)
|
|
302
|
+
|
|
303
|
+
async with self.session.client('s3') as s3_client:
|
|
304
|
+
async with aiofiles.open(local_path, 'rb') as f:
|
|
305
|
+
content = await f.read()
|
|
306
|
+
await s3_client.put_object(
|
|
307
|
+
Bucket=s3_path_obj["bucket"],
|
|
308
|
+
Key=s3_path_obj["store_path"],
|
|
309
|
+
Body=content,
|
|
310
|
+
ContentType=content_type
|
|
311
|
+
)
|
|
312
|
+
|
|
313
|
+
async def upload_file(self, s3_path: str, content: bytes, metadata: Optional[dict] = None, text_with_utf8: bool = True):
|
|
314
|
+
"""
|
|
315
|
+
上传文件内容到 S3
|
|
316
|
+
:param s3_path: S3 目标路径
|
|
317
|
+
:param content: 文件内容(字节)
|
|
318
|
+
:param metadata: 自定义元数据
|
|
319
|
+
:param text_with_utf8: 如果是文本文件,是否添加 utf-8 编码标识
|
|
320
|
+
"""
|
|
321
|
+
s3_path_obj = s3_split_path(s3_path)
|
|
322
|
+
content_type = get_content_type_4_filename(s3_path, text_with_utf8)
|
|
323
|
+
|
|
324
|
+
async with self.session.client('s3') as s3_client:
|
|
325
|
+
put_args = {
|
|
326
|
+
'Bucket': s3_path_obj["bucket"],
|
|
327
|
+
'Key': s3_path_obj["store_path"],
|
|
328
|
+
'Body': content,
|
|
329
|
+
'ContentType': content_type
|
|
330
|
+
}
|
|
331
|
+
if metadata:
|
|
332
|
+
put_args['Metadata'] = metadata
|
|
333
|
+
|
|
334
|
+
await s3_client.put_object(**put_args)
|
|
335
|
+
|
|
336
|
+
async def get_file_meta_info(self, s3_path: str) -> dict:
|
|
337
|
+
"""
|
|
338
|
+
获取 S3 文件的元信息
|
|
339
|
+
:param s3_path: S3 路径
|
|
340
|
+
:return: 包含 content_type 和 metadata 的字典
|
|
341
|
+
"""
|
|
342
|
+
s3_path_obj = s3_split_path(s3_path)
|
|
343
|
+
|
|
344
|
+
async with self.session.client('s3') as s3_client:
|
|
345
|
+
response = await s3_client.head_object(
|
|
346
|
+
Bucket=s3_path_obj["bucket"],
|
|
347
|
+
Key=s3_path_obj["store_path"]
|
|
348
|
+
)
|
|
349
|
+
return {
|
|
350
|
+
"content_type": response['ContentType'],
|
|
351
|
+
"metadata": response['Metadata'],
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
async def read_text_like_file(self, s3_path: str, decoding: str = "utf-8") -> str:
|
|
355
|
+
"""
|
|
356
|
+
读取 S3 文本文件内容
|
|
357
|
+
:param s3_path: S3 路径
|
|
358
|
+
:param decoding: 解码方式,默认 utf-8
|
|
359
|
+
:return: 文件文本内容
|
|
360
|
+
"""
|
|
361
|
+
s3_path_obj = s3_split_path(s3_path)
|
|
362
|
+
|
|
363
|
+
async with self.session.client('s3') as s3_client:
|
|
364
|
+
response = await s3_client.get_object(
|
|
365
|
+
Bucket=s3_path_obj["bucket"],
|
|
366
|
+
Key=s3_path_obj["store_path"]
|
|
367
|
+
)
|
|
368
|
+
content = await response['Body'].read()
|
|
369
|
+
return content.decode(decoding)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: orbitkit
|
|
3
|
-
Version: 0.8.46
|
|
3
|
+
Version: 0.8.47
|
|
4
4
|
Summary: This project is only for Orbit Tech internal use.
|
|
5
5
|
Home-page: https://github.com/clown-0726/orbitkit
|
|
6
6
|
Author: Lilu Cao
|
|
@@ -19,15 +19,37 @@ Classifier: Programming Language :: Python :: 3.4
|
|
|
19
19
|
Classifier: Programming Language :: Python :: 3.5
|
|
20
20
|
Classifier: Programming Language :: Python :: 3.6
|
|
21
21
|
Classifier: Programming Language :: Python :: 3.7
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
26
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
27
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
28
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
22
29
|
Classifier: Topic :: Software Development :: Libraries
|
|
23
30
|
Description-Content-Type: text/markdown
|
|
24
31
|
License-File: LICENSE
|
|
25
|
-
Requires-Dist: boto3>=1.
|
|
26
|
-
Requires-Dist:
|
|
27
|
-
Requires-Dist:
|
|
28
|
-
Requires-Dist:
|
|
32
|
+
Requires-Dist: boto3>=1.40.46
|
|
33
|
+
Requires-Dist: aioboto3>=15.5.0
|
|
34
|
+
Requires-Dist: aiofiles>=25.1.0
|
|
35
|
+
Requires-Dist: requests>=2.32.5
|
|
36
|
+
Requires-Dist: prettytable>=3.17.0
|
|
37
|
+
Requires-Dist: pytz>=2025.2
|
|
29
38
|
Requires-Dist: Deprecated
|
|
30
39
|
Requires-Dist: func_timeout
|
|
40
|
+
Dynamic: author
|
|
41
|
+
Dynamic: author-email
|
|
42
|
+
Dynamic: classifier
|
|
43
|
+
Dynamic: description
|
|
44
|
+
Dynamic: description-content-type
|
|
45
|
+
Dynamic: home-page
|
|
46
|
+
Dynamic: license
|
|
47
|
+
Dynamic: license-file
|
|
48
|
+
Dynamic: maintainer
|
|
49
|
+
Dynamic: maintainer-email
|
|
50
|
+
Dynamic: platform
|
|
51
|
+
Dynamic: requires-dist
|
|
52
|
+
Dynamic: summary
|
|
31
53
|
|
|
32
54
|
# orbitkit
|
|
33
55
|
|
|
@@ -32,15 +32,24 @@ setup(
|
|
|
32
32
|
'Programming Language :: Python :: 3.5',
|
|
33
33
|
'Programming Language :: Python :: 3.6',
|
|
34
34
|
'Programming Language :: Python :: 3.7',
|
|
35
|
+
'Programming Language :: Python :: 3.8',
|
|
36
|
+
'Programming Language :: Python :: 3.9',
|
|
37
|
+
'Programming Language :: Python :: 3.10',
|
|
38
|
+
'Programming Language :: Python :: 3.11',
|
|
39
|
+
'Programming Language :: Python :: 3.12',
|
|
40
|
+
'Programming Language :: Python :: 3.13',
|
|
41
|
+
'Programming Language :: Python :: 3.14',
|
|
35
42
|
'Topic :: Software Development :: Libraries'
|
|
36
43
|
],
|
|
37
44
|
include_package_data=True,
|
|
38
45
|
zip_safe=False,
|
|
39
46
|
install_requires=[
|
|
40
|
-
"boto3 >= 1.
|
|
41
|
-
"
|
|
42
|
-
"
|
|
43
|
-
"
|
|
47
|
+
"boto3 >= 1.40.46",
|
|
48
|
+
"aioboto3 >= 15.5.0",
|
|
49
|
+
"aiofiles >= 25.1.0",
|
|
50
|
+
"requests >= 2.32.5",
|
|
51
|
+
"prettytable >= 3.17.0",
|
|
52
|
+
"pytz >= 2025.2",
|
|
44
53
|
"Deprecated",
|
|
45
54
|
"func_timeout",
|
|
46
55
|
]
|
orbitkit-0.8.46/orbitkit/VERSION
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
0.8.46
|
|
@@ -1,154 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
import os.path
|
|
3
|
-
from typing import Optional
|
|
4
|
-
import boto3
|
|
5
|
-
from orbitkit.util import get_from_dict_or_env, s3_split_path, get_content_type_4_filename
|
|
6
|
-
import botocore
|
|
7
|
-
from botocore.exceptions import ClientError
|
|
8
|
-
|
|
9
|
-
logger = logging.getLogger(__name__)
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class AwsS3Wrapper:
|
|
13
|
-
"""Encapsulates Amazon s3 actions for Orbitfin"""
|
|
14
|
-
|
|
15
|
-
def __init__(self, s3_resource, s3_client):
|
|
16
|
-
"""
|
|
17
|
-
:param s3_resource: boto3.resource('s3')
|
|
18
|
-
:param s3_client: boto3.client('s3')
|
|
19
|
-
"""
|
|
20
|
-
self.s3_resource = s3_resource
|
|
21
|
-
self.s3_client = s3_client
|
|
22
|
-
|
|
23
|
-
@classmethod
|
|
24
|
-
def from_s3(cls, *args, **kwargs):
|
|
25
|
-
# Try to get key aws pair
|
|
26
|
-
aws_access_key_id = get_from_dict_or_env(
|
|
27
|
-
kwargs, "aws_access_key_id", "AWS_ACCESS_KEY_ID",
|
|
28
|
-
)
|
|
29
|
-
|
|
30
|
-
aws_secret_access_key = get_from_dict_or_env(
|
|
31
|
-
kwargs, "aws_secret_access_key", "AWS_SECRET_ACCESS_KEY",
|
|
32
|
-
)
|
|
33
|
-
|
|
34
|
-
s3_resource = boto3.resource('s3', aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key)
|
|
35
|
-
s3_client = boto3.client('s3', aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key)
|
|
36
|
-
|
|
37
|
-
return cls(s3_resource, s3_client)
|
|
38
|
-
|
|
39
|
-
def get_s3_resource(self):
|
|
40
|
-
return self.s3_resource
|
|
41
|
-
|
|
42
|
-
def get_s3_client(self):
|
|
43
|
-
return self.s3_client
|
|
44
|
-
|
|
45
|
-
def check_file_exist(self, s3_path: str) -> bool:
|
|
46
|
-
"""
|
|
47
|
-
:param s3_path: Target store path for s3.
|
|
48
|
-
:return:
|
|
49
|
-
"""
|
|
50
|
-
|
|
51
|
-
s3_path_obj = s3_split_path(s3_path)
|
|
52
|
-
try:
|
|
53
|
-
self.s3_resource.Object(s3_path_obj["bucket"], s3_path_obj["store_path"]).load()
|
|
54
|
-
except botocore.exceptions.ClientError as e:
|
|
55
|
-
if e.response['Error']['Code'] == "404":
|
|
56
|
-
# The object does not exist.
|
|
57
|
-
return False
|
|
58
|
-
else:
|
|
59
|
-
# Something else has gone wrong.
|
|
60
|
-
raise Exception("Check s3 file exist unknown error...")
|
|
61
|
-
else:
|
|
62
|
-
# The object does exist.
|
|
63
|
-
return True
|
|
64
|
-
|
|
65
|
-
def copy_file(self, source_path: str, target_path: str):
|
|
66
|
-
"""
|
|
67
|
-
:param source_path: Source s3 path location
|
|
68
|
-
:param target_path: Target s3 path location
|
|
69
|
-
:return:
|
|
70
|
-
"""
|
|
71
|
-
source_path_obj = s3_split_path(source_path)
|
|
72
|
-
target_path_obj = s3_split_path(target_path)
|
|
73
|
-
|
|
74
|
-
self.s3_resource.Object(target_path_obj["bucket"], target_path_obj["store_path"]).copy_from(
|
|
75
|
-
CopySource=source_path_obj["bucket"] + '/' + source_path_obj["store_path"],
|
|
76
|
-
)
|
|
77
|
-
|
|
78
|
-
def delete_file(self, s3_path: str):
|
|
79
|
-
"""
|
|
80
|
-
:param s3_path: Target store path for s3.
|
|
81
|
-
:return:
|
|
82
|
-
"""
|
|
83
|
-
s3_path_obj = s3_split_path(s3_path)
|
|
84
|
-
self.s3_resource.Object(s3_path_obj["bucket"], s3_path_obj["store_path"]).delete()
|
|
85
|
-
|
|
86
|
-
def download_file(self, s3_path: str, local_path: str, filename: str):
|
|
87
|
-
"""
|
|
88
|
-
:param s3_path: Target store path for s3.
|
|
89
|
-
:param local_path: Local path
|
|
90
|
-
:param filename: File name
|
|
91
|
-
:return:
|
|
92
|
-
"""
|
|
93
|
-
if not os.path.exists(local_path):
|
|
94
|
-
os.makedirs(local_path)
|
|
95
|
-
|
|
96
|
-
s3_path_obj = s3_split_path(s3_path)
|
|
97
|
-
self.s3_resource.Bucket(s3_path_obj["bucket"]).download_file(s3_path_obj["store_path"], os.path.join(local_path, filename))
|
|
98
|
-
|
|
99
|
-
def upload_by_local_path(self, s3_path: str, local_path: str, text_with_utf8: bool = True):
    """
    Upload a local file to S3 with a ContentType chosen from the S3 path.

    :param s3_path: Target store path for s3.
    :param local_path: Local file path.
    :param text_with_utf8: If content-type start with "text/" then put ;charset=utf-8 after.
    :return:
    :raises FileNotFoundError: If ``local_path`` is not an existing regular file.
    """
    # isfile rather than exists: a directory passes exists() but cannot be
    # uploaded as a single object, so reject it here with a clear error.
    # FileNotFoundError is an Exception subclass, so callers that caught the
    # previous generic Exception keep working.
    if not os.path.isfile(local_path):
        raise FileNotFoundError("Local file doesn't exist! Path: {}".format(local_path))

    content_type = get_content_type_4_filename(s3_path, text_with_utf8)
    s3_path_obj = s3_split_path(s3_path)

    self.s3_client.upload_file(
        local_path,
        s3_path_obj["bucket"],
        s3_path_obj["store_path"],
        ExtraArgs={'ContentType': content_type},
    )
|
|
116
|
-
|
|
117
|
-
def upload_file(self, s3_path: str, content: bytes, metadata: Optional[dict] = None, text_with_utf8: bool = True):
    """
    Write in-memory content to an S3 object, optionally attaching custom metadata.

    :param s3_path: Target store path for s3.
    :param content: The content of file, if text-like use content.encode("utf-8"), if binary then put directly.
    :param metadata: Custom metadata for file; omitted from the request when empty or None.
    :param text_with_utf8: If content-type start with "text/" then put ;charset=utf-8 after.
    :return:
    """
    location = s3_split_path(s3_path)
    put_kwargs = {
        "Body": content,
        "ContentType": get_content_type_4_filename(s3_path, text_with_utf8),
    }
    # Only send Metadata when the caller supplied a non-empty mapping.
    if metadata:
        put_kwargs["Metadata"] = metadata

    self.s3_resource.Object(location["bucket"], location["store_path"]).put(**put_kwargs)
|
|
133
|
-
|
|
134
|
-
def get_file_meta_info(self, s3_path: str) -> dict:
    """
    Fetch an object's content type and custom metadata with a ``head_object`` call.

    :param s3_path: Target store path for s3.
    :return: Dict with keys "content_type" and "metadata" taken from the response.
    """
    location = s3_split_path(s3_path)
    head = self.s3_client.head_object(Bucket=location["bucket"], Key=location["store_path"])
    content_type = head['ContentType']
    custom_metadata = head['Metadata']
    return {"content_type": content_type, "metadata": custom_metadata}
|
|
145
|
-
|
|
146
|
-
def read_text_like_file(self, s3_path: str, decoding: str = "utf-8") -> str:
    """
    Read a whole S3 object and return its body decoded as text.

    :param s3_path: Target store path for s3.
    :param decoding: decoding, default is "utf-8".
    :return: The decoded object body.
    """
    location = s3_split_path(s3_path)
    response = self.s3_client.get_object(Bucket=location["bucket"], Key=location["store_path"])
    raw_bytes = response['Body'].read()
    return raw_bytes.decode(decoding)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|