megfile 3.0.2.post1__py3-none-any.whl → 3.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megfile/hdfs_path.py +3 -1
- megfile/http_path.py +6 -2
- megfile/lib/base_prefetch_reader.py +3 -2
- megfile/s3_path.py +17 -12
- megfile/utils/__init__.py +16 -5
- megfile/version.py +1 -1
- {megfile-3.0.2.post1.dist-info → megfile-3.0.4.dist-info}/METADATA +3 -3
- {megfile-3.0.2.post1.dist-info → megfile-3.0.4.dist-info}/RECORD +13 -13
- {megfile-3.0.2.post1.dist-info → megfile-3.0.4.dist-info}/LICENSE +0 -0
- {megfile-3.0.2.post1.dist-info → megfile-3.0.4.dist-info}/LICENSE.pyre +0 -0
- {megfile-3.0.2.post1.dist-info → megfile-3.0.4.dist-info}/WHEEL +0 -0
- {megfile-3.0.2.post1.dist-info → megfile-3.0.4.dist-info}/entry_points.txt +0 -0
- {megfile-3.0.2.post1.dist-info → megfile-3.0.4.dist-info}/top_level.txt +0 -0
megfile/hdfs_path.py
CHANGED
|
@@ -14,7 +14,7 @@ from megfile.lib.hdfs_tools import hdfs_api
|
|
|
14
14
|
from megfile.lib.url import get_url_scheme
|
|
15
15
|
from megfile.pathlike import PathLike, URIPath
|
|
16
16
|
from megfile.smart_path import SmartPath
|
|
17
|
-
from megfile.utils import cachedproperty
|
|
17
|
+
from megfile.utils import _is_pickle, cachedproperty
|
|
18
18
|
|
|
19
19
|
__all__ = [
|
|
20
20
|
'HdfsPath',
|
|
@@ -603,6 +603,8 @@ class HdfsPath(URIPath):
|
|
|
603
603
|
client=self._client,
|
|
604
604
|
profile_name=self._profile_name,
|
|
605
605
|
**input_kwargs)
|
|
606
|
+
if _is_pickle(file_obj): # pytype: disable=wrong-arg-types
|
|
607
|
+
file_obj = io.BufferedReader(file_obj) # pytype: disable=wrong-arg-types
|
|
606
608
|
if 'b' not in mode:
|
|
607
609
|
file_obj = io.TextIOWrapper(
|
|
608
610
|
file_obj, encoding=encoding, errors=errors) # pytype: disable=wrong-arg-types
|
megfile/http_path.py
CHANGED
|
@@ -16,7 +16,7 @@ from megfile.lib.s3_buffered_writer import DEFAULT_MAX_BUFFER_SIZE
|
|
|
16
16
|
from megfile.lib.url import get_url_scheme
|
|
17
17
|
from megfile.pathlike import PathLike
|
|
18
18
|
from megfile.smart_path import SmartPath
|
|
19
|
-
from megfile.utils import binary_open
|
|
19
|
+
from megfile.utils import _is_pickle, binary_open
|
|
20
20
|
|
|
21
21
|
__all__ = [
|
|
22
22
|
'HttpPath',
|
|
@@ -223,7 +223,7 @@ class HttpPath(URIPath):
|
|
|
223
223
|
else:
|
|
224
224
|
block_forward = max(int(block_capacity * forward_ratio), 1)
|
|
225
225
|
|
|
226
|
-
|
|
226
|
+
reader = HttpPrefetchReader(
|
|
227
227
|
self.path_with_protocol,
|
|
228
228
|
content_size=content_size,
|
|
229
229
|
max_retries=max_retries,
|
|
@@ -232,6 +232,10 @@ class HttpPath(URIPath):
|
|
|
232
232
|
block_forward=block_forward,
|
|
233
233
|
block_size=block_size,
|
|
234
234
|
)
|
|
235
|
+
if _is_pickle(reader): # pytype: disable=wrong-arg-types
|
|
236
|
+
reader = io.BufferedReader(reader) # pytype: disable=wrong-arg-types
|
|
237
|
+
return reader
|
|
238
|
+
|
|
235
239
|
response.raw.auto_close = False
|
|
236
240
|
response.raw.name = self.path_with_protocol
|
|
237
241
|
return BufferedReader(response.raw)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
-
from abc import ABC, abstractmethod
|
|
2
|
+
from abc import ABC, abstractmethod
|
|
3
3
|
from collections import OrderedDict
|
|
4
4
|
from concurrent.futures import Future, ThreadPoolExecutor
|
|
5
5
|
from io import BytesIO
|
|
@@ -85,7 +85,8 @@ class BasePrefetchReader(Readable, Seekable, ABC):
|
|
|
85
85
|
def _get_futures(self):
|
|
86
86
|
return LRUCacheFutureManager()
|
|
87
87
|
|
|
88
|
-
@
|
|
88
|
+
@property
|
|
89
|
+
@abstractmethod
|
|
89
90
|
def name(self) -> str:
|
|
90
91
|
pass
|
|
91
92
|
|
megfile/s3_path.py
CHANGED
|
@@ -31,7 +31,7 @@ from megfile.lib.s3_prefetch_reader import S3PrefetchReader
|
|
|
31
31
|
from megfile.lib.s3_share_cache_reader import S3ShareCacheReader
|
|
32
32
|
from megfile.lib.url import get_url_scheme
|
|
33
33
|
from megfile.smart_path import SmartPath
|
|
34
|
-
from megfile.utils import cachedproperty, calculate_md5, generate_cache_path, get_binary_mode, get_content_offset, is_readable, necessary_params, process_local, thread_local
|
|
34
|
+
from megfile.utils import _is_pickle, cachedproperty, calculate_md5, generate_cache_path, get_binary_mode, get_content_offset, is_readable, necessary_params, process_local, thread_local
|
|
35
35
|
|
|
36
36
|
__all__ = [
|
|
37
37
|
'S3Path',
|
|
@@ -138,15 +138,20 @@ def get_endpoint_url(profile_name: Optional[str] = None) -> str:
|
|
|
138
138
|
|
|
139
139
|
:returns: S3 endpoint url
|
|
140
140
|
'''
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
141
|
+
if profile_name:
|
|
142
|
+
environ_keys = (f'{profile_name}__OSS_ENDPOINT'.upper(),)
|
|
143
|
+
else:
|
|
144
|
+
environ_keys = (
|
|
145
|
+
'OSS_ENDPOINT', 'AWS_ENDPOINT_URL_S3', 'AWS_ENDPOINT_URL')
|
|
146
|
+
for environ_key in environ_keys:
|
|
147
|
+
environ_endpoint_url = os.environ.get(environ_key)
|
|
148
|
+
if environ_endpoint_url:
|
|
149
|
+
warning_endpoint_url(environ_key, environ_endpoint_url)
|
|
150
|
+
return environ_endpoint_url
|
|
147
151
|
try:
|
|
148
|
-
|
|
149
|
-
|
|
152
|
+
config = get_scoped_config(profile_name=profile_name)
|
|
153
|
+
config_endpoint_url = config.get('s3', {}).get('endpoint_url')
|
|
154
|
+
config_endpoint_url = config_endpoint_url or config.get('endpoint_url')
|
|
150
155
|
if config_endpoint_url:
|
|
151
156
|
warning_endpoint_url('~/.aws/config', config_endpoint_url)
|
|
152
157
|
return config_endpoint_url
|
|
@@ -775,7 +780,7 @@ def s3_buffered_open(
|
|
|
775
780
|
forward_ratio: Optional[float] = None,
|
|
776
781
|
block_size: int = DEFAULT_BLOCK_SIZE,
|
|
777
782
|
limited_seekable: bool = False,
|
|
778
|
-
buffered: bool =
|
|
783
|
+
buffered: bool = False,
|
|
779
784
|
share_cache_key: Optional[str] = None,
|
|
780
785
|
cache_path: Optional[str] = None
|
|
781
786
|
) -> Union[S3PrefetchReader, S3BufferedWriter, io.BufferedReader, io.
|
|
@@ -858,7 +863,7 @@ def s3_buffered_open(
|
|
|
858
863
|
block_forward=block_forward,
|
|
859
864
|
block_size=block_size,
|
|
860
865
|
profile_name=s3_url._profile_name)
|
|
861
|
-
if buffered:
|
|
866
|
+
if buffered or _is_pickle(reader): # pytype: disable=wrong-arg-types
|
|
862
867
|
reader = io.BufferedReader(reader) # pytype: disable=wrong-arg-types
|
|
863
868
|
return reader
|
|
864
869
|
|
|
@@ -880,7 +885,7 @@ def s3_buffered_open(
|
|
|
880
885
|
max_buffer_size=max_buffer_size,
|
|
881
886
|
block_size=block_size,
|
|
882
887
|
profile_name=s3_url._profile_name)
|
|
883
|
-
if buffered:
|
|
888
|
+
if buffered or _is_pickle(writer): # pytype: disable=wrong-arg-types
|
|
884
889
|
writer = io.BufferedWriter(writer) # pytype: disable=wrong-arg-types
|
|
885
890
|
return writer
|
|
886
891
|
|
megfile/utils/__init__.py
CHANGED
|
@@ -6,9 +6,6 @@ import uuid
|
|
|
6
6
|
from copy import copy
|
|
7
7
|
from functools import wraps
|
|
8
8
|
from io import BufferedIOBase, BufferedRandom, BufferedReader, BufferedWriter, BytesIO, StringIO, TextIOBase, TextIOWrapper
|
|
9
|
-
from multiprocessing.util import register_after_fork
|
|
10
|
-
from threading import RLock as _RLock
|
|
11
|
-
from threading import local
|
|
12
9
|
from typing import IO, Callable, Optional
|
|
13
10
|
|
|
14
11
|
from megfile.utils.mutex import ProcessLocal, ThreadLocal
|
|
@@ -72,6 +69,20 @@ def is_writable(fileobj: IO) -> bool:
|
|
|
72
69
|
return hasattr(fileobj, 'write')
|
|
73
70
|
|
|
74
71
|
|
|
72
|
+
def _is_pickle(fileobj: IO) -> bool:
|
|
73
|
+
''' Test if File Object is pickle'''
|
|
74
|
+
if fileobj.name.endswith('.pkl') or fileobj.name.endswith('.pickle'):
|
|
75
|
+
return True
|
|
76
|
+
|
|
77
|
+
if 'r' in fileobj.mode and 'b' in fileobj.mode:
|
|
78
|
+
offset = fileobj.tell()
|
|
79
|
+
data = fileobj.read(2)
|
|
80
|
+
fileobj.seek(offset)
|
|
81
|
+
if len(data) >= 2 and data[0] == 128 and 2 <= data[1] <= 5:
|
|
82
|
+
return True
|
|
83
|
+
return False
|
|
84
|
+
|
|
85
|
+
|
|
75
86
|
def get_content_offset(start: Optional[int], stop: Optional[int], size: int):
|
|
76
87
|
if start is None:
|
|
77
88
|
start = 0
|
|
@@ -94,7 +105,7 @@ def get_mode(fileobj, default='r'):
|
|
|
94
105
|
return getattr(fileobj, 'mode', default)
|
|
95
106
|
|
|
96
107
|
|
|
97
|
-
def shadow_copy(fileobj: IO, intrusive: bool = True, buffered: bool = True):
|
|
108
|
+
def shadow_copy(fileobj: IO, intrusive: bool = True, buffered: bool = False):
|
|
98
109
|
''' Create a File-Like Object, maintaining file pointer, to avoid misunderstanding the position when read / write / seek
|
|
99
110
|
|
|
100
111
|
:param intrusive: If is intrusive. If True, move file pointer to the original position after every read / write / seek. If False, then not.
|
|
@@ -103,7 +114,7 @@ def shadow_copy(fileobj: IO, intrusive: bool = True, buffered: bool = True):
|
|
|
103
114
|
from megfile.lib.shadow_handler import ShadowHandler
|
|
104
115
|
result = ShadowHandler(fileobj, intrusive=intrusive)
|
|
105
116
|
mode = get_mode(fileobj)
|
|
106
|
-
if
|
|
117
|
+
if "b" in mode and (buffered or _is_pickle(result)): # pytype: disable=wrong-arg-types
|
|
107
118
|
if "+" in mode:
|
|
108
119
|
result = BufferedRandom(result)
|
|
109
120
|
elif "x" in mode or "w" in mode or "a" in mode:
|
megfile/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
VERSION = "3.0.2.post1"
|
|
1
|
+
VERSION = "3.0.4"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: megfile
|
|
3
|
-
Version: 3.0.2.post1
|
|
3
|
+
Version: 3.0.4
|
|
4
4
|
Summary: Megvii file operation library
|
|
5
5
|
Home-page: https://github.com/megvii-research/megfile
|
|
6
6
|
Author: megvii
|
|
@@ -160,10 +160,10 @@ Using `s3` as an example, the following describes the configuration methods. For
|
|
|
160
160
|
You can use environments and configuration file for configuration, and priority is that environment variables take precedence over configuration file.
|
|
161
161
|
|
|
162
162
|
### Use environments
|
|
163
|
-
You can use environments to setup authentication credentials for your s3 account:
|
|
163
|
+
You can use environments to setup authentication credentials for your `s3` account:
|
|
164
164
|
- `AWS_ACCESS_KEY_ID`: access key
|
|
165
165
|
- `AWS_SECRET_ACCESS_KEY`: secret key
|
|
166
|
-
- `OSS_ENDPOINT`: endpoint url of s3
|
|
166
|
+
- `OSS_ENDPOINT` / `AWS_ENDPOINT_URL_S3` / `AWS_ENDPOINT_URL`: endpoint url of s3
|
|
167
167
|
- `AWS_S3_ADDRESSING_STYLE`: addressing style
|
|
168
168
|
|
|
169
169
|
### Use command
|
|
@@ -5,22 +5,22 @@ megfile/errors.py,sha256=Sbx3UEKnzuyUmB1tFU9cZv61Yr4dRa79J6D0UMmkvj4,13323
|
|
|
5
5
|
megfile/fs.py,sha256=OfY0z4GSl8fT3mDGdeqP2hWFsd1QJl-h8RkSbg6-M8I,11547
|
|
6
6
|
megfile/fs_path.py,sha256=sHn-sBcvq7SvYN71onkA_ssLs71NzM1MM3d3Sug8uzo,38237
|
|
7
7
|
megfile/hdfs.py,sha256=aAkHobOO0nDcLoqj0tx_1tvgoLOCooTWuukq0pO-nQA,9156
|
|
8
|
-
megfile/hdfs_path.py,sha256=
|
|
8
|
+
megfile/hdfs_path.py,sha256=obfMMKSuBcSGgtEN18jXhEldm4hawMhxk8QoRv4k790,26859
|
|
9
9
|
megfile/http.py,sha256=a3oAuARSSaIU8VMx86Mui0N5Vh-EI0AoHnwxRU5DSMU,2032
|
|
10
|
-
megfile/http_path.py,sha256=
|
|
10
|
+
megfile/http_path.py,sha256=WJd8-s_xsF71rC2QtXikH--FCsqn2u-e83YF8OarKeY,11593
|
|
11
11
|
megfile/interfaces.py,sha256=h3tWE8hVt5S-HopaMAX6lunPJ97vzhv6jH_2HubcDNc,6219
|
|
12
12
|
megfile/pathlike.py,sha256=Ere6tMf2nsI7bDsZo0WBzl_2HRrS_4iKOpYp0zZltAU,29487
|
|
13
13
|
megfile/s3.py,sha256=siBZfveWX1TDA4Mp41UvugcG3zlrhl_iPUbixUp1TmI,12352
|
|
14
|
-
megfile/s3_path.py,sha256=
|
|
14
|
+
megfile/s3_path.py,sha256=spksQesniQOYWmfIPfJeU4kMSiFAIlB8v2yIHVRLliA,91305
|
|
15
15
|
megfile/sftp.py,sha256=JCkF2v1ZbHuIy_Bg3l85AesjFDimDzx9Gh1gRoMsahc,12524
|
|
16
16
|
megfile/sftp_path.py,sha256=ErPKmwgaCOvvhp3aKhqX9WKIAGbWR30QUWvptQWtag8,51666
|
|
17
17
|
megfile/smart.py,sha256=y5Dzr7_f0jS2FJDF4tWbEO4SPf39zqYftqkVgMhiJds,33725
|
|
18
18
|
megfile/smart_path.py,sha256=Y0UFh4J2ccydRY2W-wX2ubaf9zzJx1M2nf-VLBGe4mk,6749
|
|
19
19
|
megfile/stdio.py,sha256=yRhlfUA2DHi3bq-9cXsSlbLCnHvS_zvglO2IYYyPsGc,707
|
|
20
20
|
megfile/stdio_path.py,sha256=eQulTXUwHvUKA-5PKCGfVNiEPkJhG9YtVhtU58OcmoM,2873
|
|
21
|
-
megfile/version.py,sha256=
|
|
21
|
+
megfile/version.py,sha256=DnboapDM00MYgi1PslOAcm9O5A6fEv0tG12NA4yH9TI,19
|
|
22
22
|
megfile/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
23
|
-
megfile/lib/base_prefetch_reader.py,sha256=
|
|
23
|
+
megfile/lib/base_prefetch_reader.py,sha256=WKwrpuniO8a1iU2k5mIHEpuBP8caLtIeqLrpKbmqX_A,13185
|
|
24
24
|
megfile/lib/combine_reader.py,sha256=XFSqEY5A5X5Uf7eQ6AXAzrvNteESSXvKNVPktGjo3KY,4546
|
|
25
25
|
megfile/lib/compare.py,sha256=yG2fZve_gMg32rQVCdwixBdqgYRsjn-24TqhALQaOrA,2233
|
|
26
26
|
megfile/lib/compat.py,sha256=0wt3_atcYhSLCxUj_WuDlQa3E1atjZfwJQ12thiFh5Q,234
|
|
@@ -41,12 +41,12 @@ megfile/lib/s3_share_cache_reader.py,sha256=ecx-62wGqgTn5vHIpKhNACCHdYWm6oc1_xwy
|
|
|
41
41
|
megfile/lib/shadow_handler.py,sha256=IbFyTw107t-yWH0cGrDjAJX-CS3xeEr77_PTGsnSgk4,2683
|
|
42
42
|
megfile/lib/stdio_handler.py,sha256=QDWtcZxz-hzi-rqQUiSlR3NrihX1fjK_Rj9T2mdTFEg,2044
|
|
43
43
|
megfile/lib/url.py,sha256=VbQLjo0s4AaV0iSk66BcjI68aUTcN9zBZ5x6-cM4Qvs,103
|
|
44
|
-
megfile/utils/__init__.py,sha256=
|
|
44
|
+
megfile/utils/__init__.py,sha256=xrBIJcVJTb_yR68ekAyd9u4HQ46s3xgIjUZoH_Lx7hU,9531
|
|
45
45
|
megfile/utils/mutex.py,sha256=-2KH3bNovKRd9zvsXq9n3bWM7rQdoG9hO7tUPxVG_Po,2538
|
|
46
|
-
megfile-3.0.
|
|
47
|
-
megfile-3.0.
|
|
48
|
-
megfile-3.0.
|
|
49
|
-
megfile-3.0.
|
|
50
|
-
megfile-3.0.
|
|
51
|
-
megfile-3.0.
|
|
52
|
-
megfile-3.0.
|
|
46
|
+
megfile-3.0.4.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
|
|
47
|
+
megfile-3.0.4.dist-info/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
|
|
48
|
+
megfile-3.0.4.dist-info/METADATA,sha256=ULNUWUSidVxRQfUIJquDdSrV87Os1OQByXqRhO-7TPw,8963
|
|
49
|
+
megfile-3.0.4.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
|
50
|
+
megfile-3.0.4.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
|
|
51
|
+
megfile-3.0.4.dist-info/top_level.txt,sha256=i3rMgdU1ZAJekAceojhA-bkm3749PzshtRmLTbeLUPQ,8
|
|
52
|
+
megfile-3.0.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|