megfile 3.0.2__py3-none-any.whl → 3.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
megfile/hdfs_path.py CHANGED
@@ -14,7 +14,7 @@ from megfile.lib.hdfs_tools import hdfs_api
14
14
  from megfile.lib.url import get_url_scheme
15
15
  from megfile.pathlike import PathLike, URIPath
16
16
  from megfile.smart_path import SmartPath
17
- from megfile.utils import cachedproperty
17
+ from megfile.utils import _is_pickle, cachedproperty
18
18
 
19
19
  __all__ = [
20
20
  'HdfsPath',
@@ -603,6 +603,8 @@ class HdfsPath(URIPath):
603
603
  client=self._client,
604
604
  profile_name=self._profile_name,
605
605
  **input_kwargs)
606
+ if _is_pickle(file_obj): # pytype: disable=wrong-arg-types
607
+ file_obj = io.BufferedReader(file_obj) # pytype: disable=wrong-arg-types
606
608
  if 'b' not in mode:
607
609
  file_obj = io.TextIOWrapper(
608
610
  file_obj, encoding=encoding, errors=errors) # pytype: disable=wrong-arg-types
megfile/http_path.py CHANGED
@@ -16,7 +16,7 @@ from megfile.lib.s3_buffered_writer import DEFAULT_MAX_BUFFER_SIZE
16
16
  from megfile.lib.url import get_url_scheme
17
17
  from megfile.pathlike import PathLike
18
18
  from megfile.smart_path import SmartPath
19
- from megfile.utils import binary_open
19
+ from megfile.utils import _is_pickle, binary_open
20
20
 
21
21
  __all__ = [
22
22
  'HttpPath',
@@ -223,7 +223,7 @@ class HttpPath(URIPath):
223
223
  else:
224
224
  block_forward = max(int(block_capacity * forward_ratio), 1)
225
225
 
226
- return HttpPrefetchReader(
226
+ reader = HttpPrefetchReader(
227
227
  self.path_with_protocol,
228
228
  content_size=content_size,
229
229
  max_retries=max_retries,
@@ -232,6 +232,10 @@ class HttpPath(URIPath):
232
232
  block_forward=block_forward,
233
233
  block_size=block_size,
234
234
  )
235
+ if _is_pickle(reader): # pytype: disable=wrong-arg-types
236
+ reader = io.BufferedReader(reader) # pytype: disable=wrong-arg-types
237
+ return reader
238
+
235
239
  response.raw.auto_close = False
236
240
  response.raw.name = self.path_with_protocol
237
241
  return BufferedReader(response.raw)
@@ -1,5 +1,5 @@
1
1
  import os
2
- from abc import ABC, abstractmethod, abstractproperty
2
+ from abc import ABC, abstractmethod
3
3
  from collections import OrderedDict
4
4
  from concurrent.futures import Future, ThreadPoolExecutor
5
5
  from io import BytesIO
@@ -85,7 +85,8 @@ class BasePrefetchReader(Readable, Seekable, ABC):
85
85
  def _get_futures(self):
86
86
  return LRUCacheFutureManager()
87
87
 
88
- @abstractproperty
88
+ @property
89
+ @abstractmethod
89
90
  def name(self) -> str:
90
91
  pass
91
92
 
megfile/s3_path.py CHANGED
@@ -31,7 +31,7 @@ from megfile.lib.s3_prefetch_reader import S3PrefetchReader
31
31
  from megfile.lib.s3_share_cache_reader import S3ShareCacheReader
32
32
  from megfile.lib.url import get_url_scheme
33
33
  from megfile.smart_path import SmartPath
34
- from megfile.utils import cachedproperty, calculate_md5, generate_cache_path, get_binary_mode, get_content_offset, is_readable, necessary_params, process_local, thread_local
34
+ from megfile.utils import _is_pickle, cachedproperty, calculate_md5, generate_cache_path, get_binary_mode, get_content_offset, is_readable, necessary_params, process_local, thread_local
35
35
 
36
36
  __all__ = [
37
37
  'S3Path',
@@ -461,7 +461,10 @@ def _s3_glob_stat_single_path(
461
461
  return False
462
462
 
463
463
  def create_generator(_s3_pathname) -> Iterator[FileEntry]:
464
- if not S3Path(top_dir).exists():
464
+ top_dir_with_profile = top_dir
465
+ if profile_name:
466
+ top_dir_with_profile = f's3+{profile_name}://{top_dir[5:]}'
467
+ if not S3Path(top_dir_with_profile).exists():
465
468
  return
466
469
  if not has_magic(_s3_pathname):
467
470
  _s3_pathname_obj = S3Path(_s3_pathname)
@@ -772,7 +775,7 @@ def s3_buffered_open(
772
775
  forward_ratio: Optional[float] = None,
773
776
  block_size: int = DEFAULT_BLOCK_SIZE,
774
777
  limited_seekable: bool = False,
775
- buffered: bool = True,
778
+ buffered: bool = False,
776
779
  share_cache_key: Optional[str] = None,
777
780
  cache_path: Optional[str] = None
778
781
  ) -> Union[S3PrefetchReader, S3BufferedWriter, io.BufferedReader, io.
@@ -855,7 +858,7 @@ def s3_buffered_open(
855
858
  block_forward=block_forward,
856
859
  block_size=block_size,
857
860
  profile_name=s3_url._profile_name)
858
- if buffered:
861
+ if buffered or _is_pickle(reader): # pytype: disable=wrong-arg-types
859
862
  reader = io.BufferedReader(reader) # pytype: disable=wrong-arg-types
860
863
  return reader
861
864
 
@@ -877,7 +880,7 @@ def s3_buffered_open(
877
880
  max_buffer_size=max_buffer_size,
878
881
  block_size=block_size,
879
882
  profile_name=s3_url._profile_name)
880
- if buffered:
883
+ if buffered or _is_pickle(writer): # pytype: disable=wrong-arg-types
881
884
  writer = io.BufferedWriter(writer) # pytype: disable=wrong-arg-types
882
885
  return writer
883
886
 
megfile/utils/__init__.py CHANGED
@@ -6,9 +6,6 @@ import uuid
6
6
  from copy import copy
7
7
  from functools import wraps
8
8
  from io import BufferedIOBase, BufferedRandom, BufferedReader, BufferedWriter, BytesIO, StringIO, TextIOBase, TextIOWrapper
9
- from multiprocessing.util import register_after_fork
10
- from threading import RLock as _RLock
11
- from threading import local
12
9
  from typing import IO, Callable, Optional
13
10
 
14
11
  from megfile.utils.mutex import ProcessLocal, ThreadLocal
@@ -72,6 +69,20 @@ def is_writable(fileobj: IO) -> bool:
72
69
  return hasattr(fileobj, 'write')
73
70
 
74
71
 
72
+ def _is_pickle(fileobj: IO) -> bool:
73
+ ''' Test if File Object is pickle'''
74
+ if fileobj.name.endswith('.pkl') or fileobj.name.endswith('.pickle'):
75
+ return True
76
+
77
+ if 'r' in fileobj.mode and 'b' in fileobj.mode:
78
+ offset = fileobj.tell()
79
+ data = fileobj.read(2)
80
+ fileobj.seek(offset)
81
+ if len(data) >= 2 and data[0] == 128 and 2 <= data[1] <= 5:
82
+ return True
83
+ return False
84
+
85
+
75
86
  def get_content_offset(start: Optional[int], stop: Optional[int], size: int):
76
87
  if start is None:
77
88
  start = 0
@@ -94,7 +105,7 @@ def get_mode(fileobj, default='r'):
94
105
  return getattr(fileobj, 'mode', default)
95
106
 
96
107
 
97
- def shadow_copy(fileobj: IO, intrusive: bool = True, buffered: bool = True):
108
+ def shadow_copy(fileobj: IO, intrusive: bool = True, buffered: bool = False):
98
109
  ''' Create a File-Like Object, maintaining file pointer, to avoid misunderstanding the position when read / write / seek
99
110
 
100
111
  :param intrusive: If is intrusive. If True, move file pointer to the original position after every read / write / seek. If False, then not.
@@ -103,7 +114,7 @@ def shadow_copy(fileobj: IO, intrusive: bool = True, buffered: bool = True):
103
114
  from megfile.lib.shadow_handler import ShadowHandler
104
115
  result = ShadowHandler(fileobj, intrusive=intrusive)
105
116
  mode = get_mode(fileobj)
106
- if buffered and "b" in mode:
117
+ if "b" in mode and (buffered or _is_pickle(result)): # pytype: disable=wrong-arg-types
107
118
  if "+" in mode:
108
119
  result = BufferedRandom(result)
109
120
  elif "x" in mode or "w" in mode or "a" in mode:
megfile/version.py CHANGED
@@ -1 +1 @@
1
- VERSION = "3.0.2"
1
+ VERSION = "3.0.3"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: megfile
3
- Version: 3.0.2
3
+ Version: 3.0.3
4
4
  Summary: Megvii file operation library
5
5
  Home-page: https://github.com/megvii-research/megfile
6
6
  Author: megvii
@@ -5,22 +5,22 @@ megfile/errors.py,sha256=Sbx3UEKnzuyUmB1tFU9cZv61Yr4dRa79J6D0UMmkvj4,13323
5
5
  megfile/fs.py,sha256=OfY0z4GSl8fT3mDGdeqP2hWFsd1QJl-h8RkSbg6-M8I,11547
6
6
  megfile/fs_path.py,sha256=sHn-sBcvq7SvYN71onkA_ssLs71NzM1MM3d3Sug8uzo,38237
7
7
  megfile/hdfs.py,sha256=aAkHobOO0nDcLoqj0tx_1tvgoLOCooTWuukq0pO-nQA,9156
8
- megfile/hdfs_path.py,sha256=rVmdHydhe0x6Vn7hblewghiWZIdZswe4hfxLOHm8vCM,26677
8
+ megfile/hdfs_path.py,sha256=obfMMKSuBcSGgtEN18jXhEldm4hawMhxk8QoRv4k790,26859
9
9
  megfile/http.py,sha256=a3oAuARSSaIU8VMx86Mui0N5Vh-EI0AoHnwxRU5DSMU,2032
10
- megfile/http_path.py,sha256=ciqS32V0G3A7GVOU_lqVzRjZeyr9skXfsU87SPrkyS4,11396
10
+ megfile/http_path.py,sha256=WJd8-s_xsF71rC2QtXikH--FCsqn2u-e83YF8OarKeY,11593
11
11
  megfile/interfaces.py,sha256=h3tWE8hVt5S-HopaMAX6lunPJ97vzhv6jH_2HubcDNc,6219
12
12
  megfile/pathlike.py,sha256=Ere6tMf2nsI7bDsZo0WBzl_2HRrS_4iKOpYp0zZltAU,29487
13
13
  megfile/s3.py,sha256=siBZfveWX1TDA4Mp41UvugcG3zlrhl_iPUbixUp1TmI,12352
14
- megfile/s3_path.py,sha256=FYGjifwbhfraWZKKGRcuRAyllJwFcv_H1wjN5kYGzcw,90791
14
+ megfile/s3_path.py,sha256=vypCpnNzkObng6BVcq4j3Y_W3J2btq16B8154bWVPjg,91067
15
15
  megfile/sftp.py,sha256=JCkF2v1ZbHuIy_Bg3l85AesjFDimDzx9Gh1gRoMsahc,12524
16
16
  megfile/sftp_path.py,sha256=ErPKmwgaCOvvhp3aKhqX9WKIAGbWR30QUWvptQWtag8,51666
17
17
  megfile/smart.py,sha256=y5Dzr7_f0jS2FJDF4tWbEO4SPf39zqYftqkVgMhiJds,33725
18
18
  megfile/smart_path.py,sha256=Y0UFh4J2ccydRY2W-wX2ubaf9zzJx1M2nf-VLBGe4mk,6749
19
19
  megfile/stdio.py,sha256=yRhlfUA2DHi3bq-9cXsSlbLCnHvS_zvglO2IYYyPsGc,707
20
20
  megfile/stdio_path.py,sha256=eQulTXUwHvUKA-5PKCGfVNiEPkJhG9YtVhtU58OcmoM,2873
21
- megfile/version.py,sha256=HGcItPfK5-rkjDD8Y1bmaADbd3qo7wyXrM39-uif4RY,19
21
+ megfile/version.py,sha256=UE7hhbkRaCCOli6ABmNbLoQp1rQZubN3Q_YGWfVH4sY,19
22
22
  megfile/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
- megfile/lib/base_prefetch_reader.py,sha256=9HT0tcgXa95BdbyclpDEDw8WwKi6091GRQerS90-pjE,13191
23
+ megfile/lib/base_prefetch_reader.py,sha256=WKwrpuniO8a1iU2k5mIHEpuBP8caLtIeqLrpKbmqX_A,13185
24
24
  megfile/lib/combine_reader.py,sha256=XFSqEY5A5X5Uf7eQ6AXAzrvNteESSXvKNVPktGjo3KY,4546
25
25
  megfile/lib/compare.py,sha256=yG2fZve_gMg32rQVCdwixBdqgYRsjn-24TqhALQaOrA,2233
26
26
  megfile/lib/compat.py,sha256=0wt3_atcYhSLCxUj_WuDlQa3E1atjZfwJQ12thiFh5Q,234
@@ -41,12 +41,12 @@ megfile/lib/s3_share_cache_reader.py,sha256=ecx-62wGqgTn5vHIpKhNACCHdYWm6oc1_xwy
41
41
  megfile/lib/shadow_handler.py,sha256=IbFyTw107t-yWH0cGrDjAJX-CS3xeEr77_PTGsnSgk4,2683
42
42
  megfile/lib/stdio_handler.py,sha256=QDWtcZxz-hzi-rqQUiSlR3NrihX1fjK_Rj9T2mdTFEg,2044
43
43
  megfile/lib/url.py,sha256=VbQLjo0s4AaV0iSk66BcjI68aUTcN9zBZ5x6-cM4Qvs,103
44
- megfile/utils/__init__.py,sha256=qdX8FF_dYFKwp1BIWx3JeSGd91s7AKUDSEpDv9tORcM,9162
44
+ megfile/utils/__init__.py,sha256=xrBIJcVJTb_yR68ekAyd9u4HQ46s3xgIjUZoH_Lx7hU,9531
45
45
  megfile/utils/mutex.py,sha256=-2KH3bNovKRd9zvsXq9n3bWM7rQdoG9hO7tUPxVG_Po,2538
46
- megfile-3.0.2.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
47
- megfile-3.0.2.dist-info/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
48
- megfile-3.0.2.dist-info/METADATA,sha256=DnkmGRjSQJP0v3CgkHNwp7th5pKwJcF7qyKUN1tKaDs,8916
49
- megfile-3.0.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
50
- megfile-3.0.2.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
51
- megfile-3.0.2.dist-info/top_level.txt,sha256=i3rMgdU1ZAJekAceojhA-bkm3749PzshtRmLTbeLUPQ,8
52
- megfile-3.0.2.dist-info/RECORD,,
46
+ megfile-3.0.3.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
47
+ megfile-3.0.3.dist-info/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
48
+ megfile-3.0.3.dist-info/METADATA,sha256=icEVsbrmXVTDGlH9axr1walAIB0pHrCPb6Jj0FpPAhA,8916
49
+ megfile-3.0.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
50
+ megfile-3.0.3.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
51
+ megfile-3.0.3.dist-info/top_level.txt,sha256=i3rMgdU1ZAJekAceojhA-bkm3749PzshtRmLTbeLUPQ,8
52
+ megfile-3.0.3.dist-info/RECORD,,