megfile 4.2.3__py3-none-any.whl → 4.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
megfile/sftp_path.py CHANGED
@@ -22,7 +22,7 @@ from megfile.lib.compat import fspath
22
22
  from megfile.lib.glob import FSFunc, iglob
23
23
  from megfile.pathlike import URIPath
24
24
  from megfile.smart_path import SmartPath
25
- from megfile.utils import calculate_md5, thread_local
25
+ from megfile.utils import calculate_md5, copyfileobj, thread_local
26
26
 
27
27
  _logger = get_logger(__name__)
28
28
 
@@ -713,12 +713,7 @@ class SftpPath(URIPath):
713
713
  if overwrite or not dst_path.exists():
714
714
  with self.open("rb") as fsrc:
715
715
  with dst_path.open("wb") as fdst:
716
- length = 16 * 1024
717
- while True:
718
- buf = fsrc.read(length)
719
- if not buf:
720
- break
721
- fdst.write(buf)
716
+ copyfileobj(fsrc, fdst)
722
717
  self.unlink()
723
718
 
724
719
  dst_path.utime(src_stat.st_atime, src_stat.st_mtime)
@@ -1134,14 +1129,7 @@ class SftpPath(URIPath):
1134
1129
  else:
1135
1130
  with self.open("rb") as fsrc:
1136
1131
  with dst_path.open("wb") as fdst:
1137
- length = 16 * 1024
1138
- while True:
1139
- buf = fsrc.read(length)
1140
- if not buf:
1141
- break
1142
- fdst.write(buf)
1143
- if callback:
1144
- callback(len(buf))
1132
+ copyfileobj(fsrc, fdst, callback)
1145
1133
 
1146
1134
  src_stat = self.stat()
1147
1135
  dst_path.utime(src_stat.st_atime, src_stat.st_mtime)
megfile/smart.py CHANGED
@@ -45,7 +45,7 @@ from megfile.s3 import (
45
45
  )
46
46
  from megfile.sftp import sftp_concat, sftp_copy, sftp_download, sftp_upload
47
47
  from megfile.smart_path import SmartPath, get_traditional_path
48
- from megfile.utils import combine, generate_cache_path
48
+ from megfile.utils import combine, copyfileobj, generate_cache_path
49
49
 
50
50
  __all__ = [
51
51
  "smart_access",
@@ -289,16 +289,7 @@ def _default_copy_func(
289
289
 
290
290
  with smart_open(src_path, "rb", followlinks=followlinks) as fsrc:
291
291
  with smart_open(dst_path, "wb") as fdst:
292
- # This magic number is copied from copyfileobj
293
- length = 16 * 1024
294
- while True:
295
- buf = fsrc.read(length)
296
- if not buf:
297
- break
298
- fdst.write(buf)
299
- if callback is None:
300
- continue
301
- callback(len(buf))
292
+ copyfileobj(fsrc, fdst, callback)
302
293
  try:
303
294
  src_stat = smart_stat(src_path)
304
295
  dst_path = SmartPath(dst_path)
@@ -723,6 +714,8 @@ def smart_open(
723
714
  read mode. Only be used in s3, http, hdfs.
724
715
  :param block_size: Size of single block. Each block will be uploaded by single
725
716
  thread. Only be used in s3, http, hdfs.
717
+ :param buffered: If you are operating pickle file without .pkl or .pickle extension,
718
+ please set this to True to avoid the performance issue.
726
719
 
727
720
  :returns: File-Like object
728
721
  :raises: FileNotFoundError, IsADirectoryError, ValueError
@@ -1101,15 +1094,10 @@ _concat_funcs = {"s3": s3_concat, "sftp": sftp_concat}
1101
1094
 
1102
1095
 
1103
1096
  def _default_concat_func(src_paths: List[PathLike], dst_path: PathLike) -> None:
1104
- length = 16 * 1024
1105
1097
  with smart_open(dst_path, "wb") as dst_fd:
1106
1098
  for src_path in src_paths:
1107
1099
  with smart_open(src_path, "rb") as src_fd:
1108
- while True:
1109
- buf = src_fd.read(length)
1110
- if not buf:
1111
- break
1112
- dst_fd.write(buf)
1100
+ copyfileobj(src_fd, dst_fd)
1113
1101
 
1114
1102
 
1115
1103
  def smart_concat(src_paths: List[PathLike], dst_path: PathLike) -> None:
megfile/utils/__init__.py CHANGED
@@ -16,8 +16,13 @@ from io import (
16
16
  TextIOWrapper,
17
17
  )
18
18
  from threading import RLock
19
- from typing import IO, Callable, Optional
19
+ from typing import IO, Callable, List, Optional
20
20
 
21
+ from megfile.config import (
22
+ DEFAULT_COPY_BUFFER_SIZE,
23
+ DEFAULT_HASH_BUFFER_SIZE,
24
+ READER_LAZY_PREFETCH,
25
+ )
21
26
  from megfile.utils.mutex import ProcessLocal, ThreadLocal
22
27
 
23
28
 
@@ -81,16 +86,22 @@ def is_writable(fileobj: IO) -> bool:
81
86
 
82
87
  def _is_pickle(fileobj) -> bool:
83
88
  """Test if File Object is pickle"""
89
+ if READER_LAZY_PREFETCH:
90
+ return False
91
+
84
92
  if fileobj.name.endswith(".pkl") or fileobj.name.endswith(".pickle"):
85
93
  return True
86
94
 
87
- if "r" in fileobj.mode and "b" in fileobj.mode:
88
- offset = fileobj.tell()
89
- fileobj.seek(0)
90
- data = fileobj.read(2)
91
- fileobj.seek(offset)
92
- if len(data) >= 2 and data[0] == 128 and 2 <= data[1] <= 5:
93
- return True
95
+ # XXX: read 2 bytes will trigger pre read in prefetch reader,
96
+ # so comment out the code for now.
97
+ #
98
+ # if "r" in fileobj.mode and "b" in fileobj.mode:
99
+ # offset = fileobj.tell()
100
+ # fileobj.seek(0)
101
+ # data = fileobj.read(2)
102
+ # fileobj.seek(offset)
103
+ # if len(data) >= 2 and data[0] == 128 and 2 <= data[1] <= 5:
104
+ # return True
94
105
  return False
95
106
 
96
107
 
@@ -257,7 +268,7 @@ def _get_class(cls_or_obj) -> type:
257
268
 
258
269
  def calculate_md5(file_object):
259
270
  hash_md5 = hashlib.md5() # nosec
260
- for chunk in iter(lambda: file_object.read(4096), b""):
271
+ for chunk in iter(lambda: file_object.read(DEFAULT_HASH_BUFFER_SIZE), b""):
261
272
  hash_md5.update(chunk)
262
273
  return hash_md5.hexdigest()
263
274
 
@@ -337,3 +348,75 @@ def is_domain_or_subdomain(sub, parent):
337
348
  if sub.endswith(f".{parent}"):
338
349
  return True
339
350
  return False
351
+
352
+
353
+ def copyfileobj(
354
+ fsrc: IO,
355
+ fdst: IO,
356
+ callback: Optional[Callable[[int], None]] = None,
357
+ length: int = DEFAULT_COPY_BUFFER_SIZE,
358
+ ) -> None:
359
+ """Copy data from fsrc to fdst with optional progress callback.
360
+
361
+ This is similar to shutil.copyfileobj but with callback support.
362
+
363
+ Args:
364
+ fsrc: Source file-like object (opened for reading)
365
+ fdst: Destination file-like object (opened for writing)
366
+ callback: Optional callback function called with number of bytes written
367
+ length: Buffer size for copying (default: DEFAULT_COPY_BUFFER_SIZE)
368
+ """
369
+ while True:
370
+ buf = fsrc.read(length)
371
+ if not buf:
372
+ break
373
+ fdst.write(buf)
374
+ if callback:
375
+ callback(len(buf))
376
+
377
+
378
+ def copyfd(
379
+ src_fd: int,
380
+ fdst: IO,
381
+ callback: Optional[Callable[[int], None]] = None,
382
+ length: int = DEFAULT_COPY_BUFFER_SIZE,
383
+ ) -> None:
384
+ """Copy data from file descriptor to file object with optional progress callback.
385
+
386
+ Args:
387
+ src_fd: Source file descriptor (integer)
388
+ fdst: Destination file-like object (opened for writing)
389
+ callback: Optional callback function called with number of bytes written
390
+ length: Buffer size for copying (default: DEFAULT_COPY_BUFFER_SIZE)
391
+ """
392
+ while True:
393
+ buf = os.read(src_fd, length)
394
+ if not buf:
395
+ break
396
+ fdst.write(buf)
397
+ if callback:
398
+ callback(len(buf))
399
+
400
+
401
+ def copyfileobj_multi(
402
+ fsrc: IO,
403
+ fdst_list: List[IO],
404
+ callback: Optional[Callable[[int], None]] = None,
405
+ length: int = DEFAULT_COPY_BUFFER_SIZE,
406
+ ) -> None:
407
+ """Copy data from fsrc to multiple destinations with optional progress callback.
408
+
409
+ Args:
410
+ fsrc: Source file-like object (opened for reading)
411
+ fdst_list: List of destination file-like objects (opened for writing)
412
+ callback: Optional callback function called with number of bytes written
413
+ length: Buffer size for copying (default: DEFAULT_COPY_BUFFER_SIZE)
414
+ """
415
+ while True:
416
+ buf = fsrc.read(length)
417
+ if not buf:
418
+ break
419
+ for fdst in fdst_list:
420
+ fdst.write(buf)
421
+ if callback:
422
+ callback(len(buf))
megfile/version.py CHANGED
@@ -1 +1 @@
1
- VERSION = "4.2.3"
1
+ VERSION = "4.2.4"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: megfile
3
- Version: 4.2.3
3
+ Version: 4.2.4
4
4
  Summary: Megvii file operation library
5
5
  Author-email: megvii <megfile@megvii.com>
6
6
  Project-URL: Homepage, https://github.com/megvii-research/megfile
@@ -29,6 +29,8 @@ Requires-Dist: tqdm
29
29
  Requires-Dist: pyyaml
30
30
  Provides-Extra: hdfs
31
31
  Requires-Dist: hdfs; extra == "hdfs"
32
+ Provides-Extra: sftp2
33
+ Requires-Dist: ssh2-python>=1.0.0; extra == "sftp2"
32
34
  Provides-Extra: cli
33
35
  Requires-Dist: click; extra == "cli"
34
36
  Dynamic: license-file
@@ -1,9 +1,9 @@
1
- megfile/__init__.py,sha256=7oEfu410CFKzDWZ9RjL5xEJ1gtkJkTfvPrL_7TWdJuY,7366
2
- megfile/cli.py,sha256=VxY0__M19Ti_S7ZIozp9l0FxvdLwdd6eQL-wfpYOi_0,29160
3
- megfile/config.py,sha256=2MMj5QkhlDJQFZRbCQL2c9iDdeMAVctiaPszRBkg5vM,3988
1
+ megfile/__init__.py,sha256=K7ZXETL1HuXwR-xeeRWd7xtvDCCczB-DffkVTFlqxLY,7458
2
+ megfile/cli.py,sha256=5HBBRu0T3lF53AhPJLQXF_pzHy0G6E5yH8m1e4NYfJg,29100
3
+ megfile/config.py,sha256=0h5miOJo35Wdk67XgaFHieaVlbvaiciil9IjYkL1Geg,4260
4
4
  megfile/errors.py,sha256=cGSYyB7VBRKi1Gehgt9IO-wDvtzICV4XgKOkoMvLU5w,15583
5
5
  megfile/fs.py,sha256=KMEqAE35alpcxiy6du5nPFYcaorhUM_kPJMah3q76ng,19160
6
- megfile/fs_path.py,sha256=Hozl9LAJ8EMuSWBSZXGj2GNmPZ1sJp9PZs-7hPrLgm8,39341
6
+ megfile/fs_path.py,sha256=J7rx6vkIFFbWFb2B2MHALa-p9yABLAMv-FUOqrg8VSk,39027
7
7
  megfile/hdfs.py,sha256=owXr4d3j1frCvlbhmhENcSBnKKDky5cJZzWLOF4ZJMo,13251
8
8
  megfile/hdfs_path.py,sha256=OmUe3vA3Qoxnqtcq0Rs3ygBvzAtqUz3fGo8iP5sWneE,26058
9
9
  megfile/http.py,sha256=1nuGe-JbnwMFyV3s35CJxByED3uoRoS9y8Y8cSGP9Kw,3865
@@ -11,16 +11,18 @@ megfile/http_path.py,sha256=08OmzmRMyLSyq1Yr1K2HbzexesURJrIoA6AibwYzUiA,13844
11
11
  megfile/interfaces.py,sha256=p4UvVZpeLx5djd6bqqDaygIx_s-_AxIVj-gudTch4JE,8467
12
12
  megfile/pathlike.py,sha256=3Hnw-fn6RcIe9iPrJt00QdHSA--UfDyxnVBuZ_ymYYQ,31278
13
13
  megfile/s3.py,sha256=abBxnI7RIyn7n7qjGszP1VruYd6Gi9I8QnUOvsHkx1Y,16325
14
- megfile/s3_path.py,sha256=yulypUpJ2k0WzOPbeXyx8Q75YGJa3R5qXBt2QmMt9H0,93901
14
+ megfile/s3_path.py,sha256=-CPqLQzhkqmmhWLTd6sbd1-JULzseDayhcH9HqQ_5LE,94072
15
15
  megfile/sftp.py,sha256=uBcLQs-j6Q-q-sWAdd-pgi5Qmb_kq7boJM-0sCfcNO0,26540
16
- megfile/sftp_path.py,sha256=CgirHWmNdXdqyIL9ufmlaMpwFhlkQVZhqmfvjUaj7qU,43845
17
- megfile/smart.py,sha256=GnabQVb_NU7a-etKfF-NgpJ9JM2rT0uZBeA9UoNz_wM,37014
16
+ megfile/sftp2.py,sha256=9l6TJfXUAsTydgV60HXgpAaBNjfzSgDcpdUexhHu2Do,26935
17
+ megfile/sftp2_path.py,sha256=D38d4q_b9N79Mwe7m348OQB9V9hPQOoSe-rpRXWa-6M,38675
18
+ megfile/sftp_path.py,sha256=U-5NIVnqyoVEjQFEy8pkAeM1UGqPhaAzq5U_dkxK85c,43374
19
+ megfile/smart.py,sha256=g9WeIxUsTa_4kuXpYJ65ZtqYfheqsaMZX-HUA1A7_jc,36731
18
20
  megfile/smart_path.py,sha256=Up_6xNZ2019iSzMn_JAU_1H--z-AP6O7SxdXGdeTG0c,7659
19
21
  megfile/stdio.py,sha256=ZwxsnJNJYIT7Iyg5pIw4qiyH8bszG6oAhEJuR-hXGG4,658
20
22
  megfile/stdio_path.py,sha256=cxaDr8rtisTPnN-rjtaEpqQnshwiqwXFUJBM9xWY7Cg,2711
21
- megfile/version.py,sha256=DjJW-M4YIrKI_sFscLAPh7I9pVVFC24RH3yBxS8RkNI,19
23
+ megfile/version.py,sha256=hnyIOxGx-9adm33CcvJetPS1oMoaaHkTCvydnNjsOLc,19
22
24
  megfile/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
- megfile/lib/base_prefetch_reader.py,sha256=uxVwYknOjc8hLF7q_T2QKMsBqFcrf411ZsuK25CN1eQ,12848
25
+ megfile/lib/base_prefetch_reader.py,sha256=MYaWOkXc3geZMYNPHlPZmmpOR6uSz-AMuCZwYdoz7t0,13296
24
26
  megfile/lib/combine_reader.py,sha256=Kp2wEloOUpTlIU7dve87MBpSzmIM-F9OtpTawAjFkiU,4828
25
27
  megfile/lib/compare.py,sha256=CPSbyqsQ396oSfxa7h0NdUUqBw5A3WOn6fHrNKkuinw,2188
26
28
  megfile/lib/compat.py,sha256=SynEeHluys3tCK-lb_1oV3o_ft83yZvunqM_AjibLgE,207
@@ -36,17 +38,17 @@ megfile/lib/s3_cached_handler.py,sha256=MkNt6AAapd5x8BH2gnW5_S0cLofN-mshEdb0qSoL
36
38
  megfile/lib/s3_limited_seekable_writer.py,sha256=joVcjoTx48jataXAwEqOI3toRxxO3-XQRvyWHtJ23lQ,6232
37
39
  megfile/lib/s3_memory_handler.py,sha256=epIzQgTlE_deuVlP4LTmsh1T3cmAgK8ATEwrit9j7X8,4186
38
40
  megfile/lib/s3_pipe_handler.py,sha256=g3iAN1P9pCdvSNsGeJBGcBa10S62oqIg_9W3b3wc7os,3809
39
- megfile/lib/s3_prefetch_reader.py,sha256=txJbXyxaFVS3wPvut5yW4ppVdLeDtVQIvCXFDlQfBMc,4474
40
- megfile/lib/s3_share_cache_reader.py,sha256=LVWKxHdHo0_zUIW4o8yqNvplqqwezUPeYEt02Vj-WNM,3754
41
+ megfile/lib/s3_prefetch_reader.py,sha256=AqfADmbbZYA6nw4vxBOiFWX5q5CSYOd0hq1LWcf1PY0,4524
42
+ megfile/lib/s3_share_cache_reader.py,sha256=8uip5IdVjPXCquXrskjocsZx2-TiXqWZPY0gX8JC144,4020
41
43
  megfile/lib/shadow_handler.py,sha256=TntewlvIW9ZxCfmqASDQREHoiZ8v42faOe9sovQYQz0,2779
42
44
  megfile/lib/stdio_handler.py,sha256=IDdgENLQlhigEwkLL4zStueVSzdWg7xVcTF_koof_Ek,1987
43
45
  megfile/lib/url.py,sha256=ER32pWy9Q2MAk3TraAaNEBWIqUeBmLuM57ol2cs7-Ks,103
44
- megfile/utils/__init__.py,sha256=pawmXnCNokWLj338a60b_hK21koYavpEiEohZhsOaGQ,10156
46
+ megfile/utils/__init__.py,sha256=4hBVSXbNTbDj7Je0y9SbwgcPm_s41H9v3eHUMr9JNGo,12700
45
47
  megfile/utils/mutex.py,sha256=asb8opGLgK22RiuBJUnfsvB8LnMmodP8KzCVHKmQBWA,2561
46
- megfile-4.2.3.dist-info/licenses/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
47
- megfile-4.2.3.dist-info/licenses/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
48
- megfile-4.2.3.dist-info/METADATA,sha256=EKObM2zjGcOsvRyMILjA4qgEvJg72mPB8icyZhBbZ9o,9601
49
- megfile-4.2.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
50
- megfile-4.2.3.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
51
- megfile-4.2.3.dist-info/top_level.txt,sha256=i3rMgdU1ZAJekAceojhA-bkm3749PzshtRmLTbeLUPQ,8
52
- megfile-4.2.3.dist-info/RECORD,,
48
+ megfile-4.2.4.dist-info/licenses/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
49
+ megfile-4.2.4.dist-info/licenses/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
50
+ megfile-4.2.4.dist-info/METADATA,sha256=7zDLJtMR3UfawCuutBu_LFtyKmeuXlVwnZJO6CY3nE8,9675
51
+ megfile-4.2.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
52
+ megfile-4.2.4.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
53
+ megfile-4.2.4.dist-info/top_level.txt,sha256=i3rMgdU1ZAJekAceojhA-bkm3749PzshtRmLTbeLUPQ,8
54
+ megfile-4.2.4.dist-info/RECORD,,