megfile 3.1.6__py3-none-any.whl → 3.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
megfile/config.py CHANGED
@@ -60,3 +60,5 @@ HDFS_MAX_RETRY_TIMES = int(
60
60
  SFTP_MAX_RETRY_TIMES = int(
61
61
  os.getenv("MEGFILE_SFTP_MAX_RETRY_TIMES") or DEFAULT_MAX_RETRY_TIMES
62
62
  )
63
+
64
+ HTTP_AUTH_HEADERS = ("Authorization", "Www-Authenticate", "Cookie", "Cookie2")
@@ -76,7 +76,11 @@ class S3PrefetchReader(BasePrefetchReader):
76
76
  try:
77
77
  start, end = 0, self._block_size - 1
78
78
  first_index_response = self._fetch_response(start=start, end=end)
79
- content_size = int(first_index_response["ContentRange"].split("/")[-1])
79
+ if "ContentRange" in first_index_response:
80
+ content_size = int(first_index_response["ContentRange"].split("/")[-1])
81
+ else:
82
+ # usually happens when the whole file fits in a single block
83
+ content_size = int(first_index_response["ContentLength"])
80
84
  except S3InvalidRangeError:
81
85
  # usually happens when reading an empty file
82
86
  # MinIO can be used to test the empty-file case: https://hub.docker.com/r/minio/minio
megfile/s3_path.py CHANGED
@@ -7,16 +7,18 @@ from concurrent.futures import ThreadPoolExecutor
7
7
  from functools import cached_property, lru_cache, wraps
8
8
  from logging import getLogger as get_logger
9
9
  from typing import IO, Any, BinaryIO, Callable, Dict, Iterator, List, Optional, Tuple
10
+ from urllib.parse import urlparse
10
11
 
11
12
  import boto3
12
13
  import botocore
13
- from botocore.awsrequest import AWSResponse
14
+ from botocore.awsrequest import AWSPreparedRequest, AWSResponse
14
15
 
15
16
  from megfile.config import (
16
17
  DEFAULT_BLOCK_SIZE,
17
18
  DEFAULT_MAX_BLOCK_SIZE,
18
19
  DEFAULT_MIN_BLOCK_SIZE,
19
20
  GLOBAL_MAX_WORKERS,
21
+ HTTP_AUTH_HEADERS,
20
22
  S3_CLIENT_CACHE_MODE,
21
23
  S3_MAX_RETRY_TIMES,
22
24
  )
@@ -76,6 +78,7 @@ from megfile.utils import (
76
78
  generate_cache_path,
77
79
  get_binary_mode,
78
80
  get_content_offset,
81
+ is_domain_or_subdomain,
79
82
  is_readable,
80
83
  necessary_params,
81
84
  process_local,
@@ -162,24 +165,30 @@ def _patch_make_request(client: botocore.client.BaseClient, redirect: bool = Fal
162
165
  retry_callback=retry_callback,
163
166
  )
164
167
 
165
- def patch_send_request(send_request):
166
- def patched_send_request(request_dict, operation_model):
167
- http, parsed_response = send_request(request_dict, operation_model)
168
+ def patch_send(send):
169
+ def patched_send(request: AWSPreparedRequest) -> AWSResponse:
170
+ response: AWSResponse = send(request)
168
171
  if (
169
- request_dict["method"] == "GET" # only support GET method for now
170
- and http.status_code in (301, 302, 307, 308)
171
- and "Location" in http.headers
172
+ request.method == "GET" # only support GET method for now
173
+ and response.status_code in (301, 302, 307, 308)
174
+ and "Location" in response.headers
172
175
  ):
173
- request_dict["url"] = http.headers["Location"]
174
- http, parsed_response = send_request(request_dict, operation_model)
175
- return http, parsed_response
176
-
177
- return patched_send_request
176
+ # Permit sending auth/cookie headers from "foo.com" to "sub.foo.com".
177
+ # See also: https://go.dev/src/net/http/client.go#L980
178
+ location = response.headers["Location"]
179
+ ihost = urlparse(request.url).hostname
180
+ dhost = urlparse(location).hostname
181
+ if not is_domain_or_subdomain(dhost, ihost):
182
+ for name in HTTP_AUTH_HEADERS:
183
+ request.headers.pop(name, None)
184
+ request.url = location
185
+ response = send(request)
186
+ return response
187
+
188
+ return patched_send
178
189
 
179
190
  if redirect:
180
- client._endpoint._send_request = patch_send_request(
181
- client._endpoint._send_request
182
- )
191
+ client._endpoint._send = patch_send(client._endpoint._send)
183
192
 
184
193
  return client
185
194
 
megfile/smart.py CHANGED
@@ -995,11 +995,12 @@ def smart_load_content(
995
995
  return s3_load_content(path, start, stop)
996
996
 
997
997
  with smart_open(path, "rb") as fd:
998
- if start:
998
+ if start is not None:
999
999
  fd.seek(start)
1000
1000
  offset = -1
1001
- if start and stop:
1002
- offset = stop - start
1001
+ if stop is not None:
1002
+ offset = stop - (start or 0) # start may be None
1003
+ assert offset >= 0, "stop should be greater than start"
1003
1004
  return fd.read(offset) # pytype: disable=bad-return-type
1004
1005
 
1005
1006
 
megfile/utils/__init__.py CHANGED
@@ -346,3 +346,11 @@ class cached_classproperty(cached_property):
346
346
  val = self.func(cls)
347
347
  setattr(cls, self.attrname, val)
348
348
  return val
349
+
350
+
351
def is_domain_or_subdomain(sub, parent) -> bool:
    """Return True if host ``sub`` equals ``parent`` or is a subdomain of it.

    Used to decide whether credentials may be forwarded across a redirect:
    ``"sub.foo.com"`` is a subdomain of ``"foo.com"``, while ``"barfoo.com"``
    and ``"foo.com.evil.org"`` are not.

    :param sub: Candidate host name (e.g. the redirect target); may be None
        or empty when the URL carried no host, e.g. a relative ``Location``.
    :param parent: Reference host name (e.g. the original request host);
        may be None or empty.
    :returns: True when ``sub`` is ``parent`` or ends with ``"." + parent``
        (compared case-insensitively), False otherwise.
    """
    if sub == parent:
        return True
    # A missing/empty host can never be a (sub)domain of anything, and an
    # empty parent must not match every host that merely ends with ".".
    if not sub or not parent:
        return False
    # Host names are case-insensitive.
    sub, parent = sub.lower(), parent.lower()
    if sub == parent:
        return True
    # A ":" or "%" means ``sub`` is an IPv6 literal (possibly with a zone),
    # not a host name; skip the suffix check so zone contents cannot spoof
    # a trusted domain. Go's net/http isDomainOrSubdomain does the same.
    if ":" in sub or "%" in sub:
        return False
    return sub.endswith("." + parent)
megfile/version.py CHANGED
@@ -1 +1 @@
1
- VERSION = "3.1.6"
1
+ VERSION = "3.1.7"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: megfile
3
- Version: 3.1.6
3
+ Version: 3.1.7
4
4
  Summary: Megvii file operation library
5
5
  Author-email: megvii <megfile@megvii.com>
6
6
  Project-URL: Homepage, https://github.com/megvii-research/megfile
@@ -27,10 +27,10 @@ Requires-Dist: requests
27
27
  Requires-Dist: paramiko
28
28
  Requires-Dist: tqdm
29
29
  Requires-Dist: pyyaml
30
- Provides-Extra: cli
31
- Requires-Dist: click; extra == "cli"
32
30
  Provides-Extra: hdfs
33
31
  Requires-Dist: hdfs; extra == "hdfs"
32
+ Provides-Extra: cli
33
+ Requires-Dist: click; extra == "cli"
34
34
 
35
35
  megfile - Megvii FILE library
36
36
  ---
@@ -1,7 +1,7 @@
1
1
  docs/conf.py,sha256=sfDSly5jO8W_RmuAptOIp4hd8dNcO-9a5XrHTbxFnNo,2448
2
2
  megfile/__init__.py,sha256=i2Lbq_VxIgppaqwkxG0_H35dRfcjJ4mCYWjprOf4hHo,7318
3
3
  megfile/cli.py,sha256=0Sgbz3jeUryVll9Aa6R0MpJdQJUGENvz55YF7Jm1Uxc,23482
4
- megfile/config.py,sha256=_SkJRaVWUdfW1Q9uX0vao-6YVQKJtfej22Z8DykuRps,2331
4
+ megfile/config.py,sha256=k52eO9YUyYRJ0-bsscAXcfEt8xIAHGOHdLSnEJf6z7k,2411
5
5
  megfile/errors.py,sha256=a55qKQgyfiLmV-qnojUFzq2gu9JXpj3ZiC2qVaWyUTA,14160
6
6
  megfile/fs.py,sha256=dgj5fW-EEzQNdjMF2tkB5DjXu3iHQbtLi5PSIMxR8fc,11966
7
7
  megfile/fs_path.py,sha256=Ffvukc176beH5aQMZXXtwH6ApwLYXPViCIUP0pijgT0,41590
@@ -12,14 +12,14 @@ megfile/http_path.py,sha256=BhMNjQVB85IaCGGIKzgEfY73mAVdCzJP08W1RuGeMRA,16119
12
12
  megfile/interfaces.py,sha256=7C53Q2FAVFmOEnplfplvWqHab29HJE5RQnpfdb4loVY,8679
13
13
  megfile/pathlike.py,sha256=5VAKIArm2UqrpMBJMoNAEydFxLd1mjCZ8iQnKFUIYu0,31274
14
14
  megfile/s3.py,sha256=7SdfLjAePVh-bpRyuj566VB4Qa7KP86rCJGzYANR7wQ,13008
15
- megfile/s3_path.py,sha256=x-_wxVvVpv56LtDOwRAWJCOq3XMk7oAB6xdi22TBlmY,95029
15
+ megfile/s3_path.py,sha256=kOrP45zQbxCxNQcoovd060QARkP8QWYKd8BQGfxGY2g,95447
16
16
  megfile/sftp.py,sha256=vyDnYXX3i1j2fhXMC8YCeX-66MDb9wrBQQjQVhZx0uo,13004
17
17
  megfile/sftp_path.py,sha256=4tByWvUJK1KBJoa3t5aoWYnZpaRWN9nQIE6ZyiGHrbk,53519
18
- megfile/smart.py,sha256=Vr4R7HpjXjt587KOc2-1QGbQ5EsZ48YRzCaK0rz3IS0,36108
18
+ megfile/smart.py,sha256=vfWhPgL26KrF4s9K5oV55R3zvAyCr9nvD9CsYFWVA9g,36218
19
19
  megfile/smart_path.py,sha256=Wsn6fR9g7NTwNwwvZ_0H39NLHIlOLnCqK-ZY0n5CvKk,7812
20
20
  megfile/stdio.py,sha256=UYe-h440Wc4f5COOzOTG1svnp5nFzrfpixehJ0_0_NY,653
21
21
  megfile/stdio_path.py,sha256=7jzVdreamO18yBWZM7Pp71cO7GmrYb0M0qyQde2Ypq4,2706
22
- megfile/version.py,sha256=GfRGwuY_9YYywKtd8Nc8lbkE6hlfX3NUsNtuHN-c8Gs,19
22
+ megfile/version.py,sha256=n9Z_RgYWmzkUHtPVhIWP6RlF4L6HnKS2N0unpIEw86U,19
23
23
  megfile/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
24
  megfile/lib/base_prefetch_reader.py,sha256=CaYWuiKmlk4Utr0IFDPwPC58wV2jBAhqpxhwhRHc734,13652
25
25
  megfile/lib/combine_reader.py,sha256=uSzo3PmhD5ck6_Vv6dFU5vVx4boeA97VS-puPyhF_BE,4657
@@ -37,19 +37,19 @@ megfile/lib/s3_cached_handler.py,sha256=QrQKck06ye16o7GD71T-fVCseKlOhsxp82LtBTtA
37
37
  megfile/lib/s3_limited_seekable_writer.py,sha256=v-e7rfFBfWCSQVtJIaFHM_i0Hb1FkfVLHlhawo5MOIk,6358
38
38
  megfile/lib/s3_memory_handler.py,sha256=NGKWbI4LG2cmV06CP7KOVPqS_BNpm3ApqKi5ibgIBvQ,4208
39
39
  megfile/lib/s3_pipe_handler.py,sha256=DY1UTNCq8oD3QWXNb4orOiz3EoEAo6dhwmZZdk6h1bU,3694
40
- megfile/lib/s3_prefetch_reader.py,sha256=YZA6JOQXcioREh_z1E-kZ2WRPTm02v0dCEVqyaOMHns,4287
40
+ megfile/lib/s3_prefetch_reader.py,sha256=gjnnYI95LFwxpneDFfLBzw8gvT1Vc8yJJlBL101oysI,4501
41
41
  megfile/lib/s3_share_cache_reader.py,sha256=jhGL1B6NPv68cQnW1Jf7ey-zTQ8XfiJg5ILDNgRWHy0,3671
42
42
  megfile/lib/shadow_handler.py,sha256=TntewlvIW9ZxCfmqASDQREHoiZ8v42faOe9sovQYQz0,2779
43
43
  megfile/lib/stdio_handler.py,sha256=IDdgENLQlhigEwkLL4zStueVSzdWg7xVcTF_koof_Ek,1987
44
44
  megfile/lib/url.py,sha256=ER32pWy9Q2MAk3TraAaNEBWIqUeBmLuM57ol2cs7-Ks,103
45
- megfile/utils/__init__.py,sha256=9rD_SoD--XWt7-EJi5-L80Y7YeoFdr-tUp-5ATB85oA,10717
45
+ megfile/utils/__init__.py,sha256=RAj8dAJZX5TkWKJu3Ip78uhA5XZ8wpir61eCm6bAnd4,10874
46
46
  megfile/utils/mutex.py,sha256=asb8opGLgK22RiuBJUnfsvB8LnMmodP8KzCVHKmQBWA,2561
47
47
  scripts/convert_results_to_sarif.py,sha256=nDiOfsedb22Ps7ZodmYdlXZlxv54fRxCQgOZsB2OkNk,2833
48
48
  scripts/generate_file.py,sha256=-mTcBiqiQ1juvqojVfVZ-uZWgpANHJNdhrF7s68zNfc,10903
49
- megfile-3.1.6.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
50
- megfile-3.1.6.dist-info/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
51
- megfile-3.1.6.dist-info/METADATA,sha256=XuomkBWbASrj60L6untH8oN32sNezHse1WfuPetTpYg,9178
52
- megfile-3.1.6.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
53
- megfile-3.1.6.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
54
- megfile-3.1.6.dist-info/top_level.txt,sha256=oTnYXo1Z3V61qSWAKtnY9RkDgRSHvfRN38FQae6E0W0,50
55
- megfile-3.1.6.dist-info/RECORD,,
49
+ megfile-3.1.7.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
50
+ megfile-3.1.7.dist-info/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
51
+ megfile-3.1.7.dist-info/METADATA,sha256=IOh1sm_PB6oLFwTJwWMZx3tvIfyiIFaYG6d0IPTlt1Q,9178
52
+ megfile-3.1.7.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
53
+ megfile-3.1.7.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
54
+ megfile-3.1.7.dist-info/top_level.txt,sha256=fVg49lk5B9L7jyfWUXWxb0DDSuw5pbr0OU62Tvx8J8M,44
55
+ megfile-3.1.7.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.5.0)
2
+ Generator: setuptools (75.6.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,6 +1,5 @@
1
1
  dist
2
2
  docs
3
- empty
4
3
  html_cov
5
4
  html_doc
6
5
  megfile