s3fs 2025.9.0__tar.gz → 2025.12.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {s3fs-2025.9.0/s3fs.egg-info → s3fs-2025.12.0}/PKG-INFO +4 -9
- {s3fs-2025.9.0 → s3fs-2025.12.0}/docs/source/changelog.rst +15 -1
- s3fs-2025.12.0/docs/source/code-of-conduct.rst +126 -0
- {s3fs-2025.9.0 → s3fs-2025.12.0}/docs/source/index.rst +15 -1
- {s3fs-2025.9.0 → s3fs-2025.12.0}/requirements.txt +1 -1
- {s3fs-2025.9.0 → s3fs-2025.12.0}/s3fs/_version.py +3 -3
- {s3fs-2025.9.0 → s3fs-2025.12.0}/s3fs/core.py +70 -32
- {s3fs-2025.9.0 → s3fs-2025.12.0}/s3fs/errors.py +1 -1
- {s3fs-2025.9.0 → s3fs-2025.12.0}/s3fs/tests/derived/s3fs_fixtures.py +1 -3
- {s3fs-2025.9.0 → s3fs-2025.12.0}/s3fs/tests/test_s3fs.py +90 -8
- {s3fs-2025.9.0 → s3fs-2025.12.0}/s3fs/utils.py +2 -2
- {s3fs-2025.9.0 → s3fs-2025.12.0/s3fs.egg-info}/PKG-INFO +4 -9
- {s3fs-2025.9.0 → s3fs-2025.12.0}/s3fs.egg-info/SOURCES.txt +1 -0
- s3fs-2025.12.0/s3fs.egg-info/requires.txt +3 -0
- {s3fs-2025.9.0 → s3fs-2025.12.0}/setup.py +2 -14
- s3fs-2025.9.0/s3fs.egg-info/requires.txt +0 -9
- {s3fs-2025.9.0 → s3fs-2025.12.0}/LICENSE.txt +0 -0
- {s3fs-2025.9.0 → s3fs-2025.12.0}/MANIFEST.in +0 -0
- {s3fs-2025.9.0 → s3fs-2025.12.0}/README.md +0 -0
- {s3fs-2025.9.0 → s3fs-2025.12.0}/docs/source/api.rst +0 -0
- {s3fs-2025.9.0 → s3fs-2025.12.0}/docs/source/development.rst +0 -0
- {s3fs-2025.9.0 → s3fs-2025.12.0}/docs/source/install.rst +0 -0
- {s3fs-2025.9.0 → s3fs-2025.12.0}/s3fs/__init__.py +0 -0
- {s3fs-2025.9.0 → s3fs-2025.12.0}/s3fs/mapping.py +0 -0
- {s3fs-2025.9.0 → s3fs-2025.12.0}/s3fs/tests/__init__.py +0 -0
- {s3fs-2025.9.0 → s3fs-2025.12.0}/s3fs/tests/derived/__init__.py +0 -0
- {s3fs-2025.9.0 → s3fs-2025.12.0}/s3fs/tests/derived/s3fs_test.py +0 -0
- {s3fs-2025.9.0 → s3fs-2025.12.0}/s3fs/tests/test_mapping.py +0 -0
- {s3fs-2025.9.0 → s3fs-2025.12.0}/s3fs/tests/test_utils.py +0 -0
- {s3fs-2025.9.0 → s3fs-2025.12.0}/s3fs.egg-info/dependency_links.txt +0 -0
- {s3fs-2025.9.0 → s3fs-2025.12.0}/s3fs.egg-info/not-zip-safe +0 -0
- {s3fs-2025.9.0 → s3fs-2025.12.0}/s3fs.egg-info/top_level.txt +0 -0
- {s3fs-2025.9.0 → s3fs-2025.12.0}/setup.cfg +0 -0
- {s3fs-2025.9.0 → s3fs-2025.12.0}/versioneer.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: s3fs
|
|
3
|
-
Version: 2025.9.0
|
|
3
|
+
Version: 2025.12.0
|
|
4
4
|
Summary: Convenient Filesystem interface over S3
|
|
5
5
|
Home-page: http://github.com/fsspec/s3fs/
|
|
6
6
|
Maintainer: Martin Durant
|
|
@@ -11,21 +11,17 @@ Classifier: Development Status :: 4 - Beta
|
|
|
11
11
|
Classifier: Intended Audience :: Developers
|
|
12
12
|
Classifier: License :: OSI Approved :: BSD License
|
|
13
13
|
Classifier: Operating System :: OS Independent
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
15
14
|
Classifier: Programming Language :: Python :: 3.10
|
|
16
15
|
Classifier: Programming Language :: Python :: 3.11
|
|
17
16
|
Classifier: Programming Language :: Python :: 3.12
|
|
18
17
|
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
-
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
19
|
+
Requires-Python: >= 3.10
|
|
20
20
|
Description-Content-Type: text/markdown
|
|
21
21
|
License-File: LICENSE.txt
|
|
22
22
|
Requires-Dist: aiobotocore<3.0.0,>=2.5.4
|
|
23
|
-
Requires-Dist: fsspec==2025.9.0
|
|
23
|
+
Requires-Dist: fsspec==2025.12.0
|
|
24
24
|
Requires-Dist: aiohttp!=4.0.0a0,!=4.0.0a1
|
|
25
|
-
Provides-Extra: awscli
|
|
26
|
-
Requires-Dist: aiobotocore[awscli]<3.0.0,>=2.5.4; extra == "awscli"
|
|
27
|
-
Provides-Extra: boto3
|
|
28
|
-
Requires-Dist: aiobotocore[boto3]<3.0.0,>=2.5.4; extra == "boto3"
|
|
29
25
|
Dynamic: classifier
|
|
30
26
|
Dynamic: description
|
|
31
27
|
Dynamic: description-content-type
|
|
@@ -35,7 +31,6 @@ Dynamic: license
|
|
|
35
31
|
Dynamic: license-file
|
|
36
32
|
Dynamic: maintainer
|
|
37
33
|
Dynamic: maintainer-email
|
|
38
|
-
Dynamic: provides-extra
|
|
39
34
|
Dynamic: requires-dist
|
|
40
35
|
Dynamic: requires-python
|
|
41
36
|
Dynamic: summary
|
|
@@ -1,10 +1,24 @@
|
|
|
1
1
|
Changelog
|
|
2
2
|
=========
|
|
3
3
|
|
|
4
|
+
2025.12.0
|
|
5
|
+
---------
|
|
6
|
+
|
|
7
|
+
- remove optional dependencies (#995)
|
|
8
|
+
- add support for py3.14 and remove 3.9 (#993)
|
|
9
|
+
- add link docs->repo (#992)
|
|
10
|
+
|
|
11
|
+
2025.10.0
|
|
12
|
+
---------
|
|
13
|
+
|
|
14
|
+
- get bucket info on demand (#987)
|
|
15
|
+
- add CoC (#986)
|
|
16
|
+
- add goatcounter tracker (#985)
|
|
17
|
+
|
|
4
18
|
2025.9.0
|
|
5
19
|
--------
|
|
6
20
|
|
|
7
|
-
- update README for distribution compliance
|
|
21
|
+
- update README for distribution compliance (#977)
|
|
8
22
|
|
|
9
23
|
2025.7.0
|
|
10
24
|
--------
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
Code of Conduct
|
|
2
|
+
===============
|
|
3
|
+
|
|
4
|
+
All participants in the fsspec community are expected to adhere to a Code of Conduct.
|
|
5
|
+
|
|
6
|
+
As contributors and maintainers of this project, and in the interest of
|
|
7
|
+
fostering an open and welcoming community, we pledge to respect all people who
|
|
8
|
+
contribute through reporting issues, posting feature requests, updating
|
|
9
|
+
documentation, submitting pull requests or patches, and other activities.
|
|
10
|
+
|
|
11
|
+
We are committed to making participation in this project a harassment-free
|
|
12
|
+
experience for everyone, treating everyone as unique humans deserving of
|
|
13
|
+
respect.
|
|
14
|
+
|
|
15
|
+
Examples of unacceptable behaviour by participants include:
|
|
16
|
+
|
|
17
|
+
- The use of sexualized language or imagery
|
|
18
|
+
- Personal attacks
|
|
19
|
+
- Trolling or insulting/derogatory comments
|
|
20
|
+
- Public or private harassment
|
|
21
|
+
- Publishing others' private information, such as physical or electronic
|
|
22
|
+
addresses, without explicit permission
|
|
23
|
+
- Other unethical or unprofessional conduct
|
|
24
|
+
|
|
25
|
+
Project maintainers have the right and responsibility to remove, edit, or
|
|
26
|
+
reject comments, commits, code, wiki edits, issues, and other contributions
|
|
27
|
+
that are not aligned to this Code of Conduct, or to ban temporarily or
|
|
28
|
+
permanently any contributor for other behaviours that they deem inappropriate,
|
|
29
|
+
threatening, offensive, or harmful.
|
|
30
|
+
|
|
31
|
+
By adopting this Code of Conduct, project maintainers commit themselves
|
|
32
|
+
to fairly and consistently applying these principles to every aspect of
|
|
33
|
+
managing this project. Project maintainers who do not follow or enforce
|
|
34
|
+
the Code of Conduct may be permanently removed from the project team.
|
|
35
|
+
|
|
36
|
+
This code of conduct applies both within project spaces and in public
|
|
37
|
+
spaces when an individual is representing the project or its community.
|
|
38
|
+
|
|
39
|
+
If you feel the code of conduct has been violated, please report the
|
|
40
|
+
incident to the fsspec core team.
|
|
41
|
+
|
|
42
|
+
Reporting
|
|
43
|
+
---------
|
|
44
|
+
|
|
45
|
+
If you believe someone is violating the Code of Conduct we ask that you report it
|
|
46
|
+
to the Project by emailing community@anaconda.com. All reports will be kept
|
|
47
|
+
confidential. In some cases we may determine that a public statement will need
|
|
48
|
+
to be made. If that's the case, the identities of all victims and reporters
|
|
49
|
+
will remain confidential unless those individuals instruct us otherwise.
|
|
50
|
+
If you believe anyone is in physical danger, please notify appropriate law
|
|
51
|
+
enforcement first.
|
|
52
|
+
|
|
53
|
+
In your report please include:
|
|
54
|
+
|
|
55
|
+
- Your contact info
|
|
56
|
+
- Names (real, nicknames, or pseudonyms) of any individuals involved.
|
|
57
|
+
If there were other witnesses besides you, please try to include them as well.
|
|
58
|
+
- When and where the incident occurred. Please be as specific as possible.
|
|
59
|
+
- Your account of what occurred. If there is a publicly available record
|
|
60
|
+
please include a link.
|
|
61
|
+
- Any extra context you believe existed for the incident.
|
|
62
|
+
- If you believe this incident is ongoing.
|
|
63
|
+
- If you believe any member of the core team has a conflict of interest
|
|
64
|
+
in adjudicating the incident.
|
|
65
|
+
- What, if any, corrective response you believe would be appropriate.
|
|
66
|
+
- Any other information you believe we should have.
|
|
67
|
+
|
|
68
|
+
Core team members are obligated to maintain confidentiality with regard
|
|
69
|
+
to the reporter and details of an incident.
|
|
70
|
+
|
|
71
|
+
What happens next?
|
|
72
|
+
~~~~~~~~~~~~~~~~~~
|
|
73
|
+
|
|
74
|
+
You will receive an email acknowledging receipt of your complaint.
|
|
75
|
+
The core team will immediately meet to review the incident and determine:
|
|
76
|
+
|
|
77
|
+
- What happened.
|
|
78
|
+
- Whether this event constitutes a code of conduct violation.
|
|
79
|
+
- Who the bad actor was.
|
|
80
|
+
- Whether this is an ongoing situation, or if there is a threat to anyone's
|
|
81
|
+
physical safety.
|
|
82
|
+
- If this is determined to be an ongoing incident or a threat to physical safety,
|
|
83
|
+
the working groups' immediate priority will be to protect everyone involved.
|
|
84
|
+
|
|
85
|
+
If a member of the core team is one of the named parties, they will not be
|
|
86
|
+
included in any discussions, and will not be provided with any confidential
|
|
87
|
+
details from the reporter.
|
|
88
|
+
|
|
89
|
+
If anyone on the core team believes they have a conflict of interest in
|
|
90
|
+
adjudicating on a reported issue, they will inform the other core team
|
|
91
|
+
members, and exempt themselves from any discussion about the issue.
|
|
92
|
+
Following this declaration, they will not be provided with any confidential
|
|
93
|
+
details from the reporter.
|
|
94
|
+
|
|
95
|
+
Once the working group has a complete account of the events they will make a
|
|
96
|
+
decision as to how to respond. Responses may include:
|
|
97
|
+
|
|
98
|
+
- Nothing (if we determine no violation occurred).
|
|
99
|
+
- A private reprimand from the working group to the individual(s) involved.
|
|
100
|
+
- A public reprimand.
|
|
101
|
+
- An imposed vacation
|
|
102
|
+
- A permanent or temporary ban from some or all spaces (GitHub repositories, etc.)
|
|
103
|
+
- A request for a public or private apology.
|
|
104
|
+
|
|
105
|
+
We'll respond within one week to the person who filed the report with either a
|
|
106
|
+
resolution or an explanation of why the situation is not yet resolved.
|
|
107
|
+
|
|
108
|
+
Once we've determined our final action, we'll contact the original reporter
|
|
109
|
+
to let them know what action (if any) we'll be taking. We'll take into account
|
|
110
|
+
feedback from the reporter on the appropriateness of our response, but we
|
|
111
|
+
don't guarantee we'll act on it.
|
|
112
|
+
|
|
113
|
+
Acknowledgement
|
|
114
|
+
---------------
|
|
115
|
+
|
|
116
|
+
This CoC is modified from the one by `BeeWare`_, which in turn refers to
|
|
117
|
+
the `Contributor Covenant`_ and the `Django`_ project.
|
|
118
|
+
|
|
119
|
+
.. _BeeWare: https://beeware.org/community/behavior/code-of-conduct/
|
|
120
|
+
.. _Contributor Covenant: https://www.contributor-covenant.org/version/1/3/0/code-of-conduct/
|
|
121
|
+
.. _Django: https://www.djangoproject.com/conduct/reporting/
|
|
122
|
+
|
|
123
|
+
.. raw:: html
|
|
124
|
+
|
|
125
|
+
<script data-goatcounter="https://projspec.goatcounter.com/count"
|
|
126
|
+
async src="//gc.zgo.at/count.js"></script>
|
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
S3Fs
|
|
2
2
|
====
|
|
3
3
|
|
|
4
|
-
S3Fs is a Pythonic file interface to S3. It builds on top of botocore_.
|
|
4
|
+
S3Fs is a Pythonic file interface to S3. It builds on top of botocore_. The project is hosted on `GitHub <https://github.com/fsspec/s3fs>`_ |github_stars|
|
|
5
|
+
|
|
6
|
+
.. |github_stars| image:: https://img.shields.io/github/stars/fsspec/s3fs?style=social
|
|
7
|
+
:target: https://github.com/fsspec/s3fs
|
|
8
|
+
:alt: GitHub Repository
|
|
5
9
|
|
|
6
10
|
The top-level class :py:class:`.S3FileSystem` holds connection information and allows
|
|
7
11
|
typical file-system style operations like ``cp``, ``mv``, ``ls``, ``du``,
|
|
@@ -340,6 +344,7 @@ Contents
|
|
|
340
344
|
development
|
|
341
345
|
api
|
|
342
346
|
changelog
|
|
347
|
+
code-of-conduct
|
|
343
348
|
:maxdepth: 2
|
|
344
349
|
|
|
345
350
|
|
|
@@ -351,3 +356,12 @@ Indices and tables
|
|
|
351
356
|
* :ref:`genindex`
|
|
352
357
|
* :ref:`modindex`
|
|
353
358
|
* :ref:`search`
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
These docs pages collect anonymous tracking data using goatcounter, and the
|
|
362
|
+
dashboard is available to the public: https://s3fs.goatcounter.com/ .
|
|
363
|
+
|
|
364
|
+
.. raw:: html
|
|
365
|
+
|
|
366
|
+
<script data-goatcounter="https://s3fs.goatcounter.com/count"
|
|
367
|
+
async src="//gc.zgo.at/count.js"></script>
|
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2025-
|
|
11
|
+
"date": "2025-12-03T10:32:02-0500",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "2025.
|
|
14
|
+
"full-revisionid": "65f394575b9667f33b59473dc28a8f1cf6708745",
|
|
15
|
+
"version": "2025.12.0"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
@@ -1,12 +1,11 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
1
|
import asyncio
|
|
3
2
|
import errno
|
|
4
3
|
import io
|
|
5
4
|
import logging
|
|
5
|
+
import math
|
|
6
6
|
import mimetypes
|
|
7
7
|
import os
|
|
8
8
|
import socket
|
|
9
|
-
from typing import Tuple, Optional
|
|
10
9
|
import weakref
|
|
11
10
|
import re
|
|
12
11
|
|
|
@@ -69,6 +68,8 @@ S3_RETRYABLE_ERRORS = (
|
|
|
69
68
|
ResponseParserError,
|
|
70
69
|
)
|
|
71
70
|
|
|
71
|
+
MAX_UPLOAD_PARTS = 10_000 # maximum number of parts for S3 multipart upload
|
|
72
|
+
|
|
72
73
|
if ClientPayloadError is not None:
|
|
73
74
|
S3_RETRYABLE_ERRORS += (ClientPayloadError,)
|
|
74
75
|
|
|
@@ -166,7 +167,7 @@ def _coalesce_version_id(*args):
|
|
|
166
167
|
if len(version_ids) > 1:
|
|
167
168
|
raise ValueError(
|
|
168
169
|
"Cannot coalesce version_ids where more than one are defined,"
|
|
169
|
-
" {}"
|
|
170
|
+
f" {version_ids}"
|
|
170
171
|
)
|
|
171
172
|
elif len(version_ids) == 0:
|
|
172
173
|
return None
|
|
@@ -174,6 +175,18 @@ def _coalesce_version_id(*args):
|
|
|
174
175
|
return version_ids.pop()
|
|
175
176
|
|
|
176
177
|
|
|
178
|
+
def calculate_chunksize(filesize, chunksize=None, max_parts=MAX_UPLOAD_PARTS) -> int:
|
|
179
|
+
if chunksize is None:
|
|
180
|
+
chunksize = 50 * 2**20 # default chunksize set to 50 MiB
|
|
181
|
+
required_chunks = math.ceil(filesize / chunksize)
|
|
182
|
+
# increase chunksize to fit within the max_parts limit
|
|
183
|
+
if required_chunks > max_parts:
|
|
184
|
+
# S3 supports uploading objects up to 5 TiB in size,
|
|
185
|
+
# so each chunk can be up to ~524 MiB.
|
|
186
|
+
chunksize = math.ceil(filesize / max_parts)
|
|
187
|
+
return chunksize
|
|
188
|
+
|
|
189
|
+
|
|
177
190
|
class S3FileSystem(AsyncFileSystem):
|
|
178
191
|
"""
|
|
179
192
|
Access S3 as if it were a file system.
|
|
@@ -440,7 +453,7 @@ class S3FileSystem(AsyncFileSystem):
|
|
|
440
453
|
s3_key = s3_components[1]
|
|
441
454
|
return bucket, s3_key
|
|
442
455
|
|
|
443
|
-
def split_path(self, path) -> Tuple[str, str, Optional[str]]:
|
|
456
|
+
def split_path(self, path) -> tuple[str, str, str | None]:
|
|
444
457
|
"""
|
|
445
458
|
Normalise S3 path string into bucket and key.
|
|
446
459
|
|
|
@@ -764,6 +777,7 @@ class S3FileSystem(AsyncFileSystem):
|
|
|
764
777
|
else:
|
|
765
778
|
files.append(c)
|
|
766
779
|
files += dirs
|
|
780
|
+
files.sort(key=lambda f: f["name"])
|
|
767
781
|
except ClientError as e:
|
|
768
782
|
raise translate_boto_error(e)
|
|
769
783
|
|
|
@@ -887,38 +901,49 @@ class S3FileSystem(AsyncFileSystem):
|
|
|
887
901
|
sdirs = set()
|
|
888
902
|
thisdircache = {}
|
|
889
903
|
for o in out:
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
904
|
+
# not self._parent, because that strips "/" from placeholders
|
|
905
|
+
par = o["name"].rsplit("/", maxsplit=1)[0]
|
|
906
|
+
o["Key"] = o["name"]
|
|
907
|
+
name = o["name"]
|
|
908
|
+
while "/" in par:
|
|
909
|
+
if par not in sdirs:
|
|
910
|
+
sdirs.add(par)
|
|
911
|
+
d = False
|
|
912
|
+
if len(path) <= len(par):
|
|
913
|
+
d = {
|
|
914
|
+
"Key": par,
|
|
915
|
+
"Size": 0,
|
|
916
|
+
"name": par,
|
|
917
|
+
"StorageClass": "DIRECTORY",
|
|
918
|
+
"type": "directory",
|
|
919
|
+
"size": 0,
|
|
920
|
+
}
|
|
921
|
+
dirs.append(d)
|
|
922
|
+
thisdircache[par] = []
|
|
923
|
+
ppar = self._parent(par)
|
|
924
|
+
if ppar in thisdircache:
|
|
925
|
+
if d and d not in thisdircache[ppar]:
|
|
926
|
+
thisdircache[ppar].append(d)
|
|
927
|
+
if par in sdirs and not name.endswith("/"):
|
|
928
|
+
# exclude placeholders, they do not belong in the directory listing
|
|
929
|
+
thisdircache[par].append(o)
|
|
930
|
+
par, name, o = par.rsplit("/", maxsplit=1)[0], par, d
|
|
931
|
+
if par in thisdircache or par in self.dircache:
|
|
932
|
+
break
|
|
911
933
|
|
|
912
934
|
# Explicitly add directories to their parents in the dircache
|
|
913
935
|
for d in dirs:
|
|
914
936
|
par = self._parent(d["name"])
|
|
915
|
-
|
|
937
|
+
# extra condition here (in any()) to deal with directory-marking files
|
|
938
|
+
if par in thisdircache and not any(
|
|
939
|
+
_["name"] == d["name"] for _ in thisdircache[par]
|
|
940
|
+
):
|
|
916
941
|
thisdircache[par].append(d)
|
|
917
942
|
|
|
918
943
|
if not prefix:
|
|
919
944
|
for k, v in thisdircache.items():
|
|
920
945
|
if k not in self.dircache and len(k) >= len(path):
|
|
921
|
-
self.dircache[k] = v
|
|
946
|
+
self.dircache[k] = sorted(v, key=lambda x: x["name"])
|
|
922
947
|
if withdirs:
|
|
923
948
|
out = sorted(out + dirs, key=lambda x: x["name"])
|
|
924
949
|
if detail:
|
|
@@ -1043,7 +1068,7 @@ class S3FileSystem(AsyncFileSystem):
|
|
|
1043
1068
|
files = await self._lsdir(
|
|
1044
1069
|
self._parent(path), refresh=refresh, versions=versions
|
|
1045
1070
|
)
|
|
1046
|
-
except
|
|
1071
|
+
except OSError:
|
|
1047
1072
|
pass
|
|
1048
1073
|
files = [
|
|
1049
1074
|
o
|
|
@@ -1230,7 +1255,7 @@ class S3FileSystem(AsyncFileSystem):
|
|
|
1230
1255
|
lpath,
|
|
1231
1256
|
rpath,
|
|
1232
1257
|
callback=_DEFAULT_CALLBACK,
|
|
1233
|
-
chunksize=50 * 2**20,
|
|
1258
|
+
chunksize=None,
|
|
1234
1259
|
max_concurrency=None,
|
|
1235
1260
|
mode="overwrite",
|
|
1236
1261
|
**kwargs,
|
|
@@ -1258,6 +1283,7 @@ class S3FileSystem(AsyncFileSystem):
|
|
|
1258
1283
|
if content_type is not None:
|
|
1259
1284
|
kwargs["ContentType"] = content_type
|
|
1260
1285
|
|
|
1286
|
+
chunksize = calculate_chunksize(size, chunksize=chunksize)
|
|
1261
1287
|
with open(lpath, "rb") as f0:
|
|
1262
1288
|
if size < min(5 * 2**30, 2 * chunksize):
|
|
1263
1289
|
chunk = f0.read()
|
|
@@ -1276,8 +1302,8 @@ class S3FileSystem(AsyncFileSystem):
|
|
|
1276
1302
|
key,
|
|
1277
1303
|
mpu,
|
|
1278
1304
|
f0,
|
|
1305
|
+
chunksize,
|
|
1279
1306
|
callback=callback,
|
|
1280
|
-
chunksize=chunksize,
|
|
1281
1307
|
max_concurrency=max_concurrency,
|
|
1282
1308
|
)
|
|
1283
1309
|
parts = [
|
|
@@ -1305,8 +1331,8 @@ class S3FileSystem(AsyncFileSystem):
|
|
|
1305
1331
|
key,
|
|
1306
1332
|
mpu,
|
|
1307
1333
|
f0,
|
|
1334
|
+
chunksize,
|
|
1308
1335
|
callback=_DEFAULT_CALLBACK,
|
|
1309
|
-
chunksize=50 * 2**20,
|
|
1310
1336
|
max_concurrency=None,
|
|
1311
1337
|
):
|
|
1312
1338
|
max_concurrency = max_concurrency or self.max_concurrency
|
|
@@ -1464,6 +1490,18 @@ class S3FileSystem(AsyncFileSystem):
|
|
|
1464
1490
|
pass
|
|
1465
1491
|
except ClientError as e:
|
|
1466
1492
|
raise translate_boto_error(e, set_cause=False)
|
|
1493
|
+
else:
|
|
1494
|
+
try:
|
|
1495
|
+
out = await self._call_s3("head_bucket", Bucket=bucket, **self.req_kw)
|
|
1496
|
+
return {
|
|
1497
|
+
"name": bucket,
|
|
1498
|
+
"type": "directory",
|
|
1499
|
+
"size": 0,
|
|
1500
|
+
"StorageClass": "DIRECTORY",
|
|
1501
|
+
"VersionId": out.get("VersionId"),
|
|
1502
|
+
}
|
|
1503
|
+
except ClientError as e:
|
|
1504
|
+
raise translate_boto_error(e, set_cause=False)
|
|
1467
1505
|
|
|
1468
1506
|
try:
|
|
1469
1507
|
# We check to see if the path is a directory by attempting to list its
|
|
@@ -2128,7 +2166,7 @@ class S3FileSystem(AsyncFileSystem):
|
|
|
2128
2166
|
path = self._parent(path)
|
|
2129
2167
|
|
|
2130
2168
|
async def _walk(self, path, maxdepth=None, **kwargs):
|
|
2131
|
-
if path in ["", "*"] + ["{}://"
|
|
2169
|
+
if path in ["", "*"] + [f"{p}://" for p in self.protocol]:
|
|
2132
2170
|
raise ValueError("Cannot crawl all of S3")
|
|
2133
2171
|
async for _ in super()._walk(path, maxdepth=maxdepth, **kwargs):
|
|
2134
2172
|
yield _
|
|
@@ -155,7 +155,7 @@ def translate_boto_error(error, message=None, set_cause=True, *args, **kwargs):
|
|
|
155
155
|
custom_exc = constructor(message, *args, **kwargs)
|
|
156
156
|
else:
|
|
157
157
|
# No match found, wrap this in an IOError with the appropriate message.
|
|
158
|
-
custom_exc =
|
|
158
|
+
custom_exc = OSError(errno.EIO, message or str(error), *args)
|
|
159
159
|
|
|
160
160
|
if set_cause:
|
|
161
161
|
custom_exc.__cause__ = error
|
|
@@ -38,9 +38,7 @@ class S3fsFixtures(AbstractFixtures):
|
|
|
38
38
|
"Effect": "Deny",
|
|
39
39
|
"Principal": "*",
|
|
40
40
|
"Action": "s3:PutObject",
|
|
41
|
-
"Resource": "arn:aws:s3:::{
|
|
42
|
-
bucket_name=secure_bucket_name
|
|
43
|
-
),
|
|
41
|
+
"Resource": f"arn:aws:s3:::{secure_bucket_name}/*",
|
|
44
42
|
"Condition": {
|
|
45
43
|
"StringNotEquals": {
|
|
46
44
|
"s3:x-amz-server-side-encryption": "aws:kms"
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
1
|
import asyncio
|
|
3
2
|
import errno
|
|
4
3
|
import datetime
|
|
@@ -20,7 +19,7 @@ from dateutil.tz import tzutc
|
|
|
20
19
|
|
|
21
20
|
import botocore
|
|
22
21
|
import s3fs.core
|
|
23
|
-
from s3fs.core import S3FileSystem
|
|
22
|
+
from s3fs.core import MAX_UPLOAD_PARTS, S3FileSystem, calculate_chunksize
|
|
24
23
|
from s3fs.utils import ignoring, SSEParams
|
|
25
24
|
from botocore.exceptions import NoCredentialsError
|
|
26
25
|
from fsspec.asyn import sync
|
|
@@ -128,9 +127,7 @@ def s3(s3_base):
|
|
|
128
127
|
"Effect": "Deny",
|
|
129
128
|
"Principal": "*",
|
|
130
129
|
"Action": "s3:PutObject",
|
|
131
|
-
"Resource": "arn:aws:s3:::{
|
|
132
|
-
bucket_name=secure_bucket_name
|
|
133
|
-
),
|
|
130
|
+
"Resource": f"arn:aws:s3:::{secure_bucket_name}/*",
|
|
134
131
|
"Condition": {
|
|
135
132
|
"StringNotEquals": {
|
|
136
133
|
"s3:x-amz-server-side-encryption": "aws:kms"
|
|
@@ -1672,7 +1669,7 @@ def test_fsspec_versions_multiple(s3):
|
|
|
1672
1669
|
fo.write(contents)
|
|
1673
1670
|
version_lookup[fo.version_id] = contents
|
|
1674
1671
|
urls = [
|
|
1675
|
-
"s3://{}?versionId={}"
|
|
1672
|
+
f"s3://{versioned_file}?versionId={version}"
|
|
1676
1673
|
for version in version_lookup.keys()
|
|
1677
1674
|
]
|
|
1678
1675
|
fs, token, paths = fsspec.core.get_fs_token_paths(
|
|
@@ -1702,7 +1699,7 @@ def test_versioned_file_fullpath(s3):
|
|
|
1702
1699
|
with s3.open(versioned_file, "wb") as fo:
|
|
1703
1700
|
fo.write(b"2")
|
|
1704
1701
|
|
|
1705
|
-
file_with_version = "{}?versionId={}"
|
|
1702
|
+
file_with_version = f"{versioned_file}?versionId={version_id}"
|
|
1706
1703
|
|
|
1707
1704
|
with s3.open(file_with_version, "rb") as fo:
|
|
1708
1705
|
assert fo.version_id == version_id
|
|
@@ -2365,7 +2362,7 @@ def test_get_file_info_with_selector(s3):
|
|
|
2365
2362
|
pass
|
|
2366
2363
|
|
|
2367
2364
|
infos = fs.find(base_dir, maxdepth=None, withdirs=True, detail=True)
|
|
2368
|
-
assert len(infos) ==
|
|
2365
|
+
assert len(infos) == 4 # includes base_dir directory
|
|
2369
2366
|
|
|
2370
2367
|
for info in infos.values():
|
|
2371
2368
|
if info["name"].endswith(file_a):
|
|
@@ -2986,3 +2983,88 @@ def test_put_exclusive_small(s3, tmpdir):
|
|
|
2986
2983
|
with pytest.raises(FileExistsError):
|
|
2987
2984
|
s3.put(fn, f"{test_bucket_name}/afile", mode="create")
|
|
2988
2985
|
assert not s3.list_multipart_uploads(test_bucket_name)
|
|
2986
|
+
|
|
2987
|
+
|
|
2988
|
+
def test_bucket_info(s3):
|
|
2989
|
+
info = s3.info(test_bucket_name)
|
|
2990
|
+
assert "VersionId" in info
|
|
2991
|
+
assert info["type"] == "directory"
|
|
2992
|
+
assert info["name"] == test_bucket_name
|
|
2993
|
+
|
|
2994
|
+
|
|
2995
|
+
MB = 2**20
|
|
2996
|
+
GB = 2**30
|
|
2997
|
+
TB = 2**40
|
|
2998
|
+
|
|
2999
|
+
|
|
3000
|
+
@pytest.mark.parametrize(
|
|
3001
|
+
["filesize", "chunksize", "expected"],
|
|
3002
|
+
[
|
|
3003
|
+
# small file, use default chunksize
|
|
3004
|
+
(1000, None, 50 * MB),
|
|
3005
|
+
# exact boundary, use default chunksize
|
|
3006
|
+
(50 * MB * MAX_UPLOAD_PARTS, None, 50 * MB),
|
|
3007
|
+
# file requiring increased chunksize
|
|
3008
|
+
(50 * MB * (MAX_UPLOAD_PARTS + 1), None, 52_434_043),
|
|
3009
|
+
# very large files, expect increased chunksize
|
|
3010
|
+
(1 * TB, None, 109_951_163),
|
|
3011
|
+
(5 * TB, None, 549_755_814),
|
|
3012
|
+
# respect explicit chunksize
|
|
3013
|
+
(5 * GB, 10 * MB, 10 * MB),
|
|
3014
|
+
],
|
|
3015
|
+
)
|
|
3016
|
+
def test_calculate_chunksize(filesize, chunksize, expected):
|
|
3017
|
+
assert calculate_chunksize(filesize, chunksize) == expected
|
|
3018
|
+
|
|
3019
|
+
|
|
3020
|
+
def test_find_ls_fail(s3):
|
|
3021
|
+
# because of https://github.com/fsspec/s3fs/pull/989
|
|
3022
|
+
client = get_boto3_client()
|
|
3023
|
+
files = {
|
|
3024
|
+
f"{test_bucket_name}/find/a/a": b"data",
|
|
3025
|
+
f"{test_bucket_name}/find/a/b": b"data",
|
|
3026
|
+
f"{test_bucket_name}/find/a": b"", # duplicate of dir, without "/"
|
|
3027
|
+
f"{test_bucket_name}/find/b": b"", # empty file without "/" and no children
|
|
3028
|
+
f"{test_bucket_name}/find/c/c": b"data", # directory with no placeholder
|
|
3029
|
+
f"{test_bucket_name}/find/d/d": b"data", # dir will acquire placeholder with "/"
|
|
3030
|
+
}
|
|
3031
|
+
client.put_object(Bucket=test_bucket_name, Key="find/d/", Body=b"")
|
|
3032
|
+
client.put_object(
|
|
3033
|
+
Bucket=test_bucket_name, Key="find/e/", Body=b""
|
|
3034
|
+
) # placeholder only
|
|
3035
|
+
s3.pipe(files)
|
|
3036
|
+
|
|
3037
|
+
out0 = s3.ls(f"{test_bucket_name}/find", detail=True)
|
|
3038
|
+
s3.find(test_bucket_name, detail=False)
|
|
3039
|
+
out = s3.ls(f"{test_bucket_name}/find", detail=True)
|
|
3040
|
+
assert out == out0
|
|
3041
|
+
|
|
3042
|
+
s3.invalidate_cache()
|
|
3043
|
+
s3.find(f"{test_bucket_name}/find", detail=False)
|
|
3044
|
+
out = s3.ls(f"{test_bucket_name}/find", detail=True)
|
|
3045
|
+
assert out == out0
|
|
3046
|
+
|
|
3047
|
+
|
|
3048
|
+
def test_find_missing_ls(s3):
|
|
3049
|
+
# https://github.com/fsspec/s3fs/issues/988#issuecomment-3436727753
|
|
3050
|
+
BUCKET = test_bucket_name
|
|
3051
|
+
BASE_PREFIX = "disappearing-folders/"
|
|
3052
|
+
BASE = f"s3://{BUCKET}/{BASE_PREFIX}"
|
|
3053
|
+
|
|
3054
|
+
s3_with_cache = S3FileSystem(
|
|
3055
|
+
anon=False,
|
|
3056
|
+
use_listings_cache=True,
|
|
3057
|
+
client_kwargs={"endpoint_url": endpoint_uri},
|
|
3058
|
+
)
|
|
3059
|
+
s3_no_cache = S3FileSystem(
|
|
3060
|
+
anon=False,
|
|
3061
|
+
use_listings_cache=False,
|
|
3062
|
+
client_kwargs={"endpoint_url": endpoint_uri},
|
|
3063
|
+
)
|
|
3064
|
+
|
|
3065
|
+
s3_with_cache.pipe({f"{BASE}folder/foo/1.txt": b"", f"{BASE}bar.txt": b""})
|
|
3066
|
+
s3_with_cache.find(BASE)
|
|
3067
|
+
listed_cached = s3_with_cache.ls(BASE, detail=False)
|
|
3068
|
+
listed_no_cache = s3_no_cache.ls(BASE, detail=False)
|
|
3069
|
+
|
|
3070
|
+
assert set(listed_cached) == set(listed_no_cache)
|
|
@@ -118,7 +118,7 @@ def title_case(string):
|
|
|
118
118
|
return "".join(x.capitalize() for x in string.split("_"))
|
|
119
119
|
|
|
120
120
|
|
|
121
|
-
class ParamKwargsHelper(object):
|
|
121
|
+
class ParamKwargsHelper:
|
|
122
122
|
"""
|
|
123
123
|
Utility class to help extract the subset of keys that an s3 method is
|
|
124
124
|
actually using
|
|
@@ -152,7 +152,7 @@ class ParamKwargsHelper(object):
|
|
|
152
152
|
return {k: v for k, v in d.items() if k in valid_keys}
|
|
153
153
|
|
|
154
154
|
|
|
155
|
-
class SSEParams(object):
|
|
155
|
+
class SSEParams:
|
|
156
156
|
def __init__(
|
|
157
157
|
self,
|
|
158
158
|
server_side_encryption=None,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: s3fs
|
|
3
|
-
Version: 2025.9.0
|
|
3
|
+
Version: 2025.12.0
|
|
4
4
|
Summary: Convenient Filesystem interface over S3
|
|
5
5
|
Home-page: http://github.com/fsspec/s3fs/
|
|
6
6
|
Maintainer: Martin Durant
|
|
@@ -11,21 +11,17 @@ Classifier: Development Status :: 4 - Beta
|
|
|
11
11
|
Classifier: Intended Audience :: Developers
|
|
12
12
|
Classifier: License :: OSI Approved :: BSD License
|
|
13
13
|
Classifier: Operating System :: OS Independent
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
15
14
|
Classifier: Programming Language :: Python :: 3.10
|
|
16
15
|
Classifier: Programming Language :: Python :: 3.11
|
|
17
16
|
Classifier: Programming Language :: Python :: 3.12
|
|
18
17
|
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
-
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
19
|
+
Requires-Python: >= 3.10
|
|
20
20
|
Description-Content-Type: text/markdown
|
|
21
21
|
License-File: LICENSE.txt
|
|
22
22
|
Requires-Dist: aiobotocore<3.0.0,>=2.5.4
|
|
23
|
-
Requires-Dist: fsspec==2025.9.0
|
|
23
|
+
Requires-Dist: fsspec==2025.12.0
|
|
24
24
|
Requires-Dist: aiohttp!=4.0.0a0,!=4.0.0a1
|
|
25
|
-
Provides-Extra: awscli
|
|
26
|
-
Requires-Dist: aiobotocore[awscli]<3.0.0,>=2.5.4; extra == "awscli"
|
|
27
|
-
Provides-Extra: boto3
|
|
28
|
-
Requires-Dist: aiobotocore[boto3]<3.0.0,>=2.5.4; extra == "boto3"
|
|
29
25
|
Dynamic: classifier
|
|
30
26
|
Dynamic: description
|
|
31
27
|
Dynamic: description-content-type
|
|
@@ -35,7 +31,6 @@ Dynamic: license
|
|
|
35
31
|
Dynamic: license-file
|
|
36
32
|
Dynamic: maintainer
|
|
37
33
|
Dynamic: maintainer-email
|
|
38
|
-
Dynamic: provides-extra
|
|
39
34
|
Dynamic: requires-dist
|
|
40
35
|
Dynamic: requires-python
|
|
41
36
|
Dynamic: summary
|
|
@@ -3,14 +3,6 @@
|
|
|
3
3
|
from setuptools import setup
|
|
4
4
|
import versioneer
|
|
5
5
|
|
|
6
|
-
with open("requirements.txt") as file:
|
|
7
|
-
aiobotocore_version_suffix = ""
|
|
8
|
-
for line in file:
|
|
9
|
-
parts = line.rstrip().split("aiobotocore")
|
|
10
|
-
if len(parts) == 2:
|
|
11
|
-
aiobotocore_version_suffix = parts[1]
|
|
12
|
-
break
|
|
13
|
-
|
|
14
6
|
setup(
|
|
15
7
|
name="s3fs",
|
|
16
8
|
version=versioneer.get_version(),
|
|
@@ -20,11 +12,11 @@ setup(
|
|
|
20
12
|
"Intended Audience :: Developers",
|
|
21
13
|
"License :: OSI Approved :: BSD License",
|
|
22
14
|
"Operating System :: OS Independent",
|
|
23
|
-
"Programming Language :: Python :: 3.9",
|
|
24
15
|
"Programming Language :: Python :: 3.10",
|
|
25
16
|
"Programming Language :: Python :: 3.11",
|
|
26
17
|
"Programming Language :: Python :: 3.12",
|
|
27
18
|
"Programming Language :: Python :: 3.13",
|
|
19
|
+
"Programming Language :: Python :: 3.14",
|
|
28
20
|
],
|
|
29
21
|
description="Convenient Filesystem interface over S3",
|
|
30
22
|
url="http://github.com/fsspec/s3fs/",
|
|
@@ -33,12 +25,8 @@ setup(
|
|
|
33
25
|
license="BSD",
|
|
34
26
|
keywords="s3, boto",
|
|
35
27
|
packages=["s3fs"],
|
|
36
|
-
python_requires=">= 3.9",
|
|
28
|
+
python_requires=">= 3.10",
|
|
37
29
|
install_requires=[open("requirements.txt").read().strip().split("\n")],
|
|
38
|
-
extras_require={
|
|
39
|
-
"awscli": [f"aiobotocore[awscli]{aiobotocore_version_suffix}"],
|
|
40
|
-
"boto3": [f"aiobotocore[boto3]{aiobotocore_version_suffix}"],
|
|
41
|
-
},
|
|
42
30
|
long_description="README.md",
|
|
43
31
|
long_description_content_type="text/markdown",
|
|
44
32
|
zip_safe=False,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|