oasis-data-manager 0.2.2__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/MANIFEST.in +1 -2
  2. oasis_data_manager-0.2.3/PKG-INFO +410 -0
  3. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/README.md +28 -0
  4. oasis_data_manager-0.2.3/oasis_data_manager/__init__.py +1 -0
  5. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager/filestore/backends/aws.py +7 -1
  6. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager/filestore/backends/base.py +9 -7
  7. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager/filestore/config.py +1 -0
  8. oasis_data_manager-0.2.3/oasis_data_manager.egg-info/PKG-INFO +410 -0
  9. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager.egg-info/SOURCES.txt +1 -3
  10. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager.egg-info/requires.txt +14 -2
  11. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager.egg-info/top_level.txt +1 -0
  12. oasis_data_manager-0.2.3/pyproject.toml +73 -0
  13. oasis_data_manager-0.2.3/setup.cfg +4 -0
  14. oasis_data_manager-0.2.2/PKG-INFO +0 -38
  15. oasis_data_manager-0.2.2/oasis_data_manager/__init__.py +0 -1
  16. oasis_data_manager-0.2.2/oasis_data_manager.egg-info/PKG-INFO +0 -38
  17. oasis_data_manager-0.2.2/requirements-package.in +0 -5
  18. oasis_data_manager-0.2.2/setup.cfg +0 -29
  19. oasis_data_manager-0.2.2/setup.py +0 -64
  20. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/CHANGELOG.rst +0 -0
  21. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/LICENSE +0 -0
  22. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager/complex/__init__.py +0 -0
  23. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager/complex/complex.py +0 -0
  24. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager/complex/examples.py +0 -0
  25. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager/config.py +0 -0
  26. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager/df_reader/__init__.py +0 -0
  27. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager/df_reader/backends/__init__.py +0 -0
  28. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager/df_reader/backends/base.py +0 -0
  29. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager/df_reader/backends/dask.py +0 -0
  30. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager/df_reader/backends/pandas.py +0 -0
  31. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager/df_reader/backends/pyarrow.py +0 -0
  32. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager/df_reader/config.py +0 -0
  33. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager/df_reader/exceptions.py +0 -0
  34. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager/df_reader/reader.py +0 -0
  35. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager/errors/__init__.py +0 -0
  36. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager/filestore/__init__.py +0 -0
  37. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager/filestore/backends/__init__.py +0 -0
  38. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager/filestore/backends/aws_s3.py +0 -0
  39. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager/filestore/backends/azure.py +0 -0
  40. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager/filestore/backends/azure_abfs.py +0 -0
  41. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager/filestore/backends/local.py +0 -0
  42. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager/filestore/filestore.py +0 -0
  43. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager/filestore/log.py +0 -0
  44. {oasis_data_manager-0.2.2 → oasis_data_manager-0.2.3}/oasis_data_manager.egg-info/dependency_links.txt +0 -0
@@ -1,7 +1,6 @@
1
1
  include README.md
2
2
  include LICENSE
3
3
  include CHANGELOG.rst
4
- include setup.cfg
5
- include requirements-package.in
4
+ include pyproject.toml
6
5
  recursive-exclude * __pycache__
7
6
  recursive-exclude * *.py[co]
@@ -0,0 +1,410 @@
1
+ Metadata-Version: 2.4
2
+ Name: oasis-data-manager
3
+ Version: 0.2.3
4
+ Author-email: Oasis LMF <support@oasislmf.org>
5
+ License: BSD-3-Clause
6
+ Project-URL: Homepage, https://github.com/OasisLMF/OasisDataManager
7
+ Requires-Python: >=3.10
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: fastparquet
10
+ Requires-Dist: fsspec>=2023.12.2
11
+ Requires-Dist: pandas
12
+ Requires-Dist: typing_extensions
13
+ Requires-Dist: xxhash
14
+ Provides-Extra: geo
15
+ Requires-Dist: geopandas==0.14.4; extra == "geo"
16
+ Requires-Dist: pyogrio; extra == "geo"
17
+ Provides-Extra: dask
18
+ Requires-Dist: oasis-data-manager[geo]; extra == "dask"
19
+ Requires-Dist: dask>2024.1.1; extra == "dask"
20
+ Requires-Dist: dask-geopandas; extra == "dask"
21
+ Requires-Dist: dask-sql; extra == "dask"
22
+ Requires-Dist: distributed; extra == "dask"
23
+ Provides-Extra: s3
24
+ Requires-Dist: s3fs>=2023.12.2; extra == "s3"
25
+ Provides-Extra: azure
26
+ Requires-Dist: adlfs; extra == "azure"
27
+ Provides-Extra: extra
28
+ Requires-Dist: oasis-data-manager[geo]; extra == "extra"
29
+ Requires-Dist: oasis-data-manager[dask]; extra == "extra"
30
+ Requires-Dist: oasis-data-manager[s3]; extra == "extra"
31
+ Requires-Dist: oasis-data-manager[azure]; extra == "extra"
32
+
33
+ # OasisDataManager
34
+
35
+ A Python library providing unified data access patterns across different storage backends and DataFrame engines, as part of the [OasisLMF](https://github.com/OasisLMF) catastrophe modelling platform.
36
+
37
+ It abstracts:
38
+ - **File I/O** — local filesystem, AWS S3, Azure Blob Storage
39
+ - **DataFrame reading** — pandas, Dask, PyArrow
40
+ - **Data pipelines** — fetch → filter → SQL → transform, composable via a fluent API
41
+
42
+ ---
43
+
44
+ ## Installation
45
+
46
+ ```bash
47
+ # Core install (pandas + local storage only)
48
+ pip install oasis-data-manager
49
+
50
+ # Development install
51
+ pip install -e .
52
+ pip install -r requirements.txt
53
+
54
+ # With optional cloud and distributed features (Dask, S3, Azure, PyArrow)
55
+ pip install -e ".[extra]"
56
+ ```
57
+
58
+ ### Optional dependencies
59
+
60
+ The package ships several optional extras. Install any combination with `pip install oasis-data-manager[<extra>,...]`.
61
+
62
+ | Extra | What it adds | Key packages |
63
+ |---|---|---|
64
+ | `s3` | AWS S3 storage backend (`AwsS3Storage`) | `s3fs` |
65
+ | `azure` | Azure Blob Storage backend (`AzureABFSStorage`) | `adlfs` |
66
+ | `geo` | Geospatial DataFrame support (GeoDataFrame read/write) | `geopandas`, `pyogrio` |
67
+ | `dask` | Dask reader, distributed execution, and geospatial Dask support | `dask`, `dask-sql`, `distributed`, `dask-geopandas` (includes `geo`) |
68
+ | `extra` | Everything above bundled together | all of the above |
69
+
70
+ **Examples**
71
+
72
+ ```bash
73
+ # S3 support only
74
+ pip install "oasis-data-manager[s3]"
75
+
76
+ # Both cloud backends
77
+ pip install "oasis-data-manager[s3,azure]"
78
+
79
+ # Dask reader (also installs geo)
80
+ pip install "oasis-data-manager[dask]"
81
+
82
+ # Everything
83
+ pip install "oasis-data-manager[extra]"
84
+ ```
85
+
86
+ ---
87
+
88
+ ## Quick start
89
+
90
+ ```python
91
+ from oasis_data_manager.df_reader.backends.pandas import OasisPandasReader
92
+ from oasis_data_manager.filestore.backends.local import LocalStorage
93
+
94
+ storage = LocalStorage("/data")
95
+
96
+ # Read a CSV and get a pandas DataFrame
97
+ df = OasisPandasReader("accounts.csv", storage).as_pandas()
98
+
99
+ # Chain filters
100
+ df = (
101
+ OasisPandasReader("accounts.csv", storage)
102
+ .filter([lambda x: x[x["PortNumber"] == "1"]])
103
+ .as_pandas()
104
+ )
105
+ ```
106
+
107
+ ---
108
+
109
+ ## Storage backends
110
+
111
+ Three backends are provided. All share the same interface.
112
+
113
+ ### Local
114
+
115
+ ```python
116
+ from oasis_data_manager.filestore.backends.local import LocalStorage
117
+
118
+ storage = LocalStorage(root_dir="/data")
119
+ ```
120
+
121
+ ### AWS S3
122
+
123
+ ```python
124
+ from oasis_data_manager.filestore.backends.aws import AwsS3Storage
125
+
126
+ storage = AwsS3Storage(
127
+ bucket_name="my-bucket",
128
+ access_key="AKIA...",
129
+ secret_key="...",
130
+ root_dir="models/", # optional sub-path within the bucket
131
+ )
132
+ ```
133
+
134
+ ### Azure Blob Storage
135
+
136
+ ```python
137
+ from oasis_data_manager.filestore.backends.azure import AzureABFSStorage
138
+
139
+ storage = AzureABFSStorage(
140
+ account_name="myaccount",
141
+ account_key="...",
142
+ azure_container="my-container",
143
+ root_dir="models/", # optional sub-path within the container
144
+ )
145
+ ```
146
+
147
+ ### Configuration dict pattern
148
+
149
+ Used throughout the OasisLMF platform to configure storage from serialisable dicts:
150
+
151
+ ```python
152
+ from oasis_data_manager.filestore.config import get_storage_from_config
153
+
154
+ config = {
155
+ "storage_class": "AwsS3Storage",
156
+ "options": {
157
+ "bucket_name": "my-bucket",
158
+ "access_key": "AKIA...",
159
+ "secret_key": "...",
160
+ }
161
+ }
162
+ storage = get_storage_from_config(config)
163
+ ```
164
+
165
+ ### Common storage operations
166
+
167
+ ```python
168
+ # Open a file (context manager, like built-in open)
169
+ with storage.open("path/to/file.csv") as f:
170
+ data = f.read()
171
+
172
+ # Copy a file to a local temp directory
173
+ local_path = storage.get("remote/file.parquet", "/tmp/")
174
+
175
+ # Upload a file
176
+ storage.put("/tmp/output.csv", "remote/output.csv")
177
+
178
+ # Delete
179
+ storage.delete_file("remote/old.csv")
180
+ ```
181
+
182
+ ---
183
+
184
+ ## DataFrame readers
185
+
186
+ ### Reader backends
187
+
188
+ | Class | Engine | Formats | Filter behaviour |
189
+ |---|---|---|---|
190
+ | `OasisPandasReader` | pandas | CSV, Parquet | In-memory (post-load) |
191
+ | `OasisDaskReader` | Dask | CSV, Parquet | In-memory via dask-sql |
192
+ | `OasisPyarrowReader` | PyArrow | Parquet only | Predicate pushdown (pre-load) |
193
+
194
+ Format-specific subclasses (`OasisPandasReaderCSV`, `OasisDaskReaderParquet`, etc.) are available for pandas and Dask. PyArrow provides only the base `OasisPyarrowReader`, which reads Parquet files.
195
+
196
+ ### Fluent API
197
+
198
+ All readers share the same chainable interface. The actual file read is **lazy** — it happens on the first access to `.df` or when `.as_pandas()` is called.
199
+
200
+ ```python
201
+ from oasis_data_manager.df_reader.backends.pandas import OasisPandasReader
202
+ from oasis_data_manager.df_reader.backends.dask import OasisDaskReader
203
+ from oasis_data_manager.filestore.backends.local import LocalStorage
204
+
205
+ storage = LocalStorage("/data")
206
+
207
+ # Pandas — CSV
208
+ df = OasisPandasReader("losses.csv", storage).as_pandas()
209
+
210
+ # Pandas — Parquet (detected automatically from extension)
211
+ df = OasisPandasReader("losses.parquet", storage).as_pandas()
212
+
213
+ # Dask
214
+ df = OasisDaskReader("losses.csv", storage).as_pandas()
215
+ ```
216
+
217
+ ### Filtering
218
+
219
+ Pass a list of callables; each receives the DataFrame and must return a (filtered) DataFrame.
220
+
221
+ ```python
222
+ df = (
223
+ OasisPandasReader("locations.csv", storage)
224
+ .filter([
225
+ lambda x: x[x["CountryCode"] == "US"],
226
+ lambda x: x[x["LocNumber"].notna()],
227
+ ])
228
+ .as_pandas()
229
+ )
230
+ ```
231
+
232
+ `OasisPandasReader` and `OasisDaskReader` apply filters **after** loading the full file into memory. `OasisPyarrowReader` accepts a `filters` kwarg (list of tuples or list of lists) that is pushed down into the Parquet engine before any data is read into memory — use this for large Parquet files where row-group skipping matters.
233
+
234
+ ```python
235
+ from oasis_data_manager.df_reader.backends.pyarrow import OasisPyarrowReader
236
+
237
+ # AND of conditions — list of tuples
238
+ df = (
239
+ OasisPyarrowReader("losses.parquet", storage)
240
+ .read(filters=[("CountryCode", "==", "US"), ("TIV", ">=", 1_000_000)])
241
+ .as_pandas()
242
+ )
243
+
244
+ # OR of AND-groups — list of lists
245
+ df = (
246
+ OasisPyarrowReader("losses.parquet", storage)
247
+ .read(filters=[[("CountryCode", "==", "US")], [("CountryCode", "==", "GB")]])
248
+ .as_pandas()
249
+ )
250
+ ```
251
+
252
+ Supported operators: `==`, `!=`, `<`, `<=`, `>`, `>=`, `in`, `not in`.
253
+
254
+ ### SQL (Dask only)
255
+
256
+ Requires `dask-sql`. The reserved table name is `table`.
257
+
258
+ ```python
259
+ from oasis_data_manager.df_reader.backends.dask import OasisDaskReader
260
+
261
+ df = (
262
+ OasisDaskReader("locations.csv", storage)
263
+ .sql("SELECT LocNumber, Latitude, Longitude FROM table WHERE CountryCode = 'US'")
264
+ .as_pandas()
265
+ )
266
+ ```
267
+
268
+ ### Arbitrary queries
269
+
270
+ `.query(fn)` passes the raw DataFrame to any callable and returns the result directly (not a reader).
271
+
272
+ ```python
273
+ count = OasisPandasReader("losses.csv", storage).query(lambda df: len(df))
274
+ ```
275
+
276
+ ### Configuration dict pattern
277
+
278
+ ```python
279
+ from oasis_data_manager.df_reader.config import get_df_reader
280
+
281
+ config = {
282
+ "path": "accounts.csv",
283
+ "storage": storage,
284
+ "options": {"dtype": {"LocNumber": str}},
285
+ "engine": "OasisPandasReaderCSV",
286
+ }
287
+ reader = get_df_reader(config)
288
+ df = reader.as_pandas()
289
+ ```
290
+
291
+ ---
292
+
293
+ ## Complex data pipelines
294
+
295
+ `ComplexData` composes storage fetch, SQL filtering, and post-read transformations into a single reusable class.
296
+
297
+ ### `FileStoreComplexData` — files not handled by the df_reader (e.g. HDF5)
298
+
299
+ ```python
300
+ from oasis_data_manager.complex import FileStoreComplexData, Adjustment
301
+ import h5py
302
+ import pandas as pd
303
+
304
+ class NormaliseAdjustment(Adjustment):
305
+ @classmethod
306
+ def apply(cls, df):
307
+ df["loss"] = df["loss"] / df["loss"].max()
308
+ return df
309
+
310
+ class EventLossData(FileStoreComplexData):
311
+ filename = "event_losses.hdf5"
312
+ sql = "SELECT * FROM table WHERE event_id > 1000"
313
+ adjustments = [NormaliseAdjustment]
314
+
315
+ def to_dataframe(self, result) -> pd.DataFrame:
316
+ f = h5py.File(result)
317
+ return pd.DataFrame({"event_id": list(f["event_id"]), "loss": list(f["loss"])})
318
+
319
+ # Run the pipeline
320
+ df = EventLossData(storage=storage).run().as_pandas()
321
+ ```
322
+
323
+ ### `RestComplexData` — HTTP endpoints
324
+
325
+ ```python
326
+ from oasis_data_manager.complex import RestComplexData
327
+
328
+ class ExposureAPI(RestComplexData):
329
+ url = "https://api.example.com/exposures"
330
+ timeout = 30
331
+
332
+ def get_headers(self):
333
+ return {"Authorization": "Bearer my-token"}
334
+
335
+ def handle_response(self, response):
336
+ return response.json()["data"]
337
+
338
+ df = ExposureAPI().run().as_pandas()
339
+ ```
340
+
341
+ ---
342
+
343
+ ## Exceptions
344
+
345
+ | Exception | Description |
346
+ |---|---|
347
+ | `OasisDataManagerException` | Base exception for this library |
348
+ | `OasisException` | Backward-compatible alias for the above |
349
+ | `MissingInputsException` | Raised when a required input file is not found |
350
+
351
+ ```python
352
+ from oasis_data_manager.errors import OasisDataManagerException, MissingInputsException
353
+ ```
354
+
355
+ ---
356
+
357
+ ## Import paths
358
+
359
+ ```python
360
+ # Storage backends
361
+ from oasis_data_manager.filestore.backends.local import LocalStorage
362
+ from oasis_data_manager.filestore.backends.aws import AwsS3Storage
363
+ from oasis_data_manager.filestore.backends.azure import AzureABFSStorage
364
+ from oasis_data_manager.filestore.config import get_storage_from_config
365
+
366
+ # DataFrame readers
367
+ from oasis_data_manager.df_reader.backends.pandas import OasisPandasReader, OasisPandasReaderCSV, OasisPandasReaderParquet
368
+ from oasis_data_manager.df_reader.backends.dask import OasisDaskReader, OasisDaskReaderCSV, OasisDaskReaderParquet
369
+ from oasis_data_manager.df_reader.backends.pyarrow import OasisPyarrowReader
370
+ from oasis_data_manager.df_reader.config import get_df_reader
371
+
372
+ # Exceptions
373
+ from oasis_data_manager.errors import OasisDataManagerException, OasisException
374
+ ```
375
+
376
+ Deprecated module paths (`filestore/backends/aws_s3.py`, `filestore/backends/azure_abfs.py`) still work but emit a `DeprecationWarning`.
377
+
378
+ ---
379
+
380
+ ## Development
381
+
382
+ ```bash
383
+ # Install dev dependencies
384
+ pip install -e .
385
+ pip install -r requirements.txt
386
+
387
+ # Run tests
388
+ pytest
389
+
390
+ # Skip type checking and import sorting for faster iteration
391
+ pytest --no-header -p no:mypy -p no:isort tests/df_reader/
392
+
393
+ # Cloud integration tests (requires Docker)
394
+ docker compose up -d
395
+ pytest tests/filestorage/test_aws.py tests/filestorage/test_azure.py
396
+ docker compose down
397
+
398
+ # Linting
399
+ flake8 --select F401,F522,F524,F541 --show-source ./
400
+ autopep8 --diff --exit-code --recursive --max-line-length 150 --ignore E402 .
401
+
402
+ # Build
403
+ python -m build  # builds both the sdist and the wheel from pyproject.toml (requires `pip install build`)
404
+ ```
405
+
406
+ ---
407
+
408
+ ## License
409
+
410
+ Part of the [OasisLMF](https://github.com/OasisLMF) platform. See repository for licence details.
@@ -23,6 +23,34 @@ pip install -r requirements.txt
23
23
  pip install -e ".[extra]"
24
24
  ```
25
25
 
26
+ ### Optional dependencies
27
+
28
+ The package ships several optional extras. Install any combination with `pip install oasis-data-manager[<extra>,...]`.
29
+
30
+ | Extra | What it adds | Key packages |
31
+ |---|---|---|
32
+ | `s3` | AWS S3 storage backend (`AwsS3Storage`) | `s3fs` |
33
+ | `azure` | Azure Blob Storage backend (`AzureABFSStorage`) | `adlfs` |
34
+ | `geo` | Geospatial DataFrame support (GeoDataFrame read/write) | `geopandas`, `pyogrio` |
35
+ | `dask` | Dask reader, distributed execution, and geospatial Dask support | `dask`, `dask-sql`, `distributed`, `dask-geopandas` (includes `geo`) |
36
+ | `extra` | Everything above bundled together | all of the above |
37
+
38
+ **Examples**
39
+
40
+ ```bash
41
+ # S3 support only
42
+ pip install "oasis-data-manager[s3]"
43
+
44
+ # Both cloud backends
45
+ pip install "oasis-data-manager[s3,azure]"
46
+
47
+ # Dask reader (also installs geo)
48
+ pip install "oasis-data-manager[dask]"
49
+
50
+ # Everything
51
+ pip install "oasis-data-manager[extra]"
52
+ ```
53
+
26
54
  ---
27
55
 
28
56
  ## Quick start
@@ -0,0 +1 @@
1
+ __version__ = '0.2.3'
@@ -19,6 +19,7 @@ class AwsS3Storage(BaseStorage):
19
19
  access_key: Optional[str] = None,
20
20
  secret_key: Optional[str] = None,
21
21
  endpoint_url: Optional[str] = None,
22
+ public_bucket: bool = False,
22
23
  file_overwrite=True,
23
24
  object_parameters: Optional[dict] = None,
24
25
  auto_create_bucket=False,
@@ -92,6 +93,7 @@ class AwsS3Storage(BaseStorage):
92
93
  self.access_key = access_key
93
94
  self.secret_key = secret_key
94
95
  self.endpoint_url = endpoint_url
96
+ self.public_bucket = public_bucket
95
97
  self.file_overwrite = file_overwrite
96
98
  self.object_parameters = object_parameters
97
99
  self.auto_create_bucket = auto_create_bucket
@@ -128,6 +130,7 @@ class AwsS3Storage(BaseStorage):
128
130
  "access_key": self.access_key,
129
131
  "secret_key": self.secret_key,
130
132
  "endpoint_url": self.endpoint_url,
133
+ "public_bucket": self.public_bucket,
131
134
  "file_overwrite": self.file_overwrite,
132
135
  "object_parameters": self.object_parameters,
133
136
  "auto_create_bucket": self.auto_create_bucket,
@@ -163,7 +166,7 @@ class AwsS3Storage(BaseStorage):
163
166
  if self.reduced_redundancy:
164
167
  s3_additional_kwargs["StorageClass"] = "REDUCED_REDUNDANCY"
165
168
 
166
- return {
169
+ options = {
167
170
  "key": self.access_key,
168
171
  "secret": self.secret_key,
169
172
  "token": self.security_token,
@@ -174,6 +177,9 @@ class AwsS3Storage(BaseStorage):
174
177
  "region_name": self.region_name,
175
178
  },
176
179
  }
180
+ if self.public_bucket:
181
+ options["anon"] = True
182
+ return options
177
183
 
178
184
  def _strip_signing_parameters(self, url):
179
185
  """Duplicated Unsiged URLs from Django-Stroage
@@ -69,7 +69,7 @@ class BaseStorage(object):
69
69
  fsspec_filesystem_class: Optional[Type[fsspec.AbstractFileSystem]]
70
70
 
71
71
  def __init__(
72
- self, root_dir="", cache_dir: Union[str, None] = "/tmp/data-cache", logger=None
72
+ self, root_dir="", cache_dir: Union[str, None] = "/tmp/data-cache", logger=None, **kwargs
73
73
  ):
74
74
  # Used for caching files across multiple runs; set to 'None' or 'False' to disable
75
75
  self.cache_root = cache_dir
@@ -458,9 +458,10 @@ class BaseStorage(object):
458
458
  def open(self, path, *args, **kwargs):
459
459
  if self._is_valid_url(path):
460
460
  with tempfile.TemporaryDirectory() as d:
461
- with open(
462
- self.get_from_cache(path, no_cache_target=os.path.join(d, "f"))
463
- ) as f:
461
+ local_path = self.get_from_cache(path, no_cache_target=os.path.join(d, "f"))
462
+ if local_path is None:
463
+ raise FileNotFoundError(f"No such file or directory: '{path}'")
464
+ with open(local_path) as f:
464
465
  yield f
465
466
  else:
466
467
  with self.fs.open(path, *args, **kwargs) as f:
@@ -470,7 +471,8 @@ class BaseStorage(object):
470
471
  def with_fileno(self, path, mode="rb"):
471
472
  with tempfile.TemporaryDirectory() as d:
472
473
  target = os.path.join(d, "fileno")
473
- path = self.get_from_cache(path, no_cache_target=target)
474
-
475
- with open(path, mode) as f:
474
+ local_path = self.get_from_cache(path, no_cache_target=target)
475
+ if local_path is None:
476
+ raise FileNotFoundError(f"No such file or directory: '{path}'")
477
+ with open(local_path, mode) as f:
476
478
  yield f
@@ -19,6 +19,7 @@ class S3StorageConfig(BaseStorageConfig):
19
19
  access_key: NotRequired[str]
20
20
  secret_key: NotRequired[str]
21
21
  endpoint_url: NotRequired[str]
22
+ public_bucket: NotRequired[bool]
22
23
  file_overwrite: NotRequired[bool]
23
24
  object_parameters: NotRequired[dict]
24
25
  auto_create_bucket: NotRequired[bool]