FlowerPower 0.9.12.4__py3-none-any.whl → 1.0.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowerpower/__init__.py +17 -2
- flowerpower/cfg/__init__.py +201 -149
- flowerpower/cfg/base.py +122 -24
- flowerpower/cfg/pipeline/__init__.py +254 -0
- flowerpower/cfg/pipeline/adapter.py +66 -0
- flowerpower/cfg/pipeline/run.py +40 -11
- flowerpower/cfg/pipeline/schedule.py +69 -79
- flowerpower/cfg/project/__init__.py +149 -0
- flowerpower/cfg/project/adapter.py +57 -0
- flowerpower/cfg/project/job_queue.py +165 -0
- flowerpower/cli/__init__.py +92 -35
- flowerpower/cli/job_queue.py +878 -0
- flowerpower/cli/mqtt.py +49 -4
- flowerpower/cli/pipeline.py +576 -381
- flowerpower/cli/utils.py +55 -0
- flowerpower/flowerpower.py +12 -7
- flowerpower/fs/__init__.py +20 -2
- flowerpower/fs/base.py +350 -26
- flowerpower/fs/ext.py +797 -216
- flowerpower/fs/storage_options.py +1097 -55
- flowerpower/io/base.py +13 -18
- flowerpower/io/loader/__init__.py +28 -0
- flowerpower/io/loader/deltatable.py +7 -10
- flowerpower/io/metadata.py +1 -0
- flowerpower/io/saver/__init__.py +28 -0
- flowerpower/io/saver/deltatable.py +4 -3
- flowerpower/job_queue/__init__.py +252 -0
- flowerpower/job_queue/apscheduler/__init__.py +11 -0
- flowerpower/job_queue/apscheduler/_setup/datastore.py +110 -0
- flowerpower/job_queue/apscheduler/_setup/eventbroker.py +93 -0
- flowerpower/job_queue/apscheduler/manager.py +1063 -0
- flowerpower/job_queue/apscheduler/setup.py +524 -0
- flowerpower/job_queue/apscheduler/trigger.py +169 -0
- flowerpower/job_queue/apscheduler/utils.py +309 -0
- flowerpower/job_queue/base.py +382 -0
- flowerpower/job_queue/rq/__init__.py +10 -0
- flowerpower/job_queue/rq/_trigger.py +37 -0
- flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +226 -0
- flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +231 -0
- flowerpower/job_queue/rq/manager.py +1449 -0
- flowerpower/job_queue/rq/setup.py +150 -0
- flowerpower/job_queue/rq/utils.py +69 -0
- flowerpower/pipeline/__init__.py +5 -0
- flowerpower/pipeline/base.py +118 -0
- flowerpower/pipeline/io.py +407 -0
- flowerpower/pipeline/job_queue.py +505 -0
- flowerpower/pipeline/manager.py +1586 -0
- flowerpower/pipeline/registry.py +560 -0
- flowerpower/pipeline/runner.py +560 -0
- flowerpower/pipeline/visualizer.py +142 -0
- flowerpower/plugins/mqtt/__init__.py +12 -0
- flowerpower/plugins/mqtt/cfg.py +16 -0
- flowerpower/plugins/mqtt/manager.py +789 -0
- flowerpower/settings.py +110 -0
- flowerpower/utils/logging.py +21 -0
- flowerpower/utils/misc.py +57 -9
- flowerpower/utils/sql.py +122 -24
- flowerpower/utils/templates.py +18 -142
- flowerpower/web/app.py +0 -0
- flowerpower-1.0.0b1.dist-info/METADATA +324 -0
- flowerpower-1.0.0b1.dist-info/RECORD +94 -0
- {flowerpower-0.9.12.4.dist-info → flowerpower-1.0.0b1.dist-info}/WHEEL +1 -1
- flowerpower/cfg/pipeline/tracker.py +0 -14
- flowerpower/cfg/project/open_telemetry.py +0 -8
- flowerpower/cfg/project/tracker.py +0 -11
- flowerpower/cfg/project/worker.py +0 -19
- flowerpower/cli/scheduler.py +0 -309
- flowerpower/event_handler.py +0 -23
- flowerpower/mqtt.py +0 -525
- flowerpower/pipeline.py +0 -2419
- flowerpower/scheduler.py +0 -680
- flowerpower/tui.py +0 -79
- flowerpower/utils/datastore.py +0 -186
- flowerpower/utils/eventbroker.py +0 -127
- flowerpower/utils/executor.py +0 -58
- flowerpower/utils/trigger.py +0 -140
- flowerpower-0.9.12.4.dist-info/METADATA +0 -575
- flowerpower-0.9.12.4.dist-info/RECORD +0 -70
- /flowerpower/{cfg/pipeline/params.py → cli/worker.py} +0 -0
- {flowerpower-0.9.12.4.dist-info → flowerpower-1.0.0b1.dist-info}/entry_points.txt +0 -0
- {flowerpower-0.9.12.4.dist-info → flowerpower-1.0.0b1.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,54 @@
|
|
1
1
|
import configparser
|
2
2
|
import os
|
3
|
+
from typing import Any, TypeVar, Union
|
3
4
|
|
4
5
|
import yaml
|
5
6
|
from fsspec import AbstractFileSystem, filesystem
|
7
|
+
from fsspec.utils import infer_storage_options
|
6
8
|
from pydantic import BaseModel
|
7
9
|
|
8
10
|
|
9
11
|
class BaseStorageOptions(BaseModel):
|
12
|
+
"""Base class for filesystem storage configuration options.
|
13
|
+
|
14
|
+
Provides common functionality for all storage option classes including:
|
15
|
+
- YAML serialization/deserialization
|
16
|
+
- Dictionary conversion
|
17
|
+
- Filesystem instance creation
|
18
|
+
- Configuration updates
|
19
|
+
|
20
|
+
Attributes:
|
21
|
+
protocol (str): Storage protocol identifier (e.g., "s3", "gs", "file")
|
22
|
+
|
23
|
+
Example:
|
24
|
+
>>> # Create and save options
|
25
|
+
>>> options = BaseStorageOptions(protocol="s3")
|
26
|
+
>>> options.to_yaml("config.yml")
|
27
|
+
>>>
|
28
|
+
>>> # Load from YAML
|
29
|
+
>>> loaded = BaseStorageOptions.from_yaml("config.yml")
|
30
|
+
>>> print(loaded.protocol)
|
31
|
+
's3'
|
32
|
+
"""
|
33
|
+
|
10
34
|
protocol: str
|
11
35
|
|
12
36
|
def to_dict(self, with_protocol: bool = False) -> dict:
|
37
|
+
"""Convert storage options to dictionary.
|
38
|
+
|
39
|
+
Args:
|
40
|
+
with_protocol: Whether to include protocol in output dictionary
|
41
|
+
|
42
|
+
Returns:
|
43
|
+
dict: Dictionary of storage options with non-None values
|
44
|
+
|
45
|
+
Example:
|
46
|
+
>>> options = BaseStorageOptions(protocol="s3")
|
47
|
+
>>> print(options.to_dict())
|
48
|
+
{}
|
49
|
+
>>> print(options.to_dict(with_protocol=True))
|
50
|
+
{'protocol': 's3'}
|
51
|
+
"""
|
13
52
|
items = self.model_dump().items()
|
14
53
|
if not with_protocol:
|
15
54
|
return {k: v for k, v in items if k != "protocol" and v is not None}
|
@@ -19,11 +58,38 @@ class BaseStorageOptions(BaseModel):
|
|
19
58
|
def from_yaml(
    cls, path: str, fs: AbstractFileSystem = None
) -> "BaseStorageOptions":
    """Construct storage options from a YAML configuration file.

    Args:
        path: Location of the YAML document to read.
        fs: Filesystem used to open the file; defaults to the local
            filesystem when omitted.

    Returns:
        BaseStorageOptions: Instance populated from the YAML mapping.

    Example:
        >>> options = BaseStorageOptions.from_yaml("config.yml")
        >>> print(options.protocol)
        's3'
    """
    reader = fs if fs is not None else filesystem("file")
    with reader.open(path) as handle:
        payload = yaml.safe_load(handle)
    return cls(**payload)
|
25
81
|
|
26
|
-
def to_yaml(self, path: str, fs: AbstractFileSystem = None):
|
82
|
+
def to_yaml(self, path: str, fs: AbstractFileSystem = None) -> None:
|
83
|
+
"""Save storage options to YAML file.
|
84
|
+
|
85
|
+
Args:
|
86
|
+
path: Path where to save configuration
|
87
|
+
fs: Filesystem to use for writing
|
88
|
+
|
89
|
+
Example:
|
90
|
+
>>> options = BaseStorageOptions(protocol="s3")
|
91
|
+
>>> options.to_yaml("config.yml")
|
92
|
+
"""
|
27
93
|
if fs is None:
|
28
94
|
fs = filesystem("file")
|
29
95
|
data = self.to_dict()
|
@@ -31,24 +97,325 @@ class BaseStorageOptions(BaseModel):
|
|
31
97
|
yaml.safe_dump(data, f)
|
32
98
|
|
33
99
|
def to_filesystem(self) -> AbstractFileSystem:
    """Instantiate an fsspec filesystem from these options.

    Returns:
        AbstractFileSystem: Filesystem configured with this object's
        non-None options, protocol included.

    Example:
        >>> fs = BaseStorageOptions(protocol="file").to_filesystem()
        >>> entries = fs.ls("/path/to/data")
    """
    kwargs = self.to_dict(with_protocol=True)
    return filesystem(**kwargs)
|
35
111
|
|
36
|
-
def update(self, **kwargs: Any) -> None:
    """Update storage options in place with new values.

    Bug fix: the previous implementation did ``self = self.model_copy(
    update=kwargs)``, which only rebinds the *local* name ``self`` — the
    caller's instance was never modified and the update was silently
    discarded.  Assigning each field on the instance makes the change
    visible to the caller.

    Args:
        **kwargs: Field names and their new values to set on this
            instance.

    Raises:
        ValueError: If a name is not a declared field of the model
            (raised by pydantic on assignment).

    Example:
        >>> options = BaseStorageOptions(protocol="s3")
        >>> options.update(protocol="gs")
        >>> print(options.protocol)
        'gs'
    """
    for field_name, value in kwargs.items():
        setattr(self, field_name, value)
|
38
125
|
|
39
126
|
|
40
127
|
class AzureStorageOptions(BaseStorageOptions):
    """Configuration options for Azure storage backends.

    Covers the three Azure protocols exposed by fsspec: Blob Storage
    ("az"), Data Lake Storage Gen2 ("abfs") and Data Lake Storage Gen1
    ("adl").  Credentials may be supplied as a connection string, an
    account key, a service-principal triple (tenant/client/secret), a
    SAS token, or a managed identity.

    Attributes:
        protocol (str): Storage protocol ("az", "abfs", or "adl").
        account_name (str): Storage account name.
        account_key (str): Storage account access key.
        connection_string (str): Full connection string.
        tenant_id (str): Azure AD tenant ID.
        client_id (str): Service principal client ID.
        client_secret (str): Service principal client secret.
        sas_token (str): SAS token for limited access.

    Example:
        >>> # Blob Storage with account key
        >>> options = AzureStorageOptions(
        ...     protocol="az",
        ...     account_name="mystorageacct",
        ...     account_key="key123...",
        ... )
        >>>
        >>> # Data Lake with service principal
        >>> options = AzureStorageOptions(
        ...     protocol="abfs",
        ...     account_name="mydatalake",
        ...     tenant_id="tenant123",
        ...     client_id="client123",
        ...     client_secret="secret123",
        ... )
    """

    protocol: str
    account_name: str | None = None
    account_key: str | None = None
    connection_string: str | None = None
    tenant_id: str | None = None
    client_id: str | None = None
    client_secret: str | None = None
    sas_token: str | None = None

    @classmethod
    def from_env(cls) -> "AzureStorageOptions":
        """Build options from the standard Azure environment variables.

        Reads AZURE_STORAGE_PROTOCOL (defaulting to "az"),
        AZURE_STORAGE_ACCOUNT_NAME, AZURE_STORAGE_ACCOUNT_KEY,
        AZURE_STORAGE_CONNECTION_STRING, AZURE_TENANT_ID,
        AZURE_CLIENT_ID, AZURE_CLIENT_SECRET and
        AZURE_STORAGE_SAS_TOKEN.

        Returns:
            AzureStorageOptions: Options populated from the environment.

        Example:
            >>> options = AzureStorageOptions.from_env()
            >>> print(options.account_name)  # From AZURE_STORAGE_ACCOUNT_NAME
            'mystorageacct'
        """
        getenv = os.getenv
        return cls(
            protocol=getenv("AZURE_STORAGE_PROTOCOL", "az"),
            account_name=getenv("AZURE_STORAGE_ACCOUNT_NAME"),
            account_key=getenv("AZURE_STORAGE_ACCOUNT_KEY"),
            connection_string=getenv("AZURE_STORAGE_CONNECTION_STRING"),
            tenant_id=getenv("AZURE_TENANT_ID"),
            client_id=getenv("AZURE_CLIENT_ID"),
            client_secret=getenv("AZURE_CLIENT_SECRET"),
            sas_token=getenv("AZURE_STORAGE_SAS_TOKEN"),
        )

    def to_env(self) -> None:
        """Export the configured values to Azure environment variables.

        Only options that are set (non-None) are written; variables for
        unset options are left untouched.

        Example:
            >>> options = AzureStorageOptions(
            ...     protocol="az",
            ...     account_name="mystorageacct",
            ...     account_key="key123",
            ... )
            >>> options.to_env()
            >>> print(os.getenv("AZURE_STORAGE_ACCOUNT_NAME"))
            'mystorageacct'
        """
        mapping = {
            "AZURE_STORAGE_PROTOCOL": self.protocol,
            "AZURE_STORAGE_ACCOUNT_NAME": self.account_name,
            "AZURE_STORAGE_ACCOUNT_KEY": self.account_key,
            "AZURE_STORAGE_CONNECTION_STRING": self.connection_string,
            "AZURE_TENANT_ID": self.tenant_id,
            "AZURE_CLIENT_ID": self.client_id,
            "AZURE_CLIENT_SECRET": self.client_secret,
            "AZURE_STORAGE_SAS_TOKEN": self.sas_token,
        }
        os.environ.update({k: v for k, v in mapping.items() if v is not None})
|
42
245
|
|
43
246
|
|
44
247
|
class GcsStorageOptions(BaseStorageOptions):
    """Configuration options for Google Cloud Storage.

    Supports service-account files, application default credentials,
    OAuth2 access tokens, explicit project selection and custom
    endpoints (e.g. a local emulator).

    Attributes:
        protocol (str): Storage protocol ("gs" or "gcs").
        token (str): Path to service account JSON file.
        project (str): Google Cloud project ID.
        access_token (str): OAuth2 access token.
        endpoint_url (str): Custom storage endpoint.
        timeout (int): Request timeout in seconds.

    Example:
        >>> # Service account auth
        >>> options = GcsStorageOptions(
        ...     protocol="gs",
        ...     token="path/to/service-account.json",
        ...     project="my-project-123",
        ... )
        >>>
        >>> # Custom endpoint (e.g. test server)
        >>> options = GcsStorageOptions(
        ...     protocol="gs",
        ...     endpoint_url="http://localhost:4443",
        ...     token="test-token.json",
        ... )
    """

    protocol: str
    token: str | None = None
    project: str | None = None
    access_token: str | None = None
    endpoint_url: str | None = None
    timeout: int | None = None

    @classmethod
    def from_env(cls) -> "GcsStorageOptions":
        """Build options from the standard GCP environment variables.

        Reads GOOGLE_CLOUD_PROJECT, GOOGLE_APPLICATION_CREDENTIALS,
        STORAGE_EMULATOR_HOST and GCS_OAUTH_TOKEN; the protocol is
        fixed to "gs".

        Returns:
            GcsStorageOptions: Options populated from the environment.

        Example:
            >>> options = GcsStorageOptions.from_env()
            >>> print(options.project)  # From GOOGLE_CLOUD_PROJECT
            'my-project-123'
        """
        getenv = os.getenv
        return cls(
            protocol="gs",
            project=getenv("GOOGLE_CLOUD_PROJECT"),
            token=getenv("GOOGLE_APPLICATION_CREDENTIALS"),
            endpoint_url=getenv("STORAGE_EMULATOR_HOST"),
            access_token=getenv("GCS_OAUTH_TOKEN"),
        )

    def to_env(self) -> None:
        """Export the configured values to GCP environment variables.

        Only options that are set (non-None) are written.

        Example:
            >>> options = GcsStorageOptions(
            ...     protocol="gs",
            ...     project="my-project",
            ...     token="service-account.json",
            ... )
            >>> options.to_env()
            >>> print(os.getenv("GOOGLE_CLOUD_PROJECT"))
            'my-project'
        """
        mapping = {
            "GOOGLE_CLOUD_PROJECT": self.project,
            "GOOGLE_APPLICATION_CREDENTIALS": self.token,
            "STORAGE_EMULATOR_HOST": self.endpoint_url,
            "GCS_OAUTH_TOKEN": self.access_token,
        }
        os.environ.update({k: v for k, v in mapping.items() if v is not None})

    def to_fsspec_kwargs(self) -> dict:
        """Convert options to fsspec filesystem arguments.

        Returns:
            dict: Non-None arguments suitable for GCSFileSystem.

        Example:
            >>> options = GcsStorageOptions(
            ...     protocol="gs",
            ...     token="service-account.json",
            ...     project="my-project",
            ... )
            >>> kwargs = options.to_fsspec_kwargs()
            >>> fs = filesystem("gcs", **kwargs)
        """
        candidates = {
            "token": self.token,
            "project": self.project,
            "access_token": self.access_token,
            "endpoint_url": self.endpoint_url,
            "timeout": self.timeout,
        }
        return {name: value for name, value in candidates.items() if value is not None}
|
46
368
|
|
47
369
|
|
48
370
|
class AwsStorageOptions(BaseStorageOptions):
|
371
|
+
"""AWS S3 storage configuration options.
|
372
|
+
|
373
|
+
Provides comprehensive configuration for S3 access with support for:
|
374
|
+
- Multiple authentication methods (keys, profiles, environment)
|
375
|
+
- Custom endpoints for S3-compatible services
|
376
|
+
- Region configuration
|
377
|
+
- SSL/TLS settings
|
378
|
+
|
379
|
+
Attributes:
|
380
|
+
protocol (str): Always "s3" for S3 storage
|
381
|
+
key (str): AWS access key ID (alias for access_key_id)
|
382
|
+
access_key_id (str): AWS access key ID
|
383
|
+
secret (str): AWS secret access key (alias for secret_access_key)
|
384
|
+
secret_access_key (str): AWS secret access key
|
385
|
+
token (str): AWS session token (alias for session_token)
|
386
|
+
session_token (str): AWS session token
|
387
|
+
endpoint_url (str): Custom S3 endpoint URL
|
388
|
+
region (str): AWS region name
|
389
|
+
allow_invalid_certificates (bool): Skip SSL certificate validation
|
390
|
+
allow_http (bool): Allow unencrypted HTTP connections
|
391
|
+
profile (str): AWS credentials profile name
|
392
|
+
|
393
|
+
Example:
|
394
|
+
>>> # Basic credentials
|
395
|
+
>>> options = AwsStorageOptions(
|
396
|
+
... access_key_id="AKIAXXXXXXXX",
|
397
|
+
... secret_access_key="SECRETKEY",
|
398
|
+
... region="us-east-1"
|
399
|
+
... )
|
400
|
+
>>>
|
401
|
+
>>> # Profile-based auth
|
402
|
+
>>> options = AwsStorageOptions(profile="dev")
|
403
|
+
>>>
|
404
|
+
>>> # S3-compatible service (MinIO)
|
405
|
+
>>> options = AwsStorageOptions(
|
406
|
+
... endpoint_url="http://localhost:9000",
|
407
|
+
... access_key_id="minioadmin",
|
408
|
+
... secret_access_key="minioadmin",
|
409
|
+
... allow_http=True
|
410
|
+
... )
|
411
|
+
"""
|
412
|
+
|
49
413
|
protocol: str = "s3"
|
414
|
+
key: str | None = None
|
50
415
|
access_key_id: str | None = None
|
416
|
+
secret: str | None = None
|
51
417
|
secret_access_key: str | None = None
|
418
|
+
token: str | None = None
|
52
419
|
session_token: str | None = None
|
53
420
|
endpoint_url: str | None = None
|
54
421
|
region: str | None = None
|
@@ -56,15 +423,42 @@ class AwsStorageOptions(BaseStorageOptions):
|
|
56
423
|
allow_http: bool | None = None
|
57
424
|
profile: str | None = None
|
58
425
|
|
59
|
-
def model_post_init(self, __context):
|
426
|
+
def model_post_init(self, __context: Any) -> None:
|
427
|
+
"""Post-initialization processing of AWS credentials.
|
428
|
+
|
429
|
+
Handles credential aliasing and profile-based loading.
|
430
|
+
Called automatically after initialization.
|
431
|
+
|
432
|
+
Args:
|
433
|
+
__context: Pydantic validation context (unused)
|
434
|
+
|
435
|
+
Example:
|
436
|
+
>>> # Alias handling
|
437
|
+
>>> opts = AwsStorageOptions(
|
438
|
+
... key="ACCESS_KEY",
|
439
|
+
... secret="SECRET_KEY"
|
440
|
+
... )
|
441
|
+
>>> print(opts.access_key_id) # Normalized
|
442
|
+
'ACCESS_KEY'
|
443
|
+
"""
|
444
|
+
# Normalize credential aliases
|
445
|
+
if self.access_key_id is None and self.key is not None:
|
446
|
+
self.access_key_id = self.key
|
447
|
+
if self.secret_access_key is None and self.secret is not None:
|
448
|
+
self.secret_access_key = self.secret
|
449
|
+
if self.session_token is None and self.token is not None:
|
450
|
+
self.session_token = self.token
|
451
|
+
|
452
|
+
# Load profile if specified
|
60
453
|
if self.profile is not None:
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
allow_http=self.allow_http,
|
66
|
-
).to_dict()
|
454
|
+
profile_opts = self.from_aws_credentials(
|
455
|
+
profile=self.profile,
|
456
|
+
allow_invalid_certificates=self.allow_invalid_certificates,
|
457
|
+
allow_http=self.allow_http,
|
67
458
|
)
|
459
|
+
for k, v in profile_opts.to_dict().items():
|
460
|
+
if getattr(self, k) is None:
|
461
|
+
setattr(self, k, v)
|
68
462
|
|
69
463
|
@classmethod
|
70
464
|
def from_aws_credentials(
|
@@ -73,6 +467,29 @@ class AwsStorageOptions(BaseStorageOptions):
|
|
73
467
|
allow_invalid_certificates: bool = False,
|
74
468
|
allow_http: bool = False,
|
75
469
|
) -> "AwsStorageOptions":
|
470
|
+
"""Create storage options from AWS credentials file.
|
471
|
+
|
472
|
+
Loads credentials from ~/.aws/credentials and ~/.aws/config files.
|
473
|
+
|
474
|
+
Args:
|
475
|
+
profile: AWS credentials profile name
|
476
|
+
allow_invalid_certificates: Skip SSL certificate validation
|
477
|
+
allow_http: Allow unencrypted HTTP connections
|
478
|
+
|
479
|
+
Returns:
|
480
|
+
AwsStorageOptions: Configured storage options
|
481
|
+
|
482
|
+
Raises:
|
483
|
+
ValueError: If profile not found
|
484
|
+
FileNotFoundError: If credentials files missing
|
485
|
+
|
486
|
+
Example:
|
487
|
+
>>> # Load developer profile
|
488
|
+
>>> options = AwsStorageOptions.from_aws_credentials(
|
489
|
+
... profile="dev",
|
490
|
+
... allow_http=True # For local testing
|
491
|
+
... )
|
492
|
+
"""
|
76
493
|
cp = configparser.ConfigParser()
|
77
494
|
cp.read(os.path.expanduser("~/.aws/credentials"))
|
78
495
|
cp.read(os.path.expanduser("~/.aws/config"))
|
@@ -100,6 +517,26 @@ class AwsStorageOptions(BaseStorageOptions):
|
|
100
517
|
|
101
518
|
@classmethod
|
102
519
|
def from_env(cls) -> "AwsStorageOptions":
|
520
|
+
"""Create storage options from environment variables.
|
521
|
+
|
522
|
+
Reads standard AWS environment variables:
|
523
|
+
- AWS_ACCESS_KEY_ID
|
524
|
+
- AWS_SECRET_ACCESS_KEY
|
525
|
+
- AWS_SESSION_TOKEN
|
526
|
+
- AWS_ENDPOINT_URL
|
527
|
+
- AWS_DEFAULT_REGION
|
528
|
+
- ALLOW_INVALID_CERTIFICATES
|
529
|
+
- AWS_ALLOW_HTTP
|
530
|
+
|
531
|
+
Returns:
|
532
|
+
AwsStorageOptions: Configured storage options
|
533
|
+
|
534
|
+
Example:
|
535
|
+
>>> # Load from environment
|
536
|
+
>>> options = AwsStorageOptions.from_env()
|
537
|
+
>>> print(options.region)
|
538
|
+
'us-east-1' # From AWS_DEFAULT_REGION
|
539
|
+
"""
|
103
540
|
return cls(
|
104
541
|
access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
|
105
542
|
secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
|
@@ -112,6 +549,20 @@ class AwsStorageOptions(BaseStorageOptions):
|
|
112
549
|
)
|
113
550
|
|
114
551
|
def to_fsspec_kwargs(self) -> dict:
|
552
|
+
"""Convert options to fsspec filesystem arguments.
|
553
|
+
|
554
|
+
Returns:
|
555
|
+
dict: Arguments suitable for fsspec S3FileSystem
|
556
|
+
|
557
|
+
Example:
|
558
|
+
>>> options = AwsStorageOptions(
|
559
|
+
... access_key_id="KEY",
|
560
|
+
... secret_access_key="SECRET",
|
561
|
+
... region="us-west-2"
|
562
|
+
... )
|
563
|
+
>>> kwargs = options.to_fsspec_kwargs()
|
564
|
+
>>> fs = filesystem("s3", **kwargs)
|
565
|
+
"""
|
115
566
|
fsspec_kwargs = {
|
116
567
|
"key": self.access_key_id,
|
117
568
|
"secret": self.secret_access_key,
|
@@ -119,27 +570,55 @@ class AwsStorageOptions(BaseStorageOptions):
|
|
119
570
|
"endpoint_url": self.endpoint_url,
|
120
571
|
"client_kwargs": {
|
121
572
|
"region_name": self.region,
|
122
|
-
"verify":
|
123
|
-
|
124
|
-
|
125
|
-
else False
|
126
|
-
),
|
573
|
+
"verify": not self.allow_invalid_certificates
|
574
|
+
if self.allow_invalid_certificates is not None
|
575
|
+
else True,
|
127
576
|
"use_ssl": not self.allow_http if self.allow_http is not None else True,
|
128
577
|
},
|
129
578
|
}
|
130
579
|
return {k: v for k, v in fsspec_kwargs.items() if v is not None}
|
131
580
|
|
132
581
|
def to_object_store_kwargs(self, with_conditional_put: bool = False) -> dict:
|
133
|
-
|
582
|
+
"""Convert options to object store arguments.
|
583
|
+
|
584
|
+
Args:
|
585
|
+
with_conditional_put: Add etag-based conditional put support
|
586
|
+
|
587
|
+
Returns:
|
588
|
+
dict: Arguments suitable for object store clients
|
589
|
+
|
590
|
+
Example:
|
591
|
+
>>> options = AwsStorageOptions(
|
592
|
+
... access_key_id="KEY",
|
593
|
+
... secret_access_key="SECRET"
|
594
|
+
... )
|
595
|
+
>>> kwargs = options.to_object_store_kwargs()
|
596
|
+
>>> client = ObjectStore(**kwargs)
|
597
|
+
"""
|
598
|
+
kwargs = {
|
134
599
|
k: str(v)
|
135
600
|
for k, v in self.to_dict().items()
|
136
601
|
if v is not None and k != "protocol"
|
137
602
|
}
|
138
603
|
if with_conditional_put:
|
139
|
-
|
140
|
-
return
|
604
|
+
kwargs["conditional_put"] = "etag"
|
605
|
+
return kwargs
|
141
606
|
|
142
607
|
def to_env(self) -> None:
|
608
|
+
"""Export options to environment variables.
|
609
|
+
|
610
|
+
Sets standard AWS environment variables.
|
611
|
+
|
612
|
+
Example:
|
613
|
+
>>> options = AwsStorageOptions(
|
614
|
+
... access_key_id="KEY",
|
615
|
+
... secret_access_key="SECRET",
|
616
|
+
... region="us-east-1"
|
617
|
+
... )
|
618
|
+
>>> options.to_env()
|
619
|
+
>>> print(os.getenv("AWS_ACCESS_KEY_ID"))
|
620
|
+
'KEY'
|
621
|
+
"""
|
143
622
|
env = {
|
144
623
|
"AWS_ACCESS_KEY_ID": self.access_key_id,
|
145
624
|
"AWS_SECRET_ACCESS_KEY": self.secret_access_key,
|
@@ -157,67 +636,364 @@ class AwsStorageOptions(BaseStorageOptions):
|
|
157
636
|
|
158
637
|
|
159
638
|
class GitHubStorageOptions(BaseStorageOptions):
    """Configuration options for GitHub repository storage.

    Grants file access to public and private repositories, with
    branch/tag/commit selection, token authentication and custom API
    URLs for GitHub Enterprise instances.

    Attributes:
        protocol (str): Always "github" for GitHub storage.
        org (str): Organization or user name.
        repo (str): Repository name.
        ref (str): Git reference (branch, tag, or commit SHA).
        token (str): GitHub personal access token.
        api_url (str): Custom GitHub API URL for enterprise instances.

    Example:
        >>> # Public repository
        >>> options = GitHubStorageOptions(
        ...     org="microsoft",
        ...     repo="vscode",
        ...     ref="main",
        ... )
        >>>
        >>> # Private repository
        >>> options = GitHubStorageOptions(
        ...     org="myorg",
        ...     repo="private-repo",
        ...     token="ghp_xxxx",
        ...     ref="develop",
        ... )
    """

    protocol: str = "github"
    org: str | None = None
    repo: str | None = None
    ref: str | None = None
    token: str | None = None
    api_url: str | None = None

    @classmethod
    def from_env(cls) -> "GitHubStorageOptions":
        """Build options from the standard GitHub environment variables.

        Reads GITHUB_ORG, GITHUB_REPO, GITHUB_REF, GITHUB_TOKEN and
        GITHUB_API_URL.

        Returns:
            GitHubStorageOptions: Options populated from the environment.

        Example:
            >>> options = GitHubStorageOptions.from_env()
            >>> print(options.org)  # From GITHUB_ORG
            'microsoft'
        """
        getenv = os.getenv
        return cls(
            protocol="github",
            org=getenv("GITHUB_ORG"),
            repo=getenv("GITHUB_REPO"),
            ref=getenv("GITHUB_REF"),
            token=getenv("GITHUB_TOKEN"),
            api_url=getenv("GITHUB_API_URL"),
        )

    def to_env(self) -> None:
        """Export the configured values to GitHub environment variables.

        Only options that are set (non-None) are written.

        Example:
            >>> options = GitHubStorageOptions(
            ...     org="microsoft",
            ...     repo="vscode",
            ...     token="ghp_xxxx",
            ... )
            >>> options.to_env()
            >>> print(os.getenv("GITHUB_ORG"))
            'microsoft'
        """
        mapping = {
            "GITHUB_ORG": self.org,
            "GITHUB_REPO": self.repo,
            "GITHUB_REF": self.ref,
            "GITHUB_TOKEN": self.token,
            "GITHUB_API_URL": self.api_url,
        }
        os.environ.update({k: v for k, v in mapping.items() if v is not None})

    def to_fsspec_kwargs(self) -> dict:
        """Convert options to fsspec filesystem arguments.

        Returns:
            dict: Non-None arguments suitable for GitHubFileSystem.

        Example:
            >>> options = GitHubStorageOptions(
            ...     org="microsoft",
            ...     repo="vscode",
            ...     token="ghp_xxxx",
            ... )
            >>> kwargs = options.to_fsspec_kwargs()
            >>> fs = filesystem("github", **kwargs)
        """
        candidates = {
            "org": self.org,
            "repo": self.repo,
            "ref": self.ref,
            "token": self.token,
            "api_url": self.api_url,
        }
        return {name: value for name, value in candidates.items() if value is not None}
|
178
764
|
|
179
765
|
|
180
766
|
class GitLabStorageOptions(BaseStorageOptions):
    """GitLab repository storage configuration options.

    Provides access to files in GitLab repositories with support for:
    - Public and private repositories
    - Self-hosted GitLab instances
    - Project ID or name-based access
    - Branch/tag/commit selection
    - Token-based authentication

    Attributes:
        protocol (str): Always "gitlab" for GitLab storage.
        base_url (str): GitLab instance URL, defaults to gitlab.com.
        project_id (str | int | None): Numeric project ID.
        project_name (str | None): Project name/path (e.g. "group/project").
        ref (str | None): Git reference (branch, tag, or commit SHA).
        token (str | None): GitLab personal access token.
        api_version (str): GitLab API version to use.

    Example:
        >>> # Public project on gitlab.com
        >>> options = GitLabStorageOptions(
        ...     project_name="group/project",
        ...     ref="main"
        ... )
        >>>
        >>> # Private project on a self-hosted instance
        >>> options = GitLabStorageOptions(
        ...     base_url="https://gitlab.company.com",
        ...     project_id=12345,
        ...     token="glpat_xxxx"
        ... )
    """

    protocol: str = "gitlab"
    base_url: str = "https://gitlab.com"
    project_id: str | int | None = None
    project_name: str | None = None
    ref: str | None = None
    token: str | None = None
    api_version: str = "v4"

    def model_post_init(self, __context: Any) -> None:
        """Validate GitLab configuration after initialization.

        Ensures either project_id or project_name is provided.

        Args:
            __context: Pydantic validation context (unused).

        Raises:
            ValueError: If neither project_id nor project_name is provided.
        """
        if self.project_id is None and self.project_name is None:
            raise ValueError("Either project_id or project_name must be provided")

    @classmethod
    def from_env(cls) -> "GitLabStorageOptions":
        """Create storage options from environment variables.

        Reads GITLAB_URL, GITLAB_PROJECT_ID, GITLAB_PROJECT_NAME, GITLAB_REF,
        GITLAB_TOKEN and GITLAB_API_VERSION.

        Returns:
            GitLabStorageOptions: Configured storage options.

        Raises:
            ValueError: If neither GITLAB_PROJECT_ID nor GITLAB_PROJECT_NAME
                is set (raised by model validation).
        """
        return cls(
            protocol="gitlab",
            base_url=os.getenv("GITLAB_URL", "https://gitlab.com"),
            project_id=os.getenv("GITLAB_PROJECT_ID"),
            project_name=os.getenv("GITLAB_PROJECT_NAME"),
            ref=os.getenv("GITLAB_REF"),
            token=os.getenv("GITLAB_TOKEN"),
            api_version=os.getenv("GITLAB_API_VERSION", "v4"),
        )

    def to_env(self) -> None:
        """Export options to environment variables.

        Sets the standard GITLAB_* environment variables; unset (None)
        values are skipped.

        Example:
            >>> options = GitLabStorageOptions(project_id=12345, token="glpat_xxxx")
            >>> options.to_env()
            >>> print(os.getenv("GITLAB_PROJECT_ID"))
            '12345'
        """
        env = {
            "GITLAB_URL": self.base_url,
            # Test `is not None`, not truthiness: a project_id of 0 or ""
            # would otherwise be silently dropped from the environment.
            "GITLAB_PROJECT_ID": str(self.project_id)
            if self.project_id is not None
            else None,
            "GITLAB_PROJECT_NAME": self.project_name,
            "GITLAB_REF": self.ref,
            "GITLAB_TOKEN": self.token,
            "GITLAB_API_VERSION": self.api_version,
        }
        os.environ.update({k: v for k, v in env.items() if v is not None})

    def to_fsspec_kwargs(self) -> dict:
        """Convert options to fsspec filesystem arguments.

        Returns:
            dict: Non-None arguments suitable for a GitLab filesystem.

        Example:
            >>> options = GitLabStorageOptions(project_id=12345, token="glpat_xxxx")
            >>> kwargs = options.to_fsspec_kwargs()
            >>> fs = filesystem("gitlab", **kwargs)
        """
        kwargs = {
            "base_url": self.base_url,
            "project_id": self.project_id,
            "project_name": self.project_name,
            "ref": self.ref,
            "token": self.token,
            "api_version": self.api_version,
        }
        return {k: v for k, v in kwargs.items() if v is not None}
|
200
919
|
|
201
920
|
|
202
921
|
class LocalStorageOptions(BaseStorageOptions):
    """Local filesystem configuration options.

    Thin configuration wrapper for local file access. While simple, it keeps
    the interface consistent with the remote storage option classes so
    callers can switch between local and remote storage transparently.

    Attributes:
        protocol (str): Always "file" for the local filesystem.
        auto_mkdir (bool): Create parent directories automatically on write.
        mode (int | None): Default unix-style file creation mode.

    Example:
        >>> options = LocalStorageOptions(auto_mkdir=True)
        >>> fs = options.to_filesystem()
        >>> with fs.open("/new/path/file.txt", "w") as f:
        ...     f.write("test")  # creates /new/path/ automatically
    """

    protocol: str = "file"
    auto_mkdir: bool = False
    mode: int | None = None

    def to_fsspec_kwargs(self) -> dict:
        """Convert options to fsspec LocalFileSystem arguments.

        Returns:
            dict: Keyword arguments with None values removed.

        Example:
            >>> kwargs = LocalStorageOptions(auto_mkdir=True).to_fsspec_kwargs()
            >>> fs = filesystem("file", **kwargs)
        """
        candidates = {
            "auto_mkdir": self.auto_mkdir,
            "mode": self.mode,
        }
        return {key: value for key, value in candidates.items() if value is not None}
|
966
|
+
|
967
|
+
|
968
|
+
def from_dict(protocol: str, storage_options: dict) -> BaseStorageOptions:
|
969
|
+
"""Create appropriate storage options instance from dictionary.
|
970
|
+
|
971
|
+
Factory function that creates the correct storage options class based on protocol.
|
204
972
|
|
973
|
+
Args:
|
974
|
+
protocol: Storage protocol identifier (e.g., "s3", "gs", "file")
|
975
|
+
storage_options: Dictionary of configuration options
|
205
976
|
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
977
|
+
Returns:
|
978
|
+
BaseStorageOptions: Appropriate storage options instance
|
979
|
+
|
980
|
+
Raises:
|
981
|
+
ValueError: If protocol is not supported
|
982
|
+
|
983
|
+
Example:
|
984
|
+
>>> # Create S3 options
|
985
|
+
>>> options = from_dict("s3", {
|
986
|
+
... "access_key_id": "KEY",
|
987
|
+
... "secret_access_key": "SECRET"
|
988
|
+
... })
|
989
|
+
>>> print(type(options).__name__)
|
990
|
+
'AwsStorageOptions'
|
991
|
+
"""
|
216
992
|
if protocol == "s3":
|
217
993
|
return AwsStorageOptions(**storage_options)
|
218
|
-
elif protocol
|
994
|
+
elif protocol in ["az", "abfs", "adl"]:
|
219
995
|
return AzureStorageOptions(**storage_options)
|
220
|
-
elif protocol
|
996
|
+
elif protocol in ["gs", "gcs"]:
|
221
997
|
return GcsStorageOptions(**storage_options)
|
222
998
|
elif protocol == "github":
|
223
999
|
return GitHubStorageOptions(**storage_options)
|
@@ -229,16 +1005,27 @@ def from_dict(
|
|
229
1005
|
raise ValueError(f"Unsupported protocol: {protocol}")
|
230
1006
|
|
231
1007
|
|
232
|
-
def from_env(
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
1008
|
+
def from_env(protocol: str) -> BaseStorageOptions:
|
1009
|
+
"""Create storage options from environment variables.
|
1010
|
+
|
1011
|
+
Factory function that creates and configures storage options from
|
1012
|
+
protocol-specific environment variables.
|
1013
|
+
|
1014
|
+
Args:
|
1015
|
+
protocol: Storage protocol identifier (e.g., "s3", "github")
|
1016
|
+
|
1017
|
+
Returns:
|
1018
|
+
BaseStorageOptions: Configured storage options instance
|
1019
|
+
|
1020
|
+
Raises:
|
1021
|
+
ValueError: If protocol is not supported
|
1022
|
+
|
1023
|
+
Example:
|
1024
|
+
>>> # With AWS credentials in environment
|
1025
|
+
>>> options = from_env("s3")
|
1026
|
+
>>> print(options.access_key_id) # From AWS_ACCESS_KEY_ID
|
1027
|
+
'AKIAXXXXXX'
|
1028
|
+
"""
|
242
1029
|
if protocol == "s3":
|
243
1030
|
return AwsStorageOptions.from_env()
|
244
1031
|
elif protocol == "github":
|
@@ -252,9 +1039,47 @@ def from_env(
|
|
252
1039
|
|
253
1040
|
|
254
1041
|
class StorageOptions(BaseModel):
|
1042
|
+
"""High-level storage options container and factory.
|
1043
|
+
|
1044
|
+
Provides a unified interface for creating and managing storage options
|
1045
|
+
for different protocols.
|
1046
|
+
|
1047
|
+
Attributes:
|
1048
|
+
storage_options (BaseStorageOptions): Underlying storage options instance
|
1049
|
+
|
1050
|
+
Example:
|
1051
|
+
>>> # Create from protocol
|
1052
|
+
>>> options = StorageOptions(
|
1053
|
+
... protocol="s3",
|
1054
|
+
... access_key_id="KEY",
|
1055
|
+
... secret_access_key="SECRET"
|
1056
|
+
... )
|
1057
|
+
>>>
|
1058
|
+
>>> # Create from existing options
|
1059
|
+
>>> s3_opts = AwsStorageOptions(access_key_id="KEY")
|
1060
|
+
>>> options = StorageOptions(storage_options=s3_opts)
|
1061
|
+
"""
|
1062
|
+
|
255
1063
|
storage_options: BaseStorageOptions
|
256
1064
|
|
257
|
-
def __init__(self, **data):
|
1065
|
+
def __init__(self, **data: Any):
|
1066
|
+
"""Initialize storage options from arguments.
|
1067
|
+
|
1068
|
+
Args:
|
1069
|
+
**data: Either:
|
1070
|
+
- protocol and configuration options
|
1071
|
+
- storage_options=pre-configured instance
|
1072
|
+
|
1073
|
+
Raises:
|
1074
|
+
ValueError: If protocol missing or invalid
|
1075
|
+
|
1076
|
+
Example:
|
1077
|
+
>>> # Direct protocol config
|
1078
|
+
>>> options = StorageOptions(
|
1079
|
+
... protocol="s3",
|
1080
|
+
... region="us-east-1"
|
1081
|
+
... )
|
1082
|
+
"""
|
258
1083
|
protocol = data.get("protocol")
|
259
1084
|
if protocol is None and "storage_options" not in data:
|
260
1085
|
raise ValueError("protocol must be specified")
|
@@ -281,12 +1106,39 @@ class StorageOptions(BaseModel):
|
|
281
1106
|
|
282
1107
|
@classmethod
|
283
1108
|
def from_yaml(cls, path: str, fs: AbstractFileSystem = None) -> "StorageOptions":
|
1109
|
+
"""Create storage options from YAML configuration.
|
1110
|
+
|
1111
|
+
Args:
|
1112
|
+
path: Path to YAML configuration file
|
1113
|
+
fs: Filesystem for reading configuration
|
1114
|
+
|
1115
|
+
Returns:
|
1116
|
+
StorageOptions: Configured storage options
|
1117
|
+
|
1118
|
+
Example:
|
1119
|
+
>>> # Load from config file
|
1120
|
+
>>> options = StorageOptions.from_yaml("storage.yml")
|
1121
|
+
>>> print(options.storage_options.protocol)
|
1122
|
+
's3'
|
1123
|
+
"""
|
284
1124
|
with fs.open(path, "r") as f:
|
285
1125
|
data = yaml.safe_load(f)
|
286
1126
|
return cls(**data)
|
287
1127
|
|
288
1128
|
@classmethod
|
289
1129
|
def from_env(cls, protocol: str) -> "StorageOptions":
|
1130
|
+
"""Create storage options from environment variables.
|
1131
|
+
|
1132
|
+
Args:
|
1133
|
+
protocol: Storage protocol to configure
|
1134
|
+
|
1135
|
+
Returns:
|
1136
|
+
StorageOptions: Environment-configured options
|
1137
|
+
|
1138
|
+
Example:
|
1139
|
+
>>> # Load AWS config from environment
|
1140
|
+
>>> options = StorageOptions.from_env("s3")
|
1141
|
+
"""
|
290
1142
|
if protocol == "s3":
|
291
1143
|
return cls(storage_options=AwsStorageOptions.from_env())
|
292
1144
|
elif protocol == "github":
|
@@ -299,12 +1151,202 @@ class StorageOptions(BaseModel):
|
|
299
1151
|
raise ValueError(f"Unsupported protocol: {protocol}")
|
300
1152
|
|
301
1153
|
def to_filesystem(self) -> AbstractFileSystem:
|
1154
|
+
"""Create fsspec filesystem instance.
|
1155
|
+
|
1156
|
+
Returns:
|
1157
|
+
AbstractFileSystem: Configured filesystem instance
|
1158
|
+
|
1159
|
+
Example:
|
1160
|
+
>>> options = StorageOptions(protocol="file")
|
1161
|
+
>>> fs = options.to_filesystem()
|
1162
|
+
>>> files = fs.ls("/data")
|
1163
|
+
"""
|
302
1164
|
return self.storage_options.to_filesystem()
|
303
1165
|
|
304
1166
|
def to_dict(self, protocol: bool = False) -> dict:
|
1167
|
+
"""Convert storage options to dictionary.
|
1168
|
+
|
1169
|
+
Args:
|
1170
|
+
protocol: Whether to include protocol in output
|
1171
|
+
|
1172
|
+
Returns:
|
1173
|
+
dict: Storage options as dictionary
|
1174
|
+
|
1175
|
+
Example:
|
1176
|
+
>>> options = StorageOptions(
|
1177
|
+
... protocol="s3",
|
1178
|
+
... region="us-east-1"
|
1179
|
+
... )
|
1180
|
+
>>> print(options.to_dict())
|
1181
|
+
{'region': 'us-east-1'}
|
1182
|
+
"""
|
305
1183
|
return self.storage_options.to_dict(protocol=protocol)
|
306
1184
|
|
307
1185
|
def to_object_store_kwargs(self, with_conditional_put: bool = False) -> dict:
|
1186
|
+
"""Get options formatted for object store clients.
|
1187
|
+
|
1188
|
+
Args:
|
1189
|
+
with_conditional_put: Add etag-based conditional put support
|
1190
|
+
|
1191
|
+
Returns:
|
1192
|
+
dict: Object store configuration dictionary
|
1193
|
+
|
1194
|
+
Example:
|
1195
|
+
>>> options = StorageOptions(protocol="s3")
|
1196
|
+
>>> kwargs = options.to_object_store_kwargs()
|
1197
|
+
>>> store = ObjectStore(**kwargs)
|
1198
|
+
"""
|
308
1199
|
return self.storage_options.to_object_store_kwargs(
|
309
1200
|
with_conditional_put=with_conditional_put
|
310
1201
|
)
|
1202
|
+
|
1203
|
+
|
1204
|
+
def infer_protocol_from_uri(uri: str) -> str:
    """Infer the storage protocol from a URI string.

    Maps the URI scheme to a protocol identifier; anything without a
    recognized scheme (e.g. a plain local path) is treated as "file".

    Args:
        uri: URI or path string, e.g. "s3://bucket/path", "gs://bucket/path",
            "github://org/repo", or "/local/path".

    Returns:
        str: Inferred protocol identifier.

    Example:
        >>> infer_protocol_from_uri("s3://my-bucket/data")
        's3'
        >>> infer_protocol_from_uri("/home/user/data")
        'file'
        >>> infer_protocol_from_uri("github://microsoft/vscode")
        'github'
    """
    # scheme -> protocol; "gcs" normalizes to "gs", Azure schemes map to themselves.
    scheme_to_protocol = {
        "s3": "s3",
        "gs": "gs",
        "gcs": "gs",
        "github": "github",
        "gitlab": "gitlab",
        "az": "az",
        "abfs": "abfs",
        "adl": "adl",
    }
    for scheme, protocol in scheme_to_protocol.items():
        if uri.startswith(f"{scheme}://"):
            return protocol
    return "file"
|
1245
|
+
|
1246
|
+
|
1247
|
+
def storage_options_from_uri(uri: str) -> BaseStorageOptions:
    """Create a storage options instance from a URI string.

    Infers the protocol from the URI scheme and builds the matching storage
    options class, falling back to local filesystem options.

    Args:
        uri: URI string, e.g. "s3://bucket/path", "gs://project/bucket/path",
            or "github://org/repo".

    Returns:
        BaseStorageOptions: Configured storage options instance.

    Example:
        >>> opts = storage_options_from_uri("github://microsoft/vscode")
        >>> print(opts.org, opts.repo)
        'microsoft' 'vscode'
    """
    protocol = infer_protocol_from_uri(uri)
    inferred = infer_storage_options(uri)

    if protocol == "s3":
        return AwsStorageOptions(protocol=protocol, **inferred)
    if protocol in ("gs", "gcs"):
        return GcsStorageOptions(protocol=protocol, **inferred)
    if protocol == "github":
        segments = uri.replace("github://", "").split("/")
        repo = segments[1] if len(segments) > 1 else None
        return GitHubStorageOptions(protocol=protocol, org=segments[0], repo=repo)
    if protocol == "gitlab":
        segments = uri.replace("gitlab://", "").split("/")
        # NOTE(review): only the last path segment becomes project_name, so
        # "gitlab://group/project" yields "project" and drops the group
        # prefix — confirm this matches GitLabFileSystem expectations.
        name = segments[-1] if segments else None
        return GitLabStorageOptions(protocol=protocol, project_name=name)
    if protocol in ("az", "abfs", "adl"):
        return AzureStorageOptions(protocol=protocol, **inferred)
    return LocalStorageOptions()
|
1297
|
+
|
1298
|
+
|
1299
|
+
def merge_storage_options(
    *options: BaseStorageOptions | dict | None, overwrite: bool = True
) -> BaseStorageOptions:
    """Merge multiple storage option sources into one configuration.

    Args:
        *options: Sources to merge, processed left to right. Each may be a
            BaseStorageOptions instance, a plain dict, or None (ignored).
        overwrite: If True, later sources override earlier keys; if False,
            the first value seen for each key wins.

    Returns:
        BaseStorageOptions: Combined options built via ``from_dict``. Falls
        back to the "file" protocol when no source supplies one.

    Example:
        >>> base = AwsStorageOptions(region="us-east-1", access_key_id="OLD_KEY")
        >>> merged = merge_storage_options(base, {"access_key_id": "NEW_KEY"})
        >>> print(merged.access_key_id)
        'NEW_KEY'
        >>> kept = merge_storage_options(
        ...     base, {"access_key_id": "NEW_KEY"}, overwrite=False
        ... )
        >>> print(kept.access_key_id)
        'OLD_KEY'
    """
    merged: dict = {}
    protocol = None

    for source in options:
        if source is None:
            continue
        if isinstance(source, BaseStorageOptions):
            # NOTE(review): assumes BaseStorageOptions.to_dict accepts a
            # `with_protocol` keyword — confirm against the base class.
            source = source.to_dict(with_protocol=True)
        if not protocol and "protocol" in source:
            protocol = source["protocol"]
        if overwrite:
            merged.update(source)
        else:
            for key, value in source.items():
                merged.setdefault(key, value)

    return from_dict(protocol or "file", merged)
|