FlowerPower 0.11.6.19__py3-none-any.whl → 0.20.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowerpower/cfg/__init__.py +3 -3
- flowerpower/cfg/pipeline/__init__.py +5 -3
- flowerpower/cfg/project/__init__.py +3 -3
- flowerpower/cfg/project/job_queue.py +1 -128
- flowerpower/cli/__init__.py +5 -5
- flowerpower/cli/cfg.py +0 -3
- flowerpower/cli/job_queue.py +401 -133
- flowerpower/cli/pipeline.py +14 -413
- flowerpower/cli/utils.py +0 -1
- flowerpower/flowerpower.py +537 -28
- flowerpower/job_queue/__init__.py +5 -94
- flowerpower/job_queue/base.py +201 -3
- flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -3
- flowerpower/job_queue/rq/manager.py +388 -77
- flowerpower/pipeline/__init__.py +2 -0
- flowerpower/pipeline/base.py +2 -2
- flowerpower/pipeline/io.py +14 -16
- flowerpower/pipeline/manager.py +21 -642
- flowerpower/pipeline/pipeline.py +571 -0
- flowerpower/pipeline/registry.py +242 -10
- flowerpower/pipeline/visualizer.py +1 -2
- flowerpower/plugins/_io/__init__.py +8 -0
- flowerpower/plugins/mqtt/manager.py +6 -6
- flowerpower/settings/backend.py +0 -2
- flowerpower/settings/job_queue.py +1 -57
- flowerpower/utils/misc.py +0 -256
- flowerpower/utils/monkey.py +1 -83
- {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/METADATA +308 -152
- flowerpower-0.20.0.dist-info/RECORD +58 -0
- flowerpower/fs/__init__.py +0 -29
- flowerpower/fs/base.py +0 -662
- flowerpower/fs/ext.py +0 -2143
- flowerpower/fs/storage_options.py +0 -1420
- flowerpower/job_queue/apscheduler/__init__.py +0 -11
- flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -110
- flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -93
- flowerpower/job_queue/apscheduler/manager.py +0 -1051
- flowerpower/job_queue/apscheduler/setup.py +0 -554
- flowerpower/job_queue/apscheduler/trigger.py +0 -169
- flowerpower/job_queue/apscheduler/utils.py +0 -311
- flowerpower/pipeline/job_queue.py +0 -583
- flowerpower/pipeline/runner.py +0 -603
- flowerpower/plugins/io/base.py +0 -2520
- flowerpower/plugins/io/helpers/datetime.py +0 -298
- flowerpower/plugins/io/helpers/polars.py +0 -875
- flowerpower/plugins/io/helpers/pyarrow.py +0 -570
- flowerpower/plugins/io/helpers/sql.py +0 -202
- flowerpower/plugins/io/loader/__init__.py +0 -28
- flowerpower/plugins/io/loader/csv.py +0 -37
- flowerpower/plugins/io/loader/deltatable.py +0 -190
- flowerpower/plugins/io/loader/duckdb.py +0 -19
- flowerpower/plugins/io/loader/json.py +0 -37
- flowerpower/plugins/io/loader/mqtt.py +0 -159
- flowerpower/plugins/io/loader/mssql.py +0 -26
- flowerpower/plugins/io/loader/mysql.py +0 -26
- flowerpower/plugins/io/loader/oracle.py +0 -26
- flowerpower/plugins/io/loader/parquet.py +0 -35
- flowerpower/plugins/io/loader/postgres.py +0 -26
- flowerpower/plugins/io/loader/pydala.py +0 -19
- flowerpower/plugins/io/loader/sqlite.py +0 -23
- flowerpower/plugins/io/metadata.py +0 -244
- flowerpower/plugins/io/saver/__init__.py +0 -28
- flowerpower/plugins/io/saver/csv.py +0 -36
- flowerpower/plugins/io/saver/deltatable.py +0 -186
- flowerpower/plugins/io/saver/duckdb.py +0 -19
- flowerpower/plugins/io/saver/json.py +0 -36
- flowerpower/plugins/io/saver/mqtt.py +0 -28
- flowerpower/plugins/io/saver/mssql.py +0 -26
- flowerpower/plugins/io/saver/mysql.py +0 -26
- flowerpower/plugins/io/saver/oracle.py +0 -26
- flowerpower/plugins/io/saver/parquet.py +0 -36
- flowerpower/plugins/io/saver/postgres.py +0 -26
- flowerpower/plugins/io/saver/pydala.py +0 -20
- flowerpower/plugins/io/saver/sqlite.py +0 -24
- flowerpower/utils/scheduler.py +0 -311
- flowerpower-0.11.6.19.dist-info/RECORD +0 -102
- {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/WHEEL +0 -0
- {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/entry_points.txt +0 -0
- {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/licenses/LICENSE +0 -0
- {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/top_level.txt +0 -0
@@ -1,1420 +0,0 @@
|
|
1
|
-
import configparser
|
2
|
-
import os
|
3
|
-
from typing import Any, TypeVar, Union
|
4
|
-
|
5
|
-
import msgspec
|
6
|
-
import yaml
|
7
|
-
from fsspec import AbstractFileSystem, filesystem
|
8
|
-
from fsspec.utils import infer_storage_options
|
9
|
-
|
10
|
-
|
11
|
-
class BaseStorageOptions(msgspec.Struct):
    """Base class for filesystem storage configuration options.

    Provides common functionality for all storage option classes including:
    - YAML serialization/deserialization
    - Dictionary conversion
    - Filesystem instance creation
    - Configuration updates

    Attributes:
        protocol (str): Storage protocol identifier (e.g., "s3", "gs", "file")

    Example:
        >>> # Create and save options
        >>> options = BaseStorageOptions(protocol="s3")
        >>> options.to_yaml("config.yml")
        >>>
        >>> # Load from YAML
        >>> loaded = BaseStorageOptions.from_yaml("config.yml")
        >>> print(loaded.protocol)
        's3'
    """

    protocol: str

    def to_dict(self, with_protocol: bool = False) -> dict:
        """Convert storage options to dictionary.

        Args:
            with_protocol: Whether to include protocol in output dictionary

        Returns:
            dict: Dictionary of storage options with non-None values

        Example:
            >>> options = BaseStorageOptions(protocol="s3")
            >>> print(options.to_dict())
            {}
            >>> print(options.to_dict(with_protocol=True))
            {'protocol': 's3'}
        """
        data = msgspec.structs.asdict(self)
        result = {}
        for key, value in data.items():
            if value is None:
                continue
            # "protocol" is only emitted when explicitly requested; every
            # other non-None field is always included.
            if key == "protocol":
                if with_protocol:
                    result[key] = value
            else:
                result[key] = value
        return result

    @classmethod
    def from_yaml(
        cls, path: str, fs: AbstractFileSystem = None
    ) -> "BaseStorageOptions":
        """Load storage options from YAML file.

        Args:
            path: Path to YAML configuration file
            fs: Filesystem to use for reading file (defaults to local)

        Returns:
            BaseStorageOptions: Loaded storage options instance

        Example:
            >>> options = BaseStorageOptions.from_yaml("config.yml")
            >>> print(options.protocol)
            's3'
        """
        if fs is None:
            fs = filesystem("file")
        with fs.open(path) as f:
            data = yaml.safe_load(f)
        return cls(**data)

    def to_yaml(self, path: str, fs: AbstractFileSystem = None) -> None:
        """Save storage options to YAML file.

        Args:
            path: Path where to save configuration
            fs: Filesystem to use for writing (defaults to local)

        Example:
            >>> options = BaseStorageOptions(protocol="s3")
            >>> options.to_yaml("config.yml")
        """
        if fs is None:
            fs = filesystem("file")
        # Note: to_dict() omits "protocol" by default, so the saved YAML
        # contains only the non-protocol, non-None fields.
        data = self.to_dict()
        with fs.open(path, "w") as f:
            yaml.safe_dump(data, f)

    def to_filesystem(self) -> AbstractFileSystem:
        """Create fsspec filesystem instance from options.

        Returns:
            AbstractFileSystem: Configured filesystem instance

        Example:
            >>> options = BaseStorageOptions(protocol="file")
            >>> fs = options.to_filesystem()
            >>> files = fs.ls("/path/to/data")
        """
        return filesystem(**self.to_dict(with_protocol=True))

    def update(self, **kwargs: Any) -> "BaseStorageOptions":
        """Update storage options with new values.

        Args:
            **kwargs: New option values to set

        Returns:
            BaseStorageOptions: Updated copy of this instance

        Example:
            >>> options = BaseStorageOptions(protocol="s3")
            >>> options = options.update(region="us-east-1")
            >>> print(options.region)
            'us-east-1'
        """
        # Fix: msgspec structs expose no `self.replace()` method; the
        # supported way to build an updated copy is the module-level
        # msgspec.structs.replace(). The old `self.replace(**kwargs)` would
        # raise AttributeError unless patched elsewhere.
        return msgspec.structs.replace(self, **kwargs)
|
136
|
-
|
137
|
-
|
138
|
-
class AzureStorageOptions(BaseStorageOptions):
    """Azure Storage configuration options.

    Covers the three Azure storage protocols:
    - Azure Blob Storage (az://)
    - Azure Data Lake Storage Gen2 (abfs://)
    - Azure Data Lake Storage Gen1 (adl://)

    Several authentication mechanisms are supported: connection string,
    account key, service principal, managed identity, and SAS token.

    Attributes:
        protocol (str): Storage protocol ("az", "abfs", or "adl")
        account_name (str): Storage account name
        account_key (str): Storage account access key
        connection_string (str): Full connection string
        tenant_id (str): Azure AD tenant ID
        client_id (str): Service principal client ID
        client_secret (str): Service principal client secret
        sas_token (str): SAS token for limited access

    Example:
        >>> # Blob Storage with account key
        >>> options = AzureStorageOptions(
        ...     protocol="az",
        ...     account_name="mystorageacct",
        ...     account_key="key123..."
        ... )
        >>>
        >>> # Data Lake with service principal
        >>> options = AzureStorageOptions(
        ...     protocol="abfs",
        ...     account_name="mydatalake",
        ...     tenant_id="tenant123",
        ...     client_id="client123",
        ...     client_secret="secret123"
        ... )
        >>>
        >>> # Connection-string auth
        >>> options = AzureStorageOptions(
        ...     protocol="az",
        ...     connection_string="DefaultEndpoints..."
        ... )
    """

    protocol: str
    account_name: str | None = None
    account_key: str | None = None
    connection_string: str | None = None
    tenant_id: str | None = None
    client_id: str | None = None
    client_secret: str | None = None
    sas_token: str | None = None

    @classmethod
    def from_env(cls) -> "AzureStorageOptions":
        """Build options from the standard Azure environment variables.

        Reads AZURE_STORAGE_PROTOCOL (default "az"),
        AZURE_STORAGE_ACCOUNT_NAME, AZURE_STORAGE_ACCOUNT_KEY,
        AZURE_STORAGE_CONNECTION_STRING, AZURE_TENANT_ID, AZURE_CLIENT_ID,
        AZURE_CLIENT_SECRET, and AZURE_STORAGE_SAS_TOKEN.

        Returns:
            AzureStorageOptions: Configured storage options

        Example:
            >>> options = AzureStorageOptions.from_env()
            >>> print(options.account_name)  # From AZURE_STORAGE_ACCOUNT_NAME
            'mystorageacct'
        """
        read = os.getenv
        return cls(
            protocol=read("AZURE_STORAGE_PROTOCOL", "az"),
            account_name=read("AZURE_STORAGE_ACCOUNT_NAME"),
            account_key=read("AZURE_STORAGE_ACCOUNT_KEY"),
            connection_string=read("AZURE_STORAGE_CONNECTION_STRING"),
            tenant_id=read("AZURE_TENANT_ID"),
            client_id=read("AZURE_CLIENT_ID"),
            client_secret=read("AZURE_CLIENT_SECRET"),
            sas_token=read("AZURE_STORAGE_SAS_TOKEN"),
        )

    def to_env(self) -> None:
        """Export the configured values as Azure environment variables.

        Only fields that are set (non-None) are written to os.environ.

        Example:
            >>> options = AzureStorageOptions(
            ...     protocol="az",
            ...     account_name="mystorageacct",
            ...     account_key="key123"
            ... )
            >>> options.to_env()
            >>> print(os.getenv("AZURE_STORAGE_ACCOUNT_NAME"))
            'mystorageacct'
        """
        candidates = (
            ("AZURE_STORAGE_PROTOCOL", self.protocol),
            ("AZURE_STORAGE_ACCOUNT_NAME", self.account_name),
            ("AZURE_STORAGE_ACCOUNT_KEY", self.account_key),
            ("AZURE_STORAGE_CONNECTION_STRING", self.connection_string),
            ("AZURE_TENANT_ID", self.tenant_id),
            ("AZURE_CLIENT_ID", self.client_id),
            ("AZURE_CLIENT_SECRET", self.client_secret),
            ("AZURE_STORAGE_SAS_TOKEN", self.sas_token),
        )
        os.environ.update({name: value for name, value in candidates if value is not None})
|
256
|
-
|
257
|
-
|
258
|
-
class GcsStorageOptions(BaseStorageOptions):
    """Google Cloud Storage configuration options.

    Supports the usual GCS access paths:
    - Service account authentication
    - Default application credentials
    - Token-based authentication
    - Project configuration
    - Custom endpoints

    Attributes:
        protocol (str): Storage protocol ("gs" or "gcs")
        token (str): Path to service account JSON file
        project (str): Google Cloud project ID
        access_token (str): OAuth2 access token
        endpoint_url (str): Custom storage endpoint
        timeout (int): Request timeout in seconds

    Example:
        >>> # Service account auth
        >>> options = GcsStorageOptions(
        ...     protocol="gs",
        ...     token="path/to/service-account.json",
        ...     project="my-project-123"
        ... )
        >>>
        >>> # Application default credentials
        >>> options = GcsStorageOptions(
        ...     protocol="gcs",
        ...     project="my-project-123"
        ... )
        >>>
        >>> # Custom endpoint (e.g., test server)
        >>> options = GcsStorageOptions(
        ...     protocol="gs",
        ...     endpoint_url="http://localhost:4443",
        ...     token="test-token.json"
        ... )
    """

    protocol: str
    token: str | None = None
    project: str | None = None
    access_token: str | None = None
    endpoint_url: str | None = None
    timeout: int | None = None

    @classmethod
    def from_env(cls) -> "GcsStorageOptions":
        """Build options from the standard GCP environment variables.

        Reads GOOGLE_CLOUD_PROJECT, GOOGLE_APPLICATION_CREDENTIALS,
        STORAGE_EMULATOR_HOST (custom endpoint for testing), and
        GCS_OAUTH_TOKEN. The protocol is always "gs".

        Returns:
            GcsStorageOptions: Configured storage options

        Example:
            >>> options = GcsStorageOptions.from_env()
            >>> print(options.project)  # From GOOGLE_CLOUD_PROJECT
            'my-project-123'
        """
        read = os.getenv
        return cls(
            protocol="gs",
            project=read("GOOGLE_CLOUD_PROJECT"),
            token=read("GOOGLE_APPLICATION_CREDENTIALS"),
            endpoint_url=read("STORAGE_EMULATOR_HOST"),
            access_token=read("GCS_OAUTH_TOKEN"),
        )

    def to_env(self) -> None:
        """Export the configured values as GCP environment variables.

        Only fields that are set (non-None) are written to os.environ.

        Example:
            >>> options = GcsStorageOptions(
            ...     protocol="gs",
            ...     project="my-project",
            ...     token="service-account.json"
            ... )
            >>> options.to_env()
            >>> print(os.getenv("GOOGLE_CLOUD_PROJECT"))
            'my-project'
        """
        candidates = (
            ("GOOGLE_CLOUD_PROJECT", self.project),
            ("GOOGLE_APPLICATION_CREDENTIALS", self.token),
            ("STORAGE_EMULATOR_HOST", self.endpoint_url),
            ("GCS_OAUTH_TOKEN", self.access_token),
        )
        os.environ.update({name: value for name, value in candidates if value is not None})

    def to_fsspec_kwargs(self) -> dict:
        """Return the non-None fields as GCSFileSystem keyword arguments.

        Returns:
            dict: Arguments suitable for GCSFileSystem

        Example:
            >>> options = GcsStorageOptions(
            ...     protocol="gs",
            ...     token="service-account.json",
            ...     project="my-project"
            ... )
            >>> kwargs = options.to_fsspec_kwargs()
            >>> fs = filesystem("gcs", **kwargs)
        """
        candidates = (
            ("token", self.token),
            ("project", self.project),
            ("access_token", self.access_token),
            ("endpoint_url", self.endpoint_url),
            ("timeout", self.timeout),
        )
        return {name: value for name, value in candidates if value is not None}
|
379
|
-
|
380
|
-
|
381
|
-
class AwsStorageOptions(BaseStorageOptions):
    """AWS S3 storage configuration options.

    Provides comprehensive configuration for S3 access with support for:
    - Multiple authentication methods (keys, profiles, environment)
    - Custom endpoints for S3-compatible services
    - Region configuration
    - SSL/TLS settings

    Attributes:
        protocol (str): Always "s3" for S3 storage
        access_key_id (str): AWS access key ID
        secret_access_key (str): AWS secret access key
        session_token (str): AWS session token
        endpoint_url (str): Custom S3 endpoint URL
        region (str): AWS region name
        allow_invalid_certificates (bool): Skip SSL certificate validation
        allow_http (bool): Allow unencrypted HTTP connections

    Example:
        >>> # Basic credentials
        >>> options = AwsStorageOptions(
        ...     access_key_id="AKIAXXXXXXXX",
        ...     secret_access_key="SECRETKEY",
        ...     region="us-east-1"
        ... )
        >>>
        >>> # Profile-based auth
        >>> options = AwsStorageOptions.create(profile="dev")
        >>>
        >>> # S3-compatible service (MinIO)
        >>> options = AwsStorageOptions(
        ...     endpoint_url="http://localhost:9000",
        ...     access_key_id="minioadmin",
        ...     secret_access_key="minioadmin",
        ...     allow_http=True
        ... )
    """

    protocol: str = "s3"
    access_key_id: str | None = None
    secret_access_key: str | None = None
    session_token: str | None = None
    endpoint_url: str | None = None
    region: str | None = None
    allow_invalid_certificates: bool | None = None
    allow_http: bool | None = None

    @classmethod
    def create(
        cls,
        protocol: str = "s3",
        access_key_id: str | None = None,
        secret_access_key: str | None = None,
        session_token: str | None = None,
        endpoint_url: str | None = None,
        region: str | None = None,
        allow_invalid_certificates: bool | None = None,
        allow_http: bool | None = None,
        # Alias and loading params
        key: str | None = None,
        secret: str | None = None,
        token: str | None = None,  # maps to session_token
        profile: str | None = None,
    ) -> "AwsStorageOptions":
        """Creates an AwsStorageOptions instance, handling aliases and profile loading.

        Args:
            protocol: Storage protocol, defaults to "s3".
            access_key_id: AWS access key ID.
            secret_access_key: AWS secret access key.
            session_token: AWS session token.
            endpoint_url: Custom S3 endpoint URL.
            region: AWS region name.
            allow_invalid_certificates: Skip SSL certificate validation.
            allow_http: Allow unencrypted HTTP connections.
            key: Alias for access_key_id.
            secret: Alias for secret_access_key.
            token: Alias for session_token.
            profile: AWS credentials profile name to load credentials from.

        Returns:
            An initialized AwsStorageOptions instance.
        """
        # Direct arguments win over their fsspec-style aliases.
        args = {
            "protocol": protocol,
            "access_key_id": access_key_id if access_key_id is not None else key,
            "secret_access_key": secret_access_key
            if secret_access_key is not None
            else secret,
            "session_token": session_token if session_token is not None else token,
            "endpoint_url": endpoint_url,
            "region": region,
            "allow_invalid_certificates": allow_invalid_certificates,
            "allow_http": allow_http,
        }

        if profile is not None:
            # Note: allow_invalid_certificates and allow_http are passed to
            # from_aws_credentials. If they are None here, from_aws_credentials
            # uses its own defaults for those flags when reading.
            profile_instance = cls.from_aws_credentials(
                profile=profile,
                allow_invalid_certificates=args["allow_invalid_certificates"],
                allow_http=args["allow_http"],
            )
            # Fill in values the caller did not set explicitly from the profile.
            if args["access_key_id"] is None:
                args["access_key_id"] = profile_instance.access_key_id
            if args["secret_access_key"] is None:
                args["secret_access_key"] = profile_instance.secret_access_key
            if args["session_token"] is None:
                args["session_token"] = profile_instance.session_token
            if args["endpoint_url"] is None:
                args["endpoint_url"] = profile_instance.endpoint_url
            if args["region"] is None:
                args["region"] = profile_instance.region
            # If the boolean flags were None, adopt whatever defaults
            # from_aws_credentials stamped on the profile instance.
            if (
                args["allow_invalid_certificates"] is None
                and profile_instance.allow_invalid_certificates is not None
            ):
                args["allow_invalid_certificates"] = (
                    profile_instance.allow_invalid_certificates
                )
            if args["allow_http"] is None and profile_instance.allow_http is not None:
                args["allow_http"] = profile_instance.allow_http

        # Ensure protocol is 's3' if it somehow became None
        if args["protocol"] is None:
            args["protocol"] = "s3"

        return cls(**args)

    @classmethod
    def from_aws_credentials(
        cls,
        profile: str,
        allow_invalid_certificates: bool = False,
        allow_http: bool = False,
    ) -> "AwsStorageOptions":
        """Create storage options from AWS credentials file.

        Loads credentials from ~/.aws/credentials and ~/.aws/config files.

        Args:
            profile: AWS credentials profile name
            allow_invalid_certificates: Skip SSL certificate validation
            allow_http: Allow unencrypted HTTP connections

        Returns:
            AwsStorageOptions: Configured storage options

        Raises:
            ValueError: If profile not found

        Example:
            >>> options = AwsStorageOptions.from_aws_credentials(
            ...     profile="dev",
            ...     allow_http=True  # For local testing
            ... )
        """
        cp = configparser.ConfigParser()
        cp.read(os.path.expanduser("~/.aws/credentials"))
        cp.read(os.path.expanduser("~/.aws/config"))
        if profile not in cp:
            raise ValueError(f"Profile '{profile}' not found in AWS credentials file")

        return cls(
            protocol="s3",
            access_key_id=cp[profile].get("aws_access_key_id", None),
            secret_access_key=cp[profile].get("aws_secret_access_key", None),
            session_token=cp[profile].get("aws_session_token", None),
            # The endpoint may appear under several historical key names.
            endpoint_url=cp[profile].get("aws_endpoint_url", None)
            or cp[profile].get("endpoint_url", None)
            or cp[profile].get("aws_endpoint", None)
            or cp[profile].get("endpoint", None),
            # ~/.aws/config stores non-default profiles as "profile <name>".
            region=(
                cp[profile].get("region", None)
                or cp[f"profile {profile}"].get("region", None)
                if f"profile {profile}" in cp
                else None
            ),
            allow_invalid_certificates=allow_invalid_certificates,
            allow_http=allow_http,
        )

    @classmethod
    def from_env(cls) -> "AwsStorageOptions":
        """Create storage options from environment variables.

        Reads standard AWS environment variables:
        - AWS_ACCESS_KEY_ID
        - AWS_SECRET_ACCESS_KEY
        - AWS_SESSION_TOKEN
        - AWS_ENDPOINT_URL
        - AWS_DEFAULT_REGION
        - ALLOW_INVALID_CERTIFICATES
        - AWS_ALLOW_HTTP

        Returns:
            AwsStorageOptions: Configured storage options

        Example:
            >>> options = AwsStorageOptions.from_env()
            >>> print(options.region)
            'us-east-1'  # From AWS_DEFAULT_REGION
        """
        return cls(
            access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
            secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
            session_token=os.getenv("AWS_SESSION_TOKEN"),
            endpoint_url=os.getenv("AWS_ENDPOINT_URL"),
            region=os.getenv("AWS_DEFAULT_REGION"),
            # The boolean flags default to False when the variable is unset.
            allow_invalid_certificates="true"
            == (os.getenv("ALLOW_INVALID_CERTIFICATES", "False").lower()),
            allow_http="true" == (os.getenv("AWS_ALLOW_HTTP", "False").lower()),
        )

    def to_fsspec_kwargs(self) -> dict:
        """Convert options to fsspec filesystem arguments.

        Returns:
            dict: Arguments suitable for fsspec S3FileSystem

        Example:
            >>> options = AwsStorageOptions(
            ...     access_key_id="KEY",
            ...     secret_access_key="SECRET",
            ...     region="us-west-2"
            ... )
            >>> kwargs = options.to_fsspec_kwargs()
            >>> fs = filesystem("s3", **kwargs)
        """
        fsspec_kwargs = {
            "key": self.access_key_id,
            "secret": self.secret_access_key,
            "token": self.session_token,
            "endpoint_url": self.endpoint_url,
            "client_kwargs": {
                "region_name": self.region,
                # Unset flags mean "use the safe default" (verify certs, use SSL).
                "verify": not self.allow_invalid_certificates
                if self.allow_invalid_certificates is not None
                else True,
                "use_ssl": not self.allow_http if self.allow_http is not None else True,
            },
        }
        return {k: v for k, v in fsspec_kwargs.items() if v is not None}

    def to_object_store_kwargs(self, with_conditional_put: bool = False) -> dict:
        """Convert options to object store arguments.

        Args:
            with_conditional_put: Add etag-based conditional put support

        Returns:
            dict: Arguments suitable for object store clients (all values
            stringified, protocol excluded)

        Example:
            >>> options = AwsStorageOptions(
            ...     access_key_id="KEY",
            ...     secret_access_key="SECRET"
            ... )
            >>> kwargs = options.to_object_store_kwargs()
            >>> client = ObjectStore(**kwargs)
        """
        kwargs = {
            k: str(v)
            for k, v in self.to_dict().items()
            if v is not None and k != "protocol"
        }
        if with_conditional_put:
            kwargs["conditional_put"] = "etag"
        return kwargs

    def to_env(self) -> None:
        """Export options to environment variables.

        Sets standard AWS environment variables; only fields that are set
        (non-None) are exported.

        Example:
            >>> options = AwsStorageOptions(
            ...     access_key_id="KEY",
            ...     secret_access_key="SECRET",
            ...     region="us-east-1"
            ... )
            >>> options.to_env()
            >>> print(os.getenv("AWS_ACCESS_KEY_ID"))
            'KEY'
        """
        env = {
            "AWS_ACCESS_KEY_ID": self.access_key_id,
            "AWS_SECRET_ACCESS_KEY": self.secret_access_key,
            "AWS_SESSION_TOKEN": self.session_token,
            "AWS_ENDPOINT_URL": self.endpoint_url,
            "AWS_DEFAULT_REGION": self.region,
            # Fix: str() was previously applied unconditionally, so unset
            # (None) boolean flags were exported as the literal string "None"
            # — the `is not None` filter below could never drop them.
            "ALLOW_INVALID_CERTIFICATES": str(self.allow_invalid_certificates)
            if self.allow_invalid_certificates is not None
            else None,
            "AWS_ALLOW_HTTP": str(self.allow_http)
            if self.allow_http is not None
            else None,
        }
        env = {k: v for k, v in env.items() if v is not None}
        os.environ.update(env)

    def to_filesystem(self) -> AbstractFileSystem:
        """Create an fsspec filesystem from these options.

        Returns:
            AbstractFileSystem: Configured S3 filesystem instance
        """
        return filesystem(self.protocol, **self.to_fsspec_kwargs())
|
691
|
-
|
692
|
-
|
693
|
-
class GitHubStorageOptions(BaseStorageOptions):
|
694
|
-
"""GitHub repository storage configuration options.
|
695
|
-
|
696
|
-
Provides access to files in GitHub repositories with support for:
|
697
|
-
- Public and private repositories
|
698
|
-
- Branch/tag/commit selection
|
699
|
-
- Token-based authentication
|
700
|
-
- Custom GitHub Enterprise instances
|
701
|
-
|
702
|
-
Attributes:
|
703
|
-
protocol (str): Always "github" for GitHub storage
|
704
|
-
org (str): Organization or user name
|
705
|
-
repo (str): Repository name
|
706
|
-
ref (str): Git reference (branch, tag, or commit SHA)
|
707
|
-
token (str): GitHub personal access token
|
708
|
-
api_url (str): Custom GitHub API URL for enterprise instances
|
709
|
-
|
710
|
-
Example:
|
711
|
-
>>> # Public repository
|
712
|
-
>>> options = GitHubStorageOptions(
|
713
|
-
... org="microsoft",
|
714
|
-
... repo="vscode",
|
715
|
-
... ref="main"
|
716
|
-
... )
|
717
|
-
>>>
|
718
|
-
>>> # Private repository
|
719
|
-
>>> options = GitHubStorageOptions(
|
720
|
-
... org="myorg",
|
721
|
-
... repo="private-repo",
|
722
|
-
... token="ghp_xxxx",
|
723
|
-
... ref="develop"
|
724
|
-
... )
|
725
|
-
>>>
|
726
|
-
>>> # Enterprise instance
|
727
|
-
>>> options = GitHubStorageOptions(
|
728
|
-
... org="company",
|
729
|
-
... repo="internal",
|
730
|
-
... api_url="https://github.company.com/api/v3",
|
731
|
-
... token="ghp_xxxx"
|
732
|
-
... )
|
733
|
-
"""
|
734
|
-
|
735
|
-
protocol: str = "github"
|
736
|
-
org: str | None = None
|
737
|
-
repo: str | None = None
|
738
|
-
ref: str | None = None
|
739
|
-
token: str | None = None
|
740
|
-
api_url: str | None = None
|
741
|
-
|
742
|
-
@classmethod
|
743
|
-
def from_env(cls) -> "GitHubStorageOptions":
|
744
|
-
"""Create storage options from environment variables.
|
745
|
-
|
746
|
-
Reads standard GitHub environment variables:
|
747
|
-
- GITHUB_ORG: Organization or user name
|
748
|
-
- GITHUB_REPO: Repository name
|
749
|
-
- GITHUB_REF: Git reference
|
750
|
-
- GITHUB_TOKEN: Personal access token
|
751
|
-
- GITHUB_API_URL: Custom API URL
|
752
|
-
|
753
|
-
Returns:
|
754
|
-
GitHubStorageOptions: Configured storage options
|
755
|
-
|
756
|
-
Example:
|
757
|
-
>>> # With environment variables set:
|
758
|
-
>>> options = GitHubStorageOptions.from_env()
|
759
|
-
>>> print(options.org) # From GITHUB_ORG
|
760
|
-
'microsoft'
|
761
|
-
"""
|
762
|
-
return cls(
|
763
|
-
protocol="github",
|
764
|
-
org=os.getenv("GITHUB_ORG"),
|
765
|
-
repo=os.getenv("GITHUB_REPO"),
|
766
|
-
ref=os.getenv("GITHUB_REF"),
|
767
|
-
token=os.getenv("GITHUB_TOKEN"),
|
768
|
-
api_url=os.getenv("GITHUB_API_URL"),
|
769
|
-
)
|
770
|
-
|
771
|
-
def to_env(self) -> None:
    """Export the configured values as GITHUB_* environment variables.

    Only fields that are set (not None) are written, so no variable is
    ever assigned an empty value.

    Example:
        >>> options = GitHubStorageOptions(org="microsoft", repo="vscode")
        >>> options.to_env()
        >>> print(os.getenv("GITHUB_ORG"))
        'microsoft'
    """
    candidates = {
        "GITHUB_ORG": self.org,
        "GITHUB_REPO": self.repo,
        "GITHUB_REF": self.ref,
        "GITHUB_TOKEN": self.token,
        "GITHUB_API_URL": self.api_url,
    }
    os.environ.update(
        {name: value for name, value in candidates.items() if value is not None}
    )
|
795
|
-
|
796
|
-
def to_fsspec_kwargs(self) -> dict:
    """Convert options to keyword arguments for a GitHub fsspec filesystem.

    Returns:
        dict: Non-None arguments suitable for GitHubFileSystem.

    Example:
        >>> options = GitHubStorageOptions(org="microsoft", repo="vscode")
        >>> kwargs = options.to_fsspec_kwargs()
        >>> fs = filesystem("github", **kwargs)
    """
    candidates = {
        "org": self.org,
        "repo": self.repo,
        "ref": self.ref,
        "token": self.token,
        "api_url": self.api_url,
    }
    # Unset fields are omitted so the filesystem falls back to its defaults.
    return {name: value for name, value in candidates.items() if value is not None}
|
819
|
-
|
820
|
-
|
821
|
-
class GitLabStorageOptions(BaseStorageOptions):
    """GitLab repository storage configuration options.

    Supports public and private projects on gitlab.com or self-hosted
    instances, addressed either by numeric project ID or by full project
    path, with token-based authentication and branch/tag/commit selection.

    Attributes:
        protocol (str): Always "gitlab" for GitLab storage.
        base_url (str): GitLab instance URL; defaults to https://gitlab.com.
        project_id (str | int | None): Numeric project ID.
        project_name (str | None): Project name/path (e.g. "group/project").
        ref (str | None): Git reference (branch, tag, or commit SHA).
        token (str | None): GitLab personal access token.
        api_version (str): API version to use (default "v4").

    Example:
        >>> # Public project on gitlab.com
        >>> options = GitLabStorageOptions(
        ...     project_name="group/project",
        ...     ref="main"
        ... )
        >>>
        >>> # Self-hosted instance with token
        >>> options = GitLabStorageOptions(
        ...     base_url="https://gitlab.company.com",
        ...     project_id=12345,
        ...     token="glpat_xxxx"
        ... )
    """

    protocol: str = "gitlab"
    base_url: str = "https://gitlab.com"
    project_id: str | int | None = None
    project_name: str | None = None
    ref: str | None = None
    token: str | None = None
    api_version: str = "v4"

    def __post_init__(self) -> None:
        """Validate that the project is identified after initialization.

        Raises:
            ValueError: If neither project_id nor project_name is provided.

        Example:
            >>> GitLabStorageOptions(project_id=12345)  # valid
            >>> GitLabStorageOptions()  # raises ValueError
        """
        if self.project_id is None and self.project_name is None:
            raise ValueError("Either project_id or project_name must be provided")

    @classmethod
    def from_env(cls) -> "GitLabStorageOptions":
        """Build GitLab storage options from standard environment variables.

        Reads GITLAB_URL, GITLAB_PROJECT_ID, GITLAB_PROJECT_NAME, GITLAB_REF,
        GITLAB_TOKEN and GITLAB_API_VERSION.

        Returns:
            GitLabStorageOptions: Configured storage options.
        """
        return cls(
            protocol="gitlab",
            base_url=os.getenv("GITLAB_URL", "https://gitlab.com"),
            project_id=os.getenv("GITLAB_PROJECT_ID"),
            project_name=os.getenv("GITLAB_PROJECT_NAME"),
            ref=os.getenv("GITLAB_REF"),
            token=os.getenv("GITLAB_TOKEN"),
            api_version=os.getenv("GITLAB_API_VERSION", "v4"),
        )

    def to_env(self) -> None:
        """Export the configured values as GITLAB_* environment variables.

        Only fields that are set (not None) are written; project_id is
        stringified since environment values must be strings.

        Example:
            >>> GitLabStorageOptions(project_id=12345).to_env()
            >>> print(os.getenv("GITLAB_PROJECT_ID"))
            '12345'
        """
        candidates = {
            "GITLAB_URL": self.base_url,
            "GITLAB_PROJECT_ID": str(self.project_id) if self.project_id else None,
            "GITLAB_PROJECT_NAME": self.project_name,
            "GITLAB_REF": self.ref,
            "GITLAB_TOKEN": self.token,
            "GITLAB_API_VERSION": self.api_version,
        }
        os.environ.update(
            {name: value for name, value in candidates.items() if value is not None}
        )

    def to_fsspec_kwargs(self) -> dict:
        """Convert options to keyword arguments for a GitLab fsspec filesystem.

        Returns:
            dict: Non-None arguments suitable for GitLabFileSystem.

        Example:
            >>> kwargs = GitLabStorageOptions(project_id=12345).to_fsspec_kwargs()
            >>> fs = filesystem("gitlab", **kwargs)
        """
        candidates = {
            "base_url": self.base_url,
            "project_id": self.project_id,
            "project_name": self.project_name,
            "ref": self.ref,
            "token": self.token,
            "api_version": self.api_version,
        }
        return {name: value for name, value in candidates.items() if value is not None}
|
974
|
-
|
975
|
-
|
976
|
-
class LocalStorageOptions(BaseStorageOptions):
    """Local filesystem configuration options.

    A thin configuration wrapper for local file access. It is deliberately
    minimal but keeps the same interface as the remote storage option
    classes, so callers can switch between local and remote storage
    transparently.

    Attributes:
        protocol (str): Always "file" for the local filesystem.
        auto_mkdir (bool): Create parent directories automatically on write.
        mode (int | None): Default unix-style file creation mode.

    Example:
        >>> options = LocalStorageOptions(auto_mkdir=True)
        >>> fs = options.to_filesystem()
        >>> with fs.open("/new/path/file.txt", "w") as f:
        ...     f.write("test")  # Creates /new/path/ automatically
    """

    protocol: str = "file"
    auto_mkdir: bool = False
    mode: int | None = None

    def to_fsspec_kwargs(self) -> dict:
        """Convert options to keyword arguments for a LocalFileSystem.

        Returns:
            dict: Non-None arguments suitable for fsspec's LocalFileSystem.

        Example:
            >>> kwargs = LocalStorageOptions(auto_mkdir=True).to_fsspec_kwargs()
            >>> fs = filesystem("file", **kwargs)
        """
        candidates = {
            "auto_mkdir": self.auto_mkdir,
            "mode": self.mode,
        }
        # auto_mkdir is always present (False is not None); mode only if set.
        return {name: value for name, value in candidates.items() if value is not None}
|
1021
|
-
|
1022
|
-
|
1023
|
-
def from_dict(protocol: str, storage_options: dict) -> BaseStorageOptions:
|
1024
|
-
"""Create appropriate storage options instance from dictionary.
|
1025
|
-
|
1026
|
-
Factory function that creates the correct storage options class based on protocol.
|
1027
|
-
|
1028
|
-
Args:
|
1029
|
-
protocol: Storage protocol identifier (e.g., "s3", "gs", "file")
|
1030
|
-
storage_options: Dictionary of configuration options
|
1031
|
-
|
1032
|
-
Returns:
|
1033
|
-
BaseStorageOptions: Appropriate storage options instance
|
1034
|
-
|
1035
|
-
Raises:
|
1036
|
-
ValueError: If protocol is not supported
|
1037
|
-
|
1038
|
-
Example:
|
1039
|
-
>>> # Create S3 options
|
1040
|
-
>>> options = from_dict("s3", {
|
1041
|
-
... "access_key_id": "KEY",
|
1042
|
-
... "secret_access_key": "SECRET"
|
1043
|
-
... })
|
1044
|
-
>>> print(type(options).__name__)
|
1045
|
-
'AwsStorageOptions'
|
1046
|
-
"""
|
1047
|
-
if protocol == "s3":
|
1048
|
-
if (
|
1049
|
-
"profile" in storage_options
|
1050
|
-
or "key" in storage_options
|
1051
|
-
or "secret" in storage_options
|
1052
|
-
):
|
1053
|
-
return AwsStorageOptions.create(**storage_options)
|
1054
|
-
return AwsStorageOptions(**storage_options)
|
1055
|
-
elif protocol in ["az", "abfs", "adl"]:
|
1056
|
-
return AzureStorageOptions(**storage_options)
|
1057
|
-
elif protocol in ["gs", "gcs"]:
|
1058
|
-
return GcsStorageOptions(**storage_options)
|
1059
|
-
elif protocol == "github":
|
1060
|
-
return GitHubStorageOptions(**storage_options)
|
1061
|
-
elif protocol == "gitlab":
|
1062
|
-
return GitLabStorageOptions(**storage_options)
|
1063
|
-
elif protocol == "file":
|
1064
|
-
return LocalStorageOptions()
|
1065
|
-
else:
|
1066
|
-
raise ValueError(f"Unsupported protocol: {protocol}")
|
1067
|
-
|
1068
|
-
|
1069
|
-
def from_env(protocol: str) -> BaseStorageOptions:
|
1070
|
-
"""Create storage options from environment variables.
|
1071
|
-
|
1072
|
-
Factory function that creates and configures storage options from
|
1073
|
-
protocol-specific environment variables.
|
1074
|
-
|
1075
|
-
Args:
|
1076
|
-
protocol: Storage protocol identifier (e.g., "s3", "github")
|
1077
|
-
|
1078
|
-
Returns:
|
1079
|
-
BaseStorageOptions: Configured storage options instance
|
1080
|
-
|
1081
|
-
Raises:
|
1082
|
-
ValueError: If protocol is not supported
|
1083
|
-
|
1084
|
-
Example:
|
1085
|
-
>>> # With AWS credentials in environment
|
1086
|
-
>>> options = from_env("s3")
|
1087
|
-
>>> print(options.access_key_id) # From AWS_ACCESS_KEY_ID
|
1088
|
-
'AKIAXXXXXX'
|
1089
|
-
"""
|
1090
|
-
if protocol == "s3":
|
1091
|
-
return AwsStorageOptions.from_env()
|
1092
|
-
elif protocol == "github":
|
1093
|
-
return GitHubStorageOptions.from_env()
|
1094
|
-
elif protocol == "gitlab":
|
1095
|
-
return GitLabStorageOptions.from_env()
|
1096
|
-
elif protocol == "file":
|
1097
|
-
return LocalStorageOptions()
|
1098
|
-
else:
|
1099
|
-
raise ValueError(f"Unsupported protocol: {protocol}")
|
1100
|
-
|
1101
|
-
|
1102
|
-
class StorageOptions(msgspec.Struct):
    """High-level storage options container and factory.

    Provides a unified interface for creating and managing storage options
    for different protocols.

    Attributes:
        storage_options (BaseStorageOptions): Underlying storage options instance

    Example:
        >>> # Create from protocol
        >>> options = StorageOptions.create(
        ...     protocol="s3",
        ...     access_key_id="KEY",
        ...     secret_access_key="SECRET"
        ... )
        >>>
        >>> # Create from existing options
        >>> s3_opts = AwsStorageOptions(access_key_id="KEY")
        >>> options = StorageOptions(storage_options=s3_opts)
    """

    storage_options: BaseStorageOptions

    @classmethod
    def create(cls, **data: Any) -> "StorageOptions":
        """Create storage options from arguments.

        Args:
            **data: Either:
                - protocol and configuration options
                - storage_options=pre-configured instance

        Returns:
            StorageOptions: Configured storage options instance

        Raises:
            ValueError: If protocol missing or invalid

        Example:
            >>> options = StorageOptions.create(
            ...     protocol="s3",
            ...     region="us-east-1"
            ... )
        """
        protocol = data.get("protocol")
        if protocol is None and "storage_options" not in data:
            raise ValueError("protocol must be specified")

        if "storage_options" not in data:
            if protocol == "s3":
                # fsspec-style aliases need the translating factory.
                if "profile" in data or "key" in data or "secret" in data:
                    storage_options = AwsStorageOptions.create(**data)
                else:
                    storage_options = AwsStorageOptions(**data)
            elif protocol == "github":
                storage_options = GitHubStorageOptions(**data)
            elif protocol == "gitlab":
                storage_options = GitLabStorageOptions(**data)
            elif protocol in ["az", "abfs", "adl"]:
                storage_options = AzureStorageOptions(**data)
            elif protocol in ["gs", "gcs"]:
                storage_options = GcsStorageOptions(**data)
            elif protocol == "file":
                storage_options = LocalStorageOptions(**data)
            else:
                raise ValueError(f"Unsupported protocol: {protocol}")

            return cls(storage_options=storage_options)
        else:
            return cls(**data)

    @classmethod
    def from_yaml(cls, path: str, fs: AbstractFileSystem = None) -> "StorageOptions":
        """Create storage options from YAML configuration.

        Args:
            path: Path to YAML configuration file
            fs: Optional filesystem for reading the configuration. When
                omitted, the local filesystem is used. (Fix: the previous
                implementation unconditionally called ``fs.open``, so the
                documented ``fs=None`` default raised AttributeError.)

        Returns:
            StorageOptions: Configured storage options

        Example:
            >>> options = StorageOptions.from_yaml("storage.yml")
            >>> print(options.storage_options.protocol)
            's3'
        """
        if fs is None:
            with open(path, "r") as f:
                data = yaml.safe_load(f)
        else:
            with fs.open(path, "r") as f:
                data = yaml.safe_load(f)
        return cls(**data)

    @classmethod
    def from_env(cls, protocol: str) -> "StorageOptions":
        """Create storage options from environment variables.

        Args:
            protocol: Storage protocol to configure

        Returns:
            StorageOptions: Environment-configured options

        Raises:
            ValueError: If protocol is not supported

        Example:
            >>> options = StorageOptions.from_env("s3")
        """
        if protocol == "s3":
            return cls(storage_options=AwsStorageOptions.from_env())
        elif protocol == "github":
            return cls(storage_options=GitHubStorageOptions.from_env())
        elif protocol == "gitlab":
            return cls(storage_options=GitLabStorageOptions.from_env())
        elif protocol == "file":
            return cls(storage_options=LocalStorageOptions())
        else:
            raise ValueError(f"Unsupported protocol: {protocol}")

    def to_filesystem(self) -> AbstractFileSystem:
        """Create fsspec filesystem instance from the wrapped options.

        Returns:
            AbstractFileSystem: Configured filesystem instance

        Example:
            >>> fs = StorageOptions.create(protocol="file").to_filesystem()
            >>> files = fs.ls("/data")
        """
        return self.storage_options.to_filesystem()

    def to_dict(self, protocol: bool = False) -> dict:
        """Convert storage options to dictionary.

        Args:
            protocol: Whether to include protocol in output

        Returns:
            dict: Storage options as dictionary

        Example:
            >>> options = StorageOptions.create(protocol="s3", region="us-east-1")
            >>> print(options.to_dict())
            {'region': 'us-east-1'}
        """
        return self.storage_options.to_dict(protocol=protocol)

    def to_object_store_kwargs(self, with_conditional_put: bool = False) -> dict:
        """Get options formatted for object store clients.

        Args:
            with_conditional_put: Add etag-based conditional put support

        Returns:
            dict: Object store configuration dictionary

        Example:
            >>> kwargs = StorageOptions.create(protocol="s3").to_object_store_kwargs()
            >>> store = ObjectStore(**kwargs)
        """
        return self.storage_options.to_object_store_kwargs(
            with_conditional_put=with_conditional_put
        )
|
1270
|
-
|
1271
|
-
|
1272
|
-
def infer_protocol_from_uri(uri: str) -> str:
    """Infer the storage protocol from a URI string.

    Recognizes the s3, gs/gcs, github, gitlab and Azure (az/abfs/adl)
    schemes; any other input — including plain local paths — is treated
    as the local filesystem.

    Args:
        uri: URI or path string to analyze. Examples:
            - "s3://bucket/path"
            - "gs://bucket/path"
            - "github://org/repo"
            - "/local/path"

    Returns:
        str: Inferred protocol identifier

    Example:
        >>> infer_protocol_from_uri("s3://my-bucket/data")
        's3'
        >>> infer_protocol_from_uri("/home/user/data")
        'file'
        >>> infer_protocol_from_uri("github://microsoft/vscode")
        'github'
    """
    # URI scheme -> protocol identifier ("gcs" is normalized to "gs";
    # the Azure schemes map to themselves).
    scheme_to_protocol = {
        "s3": "s3",
        "gs": "gs",
        "gcs": "gs",
        "github": "github",
        "gitlab": "gitlab",
        "az": "az",
        "abfs": "abfs",
        "adl": "adl",
    }
    for scheme, protocol in scheme_to_protocol.items():
        if uri.startswith(f"{scheme}://"):
            return protocol
    return "file"
|
1313
|
-
|
1314
|
-
|
1315
|
-
def storage_options_from_uri(uri: str) -> BaseStorageOptions:
    """Create storage options instance from a URI string.

    Infers the protocol and extracts relevant configuration from the URI
    to create appropriate storage options.

    Args:
        uri: URI string containing protocol and optional configuration.
            Examples:
            - "s3://bucket/path"
            - "gs://project/bucket/path"
            - "github://org/repo"

    Returns:
        BaseStorageOptions: Configured storage options instance

    Example:
        >>> opts = storage_options_from_uri("github://microsoft/vscode")
        >>> print(opts.org, opts.repo)
        microsoft vscode
    """
    protocol = infer_protocol_from_uri(uri)
    options = infer_storage_options(uri)

    # Fix: fsspec's infer_storage_options() always includes a "protocol" key
    # (and URL components such as "path"/"host") alongside any real options.
    # Passing them through collided with the explicit protocol= argument
    # ("got multiple values for keyword argument 'protocol'") and fed
    # constructors fields they do not declare, so strip them here.
    options = {
        k: v for k, v in options.items() if k not in ("protocol", "path", "host")
    }

    if protocol == "s3":
        return AwsStorageOptions(protocol=protocol, **options)
    elif protocol in ["gs", "gcs"]:
        return GcsStorageOptions(protocol=protocol, **options)
    elif protocol == "github":
        parts = uri.replace("github://", "").split("/")
        return GitHubStorageOptions(
            protocol=protocol, org=parts[0], repo=parts[1] if len(parts) > 1 else None
        )
    elif protocol == "gitlab":
        parts = uri.replace("gitlab://", "").split("/")
        return GitLabStorageOptions(
            protocol=protocol, project_name=parts[-1] if parts else None
        )
    elif protocol in ["az", "abfs", "adl"]:
        return AzureStorageOptions(protocol=protocol, **options)
    else:
        return LocalStorageOptions()
|
1365
|
-
|
1366
|
-
|
1367
|
-
def merge_storage_options(
    *options: BaseStorageOptions | dict | None, overwrite: bool = True
) -> BaseStorageOptions:
    """Merge multiple storage options into a single configuration.

    Combines options from multiple sources with control over precedence.
    The protocol of the first source that declares one wins; if none does,
    the local filesystem is assumed.

    Args:
        *options: Storage options to merge. Can be:
            - BaseStorageOptions instances
            - Dictionaries of options
            - None values (ignored)
        overwrite: Whether later options override earlier ones

    Returns:
        BaseStorageOptions: Combined storage options

    Example:
        >>> base = AwsStorageOptions(
        ...     region="us-east-1",
        ...     access_key_id="OLD_KEY"
        ... )
        >>> merged = merge_storage_options(base, {"access_key_id": "NEW_KEY"})
        >>> print(merged.access_key_id)
        'NEW_KEY'
        >>> merged = merge_storage_options(base, {"access_key_id": "NEW_KEY"},
        ...                                overwrite=False)
        >>> print(merged.access_key_id)
        'OLD_KEY'
    """
    result = {}
    protocol = None

    for opts in options:
        if opts is None:
            continue
        if isinstance(opts, BaseStorageOptions):
            # Fix: to_dict takes `protocol=`, not `with_protocol=` — see
            # StorageOptions.to_dict, which forwards `protocol=protocol` to
            # the same method. The old keyword raised TypeError for any
            # BaseStorageOptions argument.
            opts = opts.to_dict(protocol=True)
        if not protocol and "protocol" in opts:
            protocol = opts["protocol"]
        for k, v in opts.items():
            if overwrite or k not in result:
                result[k] = v

    if not protocol:
        protocol = "file"
    return from_dict(protocol, result)
|