FlowerPower 0.9.12.4__py3-none-any.whl → 1.0.0b1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in the public registry where they were published. It is provided for informational purposes only.
Files changed (81)
  1. flowerpower/__init__.py +17 -2
  2. flowerpower/cfg/__init__.py +201 -149
  3. flowerpower/cfg/base.py +122 -24
  4. flowerpower/cfg/pipeline/__init__.py +254 -0
  5. flowerpower/cfg/pipeline/adapter.py +66 -0
  6. flowerpower/cfg/pipeline/run.py +40 -11
  7. flowerpower/cfg/pipeline/schedule.py +69 -79
  8. flowerpower/cfg/project/__init__.py +149 -0
  9. flowerpower/cfg/project/adapter.py +57 -0
  10. flowerpower/cfg/project/job_queue.py +165 -0
  11. flowerpower/cli/__init__.py +92 -35
  12. flowerpower/cli/job_queue.py +878 -0
  13. flowerpower/cli/mqtt.py +49 -4
  14. flowerpower/cli/pipeline.py +576 -381
  15. flowerpower/cli/utils.py +55 -0
  16. flowerpower/flowerpower.py +12 -7
  17. flowerpower/fs/__init__.py +20 -2
  18. flowerpower/fs/base.py +350 -26
  19. flowerpower/fs/ext.py +797 -216
  20. flowerpower/fs/storage_options.py +1097 -55
  21. flowerpower/io/base.py +13 -18
  22. flowerpower/io/loader/__init__.py +28 -0
  23. flowerpower/io/loader/deltatable.py +7 -10
  24. flowerpower/io/metadata.py +1 -0
  25. flowerpower/io/saver/__init__.py +28 -0
  26. flowerpower/io/saver/deltatable.py +4 -3
  27. flowerpower/job_queue/__init__.py +252 -0
  28. flowerpower/job_queue/apscheduler/__init__.py +11 -0
  29. flowerpower/job_queue/apscheduler/_setup/datastore.py +110 -0
  30. flowerpower/job_queue/apscheduler/_setup/eventbroker.py +93 -0
  31. flowerpower/job_queue/apscheduler/manager.py +1063 -0
  32. flowerpower/job_queue/apscheduler/setup.py +524 -0
  33. flowerpower/job_queue/apscheduler/trigger.py +169 -0
  34. flowerpower/job_queue/apscheduler/utils.py +309 -0
  35. flowerpower/job_queue/base.py +382 -0
  36. flowerpower/job_queue/rq/__init__.py +10 -0
  37. flowerpower/job_queue/rq/_trigger.py +37 -0
  38. flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +226 -0
  39. flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +231 -0
  40. flowerpower/job_queue/rq/manager.py +1449 -0
  41. flowerpower/job_queue/rq/setup.py +150 -0
  42. flowerpower/job_queue/rq/utils.py +69 -0
  43. flowerpower/pipeline/__init__.py +5 -0
  44. flowerpower/pipeline/base.py +118 -0
  45. flowerpower/pipeline/io.py +407 -0
  46. flowerpower/pipeline/job_queue.py +505 -0
  47. flowerpower/pipeline/manager.py +1586 -0
  48. flowerpower/pipeline/registry.py +560 -0
  49. flowerpower/pipeline/runner.py +560 -0
  50. flowerpower/pipeline/visualizer.py +142 -0
  51. flowerpower/plugins/mqtt/__init__.py +12 -0
  52. flowerpower/plugins/mqtt/cfg.py +16 -0
  53. flowerpower/plugins/mqtt/manager.py +789 -0
  54. flowerpower/settings.py +110 -0
  55. flowerpower/utils/logging.py +21 -0
  56. flowerpower/utils/misc.py +57 -9
  57. flowerpower/utils/sql.py +122 -24
  58. flowerpower/utils/templates.py +18 -142
  59. flowerpower/web/app.py +0 -0
  60. flowerpower-1.0.0b1.dist-info/METADATA +324 -0
  61. flowerpower-1.0.0b1.dist-info/RECORD +94 -0
  62. {flowerpower-0.9.12.4.dist-info → flowerpower-1.0.0b1.dist-info}/WHEEL +1 -1
  63. flowerpower/cfg/pipeline/tracker.py +0 -14
  64. flowerpower/cfg/project/open_telemetry.py +0 -8
  65. flowerpower/cfg/project/tracker.py +0 -11
  66. flowerpower/cfg/project/worker.py +0 -19
  67. flowerpower/cli/scheduler.py +0 -309
  68. flowerpower/event_handler.py +0 -23
  69. flowerpower/mqtt.py +0 -525
  70. flowerpower/pipeline.py +0 -2419
  71. flowerpower/scheduler.py +0 -680
  72. flowerpower/tui.py +0 -79
  73. flowerpower/utils/datastore.py +0 -186
  74. flowerpower/utils/eventbroker.py +0 -127
  75. flowerpower/utils/executor.py +0 -58
  76. flowerpower/utils/trigger.py +0 -140
  77. flowerpower-0.9.12.4.dist-info/METADATA +0 -575
  78. flowerpower-0.9.12.4.dist-info/RECORD +0 -70
  79. /flowerpower/{cfg/pipeline/params.py → cli/worker.py} +0 -0
  80. {flowerpower-0.9.12.4.dist-info → flowerpower-1.0.0b1.dist-info}/entry_points.txt +0 -0
  81. {flowerpower-0.9.12.4.dist-info → flowerpower-1.0.0b1.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,54 @@
1
1
  import configparser
2
2
  import os
3
+ from typing import Any, TypeVar, Union
3
4
 
4
5
  import yaml
5
6
  from fsspec import AbstractFileSystem, filesystem
7
+ from fsspec.utils import infer_storage_options
6
8
  from pydantic import BaseModel
7
9
 
8
10
 
9
11
  class BaseStorageOptions(BaseModel):
12
+ """Base class for filesystem storage configuration options.
13
+
14
+ Provides common functionality for all storage option classes including:
15
+ - YAML serialization/deserialization
16
+ - Dictionary conversion
17
+ - Filesystem instance creation
18
+ - Configuration updates
19
+
20
+ Attributes:
21
+ protocol (str): Storage protocol identifier (e.g., "s3", "gs", "file")
22
+
23
+ Example:
24
+ >>> # Create and save options
25
+ >>> options = BaseStorageOptions(protocol="s3")
26
+ >>> options.to_yaml("config.yml")
27
+ >>>
28
+ >>> # Load from YAML
29
+ >>> loaded = BaseStorageOptions.from_yaml("config.yml")
30
+ >>> print(loaded.protocol)
31
+ 's3'
32
+ """
33
+
10
34
  protocol: str
11
35
 
12
36
  def to_dict(self, with_protocol: bool = False) -> dict:
37
+ """Convert storage options to dictionary.
38
+
39
+ Args:
40
+ with_protocol: Whether to include protocol in output dictionary
41
+
42
+ Returns:
43
+ dict: Dictionary of storage options with non-None values
44
+
45
+ Example:
46
+ >>> options = BaseStorageOptions(protocol="s3")
47
+ >>> print(options.to_dict())
48
+ {}
49
+ >>> print(options.to_dict(with_protocol=True))
50
+ {'protocol': 's3'}
51
+ """
13
52
  items = self.model_dump().items()
14
53
  if not with_protocol:
15
54
  return {k: v for k, v in items if k != "protocol" and v is not None}
@@ -19,11 +58,38 @@ class BaseStorageOptions(BaseModel):
19
58
  def from_yaml(
20
59
  cls, path: str, fs: AbstractFileSystem = None
21
60
  ) -> "BaseStorageOptions":
22
- with fs.open(path, "r") as f:
61
+ """Load storage options from YAML file.
62
+
63
+ Args:
64
+ path: Path to YAML configuration file
65
+ fs: Filesystem to use for reading file
66
+
67
+ Returns:
68
+ BaseStorageOptions: Loaded storage options instance
69
+
70
+ Example:
71
+ >>> # Load from local file
72
+ >>> options = BaseStorageOptions.from_yaml("config.yml")
73
+ >>> print(options.protocol)
74
+ 's3'
75
+ """
76
+ if fs is None:
77
+ fs = filesystem("file")
78
+ with fs.open(path) as f:
23
79
  data = yaml.safe_load(f)
24
80
  return cls(**data)
25
81
 
26
- def to_yaml(self, path: str, fs: AbstractFileSystem = None):
82
+ def to_yaml(self, path: str, fs: AbstractFileSystem = None) -> None:
83
+ """Save storage options to YAML file.
84
+
85
+ Args:
86
+ path: Path where to save configuration
87
+ fs: Filesystem to use for writing
88
+
89
+ Example:
90
+ >>> options = BaseStorageOptions(protocol="s3")
91
+ >>> options.to_yaml("config.yml")
92
+ """
27
93
  if fs is None:
28
94
  fs = filesystem("file")
29
95
  data = self.to_dict()
@@ -31,24 +97,325 @@ class BaseStorageOptions(BaseModel):
31
97
  yaml.safe_dump(data, f)
32
98
 
33
99
  def to_filesystem(self) -> AbstractFileSystem:
34
- return filesystem(**self.to_dict())
100
+ """Create fsspec filesystem instance from options.
101
+
102
+ Returns:
103
+ AbstractFileSystem: Configured filesystem instance
104
+
105
+ Example:
106
+ >>> options = BaseStorageOptions(protocol="file")
107
+ >>> fs = options.to_filesystem()
108
+ >>> files = fs.ls("/path/to/data")
109
+ """
110
+ return filesystem(**self.to_dict(with_protocol=True))
35
111
 
36
- def update(self, **kwargs):
112
+ def update(self, **kwargs: Any) -> None:
113
+ """Update storage options with new values.
114
+
115
+ Args:
116
+ **kwargs: New option values to set
117
+
118
+ Example:
119
+ >>> options = BaseStorageOptions(protocol="s3")
120
+ >>> options.update(region="us-east-1")
121
+ >>> print(options.region)
122
+ 'us-east-1'
123
+ """
37
124
  self = self.model_copy(update=kwargs)
38
125
 
39
126
 
40
127
  class AzureStorageOptions(BaseStorageOptions):
41
- pass
128
+ """Azure Storage configuration options.
129
+
130
+ Provides configuration for Azure storage services:
131
+ - Azure Blob Storage (az://)
132
+ - Azure Data Lake Storage Gen2 (abfs://)
133
+ - Azure Data Lake Storage Gen1 (adl://)
134
+
135
+ Supports multiple authentication methods:
136
+ - Connection string
137
+ - Account key
138
+ - Service principal
139
+ - Managed identity
140
+ - SAS token
141
+
142
+ Attributes:
143
+ protocol (str): Storage protocol ("az", "abfs", or "adl")
144
+ account_name (str): Storage account name
145
+ account_key (str): Storage account access key
146
+ connection_string (str): Full connection string
147
+ tenant_id (str): Azure AD tenant ID
148
+ client_id (str): Service principal client ID
149
+ client_secret (str): Service principal client secret
150
+ sas_token (str): SAS token for limited access
151
+
152
+ Example:
153
+ >>> # Blob Storage with account key
154
+ >>> options = AzureStorageOptions(
155
+ ... protocol="az",
156
+ ... account_name="mystorageacct",
157
+ ... account_key="key123..."
158
+ ... )
159
+ >>>
160
+ >>> # Data Lake with service principal
161
+ >>> options = AzureStorageOptions(
162
+ ... protocol="abfs",
163
+ ... account_name="mydatalake",
164
+ ... tenant_id="tenant123",
165
+ ... client_id="client123",
166
+ ... client_secret="secret123"
167
+ ... )
168
+ >>>
169
+ >>> # Simple connection string auth
170
+ >>> options = AzureStorageOptions(
171
+ ... protocol="az",
172
+ ... connection_string="DefaultEndpoints..."
173
+ ... )
174
+ """
175
+
176
+ protocol: str
177
+ account_name: str | None = None
178
+ account_key: str | None = None
179
+ connection_string: str | None = None
180
+ tenant_id: str | None = None
181
+ client_id: str | None = None
182
+ client_secret: str | None = None
183
+ sas_token: str | None = None
184
+
185
+ @classmethod
186
+ def from_env(cls) -> "AzureStorageOptions":
187
+ """Create storage options from environment variables.
188
+
189
+ Reads standard Azure environment variables:
190
+ - AZURE_STORAGE_ACCOUNT_NAME
191
+ - AZURE_STORAGE_ACCOUNT_KEY
192
+ - AZURE_STORAGE_CONNECTION_STRING
193
+ - AZURE_TENANT_ID
194
+ - AZURE_CLIENT_ID
195
+ - AZURE_CLIENT_SECRET
196
+ - AZURE_STORAGE_SAS_TOKEN
197
+
198
+ Returns:
199
+ AzureStorageOptions: Configured storage options
200
+
201
+ Example:
202
+ >>> # With environment variables set:
203
+ >>> options = AzureStorageOptions.from_env()
204
+ >>> print(options.account_name) # From AZURE_STORAGE_ACCOUNT_NAME
205
+ 'mystorageacct'
206
+ """
207
+ return cls(
208
+ protocol=os.getenv("AZURE_STORAGE_PROTOCOL", "az"),
209
+ account_name=os.getenv("AZURE_STORAGE_ACCOUNT_NAME"),
210
+ account_key=os.getenv("AZURE_STORAGE_ACCOUNT_KEY"),
211
+ connection_string=os.getenv("AZURE_STORAGE_CONNECTION_STRING"),
212
+ tenant_id=os.getenv("AZURE_TENANT_ID"),
213
+ client_id=os.getenv("AZURE_CLIENT_ID"),
214
+ client_secret=os.getenv("AZURE_CLIENT_SECRET"),
215
+ sas_token=os.getenv("AZURE_STORAGE_SAS_TOKEN"),
216
+ )
217
+
218
+ def to_env(self) -> None:
219
+ """Export options to environment variables.
220
+
221
+ Sets standard Azure environment variables.
222
+
223
+ Example:
224
+ >>> options = AzureStorageOptions(
225
+ ... protocol="az",
226
+ ... account_name="mystorageacct",
227
+ ... account_key="key123"
228
+ ... )
229
+ >>> options.to_env()
230
+ >>> print(os.getenv("AZURE_STORAGE_ACCOUNT_NAME"))
231
+ 'mystorageacct'
232
+ """
233
+ env = {
234
+ "AZURE_STORAGE_PROTOCOL": self.protocol,
235
+ "AZURE_STORAGE_ACCOUNT_NAME": self.account_name,
236
+ "AZURE_STORAGE_ACCOUNT_KEY": self.account_key,
237
+ "AZURE_STORAGE_CONNECTION_STRING": self.connection_string,
238
+ "AZURE_TENANT_ID": self.tenant_id,
239
+ "AZURE_CLIENT_ID": self.client_id,
240
+ "AZURE_CLIENT_SECRET": self.client_secret,
241
+ "AZURE_STORAGE_SAS_TOKEN": self.sas_token,
242
+ }
243
+ env = {k: v for k, v in env.items() if v is not None}
244
+ os.environ.update(env)
42
245
 
43
246
 
44
247
  class GcsStorageOptions(BaseStorageOptions):
45
- pass
248
+ """Google Cloud Storage configuration options.
249
+
250
+ Provides configuration for GCS access with support for:
251
+ - Service account authentication
252
+ - Default application credentials
253
+ - Token-based authentication
254
+ - Project configuration
255
+ - Custom endpoints
256
+
257
+ Attributes:
258
+ protocol (str): Storage protocol ("gs" or "gcs")
259
+ token (str): Path to service account JSON file
260
+ project (str): Google Cloud project ID
261
+ access_token (str): OAuth2 access token
262
+ endpoint_url (str): Custom storage endpoint
263
+ timeout (int): Request timeout in seconds
264
+
265
+ Example:
266
+ >>> # Service account auth
267
+ >>> options = GcsStorageOptions(
268
+ ... protocol="gs",
269
+ ... token="path/to/service-account.json",
270
+ ... project="my-project-123"
271
+ ... )
272
+ >>>
273
+ >>> # Application default credentials
274
+ >>> options = GcsStorageOptions(
275
+ ... protocol="gcs",
276
+ ... project="my-project-123"
277
+ ... )
278
+ >>>
279
+ >>> # Custom endpoint (e.g., test server)
280
+ >>> options = GcsStorageOptions(
281
+ ... protocol="gs",
282
+ ... endpoint_url="http://localhost:4443",
283
+ ... token="test-token.json"
284
+ ... )
285
+ """
286
+
287
+ protocol: str
288
+ token: str | None = None
289
+ project: str | None = None
290
+ access_token: str | None = None
291
+ endpoint_url: str | None = None
292
+ timeout: int | None = None
293
+
294
+ @classmethod
295
+ def from_env(cls) -> "GcsStorageOptions":
296
+ """Create storage options from environment variables.
297
+
298
+ Reads standard GCP environment variables:
299
+ - GOOGLE_CLOUD_PROJECT: Project ID
300
+ - GOOGLE_APPLICATION_CREDENTIALS: Service account file path
301
+ - STORAGE_EMULATOR_HOST: Custom endpoint (for testing)
302
+ - GCS_OAUTH_TOKEN: OAuth2 access token
303
+
304
+ Returns:
305
+ GcsStorageOptions: Configured storage options
306
+
307
+ Example:
308
+ >>> # With environment variables set:
309
+ >>> options = GcsStorageOptions.from_env()
310
+ >>> print(options.project) # From GOOGLE_CLOUD_PROJECT
311
+ 'my-project-123'
312
+ """
313
+ return cls(
314
+ protocol="gs",
315
+ project=os.getenv("GOOGLE_CLOUD_PROJECT"),
316
+ token=os.getenv("GOOGLE_APPLICATION_CREDENTIALS"),
317
+ endpoint_url=os.getenv("STORAGE_EMULATOR_HOST"),
318
+ access_token=os.getenv("GCS_OAUTH_TOKEN"),
319
+ )
320
+
321
+ def to_env(self) -> None:
322
+ """Export options to environment variables.
323
+
324
+ Sets standard GCP environment variables.
325
+
326
+ Example:
327
+ >>> options = GcsStorageOptions(
328
+ ... protocol="gs",
329
+ ... project="my-project",
330
+ ... token="service-account.json"
331
+ ... )
332
+ >>> options.to_env()
333
+ >>> print(os.getenv("GOOGLE_CLOUD_PROJECT"))
334
+ 'my-project'
335
+ """
336
+ env = {
337
+ "GOOGLE_CLOUD_PROJECT": self.project,
338
+ "GOOGLE_APPLICATION_CREDENTIALS": self.token,
339
+ "STORAGE_EMULATOR_HOST": self.endpoint_url,
340
+ "GCS_OAUTH_TOKEN": self.access_token,
341
+ }
342
+ env = {k: v for k, v in env.items() if v is not None}
343
+ os.environ.update(env)
344
+
345
+ def to_fsspec_kwargs(self) -> dict:
346
+ """Convert options to fsspec filesystem arguments.
347
+
348
+ Returns:
349
+ dict: Arguments suitable for GCSFileSystem
350
+
351
+ Example:
352
+ >>> options = GcsStorageOptions(
353
+ ... protocol="gs",
354
+ ... token="service-account.json",
355
+ ... project="my-project"
356
+ ... )
357
+ >>> kwargs = options.to_fsspec_kwargs()
358
+ >>> fs = filesystem("gcs", **kwargs)
359
+ """
360
+ kwargs = {
361
+ "token": self.token,
362
+ "project": self.project,
363
+ "access_token": self.access_token,
364
+ "endpoint_url": self.endpoint_url,
365
+ "timeout": self.timeout,
366
+ }
367
+ return {k: v for k, v in kwargs.items() if v is not None}
46
368
 
47
369
 
48
370
  class AwsStorageOptions(BaseStorageOptions):
371
+ """AWS S3 storage configuration options.
372
+
373
+ Provides comprehensive configuration for S3 access with support for:
374
+ - Multiple authentication methods (keys, profiles, environment)
375
+ - Custom endpoints for S3-compatible services
376
+ - Region configuration
377
+ - SSL/TLS settings
378
+
379
+ Attributes:
380
+ protocol (str): Always "s3" for S3 storage
381
+ key (str): AWS access key ID (alias for access_key_id)
382
+ access_key_id (str): AWS access key ID
383
+ secret (str): AWS secret access key (alias for secret_access_key)
384
+ secret_access_key (str): AWS secret access key
385
+ token (str): AWS session token (alias for session_token)
386
+ session_token (str): AWS session token
387
+ endpoint_url (str): Custom S3 endpoint URL
388
+ region (str): AWS region name
389
+ allow_invalid_certificates (bool): Skip SSL certificate validation
390
+ allow_http (bool): Allow unencrypted HTTP connections
391
+ profile (str): AWS credentials profile name
392
+
393
+ Example:
394
+ >>> # Basic credentials
395
+ >>> options = AwsStorageOptions(
396
+ ... access_key_id="AKIAXXXXXXXX",
397
+ ... secret_access_key="SECRETKEY",
398
+ ... region="us-east-1"
399
+ ... )
400
+ >>>
401
+ >>> # Profile-based auth
402
+ >>> options = AwsStorageOptions(profile="dev")
403
+ >>>
404
+ >>> # S3-compatible service (MinIO)
405
+ >>> options = AwsStorageOptions(
406
+ ... endpoint_url="http://localhost:9000",
407
+ ... access_key_id="minioadmin",
408
+ ... secret_access_key="minioadmin",
409
+ ... allow_http=True
410
+ ... )
411
+ """
412
+
49
413
  protocol: str = "s3"
414
+ key: str | None = None
50
415
  access_key_id: str | None = None
416
+ secret: str | None = None
51
417
  secret_access_key: str | None = None
418
+ token: str | None = None
52
419
  session_token: str | None = None
53
420
  endpoint_url: str | None = None
54
421
  region: str | None = None
@@ -56,15 +423,42 @@ class AwsStorageOptions(BaseStorageOptions):
56
423
  allow_http: bool | None = None
57
424
  profile: str | None = None
58
425
 
59
- def model_post_init(self, __context):
426
+ def model_post_init(self, __context: Any) -> None:
427
+ """Post-initialization processing of AWS credentials.
428
+
429
+ Handles credential aliasing and profile-based loading.
430
+ Called automatically after initialization.
431
+
432
+ Args:
433
+ __context: Pydantic validation context (unused)
434
+
435
+ Example:
436
+ >>> # Alias handling
437
+ >>> opts = AwsStorageOptions(
438
+ ... key="ACCESS_KEY",
439
+ ... secret="SECRET_KEY"
440
+ ... )
441
+ >>> print(opts.access_key_id) # Normalized
442
+ 'ACCESS_KEY'
443
+ """
444
+ # Normalize credential aliases
445
+ if self.access_key_id is None and self.key is not None:
446
+ self.access_key_id = self.key
447
+ if self.secret_access_key is None and self.secret is not None:
448
+ self.secret_access_key = self.secret
449
+ if self.session_token is None and self.token is not None:
450
+ self.session_token = self.token
451
+
452
+ # Load profile if specified
60
453
  if self.profile is not None:
61
- super().__init__(
62
- **self.from_aws_credentials(
63
- profile=self.profile,
64
- allow_invalid_certificates=self.allow_invalid_certificates,
65
- allow_http=self.allow_http,
66
- ).to_dict()
454
+ profile_opts = self.from_aws_credentials(
455
+ profile=self.profile,
456
+ allow_invalid_certificates=self.allow_invalid_certificates,
457
+ allow_http=self.allow_http,
67
458
  )
459
+ for k, v in profile_opts.to_dict().items():
460
+ if getattr(self, k) is None:
461
+ setattr(self, k, v)
68
462
 
69
463
  @classmethod
70
464
  def from_aws_credentials(
@@ -73,6 +467,29 @@ class AwsStorageOptions(BaseStorageOptions):
73
467
  allow_invalid_certificates: bool = False,
74
468
  allow_http: bool = False,
75
469
  ) -> "AwsStorageOptions":
470
+ """Create storage options from AWS credentials file.
471
+
472
+ Loads credentials from ~/.aws/credentials and ~/.aws/config files.
473
+
474
+ Args:
475
+ profile: AWS credentials profile name
476
+ allow_invalid_certificates: Skip SSL certificate validation
477
+ allow_http: Allow unencrypted HTTP connections
478
+
479
+ Returns:
480
+ AwsStorageOptions: Configured storage options
481
+
482
+ Raises:
483
+ ValueError: If profile not found
484
+ FileNotFoundError: If credentials files missing
485
+
486
+ Example:
487
+ >>> # Load developer profile
488
+ >>> options = AwsStorageOptions.from_aws_credentials(
489
+ ... profile="dev",
490
+ ... allow_http=True # For local testing
491
+ ... )
492
+ """
76
493
  cp = configparser.ConfigParser()
77
494
  cp.read(os.path.expanduser("~/.aws/credentials"))
78
495
  cp.read(os.path.expanduser("~/.aws/config"))
@@ -100,6 +517,26 @@ class AwsStorageOptions(BaseStorageOptions):
100
517
 
101
518
  @classmethod
102
519
  def from_env(cls) -> "AwsStorageOptions":
520
+ """Create storage options from environment variables.
521
+
522
+ Reads standard AWS environment variables:
523
+ - AWS_ACCESS_KEY_ID
524
+ - AWS_SECRET_ACCESS_KEY
525
+ - AWS_SESSION_TOKEN
526
+ - AWS_ENDPOINT_URL
527
+ - AWS_DEFAULT_REGION
528
+ - ALLOW_INVALID_CERTIFICATES
529
+ - AWS_ALLOW_HTTP
530
+
531
+ Returns:
532
+ AwsStorageOptions: Configured storage options
533
+
534
+ Example:
535
+ >>> # Load from environment
536
+ >>> options = AwsStorageOptions.from_env()
537
+ >>> print(options.region)
538
+ 'us-east-1' # From AWS_DEFAULT_REGION
539
+ """
103
540
  return cls(
104
541
  access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
105
542
  secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
@@ -112,6 +549,20 @@ class AwsStorageOptions(BaseStorageOptions):
112
549
  )
113
550
 
114
551
  def to_fsspec_kwargs(self) -> dict:
552
+ """Convert options to fsspec filesystem arguments.
553
+
554
+ Returns:
555
+ dict: Arguments suitable for fsspec S3FileSystem
556
+
557
+ Example:
558
+ >>> options = AwsStorageOptions(
559
+ ... access_key_id="KEY",
560
+ ... secret_access_key="SECRET",
561
+ ... region="us-west-2"
562
+ ... )
563
+ >>> kwargs = options.to_fsspec_kwargs()
564
+ >>> fs = filesystem("s3", **kwargs)
565
+ """
115
566
  fsspec_kwargs = {
116
567
  "key": self.access_key_id,
117
568
  "secret": self.secret_access_key,
@@ -119,27 +570,55 @@ class AwsStorageOptions(BaseStorageOptions):
119
570
  "endpoint_url": self.endpoint_url,
120
571
  "client_kwargs": {
121
572
  "region_name": self.region,
122
- "verify": (
123
- not self.allow_invalid_certificates
124
- if self.allow_invalid_certificates is not None
125
- else False
126
- ),
573
+ "verify": not self.allow_invalid_certificates
574
+ if self.allow_invalid_certificates is not None
575
+ else True,
127
576
  "use_ssl": not self.allow_http if self.allow_http is not None else True,
128
577
  },
129
578
  }
130
579
  return {k: v for k, v in fsspec_kwargs.items() if v is not None}
131
580
 
132
581
  def to_object_store_kwargs(self, with_conditional_put: bool = False) -> dict:
133
- object_store_kwargs = {
582
+ """Convert options to object store arguments.
583
+
584
+ Args:
585
+ with_conditional_put: Add etag-based conditional put support
586
+
587
+ Returns:
588
+ dict: Arguments suitable for object store clients
589
+
590
+ Example:
591
+ >>> options = AwsStorageOptions(
592
+ ... access_key_id="KEY",
593
+ ... secret_access_key="SECRET"
594
+ ... )
595
+ >>> kwargs = options.to_object_store_kwargs()
596
+ >>> client = ObjectStore(**kwargs)
597
+ """
598
+ kwargs = {
134
599
  k: str(v)
135
600
  for k, v in self.to_dict().items()
136
601
  if v is not None and k != "protocol"
137
602
  }
138
603
  if with_conditional_put:
139
- object_store_kwargs["conditional_put"] = "etag"
140
- return object_store_kwargs
604
+ kwargs["conditional_put"] = "etag"
605
+ return kwargs
141
606
 
142
607
  def to_env(self) -> None:
608
+ """Export options to environment variables.
609
+
610
+ Sets standard AWS environment variables.
611
+
612
+ Example:
613
+ >>> options = AwsStorageOptions(
614
+ ... access_key_id="KEY",
615
+ ... secret_access_key="SECRET",
616
+ ... region="us-east-1"
617
+ ... )
618
+ >>> options.to_env()
619
+ >>> print(os.getenv("AWS_ACCESS_KEY_ID"))
620
+ 'KEY'
621
+ """
143
622
  env = {
144
623
  "AWS_ACCESS_KEY_ID": self.access_key_id,
145
624
  "AWS_SECRET_ACCESS_KEY": self.secret_access_key,
@@ -157,67 +636,364 @@ class AwsStorageOptions(BaseStorageOptions):
157
636
 
158
637
 
159
638
  class GitHubStorageOptions(BaseStorageOptions):
639
+ """GitHub repository storage configuration options.
640
+
641
+ Provides access to files in GitHub repositories with support for:
642
+ - Public and private repositories
643
+ - Branch/tag/commit selection
644
+ - Token-based authentication
645
+ - Custom GitHub Enterprise instances
646
+
647
+ Attributes:
648
+ protocol (str): Always "github" for GitHub storage
649
+ org (str): Organization or user name
650
+ repo (str): Repository name
651
+ ref (str): Git reference (branch, tag, or commit SHA)
652
+ token (str): GitHub personal access token
653
+ api_url (str): Custom GitHub API URL for enterprise instances
654
+
655
+ Example:
656
+ >>> # Public repository
657
+ >>> options = GitHubStorageOptions(
658
+ ... org="microsoft",
659
+ ... repo="vscode",
660
+ ... ref="main"
661
+ ... )
662
+ >>>
663
+ >>> # Private repository
664
+ >>> options = GitHubStorageOptions(
665
+ ... org="myorg",
666
+ ... repo="private-repo",
667
+ ... token="ghp_xxxx",
668
+ ... ref="develop"
669
+ ... )
670
+ >>>
671
+ >>> # Enterprise instance
672
+ >>> options = GitHubStorageOptions(
673
+ ... org="company",
674
+ ... repo="internal",
675
+ ... api_url="https://github.company.com/api/v3",
676
+ ... token="ghp_xxxx"
677
+ ... )
678
+ """
679
+
160
680
  protocol: str = "github"
161
681
  org: str | None = None
162
682
  repo: str | None = None
163
- sha: str | None = None
683
+ ref: str | None = None
684
+ token: str | None = None
685
+ api_url: str | None = None
164
686
 
165
687
  @classmethod
166
688
  def from_env(cls) -> "GitHubStorageOptions":
689
+ """Create storage options from environment variables.
690
+
691
+ Reads standard GitHub environment variables:
692
+ - GITHUB_ORG: Organization or user name
693
+ - GITHUB_REPO: Repository name
694
+ - GITHUB_REF: Git reference
695
+ - GITHUB_TOKEN: Personal access token
696
+ - GITHUB_API_URL: Custom API URL
697
+
698
+ Returns:
699
+ GitHubStorageOptions: Configured storage options
700
+
701
+ Example:
702
+ >>> # With environment variables set:
703
+ >>> options = GitHubStorageOptions.from_env()
704
+ >>> print(options.org) # From GITHUB_ORG
705
+ 'microsoft'
706
+ """
167
707
  return cls(
168
708
  protocol="github",
169
709
  org=os.getenv("GITHUB_ORG"),
170
710
  repo=os.getenv("GITHUB_REPO"),
171
- sha=os.getenv("GITHUB_SHA"),
711
+ ref=os.getenv("GITHUB_REF"),
712
+ token=os.getenv("GITHUB_TOKEN"),
713
+ api_url=os.getenv("GITHUB_API_URL"),
172
714
  )
173
715
 
174
716
  def to_env(self) -> None:
175
- os.environ.update(
176
- {"GITHUB_ORG": self.org, "GITHUB_REPO": self.repo, "GITHUB_SHA": self.sha}
177
- )
717
+ """Export options to environment variables.
718
+
719
+ Sets standard GitHub environment variables.
720
+
721
+ Example:
722
+ >>> options = GitHubStorageOptions(
723
+ ... org="microsoft",
724
+ ... repo="vscode",
725
+ ... token="ghp_xxxx"
726
+ ... )
727
+ >>> options.to_env()
728
+ >>> print(os.getenv("GITHUB_ORG"))
729
+ 'microsoft'
730
+ """
731
+ env = {
732
+ "GITHUB_ORG": self.org,
733
+ "GITHUB_REPO": self.repo,
734
+ "GITHUB_REF": self.ref,
735
+ "GITHUB_TOKEN": self.token,
736
+ "GITHUB_API_URL": self.api_url,
737
+ }
738
+ env = {k: v for k, v in env.items() if v is not None}
739
+ os.environ.update(env)
740
+
741
+ def to_fsspec_kwargs(self) -> dict:
742
+ """Convert options to fsspec filesystem arguments.
743
+
744
+ Returns:
745
+ dict: Arguments suitable for GitHubFileSystem
746
+
747
+ Example:
748
+ >>> options = GitHubStorageOptions(
749
+ ... org="microsoft",
750
+ ... repo="vscode",
751
+ ... token="ghp_xxxx"
752
+ ... )
753
+ >>> kwargs = options.to_fsspec_kwargs()
754
+ >>> fs = filesystem("github", **kwargs)
755
+ """
756
+ kwargs = {
757
+ "org": self.org,
758
+ "repo": self.repo,
759
+ "ref": self.ref,
760
+ "token": self.token,
761
+ "api_url": self.api_url,
762
+ }
763
+ return {k: v for k, v in kwargs.items() if v is not None}
178
764
 
179
765
 
180
766
  class GitLabStorageOptions(BaseStorageOptions):
767
+ """GitLab repository storage configuration options.
768
+
769
+ Provides access to files in GitLab repositories with support for:
770
+ - Public and private repositories
771
+ - Self-hosted GitLab instances
772
+ - Project ID or name-based access
773
+ - Branch/tag/commit selection
774
+ - Token-based authentication
775
+
776
+ Attributes:
777
+ protocol (str): Always "gitlab" for GitLab storage
778
+ base_url (str): GitLab instance URL, defaults to gitlab.com
779
+ project_id (str | int): Project ID number
780
+ project_name (str): Project name/path
781
+ ref (str): Git reference (branch, tag, or commit SHA)
782
+ token (str): GitLab personal access token
783
+ api_version (str): API version to use
784
+
785
+ Example:
786
+ >>> # Public project on gitlab.com
787
+ >>> options = GitLabStorageOptions(
788
+ ... project_name="group/project",
789
+ ... ref="main"
790
+ ... )
791
+ >>>
792
+ >>> # Private project with token
793
+ >>> options = GitLabStorageOptions(
794
+ ... project_id=12345,
795
+ ... token="glpat_xxxx",
796
+ ... ref="develop"
797
+ ... )
798
+ >>>
799
+ >>> # Self-hosted instance
800
+ >>> options = GitLabStorageOptions(
801
+ ... base_url="https://gitlab.company.com",
802
+ ... project_name="internal/project",
803
+ ... token="glpat_xxxx"
804
+ ... )
805
+ """
806
+
181
807
  protocol: str = "gitlab"
182
808
  base_url: str = "https://gitlab.com"
183
- access_token: str | None = None
184
809
  project_id: str | int | None = None
185
810
  project_name: str | None = None
811
+ ref: str | None = None
812
+ token: str | None = None
813
+ api_version: str = "v4"
814
+
815
+ def model_post_init(self, __context: Any) -> None:
816
+ """Validate GitLab configuration after initialization.
817
+
818
+ Ensures either project_id or project_name is provided.
819
+
820
+ Args:
821
+ __context: Pydantic validation context (unused)
822
+
823
+ Raises:
824
+ ValueError: If neither project_id nor project_name is provided
825
+
826
+ Example:
827
+ >>> # Valid initialization
828
+ >>> options = GitLabStorageOptions(project_id=12345)
829
+ >>>
830
+ >>> # Invalid initialization
831
+ >>> try:
832
+ ... options = GitLabStorageOptions()
833
+ ... except ValueError as e:
834
+ ... print(str(e))
835
+ 'Either project_id or project_name must be provided'
836
+ """
837
+ if self.project_id is None and self.project_name is None:
838
+ raise ValueError("Either project_id or project_name must be provided")
186
839
 
187
840
  @classmethod
188
841
  def from_env(cls) -> "GitLabStorageOptions":
842
+ """Create storage options from environment variables.
843
+
844
+ Reads standard GitLab environment variables:
845
+ - GITLAB_URL: Instance URL
846
+ - GITLAB_PROJECT_ID: Project ID
847
+ - GITLAB_PROJECT_NAME: Project name/path
848
+ - GITLAB_REF: Git reference
849
+ - GITLAB_TOKEN: Personal access token
850
+ - GITLAB_API_VERSION: API version
851
+
852
+ Returns:
853
+ GitLabStorageOptions: Configured storage options
854
+
855
+ Example:
856
+ >>> # With environment variables set:
857
+ >>> options = GitLabStorageOptions.from_env()
858
+ >>> print(options.project_id) # From GITLAB_PROJECT_ID
859
+ '12345'
860
+ """
189
861
  return cls(
190
862
  protocol="gitlab",
191
- base_url=os.getenv("GITLAB_BASE_URL"),
192
- access_token=os.getenv("GITLAB_ACCESS_TOKEN"),
863
+ base_url=os.getenv("GITLAB_URL", "https://gitlab.com"),
193
864
  project_id=os.getenv("GITLAB_PROJECT_ID"),
194
865
  project_name=os.getenv("GITLAB_PROJECT_NAME"),
866
+ ref=os.getenv("GITLAB_REF"),
867
+ token=os.getenv("GITLAB_TOKEN"),
868
+ api_version=os.getenv("GITLAB_API_VERSION", "v4"),
195
869
  )
196
870
 
197
- def model_post_init(self, __context):
198
- if self.project_id is None and self.project_name is None:
199
- raise ValueError("Either 'project_id' or 'project_name' must be provided")
871
def to_env(self) -> None:
    """Publish these options as GitLab environment variables.

    Each configured field is written to its ``GITLAB_*`` variable in
    ``os.environ``. Fields that are ``None`` are skipped, so variables that
    already exist are left untouched for them. A falsy ``project_id`` is
    treated as "not set"; any other value is stringified first.

    Example:
        >>> options = GitLabStorageOptions(
        ...     project_id=12345,
        ...     token="glpat_xxxx"
        ... )
        >>> options.to_env()
        >>> print(os.getenv("GITLAB_PROJECT_ID"))
        12345
    """
    # Environment values must be strings, hence the explicit conversion.
    project_id = str(self.project_id) if self.project_id else None
    exports = (
        ("GITLAB_URL", self.base_url),
        ("GITLAB_PROJECT_ID", project_id),
        ("GITLAB_PROJECT_NAME", self.project_name),
        ("GITLAB_REF", self.ref),
        ("GITLAB_TOKEN", self.token),
        ("GITLAB_API_VERSION", self.api_version),
    )
    for variable, value in exports:
        if value is not None:
            os.environ[variable] = value
895
+
896
def to_fsspec_kwargs(self) -> dict:
    """Collect the keyword arguments for an fsspec GitLab filesystem.

    Only fields that are actually set are included; ``None`` values are
    dropped.

    Returns:
        dict: Arguments suitable for GitLabFileSystem

    Example:
        >>> options = GitLabStorageOptions(
        ...     project_id=12345,
        ...     token="glpat_xxxx"
        ... )
        >>> kwargs = options.to_fsspec_kwargs()
        >>> fs = filesystem("gitlab", **kwargs)
    """
    field_names = (
        "base_url",
        "project_id",
        "project_name",
        "ref",
        "token",
        "api_version",
    )
    kwargs = {}
    for field_name in field_names:
        value = getattr(self, field_name)
        if value is not None:
            kwargs[field_name] = value
    return kwargs
200
919
 
201
920
 
202
921
  class LocalStorageOptions(BaseStorageOptions):
922
+ """Local filesystem configuration options.
923
+
924
+ Provides basic configuration for local file access. While this class
925
+ is simple, it maintains consistency with other storage options and
926
+ enables transparent switching between local and remote storage.
927
+
928
+ Attributes:
929
+ protocol (str): Always "file" for local filesystem
930
+ auto_mkdir (bool): Create directories automatically
931
+ mode (int): Default file creation mode (unix-style)
932
+
933
+ Example:
934
+ >>> # Basic local access
935
+ >>> options = LocalStorageOptions()
936
+ >>> fs = options.to_filesystem()
937
+ >>> files = fs.ls("/path/to/data")
938
+ >>>
939
+ >>> # With auto directory creation
940
+ >>> options = LocalStorageOptions(auto_mkdir=True)
941
+ >>> fs = options.to_filesystem()
942
+ >>> with fs.open("/new/path/file.txt", "w") as f:
943
+ ... f.write("test") # Creates /new/path/ automatically
944
+ """
945
+
203
946
  protocol: str = "file"
947
+ auto_mkdir: bool = False
948
+ mode: int | None = None
949
+
950
+ def to_fsspec_kwargs(self) -> dict:
951
+ """Convert options to fsspec filesystem arguments.
952
+
953
+ Returns:
954
+ dict: Arguments suitable for LocalFileSystem
955
+
956
+ Example:
957
+ >>> options = LocalStorageOptions(auto_mkdir=True)
958
+ >>> kwargs = options.to_fsspec_kwargs()
959
+ >>> fs = filesystem("file", **kwargs)
960
+ """
961
+ kwargs = {
962
+ "auto_mkdir": self.auto_mkdir,
963
+ "mode": self.mode,
964
+ }
965
+ return {k: v for k, v in kwargs.items() if v is not None}
966
+
967
+
968
+ def from_dict(protocol: str, storage_options: dict) -> BaseStorageOptions:
969
+ """Create appropriate storage options instance from dictionary.
970
+
971
+ Factory function that creates the correct storage options class based on protocol.
204
972
 
973
+ Args:
974
+ protocol: Storage protocol identifier (e.g., "s3", "gs", "file")
975
+ storage_options: Dictionary of configuration options
205
976
 
206
- def from_dict(
207
- protocol: str, storage_options: dict
208
- ) -> (
209
- AwsStorageOptions
210
- | AzureStorageOptions
211
- | GcsStorageOptions
212
- | GitHubStorageOptions
213
- | GitLabStorageOptions
214
- | LocalStorageOptions
215
- ):
977
+ Returns:
978
+ BaseStorageOptions: Appropriate storage options instance
979
+
980
+ Raises:
981
+ ValueError: If protocol is not supported
982
+
983
+ Example:
984
+ >>> # Create S3 options
985
+ >>> options = from_dict("s3", {
986
+ ... "access_key_id": "KEY",
987
+ ... "secret_access_key": "SECRET"
988
+ ... })
989
+ >>> print(type(options).__name__)
990
+ 'AwsStorageOptions'
991
+ """
216
992
  if protocol == "s3":
217
993
  return AwsStorageOptions(**storage_options)
218
- elif protocol == "az" or protocol == "abfs" or protocol == "adl":
994
+ elif protocol in ["az", "abfs", "adl"]:
219
995
  return AzureStorageOptions(**storage_options)
220
- elif protocol == "gs" or protocol == "gcs":
996
+ elif protocol in ["gs", "gcs"]:
221
997
  return GcsStorageOptions(**storage_options)
222
998
  elif protocol == "github":
223
999
  return GitHubStorageOptions(**storage_options)
@@ -229,16 +1005,27 @@ def from_dict(
229
1005
  raise ValueError(f"Unsupported protocol: {protocol}")
230
1006
 
231
1007
 
232
- def from_env(
233
- protocol: str,
234
- ) -> (
235
- AwsStorageOptions
236
- | AzureStorageOptions
237
- | GcsStorageOptions
238
- | GitHubStorageOptions
239
- | GitLabStorageOptions
240
- | LocalStorageOptions
241
- ):
1008
+ def from_env(protocol: str) -> BaseStorageOptions:
1009
+ """Create storage options from environment variables.
1010
+
1011
+ Factory function that creates and configures storage options from
1012
+ protocol-specific environment variables.
1013
+
1014
+ Args:
1015
+ protocol: Storage protocol identifier (e.g., "s3", "github")
1016
+
1017
+ Returns:
1018
+ BaseStorageOptions: Configured storage options instance
1019
+
1020
+ Raises:
1021
+ ValueError: If protocol is not supported
1022
+
1023
+ Example:
1024
+ >>> # With AWS credentials in environment
1025
+ >>> options = from_env("s3")
1026
+ >>> print(options.access_key_id) # From AWS_ACCESS_KEY_ID
1027
+ 'AKIAXXXXXX'
1028
+ """
242
1029
  if protocol == "s3":
243
1030
  return AwsStorageOptions.from_env()
244
1031
  elif protocol == "github":
@@ -252,9 +1039,47 @@ def from_env(
252
1039
 
253
1040
 
254
1041
  class StorageOptions(BaseModel):
1042
+ """High-level storage options container and factory.
1043
+
1044
+ Provides a unified interface for creating and managing storage options
1045
+ for different protocols.
1046
+
1047
+ Attributes:
1048
+ storage_options (BaseStorageOptions): Underlying storage options instance
1049
+
1050
+ Example:
1051
+ >>> # Create from protocol
1052
+ >>> options = StorageOptions(
1053
+ ... protocol="s3",
1054
+ ... access_key_id="KEY",
1055
+ ... secret_access_key="SECRET"
1056
+ ... )
1057
+ >>>
1058
+ >>> # Create from existing options
1059
+ >>> s3_opts = AwsStorageOptions(access_key_id="KEY")
1060
+ >>> options = StorageOptions(storage_options=s3_opts)
1061
+ """
1062
+
255
1063
  storage_options: BaseStorageOptions
256
1064
 
257
- def __init__(self, **data):
1065
+ def __init__(self, **data: Any):
1066
+ """Initialize storage options from arguments.
1067
+
1068
+ Args:
1069
+ **data: Either:
1070
+ - protocol and configuration options
1071
+ - storage_options=pre-configured instance
1072
+
1073
+ Raises:
1074
+ ValueError: If protocol missing or invalid
1075
+
1076
+ Example:
1077
+ >>> # Direct protocol config
1078
+ >>> options = StorageOptions(
1079
+ ... protocol="s3",
1080
+ ... region="us-east-1"
1081
+ ... )
1082
+ """
258
1083
  protocol = data.get("protocol")
259
1084
  if protocol is None and "storage_options" not in data:
260
1085
  raise ValueError("protocol must be specified")
@@ -281,12 +1106,39 @@ class StorageOptions(BaseModel):
281
1106
 
282
1107
  @classmethod
283
1108
  def from_yaml(cls, path: str, fs: AbstractFileSystem = None) -> "StorageOptions":
1109
+ """Create storage options from YAML configuration.
1110
+
1111
+ Args:
1112
+ path: Path to YAML configuration file
1113
+ fs: Filesystem for reading configuration
1114
+
1115
+ Returns:
1116
+ StorageOptions: Configured storage options
1117
+
1118
+ Example:
1119
+ >>> # Load from config file
1120
+ >>> options = StorageOptions.from_yaml("storage.yml")
1121
+ >>> print(options.storage_options.protocol)
1122
+ 's3'
1123
+ """
284
1124
  with fs.open(path, "r") as f:
285
1125
  data = yaml.safe_load(f)
286
1126
  return cls(**data)
287
1127
 
288
1128
  @classmethod
289
1129
  def from_env(cls, protocol: str) -> "StorageOptions":
1130
+ """Create storage options from environment variables.
1131
+
1132
+ Args:
1133
+ protocol: Storage protocol to configure
1134
+
1135
+ Returns:
1136
+ StorageOptions: Environment-configured options
1137
+
1138
+ Example:
1139
+ >>> # Load AWS config from environment
1140
+ >>> options = StorageOptions.from_env("s3")
1141
+ """
290
1142
  if protocol == "s3":
291
1143
  return cls(storage_options=AwsStorageOptions.from_env())
292
1144
  elif protocol == "github":
@@ -299,12 +1151,202 @@ class StorageOptions(BaseModel):
299
1151
  raise ValueError(f"Unsupported protocol: {protocol}")
300
1152
 
301
1153
  def to_filesystem(self) -> AbstractFileSystem:
1154
+ """Create fsspec filesystem instance.
1155
+
1156
+ Returns:
1157
+ AbstractFileSystem: Configured filesystem instance
1158
+
1159
+ Example:
1160
+ >>> options = StorageOptions(protocol="file")
1161
+ >>> fs = options.to_filesystem()
1162
+ >>> files = fs.ls("/data")
1163
+ """
302
1164
  return self.storage_options.to_filesystem()
303
1165
 
304
1166
  def to_dict(self, protocol: bool = False) -> dict:
1167
+ """Convert storage options to dictionary.
1168
+
1169
+ Args:
1170
+ protocol: Whether to include protocol in output
1171
+
1172
+ Returns:
1173
+ dict: Storage options as dictionary
1174
+
1175
+ Example:
1176
+ >>> options = StorageOptions(
1177
+ ... protocol="s3",
1178
+ ... region="us-east-1"
1179
+ ... )
1180
+ >>> print(options.to_dict())
1181
+ {'region': 'us-east-1'}
1182
+ """
305
1183
  return self.storage_options.to_dict(protocol=protocol)
306
1184
 
307
1185
  def to_object_store_kwargs(self, with_conditional_put: bool = False) -> dict:
1186
+ """Get options formatted for object store clients.
1187
+
1188
+ Args:
1189
+ with_conditional_put: Add etag-based conditional put support
1190
+
1191
+ Returns:
1192
+ dict: Object store configuration dictionary
1193
+
1194
+ Example:
1195
+ >>> options = StorageOptions(protocol="s3")
1196
+ >>> kwargs = options.to_object_store_kwargs()
1197
+ >>> store = ObjectStore(**kwargs)
1198
+ """
308
1199
  return self.storage_options.to_object_store_kwargs(
309
1200
  with_conditional_put=with_conditional_put
310
1201
  )
1202
+
1203
+
1204
def infer_protocol_from_uri(uri: str) -> str:
    """Determine the storage protocol a URI refers to.

    The scheme in front of ``://`` is looked up in a table of known
    schemes. Anything without a recognised scheme - plain paths as well as
    unknown schemes - is treated as a local file path. Matching is
    case-sensitive.

    Args:
        uri: URI or path string to analyze. Examples:
            - "s3://bucket/path"
            - "gs://bucket/path"
            - "github://org/repo"
            - "/local/path"

    Returns:
        str: Inferred protocol identifier. ``gcs://`` is normalised to
        ``"gs"``; the Azure schemes (``az``, ``abfs``, ``adl``) are returned
        as written.

    Example:
        >>> infer_protocol_from_uri("s3://my-bucket/data")
        's3'
        >>> infer_protocol_from_uri("/home/user/data")
        'file'
        >>> infer_protocol_from_uri("github://microsoft/vscode")
        'github'
    """
    known_schemes = {
        "s3": "s3",
        "gs": "gs",
        "gcs": "gs",
        "github": "github",
        "gitlab": "gitlab",
        "az": "az",
        "abfs": "abfs",
        "adl": "adl",
    }
    scheme, separator, _ = uri.partition("://")
    if not separator:
        # No "://" at all: a plain path.
        return "file"
    return known_schemes.get(scheme, "file")
1245
+
1246
+
1247
def storage_options_from_uri(uri: str) -> BaseStorageOptions:
    """Create storage options instance from a URI string.

    Infers the protocol and extracts relevant configuration from the URI
    to create appropriate storage options.

    Args:
        uri: URI string containing protocol and optional configuration.
            Examples:
            - "s3://bucket/path"
            - "gs://project/bucket/path"
            - "github://org/repo"

    Returns:
        BaseStorageOptions: Configured storage options instance. URIs without
        a recognised scheme yield default ``LocalStorageOptions``.

    Example:
        >>> # S3 options
        >>> opts = storage_options_from_uri("s3://my-bucket/data")
        >>> print(opts.protocol)
        s3
        >>>
        >>> # GitHub options
        >>> opts = storage_options_from_uri("github://microsoft/vscode")
        >>> print(opts.org)
        microsoft
        >>> print(opts.repo)
        vscode
    """
    protocol = infer_protocol_from_uri(uri)

    # Work on a copy and drop any "protocol" entry: the protocol is passed
    # explicitly below, and a second "protocol" key in ``options`` would make
    # the constructor call fail with "got multiple values for keyword
    # argument 'protocol'". (fsspec's ``infer_storage_options`` always
    # includes that key - presumably that is the helper imported here.)
    options = dict(infer_storage_options(uri))
    options.pop("protocol", None)

    if protocol == "s3":
        return AwsStorageOptions(protocol=protocol, **options)
    if protocol in ("gs", "gcs"):
        return GcsStorageOptions(protocol=protocol, **options)
    if protocol == "github":
        # Strip only the leading scheme; the first two path segments are the
        # organisation and the repository.
        parts = uri.removeprefix("github://").split("/")
        return GitHubStorageOptions(
            protocol=protocol, org=parts[0], repo=parts[1] if len(parts) > 1 else None
        )
    if protocol == "gitlab":
        # ``str.split`` never returns an empty list, so the last segment
        # always exists (it may be an empty string).
        parts = uri.removeprefix("gitlab://").split("/")
        return GitLabStorageOptions(protocol=protocol, project_name=parts[-1])
    if protocol in ("az", "abfs", "adl"):
        return AzureStorageOptions(protocol=protocol, **options)
    return LocalStorageOptions()
1297
+
1298
+
1299
def merge_storage_options(
    *options: BaseStorageOptions | dict | None, overwrite: bool = True
) -> BaseStorageOptions:
    """Merge multiple storage options into a single configuration.

    Combines options from multiple sources with control over precedence.
    The protocol follows the same precedence rule as every other key, so the
    class of the returned instance always matches its ``protocol`` field.
    If no source names a protocol, "file" is used.

    Args:
        *options: Storage options to merge. Can be:
            - BaseStorageOptions instances
            - Dictionaries of options
            - None values (ignored)
        overwrite: Whether later options override earlier ones

    Returns:
        BaseStorageOptions: Combined storage options

    Raises:
        ValueError: If the resulting protocol is not supported (raised by
            ``from_dict``).

    Example:
        >>> # Merge with overwrite
        >>> base = AwsStorageOptions(
        ...     region="us-east-1",
        ...     access_key_id="OLD_KEY"
        ... )
        >>> override = {"access_key_id": "NEW_KEY"}
        >>> merged = merge_storage_options(base, override)
        >>> print(merged.access_key_id)
        NEW_KEY
        >>>
        >>> # Preserve existing values
        >>> merged = merge_storage_options(
        ...     base,
        ...     override,
        ...     overwrite=False
        ... )
        >>> print(merged.access_key_id)
        OLD_KEY
    """
    merged: dict = {}
    protocol = None

    for opts in options:
        if opts is None:
            continue
        if isinstance(opts, BaseStorageOptions):
            # NOTE(review): StorageOptions.to_dict forwards ``protocol=`` to
            # the same method - confirm ``with_protocol`` is the keyword
            # BaseStorageOptions.to_dict accepts.
            opts = opts.to_dict(with_protocol=True)

        # Previously the first protocol seen always selected the class, while
        # with overwrite=True the merged "protocol" value came from the last
        # source - the two could disagree. Apply one rule to both.
        candidate = opts.get("protocol")
        if candidate and (overwrite or protocol is None):
            protocol = candidate

        for key, value in opts.items():
            if overwrite or key not in merged:
                merged[key] = value

    protocol = protocol or "file"
    # Keep the field in step with the class chosen by from_dict().
    merged["protocol"] = protocol
    return from_dict(protocol, merged)